1. Re: Mac text files and gets()
Since one cannot rely on the file opening mode to decide whether line
terminator translation is applied, reading lines from a text file has to be
done by a library routine, possibly a standard one. Here is a possible
implementation:
include get.e
include file.e
-- Pass this as current_line when the file needs to be rewound before reading
-- (get_lines rewinds whenever current_line<=LINE_UNKNOWN).
global constant LINE_UNKNOWN = 0
-- termline and n_termline are parallel tables: the value found at index p of
-- termline is replaced by the value at index p of n_termline.
-- 10, 11 and 12 all become 10; 13 stays 13; 26 and -1 (the value getc()
-- returns at end of file) both become -1.
constant termline={10,11,12,13,26,-1} -- physical line terminators
constant n_termline={10,10,10,13,-1,-1} -- normalised
-- Distinct bit flags, one per line-ending style, so that several can be
-- combined with or_bits().
constant LF=1,CR=2,CRLF=4 -- used to report which line endings were found
global function get_lines(object file,integer current_line,integer start_line,integer num_lines)
-- Returns a sequence of at most num_lines lines read from file, each one
-- terminated by a single \n whatever terminator the file itself uses.
-- The first returned line is line #start_line (1-based) of the file.
--
-- Parameters:
--   file         : either a file name (sequence), which is opened in binary
--                  mode here and closed on return, or an open file handle.
--   current_line : only meaningful for a handle: the number of the line the
--                  file pointer is currently inside. If it is <=LINE_UNKNOWN,
--                  or if start_line<current_line, the file is rewound and
--                  lines are counted from the beginning of the file.
--   start_line   : number of the first line wanted.
--   num_lines    : maximum number of lines wanted.
--
-- Returns:
--   {} if start_line or num_lines is not positive, if the file cannot be
--   opened, if the handle is invalid or cannot be rewound, or if the file
--   ends before line #start_line is reached.
--   Otherwise the lines found, fewer than num_lines if the file ends early.
--   A trailing line terminator does not produce an extra empty line.
--
-- When file is a handle it is left open, with its file pointer either at end
-- of file or at the start of the next line.
-- Supports all known line terminators (CR, LF, CR+LF), treats FF and VT as
-- \n, and treats Ctrl-Z (26) like the end of the file.
    integer fh,read_0x0D,early_end,final_line,len_line
    integer idx,p,pos,c
    sequence result,line

    if start_line<=0 or num_lines<=0 then
        return {}
    end if
    if sequence(file) then
        fh=open(file,"rb")
        if fh=-1 then
            return {}
        end if
        current_line=1
    else
        fh=file
        if where(fh)=-1 then -- not a valid file handle
            return {}
        end if
        if current_line<=LINE_UNKNOWN or start_line<current_line then
            -- rewind
            if seek(fh,0) then
                return {}
            end if
            current_line=1
        end if
    end if

    early_end=0
    read_0x0D=0 -- set while the previous byte was a CR that ended a line
    final_line=start_line+num_lines-1
    idx=1 -- next free slot in line
    pos=1 -- next free slot in result
    result=repeat(0,num_lines)
    len_line=128 -- initial buffer size, doubled on demand
    line=repeat(0,len_line)

    while current_line<=final_line do
        c=getc(fh)
        p=find(c,termline)
        if idx>len_line then
            -- grow the buffer before anything gets stored at line[idx]
            line&=line
            len_line*=2
        end if
        if p=0 then
            -- ordinary character: kept only once start_line has been reached
            read_0x0D=0
            if current_line>=start_line then
                line[idx]=c
                idx+=1
            end if
        elsif n_termline[p]=-1 then
            -- end of file (or Ctrl-Z): flush a pending unterminated line, but
            -- do not invent an empty one when nothing is pending
            early_end=1
            read_0x0D=0
            if current_line<start_line then
                result={}
            else
                if idx>1 then
                    line[idx]='\n'
                    result[pos]=line[1..idx]
                    pos+=1
                end if
                result=result[1..pos-1]
            end if
            exit
        elsif n_termline[p]='\r' then
            -- CR ends the line at once; a following LF is skipped below
            if current_line>=start_line then
                line[idx]='\n'
                result[pos]=line[1..idx]
                pos+=1
                idx=1
            end if
            current_line+=1
            read_0x0D=1
        elsif read_0x0D and termline[p]='\n' then
            -- LF half of a CR+LF pair: the line was already emitted on the CR
            read_0x0D=0
        else
            -- LF, or VT/FF normalised to LF
            read_0x0D=0
            if current_line>=start_line then
                line[idx]=n_termline[p]
                result[pos]=line[1..idx]
                pos+=1
                idx=1
            end if
            current_line+=1
        end if
    end while

    if sequence(file) then
        close(fh)
    elsif read_0x0D then
        -- The last line returned ended on a CR. If this is the CR of a CR+LF
        -- pair, swallow the LF so that the file pointer really is at the
        -- start of the next line; otherwise put the byte back.
        c=getc(fh)
        if c!='\n' and c!=-1 then
            p=seek(fh,where(fh)-1) -- best effort, status deliberately unused
        end if
    end if
    return result
end function
Hopefully there are not too many typos.
Obviously, returning {} could be replaced by returning an atomic error code
(not enough lines, file is invalid, file couldn't be opened, and so on).
CChris