Re: Parsing
-- parser.ex
-- HTML parser
function parse(sequence line)
-- Remove every "<...>" tag from line and return the remaining text.
--
-- line: one line of text (a flat sequence of characters).
-- Returns: the text with each '<' ... '>' span removed. If the result
-- consists of nothing but a single newline character (10), an empty
-- sequence is returned instead.
--
-- A '<' that has no matching '>' later on the same line is not treated
-- as a tag: it and everything after it are kept as plain text.
    integer l, g
    sequence output

    output = {}
    while 1 do
        l = find('<', line)
        -- Search for the closing bracket only after an opening one was
        -- found, and keep the raw find() result so that "not found" (0)
        -- can still be told apart from a real position.
        g = 0
        if l != 0 then
            g = find('>', line[l+1..length(line)])
        end if
        if l = 0 or g = 0 then -- no (complete) tag left
            output = output & line
            if compare(output, {10}) = 0 then
                -- only a newline remained: report the line as empty
                output = {}
            end if
            return output
        end if
        -- convert g from an offset past '<' to an index into line
        g = g + l
        output = output & line[1..l-1]
        line = line[g+1..length(line)]
    end while
end function -- parse
procedure if_err(object test, sequence err_message)
-- Generic error handler.
--
-- test: condition to check; when it is true the program is terminated.
-- err_message: text printed (followed by a newline) before terminating.
--
-- When test is false the procedure does nothing and returns normally.
    if test then
        puts(1, err_message & '\n')
        -- exit with a non-zero status so callers of the program can
        -- tell that it stopped because of an error
        abort(1)
    end if
end procedure -- if_err
function read_file(sequence filename)
-- Read the file named filename and return its contents as a sequence
-- of lines, each line being exactly what gets() returned for it.
-- If the file cannot be opened, if_err() is called with a
-- "Can't open file" message.
    sequence lines
    object current
    integer fn

    fn = open(filename, "r")
    if_err(fn = -1, "Can't open file " & filename)

    lines = {}
    -- gets() yields a sequence for every line and an atom at end of file
    current = gets(fn)
    while sequence(current) do
        lines = append(lines, current)
        current = gets(fn)
    end while

    close(fn)
    return lines
end function -- read_file
-- Main script: strip the tags from every line of "file.htm", echo the
-- result to the screen and write it to the output file.
constant filename = "output.txt"
integer handle
sequence buffer, output

-- load the input lines
buffer = read_file("file.htm")

-- open the destination file for writing
handle = open(filename, "w")
if_err(handle = -1, "Can't open file " & filename)

-- process line by line, collecting the stripped lines in output
output = {}
for i = 1 to length(buffer) do
    output = append(output, parse(buffer[i]))
    puts(1, output[i])
    puts(handle, output[i])
end for

close(handle)
|
Not Categorized, Please Help
|
|