Re: Parsing

new topic     » goto parent     » topic index » view thread      » older message » newer message

-- parser.ex
-- HTML parser

function parse(sequence line)
-- Strip HTML tags from a single line of text.
--
-- Everything from a '<' up to and including the next '>' on the same
-- line is removed; the remaining text is returned as a sequence.
-- A line that ends up containing only a newline ({10}) is returned
-- as the empty sequence {}.
--
-- A '<' that has no closing '>' later on the line is not a tag, so the
-- rest of the line (including that '<') is kept unchanged.
-- Tags that span several lines are not recognised, because each line is
-- handled on its own.
integer l, g
sequence output
    output = {}
    while 1 do
        l = find('<', line)
        -- g is the offset of '>' relative to the character after '<'.
        -- It must be tested for 0 BEFORE adding l, otherwise a missing
        -- '>' is indistinguishable from a real position.
        g = 0
        if l != 0 then
            g = find('>', line[l+1..length(line)])
        end if
        if l = 0 or g = 0 then  -- no (complete) tag left on this line
            output = output & line
            if compare(output, {10}) = 0 then
                -- only the newline survived: treat the line as empty
                output = {}
            end if
            return output
        end if
        g = g + l   -- convert to an absolute index into line
        -- keep the text before the tag, then continue after the '>'
        output = output & line[1..l-1]
        line = line[g+1..length(line)]
    end while
end function    -- parse

procedure if_err(object test, sequence err_message)
-- generic error handler
-- if test is true (non-zero), print err_message followed by a newline
-- and terminate the program; otherwise do nothing.
--
-- The message goes to standard error (file 2) so it does not mix with
-- regular output on standard output, and the program exits with a
-- non-zero status so callers/scripts can detect the failure.
    if test then
        puts(2, err_message & '\n')
        abort(1)
    end if
end procedure   -- if_err

function read_file(sequence filename)
-- Load the text file named by filename and return its contents as a
-- sequence of lines, each line exactly as delivered by gets().
-- If the file cannot be opened, if_err() is called with an error message.
integer fn
object text
sequence lines
    fn = open(filename, "r")
    if_err(fn = -1, "Can't open file " & filename)

    lines = {}
    -- gets() yields a sequence for every line and an atom at end of file
    text = gets(fn)
    while sequence(text) do
        lines = append(lines, text)
        text = gets(fn)
    end while

    close(fn)
    return lines
end function    -- read_file

constant filename = "output.txt"

integer handle
sequence buffer, output

-- load every line of the input document
buffer = read_file("file.htm")

-- create the destination file, stopping if that fails
handle = open(filename, "w")
if_err(handle = -1, "Can't open file " & filename)

-- strip the tags line by line; echo each result to the screen
-- and write it to the destination file
output = {}
for i = 1 to length(buffer) do
    output = append(output, parse(buffer[i]))
    puts(1, output[i])
    puts(handle, output[i])
end for
close(handle)

new topic     » goto parent     » topic index » view thread      » older message » newer message

Search



Quick Links

User menu

Not signed in.

Misc Menu