Re: Attn: Robert (Non-Blocking Support Revisited)


I noticed there was still some old debug code in there.
Here's a cleaner version with a dozen lines of 
debug code stripped out ...

-- Search news pages

-- All this demo does is read several Web pages in parallel 
-- and report on the number of occurrences of a word or phrase.
-- Each page is handled by a separate Euphoria task running
-- in parallel with several other tasks.

-- usage:
--    exw news.exu string
-- or:
--    exw news.exu "a multi-word phrase"
-- (case insensitive search)

-- On Linux/FreeBSD use exu instead of exw.

-- This demo uses Euphoria's new multitasking feature. It can run
-- on Linux, FreeBSD, or Windows, without change.
-- It creates multiple wget background processes, each retrieving one Web page.

-- You can get a version of wget for Windows from:
-- http://www.gnu.org/software/wget/wget.html
-- Linux and FreeBSD systems will probably already have it.

-- A Euphoria task is assigned to each instance of wget, searching the 
-- Web page text as it arrives. In this way, when a task is blocked due 
-- to a delayed response from a particular server, the program can easily 
-- switch to another task that is not blocked. The program quits after
-- roughly 10 seconds with no progress made on any page (it allows about
-- 45 seconds at start-up for the first data to arrive).

include wildcard.e
include graphics.e

sequence cl
sequence search_phrase

cl = command_line()
if length(cl) >= 3 then
    search_phrase = cl[3]
else
    puts(1, "Usage:\n")
    puts(1, "       exw news.exu search-phrase\n")
    if getc(0) then
    end if
    abort(1)
end if

-- news sources
sequence URLs
URLs = {
    "http://www.cbc.ca/news/",
    "http://www.juancole.com/",
    "http://www.abc.net.au/",
    "http://abcnews.go.com/",
    "http://english.aljazeera.net/HomePage",
    "http://news.bbc.co.uk/",
    "http://www.cbsnews.com/",
    "http://cnn.com/",
    "http://www.democracynow.org/index.pl",
    "http://www.foxnews.com/",
    "http://www.guardian.co.uk/",
    "http://www.msnbc.msn.com/",
    "http://www.reuters.com/",
    "http://www.whatreallyhappened.com/",
    "http://news.yahoo.com/"
}

sequence null_device, del_cmd

if platform() = LINUX then
    URLs = URLs[1..9] -- less room on screen
    null_device = "/dev/null"
    del_cmd = "rm"
else
    null_device = "NUL"
    del_cmd = "del"
end if

integer progress, quit

procedure search_url(sequence url, sequence string)
-- download a Web page and search it for a string   
    integer f, hits
    integer line_count
    object line
    sequence mytemp, ustring
    
    position(task_self()*2+1, 1)
    printf(1, "task %2.0f: %s\n         waiting for wget...", {task_self(), url})
    
    ustring = upper(string)
    hits = 0
    
    -- run a copy of wget as a background process
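    -- wget flags: -q (quiet), -b (go to background immediately), -O (write the page to the named file)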
    mytemp = sprintf("newstemp%.0f.html", task_self())
    system(sprintf("wget -q -b -O %s %s > %s", {mytemp, url, null_device}), 2)
    
    f = -1
    while f = -1 do
        -- wait until file exists
        if quit then
            return
        end if
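        -- real-time schedule: ask to be woken again in 1 to 2 seconds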
    
        task_schedule(task_self(), {1.0, 2.0})
        task_yield()
        f = open(mytemp, "rb")
    end while

    position(task_self()*2+2, 1)
    text_color(BRIGHT_RED)
    puts(1, "         waiting for data...")
    text_color(WHITE)
    
    line_count = 0
    while 1 do
        line = gets(f)
        if atom(line) then
            -- could be actual end-of-file, or maybe there's more coming
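            -- poll every 1 to 1.5 seconds for more data, until told to quit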
            task_schedule(task_self(), {1.0, 1.5})
            while 1 do
                line = gets(f)
                if sequence(line) then
                    exit -- more data came in
                end if
                if quit then
                    return -- we've been told to quit
                end if  
                task_yield()
            end while
        end if
        
        if match(ustring, upper(line)) then
            hits += 1
        end if
        
        line_count += 1
        position(task_self()*2+2, 1)
        text_color(BRIGHT_GREEN)
        printf(1, "         matched %d lines out of %d   ", {hits, line_count})
        text_color(WHITE)
        progress = 1
        -- this yield is not necessary, but it 
        -- lets you see the parallelism better
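        -- time-shared schedule: run this task once each time it gets a turn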
        task_schedule(task_self(), 1)
        task_yield()
    end while
end procedure

integer t
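-- create one task per URL; the tasks don't start running until the main task yields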

for i = 1 to length(URLs) do
    t = task_create(routine_id("search_url"), {URLs[i], search_phrase})
    task_schedule(t, 1)
end for
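-- remove any stale newstemp files left over from a previous run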

system(del_cmd & " newstemp*.html > " & null_device, 2)
clear_screen()
if text_rows(43) then
end if
puts(1, "Looking for lines containing \"" & search_phrase & "\"")

atom time_out
time_out = time() + 45
task_schedule(0, {2.5, 3.0}) -- check the time every 2.5 to 3.0 seconds

quit = 0
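-- the main task acts as a watchdog: each task_yield lets the workers run,
-- then we check whether any of them read new data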
while 1 do
    progress = 0
    task_yield()
    if progress then
        -- quit 10 seconds after no more lines are read
        -- from any file by any task
        time_out = time() + 10
    else
        if time() > time_out then
            exit
        end if
    end if
end while

quit = 1 -- signal all tasks to report any final results and terminate
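-- task_list() includes the main task, so wait until it is the only task left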

while length(task_list()) > 1 do
    task_yield()
end while

position(2*length(URLs)+3, 1)
puts(1, "\nAll Done.\n")

if getc(0) then
end if
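-- wget -b writes its progress to wget-log files; remove them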

system(del_cmd & " wget-log* > " & null_device, 2)


Regards,
   Rob Craig
   Rapid Deployment Software
   http://www.RapidEuphoria.com

