Re: Attn: Robert (Non-Blocking Support Revisited)

One of the main reasons for wanting non-blocking support is
so a program can access a URL without getting stuck waiting for the
download to complete (or time out).

The Linux/FreeBSD distribution includes this "news.exu" multitasking
demo, which also runs on Windows, provided you have wget.exe
in the current directory or on your PATH.
In the next release I'll include it in the Windows installation as well.
This might be useful for some people until we have something better.
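
For anyone who hasn't looked at the task routines yet, the core pattern
in news.exu boils down to just a few calls. Here's a minimal sketch
(the fetch procedure and the page names are made up for illustration;
they aren't part of the demo):

-- minimal sketch of the task pattern used in news.exu
procedure fetch(sequence name)
    -- pretend to poll for data; the real demo reads a wget output file
    for pass = 1 to 3 do
        printf(1, "%s: waiting (pass %d)\n", {name, pass})
        task_schedule(task_self(), {0.5, 1.0}) -- run again in 0.5 to 1.0 seconds
        task_yield() -- let the other tasks run in the meantime
    end for
end procedure

integer t
for i = 1 to 3 do
    t = task_create(routine_id("fetch"), {sprintf("page-%d", i)})
    task_schedule(t, 1) -- time-shared: one run each time it gets control
end for

while length(task_list()) > 1 do -- task 0 (the main program) is always listed
    task_yield()
end while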

-- news.exu: Search news pages

-- All this demo does is read several Web pages in parallel 
-- and report on the number of occurrences of a word or phrase.
-- Each page is handled by a separate Euphoria task running
-- in parallel with several other tasks.

-- usage:
--    exw news.exu string
-- or:
--    exw news.exu "a multi-word phrase"
-- (case insensitive search)

-- On Linux/FreeBSD use exu instead of exw.

-- This demo uses Euphoria's new multitasking feature. It can run
-- on Linux, FreeBSD, or Windows without change.
-- It creates multiple wget background processes, each retrieving one Web page.

-- You can get a version of wget for Windows from:
-- http://www.gnu.org/software/wget/wget.html
-- Linux and FreeBSD systems will probably already have it.

-- A Euphoria task is assigned to each instance of wget, searching the 
-- Web page text as it arrives. In this way, when a task is blocked due 
-- to a delayed response from a particular server, the program can easily 
-- switch to another task that is not blocked. The program quits after a 
-- period of 10-15 seconds with no progress made on any page.

include wildcard.e
include graphics.e

sequence cl
sequence search_phrase

cl = command_line()
if length(cl) >= 3 then
    search_phrase = cl[3]
else
    puts(1, "Usage:\n")
    puts(1, "       exw news search-phrase\n")
    if getc(0) then
    end if
    abort(1)
end if

-- news sources
sequence URLs
URLs = {
    "http://www.cbc.ca/news/",
    "http://www.juancole.com/",
    "http://www.abc.net.au/",
    "http://abcnews.go.com/",
    "http://english.aljazeera.net/HomePage",
    "http://news.bbc.co.uk/",
    "http://www.cbsnews.com/",
    "http://cnn.com/",
    "http://www.democracynow.org/index.pl",
    "http://www.foxnews.com/",
    "http://www.guardian.co.uk/",
    "http://www.msnbc.msn.com/",
    "http://www.reuters.com/",
    "http://www.whatreallyhappened.com/",
    "http://news.yahoo.com/"
}

sequence null_device, del_cmd

if platform() = LINUX then
    URLs = URLs[1..9] -- less room on screen
    null_device = "/dev/null"
    del_cmd = "rm"
else
    null_device = "NUL"
    del_cmd = "del"
end if

integer progress, quit

procedure search_url(sequence url, sequence string)
-- download a Web page and search it for a string   
    integer f, hits
    integer line_count
    object line
    sequence mytemp, ustring
    
    if task_self() > 20 or task_self() < 0 then
        printf(1, "\n1. my task id is rubbish: %g\n", task_self())
        if getc(0) then
        end if
        abort(0)
    end if
    
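    -- give each task its own pair of screen rows, based on its task id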
    position(task_self()*2+1, 1)
printf(1, "task %2.0f: %s\n         waiting for wget...", {task_self(),
    url})
    
    ustring = upper(string)
    hits = 0
    
    -- run a copy of wget as a background process
    mytemp = sprintf("newstemp%.0f.html", task_self())
    system(sprintf("wget -q -b -O %s %s > %s", {mytemp, url, null_device}), 2)
    
    f = -1
    while f = -1 do
        -- wait until file exists
        if quit then
            return
        end if
        if task_self() > 20 or task_self() < 0 then
            printf(1, "\n2. my task id is rubbish: %.0f\n", task_self())
            if getc(0) then
            end if
            abort(0)
        end if
    
        task_schedule(task_self(), {1.0, 2.0})
        task_yield()
        f = open(mytemp, "rb")
    end while

    position(task_self()*2+2, 1)
    text_color(BRIGHT_RED)
    puts(1, "         waiting for data...")
    text_color(WHITE)
    
    line_count = 0
    while 1 do
        line = gets(f)
        if atom(line) then
            -- could be actual end-of-file, or maybe there's more coming
            task_schedule(task_self(), {1.0, 1.5})
            while 1 do
                line = gets(f)
                if sequence(line) then
                    exit -- more data came in
                end if
                if quit then
                    return -- we've been told to quit
                end if  
                task_yield()
            end while
        end if
        
        if match(ustring, upper(line)) then
            hits += 1
        end if
        
        line_count += 1
        position(task_self()*2+2, 1)
        text_color(BRIGHT_GREEN)
        printf(1, "         matched %d lines out of %d   ", {hits, line_count})
        text_color(WHITE)
        progress = 1
        -- this yield is not necessary, but it 
        -- lets you see the parallelism better
        task_schedule(task_self(), 1)
        task_yield()
    end while
end procedure

integer t

for i = 1 to length(URLs) do
    t = task_create(routine_id("search_url"), {URLs[i], search_phrase})
    task_schedule(t, 1)
end for

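-- remove any newstemp files left over from a previous run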
system(del_cmd & " newstemp*.html > " & null_device, 2)
clear_screen()
if text_rows(43) then
end if
puts(1, "Looking for lines containing \"" & search_phrase & "\"")

atom time_out
time_out = time() + 45
task_schedule(0, {2.5, 3.0}) -- check the time every 2.5 to 3.0 seconds

quit = 0
while 1 do
    progress = 0
    task_yield()
    if progress then
        -- quit 10 seconds after no more lines are read
        -- from any file by any task
        time_out = time() + 10
    else
        if time() > time_out then
            exit
        end if
    end if
end while

quit = 1 -- signal all tasks to report any final results and terminate

while length(task_list()) > 1 do
    task_yield()
end while

position(2*length(URLs)+3, 1)
puts(1, "\nAll Done.\n")

if getc(0) then
end if

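-- clean up the log files that wget -b leaves behind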
system(del_cmd & " wget-log* > " & null_device, 2)


Regards,
   Rob Craig
   Rapid Deployment Software
   http://www.RapidEuphoria.com
