Re: Attn: Robert (Non-Blocking Support Revisited)
I noticed there was still some old debug code in there.
Here's a cleaner version with a dozen lines of
debug code stripped out ...
-- Search news pages
-- All this demo does is read several Web pages in parallel
-- and report on the number of occurrences of a word or phrase.
-- Each page is handled by a separate Euphoria task running
-- in parallel with several other tasks.
-- usage:
-- exw news.exu string
-- or:
-- exw news.exu "a multi-word phrase"
-- (case insensitive search)
-- On Linux/FreeBSD use exu instead of exw.
-- This demo uses Euphoria's new multitasking feature. It can run
-- on Linux, FreeBSD, or Windows, without change.
-- It creates multiple wget background processes, each retrieving one Web page.
-- You can get a version of wget for Windows from:
-- http://www.gnu.org/software/wget/wget.html
-- Linux and FreeBSD systems will probably already have it.
-- A Euphoria task is assigned to each instance of wget, searching the
-- Web page text as it arrives. In this way, when a task is blocked due
-- to a delayed response from a particular server, the program can easily
-- switch to another task that is not blocked. The program quits after a
-- period of 10-15 seconds with no progress made on any page.
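-- The key routines are task_create() to make a task, task_schedule() to say
-- when it should run, and task_yield() to hand control to the scheduler.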
include wildcard.e
include graphics.e
sequence cl
sequence search_phrase
cl = command_line()
if length(cl) >= 3 then
search_phrase = cl[3]
else
puts(1, "Usage:\n")
puts(1, " exw news.exu search-phrase\n")
if getc(0) then
end if
abort(1)
end if
-- news sources
sequence URLs
URLs = {
"http://www.cbc.ca/news/",
"http://www.juancole.com/",
"http://www.abc.net.au/",
"http://abcnews.go.com/",
"http://english.aljazeera.net/HomePage",
"http://news.bbc.co.uk/",
"http://www.cbsnews.com/",
"http://cnn.com/",
"http://www.democracynow.org/index.pl",
"http://www.foxnews.com/",
"http://www.guardian.co.uk/",
"http://www.msnbc.msn.com/",
"http://www.reuters.com/",
"http://www.whatreallyhappened.com/",
"http://news.yahoo.com/"
}
sequence null_device, del_cmd
if platform() = LINUX then
URLs = URLs[1..9] -- less room on screen
null_device = "/dev/null"
del_cmd = "rm"
else
null_device = "NUL"
del_cmd = "del"
end if
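-- flags shared by the main loop and the search tasks:
-- progress is set whenever any task reads a new line; quit tells every task to finish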
integer progress, quit
procedure search_url(sequence url, sequence string)
-- download a Web page and search it for a string
integer f, hits
integer line_count
object line
sequence mytemp, ustring
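-- each task reports on its own pair of screen rows, chosen from its task id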
position(task_self()*2+1, 1)
printf(1, "task %2.0f: %s\n waiting for wget...", {task_self(),
url})
ustring = upper(string)
hits = 0
-- run a copy of wget as a background process
mytemp = sprintf("newstemp%.0f.html", task_self())
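-- the -b option makes wget return immediately and keep downloading in the
-- background; -O writes the page to mytemp, and wget's start-up message is discarded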
system(sprintf("wget -q -b -O %s %s > %s", {mytemp, url, null_device}), 2)
f = -1
while f = -1 do
-- wait until file exists
if quit then
return
end if
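-- {1.0, 2.0} is a real-time schedule: run this task again
-- between 1.0 and 2.0 seconds from now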
task_schedule(task_self(), {1.0, 2.0})
task_yield()
f = open(mytemp, "rb")
end while
position(task_self()*2+2, 1)
text_color(BRIGHT_RED)
puts(1, " waiting for data...")
text_color(WHITE)
line_count = 0
while 1 do
line = gets(f)
if atom(line) then
-- could be actual end-of-file, or maybe there's more coming
task_schedule(task_self(), {1.0, 1.5})
while 1 do
line = gets(f)
if sequence(line) then
exit -- more data came in
end if
if quit then
return -- we've been told to quit
end if
task_yield()
end while
end if
if match(ustring, upper(line)) then
hits += 1
end if
line_count += 1
position(task_self()*2+2, 1)
text_color(BRIGHT_GREEN)
printf(1, " matched %d lines out of %d ", {hits, line_count})
text_color(WHITE)
progress = 1
-- this yield is not necessary, but it
-- lets you see the parallelism better
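-- (an integer schedule makes the task time-shared: it runs whenever its turn comes)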
task_schedule(task_self(), 1)
task_yield()
end while
end procedure
integer t
for i = 1 to length(URLs) do
t = task_create(routine_id("search_url"), {URLs[i], search_phrase})
task_schedule(t, 1)
end for
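-- the new tasks won't actually run until the main task calls task_yield()
-- delete any temp files left over from a previous run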
system(del_cmd & " newstemp*.html > " & null_device, 2)
clear_screen()
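-- try to switch to a 43-row text display so every task's two status lines fit on screen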
if text_rows(43) then
end if
puts(1, "Looking for lines containing \"" & search_phrase & "\"")
atom time_out
time_out = time() + 45
task_schedule(0, {2.5, 3.0}) -- check the time every 2.5 to 3.0 seconds
quit = 0
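-- main loop: keep yielding so the search tasks can run;
-- they set progress whenever they read another line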
while 1 do
progress = 0
task_yield()
if progress then
-- quit 10 seconds after no more lines are read
-- from any file by any task
time_out = time() + 10
else
if time() > time_out then
exit
end if
end if
end while
quit = 1 -- signal all tasks to report any final results and terminate
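-- task_list() includes this main task, so wait until it is the only task left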
while length(task_list()) > 1 do
task_yield()
end while
position(2*length(URLs)+3, 1)
puts(1, "\nAll Done.\n")
if getc(0) then
end if
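-- remove the wget-log files created by the background wget processes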
system(del_cmd & " wget-log* > " & null_device, 2)
Regards,
Rob Craig
Rapid Deployment Software
http://www.RapidEuphoria.com