Re: Attn: Robert (Non-Blocking Support Revisited)
- Posted by Robert Craig <rds at RapidEuphoria.com> Mar 27, 2007
I noticed there was still some old debug code in there. Here's a cleaner version with a dozen lines of debug code stripped out ...
-- Search news pages
-- All this demo does is read several Web pages in parallel
-- and report on the number of occurrences of a word or phrase.
-- Each page is handled by a separate Euphoria task running
-- in parallel with several other tasks.

-- usage:
--        exw news.exu string
-- or:
--        exw news.exu "a multi-word phrase"
-- (case insensitive search)
-- On Linux/FreeBSD use exu instead of exw.

-- This demo uses Euphoria's new multitasking feature. It can run
-- on Linux, FreeBSD, or Windows, without change.
-- It creates multiple wget background processes, each retrieving one Web page.
-- You can get a version of wget for Windows from:
--     http://www.gnu.org/software/wget/wget.html
-- Linux and FreeBSD systems will probably already have it.
-- A Euphoria task is assigned to each instance of wget, searching the
-- Web page text as it arrives. In this way, when a task is blocked due
-- to a delayed response from a particular server, the program can easily
-- switch to another task that is not blocked. The program quits after a
-- period of 10-15 seconds with no progress made on any page.

include wildcard.e
include graphics.e

sequence cl
sequence search_phrase

cl = command_line()
if length(cl) >= 3 then
    search_phrase = cl[3]
else
    puts(1, "Usage:\n")
    puts(1, "    exw news.exu search-phrase\n")
    if getc(0) then
    end if
    abort(1)
end if

-- news sources
sequence URLs
URLs = {
    "http://www.cbc.ca/news/",
    "http://www.juancole.com/",
    "http://www.abc.net.au/",
    "http://abcnews.go.com/",
    "http://english.aljazeera.net/HomePage",
    "http://news.bbc.co.uk/",
    "http://www.cbsnews.com/",
    "http://cnn.com/",
    "http://www.democracynow.org/index.pl",
    "http://www.foxnews.com/",
    "http://www.guardian.co.uk/",
    "http://www.msnbc.msn.com/",
    "http://www.reuters.com/",
    "http://www.whatreallyhappened.com/",
    "http://news.yahoo.com/"
}

sequence null_device, del_cmd

if platform() = LINUX then
    URLs = URLs[1..9] -- less room on screen
    null_device = "/dev/null"
    del_cmd = "rm"
else
    null_device = "NUL"
    del_cmd = "del"
end if

integer progress, quit

procedure search_url(sequence url, sequence string)
-- download a Web page and search it for a string
    integer f, hits
    integer line_count
    object line
    sequence mytemp, ustring

    position(task_self()*2+1, 1)
    printf(1, "task %2.0f: %s\n    waiting for wget...", {task_self(), url})
    ustring = upper(string)
    hits = 0

    -- run a copy of wget as a background process
    mytemp = sprintf("newstemp%.0f.html", task_self())
    system(sprintf("wget -q -b -O %s %s > %s",
                   {mytemp, url, null_device}), 2)

    f = -1
    while f = -1 do
        -- wait until file exists
        if quit then
            return
        end if
        task_schedule(task_self(), {1.0, 2.0})
        task_yield()
        f = open(mytemp, "rb")
    end while

    position(task_self()*2+2, 1)
    text_color(BRIGHT_RED)
    puts(1, "    waiting for data...")
    text_color(WHITE)

    line_count = 0
    while 1 do
        line = gets(f)
        if atom(line) then
            -- could be actual end-of-file, or maybe there's more coming
            task_schedule(task_self(), {1.0, 1.5})
            while 1 do
                line = gets(f)
                if sequence(line) then
                    exit -- more data came in
                end if
                if quit then
                    return -- we've been told to quit
                end if
                task_yield()
            end while
        end if

        if match(ustring, upper(line)) then
            hits += 1
        end if
        line_count += 1
        position(task_self()*2+2, 1)
        text_color(BRIGHT_GREEN)
        printf(1, "    matched %d lines out of %d    ", {hits, line_count})
        text_color(WHITE)
        progress = 1

        -- this yield is not necessary, but it
        -- lets you see the parallelism better
        task_schedule(task_self(), 1)
        task_yield()
    end while
end procedure

integer t
for i = 1 to length(URLs) do
    t = task_create(routine_id("search_url"), {URLs[i], search_phrase})
    task_schedule(t, 1)
end for

system(del_cmd & " newstemp*.html > " & null_device, 2)
clear_screen()
if text_rows(43) then
end if
puts(1, "Looking for lines containing \"" & search_phrase & "\"")

atom time_out
time_out = time() + 45
task_schedule(0, {2.5, 3.0}) -- check the time every 2.5 to 3.0 seconds
quit = 0
while 1 do
    progress = 0
    task_yield()
    if progress then
        -- quit 10 seconds after no more lines are read
        -- from any file by any task
        time_out = time() + 10
    else
        if time() > time_out then
            exit
        end if
    end if
end while

quit = 1 -- signal all tasks to report any final results and terminate
while length(task_list()) > 1 do
    task_yield()
end while

position(2*length(URLs)+3, 1)
puts(1, "\nAll Done.\n")
if getc(0) then
end if
system(del_cmd & " wget-log* > " & null_device, 2)
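For anyone who hasn't tried the tasking built-ins yet, the whole demo rests on
four of them: task_create(), task_schedule(), task_yield() and task_list().
Here's a minimal sketch of that same pattern on its own (the count routine and
its arguments are just made up for illustration):

-- two time-shared tasks sharing the CPU with the top-level task
procedure count(sequence name, integer limit)
    for i = 1 to limit do
        printf(1, "%s: %d\n", {name, i})
        task_yield() -- let the other tasks run between iterations
    end for
end procedure

atom t1, t2
t1 = task_create(routine_id("count"), {"apples", 5})
t2 = task_create(routine_id("count"), {"oranges", 5})

task_schedule(t1, 1) -- time-shared: one iteration per activation
task_schedule(t2, 1)

while length(task_list()) > 1 do
    -- only task 0 (this top-level code) remains once both finish
    task_yield()
end while
puts(1, "all tasks done\n")

The bottom of news.exu does the same thing, except each task runs search_url()
and the main loop also watches the time_out clock so the program can give up on
slow servers.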
Regards,
   Rob Craig
   Rapid Deployment Software
   http://www.RapidEuphoria.com