Re: Euphoria web usage analysis
- Posted by Robert Craig <rds at RapidEuphoria.com> Feb 15, 2002
- 460 views
J. Kenneth Riviere writes: > Do you have such a program that you would be willing to put in the archive? It's really very specific to my needs. I doubt that anyone else could make much use of it, but it's a good example of the kind of thing Euphoria is good at, since it requires speed, but it's also something that I wanted to develop quickly and play around with a lot. (without having to compile, link and resolve machine crashes). I'm using it right now to evaluate various "pay-per-clickthrough" advertising sites. It tells me how many people came from various search keywords that I bid on, and how "interested" they were when they arrived, based on the number of extra pages that they viewed after seeing the main page. I've found significant differences in the "quality" of the visitors that various places send me, and of course differences depending on what the keyword is. This will influence which places I continue with, and how much I bid for various words. A typical line in my log file looks like (wrapped onto 5 lines here): 195.92.168.171 - - [03/feb/2002:09:59:55 -0800] "get /spellchk.zip http/1.1" 200 32768 "http://www.programmersheaven.com/search/download.asp?fileid=14415" "mozilla/4.6 [en-gb]c-cck-mcd netscapeonline.co.uk (win98; i)" It shows the IP address of the visitor, the date, the file that he accessed, info on the success of the access, the URL the person was referred from, what kind of browser they are using, their o/s etc. By the way, there were 28,533 visits to the RapidEuphoria Web site in January, smashing the previous record. Here's the code, for what it's worth. Sorry about the indentation and lack of comments.

-- extract stats from RapidEuphoria.com access_log
without type_check
include sort.e

-- offset that turns an upper-case ASCII letter into its lower-case form
constant TO_LOWER = 'a' - 'A'

function fast_lower(sequence s)
-- Faster than the standard lower().
-- Speed of lower() is very important for "any-case" search.
-- Returns s with every element in 'A'..'Z' shifted to 'a'..'z';
-- all other elements are returned unchanged.
    integer c
    for i = 1 to length(s) do
        c = s[i]
        if c <= 'Z' then
            if c >= 'A' then
                s[i] = c + TO_LOWER
            end if
        end if
    end for
    return s
end function

-- results filled in by gather_stats()
sequence target_list, target_count, referrer_list, referrer_count
integer line_count, gif_count
integer total_referrers, unknown_referrers

-- referrer and ip_address of the log line currently being processed;
-- set by gather_stats(), ip_address is read by visitor()
sequence referrer
sequence ip_address

sequence cl
cl = command_line()
if length(cl) < 3 then
    puts(2, "Usage: ex stats access_log\n")
    abort(1)
end if

sequence special_referrer, special_target, special_words

-- substrings looked for in the referrer URL
special_referrer = {
    "freshmeat",
    "linkexchange",
    "directhit.com",
    "google.com",
    "altavista.com"
}

-- substrings looked for in the requested target
special_target = {
    "?sp981",  -- all Sprinks
    "?bayf",   -- Bay9 freeware
    "?bayc",   -- Bay9 C
    "?baysh",  -- Bay9 Shareware
    "?bayso",  -- Bay9 Software
    "?bayfs",  -- Bay9 Free Software
    "?bayd",   -- Bay9 DOS
    "?gc981",  -- all goCLick
    "?fw981",  -- Overture freeware
    "?pl981",  -- Overture programming language
    "?f981",   -- all FindWhat
    "?7se"     -- all 7Search
}

-- layout of one special_words entry: {word, list of visits, duplicate count}
constant S_WORD = 1, S_LIST = 2, S_DUPS = 3
-- layout of one visit in an S_LIST:
-- {extra pages viewed, ip address, log line number of latest activity}
constant L_EXTRA = 1, L_IP = 2, L_LINE = 3

special_words = special_referrer & special_target
for i = 1 to length(special_words) do
    special_words[i] = {special_words[i], {}, 0}
end for

procedure visitor(sequence word)
-- a person has entered with a special target or referrer
-- Records a new visit for `word` using the current ip_address and
-- line_count, and bumps the word's duplicate count when that ip_address
-- is already in the word's visit list.
    integer dups

    -- ignore visualbasic from sprinks
    -- if equal(word, "?sp981") then
    --     if not match("basic", referrer) and not match("visual", referrer) then
    --         if not match("cplus", referrer) then
    --             return
    --         end if
    --     end if

    for i = 1 to length(special_words) do
        if equal(word, special_words[i][S_WORD]) then
            dups = special_words[i][S_DUPS]
            for j = 1 to length(special_words[i][S_LIST]) do
                if equal(ip_address, special_words[i][S_LIST][j][L_IP]) then
                    dups += 1
                    exit
                end if
            end for
            -- newest visit goes to the front of the list
            special_words[i][S_LIST] = prepend(special_words[i][S_LIST],
                                               {0, ip_address, line_count})
            special_words[i][S_DUPS] = dups
            return
        end if
    end for
    puts(2, "Couldn't find " & word & '\n')
end procedure

procedure credit(sequence ip_address)
-- give credit for this ip_address to special target or referrer
-- For each special word, the visit list is scanned from the front; the
-- scan stops at the first visit whose last activity is more than 3000
-- log lines old. A matching visit gets one more extra page and is
-- swapped into first position.
    sequence list, temp

    for i = 1 to length(special_words) do
        list = special_words[i][S_LIST]
        for j = 1 to length(list) do
            if line_count > list[j][L_LINE]+3000 then
                exit
            end if
            if equal(ip_address, list[j][L_IP]) then
                if line_count < list[j][L_LINE]+3000 then
                    special_words[i][S_LIST][j][L_EXTRA] += 1
                    special_words[i][S_LIST][j][L_LINE] = line_count
                    -- move it to first position
                    temp = special_words[i][S_LIST][j]
                    special_words[i][S_LIST][j] = special_words[i][S_LIST][1]
                    special_words[i][S_LIST][1] = temp
                    exit
                    -- allow double credit for two or more words,
                    -- but not for the same word
                end if
            end if
        end for
    end for
end procedure

procedure gather_stats()
-- one pass through the access log
-- Reads the file named by cl[3], counts image requests, tallies each
-- requested target and each external referrer, and records visits for
-- the special targets/referrers. On return target_list and referrer_list
-- hold {count, name} pairs ready for sorting.
    integer q, s, p, special
    object line
    sequence target
    integer log_file

    log_file = open(cl[3], "r")
    if log_file = -1 then
        puts(2, "Couldn't open " & cl[3] & '\n')
        -- stop here instead of trying to read from an invalid handle
        abort(1)
    end if

    target_list = {}
    target_count = {}
    referrer_list = {}
    referrer_count = {}
    line_count = 0
    gif_count = 0
    total_referrers = 0
    unknown_referrers = 0

    while 1 do
        line = gets(log_file)
        if atom(line) then
            exit   -- end of file
        end if
        line_count += 1
        line = fast_lower(line)
        if match(".gif ", line) or match(".jpg ", line) then
            gif_count += 1
        else
            -- the ip address is everything up to the first blank
            q = find(' ', line)
            if q then
                ip_address = line[1..q-1]
            else
                ip_address = ""
            end if
            credit(ip_address)

            -- start each line with an empty target so a request without
            -- a '/' never reuses the previous line's target (or an
            -- unassigned variable) in the special-target check below
            target = ""

            q = find('"', line)
            if q then
                -- target address
                line = line[q+1..length(line)]
                s = find('/', line)
                if s then
                    target = "/"
                    while 1 do
                        s += 1
                        if s > length(line) or line[s] = ' ' then
                            exit
                        end if
                        target &= line[s]
                    end while
                    line = line[s+1..length(line)]
                    p = find(target, target_list)
                    if p then
                        target_count[p] += 1
                    else
                        target_list = append(target_list, target)
                        target_count = append(target_count, 1)
                    end if
                end if

                -- referrer address
                q = find('"', line)   -- closing quote of the request
                if q then
                    line = line[q+1..length(line)]
                    q = find('"', line)   -- opening quote of the referrer
                    if q then
                        referrer = ""
                        while 1 do
                            q += 1
                            if q > length(line) or line[q] = '"' then
                                exit
                            end if
                            referrer &= line[q]
                        end while
                        if not match("rapideuphoria", referrer) and
                           not match("addr.com", referrer) then
                            -- coming in from outside world
                            special = 0
                            for i = 1 to length(special_target) do
                                if match(special_target[i], target) then
                                    visitor(special_target[i])
                                    -- remember the match so the referrer
                                    -- is not counted as well
                                    special = 1
                                    exit
                                end if
                            end for
                            if not special then
                                for i = 1 to length(special_referrer) do
                                    if match(special_referrer[i], referrer) then
                                        visitor(special_referrer[i])
                                        exit
                                    end if
                                end for
                            end if
                            total_referrers += 1
                            if length(referrer) < 3 then
                                unknown_referrers += 1
                            end if
                            p = find(referrer, referrer_list)
                            if p then
                                referrer_count[p] += 1
                            else
                                referrer_list = append(referrer_list, referrer)
                                referrer_count = append(referrer_count, 1)
                            end if
                        end if
                    end if
                end if
            end if
        end if
    end while

    -- pair each name with its count so that sort() orders by count
    for i = 1 to length(target_list) do
        target_list[i] = {target_count[i], target_list[i]}
    end for
    for i = 1 to length(referrer_list) do
        referrer_list[i] = {referrer_count[i], referrer_list[i]}
    end for
    close(log_file)
end procedure

-- main program: gather the statistics, then print the report
atom t
t = time()

gather_stats()

puts(1, "Targets:\n")
target_list = sort(target_list)
printf(1, "%d total .gifs\n", gif_count)
-- sort() is ascending, so walk backwards to print the largest counts first
for i = length(target_list) to 1 by -1 do
    printf(1, "%d %s\n", target_list[i])
end for

puts(1, "\nReferrers:\n")
referrer_list = sort(referrer_list)
for i = length(referrer_list) to 1 by -1 do
    printf(1, "%d %s\n", referrer_list[i])
end for

printf(1, "\n\nTotal Lines: %d\n", line_count)
printf(1, "Total External Referrers: %d\n", total_referrers)
printf(1, "Total Unknown Referrers: %d\n\n", unknown_referrers)

integer total, v, max, extra
sequence max_ip

for i = 1 to length(special_words) do
    max = -1
    max_ip = ""
    printf(1, "Special word: %s\n", {special_words[i][S_WORD]})
    v = length(special_words[i][S_LIST])
    printf(1, "Total: %d\n", v)
    if v > 0 then
        printf(1, "Total Dups: %d (%.0f%%)\n",
               {special_words[i][S_DUPS],
                100 * special_words[i][S_DUPS] / v})
    end if
    total = 0
    for j = 1 to length(special_words[i][S_LIST]) do
        extra = special_words[i][S_LIST][j][L_EXTRA]
        if extra > max then
            max = extra
            max_ip = special_words[i][S_LIST][j][L_IP]
        end if
        if extra > 25 then
            extra = 25 -- avoid huge excesses
        end if
        total += extra
    end for
    printf(1, "Total extra pages: %d\n", total)
    printf(1, "Max extra pages for one visitor: %d by %s\n", {max, max_ip})
    if v > 0 then
        printf(1, "Average extra pages: %.2f\n", total / v)
    end if
    puts(1, '\n')
end for

-- elapsed time goes to the error stream, separate from the report
puts(2, '\n')
print(2, time()-t)

Regards, Rob Craig Rapid Deployment Software http://www.RapidEuphoria.com