Re: Euphoria web usage analysis

new topic     » goto parent     » topic index » view thread      » older message » newer message

J. Kenneth Riviere writes:
> Do you have such a program that you would be willing to put in the archive? 

It's really very specific to my needs.
I doubt that anyone else could make much use of it,
but it's a good example of the kind of thing
Euphoria is good at, since it requires speed,
but it's also something that I wanted to develop quickly
and play around with a lot (without having to compile, link and 
resolve machine crashes).

I'm using it right now to evaluate various "pay-per-clickthrough"
advertising sites. It tells me how many people came from
various search keywords that I bid on, and how "interested" they were
when they arrived, based on the number of extra pages that they
viewed after seeing the main page. I've found significant 
differences in the "quality" of the visitors that various 
places send me, and of course differences depending on 
what the keyword is. This will influence which places I continue
with, and how much I bid for various words.

A typical line in my log file looks like (wrapped onto 5 lines here):

195.92.168.171 - - [03/feb/2002:09:59:55 -0800] 
"get /spellchk.zip http/1.1"
200 32768
"http://www.programmersheaven.com/search/download.asp?fileid=14415"
"mozilla/4.6 [en-gb]c-cck-mcd netscapeonline.co.uk  (win98; i)"

It shows the IP address of the visitor, the date, the file that they accessed,
info on the success of the access, the URL the visitor was referred from,
what kind of browser they are using, their o/s, etc.

By the way, there were 28,533 visits to the RapidEuphoria Web site
in January, smashing the previous record.

Here's the code, for what it's worth. 
Sorry about the indentation and lack of comments.

-- extract stats from RapidEuphoria.com access_log
without type_check

include sort.e

-- Distance between an upper-case ASCII letter and its lower-case form.
constant TO_LOWER = 'a' - 'A'

function fast_lower(sequence s)
-- Return s with every element in the range 'A'..'Z' shifted to the
-- matching lower-case letter; every other element is returned unchanged.
-- Written by hand because it is faster than the standard lower(), and the
-- speed of lower-casing matters a lot for "any-case" searching.
-- Assumes s is a flat string (each element is an atom).
    integer ch

    for pos = 1 to length(s) do
        ch = s[pos]
        if ch >= 'A' and ch <= 'Z' then
            s[pos] = ch + TO_LOWER
        end if
    end for
    return s
end function

-- Statistics filled in by gather_stats().
-- target_list / referrer_list hold the distinct targets / referrers seen;
-- target_count / referrer_count are the parallel hit counts.
sequence target_list, target_count
sequence referrer_list, referrer_count
integer line_count            -- lines read from the log so far
integer gif_count             -- lines skipped because they request a .gif or .jpg
integer total_referrers       -- requests referred from outside the site
integer unknown_referrers     -- ... of which the referrer is shorter than 3 characters

-- State of the log line currently being processed (set by gather_stats(),
-- ip_address is also read by visitor()).
sequence referrer
sequence ip_address

-- Command line; cl[3] is the name of the access log to read.
sequence cl

cl = command_line()
if length(cl) <= 2 then
    puts(2, "Usage: ex stats access_log\n")
    abort(1)
end if

sequence special_referrer, special_target, special_words

-- Substrings searched for in the referrer URL of a visitor arriving from
-- outside the site.
special_referrer = {
    "freshmeat",
    "linkexchange",
    "directhit.com",
    "google.com",
    "altavista.com"
}

-- Substrings searched for in the requested target (tracking suffixes used
-- on the pay-per-clickthrough landing URLs).
-- ORDER MATTERS: gather_stats() tests these with match() in list order and
-- stops at the first hit, so a key that contains another key as a substring
-- must be listed BEFORE the shorter one. "?bayfs" therefore precedes
-- "?bayf"; in the opposite order "?bayfs" visitors would all be counted
-- under "?bayf" and "?bayfs" would always report zero.
special_target = {
    "?sp981",  -- all Sprinks
    "?bayfs",  -- Bay9 Free Software (must come before "?bayf")
    "?bayf",   -- Bay9 freeware
    "?bayc",   -- Bay9 C
    "?baysh",  -- Bay9 Shareware
    "?bayso",  -- Bay9 Software
    "?bayd",   -- Bay9 DOS
    "?gc981",  -- all goClick
    "?fw981",  -- Overture freeware
    "?pl981",  -- Overture programming language
    "?f981",   -- all FindWhat
    "?7se"     -- all 7Search
}

-- Fields of one special_words entry: {word, visitor list, duplicate count}
constant S_WORD = 1,
         S_LIST = 2,
         S_DUPS = 3

-- Fields of one visitor-list entry:
-- {extra pages credited, IP address, log line number of latest activity}
constant L_EXTRA = 1,
         L_IP = 2,
         L_LINE = 3

-- Build one statistics record per special referrer / target, each starting
-- with an empty visitor list and no duplicates.
special_words = special_referrer & special_target
for i = 1 to length(special_words) do
    special_words[i] = {special_words[i], {}, 0}
end for
procedure visitor(sequence word)
-- Record that the visitor on the current log line (global ip_address, at
-- global line_count) entered the site via the special target or referrer
-- `word`. A new {0, ip_address, line_count} entry is always put at the front
-- of that word's visitor list; if the same IP address was already on the
-- list, the word's duplicate counter is bumped by one as well.
-- Complains on stderr if `word` is not a known special word.
    integer slot
    sequence seen

    -- (disabled) ignore visualbasic from sprinks
--  if equal(word, "?sp981") then
--      if not match("basic", referrer) and not match("visual", referrer) then
--      if not match("cplus", referrer) then
--          return
--      end if
--  end if

    -- locate the statistics record for this word
    slot = 0
    for w = 1 to length(special_words) do
        if equal(special_words[w][S_WORD], word) then
            slot = w
            exit
        end if
    end for

    if slot = 0 then
        puts(2, "Couldn't find " & word & '\n')
        return
    end if

    -- a repeat visit from an IP address we already have counts as one dup
    seen = special_words[slot][S_LIST]
    for k = 1 to length(seen) do
        if equal(seen[k][L_IP], ip_address) then
            special_words[slot][S_DUPS] += 1
            exit
        end if
    end for

    special_words[slot][S_LIST] = prepend(seen, {0, ip_address, line_count})
end procedure

procedure credit(sequence ip_address)
-- Credit one extra page view from ip_address to every special word whose
-- visitor list holds a recent entry for that address.
-- For each word the list is scanned from the front. The scan stops at the
-- first entry whose last activity is more than 3000 log lines old. An entry
-- with a matching IP address that is less than 3000 lines old gets its
-- extra-page count incremented and its line number refreshed, and is then
-- swapped into first position. At most one entry per word is credited, but
-- several different words may each be credited for the same request.
    sequence entries, hit
    integer expiry

    for w = 1 to length(special_words) do
        entries = special_words[w][S_LIST]   -- snapshot; not used after an update
        for k = 1 to length(entries) do
            expiry = entries[k][L_LINE] + 3000
            if line_count > expiry then
                exit
            end if
            if line_count < expiry and equal(ip_address, entries[k][L_IP]) then
                hit = entries[k]
                hit[L_EXTRA] += 1
                hit[L_LINE] = line_count

                -- swap the credited entry with whatever is in first position
                special_words[w][S_LIST][k] = entries[1]
                special_words[w][S_LIST][1] = hit
                exit  -- no second credit for the same word
            end if
        end for
    end for
end procedure

procedure gather_stats()
-- Make one pass through the access log named by cl[3] and fill in the
-- global statistics:
--   line_count                       lines read
--   gif_count                        lines containing ".gif " or ".jpg " (skipped)
--   target_list / target_count       requested targets and their hit counts
--   referrer_list / referrer_count   external referrers and their hit counts
--   total_referrers, unknown_referrers
-- Each line is lower-cased before it is examined. For every non-image line
-- credit() is called with the visitor's IP address, and visitor() is called
-- when an external request matches a special target or special referrer.
-- On return target_list and referrer_list hold {count, text} pairs.
-- Aborts with exit code 1 if the log file cannot be opened.
    integer q, s, p, special
    object line
    sequence target
    integer log_file

    log_file = open(cl[3], "r")
    if log_file = -1 then
        puts(2, "Couldn't open " & cl[3] & '\n')
        abort(1)    -- nothing to read; carrying on would call gets(-1)
    end if
    target_list = {}
    target_count = {}
    referrer_list = {}
    referrer_count = {}
    line_count = 0
    gif_count = 0

    total_referrers = 0
    unknown_referrers = 0

    while 1 do
        line = gets(log_file)
        if atom(line) then
            exit    -- gets() returns an atom at end of file
        end if
        line_count += 1
        line = fast_lower(line)

        if match(".gif ", line) or match(".jpg ", line) then
            gif_count += 1
        else
            -- the IP address is everything before the first blank
            q = find(' ', line)
            if q then
                ip_address = line[1..q-1]
            else
                ip_address = ""
            end if

            credit(ip_address)

            q = find('"', line)
            if q then
                -- target address: from the first '/' after the opening
                -- quote up to the next blank
                line = line[q+1..length(line)]
                target = ""    -- never reuse the previous line's target
                s = find('/', line)
                if s then
                    target = "/"
                    while 1 do
                        s += 1
                        if s > length(line) or line[s] = ' ' then
                            exit
                        end if
                        target &= line[s]
                    end while
                    -- keep what follows the target; if the target ran to
                    -- the end of the line there is nothing left (and
                    -- slicing from s+1 would be out of range)
                    if s < length(line) then
                        line = line[s+1..length(line)]
                    else
                        line = ""
                    end if
                    p = find(target, target_list)
                    if p then
                        target_count[p] += 1
                    else
                        target_list = append(target_list, target)
                        target_count = append(target_count, 1)
                    end if
                end if

                -- referrer address: the text inside the next pair of quotes
                -- after the quote that closes the request
                q = find('"', line)
                if q then
                    line = line[q+1..length(line)]
                    q = find('"', line)
                    if q then
                        referrer = ""
                        while 1 do
                            q += 1
                            if q > length(line) or line[q] = '"' then
                                exit
                            end if
                            referrer &= line[q]
                        end while

                        if not match("rapideuphoria", referrer) and
                           not match("addr.com", referrer) then
                            -- coming in from outside world

                            -- a special target takes precedence; the first
                            -- matching entry in special_target wins
                            special = 0
                            for i = 1 to length(special_target) do
                                if match(special_target[i], target) then
                                    visitor(special_target[i])
                                    special = 1
                                    exit
                                end if
                            end for

                            -- only fall back to the referrer when no
                            -- special target claimed this visitor
                            if not special then
                                for i = 1 to length(special_referrer) do
                                    if match(special_referrer[i], referrer) then
                                        visitor(special_referrer[i])
                                        exit
                                    end if
                                end for
                            end if

                            total_referrers += 1

                            if length(referrer) < 3 then
                                unknown_referrers += 1
                            end if

                            p = find(referrer, referrer_list)
                            if p then
                                referrer_count[p] += 1
                            else
                                referrer_list = append(referrer_list, referrer)
                                referrer_count = append(referrer_count, 1)
                            end if
                        end if
                    end if
                end if
            end if
        end if
    end while

    -- pair every entry with its count so the lists can be sorted by count
    for i = 1 to length(target_list) do
        target_list[i] = {target_count[i], target_list[i]}
    end for

    for i = 1 to length(referrer_list) do
        referrer_list[i] = {referrer_count[i], referrer_list[i]}
    end for
    close(log_file)
end procedure

-- Main program: gather the statistics, print the report on stdout and the
-- elapsed time (in seconds) on stderr.
atom start_time
start_time = time()

gather_stats()

-- Requested targets; sort() is ascending on {count, text}, so walk the
-- list backwards to print the most-requested first.
puts(1, "Targets:\n")
target_list = sort(target_list)
printf(1, "%d total .gifs\n", gif_count)
for n = length(target_list) to 1 by -1 do
    printf(1, "%d %s\n", target_list[n])
end for

-- External referrers, again highest count first.
puts(1, "\nReferrers:\n")
referrer_list = sort(referrer_list)
for n = length(referrer_list) to 1 by -1 do
    printf(1, "%d %s\n", referrer_list[n])
end for

printf(1, "\n\nTotal Lines: %d\n", line_count)
printf(1, "Total External Referrers: %d\n", total_referrers)
printf(1, "Total Unknown Referrers: %d\n\n", unknown_referrers)

integer capped_total, visitor_total, most_extra, extra_pages
sequence busiest_ip, word_stats, entries

-- One section per special word: visitor count, duplicates, and how many
-- extra pages those visitors went on to view.
for w = 1 to length(special_words) do
    word_stats = special_words[w]
    entries = word_stats[S_LIST]
    visitor_total = length(entries)

    printf(1, "Special word: %s\n", {word_stats[S_WORD]})
    printf(1, "Total: %d\n", visitor_total)
    if visitor_total > 0 then
        printf(1, "Total Dups: %d (%.0f%%)\n",
               {word_stats[S_DUPS], 100 * word_stats[S_DUPS] / visitor_total})
    end if

    most_extra = -1
    busiest_ip = ""
    capped_total = 0
    for k = 1 to visitor_total do
        extra_pages = entries[k][L_EXTRA]
        -- the maximum is tracked before the cap is applied
        if extra_pages > most_extra then
            most_extra = extra_pages
            busiest_ip = entries[k][L_IP]
        end if
        if extra_pages > 25 then
            extra_pages = 25  -- avoid huge excesses
        end if
        capped_total += extra_pages
    end for

    printf(1, "Total extra pages: %d\n", capped_total)
    printf(1, "Max extra pages for one visitor: %d by %s\n", {most_extra, busiest_ip})
    if visitor_total > 0 then
        printf(1, "Average extra pages: %.2f\n", capped_total / visitor_total)
    end if
    puts(1, '\n')
end for
puts(2, '\n')

print(2, time() - start_time)

Regards,
   Rob Craig
   Rapid Deployment Software
   http://www.RapidEuphoria.com

new topic     » goto parent     » topic index » view thread      » older message » newer message

Search



Quick Links

User menu

Not signed in.

Misc Menu