Re: data analysis
	
	
	
	
Kat wrote:
> Graeme, since you did that amazing
> pattern finding code, i was wondering
> what your thoughts would be on this problem...
I'm obviously not Graeme, but I couldn't resist. This routine will compare
two strings, returning the differences between them. For example:
   diff( "abczz", "dbnz" )
will return:
   "[a,d]b[c,n]z[z,]"
The notation is a bit funky, but you can adjust it as you see fit:
1. 'x' means that 'x' was found in both strings.
2. '[x,]' means that 'x' was found in the first string, but not the second.
3. '[,x]' means that 'x' was found in the second string, but not the first.
4. '[x,y]' means that 'x' was found in the first string, but not the second,
and 'y' was found in the second string, but not the first.
-- David Cuny
-- compare two strings, return differences
--
-- Walks s1 and s2 in parallel and returns a string describing how they
-- differ, using this notation:
--   x      element x is present at this point in both strings
--   [x,]   x is present only in s1
--   [,x]   x is present only in s2
--   [x,y]  x (from s1) and y (from s2) differ and neither could be
--          resynchronised against the other string
-- Example: diff( "abczz", "dbnz" ) returns "[a,d]b[c,n]z[z,]"
--
-- Elements are compared with '=' and formatted with sprintf's %s, so both
-- arguments are expected to be flat strings (sequences of atoms).
function diff( sequence s1, sequence s2 )
    integer at1, at2, sync1, sync2
    sequence result

    result = ""
    at1 = 0
    at2 = 0

    -- process until the end of one string
    while 1 do
        -- move ahead
        at1 += 1
        at2 += 1

        -- past end of one string?
        if at1 > length( s1 )
        or at2 > length( s2 ) then
            exit
        end if

        if s1[at1] = s2[at2] then
            -- same in both strings
            result &= s1[at1]
        else
            -- attempt to resync: look ahead in each string for the element
            -- the other string is currently positioned on.
            -- 0 means "not found" (0 is never a valid index, so this works
            -- for strings of any length).
            sync1 = 0
            for i = at1+1 to length(s1) do
                if s2[at2] = s1[i] then
                    sync1 = i
                    exit
                end if
            end for

            sync2 = 0
            for i = at2+1 to length(s2) do
                if s1[at1] = s2[i] then
                    sync2 = i
                    exit
                end if
            end for

            if sync1 = 0 and sync2 = 0 then
                -- no sync: report the mismatched pair. The main loop then
                -- advances both positions and compares the next pair, so
                -- a following match is emitted as a match rather than as
                -- a bogus "[x,x]" difference.
                result &= sprintf( "[%s,%s]", {s1[at1],s2[at2]} )
            elsif sync2 = 0
            or (sync1 != 0 and sync1 - at1 < sync2 - at2) then
                -- s1 resyncs sooner (fewest elements skipped): everything
                -- in s1 before the sync point is only in the first string
                for i = at1 to sync1-1 do
                    result &= sprintf( "[%s,]", {s1[i]} )
                end for
                -- sync
                at1 = sync1
                result &= s1[at1]
            else
                -- s2 resyncs sooner (ties go to s2): everything in s2
                -- before the sync point is only in the second string
                for i = at2 to sync2-1 do
                    result &= sprintf( "[,%s]", {s2[i]} )
                end for
                -- sync
                at2 = sync2
                result &= s2[at2]
            end if
        end if
    end while

    -- remainder? at most one string still has elements left, because the
    -- main loop only exits once at least one position is past the end
    if at1 <= length( s1 ) then
        for i = at1 to length(s1) do
            result &= sprintf( "[%s,]", {s1[i]} )
        end for
    elsif at2 <= length( s2 ) then
        for i = at2 to length(s2) do
            result &= sprintf( "[,%s]", {s2[i]} )
        end for
    end if

    return result
end function
	
	
		| 
									Not Categorized, Please Help
						 |  |