diff4b
-- if you build something with this,
-- or derive something from this,
-- i'd like a credit. Kat
-- I built this from inspiration and code provided by DCuny.

-- Eu to OE conversion notes:
--   * the loop variable `loop` is named `Loop`
--   * `totalglobalresult` and `tempresult` are declared as locals of diff4b
--   * `sort` is provided by std/sort.e

include std/sort.e

---- original code by Kat follows:

-- diff4b: compare two sequences and describe how they differ.
--
-- For every gap size MaxGap from MinG to MaxG the two inputs are walked twice:
--   pass 1 reports differences element by element,
--   pass 2 groups runs of differences and also recognises two swapped
--          neighbouring elements.
-- Each distinct description is collected once.
--
-- Notation used inside a description:
--   x       element present at the same point in both inputs
--   [x,y]   s1 has x where s2 has y
--   [x,]    x is only in s1
--   [,y]    y is only in s2
--
-- Parameters:
--   s1, s2   the sequences to compare
--   MinG     smallest MaxGap value to try
--   MaxG     largest MaxGap value to try
--            (when MinG and MaxG are both 0 the range is derived from the
--             lengths of s1 and s2)
--   looping  only consulted when the derived MaxG is 1: 0 yields
--            "0 ZERO MATCHES", anything else yields "ZERO MATCHES"
--            (the recursive call passes 1)
--
-- Returns:
--   ""                                  when s1 and s2 are both empty and
--                                       MinG = MaxG = 0
--   "0 ZERO MATCHES" / "ZERO MATCHES"   when the derived MaxG is 1
--   otherwise a sorted sequence of strings, each one being
--   <count char> & " " & <description>, where <count char> is the number of
--   '[' in the description plus 48 (so it is a decimal digit only for
--   counts 0..9).
global function diff4b( sequence s1, sequence s2, integer MinG, integer MaxG ,integer looping)
  integer at1, at2, sync1, sync2
  sequence result, bigresult
  integer diff1, diff2, best, bracketfound
  sequence left, right, temp
  -- scratch storage for the recursive step; only written after the
  -- recursive call has returned, so local scope is sufficient
  sequence totalglobalresult, tempresult

  -- Two empty inputs with automatic gap selection have nothing to compare.
  -- (This must stay ahead of the automatic gap computation below.)
  if length(s1) = 0 and length(s2) = 0 and ( MinG = 0 ) and ( MaxG = 0 ) then
    return ""
  end if

  bigresult = ""

  --at1 = match(s2,s1)
  --if ( at1 != 0 ) then
  --  result = s1[at1..length(s1)]
  --end if

  if ( MaxG = 0 ) and ( MinG = 0 ) then
    -- automatic gap range
    -- `and` short-circuits inside an if-condition, so s1[1] is only read
    -- when s1 is not empty
    if length(s1) > 0 and sequence(s1[1]) then
      MinG = 1
    else
      MinG = 2
    end if
    MaxG = floor(0.5 * ( length(s1) + length(s2) ) )
    if ( MaxG > floor( 0.5 * length(s1)) ) then
      MaxG = floor( 0.5 * length(s1)) +1
    end if
    if ( MaxG > floor( 0.5 * length(s2)) ) then
      MaxG = floor( 0.5 * length(s2)) +1
    end if
    if ( MaxG < MinG ) then
      -- something is wrong....
      if ( length(s1) > length(s2) ) then
        MaxG = length(s1)
      else
        MaxG = length(s2)
      end if
    end if
    if ( MaxG = 1 ) then
      if ( looping = 0 ) then
        result = '0' & " ZERO MATCHES"
      else
        result = "ZERO MATCHES"
      end if
      return result
    end if
  end if

  for MaxGap = MinG to MaxG do

    ------------------------------------------------------------------
    -- pass 1: element-by-element report
    ------------------------------------------------------------------
    result = ""
    at1 = 0
    at2 = 0
    --set at1 and at2 here to account for the {"eeyes","eyes"} problem

    -- process until the end of one string
    while 1 do
      -- move ahead
      at1 += 1
      at2 += 1
      -- past end of one string?
      if at1 > length( s1 ) or at2 > length( s2 ) then
        exit
      end if
      -- same?
      if equal(s1[at1],s2[at2]) then
        result &= s1[at1]
      else
        -- attempt to resync
        while 1 do
          -- find closest sync
          sync1 = find( s2[at2], s1[at1..length(s1)] )
          -- too far?
          if sync1 > 0 and sync1 < MaxGap then
            sync1 += at1 - 1
          else
            sync1 = 9999   -- 9999 marks "no usable sync point"
          end if
          -- find closest sync point
          sync2 = find( s1[at1], s2[at2..length(s2)] )
          -- too far?
          if sync2 > 0 and sync2 < MaxGap then
            sync2 += at2 - 1
          else
            sync2 = 9999
          end if
          -- evaluate sync
          if sync1 = 9999 and sync2 = 9999 then
            -- no sync
            result &= sprintf( "[%s,%s]", {s1[at1],s2[at2]} )
            -- at end?
            if at1 = length( s1 ) or at2 = length( s2 ) then
              exit
            end if
            -- skip
            at1 += 1
            at2 += 1
          elsif sync1 < sync2 then
            -- match on sync1
            for i = at1 to sync1-1 do
              result &= sprintf( "[%s,]", {s1[i]} )
            end for
            -- sync
            at1 = sync1
            result &= s1[at1]
            -- leave loop
            exit
          else
            -- match on sync2
            for i = at2 to sync2-1 do
              result &= sprintf( "[,%s]", {s2[i]})
            end for
            -- sync
            at2 = sync2
            result &= s2[at2]
            if (at1 < length(s1)) and (at2 < length(s2)) and
               (length(s1) > 0) and (length(s2) > 0) and
               ( compare(s1[at1+1..length(s1)],s2[at2+1..length(s2)]) != 0 ) then
              -- we are sync'd now, but won't be sync'd next char!
              -- so recurse with whatever is left of s1 and s2
              totalglobalresult = diff4b(s1[at1+1..length(s1)],s2[at2+1..length(s2)],0,0,1)
              -- NOTE(review): if the recursive call returns the flat string
              -- "ZERO MATCHES", this loop walks its individual characters.
              for tgrindex = 1 to length(totalglobalresult) do
                tempresult = result & totalglobalresult[tgrindex]
                if ( find(tempresult,bigresult) = 0 ) then
                  bigresult = append(bigresult,tempresult)
                end if
              end for
            end if
            -- leave loop
            exit
          end if
        end while
      end if
    end while

    -- remainder?
    if at1 <= length( s1 ) then
      for i = at1 to length(s1) do
        result &= sprintf( "[%s,]", {s1[i]} )
      end for
    elsif at2 <= length( s2 ) then
      for i = at2 to length(s2) do
        result &= sprintf( "[,%s]", {s2[i]} )
      end for
    end if

    if ( find(result,bigresult) = 0 ) then
      -- result &= s2[at2]
      bigresult = append(bigresult,result)
    end if

    ------------------------------------------------------------------
    -- pass 2: grouped report; `left` / `right` accumulate the pending
    -- differing run from s1 / s2 until the next matching element
    ------------------------------------------------------------------
    result = ""
    left = ""
    right = ""
    at1 = 0
    at2 = 0

    -- process until the end of one string
    while 1 do
      -- move ahead
      at1 += 1
      at2 += 1
      -- past end of one string?
      if at1 > length( s1 ) or at2 > length( s2 ) then
        exit
      end if
      -- same?
      if equal(s1[at1],s2[at2]) then
        if length( left ) or length( right ) then
          result &= sprintf( "[%s,%s]", {left,right})
          left = ""
          right = ""
        end if
        result &= s1[at1]
      else
        -- attempt to resync
        while 1 do
          -- swapped letters?
          if at1 < length( s1 ) and at2 < length( s2 )
             and equal(s1[at1] , s2[at2+1])
             and equal(s1[at1+1] , s2[at2]) then
            left &= s1[at1..at1+1]
            right &= s2[at2..at2+1]
            at1 += 1
            at2 += 1
            exit
          end if
          -- find closest sync
          diff1 = find( s2[at2], s1[at1..length(s1)] )
          -- too far?
          if diff1 > 0 and diff1 < MaxGap then
            sync1 = at1 + diff1 - 1
          else
            sync1 = 9999
            diff1 = 9999
          end if
          -- find closest sync point
          diff2 = find( s1[at1], s2[at2..length(s2)] )
          -- too far?
          if diff2 > 0 and diff2 < MaxGap then
            sync2 = at2 + diff2 - 1
          else
            sync2 = 9999
            diff2 = 9999
          end if
          -- better to remove chars?
          if sync1 != 9999 or sync2 != 9999 then
            if diff1 < diff2 then
              best = diff1
            else
              best = diff2
            end if
            for i = 1 to best do
              if at1+i > length( s1 ) or at2+i > length( s2 ) then
                exit
              end if
              if equal(s1[at1+i-1] , s2[at2+i-1]) then
                -- better match
                diff1 = i
                diff2 = i
                sync1 = at1+i-1
                sync2 = at2+i-1
                exit
              end if
            end for
          end if
          -- evaluate sync
          if sync1 = 9999 and sync2 = 9999 then
            -- no sync
            left &= s1[at1]
            right &= s2[at2]
            -- at end?
            if at1 = length( s1 ) or at2 = length( s2 ) then
              exit
            end if
            -- skip
            at1 += 1
            at2 += 1
            -- on a match?
            if equal(s1[at1] , s2[at2]) then
              if length( left ) or length( right ) then
                result &= sprintf( "[%s,%s]", {left,right})
                left = ""
                right = ""
              end if
              result &= s1[at1]
              exit
            end if
          elsif diff1 = diff2 and equal(s1[sync1] , s2[sync2]) then
            -- match to both
            for i = 0 to diff1-2 do
              left &= s1[at1+i]
              right &= s2[at2+i]
            end for
            -- sync
            at1 = sync1
            at2 = sync2
            if length( left ) or length( right ) then
              result &= sprintf( "[%s,%s]", {left,right})
              left = ""
              right = ""
            end if
            result &= s1[at1]
            -- leave loop
            exit
          elsif diff1 <= diff2 then
            -- match on sync1
            for i = at1 to sync1-1 do
              left &= s1[i]
            end for
            -- sync
            at1 = sync1
            if length( left ) or length( right ) then
              result &= sprintf( "[%s,%s]", {left,right})
              left = ""
              right = ""
            end if
            result &= s1[at1]
            -- leave loop
            exit
          else
            -- match on sync2
            for i = at2 to sync2-1 do
              right &= s2[i]
            end for
            -- sync
            at2 = sync2
            if length( left ) or length( right ) then
              result &= sprintf( "[%s,%s]", {left,right})
              left = ""
              right = ""
            end if
            result &= s2[at2]
            -- leave loop
            exit
          end if
        end while
      end if
    end while

    -- remainder?
    if at1 <= length( s1 ) then
      for i = at1 to length(s1) do
        left &= s1[i]
      end for
    elsif at2 <= length( s2 ) then
      for i = at2 to length(s2) do
        right &= s2[i]
      end for
    end if

    if length( left ) or length( right ) then
      result &= sprintf( "[%s,%s]", {left,right})
    end if

    if ( find(result,bigresult) = 0 ) then
      --bigresult &= " : " & result
      bigresult = append(bigresult,result)
    end if

  end for

  --puts(1,bigresult)

  -- organise them by number of []
  -- 1st [xy,ab]
  -- 2nd [x,y]
  -- 3rd [,x]
  -- puts(1,"\nprinting out the count now:\n\n")
  for Loop = 1 to length(bigresult) do
    bracketfound = 0
    temp = bigresult[Loop] & "."
    --trace(1)
    -- count the '[' characters by repeatedly cutting temp after the next one
    while find('[',temp) do
      bracketfound += 1
      temp = temp[find('[',temp)+1..length(temp)]
    end while
    --puts(1,bracketfound+48 & " " & bigresult[Loop]&"\n")
    -- prefix with the count as a single character (48 = '0') so that the
    -- sort below orders the entries by bracket count first
    bigresult[Loop] = bracketfound+48 & " " &bigresult[Loop]
  end for

  return sort(bigresult)
end function -- diff4
Not Categorized, Please Help
|