OpenEuphoria: Wiki View

Tiggrfone

-- if you build something with this, 
-- or derive something from this, 
-- i'd like a credit. Kat 
-- 
-- 
-- Program Tiggrphone.e 
-- 
-- This is to phonetically compare words for parsing or for 
--    spell correction use. Or anything else i think of. 
-- Both Soundex and Metaphone strip out vowels by default, 
--    i believe this is an error, they throw away too much info. 
-- 
-- *Any* scheme that simplifies words this way is stymied by 
--    words such as: 
--    mint/pint cough/bough clove/love cachet/hatchet 
--    These pairs, and others like them, can only be resolved 
--    by actually knowing the pronunciation. 
-- 
-- entry point is function Tiggrfoneize() 
--   
 
-- Receives theword as "WORD", with no duplicated letters. 
-- If you cannot do uppercase, then use "+" as part of the level. 
-- We don't handle the first letter, it stays unchanged in all soundex levels. 
-- The original Soundex did levels 0,1,2 -- *very* lossy, imo. 
-- 
-- The levels herein are applied in this order: C - U - S - 0 - 4+1 - 3 - 2 
--  C = convert "3L33T" IRC chars to base letter, example: "?" becomes "o" 
--  U = upcase the word for user, so "word" or "Word" can be received 
--  S = strip punctuation and duplicate letters 
--  0 = no vowels, or H or W, unless it is the first letter 
--  1 = converts to numeric codes, except for the first letter 
--  2 = chops the result to 4 chars 
--  3 = enhanced Soundex 
--  4 = convert the first letter to numeric code when using level 1 
-- returns theword in uppercase and/or numeric codes, as selected 
-- 
--   sendMirc() is because i used mirc as a gui, since there isn't one for Euphoria. 
-- 
 
 
 
--without type_check 
with trace 
 
-- convertstuff(theword)
--
-- Level "C": replaces "3L33T"/accented look-alike characters with their
-- base letter. Each element of theword is looked up in a series of
-- tables; when it is found, the element is replaced by the entry at the
-- SAME position of the parallel replacement table.
--
-- theword : the word to convert
-- returns : theword with every matched element replaced
--
-- Fix: the replacement used to be picked with the word position
-- (array[loop]) instead of the position of the match (array[location]),
-- which produced the wrong letter or a subscript error on longer words.
-- The loop variable is also no longer called "loop", which is a reserved
-- word in Euphoria 4.
--
-- NOTE(review): most lookup entries below read "?". They look like
-- high-bit characters that were lost when this listing was transcoded;
-- the real characters need to be restored from the original file.
-- NOTE(review): the lookup entries are one-character strings, so find()
-- only matches elements of theword that are themselves strings. The
-- elements of a flat string such as "word" are atoms and pass through
-- unchanged. Confirm what callers are expected to pass.
function convertstuff(sequence theword)

integer location
sequence array

for idx = 1 to length(theword) do

  -- E look-alikes
  location = find(theword[idx],{"?","?","?","?","?","?","?","?","?"})
  if location != 0 then
    array = {"e","e","e","e","e","E","E","E","E"}
    theword[idx] = array[location]
  end if

  -- A look-alikes (including the AE ligatures)
  location = find(theword[idx],{"?","?","?","?","?","?","?","?","?","?","?","?","?","?","?"})
  if location != 0 then
    array = {"AE","ae","a","a","a","a","a","a","a","A","A","A","A","A","A"}
    theword[idx] = array[location]
  end if

  -- I look-alikes
  location = find(theword[idx],{"?","?","?","?","?","?","?","?","?"})
  if location != 0 then
    array = {"i","i","i","i","i","I","I","I","I"}
    theword[idx] = array[location]
  end if

  -- B, C and D look-alikes
  location = find(theword[idx],{"?","©","?","?","?","?"})
  if location != 0 then
    array = {"B","c","c","c","C","D"}
    theword[idx] = array[location]
  end if

  -- O look-alikes
  location = find(theword[idx],{"?","?","?","¤","°","?","?","?","?","?","?"})
  if location != 0 then
    array = {"o","o","o","o","o","o","O","O","O","O","O"}
    theword[idx] = array[location]
  end if

  -- U look-alikes
  location = find(theword[idx],{"µ","?","?","?","?","?","?","?","?"})
  if location != 0 then
    array = {"u","u","u","u","u","U","U","U","U"}
    theword[idx] = array[location]
  end if

  -- Y look-alikes
  location = find(theword[idx],{"?","?","?","?"})
  if location != 0 then
    array = {"y","y","Y","Y"}
    theword[idx] = array[location]
  end if

  -- remaining consonant look-alikes
  location = find(theword[idx],{"?","?","?","¶","?","?","®","§","+","?"})
  if location != 0 then
    array = {"l","n","N","P","p","p","r","S","t","?"}
    theword[idx] = array[location]
  end if

end for

return theword

end function -- convertstuff(sequence theword)
 
-- Soundex(theword, level)
--
-- Builds a Soundex-style phonetic key for theword.
--
-- theword : the word to encode; "" is returned for an empty word.
-- level   : which steps to run. Either a sequence of flag characters
--           (for example "CUS01") or a single atom. An atom 0..9 is
--           treated as the matching digit flag, so Soundex(w, 1) and
--           Soundex(w, "1") are the same.
--           NOTE(review): the numeric form is accepted because
--           Tiggrfoneize() forwards the numbers 0, 1, 2 and 4 here;
--           confirm that this mapping is what was intended.
-- returns : the encoded word.
--
-- Steps are always applied in this order, whatever the order of the flags:
--   C  convert look-alike characters via convertstuff()
--   U  upper-case the word
--   S  keep only A..Z and drop adjacent duplicate letters
--   0  delete A E I O U Y H W after the first letter
--   1  replace letters with numeric codes from the 2nd letter on
--      (from the 1st letter on when flag 4 is also given)
--   3  "enhanced" rewrites (CHR -> CR, PH -> F, Z -> S, X -> KS);
--      skipped when flag 1 is given
--   2  chop the result to at most 4 characters
--
-- Fixes relative to the previous version:
--   * level was typed atom but used as the haystack of find(), so every
--     non-empty word failed; flags were also searched for as strings
--     ("C") instead of characters ('C') and could never match.
--   * level 0 compared a character with a list of strings and deleted
--     elements inside a for loop whose bound was fixed at entry.
--   * level 1 search strings contained the separating commas.
--   * level 3 used find() where match() is needed, the CHR rewrite
--     changed nothing, PH -> F left the H behind, and the X loop
--     re-searched for Z.
--   * level 2 failed on words shorter than 4 characters.
--   * the '.' sentinel is gone; slices ending at length+1 are legal.
global function Soundex(sequence theword, object level)

sequence flags, newword, groups
integer startchar, place

if length(theword) = 0 then
  return ""
end if

-- normalise level into a sequence of flag characters
if atom(level) then
  if level >= 0 and level <= 9 then
    flags = {level + '0'}
  else
    flags = {level}
  end if
else
  flags = level
end if

if find('C', flags) then
  theword = convertstuff(theword)
end if

if find('U', flags) then
  theword = upper(theword)
end if -- upcase the word

if find('S', flags) then
  -- remove punctuation and adjacent duplicate letters
  newword = "." -- placeholder so newword[length(newword)] always exists
  for thewordindex = 1 to length(theword) do
    if find(theword[thewordindex], "ABCDEFGHIJKLMNOPQRSTUVWXYZ") then
      if not equal(newword[length(newword)], theword[thewordindex]) then
        newword = newword & theword[thewordindex]
      end if
    end if
  end for
  theword = newword[2..length(newword)] -- drop that leading "."
end if -- level "S"

if find('0', flags) and length(theword) > 1 then
  -- delete: A, E, I, O, U, Y, H, W -- the first letter always stays
  newword = theword[1..1]
  for idx = 2 to length(theword) do
    if not find(theword[idx], "AEIOUYHW") then
      -- append() keeps the element intact even if it is not an atom
      newword = append(newword, theword[idx])
    end if
  end for
  theword = newword
end if -- level 0

if find('1', flags) then
  if find('4', flags) then
    startchar = 1
  else
    startchar = 2
  end if

  -- groups[n] holds the letters that are coded as the digit n-1:
  -- 0 = vowels, H, W, Y   1 = B F P V   2 = C G J K Q S X Z
  -- 3 = D T   4 = L   5 = M N   6 = R
  groups = {"AEIOUYHW", "BFPV", "CGJKQSXZ", "DT", "L", "MN", "R"}

  for idx = startchar to length(theword) do
    for code = 1 to length(groups) do
      if find(theword[idx], groups[code]) then
        theword[idx] = '0' + code - 1
        exit
      end if
    end for
  end for
end if -- level 1

if find('3', flags) and not find('1', flags) then
  -- "enhance" the Soundex

  -- CHR -> CR
  -- NOTE(review): the old slice here left the word unchanged; dropping
  -- the H is the assumed intent -- confirm.
  place = match("CHR", theword)
  while place do
    theword = theword[1..place] & theword[place+2..length(theword)]
    place = match("CHR", theword)
  end while

  -- PH -> F
  place = match("PH", theword)
  while place do
    theword = theword[1..place-1] & 'F' & theword[place+2..length(theword)]
    place = match("PH", theword)
  end while

  -- Z -> S
  place = find('Z', theword)
  while place do
    theword[place] = 'S'
    place = find('Z', theword)
  end while

  -- X -> KS
  place = find('X', theword)
  while place do
    theword = theword[1..place-1] & "KS" & theword[place+1..length(theword)]
    place = find('X', theword)
  end while

end if -- level 3

if find('2', flags) and length(theword) > 4 then
  -- chop it to the first 4 chars
  theword = theword[1..4]
end if -- level 2

return theword

end function -- Soundex(sequence theword, object level)
 
--@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 
 
global function Tiggrfoneize(sequence theword, atom level) 
integer place 
sequence newword, code1, code2, code3, code4 
atom  display, newletter 
sequence consonants, vowels 
 
consonants = "BCDFGHJKLMNPQRSTVWXYZ" 
vowels = "AEIOUY" 
 
 
if find(level,{0,1,2,4}) then 
   return Soundex(theword,level) 
end if 
 
if find("C",level) then 
  theword = convertstuff(theword) 
end if 
 
 
 
display = 0 
 
 
-- the '..' makes it at least 2 chars long 
-- so i can do newword[1-2] 
newletter = ' ' 
newword = ".." 
--if equal(theword,"A") then trace(1) end if 
 
if ( length(theword) = 0 ) then return "" end if 
 
-- going to upper(), remove punctuation, 
-- and strip some duplicates at the same time here 
for thewordindex = 1 to length(theword) do 
  if find(theword[thewordindex],"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") then 
   if (( theword[thewordindex] >= 'a' ) and ( theword[thewordindex] <= 'z' )) then 
     newletter = theword[thewordindex] - 32 
   else 
     newletter = theword[thewordindex] 
   end if 
   if ( not equal(newword[length(newword)],newletter) ) or ( newletter = 'S' ) or ( newletter = 'E' ) or ( newletter = 'C' ) then 
     newword = newword & newletter 
   end if 
   newletter = ' ' 
  end if 
end for 
 
-- newword has duplicate characters removed, except for these: "ECS" 
-- and non-alphabet chars have been removed 
-- and it's all uppercase chars now 
newword = newword & ".." -- .. why? so i can index newword[index+2] 
-- now newword is "..NEWWORD.." 
 
 
--trace(1) 
 
--sendMirc("!command msg " & nickroom & " " & newword ) 
 
 
-- SS -> S 
-- else if at end of word and NOT "SS" then S -> Z 
trace(1) 
place = match("S",newword) 
if ( place != 0 ) then 
  if equal(newword[length(newword)-2],'S') and not equal(newword[length(newword)-3],'S') then 
    newword = newword[1..length(newword)-3] & "Z.." -- 'S' -> 'Z' - dogz 
  end if 
  for loop = 2 to length(newword)-2 do -- "SS" -> "S" 
    if equal(newword[loop],'S') and equal(newword[loop+1],'S') then 
      newword = newword[1..loop] & newword[loop+1..length(newword)] 
    end if 
  end for 
 
end if 
 
 
-- 'IE' -> 'EE' 
place = match("IE",newword) 
while ( place != 0 ) do 
  newword = newword[1..place-1] & "EE" & newword[place+2..length(newword)] 
  place = match("IE",newword) 
end while 
 
 
-- 'QU' -> 'KW' 
 
place = match("QU",newword) 
while ( place != 0 ) do 
  newword = newword[1..place-1] & "KW" & newword[place+2..length(newword)] 
  place = match("QU",newword) 
end while 
 
-- Q -> K 
 
place = match("Q",newword) 
while ( place != 0 ) do 
  newword = newword[1..place-1] & "K" & newword[place+1..length(newword)] 
  place = match("Q",newword) 
end while 
 
 
-- 'PH' -> 'F' 
 
place = match("PH",newword) 
while ( place != 0 ) do 
  newword = newword[1..place-1] & 'F' & newword[place+2..length(newword)

	
		
			
									Not Categorized, Please Help
						
		
		
					
	

    
      
      
	      history,
	      backlinks

	            
      
        Last modified
        Apr 22, 2018        by
        _tom
OpenEuphoria

Tiggrfone

Search

Include:

Quick Links

User menu

Misc Menu