-- if you build something with this,
-- or derive something from this,
-- i'd like a credit. Kat
--
--
-- Program Tiggrphone.e
--
-- This is to phonetically compare words for parsing or for
-- spell correction use. Or anything else i think of.
-- Both Soundex and Metaphone strip out vowels by default,
-- i believe this is an error, they throw away too much info.
--
-- *Any* scheme to simplify words this way is stymied by
-- words such as:
-- mint/pint cough/bough clove/love cachet/hatchet
-- These pairs, and others like them, can only be resolved
-- by actually knowing the pronunciation.
--
-- entry point is function Tiggrfoneize()
--
-- Receives theword as "WORD", with no duplicated letters.
-- If you cannot do uppercase, then use "+" as part of the level.
-- We don't handle the first letter, it stays unchanged in all soundex levels.
-- The original Soundex did levels 0,1,2 -- *very* lossy, imo.
--
-- The levels herein are applied in this order: C - U - S - 0 - 4+1 - 3 - 2
-- C = convert "3L33T" IRC chars to base letter, example: "?" becomes "o"
-- U = upcase the word for user, so "word" or "Word" can be received
-- S = strip punctuation and duplicate letters
-- 0 = no vowels, or H or W, unless it is the first letter
-- 1 = converts to numeric codes, except for the first letter
-- 2 = chops the result to 4 chars
-- 3 = enhanced Soundex
-- 4 = convert the first letter to numeric code when using level 1
-- returns theword in uppercase and/or numeric codes, as selected
--
-- sendMirc() is because i used mirc as a gui, since there isn't one for Euphoria.
--
-- The "without type_check" directive below is commented out, so type checking
-- keeps its default setting for this file.
--without type_check
-- Compile the following code with trace support; Tiggrfoneize() calls trace(1).
with trace
-- convertstuff(theword)
--
-- Level "C" helper: walks theword element by element and swaps any element
-- found in one of the look-alike tables below for its plain base letter.
--
-- theword : the sequence to convert
-- returns : theword with every matching element replaced; elements that
--           match no table are returned unchanged
--
-- Each search table is paired with a replacement table of the same length;
-- the index returned by find() selects the replacement.  (The previous code
-- indexed the replacement table with the position in the word, which chose
-- the wrong replacement and ran off the end of the table on longer words.)
--
-- NOTE(review): the tables hold one-character *strings*, so find() only
-- matches an element of theword that is itself such a string; a plain
-- character of an ordinary string never matches -- confirm the intended
-- input form.
-- NOTE(review): most table entries read "?" here, which looks like the
-- original non-ASCII characters were lost in an encoding conversion --
-- restore them from a clean copy before relying on this function.
function convertstuff(sequence theword)
    integer location
    sequence array
    for loop = 1 to length(theword) do
        -- if theword[loop] > 128 then
        -- e / E look-alikes
        location = find(theword[loop],{"?","?","?","?","?","?","?","?","?"})
        if location != 0 then
            array = {"e","e","e","e","e","E","E","E","E"}
            theword[loop] = array[location]
        end if
        -- a / A look-alikes (including the AE ligatures)
        location = find(theword[loop],{"?","?","?","?","?","?","?","?","?","?","?","?","?","?","?"})
        if location != 0 then
            array = {"AE","ae","a","a","a","a","a","a","a","A","A","A","A","A","A"}
            theword[loop] = array[location]
        end if
        -- i / I look-alikes
        location = find(theword[loop],{"?","?","?","?","?","?","?","?","?"})
        if location != 0 then
            array = {"i","i","i","i","i","I","I","I","I"}
            theword[loop] = array[location]
        end if
        -- B, c / C and D look-alikes
        location = find(theword[loop],{"?","©","?","?","?","?"})
        if location != 0 then
            array = {"B","c","c","c","C","D"}
            theword[loop] = array[location]
        end if
        -- o / O look-alikes
        location = find(theword[loop],{"?","?","?","¤","°","?","?","?","?","?","?"})
        if location != 0 then
            array = {"o","o","o","o","o","o","O","O","O","O","O"}
            theword[loop] = array[location]
        end if
        -- u / U look-alikes
        location = find(theword[loop],{"µ","?","?","?","?","?","?","?","?"})
        if location != 0 then
            array = {"u","u","u","u","u","U","U","U","U"}
            theword[loop] = array[location]
        end if
        -- y / Y look-alikes
        location = find(theword[loop],{"?","?","?","?"})
        if location != 0 then
            array = {"y","y","Y","Y"}
            theword[loop] = array[location]
        end if
        -- remaining consonant look-alikes
        location = find(theword[loop],{"?","?","?","¶","?","?","®","§","+","?"})
        if location != 0 then
            array = {"l","n","N","P","p","p","r","S","t","?"}
            theword[loop] = array[location]
        end if
        -- end if
    end for
    return theword
end function -- convertstuff(sequence theword)
-- Soundex(theword, level)
--
-- Produces a Soundex-style key for theword.  level selects the processing
-- steps, which are always applied in this order: C - U - S - 0 - 1(+4) - 3 - 2
--   C = pass the word through convertstuff()
--   U = upcase the word
--   S = keep only the letters A-Z and collapse runs of the same letter
--   0 = delete A E I O U Y H W, except in the first position
--   1 = replace letters with Soundex digits, leaving the first letter alone
--   4 = together with 1: code the first letter as well
--   3 = "enhanced" rewrites (CHR, PH, Z, X); skipped when 1 is selected
--   2 = chop the result to at most 4 characters
--
-- theword : the word to encode; steps 0, 1 and 3 only recognise uppercase
--           letters, so pass uppercase text or include 'U' in level
-- level   : a string of flag characters such as "US01".  An atom is also
--           accepted: an integer 0..9 is treated as that single digit flag
--           (NOTE(review): assumed meaning of a numeric level -- confirm
--           against callers); any other atom is treated as one flag character.
-- returns : the encoded word, or "" for an empty word
--
-- Example: Soundex("ROBERT", "01") yields "R163".
global function Soundex(sequence theword, object level)
    sequence newword, letters, digits
    integer startchar, place
    object ch

    if length(theword) = 0 then
        return ""
    end if

    -- Normalise level to a sequence of flag characters; find() needs a
    -- sequence as its second argument.
    if atom(level) then
        if integer(level) and level >= 0 and level <= 9 then
            level = {level + '0'}
        else
            level = {level}
        end if
    end if

    if find('C', level) then
        theword = convertstuff(theword)
    end if

    if find('U', level) then
        theword = upper(theword)
    end if

    if find('S', level) then
        -- remove punctuation and duplicate letters
        newword = "." -- sentinel so newword[length(newword)] always exists
        for i = 1 to length(theword) do
            ch = theword[i]
            if find(ch, "ABCDEFGHIJKLMNOPQRSTUVWXYZ") then
                if not equal(newword[length(newword)], ch) then
                    newword = newword & ch
                end if
            end if
        end for
        theword = newword[2..length(newword)] -- drop the sentinel
    end if -- level "S"

    if find('0', level) and length(theword) > 1 then
        -- delete: A, E, I, O, U, Y, H, W (the first element always stays).
        -- Build a new sequence rather than deleting in place, so the loop
        -- bound stays valid and no element is skipped.
        newword = theword[1..1]
        for i = 2 to length(theword) do
            ch = theword[i]
            if not find(ch, "AEIOUYHW") then
                newword = append(newword, ch)
            end if
        end for
        theword = newword
    end if -- level 0

    if find('1', level) then
        -- letters[n] is coded as digits[n]:
        --   0 = A E I O U Y H W     1 = B F P V     2 = C G J K Q S X Z
        --   3 = D T     4 = L     5 = M N     6 = R
        letters = "AEIOUYHWBFPVCGJKQSXZDTLMNR"
        digits  = "00000000111122222222334556"
        if find('4', level) then
            startchar = 1
        else
            startchar = 2
        end if
        for i = startchar to length(theword) do
            place = find(theword[i], letters)
            if place then
                theword[i] = digits[place]
            end if
        end for
    end if -- level 1

    if find('3', level) and not find('1', level) then
        -- "enhance" the Soundex
        -- CHR -> CR.  NOTE(review): the earlier code rebuilt the word
        -- unchanged here; dropping the H is the presumed intent -- confirm.
        place = match("CHR", theword)
        while place do
            theword = theword[1..place] & theword[place+2..length(theword)]
            place = match("CHR", theword)
        end while
        -- PH -> F
        place = match("PH", theword)
        while place do
            theword = theword[1..place-1] & 'F' & theword[place+2..length(theword)]
            place = match("PH", theword)
        end while
        -- Z -> S
        place = find('Z', theword)
        while place do
            theword[place] = 'S'
            place = find('Z', theword)
        end while
        -- X -> KS
        place = find('X', theword)
        while place do
            theword = theword[1..place-1] & "KS" & theword[place+1..length(theword)]
            place = find('X', theword)
        end while
    end if -- level 3

    if find('2', level) and length(theword) > 4 then
        -- chop it to the first 4 chars; shorter results are returned whole
        theword = theword[1..4]
    end if -- level 2

    return theword
end function -- Soundex(sequence theword, object level)
--@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
global function Tiggrfoneize(sequence theword, atom level)
integer place
sequence newword, code1, code2, code3, code4
atom display, newletter
sequence consonants, vowels
consonants = "BCDFGHJKLMNPQRSTVWXYZ"
vowels = "AEIOUY"
if find(level,{0,1,2,4}) then
return Soundex(theword,level)
end if
if find("C",level) then
theword = convertstuff(theword)
end if
display = 0
-- the '..' makes it at least 2 chars long
-- so i can do newword[1-2]
newletter = ' '
newword = ".."
--if equal(theword,"A") then trace(1) end if
if ( length(theword) = 0 ) then return "" end if
-- going to upper(), remove punctuation,
-- and strip some duplicates at the same time here
for thewordindex = 1 to length(theword) do
if find(theword[thewordindex],"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") then
if (( theword[thewordindex] >= 'a' ) and ( theword[thewordindex] <= 'z' )) then
newletter = theword[thewordindex] - 32
else
newletter = theword[thewordindex]
end if
if ( not equal(newword[length(newword)],newletter) ) or ( newletter = 'S' ) or ( newletter = 'E' ) or ( newletter = 'C' ) then
newword = newword & newletter
end if
newletter = ' '
end if
end for
-- newword has duplicate characters removed, except for these: "ECS"
-- and non-alphabet chars have been removed
-- and it's all uppercase chars now
newword = newword & ".." -- .. why? so i can index newword[index+2]
-- now newword is "..NEWWORD.."
--trace(1)
--sendMirc("!command msg " & nickroom & " " & newword )
-- SS -> S
-- else if at end of word and NOT "SS" then S -> Z
trace(1)
place = match("S",newword)
if ( place != 0 ) then
if equal(newword[length(newword)-2],'S') and not equal(newword[length(newword)-3],'S') then
newword = newword[1..length(newword)-3] & "Z.." -- 'S' -> 'Z' - dogz
end if
for loop = 2 to length(newword)-2 do -- "SS" -> "S"
if equal(newword[loop],'S') and equal(newword[loop+1],'S') then
newword = newword[1..loop] & newword[loop+1..length(newword)]
end if
end for
end if
-- 'IE' -> 'EE'
place = match("IE",newword)
while ( place != 0 ) do
newword = newword[1..place-1] & "EE" & newword[place+2..length(newword)]
place = match("IE",newword)
end while
-- 'QU' -> 'KW'
place = match("QU",newword)
while ( place != 0 ) do
newword = newword[1..place-1] & "KW" & newword[place+2..length(newword)]
place = match("QU",newword)
end while
-- Q -> K
place = match("Q",newword)
while ( place != 0 ) do
newword = newword[1..place-1] & "K" & newword[place+1..length(newword)]
place = match("Q",newword)
end while
-- 'PH' -> 'F'
place = match("PH",newword)
while ( place != 0 ) do
newword = newword[1..place-1] & 'F' & newword[place+2..length(newword)
Not Categorized, Please Help
|
|