Pastey win_hangman
- Posted by ne1uno
Apr 25, 2012
--
/*
Hangman solver
edit in path to wordlist, one per line.
will be lowercased internally. but expects Upper case pattern
use '_' for unknowns.
win_hangman a_pl_ -> solution is apple,apply,ample,amply
(implementation of input options left as for the reader)
(not optimized! may be slow with more than 100k words, max > 10 char
(actual solution depends on wordlist used and other settings)
(virtually no error checking or input validation is performed)
antidote to recent euforum posting
*/
ifdef EUC then --translate
without warning
without type_check
end ifdef
include std/error.e
include std/types.e
include std/sequence.e as seq
include std/search.e
include std/text.e
include std/io.e
include std/math.e
include std/convert.e
include std/console.e
include std/sort.e
include std/map.e
public integer minword, maxword
object wordfile
ifdef EUC then
minword=3 maxword = 12
--wordfile = `u:\bin\dicts\w3-17list.txt` --1M
wordfile = `u:\bin\dicts\scrabbledict.txt` --100k
elsedef
minword=4 maxword = 8
wordfile = `u:\bin\dicts\commonwords.txt` --50k
end ifdef
public map dict = map:new()
public map idict = map:new()
object wordfound = ""
integer useUPPER = FALSE --default, upper() not impemented
object pats = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
--it might make sense to assign pattern based on frequency?
--immediate problem is assigned pattern won't match search pattern
--2 words with the same pattern won't be recognized as the same
--
public function patternize(sequence w)
integer nextp = 2, f
w = lower(w)
object p = pats[1], assigned = ""&w[1]
for i=2 to length(w) do
if w[i] = '_' then
p &= "_" --unknown wildcard
continue
end if
f = find(w[i], assigned)
if f then
p &= pats[f]
else
assigned &= w[i]
p &= pats[nextp]
nextp +=1
end if
end for
return p
end function
--kind of an inverse map, may already exist in the libs somewhere?
--in map stuff, predates early versions of map
--
function map_inverse(map amap, integer nonest=1, object defget="")
map nm= map:new()
sequence akeys = keys(amap)
for x=1 to length(akeys) do
if nonest then
map:put(nm, z, {akeys[x]}, map:CONCAT)
else
for y=1 to length(z) do
map:put(nm, z[y], {akeys[x]}, map:CONCAT)
end for
end if
end for
return nm
end function
--callbach for map routine
--
function wild_handler(object k, object v, object ud, integer pc)
if length(k) != length(ud[1]) then
return 0
end if
integer found = 0
for i=1 to length(v) do
--might be ok to allow contractions and plurals len>len ud?
--will be found if last char '_' and all the rest matched
--
found = 1
for j=1 to length(v[i]) do
if ud[1][j] = '_' then
--extra problems, or anything goes"
--printf(1, " wild handler %d char wild", {j})
continue
elsif ud[1][j] != v[i][j] then
--printf(1, " wild handler not match j %d", {j})
found = 0
exit
--else
end if
end for
-- if all match append to hangm?
if found = 1 then
wordfound &= {v[i]}
--return 1
end if
end for
return 0
end function
--could be slow, iterate over whole map looking for match
--
public function wild_get(map source, object pat)
object s = ""
integer found
wordfound = {} --global
found = map:for_each(source
, routine_id("wild_handler")
, pat
, 1) --return code found
if found or length(wordfound) then
return wordfound
end if
return ""
end function
--there is no way to know which is the right word for multiple patterns!
--
public function known_get(map source, object pat)
--check for all upper no '_'
object fpat = map:get(idict, pat)
object spat = sort(fpat)
return join(spat, ",")
end function
public function initdict(integer nmin=4, integer nmax=10)
object wordseq = read_lines(wordfile)
minword=nmin maxword = nmax
for i=1 to length(wordseq) do
if minword > length(wordseq[i]) or
maxword < length(wordseq[i]) or
not t_alpha(wordseq[i])
then
continue
end if
--
map:put(dict, lower(wordseq[i]),patternize(wordseq[i]) )
end for
?map:size(dict) --3..12 113,870 , 3-17 1meg
idict = map_inverse(dict)
?map:size(idict) --
--wordseq = {}
return 1
end function
-- word == {hanganword /_ unknowns, patternized form}
public function solve_hangman(object word)
object s = ""
object fpat = wild_get(idict, word)
if length(fpat) > 1 then
s &= join(fpat, ",")
printf(1, "multiple solutions %s\n", {s } )
elsif not length(fpat) then
printf(1, "no solution {%s, %s} \n", {word[1], word[2]} )
else
printf(1, "solution is %s\n", {fpat[1] } )
s &= fpat[1]
end if
return s
end function
ifdef UNITTESTING then
include std/unittest.e
atom timer = time()
?time()-timer
initdict(5,10)
?time()-timer
object fpat = map:get(idict, "ABCDEFGAH")
object spat = sort(fpat)
test_equal("idict 1", 1, --multiple returns
/**/ 0< match("objection",
join(spat, ","))
)
test_equal("known_get 1", 1, --multiple returns
/**/ 0< match("insecurity",
known_get(idict,
patternize("insecurity")))
)
test_equal("has_get apple", 1, map:has(dict, "apple"))
test_equal("has_get 1", 1, map:has(dict, "injection"))
test_equal("paterze 1", "ABAC", patternize("eset"))
test_equal("paterze 2", "ABCDEFAGB"
, patternize("injection"))
test_equal("paterze 3", "AB_C_DA_B"
, patternize("in_e_ti_n"))
fpat = wild_get(idict, {"in_e_ti_n", "AB_C_DA_B"} )
spat = sort(fpat)
--returns 7 more or less simar words dependng on wordlist
test_equal("wild_get 1", "injection", spat)
-- _eth__ate_ 4
fpat = wild_get(idict, {"u_il_z__le", "A_BC_DC_BC"} )
test_equal("wild_get 0", "", fpat)
?time()-timer
?33
test_report()
elsedef
initdict(minword, maxword)
atom timer
sequence hman = {"a_pl_", patternize("a_pl_")}
--object ns = solve_hangman({"a_pl_", "A_BC_"})
printf(1, "minword %d to maxword %d\n", {minword, maxword } )
loop do
timer = time()
?time()-timer
printf(1, "find %s ...", {hman[1] } )
object ns = solve_hangman(hman)
--printf(1, "ns is %s\n", {ns } )
?time()-timer
hman = prompt_string("enter hangman:")
if not length(hman) then exit end if
hman = {hman, patternize(hman)}
until 0
end loop
end ifdef
ifdef EUC then --translate
any_key()
end ifdef
/*
a few weeks ago I had occasion to create a simple hangman solver.
an earlier form of this program worked well enough for what I needed.
some kind of trie or suffix tree would probably be faster.
there is some pathological memory usage if you have a wordlist
with more than a few hundred thousand words, even translated
the wait to invert the map and memory use over 400meg is painful.
the time to run through all the words goes up exponentially.
translated with a 100k list, memory is about 50meg
but very slow to invert the map in the init routine.
init seems to use less memory and be a little faster interpreted.
though once everything is going, translated is twice or more faster.
no doubt some of this could be fixed by preallocating the map
and or saving it to disk once created in TEXT mode.
BINARY mode might be just as slow to read back as recreating it.
another possible bug or misuse of maps. haven't looked at the docs.
I leave the need for optimization and all the rough edges intact.
if the wordlist is around 50k words or less it's quite usable.
with a little more work it could be a scrabble or crossword helper.
*/