Pastey win_hangman
- Posted by ne1uno Apr 25, 2012
-- /* Hangman solver edit in path to wordlist, one per line. will be lowercased internally. but expects Upper case pattern use '_' for unknowns. win_hangman a_pl_ -> solution is apple,apply,ample,amply (implementation of input options left as for the reader) (not optimized! may be slow with more than 100k words, max > 10 char (actual solution depends on wordlist used and other settings) (virtually no error checking or input validation is performed) antidote to recent euforum posting */ ifdef EUC then --translate without warning without type_check end ifdef include std/error.e include std/types.e include std/sequence.e as seq include std/search.e include std/text.e include std/io.e include std/math.e include std/convert.e include std/console.e include std/sort.e include std/map.e public integer minword, maxword object wordfile ifdef EUC then minword=3 maxword = 12 --wordfile = `u:\bin\dicts\w3-17list.txt` --1M wordfile = `u:\bin\dicts\scrabbledict.txt` --100k elsedef minword=4 maxword = 8 wordfile = `u:\bin\dicts\commonwords.txt` --50k end ifdef public map dict = map:new() public map idict = map:new() object wordfound = "" integer useUPPER = FALSE --default, upper() not impemented object pats = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" --it might make sense to assign pattern based on frequency? --immediate problem is assigned pattern won't match search pattern --2 words with the same pattern won't be recognized as the same -- public function patternize(sequence w) integer nextp = 2, f w = lower(w) object p = pats[1], assigned = ""&w[1] for i=2 to length(w) do if w[i] = '_' then p &= "_" --unknown wildcard continue end if f = find(w[i], assigned) if f then p &= pats[f] else assigned &= w[i] p &= pats[nextp] nextp +=1 end if end for return p end function --kind of an inverse map, may already exist in the libs somewhere? --in map stuff, predates early versions of map -- function map_inverse(map amap, integer nonest=1, object defget="") map nm= map:new() sequence akeys = keys(amap) for x=1 to length(akeys) do if nonest then map:put(nm, z, {akeys[x]}, map:CONCAT) else for y=1 to length(z) do map:put(nm, z[y], {akeys[x]}, map:CONCAT) end for end if end for return nm end function --callbach for map routine -- function wild_handler(object k, object v, object ud, integer pc) if length(k) != length(ud[1]) then return 0 end if integer found = 0 for i=1 to length(v) do --might be ok to allow contractions and plurals len>len ud? --will be found if last char '_' and all the rest matched -- found = 1 for j=1 to length(v[i]) do if ud[1][j] = '_' then --extra problems, or anything goes" --printf(1, " wild handler %d char wild", {j}) continue elsif ud[1][j] != v[i][j] then --printf(1, " wild handler not match j %d", {j}) found = 0 exit --else end if end for -- if all match append to hangm? if found = 1 then wordfound &= {v[i]} --return 1 end if end for return 0 end function --could be slow, iterate over whole map looking for match -- public function wild_get(map source, object pat) object s = "" integer found wordfound = {} --global found = map:for_each(source , routine_id("wild_handler") , pat , 1) --return code found if found or length(wordfound) then return wordfound end if return "" end function --there is no way to know which is the right word for multiple patterns! -- public function known_get(map source, object pat) --check for all upper no '_' object fpat = map:get(idict, pat) object spat = sort(fpat) return join(spat, ",") end function public function initdict(integer nmin=4, integer nmax=10) object wordseq = read_lines(wordfile) minword=nmin maxword = nmax for i=1 to length(wordseq) do if minword > length(wordseq[i]) or maxword < length(wordseq[i]) or not t_alpha(wordseq[i]) then continue end if -- map:put(dict, lower(wordseq[i]),patternize(wordseq[i]) ) end for ?map:size(dict) --3..12 113,870 , 3-17 1meg idict = map_inverse(dict) ?map:size(idict) -- --wordseq = {} return 1 end function -- word == {hanganword /_ unknowns, patternized form} public function solve_hangman(object word) object s = "" object fpat = wild_get(idict, word) if length(fpat) > 1 then s &= join(fpat, ",") printf(1, "multiple solutions %s\n", {s } ) elsif not length(fpat) then printf(1, "no solution {%s, %s} \n", {word[1], word[2]} ) else printf(1, "solution is %s\n", {fpat[1] } ) s &= fpat[1] end if return s end function ifdef UNITTESTING then include std/unittest.e atom timer = time() ?time()-timer initdict(5,10) ?time()-timer object fpat = map:get(idict, "ABCDEFGAH") object spat = sort(fpat) test_equal("idict 1", 1, --multiple returns /**/ 0< match("objection", join(spat, ",")) ) test_equal("known_get 1", 1, --multiple returns /**/ 0< match("insecurity", known_get(idict, patternize("insecurity"))) ) test_equal("has_get apple", 1, map:has(dict, "apple")) test_equal("has_get 1", 1, map:has(dict, "injection")) test_equal("paterze 1", "ABAC", patternize("eset")) test_equal("paterze 2", "ABCDEFAGB" , patternize("injection")) test_equal("paterze 3", "AB_C_DA_B" , patternize("in_e_ti_n")) fpat = wild_get(idict, {"in_e_ti_n", "AB_C_DA_B"} ) spat = sort(fpat) --returns 7 more or less simar words dependng on wordlist test_equal("wild_get 1", "injection", spat) -- _eth__ate_ 4 fpat = wild_get(idict, {"u_il_z__le", "A_BC_DC_BC"} ) test_equal("wild_get 0", "", fpat) ?time()-timer ?33 test_report() elsedef initdict(minword, maxword) atom timer sequence hman = {"a_pl_", patternize("a_pl_")} --object ns = solve_hangman({"a_pl_", "A_BC_"}) printf(1, "minword %d to maxword %d\n", {minword, maxword } ) loop do timer = time() ?time()-timer printf(1, "find %s ...", {hman[1] } ) object ns = solve_hangman(hman) --printf(1, "ns is %s\n", {ns } ) ?time()-timer hman = prompt_string("enter hangman:") if not length(hman) then exit end if hman = {hman, patternize(hman)} until 0 end loop end ifdef ifdef EUC then --translate any_key() end ifdef /* a few weeks ago I had occasion to create a simple hangman solver. an earlier form of this program worked well enough for what I needed. some kind of trie or suffix tree would probably be faster. there is some pathological memory usage if you have a wordlist with more than a few hundred thousand words, even translated the wait to invert the map and memory use over 400meg is painful. the time to run through all the words goes up exponentially. translated with a 100k list, memory is about 50meg but very slow to invert the map in the init routine. init seems to use less memory and be a little faster interpreted. though once everything is going, translated is twice or more faster. no doubt some of this could be fixed by preallocating the map and or saving it to disk once created in TEXT mode. BINARY mode might be just as slow to read back as recreating it. another possible bug or misuse of maps. haven't looked at the docs. I leave the need for optimization and all the rough edges intact. if the wordlist is around 50k words or less it's quite usable. with a little more work it could be a scrabble or crossword helper. */


