Pastey win_hangman

--
/* 
Hangman solver 
edit in path to wordlist, one per line.  
will be lowercased internally. but expects Upper case pattern 
use '_' for unknowns. 
 
win_hangman a_pl_  -> solution is  apple,apply,ample,amply  
 (implementation of input options left as for the reader) 
 (not optimized! may be slow with more than 100k words, max > 10 char 
 (actual solution depends on wordlist used and other settings) 
 (virtually no error checking or input validation is performed) 
 
antidote to recent euforum posting 
*/ 
 
ifdef EUC then --translate 
	without warning 
	without type_check 
end ifdef 
 
include std/error.e 
include std/types.e  
include std/sequence.e as seq 
include std/search.e 
include std/text.e 
include std/io.e 
include std/math.e 
include std/convert.e  
include std/console.e  
 
include std/sort.e 
include std/map.e 
 
	public integer minword, maxword 
	object wordfile 
	ifdef EUC then  
		minword=3 maxword = 12 
		--wordfile = `u:\bin\dicts\w3-17list.txt` --1M 
		wordfile = `u:\bin\dicts\scrabbledict.txt` --100k 
	elsedef 
		minword=4 maxword = 8 
		wordfile = `u:\bin\dicts\commonwords.txt`  --50k 
	end ifdef 
 
	public map dict = map:new() 
	public map idict = map:new() 
	object wordfound = "" 
	integer useUPPER = FALSE	--default, upper() not impemented 
 
object pats = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 
 
--it might make sense to assign pattern based on frequency? 
--immediate problem is assigned pattern won't match search pattern 
--2 words with the same pattern won't be recognized as the same 
-- 
public function patternize(sequence w) 
	integer nextp = 2, f 
	w = lower(w) 
	object p = pats[1], assigned = ""&w[1] 
 
	for i=2 to length(w) do 
		if w[i] = '_' then 
			p &= "_"	--unknown wildcard 
			continue 
		end if 
		f = find(w[i], assigned) 
		if f then 
			p &= pats[f] 
		else 
			assigned &= w[i] 
			p &= pats[nextp] 
			nextp +=1  
		end if 
	end for  
	 
	return p 
end function 
 
--kind of an inverse map, may already exist in the libs somewhere? 
--in map stuff, predates early versions of map 
-- 
function map_inverse(map amap, integer nonest=1, object defget="") 
	map nm= map:new() 
	sequence akeys = keys(amap) 
	for x=1 to length(akeys) do 
 
	if nonest then 
		map:put(nm, z, {akeys[x]}, map:CONCAT) 
	else 
		for y=1 to length(z) do 
			map:put(nm, z[y], {akeys[x]}, map:CONCAT) 
		end for 
	end if 
	end for 
 return nm  
end function 
 
--callbach for map routine 
-- 
function wild_handler(object k, object v, object ud, integer pc)  
 
	if length(k) != length(ud[1]) then 
		return 0   
	end if 
	integer found = 0 
 
	for i=1 to length(v) do 
		--might be ok to allow contractions and plurals len>len ud? 
		--will be found if last char '_' and all the rest matched 
		-- 
		found = 1 
		for j=1 to length(v[i]) do 
			if ud[1][j] = '_' then 
				--extra problems, or anything goes" 
				--printf(1, " wild handler %d char wild", {j}) 
				continue 
			elsif ud[1][j] != v[i][j] then 
				--printf(1, " wild handler not match j %d", {j}) 
				found = 0 
				exit 
			--else 
			end if 
		end for 
		-- if all match append to hangm? 
		if found = 1 then 
			wordfound &= {v[i]} 
			--return 1 
		end if 
		 
	end for 
 
	return 0  
end function 
 
 
--could be slow, iterate over whole map looking for match 
-- 
public function wild_get(map source, object pat) 
	object s = "" 
 
	integer found 
	wordfound = {}  --global 
	found = map:for_each(source 
			, routine_id("wild_handler") 
			, pat 
			, 1)		--return code found 
	if found or length(wordfound) then 
		return wordfound 
	end if 
 return "" 
end function 
 
--there is no way to know which is the right word for multiple patterns! 
-- 
public function known_get(map source, object pat) 
	--check for all upper no '_' 
	object fpat = map:get(idict, pat) 
	object spat = sort(fpat) 
 
 return join(spat, ",")  
end function 
 
public function initdict(integer nmin=4, integer nmax=10) 
	object wordseq = read_lines(wordfile) 
 
	minword=nmin  maxword = nmax 
	 
	for i=1 to length(wordseq) do 
		if minword > length(wordseq[i]) or 
			maxword < length(wordseq[i]) or 
		 	not t_alpha(wordseq[i]) 
		then 
			continue 
		end if 
		-- 
		map:put(dict, lower(wordseq[i]),patternize(wordseq[i]) ) 
	end for  
	?map:size(dict) --3..12 113,870 , 3-17 1meg 
	idict = map_inverse(dict) 
	?map:size(idict) -- 
	--wordseq = {} 
	return 1 
end function 
 
 
-- word == {hanganword /_ unknowns, patternized form} 
public function solve_hangman(object word) 
	object s = "" 
 
 
	object fpat = wild_get(idict, word) 
	if length(fpat) > 1 then 
 
		s &= join(fpat, ",") 
		printf(1, "multiple solutions  %s\n", {s } ) 
 
	elsif not length(fpat) then 
		printf(1, "no solution {%s, %s} \n", {word[1], word[2]} ) 
 
	else 
		printf(1, "solution is %s\n", {fpat[1] } ) 
		s &= fpat[1] 
	end if 
	return s 
end function 
 
 
 
ifdef UNITTESTING  then 
  include std/unittest.e 
 
 
	atom timer = time() 
	 
	 ?time()-timer 
	initdict(5,10) 
	 ?time()-timer 
 
	object fpat = map:get(idict, "ABCDEFGAH") 
	object spat = sort(fpat) 
	test_equal("idict 1", 1,   --multiple returns 
			/**/ 0< match("objection", 
				 join(spat, ","))  
			) 
 
	test_equal("known_get 1", 1,   --multiple returns 
			/**/ 0< match("insecurity", 
				 known_get(idict,  
				 	patternize("insecurity")))  
			) 
	test_equal("has_get apple", 1, map:has(dict, "apple")) 
	test_equal("has_get 1", 1, map:has(dict, "injection")) 
 
	test_equal("paterze 1", "ABAC", patternize("eset")) 
	test_equal("paterze 2", "ABCDEFAGB" 
							, patternize("injection")) 
	test_equal("paterze 3", "AB_C_DA_B" 
							, patternize("in_e_ti_n")) 
 
	 fpat = wild_get(idict, {"in_e_ti_n", "AB_C_DA_B"} ) 
	 spat = sort(fpat) 
	 --returns 7 more or less simar words dependng on wordlist 
	test_equal("wild_get 1", "injection", spat) 
 
	-- _eth__ate_  4  
 
	 fpat = wild_get(idict, {"u_il_z__le", "A_BC_DC_BC"} ) 
	test_equal("wild_get 0", "", fpat) 
 
	 ?time()-timer 
	?33 
	test_report() 
 
elsedef 
 
	initdict(minword, maxword) 
 
	atom timer 
	sequence hman = {"a_pl_", patternize("a_pl_")} 
	--object ns = solve_hangman({"a_pl_", "A_BC_"}) 
 
	printf(1, "minword %d to maxword %d\n", {minword, maxword } ) 
	loop do 
		timer = time() 
		 ?time()-timer 
		printf(1, "find %s ...", {hman[1] } ) 
		 
		object ns = solve_hangman(hman) 
		--printf(1, "ns is %s\n", {ns } ) 
		 
		 ?time()-timer 
	 
		hman = prompt_string("enter hangman:") 
		if not length(hman) then exit end if 
		hman = {hman, patternize(hman)} 
		until 0 
	end loop 
end ifdef 
 
ifdef EUC then --translate 
	any_key() 
end ifdef 
 
/* 
a few weeks ago I had occasion to create a simple hangman solver.  
an earlier form of this program worked well enough for what I needed. 
some kind of trie or suffix tree would probably be faster.  
  
there is some pathological memory usage if you have a wordlist 
with more than a few hundred thousand words, even translated 
the wait to invert the map and memory use over 400meg is painful.  
the time to run through all the words goes up exponentially. 
translated with a 100k list, memory is about 50meg  
but very slow to invert the map in the init routine. 
init seems to use less memory and be a little faster interpreted. 
though once everything is going, translated is twice or more faster. 
  
no doubt some of this could be fixed by preallocating the map 
and or saving it to disk once created in TEXT mode. 
BINARY mode might be just as slow to read back as recreating it. 
another possible bug or misuse of maps. haven't looked at the docs. 
  
I leave the need for optimization and all the rough edges intact. 
if the wordlist is around 50k words or less it's quite usable. 
with a little more work it could be a scrabble or crossword helper. 
*/