Pastey alternative std/map.e #2

--****
-- == Map (Hash Table) 
-- 
-- <<LEVELTOC level=2 depth=4>> 
-- 
-- A **map** is a special array, often called an associative array or dictionary; 
-- in a map the data **values** (any Euphoria object) are indexed by **keys** (also any Euphoria object). 
-- 
-- When programming think in terms of //key:value// pairs. 
-- For example we can code things like this~: 
--    custrec = new() -- Create a new map 
--    put(custrec, "Name", "Joe Blow") 
--    put(custrec, "Address", "555 High Street") 
--    put(custrec, "Phone", 555675632) 
-- This creates three elements in the map, and they are indexed by ##"Name"##,  
-- ##"Address"## and ##"Phone"##, meaning that to get the data associated with those 
-- keys we can code~: 
--    object data = get(custrec, "Phone") 
--    -- data now set to 555675632 
--  
-- Note that //only one instance of a given key// can exist in a given map, meaning 
-- for example, we could not have two separate ##"Name"## values in the above ##custrec## 
-- map. 
-- 
-- Maps automatically grow to accommodate all the elements placed into it. 
-- 
-- Associative arrays can be implemented in many different ways, depending 
-- on what efficiency trade-offs have been made. This implementation allows 
-- you to specify how many items you expect the map to hold, or simply start 
-- with the default size. 
-- 
-- As the number of items in the map grows, the map may increase its size 
-- to accommodate larger numbers of items. 
-- 
 
namespace map 
 
include std/hash.e 
include std/datetime.e 
include std/error.e 
include std/eumem.e 
include std/get.e 
include std/io.e 
include std/math.e 
include std/pretty.e 
include std/primes.e 
include std/search.e 
include std/serialize.e 
include std/sort.e 
include std/stats.e as stats 
include std/text.e 
include std/types.e 
 
include euphoria/info.e 
 
constant type_is_map   = "Eu:StdMap" 
 
constant 
	DEFAULT_SIZE  = 8, 
	$ 
 
--**** 
-- === Operation Codes for Put 
 
public enum 
	PUT, 
	ADD, 
	SUBTRACT, 
	MULTIPLY, 
	DIVIDE, 
	APPEND, 
	CONCAT, 
	LEAVE 
 
ifdef BITS32 then 
	constant DEFAULT_HASH = HSIEH30 
elsedef 
	constant DEFAULT_HASH = HSIEH30 
end ifdef 
 
function hash( object x ) 
	return eu:hash( x, DEFAULT_HASH ) 
end function 
 
--**** 
-- === Types 
-- 
 
--** 
-- defines the datatype 'map'. 
-- 
-- Comments: 
-- Used when declaring a map variable. 
-- 
-- Example 1: 
--   map SymbolTable = new() -- Create a new map to hold the symbol table. 
 
public type map( object m ) 
	if not atom( m ) then 
		return 0 
	end if 
	if length( eumem:ram_space ) < m then 
		return 0 
	end if 
	if m < 1 then 
		return 0 
	end if 
	if length( eumem:ram_space[m] ) != 2 then 
		return 0 
	end if 
	if not sequence( eumem:ram_space[m][1] ) then 
		return 0 
	end if 
	if not sequence( eumem:ram_space[m][2] ) then 
		return 0 
	end if 
	return 1 
end type 
 
--**** 
-- === Routines 
-- 
 
--** 
-- calculates a Hashing value from the supplied data. 
-- 
-- Parameters: 
--   # ##key_p## : The data for which you want a hash value calculated. 
--   # ##max_hash_p## :  The returned value will be no larger than this value. 
-- 
-- Returns: 
--   An **integer**, the value of which depends only on the supplied data. 
-- 
-- Comments: 
--   This is used whenever you need a single number to represent the data you supply. 
--   It can calculate the number based on all the data you give it, which can be 
--   an atom or sequence of any value. 
-- 
-- Example 1: 
--   integer h1 
--   -- calculate a hash value and ensure it will be a value from 1 to 4097. 
--   h1 = calc_hash( symbol_name, 4097 ) 
-- 
 
--** 
-- deprecated. 
-- 
-- Parameters: 
-- # ##new_value_p## : unused 
-- value. 
-- 
-- Returns: 
--Zero.. 
-- 
public function threshold(integer new_value_p = 0) 
	return 0 
end function 
	 
--** 
-- deprecated 
-- 
-- Parameters: 
-- # ##m## : A map 
-- 
-- Returns: 
-- Zero. 
-- 
public function type_of(map the_map_p) 
	return 0 
end function 
	 
--** 
-- changes the width (that is the number of buckets) of a map.  
-- 
-- Parameters: 
--   # ##m## : the map to resize 
--   # ##requested_size_p## : a lower limit for the new size. 
-- 
-- Comments: 
-- If ##requested_size_p## is not greater than zero, a new width is automatically derived from the current one. 
--  
-- See Also: 
--		[[:statistics]], [[:optimize]] 
 
public procedure rehash( map the_map_p, integer requested_size_p = 0 ) 
end procedure 
 
 
--** 
-- creates a new map data structure. 
-- 
-- Parameters: 
--		# ##initial_size_p## : An estimate of how many initial elements will be stored 
--   in the map. 
-- 
-- Returns: 
--		An empty **map**. 
-- 
-- Comments: 
--   A new object of type map is created.  The resources allocated for the map will 
--   be automatically cleaned up if the reference count of the returned value drops 
--  to zero, or if passed in a call to [[:delete]]. 
-- 
-- Example 1: 
--   map m = new()  -- m is now an empty map 
--   x = new()    -- the resources for the map previously stored in x are released automatically 
--   delete( m )  -- the resources for the map are released 
 
public function new( integer initial_size_p = DEFAULT_SIZE ) 
	return eumem:malloc( {"",""} ) 
end function 
 
--** 
-- returns either the supplied map or a new map. 
-- 
-- Parameters: 
--      # ##the_map_p## : An object, that could be an existing map 
--		# ##initial_size_p## : An estimate of how many initial elements will be stored 
--   in a new map. 
-- 
-- Returns: 
-- A **map**, 
--		If ##m## is an existing map then it is returned otherwise this  
--      returns a new empty **map**. 
-- 
-- Comments: 
--   This is used to return a new map if the supplied variable isn't already 
--   a map. 
-- 
-- Example 1: 
--   map m = new_extra( foo() ) -- If foo() returns a map it is used, otherwise 
--                              --  a new map is created. 
-- 
 
public function new_extra(object the_map_p, integer initial_size_p = 8) 
	if map(the_map_p) then 
		return the_map_p 
	else 
		return new(initial_size_p) 
	end if 
end function 
 
--** 
-- compares two maps to test equality. 
-- 
-- Parameters: 
--		# ##map_1_p## : A map 
--		# ##map_2_p## : A map 
--      # ##scope_p## : An integer that specifies what to compare. 
--        ** 'k' or 'K' to only compare keys. 
--        ** 'v' or 'V' to only compare values. 
--        ** 'd' or 'D' to compare both keys and values. This is the default. 
-- 
-- Returns: 
--   An **integer**, 
--   * -1 if they are not equal. 
--   * 0 if they are literally the same map. 
--   * 1 if they contain the same keys and/or values. 
-- 
-- Example 1: 
--   map map_1_p = foo() 
--   map map_2_p = bar() 
--   if compare(map_1_p, map_2_p, 'k') >= 0 then 
--        ... -- two maps have the same keys 
-- 
 
public function compare(map map_1_p, map map_2_p, integer scope_p = 'd') 
	sequence data_set_1_ 
	sequence data_set_2_ 
 
	if map_1_p = map_2_p then 
		return 0 
	end if 
	 
	switch scope_p do 
		case 'v', 'V' then 
			data_set_1_ = stdsort:sort(values(map_1_p)) 
			data_set_2_ = stdsort:sort(values(map_2_p)) 
			 
		case 'k', 'K' then 
			data_set_1_ = keys(map_1_p, 1) 
			data_set_2_ = keys(map_2_p, 1) 
			 
		case else 
			data_set_1_ = pairs(map_1_p, 1) 
			data_set_2_ = pairs(map_2_p, 1) 
 
	end switch 
				 
	if equal(data_set_1_, data_set_2_) then 
		return 1 
	end if 
	 
	return -1 
	 
end function 
 
--** 
-- checks whether map has a given key. 
-- 
-- Parameters: 
-- 		# ##the_map_p## : the map to inspect 
--		# ##the_key_p## : an object to be looked up 
-- 
-- Returns: 
--		An **integer**, 0 if not present, 1 if present. 
-- 
-- Example 1: 
--   map the_map_p 
--   the_map_p = new() 
--   put(the_map_p, "name", "John") 
--   ? has(the_map_p, "name") -- 1 
--   ? has(the_map_p, "age")  -- 0 
-- 
-- See Also: 
--   [[:get]] 
-- 
 
public function has( map the_map_p, object key ) 
	return find(key, eumem:ram_space[the_map_p][1]) > 0 
end function 
 
--** 
-- retrieves the value associated to a key in a map. 
-- 
-- Parameters: 
--		# ##the_map_p## : the map to inspect 
--		# ##the_key_p## : an object, the the_key_p being looked tp 
--		# ##default_value_p## : an object, a default value returned if ##the_key_p## not found. 
--                         The default is 0. 
-- 
-- Returns: 
--		An **object**, the value that corresponds to ##the_key_p## in ##the_map_p##.  
--      If ##the_key_p## is not in ##the_map_p##, ##default_value_p## is returned instead. 
-- 
-- Example 1: 
--   map ages 
--   ages = new() 
--   put(ages, "Andy", 12) 
--   put(ages, "Budi", 13) 
-- 
--   integer age 
--   age = get(ages, "Budi", -1) 
--   if age = -1 then 
--       puts(1, "Age unknown") 
--   else 
--       printf(1, "The age is %d", age) 
--   end if 
-- 
-- See Also: 
--   [[:has]] 
-- 
 
public function get( map the_map_p, object key, object default = 0 ) 
	integer ix = find(key, eumem:ram_space[the_map_p][1]) 
	if ix then 
	return eumem:ram_space[the_map_p][2][ix] 
	end if 
	return default 
end function 
 
--** 
-- returns the value given a nested key. 
-- 
-- Comments: 
-- Returns the value that corresponds to the object ##the_keys_p## in the nested map  
-- the_map_p.  ##the_keys_p## is a sequence of keys.  If any key is not in the map, the  
-- object default_value_p is returned instead. 
--  
 
public function nested_get( map the_map_p, sequence the_keys_p, object default_value_p = 0) 
	for i = 1 to length( the_keys_p ) - 1 do 
		object val_ = get( the_map_p, the_keys_p[1], 0 ) 
 
		if not map( val_ ) then 
			-- not a map 
			return default_value_p 
		else 
			the_map_p = val_ 
			the_keys_p = the_keys_p[2..$] 
		end if 
	end for 
 
	return get( the_map_p, the_keys_p[1], default_value_p ) 
end function 
 
--** 
-- adds or updates an entry on a map. 
-- 
-- Parameters: 
--		# ##the_map_p## : the map where an entry is being added or updated 
--		# ##the_key_p## : an object, the the_key_p to look up 
--		# ##the_value_p## : an object, the value to add, or to use for updating. 
--		# ##operation## : an integer, indicating what is to be done with ##the_value_p##. Defaults to PUT. 
--		# ##trigger_p## : Deprecated. This parameter defaults to zero and is not used. 
-- 
-- Comments: 
-- * The operation parameter can be used to modify the existing value.  Valid operations are:  
--  
-- ** ##PUT## ~--  This is the default, and it replaces any value in there already 
-- ** ##ADD## ~--  Equivalent to using the += operator  
-- ** ##SUBTRACT## ~--  Equivalent to using the -= operator  
-- ** ##MULTIPLY## ~--  Equivalent to using the *= operator 
-- ** ##DIVIDE## ~-- Equivalent to using the /= operator  
-- ** ##APPEND## ~-- Appends the value to the existing data  
-- ** ##CONCAT## ~-- Equivalent to using the &= operator 
-- ** ##LEAVE## ~--  If it already exists, the current value is left unchanged 
--               otherwise the new value is added to the map. 
-- 
-- Example 1: 
--   map ages 
--   ages = new() 
--   put(ages, "Andy", 12) 
--   put(ages, "Budi", 13) 
--   put(ages, "Budi", 14) 
-- 
--   -- ages now contains 2 entries: "Andy" => 12, "Budi" => 14 
-- 
-- See Also: 
--		[[:remove]], [[:has]],  [[:nested_put]] 
-- 
 
public procedure put( map the_map_p, object key, object val, object op = PUT, object deprecated = 0 ) 
	integer ix = find(key, eumem:ram_space[the_map_p][1]) 
	if not ix then 
	-- TODO this doesn't crash on MUL or DIVIDE, but it should 
		eumem:ram_space[the_map_p][1] =  
		append(eumem:ram_space[the_map_p][1], key) 
		if op = APPEND then 
		eumem:ram_space[the_map_p][2] =  
		append(eumem:ram_space[the_map_p][2], {val}) 
		else 
		eumem:ram_space[the_map_p][2] =  
		append(eumem:ram_space[the_map_p][2], val) 
		end if 
		--pretty_print(1, {-1, 
		--eumem:ram_space[the_map_p][1], 
		--eumem:ram_space[the_map_p][2]} ) 
		return 
	end if 
 
	switch op do 
		case PUT then 
		eumem:ram_space[the_map_p][2][ix] = val 
		case ADD then 
		eumem:ram_space[the_map_p][2][ix] += val 
		case SUBTRACT then 
		eumem:ram_space[the_map_p][2][ix] -= val 
		case MULTIPLY then 
		eumem:ram_space[the_map_p][2][ix] *= val 
		case DIVIDE then 
		eumem:ram_space[the_map_p][2][ix] /= val 
		case APPEND then 
		eumem:ram_space[the_map_p][2][ix] = 
		append(eumem:ram_space[the_map_p][2][ix], val) 
		case CONCAT then 
		eumem:ram_space[the_map_p][2][ix] &= val 
		case LEAVE then 
		case else 
			error:crash("Unknown operation given to map.e:put()") 
	end switch 
		--pretty_print(1,  {-2, 
		--eumem:ram_space[the_map_p][1], 
		--eumem:ram_space[the_map_p][2]}) 
end procedure 
 
 
--** 
-- adds or updates an entry on a map. 
-- 
-- Parameters: 
--		# ##the_map_p## : the map where an entry is being added or updated 
--		# ##the_keys_p## : a sequence of keys for the nested maps 
--		# ##the_value_p## : an object, the value to add, or to use for updating. 
--		# ##operation_p## : an integer, indicating what is to be done with ##value##. Defaults to PUT. 
--		# ##deprecated_trigger_p## : Deprecated. This parameter defaults to zero and is not used. 
-- 
-- 
-- Comments: 
-- Valid operations are~:  
--  
-- * ##PUT## ~--  This is the default, and it replaces any value in there already 
-- * ##ADD## ~--  Equivalent to using the += operator  
-- * ##SUBTRACT## ~--  Equivalent to using the -= operator  
-- * ##MULTIPLY## ~--  Equivalent to using the *= operator  
-- * ##DIVIDE## ~-- Equivalent to using the /= operator  
-- * ##APPEND## ~-- Appends the value to the existing data  
-- * ##CONCAT## ~-- Equivalent to using the &= operator 
-- 
--   * If existing entry with the same key is already in the map, the value of the entry is updated. 
-- 
-- Example 1: 
-- map city_population 
-- city_population = new() 
-- nested_put(city_population, {"United States", "California", "Los Angeles"}, 
--     3819951 ) 
-- nested_put(city_population, {"Canada",        "Ontario",    "Toronto"},      
--     2503281 ) 
-- 
-- See Also: 
--   [[:put]] 
-- 
 
public procedure nested_put( map the_map_p, sequence the_keys_p, object the_value_p, 
			integer operation_p = PUT, object deprecated_trigger_p = 0) 
	atom temp_map_ 
 
	if length( the_keys_p ) = 1 then 
		put( the_map_p, the_keys_p[1], the_value_p, operation_p ) 
	else 
		temp_map_ = new_extra( get( the_map_p, the_keys_p[1] ) ) 
		nested_put( temp_map_, the_keys_p[2..$], the_value_p, operation_p ) 
		put( the_map_p, the_keys_p[1], temp_map_, PUT ) 
	end if 
end procedure 
 
--** 
-- removes an entry with given key from a map. 
-- 
-- Parameters: 
--		# ##the_map_p## : the map to operate on 
--		# ##key## : an object, the key to remove. 
-- 
-- Comments: 
--   * If ##key## is not on ##the_map_p##, the ##the_map_p## is returned unchanged. 
--   * If you need to remove all entries, see [[:clear]] 
-- 
-- Example 1: 
--   map the_map_p 
--   the_map_p = new() 
--   put(the_map_p, "Amy", 66.9) 
--   remove(the_map_p, "Amy") 
--   -- the_map_p is now an empty map again 
-- 
-- See Also: 
--		[[:clear]], [[:has]] 
-- 
 
public procedure remove( map the_map_p, object key ) 
	integer ix = find(key, eumem:ram_space[the_map_p][1]) 
	if ix then 
		eumem:ram_space[the_map_p][1] = 
		eumem:ram_space[the_map_p][1][1..ix-1] & 
		eumem:ram_space[the_map_p][1][ix+1..$] 
 
		eumem:ram_space[the_map_p][2] = 
		eumem:ram_space[the_map_p][2][1..ix-1] & 
		eumem:ram_space[the_map_p][2][ix+1..$] 
	end if 
end procedure 
 
--** 
-- removes all entries in a map. 
-- 
-- Parameters: 
--		# ##the_map_p## : the map to operate on 
-- 
-- Comments: 
--   * This is much faster than removing each entry individually. 
--   * If you need to remove just one entry, see [[:remove]] 
-- 
-- Example 1: 
--   map the_map_p 
--   the_map_p = new() 
--   put(the_map_p, "Amy", 66.9) 
--   put(the_map_p, "Betty", 67.8) 
--   put(the_map_p, "Claire", 64.1) 
--   ... 
--   clear(the_map_p) 
--   -- the_map_p is now an empty map again 
-- 
-- See Also: 
--		[[:remove]], [[:has]] 
-- 
 
public procedure clear( map the_map_p ) 
	eumem:ram_space[the_map_p] = {"",""} 
end procedure 
 
--** 
-- returns the number of entries in a map. 
-- 
-- Parameters: 
--   ##the_map_p## : the map being queried 
-- 
-- Returns: 
--   An **integer**, the number of entries it has. 
-- 
-- Comments: 
--   For an empty map, size will be zero 
-- 
-- Example 1: 
--   map the_map_p 
--   put(the_map_p, 1, "a") 
--   put(the_map_p, 2, "b") 
--   ? size(the_map_p) -- outputs 2 
-- 
-- See Also: 
--   [[:statistics]] 
-- 
 
public function size( map the_map_p ) 
	return length(eumem:ram_space[the_map_p][1]) 
end function 
 
public enum 
	NUM_ENTRIES, 
	NUM_IN_USE, 
	NUM_BUCKETS, 
	LARGEST_BUCKET, 
	SMALLEST_BUCKET, 
	AVERAGE_BUCKET, 
	STDEV_BUCKET 
 
--** 
-- retrieves characteristics of a map. 
-- 
-- Parameters: 
--   # ##the_map_p## : the map being queried 
-- 
-- Returns: 
--   A  **sequence**, of 7 integers: 
--       * ##NUM_ENTRIES## ~-- number of entries 
--       * ##NUM_IN_USE## ~-- number of buckets in use 
--       * ##NUM_BUCKETS## ~-- number of buckets 
--       * ##LARGEST_BUCKET## ~-- size of largest bucket 
--       * ##SMALLEST_BUCKET## ~-- size of smallest bucket 
--       * ##AVERAGE_BUCKET## ~-- average size for a bucket 
--       * ##STDEV_BUCKET## ~-- standard deviation for the bucket length series 
-- 
-- Example 1: 
--   sequence s = statistics(mymap) 
--   printf(1, "The average size of the buckets is %d", s[AVERAGE_BUCKET]) 
-- 
 
public function statistics(map the_map_p) 
	sequence statistic_set_ 
	statistic_set_ = repeat( 0, STDEV_BUCKET ) 
	statistic_set_[NUM_ENTRIES] = size( the_map_p ) 
		statistic_set_[NUM_BUCKETS]     = size( the_map_p ) 
		statistic_set_[NUM_IN_USE]      = size( the_map_p ) 
		statistic_set_[SMALLEST_BUCKET] = 1 
		statistic_set_[LARGEST_BUCKET]  = 1 
		statistic_set_[STDEV_BUCKET]    = 0 
		statistic_set_[AVERAGE_BUCKET]  = 1 
	 
	return statistic_set_ 
end function 
 
--** 
-- returns all keys in a map. 
-- 
-- Parameters: 
--   # ##the_map_p##: the map being queried 
--   # ##sorted_result##: optional integer. 0 [default] means do not sort the 
--                        output and 1 means to sort the output before returning. 
-- 
-- Returns: 
--   A **sequence** made of all the keys in the map. 
-- 
-- Comments: 
--   If ##sorted_result## is not used, the order of the keys returned is not predicable.  
-- 
-- Example 1: 
--   map the_map_p 
--   the_map_p = new() 
--   put(the_map_p, 10, "ten") 
--   put(the_map_p, 20, "twenty") 
--   put(the_map_p, 30, "thirty") 
--   put(the_map_p, 40, "forty") 
-- 
--   sequence keys 
--   keys = keys(the_map_p) -- keys might be {20,40,10,30} or some other order 
--   keys = keys(the_map_p, 1) -- keys will be {10,20,30,40} 
-- 
-- See Also: 
--   [[:has]], [[:values]], [[:pairs]] 
-- 
 
public function keys( map the_map_p, integer sorted_result = 0 ) 
	sequence keys = eumem:ram_space[the_map_p][1] 
	if sorted_result then 
		return stdsort:sort( keys ) 
	end if 
	return keys 
end function 
 
--** 
-- returns values, without their keys, from a map. 
-- 
-- Parameters: 
--   # ##the_map## : the map being queried 
--   # ##keys## : optional, key list of values to return.  
--   # ##default_values## : optional default values for keys list 
-- 
-- Returns: 
--   A **sequence**, of all values stored in ##the_map##. 
-- 
-- Comments: 
--   * The order of the values returned may not be the same as the putting order.  
--   * Duplicate values are not removed. 
--   * You use the ##keys## pa