Pastey alternative std/map.e #2
- Posted by jimcbrown
(admin)
Apr 04, 2013
--****
-- == Map (Hash Table)
--
-- <<LEVELTOC level=2 depth=4>>
--
-- A **map** is a special array, often called an associative array or dictionary;
-- in a map the data **values** (any Euphoria object) are indexed by **keys** (also any Euphoria object).
--
-- When programming think in terms of //key:value// pairs.
-- For example we can code things like this~:
-- custrec = new() -- Create a new map
-- put(custrec, "Name", "Joe Blow")
-- put(custrec, "Address", "555 High Street")
-- put(custrec, "Phone", 555675632)
-- This creates three elements in the map, and they are indexed by ##"Name"##,
-- ##"Address"## and ##"Phone"##, meaning that to get the data associated with those
-- keys we can code~:
-- object data = get(custrec, "Phone")
-- -- data now set to 555675632
--
-- Note that //only one instance of a given key// can exist in a given map, meaning
-- for example, we could not have two separate ##"Name"## values in the above ##custrec##
-- map.
--
-- Maps automatically grow to accommodate all the elements placed into it.
--
-- Associative arrays can be implemented in many different ways, depending
-- on what efficiency trade-offs have been made. This implementation allows
-- you to specify how many items you expect the map to hold, or simply start
-- with the default size.
--
-- As the number of items in the map grows, the map may increase its size
-- to accommodate larger numbers of items.
--
namespace map
include std/hash.e
include std/datetime.e
include std/error.e
include std/eumem.e
include std/get.e
include std/io.e
include std/math.e
include std/pretty.e
include std/primes.e
include std/search.e
include std/serialize.e
include std/sort.e
include std/stats.e as stats
include std/text.e
include std/types.e
include euphoria/info.e
constant type_is_map = "Eu:StdMap"
constant
DEFAULT_SIZE = 8,
$
--****
-- === Operation Codes for Put
public enum
PUT,
ADD,
SUBTRACT,
MULTIPLY,
DIVIDE,
APPEND,
CONCAT,
LEAVE
ifdef BITS32 then
constant DEFAULT_HASH = HSIEH30
elsedef
constant DEFAULT_HASH = HSIEH30
end ifdef
function hash( object x )
return eu:hash( x, DEFAULT_HASH )
end function
--****
-- === Types
--
--**
-- defines the datatype 'map'.
--
-- Comments:
-- Used when declaring a map variable.
--
-- Example 1:
-- map SymbolTable = new() -- Create a new map to hold the symbol table.
public type map( object m )
if not atom( m ) then
return 0
end if
if length( eumem:ram_space ) < m then
return 0
end if
if m < 1 then
return 0
end if
if length( eumem:ram_space[m] ) != 2 then
return 0
end if
if not sequence( eumem:ram_space[m][1] ) then
return 0
end if
if not sequence( eumem:ram_space[m][2] ) then
return 0
end if
return 1
end type
--****
-- === Routines
--
--**
-- calculates a Hashing value from the supplied data.
--
-- Parameters:
-- # ##key_p## : The data for which you want a hash value calculated.
-- # ##max_hash_p## : The returned value will be no larger than this value.
--
-- Returns:
-- An **integer**, the value of which depends only on the supplied data.
--
-- Comments:
-- This is used whenever you need a single number to represent the data you supply.
-- It can calculate the number based on all the data you give it, which can be
-- an atom or sequence of any value.
--
-- Example 1:
-- integer h1
-- -- calculate a hash value and ensure it will be a value from 1 to 4097.
-- h1 = calc_hash( symbol_name, 4097 )
--
--**
-- deprecated.
--
-- Parameters:
-- # ##new_value_p## : unused
-- value.
--
-- Returns:
--Zero..
--
public function threshold(integer new_value_p = 0)
return 0
end function
--**
-- deprecated
--
-- Parameters:
-- # ##m## : A map
--
-- Returns:
-- Zero.
--
public function type_of(map the_map_p)
return 0
end function
--**
-- changes the width (that is the number of buckets) of a map.
--
-- Parameters:
-- # ##m## : the map to resize
-- # ##requested_size_p## : a lower limit for the new size.
--
-- Comments:
-- If ##requested_size_p## is not greater than zero, a new width is automatically derived from the current one.
--
-- See Also:
-- [[:statistics]], [[:optimize]]
public procedure rehash( map the_map_p, integer requested_size_p = 0 )
end procedure
--**
-- creates a new map data structure.
--
-- Parameters:
-- # ##initial_size_p## : An estimate of how many initial elements will be stored
-- in the map.
--
-- Returns:
-- An empty **map**.
--
-- Comments:
-- A new object of type map is created. The resources allocated for the map will
-- be automatically cleaned up if the reference count of the returned value drops
-- to zero, or if passed in a call to [[:delete]].
--
-- Example 1:
-- map m = new() -- m is now an empty map
-- x = new() -- the resources for the map previously stored in x are released automatically
-- delete( m ) -- the resources for the map are released
public function new( integer initial_size_p = DEFAULT_SIZE )
return eumem:malloc( {"",""} )
end function
--**
-- returns either the supplied map or a new map.
--
-- Parameters:
-- # ##the_map_p## : An object, that could be an existing map
-- # ##initial_size_p## : An estimate of how many initial elements will be stored
-- in a new map.
--
-- Returns:
-- A **map**,
-- If ##m## is an existing map then it is returned otherwise this
-- returns a new empty **map**.
--
-- Comments:
-- This is used to return a new map if the supplied variable isn't already
-- a map.
--
-- Example 1:
-- map m = new_extra( foo() ) -- If foo() returns a map it is used, otherwise
-- -- a new map is created.
--
public function new_extra(object the_map_p, integer initial_size_p = 8)
if map(the_map_p) then
return the_map_p
else
return new(initial_size_p)
end if
end function
--**
-- compares two maps to test equality.
--
-- Parameters:
-- # ##map_1_p## : A map
-- # ##map_2_p## : A map
-- # ##scope_p## : An integer that specifies what to compare.
-- ** 'k' or 'K' to only compare keys.
-- ** 'v' or 'V' to only compare values.
-- ** 'd' or 'D' to compare both keys and values. This is the default.
--
-- Returns:
-- An **integer**,
-- * -1 if they are not equal.
-- * 0 if they are literally the same map.
-- * 1 if they contain the same keys and/or values.
--
-- Example 1:
-- map map_1_p = foo()
-- map map_2_p = bar()
-- if compare(map_1_p, map_2_p, 'k') >= 0 then
-- ... -- two maps have the same keys
--
public function compare(map map_1_p, map map_2_p, integer scope_p = 'd')
sequence data_set_1_
sequence data_set_2_
if map_1_p = map_2_p then
return 0
end if
switch scope_p do
case 'v', 'V' then
data_set_1_ = stdsort:sort(values(map_1_p))
data_set_2_ = stdsort:sort(values(map_2_p))
case 'k', 'K' then
data_set_1_ = keys(map_1_p, 1)
data_set_2_ = keys(map_2_p, 1)
case else
data_set_1_ = pairs(map_1_p, 1)
data_set_2_ = pairs(map_2_p, 1)
end switch
if equal(data_set_1_, data_set_2_) then
return 1
end if
return -1
end function
--**
-- checks whether map has a given key.
--
-- Parameters:
-- # ##the_map_p## : the map to inspect
-- # ##the_key_p## : an object to be looked up
--
-- Returns:
-- An **integer**, 0 if not present, 1 if present.
--
-- Example 1:
-- map the_map_p
-- the_map_p = new()
-- put(the_map_p, "name", "John")
-- ? has(the_map_p, "name") -- 1
-- ? has(the_map_p, "age") -- 0
--
-- See Also:
-- [[:get]]
--
public function has( map the_map_p, object key )
return find(key, eumem:ram_space[the_map_p][1]) > 0
end function
--**
-- retrieves the value associated to a key in a map.
--
-- Parameters:
-- # ##the_map_p## : the map to inspect
-- # ##the_key_p## : an object, the the_key_p being looked tp
-- # ##default_value_p## : an object, a default value returned if ##the_key_p## not found.
-- The default is 0.
--
-- Returns:
-- An **object**, the value that corresponds to ##the_key_p## in ##the_map_p##.
-- If ##the_key_p## is not in ##the_map_p##, ##default_value_p## is returned instead.
--
-- Example 1:
-- map ages
-- ages = new()
-- put(ages, "Andy", 12)
-- put(ages, "Budi", 13)
--
-- integer age
-- age = get(ages, "Budi", -1)
-- if age = -1 then
-- puts(1, "Age unknown")
-- else
-- printf(1, "The age is %d", age)
-- end if
--
-- See Also:
-- [[:has]]
--
public function get( map the_map_p, object key, object default = 0 )
integer ix = find(key, eumem:ram_space[the_map_p][1])
if ix then
return eumem:ram_space[the_map_p][2][ix]
end if
return default
end function
--**
-- returns the value given a nested key.
--
-- Comments:
-- Returns the value that corresponds to the object ##the_keys_p## in the nested map
-- the_map_p. ##the_keys_p## is a sequence of keys. If any key is not in the map, the
-- object default_value_p is returned instead.
--
public function nested_get( map the_map_p, sequence the_keys_p, object default_value_p = 0)
for i = 1 to length( the_keys_p ) - 1 do
object val_ = get( the_map_p, the_keys_p[1], 0 )
if not map( val_ ) then
-- not a map
return default_value_p
else
the_map_p = val_
the_keys_p = the_keys_p[2..$]
end if
end for
return get( the_map_p, the_keys_p[1], default_value_p )
end function
--**
-- adds or updates an entry on a map.
--
-- Parameters:
-- # ##the_map_p## : the map where an entry is being added or updated
-- # ##the_key_p## : an object, the the_key_p to look up
-- # ##the_value_p## : an object, the value to add, or to use for updating.
-- # ##operation## : an integer, indicating what is to be done with ##the_value_p##. Defaults to PUT.
-- # ##trigger_p## : Deprecated. This parameter defaults to zero and is not used.
--
-- Comments:
-- * The operation parameter can be used to modify the existing value. Valid operations are:
--
-- ** ##PUT## ~-- This is the default, and it replaces any value in there already
-- ** ##ADD## ~-- Equivalent to using the += operator
-- ** ##SUBTRACT## ~-- Equivalent to using the -= operator
-- ** ##MULTIPLY## ~-- Equivalent to using the *= operator
-- ** ##DIVIDE## ~-- Equivalent to using the /= operator
-- ** ##APPEND## ~-- Appends the value to the existing data
-- ** ##CONCAT## ~-- Equivalent to using the &= operator
-- ** ##LEAVE## ~-- If it already exists, the current value is left unchanged
-- otherwise the new value is added to the map.
--
-- Example 1:
-- map ages
-- ages = new()
-- put(ages, "Andy", 12)
-- put(ages, "Budi", 13)
-- put(ages, "Budi", 14)
--
-- -- ages now contains 2 entries: "Andy" => 12, "Budi" => 14
--
-- See Also:
-- [[:remove]], [[:has]], [[:nested_put]]
--
public procedure put( map the_map_p, object key, object val, object op = PUT, object deprecated = 0 )
integer ix = find(key, eumem:ram_space[the_map_p][1])
if not ix then
-- TODO this doesn't crash on MUL or DIVIDE, but it should
eumem:ram_space[the_map_p][1] =
append(eumem:ram_space[the_map_p][1], key)
if op = APPEND then
eumem:ram_space[the_map_p][2] =
append(eumem:ram_space[the_map_p][2], {val})
else
eumem:ram_space[the_map_p][2] =
append(eumem:ram_space[the_map_p][2], val)
end if
--pretty_print(1, {-1,
--eumem:ram_space[the_map_p][1],
--eumem:ram_space[the_map_p][2]} )
return
end if
switch op do
case PUT then
eumem:ram_space[the_map_p][2][ix] = val
case ADD then
eumem:ram_space[the_map_p][2][ix] += val
case SUBTRACT then
eumem:ram_space[the_map_p][2][ix] -= val
case MULTIPLY then
eumem:ram_space[the_map_p][2][ix] *= val
case DIVIDE then
eumem:ram_space[the_map_p][2][ix] /= val
case APPEND then
eumem:ram_space[the_map_p][2][ix] =
append(eumem:ram_space[the_map_p][2][ix], val)
case CONCAT then
eumem:ram_space[the_map_p][2][ix] &= val
case LEAVE then
case else
error:crash("Unknown operation given to map.e:put()")
end switch
--pretty_print(1, {-2,
--eumem:ram_space[the_map_p][1],
--eumem:ram_space[the_map_p][2]})
end procedure
--**
-- adds or updates an entry on a map.
--
-- Parameters:
-- # ##the_map_p## : the map where an entry is being added or updated
-- # ##the_keys_p## : a sequence of keys for the nested maps
-- # ##the_value_p## : an object, the value to add, or to use for updating.
-- # ##operation_p## : an integer, indicating what is to be done with ##value##. Defaults to PUT.
-- # ##deprecated_trigger_p## : Deprecated. This parameter defaults to zero and is not used.
--
--
-- Comments:
-- Valid operations are~:
--
-- * ##PUT## ~-- This is the default, and it replaces any value in there already
-- * ##ADD## ~-- Equivalent to using the += operator
-- * ##SUBTRACT## ~-- Equivalent to using the -= operator
-- * ##MULTIPLY## ~-- Equivalent to using the *= operator
-- * ##DIVIDE## ~-- Equivalent to using the /= operator
-- * ##APPEND## ~-- Appends the value to the existing data
-- * ##CONCAT## ~-- Equivalent to using the &= operator
--
-- * If existing entry with the same key is already in the map, the value of the entry is updated.
--
-- Example 1:
-- map city_population
-- city_population = new()
-- nested_put(city_population, {"United States", "California", "Los Angeles"},
-- 3819951 )
-- nested_put(city_population, {"Canada", "Ontario", "Toronto"},
-- 2503281 )
--
-- See Also:
-- [[:put]]
--
public procedure nested_put( map the_map_p, sequence the_keys_p, object the_value_p,
integer operation_p = PUT, object deprecated_trigger_p = 0)
atom temp_map_
if length( the_keys_p ) = 1 then
put( the_map_p, the_keys_p[1], the_value_p, operation_p )
else
temp_map_ = new_extra( get( the_map_p, the_keys_p[1] ) )
nested_put( temp_map_, the_keys_p[2..$], the_value_p, operation_p )
put( the_map_p, the_keys_p[1], temp_map_, PUT )
end if
end procedure
--**
-- removes an entry with given key from a map.
--
-- Parameters:
-- # ##the_map_p## : the map to operate on
-- # ##key## : an object, the key to remove.
--
-- Comments:
-- * If ##key## is not on ##the_map_p##, the ##the_map_p## is returned unchanged.
-- * If you need to remove all entries, see [[:clear]]
--
-- Example 1:
-- map the_map_p
-- the_map_p = new()
-- put(the_map_p, "Amy", 66.9)
-- remove(the_map_p, "Amy")
-- -- the_map_p is now an empty map again
--
-- See Also:
-- [[:clear]], [[:has]]
--
public procedure remove( map the_map_p, object key )
integer ix = find(key, eumem:ram_space[the_map_p][1])
if ix then
eumem:ram_space[the_map_p][1] =
eumem:ram_space[the_map_p][1][1..ix-1] &
eumem:ram_space[the_map_p][1][ix+1..$]
eumem:ram_space[the_map_p][2] =
eumem:ram_space[the_map_p][2][1..ix-1] &
eumem:ram_space[the_map_p][2][ix+1..$]
end if
end procedure
--**
-- removes all entries in a map.
--
-- Parameters:
-- # ##the_map_p## : the map to operate on
--
-- Comments:
-- * This is much faster than removing each entry individually.
-- * If you need to remove just one entry, see [[:remove]]
--
-- Example 1:
-- map the_map_p
-- the_map_p = new()
-- put(the_map_p, "Amy", 66.9)
-- put(the_map_p, "Betty", 67.8)
-- put(the_map_p, "Claire", 64.1)
-- ...
-- clear(the_map_p)
-- -- the_map_p is now an empty map again
--
-- See Also:
-- [[:remove]], [[:has]]
--
public procedure clear( map the_map_p )
eumem:ram_space[the_map_p] = {"",""}
end procedure
--**
-- returns the number of entries in a map.
--
-- Parameters:
-- ##the_map_p## : the map being queried
--
-- Returns:
-- An **integer**, the number of entries it has.
--
-- Comments:
-- For an empty map, size will be zero
--
-- Example 1:
-- map the_map_p
-- put(the_map_p, 1, "a")
-- put(the_map_p, 2, "b")
-- ? size(the_map_p) -- outputs 2
--
-- See Also:
-- [[:statistics]]
--
public function size( map the_map_p )
return length(eumem:ram_space[the_map_p][1])
end function
public enum
NUM_ENTRIES,
NUM_IN_USE,
NUM_BUCKETS,
LARGEST_BUCKET,
SMALLEST_BUCKET,
AVERAGE_BUCKET,
STDEV_BUCKET
--**
-- retrieves characteristics of a map.
--
-- Parameters:
-- # ##the_map_p## : the map being queried
--
-- Returns:
-- A **sequence**, of 7 integers:
-- * ##NUM_ENTRIES## ~-- number of entries
-- * ##NUM_IN_USE## ~-- number of buckets in use
-- * ##NUM_BUCKETS## ~-- number of buckets
-- * ##LARGEST_BUCKET## ~-- size of largest bucket
-- * ##SMALLEST_BUCKET## ~-- size of smallest bucket
-- * ##AVERAGE_BUCKET## ~-- average size for a bucket
-- * ##STDEV_BUCKET## ~-- standard deviation for the bucket length series
--
-- Example 1:
-- sequence s = statistics(mymap)
-- printf(1, "The average size of the buckets is %d", s[AVERAGE_BUCKET])
--
public function statistics(map the_map_p)
sequence statistic_set_
statistic_set_ = repeat( 0, STDEV_BUCKET )
statistic_set_[NUM_ENTRIES] = size( the_map_p )
statistic_set_[NUM_BUCKETS] = size( the_map_p )
statistic_set_[NUM_IN_USE] = size( the_map_p )
statistic_set_[SMALLEST_BUCKET] = 1
statistic_set_[LARGEST_BUCKET] = 1
statistic_set_[STDEV_BUCKET] = 0
statistic_set_[AVERAGE_BUCKET] = 1
return statistic_set_
end function
--**
-- returns all keys in a map.
--
-- Parameters:
-- # ##the_map_p##: the map being queried
-- # ##sorted_result##: optional integer. 0 [default] means do not sort the
-- output and 1 means to sort the output before returning.
--
-- Returns:
-- A **sequence** made of all the keys in the map.
--
-- Comments:
-- If ##sorted_result## is not used, the order of the keys returned is not predicable.
--
-- Example 1:
-- map the_map_p
-- the_map_p = new()
-- put(the_map_p, 10, "ten")
-- put(the_map_p, 20, "twenty")
-- put(the_map_p, 30, "thirty")
-- put(the_map_p, 40, "forty")
--
-- sequence keys
-- keys = keys(the_map_p) -- keys might be {20,40,10,30} or some other order
-- keys = keys(the_map_p, 1) -- keys will be {10,20,30,40}
--
-- See Also:
-- [[:has]], [[:values]], [[:pairs]]
--
public function keys( map the_map_p, integer sorted_result = 0 )
sequence keys = eumem:ram_space[the_map_p][1]
if sorted_result then
return stdsort:sort( keys )
end if
return keys
end function
--**
-- returns values, without their keys, from a map.
--
-- Parameters:
-- # ##the_map## : the map being queried
-- # ##keys## : optional, key list of values to return.
-- # ##default_values## : optional default values for keys list
--
-- Returns:
-- A **sequence**, of all values stored in ##the_map##.
--
-- Comments:
-- * The order of the values returned may not be the same as the putting order.
-- * Duplicate values are not removed.
-- * You use the ##keys## pa