Pastey cargoan's std/text.e
- Posted by cargoan
Jan 23, 2015
--****
-- == Text Manipulation
--
-- <>
--
namespace text
include std/types.e
include std/convert.e
include std/error.e
include std/filesys.e
include std/io.e
include std/math.e
include std/pretty.e
include std/search.e
include std/sequence.e
include std/serialize.e
--****
-- === Routines
--****
-- Signature:
-- function sprintf(sequence format, object values)
--
-- Description:
-- returns the representation of any Euphoria object as a string of characters with formatting.
--
-- Parameters:
-- # ##format## : a sequence, the text to print. This text may contain format specifiers.
-- # ##values## : usually, a sequence of values. It should have as many elements as format specifiers in ##format##, as these values will be substituted to the specifiers.
--
-- Returns:
-- A **sequence**, of printable characters, representing ##format## with the values in ##values## spliced in.
--
-- Comments:
-- This is exactly the same as [[:printf]] except that the output is returned as a sequence
-- of characters, rather than being sent to a file or device.
--
-- ##printf(fn, st, x)## is equivalent to ##puts(fn, sprintf(st, x))##.
--
-- Some typical uses of ##sprintf## are~:
--
-- # Converting numbers to strings.
-- # Creating strings to pass to ##system##.
-- # Creating formatted error messages that can be passed to a common error message handler.
--
-- Example 1:
--
-- s = sprintf("%08d", 12345)
-- -- s is "00012345"
--
--
-- See Also:
-- [[:printf]], [[:sprint]], [[:format]]
--**
-- returns the representation of any Euphoria object as a string of characters.
--
-- Parameters:
-- # ##x## : Any Euphoria object.
--
-- Returns:
-- A **sequence**, a string representation of ##x##.
--
-- Comments:
--
-- This is exactly the same as ##print(fn, x)##, except that the output is returned as a sequence of characters, rather
-- than being sent to a file or device. ##x## can be any Euphoria object.
--
-- The atoms contained within ##x## will be displayed to a maximum of ten significant digits,
-- just as with [[:print]].
--
-- Example 1:
--
-- s = sprint(12345)
-- -- s is "12345"
--
--
-- Example 2:
--
-- s = sprint({10,20,30}+5)
-- -- s is "{15,25,35}"
--
--
-- See Also:
-- [[:sprintf]], [[:printf]]
public function sprint(object x)
sequence s
if atom(x) then
return sprintf("%.10g", x)
else
s = "{"
for i = 1 to length(x) do
if atom(x[i]) then
s &= sprintf("%.10g", x[i])
else
s &= sprint(x[i])
end if
s &= ','
end for
if s[$] = ',' then
s[$] = '}'
else
s &= '}'
end if
return s
end if
end function
--**
-- trims all items in the supplied set from the leftmost (start or head) of a sequence.
--
-- Parameters:
-- # ##source## : the sequence to trim.
-- # ##what## : the set of item to trim from ##source## (defaults to ##" \t\r\n"##).
-- # ##ret_index## : If zero (the default) returns the trimmed sequence, otherwise
-- it returns the index of the leftmost item **not** in ##what##.
--
-- Returns:
-- A **sequence**, if ##ret_index## is zero, which is the trimmed version of ##source##\\
-- A **integer**, if ##ret_index## is not zero, which is index of the leftmost
-- element in ##source## that is not in ##what##.
--
-- Example 1:
--
-- object s
-- s = trim_head("\r\nSentence read from a file\r\n", "\r\n")
-- -- s is "Sentence read from a file\r\n"
-- s = trim_head("\r\nSentence read from a file\r\n", "\r\n", TRUE)
-- -- s is 3
--
--
--
-- See Also:
-- [[:trim_tail]], [[:trim]], [[:pad_head]]
public function trim_head(sequence source, object what=" \t\r\n", integer ret_index = 0)
integer lpos
if atom(what) then
what = {what}
end if
lpos = 1
while lpos <= length(source) do
if not find(source[lpos], what) then
exit
end if
lpos += 1
end while
if ret_index then
return lpos
else
return source[lpos .. $]
end if
end function
--**
-- trims all items in the supplied set from the rightmost (end or tail) of a sequence.
--
-- Parameters:
-- # ##source## : the sequence to trim.
-- # ##what## : the set of item to trim from ##source## (defaults to ##" \t\r\n"##).
-- # ##ret_index## : If zero (the default) returns the trimmed sequence, otherwise
-- it returns the index of the rightmost item **not** in ##what##.
--
-- Returns:
-- A **sequence**, if ##ret_index## is zero, which is the trimmed version of ##source##\\
-- A **integer**, if ##ret_index## is not zero, which is index of the rightmost
-- element in ##source## that is not in ##what##.
--
-- Example 1:
--
-- object s
-- s = trim_tail("\r\nSentence read from a file\r\n", "\r\n")
-- -- s is "\r\nSentence read from a file"
-- s = trim_tail("\r\nSentence read from a file\r\n", "\r\n", TRUE)
-- -- s is 27
--
--
-- See Also:
-- [[:trim_head]], [[:trim]], [[:pad_tail]]
public function trim_tail(sequence source, object what=" \t\r\n", integer ret_index = 0)
integer rpos
if atom(what) then
what = {what}
end if
rpos = length(source)
while rpos > 0 do
if not find(source[rpos], what) then
exit
end if
rpos -= 1
end while
if ret_index then
return rpos
else
return source[1..rpos]
end if
end function
--**
-- trims all items in the supplied set from both the left end (head/start) and right end (tail/end)
-- of a sequence.
--
-- Parameters:
-- # ##source## : the sequence to trim.
-- # ##what## : the set of item to trim from ##source## (defaults to ##" \t\r\n"##).
-- # ##ret_index## : If zero (the default) returns the trimmed sequence, otherwise
-- it returns a 2-element sequence containing the index of the
-- leftmost item and rightmost item **not** in ##what##.
--
-- Returns:
-- A **sequence**, if ##ret_index## is zero, which is the trimmed version of ##source##\\
-- A **2-element sequence**, if ##ret_index## is not zero, in the form ##{left_index, right_index}## .
--
-- Example 1:
--
-- object s
-- s = trim("\r\nSentence read from a file\r\n", "\r\n")
-- -- s is "Sentence read from a file"
-- s = trim("\r\nSentence read from a file\r\n", "\r\n", TRUE)
-- -- s is {3,27}
-- s = trim(" This is a sentence.\n") -- Default is to trim off all " \t\r\n"
-- -- s is "This is a sentence."
--
--
-- See Also:
-- [[:trim_head]], [[:trim_tail]]
public function trim(sequence source, object what=" \t\r\n", integer ret_index = 0)
integer rpos
integer lpos
if atom(what) then
what = {what}
end if
lpos = 1
while lpos <= length(source) do
if not find(source[lpos], what) then
exit
end if
lpos += 1
end while
rpos = length(source)
while rpos > lpos do
if not find(source[rpos], what) then
exit
end if
rpos -= 1
end while
if ret_index then
return {lpos, rpos}
else
if lpos = 1 then
if rpos = length(source) then
return source
end if
end if
if lpos > length(source) then
return {}
end if
return source[lpos..rpos]
end if
end function
ifdef UNIX then
constant TO_LOWER = 'a' - 'A'
end ifdef
sequence lower_case_SET = {}
sequence upper_case_SET = {}
sequence encoding_NAME = "ASCII"
function load_code_page(sequence cpname)
object cpdata
integer pos
sequence kv
sequence cp_source
sequence cp_db
cp_source = filesys:defaultext(cpname, ".ecp")
cp_source = filesys:locate_file(cp_source)
cpdata = io:read_lines(cp_source)
if sequence(cpdata) then
pos = 0
while pos < length(cpdata) do
pos += 1
cpdata[pos] = trim(cpdata[pos])
if search:begins("--HEAD--", cpdata[pos]) then
continue
end if
if cpdata[pos][1] = ';' then
continue -- A comment line
end if
if search:begins("--CASE--", cpdata[pos]) then
exit
end if
kv = keyvalues(cpdata[pos],,,,"")
if equal(lower(kv[1][1]), "title") then
encoding_NAME = kv[1][2]
end if
end while
if pos > length(cpdata) then
return -2 -- No Case Conversion table found.
end if
upper_case_SET = ""
lower_case_SET = ""
while pos < length(cpdata) do
pos += 1
cpdata[pos] = trim(cpdata[pos])
if length(cpdata[pos]) < 3 then
continue
end if
if cpdata[pos][1] = ';' then
continue -- A comment line
end if
if cpdata[pos][1] = '-' then
exit
end if
kv = keyvalues(cpdata[pos])
upper_case_SET &= convert:hex_text(kv[1][1])
lower_case_SET &= convert:hex_text(kv[1][2])
end while
else
-- See if its in the database.
cp_db = filesys:locate_file("ecp.dat")
integer fh = open(cp_db, "rb")
if fh = -1 then
return -2 -- Couldn't open DB
end if
object idx
object vers
vers = serialize:deserialize(fh) -- get the database version
if atom(vers) or length(vers) = 0 then
return -3 -- DB is wrong or corrupted.
end if
switch vers[1] do
case 1, 2 then
idx = serialize:deserialize(fh) -- get Code Page index offset
pos = io:seek(fh, idx)
idx = serialize:deserialize(fh) -- get the Code Page Index
pos = find(cpname, idx[1])
if pos != 0 then
pos = io:seek(fh, idx[2][pos])
upper_case_SET = serialize:deserialize(fh) -- "uppercase"
lower_case_SET = serialize:deserialize(fh) -- "lowercase"
encoding_NAME = serialize:deserialize(fh) -- "title"
end if
case else
return -4 -- Unhandled ecp database version.
end switch
close(fh)
end if
return 0
end function
--**
-- sets the table of lowercase and uppercase characters that is used by
-- [[:lower]] and [[:upper]]
--
-- Parameters:
-- # ##en## : The name of the encoding represented by these character sets
-- # ##lc## : The set of lowercase characters
-- # ##uc## : The set of upper case characters
--
--
-- Comments:
-- * ##lc## and ##uc## must be the same length.
-- * If no parameters are given, the default ASCII table is set.
--
-- Example 1:
--
-- set_encoding_properties( "Elvish", "aeiouy", "AEIOUY")
--
--
-- Example 1:
--
-- set_encoding_properties( "1251") -- Loads a predefined code page.
--
--
-- See Also:
-- [[:lower]], [[:upper]], [[:get_encoding_properties]]
public procedure set_encoding_properties(sequence en = "", sequence lc = "", sequence uc = "")
integer res
if length(en) > 0 and length(lc) = 0 and length(uc) = 0 then
res = load_code_page(en)
if res != 0 then
printf(2, "Failed to load code page '%s'. Error # %d\n", {en, res})
end if
return
end if
if length(lc) = length(uc) then
if length(lc) = 0 and length(en) = 0 then
en = "ASCII"
end if
lower_case_SET = lc
upper_case_SET = uc
encoding_NAME = en
end if
end procedure
--**
-- gets the table of lowercase and uppercase characters that is used by
-- [[:lower]] and [[:upper]].
--
-- Parameters:
-- none
--
-- Returns:
-- A **sequence**, containing three items.\\
-- ##{Encoding_Name, LowerCase_Set, UpperCase_Set}##
--
-- Example 1:
--
-- encode_sets = get_encoding_properties()
--
--
-- See Also:
-- [[:lower]], [[:upper]], [[:set_encoding_properties]]
--
public function get_encoding_properties( )
return {encoding_NAME, lower_case_SET, upper_case_SET}
end function
ifdef WINDOWS then
include std/dll.e
include std/machine.e
include std/types.e
atom
user32 = open_dll( "user32.dll"),
api_CharLowerBuff = define_c_func(user32, "CharLowerBuffA", {C_POINTER, C_INT}, C_INT),
api_CharUpperBuff = define_c_func(user32, "CharUpperBuffA", {C_POINTER, C_INT}, C_INT),
tm_size = 1024,
temp_mem = allocate(1024)
function change_case(object x, object api)
sequence changed_text
integer single_char = 0
integer len
if not string(x) then
if atom(x) then
if x = 0 then
return 0
end if
x = {x}
single_char = 1
else
for i = 1 to length(x) do
x[i] = change_case(x[i], api)
end for
return x
end if
end if
if length(x) = 0 then
return x
end if
if length(x) >= tm_size then
tm_size = length(x) + 1
free(temp_mem)
temp_mem = allocate(tm_size)
end if
poke(temp_mem, x)
len = c_func(api, {temp_mem, length(x)} )
changed_text = peek({temp_mem, len})
if single_char then
return changed_text[1]
else
return changed_text
end if
end function
end ifdef
--**
-- converts an atom or sequence to lower case.
--
-- Parameters:
-- # ##x## : Any Euphoria object.
--
-- Returns:
-- A **sequence**, the lowercase version of ##x##
--
-- Comments:
-- * For //Windows// systems, this uses the current code page for conversion
-- * For //Unix// this only works on ASCII characters. It alters characters in
-- the ##'a'..'z'## range. If you need to do case conversion with other encodings
-- use the [[:set_encoding_properties]] first.
-- * ##x## may be a sequence of any shape, all atoms of which will be acted upon.
--
-- **WARNING**, When using ASCII encoding, this can also affect floating point
-- numbers in the range ##65## to ##90##.
--
-- Example 1:
--
-- s = lower("Euphoria")
-- -- s is "euphoria"
--
-- a = lower('B')
-- -- a is 'b'
--
-- s = lower({"Euphoria", "Programming"})
-- -- s is {"euphoria", "programming"}
--
--
-- See Also:
-- [[:upper]], [[:proper]], [[:set_encoding_properties]], [[:get_encoding_properties]]
public function lower(object x)
if length(lower_case_SET) != 0 then
return stdseq:mapping(x, upper_case_SET, lower_case_SET)
end if
ifdef WINDOWS then
return change_case(x, api_CharLowerBuff)
elsedef
return x + (x >= 'A' and x <= 'Z') * TO_LOWER
end ifdef
end function
--**
-- converts an atom or sequence to upper case.
--
-- Parameters:
-- # ##x## : Any Euphoria object.
--
-- Returns:
-- A **sequence**, the uppercase version of ##x##
--
-- Comments:
-- * For //Windows// systems, this uses the current code page for conversion
-- * For //Unix// this only works on ASCII characters. It alters characters in
-- the ##'a'..'z'## range. If you need to do case conversion with other encodings
-- use the [[:set_encoding_properties]] first.
-- * ##x## may be a sequence of any shape, all atoms of which will be acted upon.
--
-- **WARNING**, When using ASCII encoding, this can also affects floating point
-- numbers in the range ##97## to ##122##.
--
-- Example 1:
--
-- s = upper("Euphoria")
-- -- s is "EUPHORIA"
--
-- a = upper('b')
-- -- a is 'B'
--
-- s = upper({"Euphoria", "Programming"})
-- -- s is {"EUPHORIA", "PROGRAMMING"}
--
--
-- See Also:
-- [[:lower]], [[:proper]], [[:set_encoding_properties]], [[:get_encoding_properties]]
public function upper(object x)
if length(upper_case_SET) != 0 then
return stdseq:mapping(x, lower_case_SET, upper_case_SET)
end if
ifdef WINDOWS then
return change_case(x, api_CharUpperBuff)
elsedef
return x - (x >= 'a' and x <= 'z') * TO_LOWER
end ifdef
end function
--**
-- converts a text sequence to capitalized words.
--
-- Parameters:
-- # ##x## : A text sequence.
--
-- Returns:
-- A **sequence**, the Capitalized Version of ##x##
--
-- Comments:
-- A text sequence is one in which all elements are either characters or
-- text sequences. This means that if a non-character is found in the input,
-- it is not converted. However this rule only applies to elements on the
-- same level, meaning that sub-sequences could be converted if they are
-- actually text sequences.
--
-- Example 1:
--
-- s = proper("euphoria programming language")
-- -- s is "Euphoria Programming Language"
-- s = proper("EUPHORIA PROGRAMMING LANGUAGE")
-- -- s is "Euphoria Programming Language"
-- s = proper({"EUPHORIA PROGRAMMING", "language", "rapid dEPLOYMENT", "sOfTwArE"})
-- -- s is {"Euphoria Programming", "Language", "Rapid Deployment", "Software"}
-- s = proper({'a', 'b', 'c'})
-- -- s is {'A', 'b', c'} -- "Abc"
-- s = proper({'a', 'b', 'c', 3.1472})
-- -- s is {'a', 'b', c', 3.1472} -- Unchanged because it contains a non-character.
-- s = proper({"abc", 3.1472})
-- -- s is {"Abc", 3.1472} -- The embedded text sequence is converted.
--
--
-- See Also:
-- [[:lower]] [[:upper]]
public function proper(sequence x)
integer pos
integer inword
integer convert
sequence res
inword = 0 -- Initially not in a word
convert = 1 -- Initially convert text
res = x -- Work on a copy of the original, in case we need to restore.
for i = 1 to length(res) do
if integer(res[i]) then
if convert then
-- Check for upper case
pos = types:t_upper(res[i])
if pos = 0 then
-- Not upper, so check for lower case
pos = types:t_lower(res[i])
if pos = 0 then
-- Not lower so check for digits
-- n.b. digits have no effect on if its in a word or not.
pos = t_digit(res[i])
if pos = 0 then
-- not digit so check for special word chars
pos = t_specword(res[i])
if pos then
inword = 1
else
inword = 0
end if
end if
else
if inword = 0 then
-- start of word, so convert only lower to upper.
if pos <= 26 then
res[i] = upper(res[i]) -- Convert to uppercase
end if
inword = 1 -- now we are in a word
end if
end if
else
if inword = 1 then
-- Upper, but as we are in a word convert it to lower.
res[i] = lower(res[i]) -- Convert to lowercase
else
inword = 1 -- now we are in a word
end if
end if
end if
else
-- A non-integer means this is NOT a text sequence, so
-- only convert sub-sequences.
if convert then
-- Restore any values that might have been converted.
for j = 1 to i-1 do
if atom(x[j]) then
res[j] = x[j]
end if
end for
-- Turn conversion off for the rest of this level.
convert = 0
end if
if sequence(res[i]) then
res[i] = proper(res[i]) -- recursive conversion
end if
end if
end for
return res
end function
--**
-- converts a string containing Key/Value pairs into a set of
-- sequences, one per K/V pair.
--
-- Parameters:
-- # ##source## : a text sequence, containing the representation of the key/values.
-- # ##pair_delim## : an object containing a list of elements that delimit one
-- key/value pair from the next. The defaults are semi-colon (;)
-- and comma (,).
-- # ##kv_delim## : an object containing a list of elements that delimit the
-- key from its value. The defaults are colon (:) and equal (=).
-- # ##quotes## : an object containing a list of elements that can be used to
-- enclose either keys or values that contain delimiters or
-- whitespace. The defaults are double-quote ("), single-quote (')
-- and back-quote (`)
-- # ##whitespace## : an object containing a list of elements that are regarded
-- as whitespace characters. The defaults are space, tab, new-line,
-- and carriage-return.
-- # ##haskeys## : an integer containing true or false. The default is true. When
-- ##true##, the ##kv_delim## values are used to separate keys from values, but
-- when ##false## it is assumed that each 'pair' is actually just a value.
--
-- Returns:
-- A **sequence**, of pairs. Each pair is in the form ##{key, value}##.
--
-- Comments:
--
-- String representations of atoms are not converted, either in the key or value part, but returned as any regular string instead.
--
-- If ##haskeys## is ##true##, but a substring only holds what appears to be a value, the key
-- is synthesized as ##p[n]##, where ##n## is the number of the pair. See Example 2.
--
-- By default, pairs can be delimited by either a comma or semi-colon ",;" and
-- a key is delimited from its value by either an equal or a colon "=:".
-- Whitespace between pairs, and between delimiters is ignored.
--
-- If you need to have one of the delimiters in the value data, enclose it in
-- quotation marks. You can use any of single, double and back quotes, which
-- also means you can quote quotation marks themselves. See Example 3.
--
-- It is possible that the value data itself is a nested set of pairs. To do
-- this enclose the value in parentheses. Nested sets can nested to any level.
-- See Example 4.
--
-- If a sub-list has only data values and not keys, enclose it in either braces
-- or square brackets. See Example 5.
-- If you need to have a bracket as the first character in a data value, prefix
-- it with a tilde. Actually a leading tilde will always just be stripped off
-- regardless of what it prefixes. See Example 6.
--
-- Example 1:
--
-- s= keyvalues("foo=bar, qwe=1234, asdf='contains space, comma, and equal(=)'")
-- -- s is
-- -- {
-- -- {"foo", "bar"},
-- -- {"qwe", "1234"},
-- -- {"asdf", "contains space, comma, and equal(=)"}
-- -- }
--
--
-- Example 2:
--
-- s = keyvalues("abc fgh=ijk def")
-- -- s is { {"p[1]", "abc"}, {"fgh", "ijk"}, {"p[3]", "def"} }
--
--
-- Example 3:
--
-- s = keyvalues("abc=`'quoted'`")
-- -- s is { {"abc", "'quoted'"} }
--
--
-- Example 4:
--
-- s = keyvalues("colors=(a=black, b=blue, c=red)")
-- -- s is { {"colors", {{"a", "black"}, {"b", "blue"},{"c", "red"}} } }
-- s = keyvalues("colors=(black=[0,0,0], blue=[0,0,FF], red=[FF,0,0])")
-- -- s is
-- -- { {"colors",
-- -- {{"black",{"0", "0", "0"}},
-- -- {"blue",{"0", "0", "FF"}},
-- -- {"red", {"FF","0","0"}}}} }
--
--
-- Example 5:
--
-- s = keyvalues("colors=[black, blue, red]")
-- -- s is { {"colors", { "black", "blue", "red"} } }
--
--
-- Example 6:
--
-- s = keyvalues("colors=~[black, blue, red]")
-- -- s is { {"colors", "[black, blue, red]"} } }
-- -- The following is another way to do the same.
-- s = keyvalues("colors=`[black, blue, red]`")
-- -- s is { {"colors", "[black, blue, red]"} } }
--
public function keyvalues(sequence source, object pair_delim = ";,",
object kv_delim = ":=", object quotes = "\"'`",
object whitespace = " \t\n\r", integer haskeys = 1)
sequence lKeyValues
sequence value_
sequence key_
sequence lAllDelim
sequence lWhitePair
sequence lStartBracket
sequence lEndBracket
sequence lBracketed
integer lQuote
integer pos_
integer lChar
integer lBPos
integer lWasKV
source = trim(source)
if length(source) = 0 then
return {}
end if
if atom(pair_delim) then
pair_delim = {pair_delim}
end if
if atom(kv_delim) then
kv_delim = {kv_delim}
end if
if atom(quotes) then
quotes = {quotes}
end if
if atom(whitespace) then
whitespace = {whitespace}
end if
lAllDelim = whitespace & pair_delim & kv_delim
lWhitePair = whitespace & pair_delim
lStartBracket = "{[("
lEndBracket = "}])"
lKeyValues = {}
pos_ = 1
while pos_ <= length(source) do
-- ignore leading whitespace
while pos_ < length(source) do
if find(source[pos_], whitespace) = 0 then
exit
end if
pos_ +=1
end while
-- Get key. Ends at any of unquoted whitespace or unquoted delimiter
key_ = ""
lQuote = 0
lChar = 0
lWasKV = 0
if haskeys then
while pos_ <= length(source) do
lChar = source[pos_]
if find(lChar, quotes) != 0 then
if lChar = lQuote then
-- End of quoted span
lQuote = 0
lChar = -1
elsif lQuote = 0 then
-- Start of quoted span
lQuote = lChar
lChar = -1
end if
elsif lQuote = 0 and find(lChar, lAllDelim) != 0 then
exit
end if
if lChar > 0 then
key_ &= lChar
end if
pos_ += 1
end while
-- ignore next whitespace
if find(lChar, whitespace) != 0 then
pos_ += 1
while pos_ <= length(source) do
lChar = source[pos_]
if find(lChar, whitespace) = 0 then
exit
end if
pos_ +=1
end while
end if
else
pos_ -= 1 -- Put back the last char.
end if
value_ = ""
if find(lChar, kv_delim) != 0 or not haskeys then
if find(lChar, kv_delim) != 0 then
lWasKV = 1
end if
-- ignore next whitespace
pos_ += 1
while pos_ <= length(source) do
lChar = source[pos_]
if find(lChar, whitespace) = 0 then
exit
end if
pos_ +=1
end while
-- Get value. Ends at any of unquoted whitespace or unquoted delimiter
lQuote = 0
lChar = 0
lBracketed = {}
while pos_ <= length(source) do
lChar = source[pos_]
if length(lBracketed) = 0 and find(lChar, quotes) != 0 then
if lChar = lQuote then
-- End of quoted span
lQuote = 0
lChar = -1
elsif lQuote = 0 then
-- Start of quoted span
lQuote = lChar
lChar = -1
end if
elsif length(value_) = 1 and value_[1] = '~' and find(lChar, lStartBracket) > 0 then
lBPos = find(lChar, lStartBracket)
lBracketed &= lEndBracket[lBPos]
elsif find(lChar, lStartBracket) > 0 then
lBPos = find(lChar, lStartBracket)
lBracketed &= lEndBracket[lBPos]
elsif length(lBracketed) != 0 and lChar = lBracketed[$] then
lBracketed = lBracketed[1..$-1]
elsif length(lBracketed) = 0 and lQuote = 0 and find(lChar, lWhitePair) != 0 then
exit
end if
if lChar > 0 then
value_ &= lChar
end if
pos_ += 1
end while
if find(lChar, whitespace) != 0 then
-- ignore next whitespace
pos_ += 1
while pos_ <= length(source) do
lChar = source[pos_]
if find(lChar, whitespace) = 0 then
exit
end if
pos_ +=1
end while
end if
if find(lChar, pair_delim) != 0 then
pos_ += 1
if pos_ <= length(source) then
lChar = source[pos_]
end if
end if
end if
if find(lChar, pair_delim) != 0 then
pos_ += 1
end if
if length(value_) = 0 then
if length(key_) = 0 then
lKeyValues = append(lKeyValues, {})
continue
end if
if not lWasKV then
value_ = key_
key_ = ""
end if
end if
if length(key_) = 0 then
if haskeys then
key_ = sprintf("p[%d]", length(lKeyValues) + 1)
end if
end if
if length(value_) > 0 then
lChar = value_[1]
lBPos = find(lChar, lStartBracket)
if lBPos > 0 and value_[$] = lEndBracket[lBPos] then
if lChar = '(' then
value_ = keyvalues(value_[2..$-1], pair_delim, kv_delim, quotes, whitespace, haskeys)
else
value_ = keyvalues(value_[2..$-1], pair_delim, kv_delim, quotes, whitespace, 0)
end if
elsif lChar = '~' then
value_ = value_[2 .. $]
end if
end if
key_ = trim(key_)
value_ = trim(value_)
if length(key_) = 0 then
lKeyValues = append(lKeyValues, value_)
else
lKeyValues = append(lKeyValues, {key_, value_})
end if
end while
return lKeyValues
end function
--**
-- escapes special characters in a string.
--
-- Parameters:
-- # ##s##: string to escape
-- # ##what##: sequence of characters to escape
-- defaults to escaping a double quote.
--
-- Returns:
-- An escaped ##sequence## representing ##s##.
--
-- Example 1:
--
-- sequence s = escape("John \"Mc\" Doe")
-- puts(1, s)
-- -- output is: John \"Mc\" Doe
--
--
-- See Also:
-- [[:quote]]
--
public function escape(sequence s, sequence what="\"")
sequence r = ""
for i = 1 to length(s) do
if find(s[i], what) then
r &= "\\"
end if
r &= s[i]
end for
return r
end function
--**
-- returns a quoted version of the first argument.
--
-- Parameters:
-- # ##text_in## : The string or set of strings to quote.
-- # ##quote_pair## : A sequence of two strings. The first string is the opening
-- quote to use, and the second string is the closing quote to use.
-- The default is {"\"", "\""} which means that the output will be
-- enclosed by double-quotation marks.
-- # ##esc## : A single escape character. If this is not negative (the default),
-- then this is used to 'escape' any embedded quote characters and
-- 'esc' characters already in the ##text_in## string.
-- # ##sp## : A list of zero or more special characters. The ##text_in## is only
-- quoted if it contains any of the special characters. The default
-- is "" which means that the ##text_in## is always quoted.
--
-- Returns:
-- A **sequence**, the quoted version of ##text_in##.
--
-- Example 1:
--
-- -- Using the defaults. Output enclosed in double-quotes, no escapes and no specials.
-- s = quote("The small man")
-- -- 's' now contains '"the small man"' including the double-quote characters.
--
--
-- Example 2:
--
-- s = quote("The small man", {"(", ")"} )
-- -- 's' now contains '(the small man)'
--
--
-- Example 3:
--
-- s = quote("The (small) man", {"(", ")"}, '~' )
-- -- 's' now contains '(The ~(small~) man)'
--
--
-- Example 4:
--
-- s = quote("The (small) man", {"(", ")"}, '~', "#" )
-- -- 's' now contains "the (small) man"
-- -- because the input did not contain a '#' character.
--
--
-- Example 5:
--
-- s = quote("The #1 (small) man", {"(", ")"}, '~', "#" )
-- -- 's' now contains '(the #1 ~(small~) man)'
-- -- because the input did contain a '#' character.
--
--
-- Example 6:
--
-- -- input is a set of strings...
-- s = quote({"a b c", "def", "g hi"},)
-- -- 's' now contains three quoted strings: '"a b c"', '"def"', and '"g hi"'
--
--
-- See Also:
-- [[:escape]]
--
public function quote( sequence text_in, object quote_pair = {"\"", "\""}, integer esc = -1,
t_text sp = "" )
if length(text_in) = 0 then
return text_in
end if
if atom(quote_pair) then
quote_pair = {{quote_pair}, {quote_pair}}
elsif length(quote_pair) = 1 then
quote_pair = {quote_pair[1], quote_pair[1]}
elsif length(quote_pair) = 0 then
quote_pair = {"\"", "\""}
end if
if sequence(text_in[1]) then
for i = 1 to length(text_in) do
if sequence(text_in[i]) then
text_in[i] = quote(text_in[i], quote_pair, esc, sp)
end if
end for
return text_in
end if
-- Only quote the input if it contains any of the items in 'sp'
for i = 1 to length(sp) do
if find(sp[i], text_in) then
exit
end if
if i = length(sp) then
-- Contains none of them, so just return the input untouched.
return text_in
end if
end for
if esc >= 0 then
-- If the input already contains a quote, replace them with esc-quote,
-- but make sure that if the input already contains esc-quote that all
-- embedded escapes are replaced with esc-esc first.
if atom(quote_pair[1]) then
quote_pair[1] = {quote_pair[1]}
end if
if atom(quote_pair[2]) then
quote_pair[2] = {quote_pair[2]}
end if
if equal(quote_pair[1], quote_pair[2]) then
-- Simple case where both open and close quote are the same.
if match(quote_pair[1], text_in) then
if match(esc & quote_pair[1], text_in) then
text_in = search:match_replace(esc, text_in, esc & esc)
end if
text_in = search:match_replace(quote_pair[1], text_in, esc & quote_pair[1])
end if
else
if match(quote_pair[1], text_in) or
match(quote_pair[2], text_in) then
if match(esc & quote_pair[1], text_in) then
text_in = search:match_replace(esc & quote_pair[1], text_in, esc & esc & quote_pair[1])
end if
text_in = match_replace(quote_pair[1], text_in, esc & quote_pair[1])
end if
if match(quote_pair[2], text_in) then
if match(esc & quote_pair[2], text_in) then
text_in = search:match_replace(esc & quote_pair[2], text_in, esc & esc & quote_pair[2])
end if
text_in = search:match_replace(quote_pair[2], text_in, esc & quote_pair[2])
end if
end if
end if
return quote_pair[1] & text_in & quote_pair[2]
end function
--**
-- removes 'quotation' text from the argument.
--
-- Parameters:
-- # ##text_in## : The string or set of strings to de-quote.
-- # ##quote_pairs## : A set of one or more sub-sequences of two strings,
-- or an atom representing a single character to be used as
-- both the open and close quotes.
-- The first string in each sub-sequence is the opening
-- quote to look for, and the second string is the closing quote.
-- The default is {{{"\"", "\""}}} which means that the output is
-- 'quoted' if it is enclosed by double-quotation marks.
-- # ##esc## : A single escape character. If this is not negative (the default),
-- then this is used to 'escape' any embedded occurrences of the
-- quote characters. In which case the 'escape' character is also
-- removed.
--
-- Returns:
-- A **sequence**, the original text but with 'quote' strings stripped of quotes.
--
-- Example 1:
--
-- -- Using the defaults.
-- s = dequote("\"The small man\"")
-- -- 's' now contains "The small man"
--
--
-- Example 2:
--
-- -- Using the defaults.
-- s = dequote("(The small ?(?) man)", {{"(",")"}}, '?')
-- -- 's' now contains "The small () man"
--
--
public function dequote(sequence text_in, object quote_pairs = {{"\"", "\""}}, integer esc = -1)
if length(text_in) = 0 then
return text_in
end if
if atom(quote_pairs) then
quote_pairs = {{{quote_pairs}, {quote_pairs}}}
elsif length(quote_pairs) = 1 then
quote_pairs = {quote_pairs[1], quote_pairs[1]}
elsif length(quote_pairs) = 0 then
quote_pairs = {{"\"", "\""}}
end if
if sequence(text_in[1]) then
for i = 1 to length(text_in) do
if sequence(text_in[i]) then
text_in[i] = dequote(text_in[i], quote_pairs, esc)
end if
end for
return text_in
end if
-- If the text begins and ends with a quote-pair then strip them off and
-- remove the 'escape' from any 'escaped' quote_pairs within the text.
for i = 1 to length(quote_pairs) do
if length(text_in) >= length(quote_pairs[i][1]) + length(quote_pairs[i][2]) then
if search:begins(quote_pairs[i][1], text_in) and search:ends(quote_pairs[i][2], text_in) then
text_in = text_in[1 + length(quote_pairs[i][1]) .. $ - length(quote_pairs[i][2])]
integer pos = 1
while pos > 0 with entry do
if search:begins(quote_pairs[i][1], text_in[pos+1 .. $]) then
text_in = text_in[1 .. pos-1] & text_in[pos + 1 .. $]
elsif search:begins(quote_pairs[i][2], text_in[pos+1 .. $]) then
text_in = text_in[1 .. pos-1] & text_in[pos + 1 .. $]
else
pos += 1
end if
entry
pos = find(esc, text_in, pos)
end while
exit
end if
end if
end for
return text_in
end function
--**
-- formats a set of arguments in to a string based on a supplied pattern.
--
-- Parameters:
-- # ##format_pattern## : A sequence: the pattern string that contains zero or more tokens.
-- # ##arg_list## : An object: Zero or more arguments used in token replacement.
--
-- Returns:
-- A string **sequence**, the original ##format_pattern## but with tokens replaced by
-- corresponding arguments.
--
-- Comments:
-- The ##format_pattern## string contains text and argument tokens. The resulting string
-- is the same as the format string except that each token is replaced by an
-- item from the argument list.
--
-- A token has the form **##[]##**, where #### is are optional qualifier codes.
--
-- The qualifier. #### is a set of zero or more codes that modify the default
-- way that the argument is used to replace the token. The default replacement
-- way is to convert the argument to its shortest string representation and
-- use that to replace the token. This may be modified by the following codes,
-- which can occur in any order.
--
-- |= Qualifier |= Usage |
-- | N | ('N' is an integer) The index of the argument to use|
-- | {id} | Uses the argument that begins with "id=" where "id" \\
-- is an identifier name. |
-- | %envvar% | Uses the Environment Symbol 'envar' as an argument |
-- | w | For string arguments, if capitalizes the first\\
-- letter in each word |
-- | u | For string arguments, it converts it to upper case. |
-- | l | For string arguments, it converts it to lower case. |
-- | < | For numeric arguments, it left justifies it. |
-- | > | For string arguments, it right justifies it. |
-- | c | Centers the argument. |
-- | z | For numbers, it zero fills the left side. |
-- | :S | ('S' is an integer) The maximum size of the\\
-- resulting field. Also, if 'S' begins with '0' the\\
-- field will be zero-filled if the argument is an integer|
-- | .N | ('N' is an integer) The number of digits after\\
-- the decimal point |
-- | + | For positive numbers, show a leading plus sign |
-- | ( | For negative numbers, enclose them in parentheses |
-- | b | For numbers, causes zero to be all blanks |
-- | s | If the resulting field would otherwise be zero\\
-- length, this ensures that at least one space occurs\\
-- between this token's field |
-- | t | After token replacement, the resulting string up to this point is trimmed. |
-- | X | Outputs integer arguments using hexadecimal digits. |
-- | B | Outputs integer arguments using binary digits. |
-- | ? | The corresponding argument is a set of two strings. This\\
-- uses the first string if the previous token's argument is\\
-- not the value 1 or a zero-length string, otherwise it\\
-- uses the second string. |
-- | [ | Does not use any argument. Outputs a left-square-bracket symbol |
-- | ,X | Insert thousands separators. The is the character\\
-- to use. If this is a dot "." then the decimal point\\
-- is rendered using a comma. Does not apply to zero-filled\\
-- fields. \\
-- N.B. if hex or binary output was specified, the \\
-- separators are every 4 digits otherwise they are \\
-- every three digits. |
-- | T | If the argument is a number it is output as a text character, \\
-- otherwise it is output as text string |
--
-- Clearly, certain combinations of these qualifier codes do not make sense and in
-- those situations, the rightmost clashing code is used and the others are ignored.
--
-- Any tokens in the format that have no corresponding argument are simply removed
-- from the result. Any arguments that are not used in the result are ignored.
--
-- Any sequence argument that is not a string will be converted to its
-- //pretty// format before being used in token replacement.
--
-- If a token is going to be replaced by a zero-length argument, all white space
-- following the token until the next non-whitespace character is not copied to
-- the result string.
--
-- Example 1:
--
-- format("Cannot open file '[]' - code []", {"/usr/temp/work.dat", 32})
-- -- "Cannot open file '/usr/temp/work.dat' - code 32"
--
-- format("Err-[2], Cannot open file '[1]'", {"/usr/temp/work.dat", 32})
-- -- "Err-32, Cannot open file '/usr/temp/work.dat'"
--
-- format("[4w] [3z:2] [6] [5l] [2z:2], [1:4]", {2009,4,21,"DAY","MONTH","of"})
-- -- "Day 21 of month 04, 2009"
--
-- format("The answer is [:6.2]%", {35.22341})
-- -- "The answer is 35.22%"
--
-- format("The answer is [.6]", {1.2345})
-- -- "The answer is 1.234500"
--
-- format("The answer is [,,.2]", {1234.56})
-- -- "The answer is 1,234.56"
--
-- format("The answer is [,..2]", {1234.56})
-- -- "The answer is 1.234,56"
--
-- format("The answer is [,:.2]", {1234.56})
-- -- "The answer is 1:234.56"
--
-- format("[] [?]", {5, {"cats", "cat"}})
-- -- "5 cats"
--
-- format("[] [?]", {1, {"cats", "cat"}})
-- -- "1 cat"
--
-- format("[<:4]", {"abcdef"})
-- -- "abcd"
--
-- format("[>:4]", {"abcdef"})
-- -- "cdef"
--
-- format("[>:8]", {"abcdef"})
-- -- " abcdef"
--
-- format("seq is []", {{1.2, 5, "abcdef", {3}}})
-- -- `seq is {1.2,5,"abcdef",{3}}`
--
-- format("Today is [{day}], the [{date}]", {"date=10/Oct/2012", "day=Wednesday"})
-- -- "Today is Wednesday, the 10/Oct/2012"
--
-- format("'A' is [T]", 65)
-- -- `'A' is A`
--
--
-- See Also:
-- [[:sprintf]]
--
ifdef EU4_1 then
constant MAX_BITS = 64
constant MAX_DIGS = 18
constant MAX_INT = 0x3FFF_FFFF_FFFF_FFFF
constant MIN_INT = -0x4000_0000_0000_0000
elsedef
constant MAX_BITS = 32
constant MAX_DIGS = 15
constant MAX_INT = 0x3FFF_FFFF
constant MIN_INT = -0x4000_0000
end ifdef
constant MAX_UCS4 = 0xFFFF_FFFF
constant MAX_IFMT = 0x3FFF_FFFF
constant MIN_IFMT = -0x4000_0000
public function format(sequence format_pattern, object arg_list = {})
sequence result
integer in_token
integer tch
integer i
integer tend
integer cap
integer align
integer psign
integer msign
integer zfill
integer bwz
integer spacer
integer alt
integer width
integer decs
integer pos
integer argn
integer argl
integer trimming
integer hexout
integer binout
integer tsep
integer istext
object prevargv
object currargv
sequence idname
object envsym
object envvar
integer ep
integer pflag
integer count
sequence fmt
atom argval
if atom(arg_list) then
arg_list = {arg_list}
end if
result = ""
in_token = 0
i = 0
tend = 0
argl = 0
spacer = 0
prevargv = 0
while i < length(format_pattern) do
i += 1
tch = format_pattern[i]
if not in_token then
if tch = '[' then
in_token = 1
tend = 0
cap = 0
align = 0
psign = 0
msign = 0
zfill = 0
bwz = 0
spacer = 0
alt = 0
width = 0
decs = -1
argn = 0
hexout = 0
binout = 0
trimming = 0
tsep = 0
istext = 0
idname = ""
envvar = ""
envsym = ""
else
result &= tch
end if
else
switch tch do
case ']' then
in_token = 0
tend = i
case '[' then
result &= tch
while i < length(format_pattern) do
i += 1
if format_pattern[i] = ']' then
in_token = 0
tend = 0
exit
end if
end while
case 'w', 'u', 'l' then
cap = tch
case 'b' then
bwz = 1
case 's' then
spacer = 1
case 't' then
trimming = 1
case 'z' then
zfill = 1
case 'X' then
hexout = 1
case 'B' then
binout = 1
case 'c', '<', '>' then
align = tch
case '+' then
psign = 1
case '(' then
msign = 1
case '?' then
alt = 1
case 'T' then
istext = 1
case ':' then
while i < length(format_pattern) do
i += 1
tch = format_pattern[i]
pos = find(tch, "0123456789")
if pos = 0 then
i -= 1
exit
end if
width = width * 10 + pos - 1
if width = 0 then
zfill = 1
end if
end while
case '.' then
decs = 0
while i < length(format_pattern) do
i += 1
tch = format_pattern[i]
pos = find(tch, "0123456789")
if pos = 0 then
i -= 1
exit
end if
decs = decs * 10 + pos - 1
end while
case '{' then
-- Use a named argument.
integer sp
sp = i + 1
i = sp
while i < length(format_pattern) do
if format_pattern[i] = '}' then
exit
end if
if format_pattern[i] = ']' then
exit
end if
i += 1
end while
idname = trim(format_pattern[sp .. i-1]) & '='
if format_pattern[i] = ']' then
i -= 1
end if
for j = 1 to length(arg_list) do
if sequence(arg_list[j]) then
if search:begins(idname, arg_list[j]) then
if argn = 0 then
argn = j
exit
end if
end if
end if
if j = length(arg_list) then
idname = ""
argn = -1
end if
end for
case '%' then
-- Use the environment symbol
integer sp
sp = i + 1
i = sp
while i < length(format_pattern) do
if format_pattern[i] = '%' then
exit
end if
if format_pattern[i] = ']' then
exit
end if
i += 1
end while
envsym = trim(format_pattern[sp .. i-1])
if format_pattern[i] = ']' then
i -= 1
end if
envvar = getenv(envsym)
argn = -1
if atom(envvar) then
envvar = ""
end if
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' then
if argn = 0 then
i -= 1
while i < length(format_pattern) do
i += 1
tch = format_pattern[i]
pos = find(tch, "0123456789")
if pos = 0 then
i -= 1
exit
end if
argn = argn * 10 + pos - 1
end while
end if
case ',' then
if i < length(format_pattern) then
i +=1
tsep = format_pattern[i]
end if
case else
-- ignore it
end switch
if tend > 0 then
-- Time to replace the token.
sequence argtext = ""
if argn = 0 then
argn = argl + 1
end if
argl = argn
if argn < 1 or argn > length(arg_list) then
if length(envvar) > 0 then
argtext = envvar
currargv = envvar
else
argtext = ""
currargv =""
end if
else
if string(arg_list[argn]) then
if length(idname) > 0 then
argtext = arg_list[argn][length(idname) + 1 .. $]
else
argtext = arg_list[argn]
end if
elsif integer(arg_list[argn])
-- for consistent formatting, we need to test in case of 64-bit euphoria
and arg_list[argn] <= MAX_IFMT
and arg_list[argn] >= MIN_IFMT then
if istext then
argtext = {and_bits(MAX_UCS4, math:abs(arg_list[argn]))}
elsif bwz != 0 and arg_list[argn] = 0 then
argtext = repeat(' ', width)
elsif binout = 1 then
argtext = stdseq:reverse( convert:int_to_bits(arg_list[argn], MAX_BITS)) + '0'
if zfill != 0 and width > 0 then
if width > length(argtext) then
argtext = repeat('0', width - length(argtext)) & argtext
end if
else
count = 1
while count < length(argtext) and argtext[count] = '0' do
count += 1
end while
argtext = argtext[count .. $]
end if
elsif hexout = 0 then
argtext = sprintf("%d", arg_list[argn])
if zfill != 0 and width > 0 then
if argtext[1] = '-' then
if width > length(argtext) then
argtext = '-' & repeat('0', width - length(argtext)) & argtext[2..$]
end if
else
if width > length(argtext) then
argtext = repeat('0', width - length(argtext)) & argtext
end if
end if
end if
if arg_list[argn] > 0 then
if psign then
if zfill = 0 then
argtext = '+' & argtext
elsif argtext[1] = '0' then
argtext[1] = '+'
end if
end if
elsif arg_list[argn] < 0 then
if msign then
if zfill = 0 then
argtext = '(' & argtext[2..$] & ')'
else
if argtext[2] = '0' then
argtext = '(' & argtext[3..$] & ')'
else
-- Don't need the '(' prefix as its just going to
-- be trunctated to fit the requested width.
argtext = argtext[2..$] & ')'
end if
end if
end if
end if
else
argtext = sprintf("%x", arg_list[argn])
if zfill != 0 and width > 0 then
if width > length(argtext) then
argtext = repeat('0', width - length(argtext)) & argtext
end if
end if
end if
elsif atom(arg_list[argn]) then
if istext then
argtext = {and_bits(MAX_UCS4, math:abs(floor(arg_list[argn])))}
else
if hexout then
argtext = sprintf("%x", arg_list[argn])
if zfill != 0 and width > 0 then
if width > length(argtext) then
argtext = repeat('0', width - length(argtext)) & argtext
end if
end if
else
argval = arg_list[argn]
if argval < 0 then
argval = -argval
end if
if decs < 0 then
fmt = sprintf("%%.%dg", MAX_DIGS) -- default '%g' format precision
elsif argval >= power(10, 15) or
argval < 1e-4 then
fmt = sprintf("%%.%de", decs)
else
fmt = sprintf("%%.%df", decs)
end if
argtext = sprintf(fmt, arg_list[argn])
-- Remove any leading 0 after e+
while ep != 0 with entry do
argtext = remove(argtext, ep+2)
entry
ep = match("e+0", argtext)
end while
if zfill != 0 and width > 0 then
if width > length(argtext) then
if argtext[1] = '-' then
argtext = '-' & repeat('0', width - length(argtext) - msign) & argtext[2..$]
else
argtext = repeat('0', width - length(argtext)) & argtext
end if
end if
end if
if arg_list[argn] > 0 then
if psign then
if zfill = 0 then
argtext = '+' & argtext
elsif argtext[1] = '0' then
argtext[1] = '+'
end if
end if
elsif arg_list[argn] < 0 then
if msign then
if zfill = 0 then
argtext = '(' & argtext[2..$] & ')'
else
if argtext[2] = '0' then
argtext = '(' & argtext[2..$] & ')'
else
argtext = argtext[2..$] & ')'
end if
end if
end if
end if
end if
end if
else
if alt != 0 and length(arg_list[argn]) = 2 then
object tempv
if atom(prevargv) then
if prevargv != 1 then
tempv = arg_list[argn][1]
else
tempv = arg_list[argn][2]
end if
else
if length(prevargv) = 0 then
tempv = arg_list[argn][1]
else
tempv = arg_list[argn][2]
end if
end if
if string(tempv) then
argtext = tempv
elsif integer(tempv) then
if istext then
argtext = {and_bits(MAX_UCS4, math:abs(tempv))}
elsif bwz != 0 and tempv = 0 then
argtext = repeat(' ', width)
else
argtext = sprintf("%d", tempv)
end if
elsif atom(tempv) then
if istext then
argtext = {and_bits(MAX_UCS4, math:abs(floor(tempv)))}
elsif bwz != 0 and tempv = 0 then
argtext = repeat(' ', width)
else
fmt = sprintf("%%%d.%dg", {MAX_DIGS, MAX_DIGS})
argtext = trim(sprintf(fmt, tempv))
end if
else
fmt = sprintf("%%.%dg", MAX_DIGS)
argtext = pretty:pretty_sprint( tempv,
{2,0,1,1000,"%d",fmt,32,127,1,0}
)
end if
else
fmt = sprintf("%%.%dg", MAX_DIGS)
argtext = pretty:pretty_sprint( arg_list[argn],
{2,0,1,1000,"%d",fmt,32,127,1,0}
)
end if
-- Remove any leading 0 after e+
while ep != 0 with entry do
argtext = remove(argtext, ep+2)
entry
ep = match("e+0", argtext)
end while
end if
currargv = arg_list[argn]
end if
if length(argtext) > 0 then
switch cap do
case 'u' then
argtext = upper(argtext)
case 'l' then
argtext = lower(argtext)
case 'w' then
argtext = proper(argtext)
case 0 then
-- do nothing
cap = cap
case else
error:crash("logic error: 'cap' mode in format.")
end switch
if atom(currargv) then
if find('e', argtext) = 0 then
-- Only applies to non-scientific notation.
pflag = 0
if msign and currargv < 0 then
pflag = 1
end if
if decs != -1 then
pos = find('.', argtext)
if pos then
if decs = 0 then
argtext = argtext [1 .. pos - 1 ]
else
pos = length(argtext) - pos - pflag
if pos > decs then
argtext = argtext[ 1 .. $ - pos + decs ]
elsif pos < decs then
if pflag then
argtext = argtext[ 1 .. $ - 1 ] & repeat('0', decs - pos) & ')'
else
argtext = argtext & repeat('0', decs - pos)
end if
end if
end if
elsif decs > 0 then
if pflag then
argtext = argtext[1 .. $ - 1] & '.' & repeat('0', decs) & ')'
else
argtext = argtext & '.' & repeat('0', decs)
end if
end if
end if
end if
end if
if align = 0 then
if atom(currargv) then
align = '>'
else
align = '<'
end if
end if
if atom(currargv) then
if tsep != 0 and zfill = 0 then
integer dpos
integer dist
integer bracketed
if binout or hexout then
dist = 4
psign = 0
else
dist = 3
end if
bracketed = (argtext[1] = '(')
if bracketed then
argtext = argtext[2 .. $-1]
end if
dpos = find('.', argtext)
if dpos = 0 then
dpos = length(argtext) + 1
else
if tsep = '.' then
argtext[dpos] = ','
end if
end if
while dpos > dist do
dpos -= dist
if dpos > 1 + (currargv < 0) * not msign + (currargv > 0) * psign then
argtext = argtext[1.. dpos - 1] & tsep & argtext[dpos .. $]
end if
end while
if bracketed then
argtext = '(' & argtext & ')'
end if
end if
end if
if width <= 0 then
width = length(argtext)
end if
if width < length(argtext) then
if align = '>' then
argtext = argtext[ $ - width + 1 .. $]
elsif align = 'c' then
pos = length(argtext) - width
if remainder(pos, 2) = 0 then
pos = pos / 2
argtext = argtext[ pos + 1 .. $ - pos ]
else
pos = floor(pos / 2)
argtext = argtext[ pos + 1 .. $ - pos - 1]
end if
else
argtext = argtext[ 1 .. width]
end if
elsif width > length(argtext) then
if align = '>' then
argtext = repeat(' ', width - length(argtext)) & argtext
elsif align = 'c' then
pos = width - length(argtext)
if remainder(pos, 2) = 0 then
pos = pos / 2
argtext = repeat(' ', pos) & argtext & repeat(' ', pos)
else
pos = floor(pos / 2)
argtext = repeat(' ', pos) & argtext & repeat(' ', pos + 1)
end if
else
argtext = argtext & repeat(' ', width - length(argtext))
end if
end if
result &= argtext
else
if spacer then
result &= ' '
end if
end if
if trimming then
result = trim(result)
end if
tend = 0
prevargv = currargv
end if
end if
end while
return result
end function
--**
-- wraps text to a column width.
--
-- Parameters:
-- * ##content## ~-- sequence content to wrap
-- * ##width## ~-- width to wrap at, defaults to 78
-- * ##wrap_with## ~-- sequence to wrap with, defaults to ##"\n"##
-- * ##wrap_at## ~-- sequence of characters to wrap at, defaults to space and tab
--
-- Returns:
-- Sequence containing wrapped text
--
-- Example 1:
--
-- sequence result = wrap("Hello, World")
-- -- result = "Hello, World"
--
--
-- Example 2:
--
-- sequence msg = "Hello, World. Today we are going to learn about apples."
-- sequence result = wrap(msg, 40)
-- -- result =
-- -- "Hello, World. today we are going to\n"
-- -- "learn about apples."
--
--
-- Example 3:
--
-- sequence msg = "Hello, World. Today we are going to learn about apples."
-- sequence result = wrap(msg, 40, "\n ")
-- -- result =
-- -- "Hello, World. today we are going to\n"
-- -- " learn about apples."
--
--
-- Example 4:
--
-- sequence msg = "Hello, World. This, Is, A, Dummy, Sentence, Ok, World?"
-- sequence result = wrap(msg, 30, "\n", ",")
-- -- result =
-- -- "Hello, World. This, Is, A,"
-- -- "Dummy, Sentence, Ok, World?"
--
public function wrap(sequence content, integer width = 78, sequence wrap_with = "\n",
sequence wrap_at = " \t")
if length(content) < width then
return content
end if
sequence result = ""
while length(content) do
-- Find the first whitespace before width
integer split_at = 0
for i = width to 1 by -1 do
if find(content[i], wrap_at) then
split_at = i
exit
end if
end for
if split_at = 0 then
-- Cannot split at width or less, try the closest thing to width
for i = width to length(content) do
if find(content[i], wrap_at) then
split_at = i
exit
end if
end for
-- Didn't find any place to split, attache the entire string
if split_at = 0 then
if length(result) then
result &= wrap_with
end if
result &= content
exit
end if
end if
if length(result) then
result &= wrap_with
end if
result &= trim(content[1..split_at])
content = trim(content[split_at + 1..$])
if length(content) < width then
result &= wrap_with & content
exit
end if
end while
return result
end function