COVERAGE SUMMARY

FILE SUMMARY

Name	Executed	Routines	%	Executed	Lines	%	Unexecuted
/home/matt/eu/rds/include/std/text.e	15	16	93.75%	629	736	85.46%	107

ROUTINE SUMMARY

Routine	Executed	Lines		Unexecuted
format()	287	319	89.97%	32
load_code_page()	23	55	41.82%	32
get_text()	0	28	0.00%	28
keyvalues()	117	130	90.00%	13
dequote()	30	31	96.77%	1
set_encoding_properties()	12	13	92.31%	1
escape()	8	8	100.00%	0
get_encoding_properties()	2	2	100.00%	0
lower()	5	5	100.00%	0
proper()	34	34	100.00%	0
quote()	40	40	100.00%	0
sprint()	14	14	100.00%	0
trim()	23	23	100.00%	0
trim_head()	12	12	100.00%	0
trim_tail()	12	12	100.00%	0
upper()	5	5	100.00%	0

LINE COVERAGE DETAIL

#	Executed
1		-- (c) Copyright - See License.txt
2		--
3		namespace text
4
5		--****
6		-- == Text Manipulation
7		-- Page Contents
8		--
9		-- <>
10		--
11
12		--****
13		-- === Routines
14
15		include std/filesys.e
16		include std/types.e
17		include std/sequence.e
18		include std/io.e
19		include std/search.e
20		include std/convert.e
21		include std/serialize.e
22		include std/pretty.e
23		include std/error.e
24		include std/eds.e
25		include std/convert.e
26
27		--****
28		-- Signature:
29		-- function sprintf(sequence format, object values)
30		--
31		-- Description:
32		-- This is exactly the same as [[:printf]](), except that the output is returned as a sequence
33		-- of characters, rather than being sent to a file or device.
34		--
35		-- Parameters:
36		-- # ##format## : a sequence, the text to print. This text may contain format specifiers.
37		-- # ##values## : usually, a sequence of values. It should have as many elements as format specifiers in ##format##, as these values will be substituted for the specifiers.
38		--
39		-- Returns:
40		-- A sequence, of printable characters, representing ##format## with the values in ##values## spliced in.
41		--
42		-- Comments:
43		--
44		-- ##printf(fn, st, x)## is equivalent to ##puts(fn, sprintf(st, x))##.
45		--
46		-- Some typical uses of ##sprintf()## are:
47		--
48		-- # Converting numbers to strings.
49		-- # Creating strings to pass to system().
50		-- # Creating formatted error messages that can be passed to a common error message handler.
51		--
52		-- Example 1:
53		--
54		-- s = sprintf("%08d", 12345)
55		-- -- s is "00012345"
56		--
57		--
58		-- See Also:
59		-- [[:printf]], [[:sprint]], [[:format]]
60
61		--**
62		-- Returns the representation of any Euphoria object as a string of characters.
63		--
64		-- Parameters:
65		-- # ##x## : Any Euphoria object.
66		--
67		-- Returns:
68		-- A sequence, a string representation of ##x##.
69		--
70		-- Comments:
71		--
72		-- This is exactly the same as ##print(fn, x)##, except that the output is returned as a sequence of characters, rather
73		-- than being sent to a file or device. x can be any Euphoria object.
74		--
75		-- The atoms contained within ##x## will be displayed to a maximum of 10 significant digits,
76		-- just as with [[:print]]().
77		--
78		-- Example 1:
79		--
80		-- s = sprint(12345)
81		-- -- s is "12345"
82		--
83		--
84		-- Example 2:
85		--
86		-- s = sprint({10,20,30}+5)
87		-- -- s is "{15,25,35}"
88		--
89		--
90		-- See Also:
91		-- [[:sprintf]], [[:printf]]
92
93	1010	public function sprint(object x)
94		-- Return the string representation of any Euphoria data object.
95		-- This is the same as the output from print(1, x) or '?', but it's
96		-- returned as a string sequence rather than printed.
97		sequence s
98
99	1010	if atom(x) then
100	1006	return sprintf("%.10g", x)
101		else
102	4	s = "{"
103	4	for i = 1 to length(x) do
104	8	if atom(x[i]) then
105	6	s &= sprintf("%.10g", x[i])
106		else
107	2	s &= sprint(x[i])
108		end if
109	8	s &= ','
110	8	end for
111	4	if s[$] = ',' then
112	3	s[$] = '}'
113		else
114	1	s &= '}'
115		end if
116	4	return s
117		end if
118		end function
119
120		--**
121		-- Trim all items in the supplied set from the leftmost (start or head) of a sequence.
122		--
123		-- Parameters:
124		-- # ##source## : the sequence to trim.
125		-- # ##what## : the set of items to trim from ##source## (defaults to " \t\r\n").
126		-- # ##ret_index## : If zero (the default) returns the trimmed sequence, otherwise
127		-- it returns the index of the leftmost item not in ##what##.
128		--
129		-- Returns:
130		-- A sequence, if ##ret_index## is zero, which is the trimmed version of ##source##\\
131		-- An integer, if ##ret_index## is not zero, which is the index of the leftmost
132		-- element in ##source## that is not in ##what##.
133		--
134		-- Example 1:
135		--
136		-- object s
137		-- s = trim_head("\r\nSentence read from a file\r\n", "\r\n")
138		-- -- s is "Sentence read from a file\r\n"
139		-- s = trim_head("\r\nSentence read from a file\r\n", "\r\n", TRUE)
140		-- -- s is 3
141		--
142		--
143		--
144		-- See Also:
145		-- [[:trim_tail]], [[:trim]], [[:pad_head]]
146
147	6	public function trim_head(sequence source, object what=" \t\r\n", integer ret_index = 0)
148		integer lpos
149	6	if atom(what) then
150	3	what = {what}
151		end if
152
153	6	lpos = 1
154	6	while lpos <= length(source) do
155	23	if not find(source[lpos], what) then
156	5	exit
157		end if
158	18	lpos += 1
159	18	end while
160
161	6	if ret_index then
162	1	return lpos
163		else
164	5	return source[lpos .. $]
165		end if
166		end function
167
168		--**
169		-- Trim all items in the supplied set from the rightmost (end or tail) of a sequence.
170		--
171		-- Parameters:
172		-- # ##source## : the sequence to trim.
173		-- # ##what## : the set of items to trim from ##source## (defaults to " \t\r\n").
174		-- # ##ret_index## : If zero (the default) returns the trimmed sequence, otherwise
175		-- it returns the index of the rightmost item not in ##what##.
176		--
177		-- Returns:
178		-- A sequence, if ##ret_index## is zero, which is the trimmed version of ##source##\\
179		-- An integer, if ##ret_index## is not zero, which is the index of the rightmost
180		-- element in ##source## that is not in ##what##.
181		--
182		-- Example 1:
183		--
184		-- object s
185		-- s = trim_tail("\r\nSentence read from a file\r\n", "\r\n")
186		-- -- s is "\r\nSentence read from a file"
187		-- s = trim_tail("\r\nSentence read from a file\r\n", "\r\n", TRUE)
188		-- -- s is 27
189		--
190		--
191		-- See Also:
192		-- [[:trim_head]], [[:trim]], [[:pad_tail]]
193
194	14	public function trim_tail(sequence source, object what=" \t\r\n", integer ret_index = 0)
195		integer rpos
196
197	14	if atom(what) then
198	3	what = {what}
199		end if
200
201	14	rpos = length(source)
202	14	while rpos > 0 do
203	26	if not find(source[rpos], what) then
204	13	exit
205		end if
206	13	rpos -= 1
207	13	end while
208
209	14	if ret_index then
210	1	return rpos
211		else
212	13	return source[1..rpos]
213		end if
214		end function
215
216		--**
217		-- Trim all items in the supplied set from both the left end (head/start) and right end (tail/end)
218		-- of a sequence.
219		--
220		-- Parameters:
221		-- # ##source## : the sequence to trim.
222		-- # ##what## : the set of items to trim from ##source## (defaults to " \t\r\n").
223		-- # ##ret_index## : If zero (the default) returns the trimmed sequence, otherwise
224		-- it returns a 2-element sequence containing the index of the
225		-- leftmost item and rightmost item not in ##what##.
226		--
227		-- Returns:
228		-- A sequence, if ##ret_index## is zero, which is the trimmed version of ##source##\\
229		-- A 2-element sequence, if ##ret_index## is not zero, in the form {left_index, right_index}.
230		--
231		-- Example 1:
232		--
233		-- object s
234		-- s = trim("\r\nSentence read from a file\r\n", "\r\n")
235		-- -- s is "Sentence read from a file"
236		-- s = trim("\r\nSentence read from a file\r\n", "\r\n", TRUE)
237		-- -- s is {3,27}
238		--
239		--
240		-- See Also:
241		-- [[:trim_head]], [[:trim_tail]]
242
243	347	public function trim(sequence source, object what=" \t\r\n", integer ret_index = 0)
244		integer rpos
245		integer lpos
246
247	347	if atom(what) then
248	4	what = {what}
249		end if
250
251	347	lpos = 1
252	347	while lpos <= length(source) do
253	474	if not find(source[lpos], what) then
254	293	exit
255		end if
256	181	lpos += 1
257	181	end while
258
259	347	rpos = length(source)
260	347	while rpos > lpos do
261	323	if not find(source[rpos], what) then
262	260	exit
263		end if
264	63	rpos -= 1
265	63	end while
266
267	347	if ret_index then
268	1	return {lpos, rpos}
269		else
270	346	if lpos = 1 then
271	301	if rpos = length(source) then
272	272	return source
273		end if
274		end if
275	74	if lpos > length(source) then
276	5	return {}
277		end if
278	69	return source[lpos..rpos]
279		end if
280		end function
281
282
283	101	constant TO_LOWER = 'a' - 'A'
284
285	101	sequence lower_case_SET = {}
286	101	sequence upper_case_SET = {}
287	101	sequence encoding_NAME = "ASCII"
288
289	1	function load_code_page(sequence cpname)
290		object cpdata
291		integer pos
292		sequence kv
293		sequence cp_source
294		sequence cp_db
295
296	1	cp_source = defaultext(cpname, ".ecp")
297	1	cp_source = locate_file(cp_source)
298
299	1	cpdata = read_lines(cp_source)
300	1	if sequence(cpdata) then
301
302	0	pos = 0
303	0	while pos < length(cpdata) do
304	0	pos += 1
305	0	cpdata[pos] = trim(cpdata[pos])
306	0	if begins("--HEAD--", cpdata[pos]) then
307	0	continue
308		end if
309	0	if cpdata[pos][1] = ';' then
310	0	continue -- A comment line
311		end if
312	0	if begins("--CASE--", cpdata[pos]) then
313	0	exit
314		end if
315
316	0	kv = keyvalues(cpdata[pos],,,,"")
317	0	if equal(lower(kv[1][1]), "title") then
318	0	encoding_NAME = kv[1][2]
319		end if
320	0	end while
321	0	if pos > length(cpdata) then
322	0	return -2 -- No Case Conversion table found.
323		end if
324
325	0	upper_case_SET = ""
326	0	lower_case_SET = ""
327	0	while pos < length(cpdata) do
328	0	pos += 1
329	0	cpdata[pos] = trim(cpdata[pos])
330	0	if length(cpdata[pos]) < 3 then
331	0	continue
332		end if
333	0	if cpdata[pos][1] = ';' then
334	0	continue -- A comment line
335		end if
336	0	if cpdata[pos][1] = '-' then
337	0	exit
338		end if
339
340	0	kv = keyvalues(cpdata[pos])
341	0	upper_case_SET &= hex_text(kv[1][1])
342	0	lower_case_SET &= hex_text(kv[1][2])
343	0	end while
344
345		else
346		-- See if it's in the database.
347	1	cp_db = locate_file("ecp.dat")
348	1	integer fh = open(cp_db, "rb")
349	1	if fh = -1 then
350	0	return -2 -- Couldn't open DB
351		end if
352	1	object idx
353	1	object vers
354	1	vers = deserialize(fh) -- get the database version
355	1	if vers[1] = 1 then
356	1	idx = deserialize(fh) -- get Code Page index offset
357	1	pos = seek(fh, idx)
358	1	idx = deserialize(fh) -- get the Code Page Index
359	1	pos = find(cpname, idx[1])
360	1	if pos != 0 then
361	1	pos = seek(fh, idx[2][pos])
362	1	upper_case_SET = deserialize(fh) -- "uppercase"
363	1	lower_case_SET = deserialize(fh) -- "lowercase"
364	1	encoding_NAME = deserialize(fh) -- "title"
365		end if
366		end if
367	1	close(fh)
368
369		end if
370	1	return 0
371		end function
372
373		--**
374		-- Sets the table of lowercase and uppercase characters that is used by
375		-- [[:lower]] and [[:upper]]
376		--
377		-- Parameters:
378		-- # ##en## : The name of the encoding represented by these character sets
379		-- # ##lc## : The set of lowercase characters
380		-- # ##uc## : The set of upper case characters
381		--
382		--
383		-- Comments:
384		-- * ##lc## and ##uc## must be the same length.
385		-- * If no parameters are given, the default ASCII table is set.
386		--
387		-- Example 1:
388		--
389		-- set_encoding_properties( "Elvish", "aeiouy", "AEIOUY")
390		--
391		--
392		-- Example 2:
393		--
394		-- set_encoding_properties( "1251") -- Loads a predefined code page.
395		--
396		--
397		-- See Also:
398		-- [[:lower]], [[:upper]], [[:get_encoding_properties]]
399
400	4	public procedure set_encoding_properties(sequence en = "", sequence lc = "", sequence uc = "")
401		integer res
402
403	4	if length(en) > 0 and length(lc) = 0 and length(uc) = 0 then
404	1	res = load_code_page(en)
405	1	if res != 0 then
406	0	printf(2, "Failed to load code page '%s'. Error # %d\n", {en, res})
407		end if
408	1	return
409		end if
410
411	3	if length(lc) = length(uc) then
412	3	if length(lc) = 0 and length(en) = 0 then
413	2	en = "ASCII"
414		end if
415	3	lower_case_SET = lc
416	3	upper_case_SET = uc
417	3	encoding_NAME = en
418		end if
419	3	end procedure
420
421		--**
422		-- Gets the table of lowercase and uppercase characters that is used by
423		-- [[:lower]] and [[:upper]]
424		--
425		-- Parameters:
426		-- none
427		--
428		-- Returns:
429		-- A sequence, containing three items.\\
430		-- {Encoding_Name, LowerCase_Set, UpperCase_Set}
431		--
432		-- Example 1:
433		--
434		-- encode_sets = get_encoding_properties()
435		--
436		--
437		-- See Also:
438		-- [[:lower]], [[:upper]], [[:set_encoding_properties]]
439		--
440	3	public function get_encoding_properties( )
441	3	return {encoding_NAME, lower_case_SET, upper_case_SET}
442		end function
443
444
445	101	ifdef WINDOWS then
446		include std/dll.e
447		include std/machine.e
448		include std/types.e
449		atom
450		user32 = open_dll( "user32.dll"),
451		api_CharLowerBuff = define_c_func(user32, "CharLowerBuffA", {C_POINTER, C_INT}, C_INT),
452		api_CharUpperBuff = define_c_func(user32, "CharUpperBuffA", {C_POINTER, C_INT}, C_INT),
453		tm_size = 1024,
454		temp_mem = allocate(1024)
455
456		function change_case(object x, object api)
457		sequence changed_text
458		integer single_char = 0
459		integer len
460
461		if not string(x) then
462		if atom(x) then
463		if x = 0 then
464		return 0
465		end if
466		x = {x}
467		single_char = 1
468		else
469		for i = 1 to length(x) do
470		x[i] = change_case(x[i], api)
471		end for
472		return x
473		end if
474		end if
475		if length(x) = 0 then
476		return x
477		end if
478		if length(x) >= tm_size then
479		tm_size = length(x) + 1
480		free(temp_mem)
481		temp_mem = allocate(tm_size)
482		end if
483		poke(temp_mem, x)
484		len = c_func(api, {temp_mem, length(x)} )
485		if len < 1 then
486		len = length(x)
487		end if
488		changed_text = peek({temp_mem, len})
489		if single_char then
490		return changed_text[1]
491		else
492		return changed_text
493		end if
494		end function
495		end ifdef
496
497		--**
498		-- Convert an atom or sequence to lower case.
499		--
500		-- Parameters:
501		-- # ##x## : Any Euphoria object.
502		--
503		-- Returns:
504		-- A sequence, the lowercase version of ##x##
505		--
506		-- Comments:
507		-- * For Windows systems, this uses the current code page for conversion
508		-- * For non-Windows, this only works on ASCII characters. It alters characters in
509		-- the 'A'..'Z' range. If you need to do case conversion with other encodings
510		-- use the [[:set_encoding_properties]] first.
511		-- * ##x## may be a sequence of any shape, all atoms of which will be acted upon.
512		--
513		-- WARNING, When using ASCII encoding, this can also affect floating point
514		-- numbers in the range 65 to 90.
515		--
516		-- Example 1:
517		--
518		-- s = lower("Euphoria")
519		-- -- s is "euphoria"
520		--
521		-- a = lower('B')
522		-- -- a is 'b'
523		--
524		-- s = lower({"Euphoria", "Programming"})
525		-- -- s is {"euphoria", "programming"}
526		--
527		--
528		-- See Also:
529		-- [[:upper]], [[:proper]], [[:set_encoding_properties]], [[:get_encoding_properties]]
530	98	public function lower(object x)
531		-- convert atom or sequence to lower case
532	98	if length(lower_case_SET) != 0 then
533	1	return mapping(x, upper_case_SET, lower_case_SET)
534		end if
535
536	97	ifdef WINDOWS then
537		return change_case(x, api_CharLowerBuff)
538		elsedef
539	97	return x + (x >= 'A' and x <= 'Z') * TO_LOWER
540		end ifdef
541		end function
542
543		--**
544		-- Convert an atom or sequence to upper case.
545		--
546		-- Parameters:
547		-- # ##x## : Any Euphoria object.
548		--
549		-- Returns:
550		-- A sequence, the uppercase version of ##x##
551		--
552		-- Comments:
553		-- * For Windows systems, this uses the current code page for conversion
554		-- * For non-Windows, this only works on ASCII characters. It alters characters in
555		-- the 'a'..'z' range. If you need to do case conversion with other encodings
556		-- use the [[:set_encoding_properties]] first.
557		-- * ##x## may be a sequence of any shape, all atoms of which will be acted upon.
558		--
559		-- WARNING, When using ASCII encoding, this can also affect floating point
560		-- numbers in the range 97 to 122.
561		--
562		-- Example 1:
563		--
564		-- s = upper("Euphoria")
565		-- -- s is "EUPHORIA"
566		--
567		-- a = upper('b')
568		-- -- a is 'B'
569		--
570		-- s = upper({"Euphoria", "Programming"})
571		-- -- s is {"EUPHORIA", "PROGRAMMING"}
572		--
573		--
574		-- See Also:
575		-- [[:lower]], [[:proper]], [[:set_encoding_properties]], [[:get_encoding_properties]]
576
577	1135	public function upper(object x)
578		-- convert atom or sequence to upper case
579	1135	if length(upper_case_SET) != 0 then
580	3	return mapping(x, lower_case_SET, upper_case_SET)
581		end if
582	1132	ifdef WINDOWS then
583		return change_case(x, api_CharUpperBuff)
584		elsedef
585	1132	return x - (x >= 'a' and x <= 'z') * TO_LOWER
586		end ifdef
587
588		end function
589
590		--**
591		-- Convert a text sequence to capitalized words.
592		--
593		-- Parameters:
594		-- # ##x## : A text sequence.
595		--
596		-- Returns:
597		-- A sequence, the Capitalized Version of ##x##
598		--
599		-- Comments:
600		-- A text sequence is one in which all elements are either characters or
601		-- text sequences. This means that if a non-character is found in the input,
602		-- it is not converted. However this rule only applies to elements on the
603		-- same level, meaning that sub-sequences could be converted if they are
604		-- actually text sequences.
605		--
606		--
607		-- Example 1:
608		--
609		-- s = proper("euphoria programming language")
610		-- -- s is "Euphoria Programming Language"
611		-- s = proper("EUPHORIA PROGRAMMING LANGUAGE")
612		-- -- s is "Euphoria Programming Language"
613		-- s = proper({"EUPHORIA PROGRAMMING", "language", "rapid dEPLOYMENT", "sOfTwArE"})
614		-- -- s is {"Euphoria Programming", "Language", "Rapid Deployment", "Software"}
615		-- s = proper({'a', 'b', 'c'})
616		-- -- s is {'A', 'b', 'c'} -- "Abc"
617		-- s = proper({'a', 'b', 'c', 3.1472})
618		-- -- s is {'a', 'b', 'c', 3.1472} -- Unchanged because it contains a non-character.
619		-- s = proper({"abc", 3.1472})
620		-- -- s is {"Abc", 3.1472} -- The embedded text sequence is converted.
621		--
622		--
623		-- See Also:
624		-- [[:lower]], [[:upper]]
625
626	18	public function proper(sequence x)
627		-- Converts text to lowercase and makes each word start with an uppercase.
628		integer pos
629		integer inword
630		integer convert
631		sequence res
632
633	18	inword = 0 -- Initially not in a word
634	18	convert = 1 -- Initially convert text
635	18	res = x -- Work on a copy of the original, in case we need to restore.
636	18	for i = 1 to length(res) do
637	214	if integer(res[i]) then
638	201	if convert then
639		-- Check for upper case
640	200	pos = t_upper(res[i])
641	200	if pos = 0 then
642		-- Not upper, so check for lower case
643	125	pos = t_lower(res[i])
644	125	if pos = 0 then
645		-- Not lower so check for digits
646		-- n.b. digits have no effect on whether it's in a word or not.
647	27	pos = t_digit(res[i])
648	27	if pos = 0 then
649		-- not digit so check for special word chars
650	20	pos = t_specword(res[i])
651	20	if pos then
652	3	inword = 1
653		else
654	17	inword = 0
655		end if
656		end if
657		else
658	98	if inword = 0 then
659		-- start of word, so convert only lower to upper.
660	18	if pos <= 26 then
661	18	res[i] = upper(res[i]) -- Convert to uppercase
662		end if
663	18	inword = 1 -- now we are in a word
664		end if
665		end if
666		else
667	75	if inword = 1 then
668		-- Upper, but as we are in a word convert it to lower.
669	66	res[i] = lower(res[i]) -- Convert to lowercase
670		else
671	9	inword = 1 -- now we are in a word
672		end if
673		end if
674		end if
675		else
676		-- A non-integer means this is NOT a text sequence, so
677		-- only convert sub-sequences.
678	13	if convert then
679		-- Restore any values that might have been converted.
680	5	for j = 1 to i-1 do
681	3	if atom(x[j]) then
682	3	res[j] = x[j]
683		end if
684	3	end for
685		-- Turn conversion off for the rest of this level.
686	5	convert = 0
687		end if
688
689	13	if sequence(res[i]) then
690	10	res[i] = proper(res[i]) -- recursive conversion
691		end if
692		end if
693	214	end for
694	18	return res
695		end function
696
697		--**
698		-- Converts a string containing Key/Value pairs into a set of
699		-- sequences, one per K/V pair.
700		--
701		-- Parameters:
702		-- # ##source## : a text sequence, containing the representation of the key/values.
703		-- # ##pair_delim## : an object containing a list of elements that delimit one
704		-- key/value pair from the next. The defaults are semi-colon (;)
705		-- and comma (,).
706		-- # ##kv_delim## : an object containing a list of elements that delimit the
707		-- key from its value. The defaults are colon (:) and equal (=).
708		-- # ##quotes## : an object containing a list of elements that can be used to
709		-- enclose either keys or values that contain delimiters or
710		-- whitespace. The defaults are double-quote ("), single-quote (')
711		-- and back-quote (`)
712		-- # ##whitespace## : an object containing a list of elements that are regarded
713		-- as whitespace characters. The defaults are space, tab, new-line,
714		-- and carriage-return.
715		-- # ##haskeys## : an integer containing true or false. The default is true. When
716		-- ##true##, the ##kv_delim## values are used to separate keys from values, but
717		-- when ##false## it is assumed that each 'pair' is actually just a value.
718		--
719		-- Returns:
720		-- A sequence, of pairs. Each pair is in the form {key, value}.
721		--
722		-- Comments:
723		--
724		-- String representations of atoms are not converted, either in the key or value part, but returned as any regular string instead.
725		--
726		-- If ##haskeys## is ##true##, but a substring only holds what appears to be a value, the key
727		-- is synthesized as ##p[n]##, where ##n## is the number of the pair. See example #2.
728		--
729		-- By default, pairs can be delimited by either a comma or semi-colon ",;" and
730		-- a key is delimited from its value by either an equal or a colon "=:".
731		-- Whitespace between pairs, and between delimiters is ignored.
732		--
733		-- If you need to have one of the delimiters in the value data, enclose it in
734		-- quotation marks. You can use any of single, double and back quotes, which
735		-- also means you can quote quotation marks themselves. See example #3.
736		--
737		-- It is possible that the value data itself is a nested set of pairs. To do
738		-- this enclose the value in parentheses. Nested sets can be nested to any level.
739		-- See example #4.
740		--
741		-- If a sub-list has only data values and not keys, enclose it in either braces
742		-- or square brackets. See example #5.
743		-- If you need to have a bracket as the first character in a data value, prefix
744		-- it with a tilde. Actually a leading tilde will always just be stripped off
745		-- regardless of what it prefixes. See example #6.
746		--
747		-- Example 1:
748		--
749		-- s = keyvalues("foo=bar, qwe=1234, asdf='contains space, comma, and equal(=)'")
750		-- -- s is { {"foo", "bar"}, {"qwe", "1234"}, {"asdf", "contains space, comma, and equal(=)"}}
751		--
752		--
753		-- Example 2:
754		--
755		-- s = keyvalues("abc fgh=ijk def")
756		-- -- s is { {"p[1]", "abc"}, {"fgh", "ijk"}, {"p[3]", "def"} }
757		--
758		--
759		-- Example 3:
760		--
761		-- s = keyvalues("abc=`'quoted'`")
762		-- -- s is { {"abc", "'quoted'"} }
763		--
764		--
765		-- Example 4:
766		--
767		-- s = keyvalues("colors=(a=black, b=blue, c=red)")
768		-- -- s is { {"colors", {{"a", "black"}, {"b", "blue"},{"c", "red"}} } }
769		-- s = keyvalues("colors=(black=[0,0,0], blue=[0,0,FF], red=[FF,0,0])")
770		-- -- s is { {"colors", {{"black",{"0", "0", "0"}}, {"blue",{"0", "0", "FF"}},{"red", {"FF","0","0"}}}} }
771		--
772		--
773		-- Example 5:
774		--
775		-- s = keyvalues("colors=[black, blue, red]")
776		-- -- s is { {"colors", { "black", "blue", "red"} } }
777		--
778		--
779		-- Example 6:
780		--
781		-- s = keyvalues("colors=~[black, blue, red]")
782		-- -- s is { {"colors", "[black, blue, red]"} }
783		-- -- The following is another way to do the same.
784		-- s = keyvalues("colors=`[black, blue, red]`")
785		-- -- s is { {"colors", "[black, blue, red]"} }
786		--
787
788	43	public function keyvalues(sequence source, object pair_delim = ";,",
789		object kv_delim = ":=", object quotes = "\"'`",
790		object whitespace = " \t\n\r", integer haskeys = 1)
791
792		sequence lKeyValues
793		sequence value_
794		sequence key_
795		sequence lAllDelim
796		sequence lWhitePair
797		sequence lStartBracket
798		sequence lEndBracket
799		sequence lBracketed
800		integer lQuote
801		integer pos_
802		integer lChar
803		integer lBPos
804		integer lWasKV
805
806	43	source = trim(source)
807	43	if length(source) = 0 then
808	1	return {}
809		end if
810
811	42	if atom(pair_delim) then
812	1	pair_delim = {pair_delim}
813		end if
814	42	if atom(kv_delim) then
815	1	kv_delim = {kv_delim}
816		end if
817	42	if atom(quotes) then
818	1	quotes = {quotes}
819		end if
820	42	if atom(whitespace) then
821	1	whitespace = {whitespace}
822		end if
823
824	42	lAllDelim = whitespace & pair_delim & kv_delim
825	42	lWhitePair = whitespace & pair_delim
826	42	lStartBracket = "{[("
827	42	lEndBracket = "}])"
828
829	42	lKeyValues = {}
830	42	pos_ = 1
831	42	while pos_ <= length(source) do
832		-- ignore leading whitespace
833	89	while pos_ < length(source) do
834	112	if find(source[pos_], whitespace) = 0 then
835	86	exit
836		end if
837	26	pos_ +=1
838	26	end while
839
840		-- Get key. Ends at any of unquoted whitespace or unquoted delimiter
841	89	key_ = ""
842	89	lQuote = 0
843	89	lChar = 0
844	89	lWasKV = 0
845	89	if haskeys then
846	50	while pos_ <= length(source) do
847	239	lChar = source[pos_]
848	239	if find(lChar, quotes) != 0 then
849	0	if lChar = lQuote then
850		-- End of quoted span
851	0	lQuote = 0
852	0	lChar = -1
853	0	elsif lQuote = 0 then
854		-- Start of quoted span
855	0	lQuote = lChar
856	0	lChar = -1
857		end if
858
859	239	elsif lQuote = 0 and find(lChar, lAllDelim) != 0 then
860	48	exit
861
862		end if
863	191	if lChar > 0 then
864	191	key_ &= lChar
865		end if
866	191	pos_ += 1
867	191	end while
868
869		-- ignore next whitespace
870	50	if find(lChar, whitespace) != 0 then
871	6	pos_ += 1
872	6	while pos_ <= length(source) do
873	24	lChar = source[pos_]
874	24	if find(lChar, whitespace) = 0 then
875	6	exit
876		end if
877	18	pos_ +=1
878	18	end while
879		end if
880		else
881	39	pos_ -= 1 -- Put back the last char.
882		end if
883
884	89	value_ = ""
885	89	if find(lChar, kv_delim) != 0 or not haskeys then
886
887	85	if find(lChar, kv_delim) != 0 then
888	46	lWasKV = 1
889		end if
890
891		-- ignore next whitespace
892	85	pos_ += 1
893	85	while pos_ <= length(source) do
894	90	lChar = source[pos_]
895	90	if find(lChar, whitespace) = 0 then
896	85	exit
897		end if
898	5	pos_ +=1
899	5	end while
900
901		-- Get value. Ends at any of unquoted whitespace or unquoted delimiter
902	85	lQuote = 0
903	85	lChar = 0
904	85	lBracketed = {}
905	85	while pos_ <= length(source) do
906	769	lChar = source[pos_]
907	769	if length(lBracketed) = 0 and find(lChar, quotes) != 0 then
908	35	if lChar = lQuote then
909		-- End of quoted span
910	15	lQuote = 0
911	15	lChar = -1
912	20	elsif lQuote = 0 then
913		-- Start of quoted span
914	15	lQuote = lChar
915	15	lChar = -1
916		end if
917	734	elsif find(lChar, lStartBracket) > 0 then
918	26	lBPos = find(lChar, lStartBracket)
919	26	lBracketed &= lEndBracket[lBPos]
920
921	708	elsif length(value_) = 1 and value_[1] = '~' and find(lChar, lStartBracket) > 0 then
922	0	lBPos = find(lChar, lStartBracket)
923	0	lBracketed &= lEndBracket[lBPos]
924
925	708	elsif length(lBracketed) != 0 and lChar = lBracketed[$] then
926	26	lBracketed = lBracketed[1..$-1]
927
928	682	elsif length(lBracketed) = 0 and lQuote = 0 and find(lChar, lWhitePair) != 0 then
929	45	exit
930
931		end if
932
933	724	if lChar > 0 then
934	694	value_ &= lChar
935		end if
936	724	pos_ += 1
937	724	end while
938
939	85	if find(lChar, whitespace) != 0 then
940		-- ignore next whitespace
941	6	pos_ += 1
942	6	while pos_ <= length(source) do
943	6	lChar = source[pos_]
944	6	if find(lChar, whitespace) = 0 then
945	6	exit
946		end if
947	0	pos_ +=1
948	0	end while
949		end if
950
951	85	if find(lChar, pair_delim) != 0 then
952	39	pos_ += 1
953	39	if pos_ <= length(source) then
954	39	lChar = source[pos_]
955		end if
956		end if
957		end if
958
959	89	if find(lChar, pair_delim) != 0 then
960	0	pos_ += 1
961		end if
962
963	89	if length(value_) = 0 then
964	4	if length(key_) = 0 then
965	0	lKeyValues = append(lKeyValues, {})
966	0	continue
967		end if
968
969	4	if not lWasKV then
970	4	value_ = key_
971	4	key_ = ""
972		end if
973		end if
974
975	89	if length(key_) = 0 then
976	43	if haskeys then
977	4	key_ = sprintf("p[%d]", length(lKeyValues) + 1)
978		end if
979		end if
980
981	89	if length(value_) > 0 then
982	89	lChar = value_[1]
983	89	lBPos = find(lChar, lStartBracket)
984	89	if lBPos > 0 and value_[$] = lEndBracket[lBPos] then
985	16	if lChar = '(' then
986	4	value_ = keyvalues(value_[2..$-1], pair_delim, kv_delim, quotes, whitespace, haskeys)
987		else
988	12	value_ = keyvalues(value_[2..$-1], pair_delim, kv_delim, quotes, whitespace, 0)
989		end if
990	73	elsif lChar = '~' then
991	2	value_ = value_[2 .. $]
992		end if
993		end if
994
995	89	key_ = trim(key_)
996	89	value_ = trim(value_)
997	89	if length(key_) = 0 then
998	39	lKeyValues = append(lKeyValues, value_)
999		else
1000	50	lKeyValues = append(lKeyValues, {key_, value_})
1001		end if
1002
1003	89	end while
1004
1005	42	return lKeyValues
1006		end function
1007
1008		--**
1009		-- Escape special characters in a string
1010		--
1011		-- Parameters:
1012		-- # ##s##: string to escape
1013		-- # ##what##: sequence of characters to escape
1014		-- defaults to escaping a double quote.
1015		--
1016		-- Returns:
1017		-- An escaped ##sequence## representing ##s##.
1018		--
1019		-- Example 1:
1020		--
1021		-- sequence s = escape("John \"Mc\" Doe")
1022		-- puts(1, s)
1023		-- -- output is: John \"Mc\" Doe
1024		--
1025		--
1026		-- See Also:
1027		-- [[:quote]]
1028		--
1029
1030	3	public function escape(sequence s, sequence what="\"")
1031	3	sequence r = ""
1032
1033	3	for i = 1 to length(s) do
1034	40	if find(s[i], what) then
1035	9	r &= "\\"
1036		end if
1037	40	r &= s[i]
1038	40	end for
1039
1040	3	return r
1041		end function
1042
1043
1044		--**
1045		-- Return a quoted version of the first argument.
1046		--
1047		-- Parameters:
1048		-- # ##text_in## : The string or set of strings to quote.
1049		-- # ##quote_pair## : A sequence of two strings. The first string is the opening
1050		-- quote to use, and the second string is the closing quote to use.
1051		-- The default is {"\"", "\""} which means that the output will be
1052		-- enclosed by double-quotation marks.
1053		-- # ##esc## : A single escape character. If this is not negative (the default),
1054		-- then this is used to 'escape' any embedded quote characters and
1055		-- 'esc' characters already in the ##text_in## string.
1056		-- # ##sp## : A list of zero or more special characters. The ##text_in## is only
1057		-- quoted if it contains any of the special characters. The default
1058		-- is "" which means that the ##text_in## is always quoted.
1059		--
1060		-- Returns:
1061		-- A sequence, the quoted version of ##text_in##.
1062		--
1063		-- Example 1:
1064		--
1065		-- -- Using the defaults. Output enclosed in double-quotes, no escapes and no specials.
1066		-- s = quote("The small man")
1067		-- 's' now contains '"The small man"' including the double-quote characters.
1068		--
1069		--
1070		-- Example 2:
1071		--
1072		-- s = quote("The small man", {"(", ")"} )
1073		-- 's' now contains '(The small man)'
1074		--
1075		--
1076		-- Example 3:
1077		--
1078		-- s = quote("The (small) man", {"(", ")"}, '~' )
1079		-- -- 's' now contains '(The ~(small~) man)'
1080		--
1081		--
1082		-- Example 4:
1083		--
1084		-- s = quote("The (small) man", {"(", ")"}, '~', "#" )
1085		-- 's' now contains "The (small) man" (returned unquoted)
1086		-- -- because the input did not contain a '#' character.
1087		--
1088		--
1089		-- Example 5:
1090		--
1091		-- s = quote("The #1 (small) man", {"(", ")"}, '~', "#" )
1092		-- 's' now contains '(The #1 ~(small~) man)'
1093		-- -- because the input did contain a '#' character.
1094		--
1095		--
1096		-- Example 6:
1097		--
1098		-- -- input is a set of strings...
1099		-- s = quote({"a b c", "def", "g hi"},)
1100		-- -- 's' now contains three quoted strings: '"a b c"', '"def"', and '"g hi"'
1101		--
1102		--
1103		-- See Also:
1104		-- [[:escape]]
1105		--
1106
1107	222	public function quote( sequence text_in, object quote_pair = {"\"", "\""}, integer esc = -1,
1108		t_text sp = "" )
1109	222	if length(text_in) = 0 then
1110	1	return text_in
1111		end if
1112
1113	221	if atom(quote_pair) then
1114	1	quote_pair = {{quote_pair}, {quote_pair}}
1115	220	elsif length(quote_pair) = 1 then
1116	4	quote_pair = {quote_pair[1], quote_pair[1]}
1117	216	elsif length(quote_pair) = 0 then
1118	1	quote_pair = {"\"", "\""}
1119		end if
1120
1121	221	if sequence(text_in[1]) then
1122	14	for i = 1 to length(text_in) do
1123	193	if sequence(text_in[i]) then
1124	193	text_in[i] = quote(text_in[i], quote_pair, esc, sp)
1125		end if
1126	193	end for
1127
1128	14	return text_in
1129		end if
1130
1131		-- Only quote the input if it contains any of the items in 'sp'
1132	207	for i = 1 to length(sp) do
1133	195	if find(sp[i], text_in) then
1134	15	exit
1135		end if
1136
1137	180	if i = length(sp) then
1138		-- Contains none of them, so just return the input untouched.
1139	179	return text_in
1140		end if
1141	1	end for
1142
1143	28	if esc >= 0 then
1144		-- If the input already contains a quote, replace them with esc-quote,
1145		-- but make sure that if the input already contains esc-quote that all
1146		-- embedded escapes are replaced with esc-esc first.
1147	21	if atom(quote_pair[1]) then
1148	1	quote_pair[1] = {quote_pair[1]}
1149		end if
1150	21	if atom(quote_pair[2]) then
1151	1	quote_pair[2] = {quote_pair[2]}
1152		end if
1153
1154	21	if equal(quote_pair[1], quote_pair[2]) then
1155		-- Simple case where both open and close quote are the same.
1156	16	if match(quote_pair[1], text_in) then
1157	4	if match(esc & quote_pair[1], text_in) then
1158	1	text_in = replace_all(text_in, esc, esc & esc)
1159		end if
1160	4	text_in = replace_all(text_in, quote_pair[1], esc & quote_pair[1])
1161		end if
1162		else
1163	5	if match(quote_pair[1], text_in) or
1164		match(quote_pair[2], text_in) then
1165	5	if match(esc & quote_pair[1], text_in) then
1166	1	text_in = replace_all(text_in, esc & quote_pair[1], esc & esc & quote_pair[1])
1167		end if
1168	5	text_in = replace_all(text_in, quote_pair[1], esc & quote_pair[1])
1169		end if
1170
1171	5	if match(quote_pair[2], text_in) then
1172	5	if match(esc & quote_pair[2], text_in) then
1173	1	text_in = replace_all(text_in, esc & quote_pair[2], esc & esc & quote_pair[2])
1174		end if
1175	5	text_in = replace_all(text_in, quote_pair[2], esc & quote_pair[2])
1176		end if
1177		end if
1178		end if
1179
1180	28	return quote_pair[1] & text_in & quote_pair[2]
1181
1182		end function
1183
1184		--**
1185		-- Removes 'quotation' text from the argument.
1186		--
1187		-- Parameters:
1188		-- # ##text_in## : The string or set of strings to de-quote.
1189		-- # ##quote_pairs## : A set of one or more sub-sequences of two strings,
1190		-- or an atom representing a single character to be used as
1191		-- both the open and close quotes.
1192		-- The first string in each sub-sequence is the opening
1193		-- quote to look for, and the second string is the closing quote.
1194		-- The default is {{{"\"", "\""}}} which means that the output is
1195		-- 'quoted' if it is enclosed by double-quotation marks.
1196		-- # ##esc## : A single escape character. If this is not negative (the default),
1197		-- then this is used to 'escape' any embedded occurrences of the
1198		-- quote characters. In which case the 'escape' character is also
1199		-- removed.
1200		--
1201		-- Returns:
1202		-- A sequence, the original text but with 'quote' strings stripped of quotes.
1203		--
1204		-- Example 1:
1205		--
1206		-- -- Using the defaults.
1207		-- s = dequote("\"The small man\"")
1208		-- -- 's' now contains "The small man"
1209		--
1210		--
1211		-- Example 2:
1212		--
1213		-- Using a custom quote pair and an escape character.
1214		-- s = dequote("(The small ?(?) man)", {{"(",")"}}, '?')
1215		-- -- 's' now contains "The small () man"
1216		--
1217		--
1218	8	public function dequote(sequence text_in, object quote_pairs = {{"\"", "\""}}, integer esc = -1)
1219
1220	8	if length(text_in) = 0 then
1221	1	return text_in
1222		end if
1223
1224	7	if atom(quote_pairs) then
1225	1	quote_pairs = {{{quote_pairs}, {quote_pairs}}}
1226	6	elsif length(quote_pairs) = 1 then
1227	2	quote_pairs = {quote_pairs[1], quote_pairs[1]}
1228	4	elsif length(quote_pairs) = 0 then
1229	1	quote_pairs = {{"\"", "\""}}
1230		end if
1231
1232	7	if sequence(text_in[1]) then
1233	1	for i = 1 to length(text_in) do
1234	2	if sequence(text_in[i]) then
1235	2	text_in[i] = dequote(text_in[i], quote_pairs, esc)
1236		end if
1237	2	end for
1238
1239	1	return text_in
1240		end if
1241
1242		-- If the text begins and ends with a quote-pair then strip them off and
1243		-- remove the 'escape' from any 'escaped' quote_pairs within the text.
1244	6	for i = 1 to length(quote_pairs) do
1245	7	if length(text_in) >= length(quote_pairs[i][1]) + length(quote_pairs[i][2]) then
1246	7	if begins(quote_pairs[i][1], text_in) and ends(quote_pairs[i][2], text_in) then
1247	6	text_in = text_in[1 + length(quote_pairs[i][1]) .. $ - length(quote_pairs[i][2])]
1248	6	integer pos = 1
1249	6	while pos > 0 with entry do
1250	4	if begins(quote_pairs[i][1], text_in[pos+1 .. $]) then
1251	2	text_in = text_in[1 .. pos-1] & text_in[pos + 1 .. $]
1252	2	elsif begins(quote_pairs[i][2], text_in[pos+1 .. $]) then
1253	2	text_in = text_in[1 .. pos-1] & text_in[pos + 1 .. $]
1254		else
1255	0	pos += 1
1256		end if
1257		entry
1258	10	pos = find_from(esc, text_in, pos)
1259	10	end while
1260	6	exit
1261		end if
1262		end if
1263	1	end for
1264
1265	6	return text_in
1266		end function
1267
1268		--**
1269		-- Formats a set of arguments into a string based on a supplied pattern.
1270		--
1271		-- Parameters:
1272		-- # ##format_pattern## : A sequence: the pattern string that contains zero or more tokens.
1273		-- # ##arg_list## : An object: Zero or more arguments used in token replacement.
1274		--
1275		-- Returns:
1276		-- A string sequence, the original ##format_pattern## but with tokens replaced by
1277		-- corresponding arguments.
1278		--
1279		-- Comments:
1280		-- The ##format_pattern## string contains text and argument tokens. The resulting string
1281		-- is the same as the format string except that each token is replaced by an
1282		-- item from the argument list.
1283		--
1284		-- A token has the form ##[<Q>]##, where <Q> is a set of optional qualifier codes.
1285		--
1286		-- The qualifier, ##<Q>##, is a set of zero or more codes that modify the default
1287		-- way that the argument is used to replace the token. The default replacement
1288		-- method is to convert the argument to its shortest string representation and
1289		-- use that to replace the token. This may be modified by the following codes,
1290		-- which can occur in any order.
1291		-- \|= Qualifier \|= Usage \|
1292		-- \| N \| ('N' is an integer) The index of the argument to use\|
1293		-- \| {id} \| Uses the argument that begins with "id=" where "id" \\
1294		-- is an identifier name. \|
1295		-- \| %envvar% \| Uses the Environment Symbol 'envvar' as an argument \|
1296		-- \| w \| For string arguments, it capitalizes the first\\
1297		-- letter in each word \|
1298		-- \| u \| For string arguments, it converts it to upper case. \|
1299		-- \| l \| For string arguments, it converts it to lower case. \|
1300		-- \| < \| For numeric arguments, it left justifies it. \|
1301		-- \| > \| For string arguments, it right justifies it. \|
1302		-- \| c \| Centers the argument. \|
1303		-- \| z \| For numbers, it zero fills the left side. \|
1304		-- \| :S \| ('S' is an integer) The maximum size of the\\
1305		-- resulting field. Also, if 'S' begins with '0' the\\
1306		-- field will be zero-filled if the argument is an integer\|
1307		-- \| .N \| ('N' is an integer) The number of digits after\\
1308		-- the decimal point \|
1309		-- \| + \| For positive numbers, show a leading plus sign \|
1310		-- \| ( \| For negative numbers, enclose them in parentheses \|
1311		-- \| b \| For numbers, causes zero to be all blanks \|
1312		-- \| s \| If the resulting field would otherwise be zero\\
1313		-- length, this ensures that at least one space occurs\\
1314		-- between this token's field \|
1315		-- \| t \| After token replacement, the resulting string up to this point is trimmed. \|
1316		-- \| X \| Outputs integer arguments using hexadecimal digits. \|
1317		-- \| B \| Outputs integer arguments using binary digits. \|
1318		-- \| ? \| The corresponding argument is a set of two strings. This\\
1319		-- uses the first string if the previous token's argument is\\
1320		-- not the value 1 or a zero-length string, otherwise it\\
1321		-- uses the second string. \|
1322		-- \| [ \| Does not use any argument. Outputs a left-square-bracket symbol \|
1323		-- \| ,X \| Insert thousands separators. The 'X' is the character\\
1324		-- to use. If this is a dot "." then the decimal point\\
1325		-- is rendered using a comma. Does not apply to zero-filled\\
1326		-- fields. \\
1327		-- N.B. if hex or binary output was specified, the \\
1328		-- separators are every 4 digits otherwise they are \\
1329		-- every three digits. \|
1330		--
1331		-- Clearly, certain combinations of these qualifier codes do not make sense and in
1332		-- those situations, the rightmost clashing code is used and the others are ignored.
1333		--
1334		-- Any tokens in the format that have no corresponding argument are simply removed
1335		-- from the result. Any arguments that are not used in the result are ignored.
1336		--
1337		-- Any sequence argument that is not a string will be converted to its
1338		-- //pretty// format before being used in token replacement.
1339		--
1340		-- If a token is going to be replaced by a zero-length argument, all white space
1341		-- following the token until the next non-whitespace character is not copied to
1342		-- the result string.
1343		--
1344		-- Examples:
1345		--
1346		-- format("Cannot open file '[]' - code []", {"/usr/temp/work.dat", 32})
1347		-- -- "Cannot open file '/usr/temp/work.dat' - code 32"
1348		--
1349		-- format("Err-[2], Cannot open file '[1]'", {"/usr/temp/work.dat", 32})
1350		-- -- "Err-32, Cannot open file '/usr/temp/work.dat'"
1351		--
1352		-- format("[4w] [3z:2] [6] [5l] [2z:2], [1:4]", {2009,4,21,"DAY","MONTH","of"})
1353		-- -- "Day 21 of month 04, 2009"
1354		--
1355		-- format("The answer is [:6.2]%", {35.22341})
1356		-- "The answer is  35.22%"
1357		--
1358		-- format("The answer is [.6]", {1.2345})
1359		-- -- "The answer is 1.234500"
1360		--
1361		-- format("The answer is [,,.2]", {1234.56})
1362		-- -- "The answer is 1,234.56"
1363		--
1364		-- format("The answer is [,..2]", {1234.56})
1365		-- -- "The answer is 1.234,56"
1366		--
1367		-- format("The answer is [,:.2]", {1234.56})
1368		-- -- "The answer is 1:234.56"
1369		--
1370		-- format("[] [?]", {5, {"cats", "cat"}})
1371		-- -- "5 cats"
1372		--
1373		-- format("[] [?]", {1, {"cats", "cat"}})
1374		-- -- "1 cat"
1375		--
1376		-- format("[<:4]", {"abcdef"})
1377		-- -- "abcd"
1378		--
1379		-- format("[>:4]", {"abcdef"})
1380		-- -- "cdef"
1381		--
1382		-- format("[>:8]", {"abcdef"})
1383		-- "  abcdef"
1384		--
1385		-- format("seq is []", {{1.2, 5, "abcdef", {3}}})
1386		-- -- `seq is {1.2,5,"abcdef",{3}}`
1387		--
1388		-- format("Today is [{day}], the [{date}]", {"date=10/Oct/2012", "day=Wednesday"})
1389		-- -- "Today is Wednesday, the 10/Oct/2012"
1390		--
1391		--
1392		-- See Also:
1393		-- [[:sprintf]]
1394		--
1395
1396	64	public function format(sequence format_pattern, object arg_list = {})
1397		sequence result
1398		integer in_token
1399		integer tch
1400		integer i
1401		integer tstart
1402		integer tend
1403		integer cap
1404		integer align
1405		integer psign
1406		integer msign
1407		integer zfill
1408		integer bwz
1409		integer spacer
1410		integer alt
1411		integer width
1412		integer decs
1413		integer pos
1414		integer argn
1415		integer argl
1416		integer trimming
1417		integer hexout
1418		integer binout
1419		integer tsep
1420		object prevargv
1421		object currargv
1422		sequence idname
1423		object envsym
1424		object envvar
1425
1426	64	if atom(arg_list) then
1427	21	arg_list = {arg_list}
1428		end if
1429
1430	64	result = ""
1431	64	in_token = 0
1432
1433
1434	64	i = 0
1435	64	tstart = 0
1436	64	tend = 0
1437	64	argl = 0
1438	64	spacer = 0
1439	64	prevargv = 0
1440	64	while i < length(format_pattern) do
1441	642	i += 1
1442	642	tch = format_pattern[i]
1443	642	if not in_token then
1444	408	if tch = '[' then
1445	92	in_token = 1
1446	92	tstart = i
1447	92	tend = 0
1448	92	cap = 0
1449	92	align = 0
1450	92	psign = 0
1451	92	msign = 0
1452	92	zfill = 0
1453	92	bwz = 0
1454	92	spacer = 0
1455	92	alt = 0
1456	92	width = 0
1457	92	decs = -1
1458	92	argn = 0
1459	92	hexout = 0
1460	92	binout = 0
1461	92	trimming = 0
1462	92	tsep = 0
1463	92	idname = ""
1464	92	envvar = ""
1465	92	envsym = ""
1466		else
1467	316	result &= tch
1468		end if
1469		else
1470	234	switch tch do
1471		case ']' then
1472	91	in_token = 0
1473	91	tend = i
1474
1475		case '[' then
1476	1	result &= tch
1477	1	while i < length(format_pattern) do
1478	10	i += 1
1479	10	if format_pattern[i] = ']' then
1480	1	in_token = 0
1481	1	tstart = 0
1482	1	tend = 0
1483	1	exit
1484		end if
1485	9	end while
1486
1487		case 'w', 'u', 'l' then
1488	5	cap = tch
1489
1490		case 'b' then
1491	2	bwz = 1
1492
1493		case 's' then
1494	1	spacer = 1
1495
1496		case 't' then
1497	3	trimming = 1
1498
1499		case 'z' then
1500	19	zfill = 1
1501
1502		case 'X' then
1503	3	hexout = 1
1504
1505		case 'B' then
1506	4	binout = 1
1507
1508		case 'c', '<', '>' then
1509	8	align = tch
1510
1511		case '+' then
1512	10	psign = 1
1513
1514		case '(' then
1515	6	msign = 1
1516
1517		case '?' then
1518	4	alt = 1
1519
1520		case ':' then
1521	32	while i < length(format_pattern) do
1522	67	i += 1
1523	67	tch = format_pattern[i]
1524	67	pos = find(tch, "0123456789")
1525	67	if pos = 0 then
1526	32	i -= 1
1527	32	exit
1528		end if
1529	35	width = width * 10 + pos - 1
1530	35	if width = 0 then
1531	1	zfill = '0'
1532		end if
1533	35	end while
1534
1535		case '.' then
1536	11	decs = 0
1537	11	while i < length(format_pattern) do
1538	22	i += 1
1539	22	tch = format_pattern[i]
1540	22	pos = find(tch, "0123456789")
1541	22	if pos = 0 then
1542	11	i -= 1
1543	11	exit
1544		end if
1545	11	decs = decs * 10 + pos - 1
1546	11	end while
1547
1548		case '{' then
1549		-- Use a named argument.
1550	2	integer sp
1551
1552	2	sp = i + 1
1553	2	i = sp
1554	2	while i < length(format_pattern) do
1555	9	if format_pattern[i] = '}' then
1556	2	exit
1557		end if
1558	7	if format_pattern[i] = ']' then
1559	0	exit
1560		end if
1561	7	i += 1
1562	7	end while
1563	2	idname = trim(format_pattern[sp .. i-1]) & '='
1564	2	if format_pattern[i] = ']' then
1565	0	i -= 1
1566		end if
1567
1568	2	for j = 1 to length(arg_list) do
1569	3	if begins(idname, arg_list[j]) then
1570	2	if argn = 0 then
1571	2	argn = j
1572	2	exit
1573		end if
1574		end if
1575	1	if j = length(arg_list) then
1576	0	idname = ""
1577	0	argn = -1
1578		end if
1579	1	end for
1580		case '%' then
1581		-- Use the environment symbol
1582	1	integer sp
1583
1584	1	sp = i + 1
1585	1	i = sp
1586	1	while i < length(format_pattern) do
1587	8	if format_pattern[i] = '%' then
1588	1	exit
1589		end if
1590	7	if format_pattern[i] = ']' then
1591	0	exit
1592		end if
1593	7	i += 1
1594	7	end while
1595	1	envsym = trim(format_pattern[sp .. i-1])
1596	1	if format_pattern[i] = ']' then
1597	0	i -= 1
1598		end if
1599
1600	1	envvar = getenv(envsym)
1601
1602	1	argn = -1
1603	1	if atom(envvar) then
1604	0	envvar = ""
1605		end if
1606
1607
1608		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' then
1609	26	if argn = 0 then
1610	26	i -= 1
1611	26	while i < length(format_pattern) do
1612	52	i += 1
1613	52	tch = format_pattern[i]
1614	52	pos = find(tch, "0123456789")
1615	52	if pos = 0 then
1616	26	i -= 1
1617	26	exit
1618		end if
1619	26	argn = argn * 10 + pos - 1
1620	26	end while
1621		end if
1622
1623		case ',' then
1624	5	if i < length(format_pattern) then
1625	5	i +=1
1626	5	tsep = format_pattern[i]
1627		end if
1628
1629		case else
1630		-- ignore it
1631		end switch
1632
1633	234	if tend > 0 then
1634		-- Time to replace the token.
1635	91	sequence argtext = ""
1636
1637	91	if argn = 0 then
1638	62	argn = argl + 1
1639		end if
1640	91	argl = argn
1641
1642	91	if argn < 1 or argn > length(arg_list) then
1643	1	if length(envvar) > 0 then
1644	1	argtext = envvar
1645	1	currargv = envvar
1646		else
1647	0	argtext = ""
1648	0	currargv =""
1649		end if
1650		else
1651	90	if string(arg_list[argn]) then
1652	34	if length(idname) > 0 then
1653	2	argtext = arg_list[argn][length(idname) + 1 .. $]
1654		else
1655	32	argtext = arg_list[argn]
1656		end if
1657
1658	56	elsif integer(arg_list[argn]) then
1659	36	if bwz != 0 and arg_list[argn] = 0 then
1660	2	argtext = ""
1661	34	elsif binout = 1 then
1662	4	argtext = reverse(int_to_bits(arg_list[argn], 32)) + '0'
1663	4	for ib = 1 to length(argtext) do
1664	76	if argtext[ib] = '1' then
1665	4	argtext = argtext[ib .. $]
1666	4	exit
1667		end if
1668	72	end for
1669
1670	30	elsif hexout = 0 then
1671	27	argtext = sprintf("%d", arg_list[argn])
1672	27	if zfill != 0 and width > 0 then
1673	15	if length(argtext) > 0 then
1674	15	if argtext[1] = '-' then
1675	2	if width > length(argtext) then
1676	2	argtext = '-' & repeat('0', width - length(argtext)) & argtext[2..$]
1677		end if
1678		else
1679	13	if width > length(argtext) then
1680	12	argtext = repeat('0', width - length(argtext)) & argtext
1681		end if
1682		end if
1683		else
1684	0	argtext = repeat('0', width - length(argtext)) & argtext
1685		end if
1686		end if
1687
1688	27	if arg_list[argn] > 0 then
1689	20	if psign then
1690	2	if zfill = 0 then
1691	1	argtext = '+' & argtext
1692	1	elsif argtext[1] = '0' then
1693	1	argtext[1] = '+'
1694		end if
1695		end if
1696	7	elsif arg_list[argn] < 0 then
1697	4	if msign then
1698	2	if zfill = 0 then
1699	1	argtext = '(' & argtext[2..$] & ')'
1700		else
1701	1	if argtext[2] = '0' then
1702	1	argtext = '(' & argtext[3..$] & ')'
1703		else
1704	0	argtext = argtext[2..$] & ')'
1705		end if
1706		end if
1707		end if
1708		end if
1709		else
1710	3	argtext = sprintf("%x", arg_list[argn])
1711	3	if zfill != 0 and width > 0 then
1712	2	if width > length(argtext) then
1713	2	argtext = repeat('0', width - length(argtext)) & argtext
1714		end if
1715		end if
1716		end if
1717
1718	20	elsif atom(arg_list[argn]) then
1719	15	if bwz != 0 and arg_list[argn] = 0 then
1720	0	argtext = ""
1721		else
1722	15	if hexout then
1723	0	argtext = sprintf("%x", arg_list[argn])
1724	0	if zfill != 0 and width > 0 then
1725	0	if width > length(argtext) then
1726	0	argtext = repeat('0', width - length(argtext)) & argtext
1727		end if
1728		end if
1729		else
1730	15	argtext = trim(sprintf("%15.15g", arg_list[argn]))
1731	15	if zfill != 0 and width > 0 then
1732	3	if length(argtext) > 0 then
1733	3	if width > length(argtext) then
1734	3	if argtext[1] = '-' then
1735	2	argtext = '-' & repeat('0', width - length(argtext)) & argtext[2..$]
1736		else
1737	1	argtext = repeat('0', width - length(argtext)) & argtext
1738		end if
1739		end if
1740		else
1741	0	argtext = repeat('0', width - length(argtext)) & argtext
1742		end if
1743		end if
1744	15	if arg_list[argn] > 0 then
1745	11	if psign then
1746	2	if zfill = 0 then
1747	1	argtext = '+' & argtext
1748	1	elsif argtext[1] = '0' then
1749	1	argtext[1] = '+'
1750		end if
1751		end if
1752	4	elsif arg_list[argn] < 0 then
1753	4	if msign then
1754	2	if zfill = 0 then
1755	1	argtext = '(' & argtext[2..$] & ')'
1756		else
1757	1	if argtext[2] = '0' then
1758	1	argtext = '(' & argtext[3..$] & ')'
1759		else
1760	0	argtext = argtext[2..$] & ')'
1761		end if
1762		end if
1763		end if
1764		end if
1765		end if
1766		end if
1767
1768		else
1769	5	if alt != 0 and length(arg_list[argn]) = 2 then
1770	4	object tempv
1771	4	if atom(prevargv) then
1772	2	if prevargv != 1 then
1773	1	tempv = arg_list[argn][1]
1774		else
1775	1	tempv = arg_list[argn][2]
1776		end if
1777		else
1778	2	if length(prevargv) = 0 then
1779	1	tempv = arg_list[argn][1]
1780		else
1781	1	tempv = arg_list[argn][2]
1782		end if
1783		end if
1784
1785	4	if string(tempv) then
1786	4	argtext = tempv
1787	0	elsif integer(tempv) then
1788	0	if bwz != 0 and tempv = 0 then
1789	0	argtext = ""
1790		else
1791	0	argtext = sprintf("%d", tempv)
1792		end if
1793
1794	0	elsif atom(tempv) then
1795	0	if bwz != 0 and tempv = 0 then
1796	0	argtext = ""
1797		else
1798	0	argtext = trim(sprintf("%15.15g", tempv))
1799		end if
1800		else
1801	0	argtext = pretty_sprint( tempv,
1802		{2,0,1,1000,"%d","%.15g",32,127,1,0}
1803		)
1804		end if
1805		else
1806	1	argtext = pretty_sprint( arg_list[argn],
1807		{2,0,1,1000,"%d","%.15g",32,127,1,0}
1808		)
1809		end if
1810		end if
1811	90	currargv = arg_list[argn]
1812		end if
1813
1814
1815	91	if length(argtext) > 0 then
1816	84	switch cap do
1817		case 'u' then
1818	1	argtext = upper(argtext)
1819		case 'l' then
1820	3	argtext = lower(argtext)
1821		case 'w' then
1822	1	argtext = proper(argtext)
1823		case 0 then
1824		-- do nothing
1825	79	cap = cap
1826
1827		case else
1828	0	crash("logic error: 'cap' mode in format.")
1829
1830		end switch
1831
1832	84	if atom(currargv) then
1833	49	if find('e', argtext) = 0 then
1834		-- Only applies to non-scientific notation.
1835	48	if decs != -1 then
1836	8	pos = find('.', argtext)
1837	8	if pos then
1838	7	if decs = 0 then
1839	1	argtext = argtext [1 .. pos-1 ]
1840		else
1841	6	pos = length(argtext) - pos
1842	6	if pos > decs then
1843	2	argtext = argtext[ 1 .. $ - pos + decs ]
1844	4	elsif pos < decs then
1845	1	argtext = argtext & repeat('0', decs - pos)
1846		end if
1847		end if
1848	1	elsif decs > 0 then
1849	1	argtext = argtext & '.' & repeat('0', decs)
1850		end if
1851		end if
1852
1853		end if
1854		end if
1855
1856	84	if align = 0 then
1857	76	if atom(currargv) then
1858	49	align = '>'
1859		else
1860	27	align = '<'
1861		end if
1862		end if
1863
1864	84	if atom(currargv) then
1865	49	if tsep != 0 and zfill = 0 then
1866	5	integer dpos
1867	5	integer dist
1868	5	integer bracketed
1869
1870	5	if binout or hexout then
1871	2	dist = 4
1872		else
1873	3	dist = 3
1874		end if
1875	5	bracketed = (argtext[1] = '(')
1876	5	if bracketed then
1877	0	argtext = argtext[2 .. $-1]
1878		end if
1879	5	dpos = find('.', argtext)
1880	5	if dpos = 0 then
1881	2	dpos = length(argtext) + 1
1882		else
1883	3	if tsep = '.' then
1884	1	argtext[dpos] = ','
1885		end if
1886		end if
1887	5	while dpos > dist do
1888	7	dpos -= dist
1889	7	if dpos > 1 then
1890	5	argtext = argtext[1.. dpos - 1] & tsep & argtext[dpos .. $]
1891		end if
1892	7	end while
1893	5	if bracketed then
1894	0	argtext = '(' & argtext & ')'
1895		end if
1896		end if
1897		end if
1898
1899	84	if width <= 0 then
1900	52	width = length(argtext)
1901		end if
1902
1903
1904	84	if width < length(argtext) then
1905	4	if align = '>' then
1906	1	argtext = argtext[ $ - width + 1 .. $]
1907	3	elsif align = 'c' then
1908	2	pos = length(argtext) - width
1909	2	if remainder(pos, 2) = 0 then
1910	1	pos = pos / 2
1911	1	argtext = argtext[ pos + 1 .. $ - pos ]
1912		else
1913	1	pos = floor(pos / 2)
1914	1	argtext = argtext[ pos + 1 .. $ - pos - 1]
1915		end if
1916		else
1917	1	argtext = argtext[ 1 .. width]
1918		end if
1919	80	elsif width > length(argtext) then
1920	7	if align = '>' then
1921	3	argtext = repeat(' ', width - length(argtext)) & argtext
1922	4	elsif align = 'c' then
1923	2	pos = width - length(argtext)
1924	2	if remainder(pos, 2) = 0 then
1925	2	pos = pos / 2
1926	2	argtext = repeat(' ', pos) & argtext & repeat(' ', pos)
1927		else
1928	0	pos = floor(pos / 2)
1929	0	argtext = repeat(' ', pos) & argtext & repeat(' ', pos + 1)
1930		end if
1931
1932		else
1933	2	argtext = argtext & repeat(' ', width - length(argtext))
1934		end if
1935		end if
1936	84	result &= argtext
1937
1938		else
1939	7	if spacer then
1940	1	result &= ' '
1941		end if
1942		end if
1943
1944	91	if trimming then
1945	3	result = trim(result)
1946		end if
1947
1948	91	tend = 0
1949	91	prevargv = currargv
1950		end if
1951		end if
1952	642	end while
1953
1954	64	return result
1955		end function
1956
1957		--**
1958		-- Get the text associated with the message number in the requested locale.
1959		--
1960		-- Parameters:
1961		-- # ##MsgNum## : An integer. The message number whose text you are trying to get.
1962		-- # ##LocalQuals## : A sequence. Zero or more locale codes. Default is {}.
1963		-- # ##DBBase##: A sequence. The base name for the database files containing the
1964		-- locale text strings. The default is "teksto".
1965		--
1966		-- Returns:
1967		-- A string sequence, the text associated with the message number and locale.\\
1968		-- An integer, if no associated text can be found.
1969		--
1970		-- Comments:
1971		-- * This first scans the database(s) linked to the locale codes supplied.
1972		-- * The database name for each locale takes the format of "<DBBase>_<Locale>.edb"
1973		-- so if the default DBBase is used, and the locales supplied are {"enus", "enau"}
1974		-- the databases scanned are "teksto_enus.edb" and "teksto_enau.edb".
1975		-- The database table name searched is "1" with the key being the message number,
1976		-- and the text is the record data.
1977		-- * If the message is not found in these databases (or the databases don't exist)
1978		-- a database called "<DBBase>.edb" is searched. Again the table name is "1" but
1979		-- it first looks for keys with the format {<Locale>,msgnum} and failing that it
1980		-- looks for keys in the format {"", msgnum}, and if that fails it looks for a
1981		-- key of just the msgnum.
1982		--
1983	0	public function get_text( integer MsgNum, sequence LocalQuals = {}, sequence DBBase = "teksto")
1984	0	integer idx = 1
1985		integer db_res
1986		object lMsgText
1987
1988	0	db_res = -1
1989	0	lMsgText = 0
1990		-- First, scan through the specialized local dbs
1991	0	for i = 1 to length(LocalQuals) do
1992	0	db_res = db_select( locate_file( DBBase & "_" & LocalQuals[i] & ".edb" ), DB_LOCK_NO)
1993	0	if db_res = DB_OK then
1994	0	db_res = db_select_table("1")
1995	0	if db_res = DB_OK then
1996	0	lMsgText = db_fetch_record(MsgNum)
1997	0	if sequence(lMsgText) then
1998	0	exit
1999		end if
2000		end if
2001		end if
2002	0	end for
2003
2004		-- Next, scan through the generic db
2005	0	if atom(lMsgText) then
2006	0	db_res = db_select( locate_file( DBBase & ".edb" ), DB_LOCK_NO)
2007	0	if db_res = DB_OK then
2008	0	db_res = db_select_table("1")
2009	0	if db_res = DB_OK then
2010	0	for i = 1 to length(LocalQuals) do
2011	0	lMsgText = db_fetch_record({LocalQuals[i],MsgNum})
2012	0	if sequence(lMsgText) then
2013	0	exit
2014		end if
2015	0	end for
2016	0	if atom(lMsgText) then
2017	0	lMsgText = db_fetch_record({"",MsgNum})
2018		end if
2019	0	if atom(lMsgText) then
2020	0	lMsgText = db_fetch_record(MsgNum)
2021		end if
2022		end if
2023		end if
2024		end if
2025
2026	0	return lMsgText
2027
2028		end function
2029