Name | Executed | Routines | % | Executed | Lines | % | Unexecuted |
/home/matt/eu/rds/include/std/text.e | 15 | 16 | 93.75% | 629 | 736 | 85.46% | 107 |
Routine | Executed | Lines | Unexecuted | |
format() | 287 | 319 | 89.97% | 32 |
load_code_page() | 23 | 55 | 41.82% | 32 |
get_text() | 0 | 28 | 0.00% | 28 |
keyvalues() | 117 | 130 | 90.00% | 13 |
dequote() | 30 | 31 | 96.77% | 1 |
set_encoding_properties() | 12 | 13 | 92.31% | 1 |
escape() | 8 | 8 | 100.00% | 0 |
get_encoding_properties() | 2 | 2 | 100.00% | 0 |
lower() | 5 | 5 | 100.00% | 0 |
proper() | 34 | 34 | 100.00% | 0 |
quote() | 40 | 40 | 100.00% | 0 |
sprint() | 14 | 14 | 100.00% | 0 |
trim() | 23 | 23 | 100.00% | 0 |
trim_head() | 12 | 12 | 100.00% | 0 |
trim_tail() | 12 | 12 | 100.00% | 0 |
upper() | 5 | 5 | 100.00% | 0 |
# | Executed | |
1 | -- (c) Copyright - See License.txt | |
2 | -- | |
3 | namespace text | |
4 | ||
5 | --**** | |
6 | -- == Text Manipulation | |
7 | -- **Page Contents** | |
8 | -- | |
9 | -- < | |
10 | -- | |
11 | ||
12 | --**** | |
13 | -- === Routines | |
14 | ||
15 | include std/filesys.e | |
16 | include std/types.e | |
17 | include std/sequence.e | |
18 | include std/io.e | |
19 | include std/search.e | |
20 | include std/convert.e | |
21 | include std/serialize.e | |
22 | include std/pretty.e | |
23 | include std/error.e | |
24 | include std/eds.e | |
25 | include std/convert.e | |
26 | ||
27 | --**** | |
28 | -- Signature: | |
29 | -- | |
30 | -- | |
31 | -- Description: | |
32 | -- This is exactly the same as [[:printf]](), except that the output is returned as a sequence | |
33 | -- of characters, rather than being sent to a file or device. | |
34 | -- | |
35 | -- Parameters: | |
36 | -- # ##format## : a sequence, the text to print. This text may contain format specifiers. | |
37 | -- # ##values## : usually, a sequence of values. It should have as many elements as format specifiers in ##format##, as these values will be substituted for the specifiers. | |
38 | -- | |
39 | -- Returns: | |
40 | -- A **sequence**, of printable characters, representing ##format## with the values in ##values## spliced in. | |
41 | -- | |
42 | -- Comments: | |
43 | -- | |
44 | -- ##printf(fn, st, x)## is equivalent to ##puts(fn, sprintf(st, x))##. | |
45 | -- | |
46 | -- Some typical uses of ##sprintf()## are: | |
47 | -- | |
48 | -- # Converting numbers to strings. | |
49 | -- # Creating strings to pass to system(). | |
50 | -- # Creating formatted error messages that can be passed to a common error message handler. | |
51 | -- | |
52 | -- Example 1: | |
53 | -- | |
54 | -- s = sprintf("%08d", 12345) | |
55 | -- -- s is "00012345" | |
56 | -- | |
57 | -- | |
58 | -- See Also: | |
59 | -- [[:printf]], [[:sprint]], [[:format]] | |
60 | ||
61 | --** | |
62 | -- Returns the representation of any Euphoria object as a string of characters. | |
63 | -- | |
64 | -- Parameters: | |
65 | -- # ##x## : Any Euphoria object. | |
66 | -- | |
67 | -- Returns: | |
68 | -- A **sequence**, a string representation of ##x##. | |
69 | -- | |
70 | -- Comments: | |
71 | -- | |
72 | -- This is exactly the same as ##print(fn, x)##, except that the output is returned as a sequence of characters, rather | |
73 | -- than being sent to a file or device. x can be any Euphoria object. | |
74 | -- | |
75 | -- The atoms contained within ##x## will be displayed to a maximum of 10 significant digits, | |
76 | -- just as with [[:print]](). | |
77 | -- | |
78 | -- Example 1: | |
79 | -- | |
80 | -- s = sprint(12345) | |
81 | -- -- s is "12345" | |
82 | -- | |
83 | -- | |
84 | -- Example 2: | |
85 | -- | |
86 | -- s = sprint({10,20,30}+5) | |
87 | -- -- s is "{15,25,35}" | |
88 | -- | |
89 | -- | |
90 | -- See Also: | |
91 | -- [[:sprintf]], [[:printf]] | |
92 | ||
93 | 1010 | |
94 | -- Return the string representation of any Euphoria data object. | |
95 | -- This is the same as the output from print(1, x) or '?', but it's | |
96 | -- returned as a string sequence rather than printed. | |
97 | sequence s | |
98 | ||
99 | 1010 | if atom(x) then |
100 | 1006 | return sprintf("%.10g", x) |
101 | else | |
102 | 4 | s = "{" |
103 | 4 | for i = 1 to length(x) do |
104 | 8 | if atom(x[i]) then |
105 | 6 | s &= sprintf("%.10g", x[i]) |
106 | else | |
107 | 2 | s &= sprint(x[i]) |
108 | end if | |
109 | 8 | s &= ',' |
110 | 8 | end for |
111 | 4 | if s[$] = ',' then |
112 | 3 | s[$] = '}' |
113 | else | |
114 | 1 | s &= '}' |
115 | end if | |
116 | 4 | return s |
117 | end if | |
118 | end function | |
119 | ||
120 | --** | |
121 | -- Trim all items in the supplied set from the leftmost (start or head) of a sequence. | |
122 | -- | |
123 | -- Parameters: | |
124 | -- # ##source## : the sequence to trim. | |
125 | -- # ##what## : the set of items to trim from ##source## (defaults to " \t\r\n"). | |
126 | -- # ##ret_index## : If zero (the default) returns the trimmed sequence, otherwise | |
127 | -- it returns the index of the leftmost item **not** in ##what##. | |
128 | -- | |
129 | -- Returns: | |
130 | -- A **sequence**, if ##ret_index## is zero, which is the trimmed version of ##source##\\ | |
131 | -- An **integer**, if ##ret_index## is not zero, which is the index of the leftmost | |
132 | -- element in ##source## that is not in ##what##. | |
133 | -- | |
134 | -- Example 1: | |
135 | -- | |
136 | -- object s | |
137 | -- s = trim_head("\r\nSentence read from a file\r\n", "\r\n") | |
138 | -- -- s is "Sentence read from a file\r\n" | |
139 | -- s = trim_head("\r\nSentence read from a file\r\n", "\r\n", TRUE) | |
140 | -- -- s is 3 | |
141 | -- | |
142 | -- | |
143 | -- | |
144 | -- See Also: | |
145 | -- [[:trim_tail]], [[:trim]], [[:pad_head]] | |
146 | ||
147 | 6 | |
148 | integer lpos | |
149 | 6 | if atom(what) then |
150 | 3 | what = {what} |
151 | end if | |
152 | ||
153 | 6 | lpos = 1 |
154 | 6 | while lpos <= length(source) do |
155 | 23 | if not find(source[lpos], what) then |
156 | 5 | exit |
157 | end if | |
158 | 18 | lpos += 1 |
159 | 18 | end while |
160 | ||
161 | 6 | if ret_index then |
162 | 1 | return lpos |
163 | else | |
164 | 5 | return source[lpos .. $] |
165 | end if | |
166 | end function | |
167 | ||
168 | --** | |
169 | -- Trim all items in the supplied set from the rightmost (end or tail) of a sequence. | |
170 | -- | |
171 | -- Parameters: | |
172 | -- # ##source## : the sequence to trim. | |
173 | -- # ##what## : the set of items to trim from ##source## (defaults to " \t\r\n"). | |
174 | -- # ##ret_index## : If zero (the default) returns the trimmed sequence, otherwise | |
175 | -- it returns the index of the rightmost item **not** in ##what##. | |
176 | -- | |
177 | -- Returns: | |
178 | -- A **sequence**, if ##ret_index## is zero, which is the trimmed version of ##source##\\ | |
179 | -- An **integer**, if ##ret_index## is not zero, which is the index of the rightmost | |
180 | -- element in ##source## that is not in ##what##. | |
181 | -- | |
182 | -- Example 1: | |
183 | -- | |
184 | -- object s | |
185 | -- s = trim_tail("\r\nSentence read from a file\r\n", "\r\n") | |
186 | -- -- s is "\r\nSentence read from a file" | |
187 | -- s = trim_tail("\r\nSentence read from a file\r\n", "\r\n", TRUE) | |
188 | -- -- s is 27 | |
189 | -- | |
190 | -- | |
191 | -- See Also: | |
192 | -- [[:trim_head]], [[:trim]], [[:pad_tail]] | |
193 | ||
194 | 14 | |
195 | integer rpos | |
196 | ||
197 | 14 | if atom(what) then |
198 | 3 | what = {what} |
199 | end if | |
200 | ||
201 | 14 | rpos = length(source) |
202 | 14 | while rpos > 0 do |
203 | 26 | if not find(source[rpos], what) then |
204 | 13 | exit |
205 | end if | |
206 | 13 | rpos -= 1 |
207 | 13 | end while |
208 | ||
209 | 14 | if ret_index then |
210 | 1 | return rpos |
211 | else | |
212 | 13 | return source[1..rpos] |
213 | end if | |
214 | end function | |
215 | ||
216 | --** | |
217 | -- Trim all items in the supplied set from both the left end (head/start) and right end (tail/end) | |
218 | -- of a sequence. | |
219 | -- | |
220 | -- Parameters: | |
221 | -- # ##source## : the sequence to trim. | |
222 | -- # ##what## : the set of items to trim from ##source## (defaults to " \t\r\n"). | |
223 | -- # ##ret_index## : If zero (the default) returns the trimmed sequence, otherwise | |
224 | -- it returns a 2-element sequence containing the index of the | |
225 | -- leftmost item and rightmost item **not** in ##what##. | |
226 | -- | |
227 | -- Returns: | |
228 | -- A **sequence**, if ##ret_index## is zero, which is the trimmed version of ##source##\\ | |
229 | -- A **2-element sequence**, if ##ret_index## is not zero, in the form {left_index, right_index}. | |
230 | -- | |
231 | -- Example 1: | |
232 | -- | |
233 | -- object s | |
234 | -- s = trim("\r\nSentence read from a file\r\n", "\r\n") | |
235 | -- -- s is "Sentence read from a file" | |
236 | -- s = trim("\r\nSentence read from a file\r\n", "\r\n", TRUE) | |
237 | -- -- s is {3,27} | |
238 | -- | |
239 | -- | |
240 | -- See Also: | |
241 | -- [[:trim_head]], [[:trim_tail]] | |
242 | ||
243 | 347 | |
244 | integer rpos | |
245 | integer lpos | |
246 | ||
247 | 347 | if atom(what) then |
248 | 4 | what = {what} |
249 | end if | |
250 | ||
251 | 347 | lpos = 1 |
252 | 347 | while lpos <= length(source) do |
253 | 474 | if not find(source[lpos], what) then |
254 | 293 | exit |
255 | end if | |
256 | 181 | lpos += 1 |
257 | 181 | end while |
258 | ||
259 | 347 | rpos = length(source) |
260 | 347 | while rpos > lpos do |
261 | 323 | if not find(source[rpos], what) then |
262 | 260 | exit |
263 | end if | |
264 | 63 | rpos -= 1 |
265 | 63 | end while |
266 | ||
267 | 347 | if ret_index then |
268 | 1 | return {lpos, rpos} |
269 | else | |
270 | 346 | if lpos = 1 then |
271 | 301 | if rpos = length(source) then |
272 | 272 | return source |
273 | end if | |
274 | end if | |
275 | 74 | if lpos > length(source) then |
276 | 5 | return {} |
277 | end if | |
278 | 69 | return source[lpos..rpos] |
279 | end if | |
280 | end function | |
281 | ||
282 | ||
283 | 101 | constant TO_LOWER = 'a' - 'A' |
284 | ||
285 | 101 | sequence lower_case_SET = {} |
286 | 101 | sequence upper_case_SET = {} |
287 | 101 | sequence encoding_NAME = "ASCII" |
288 | ||
289 | 1 | |
290 | object cpdata | |
291 | integer pos | |
292 | sequence kv | |
293 | sequence cp_source | |
294 | sequence cp_db | |
295 | ||
296 | 1 | cp_source = defaultext(cpname, ".ecp") |
297 | 1 | cp_source = locate_file(cp_source) |
298 | ||
299 | 1 | cpdata = read_lines(cp_source) |
300 | 1 | if sequence(cpdata) then |
301 | ||
302 | 0 | pos = 0 |
303 | 0 | while pos < length(cpdata) do |
304 | 0 | pos += 1 |
305 | 0 | cpdata[pos] = trim(cpdata[pos]) |
306 | 0 | if begins("--HEAD--", cpdata[pos]) then |
307 | 0 | continue |
308 | end if | |
309 | 0 | if cpdata[pos][1] = ';' then |
310 | 0 | continue -- A comment line |
311 | end if | |
312 | 0 | if begins("--CASE--", cpdata[pos]) then |
313 | 0 | exit |
314 | end if | |
315 | ||
316 | 0 | kv = keyvalues(cpdata[pos],,,,"") |
317 | 0 | if equal(lower(kv[1][1]), "title") then |
318 | 0 | encoding_NAME = kv[1][2] |
319 | end if | |
320 | 0 | end while |
321 | 0 | if pos > length(cpdata) then |
322 | 0 | return -2 -- No Case Conversion table found. |
323 | end if | |
324 | ||
325 | 0 | upper_case_SET = "" |
326 | 0 | lower_case_SET = "" |
327 | 0 | while pos < length(cpdata) do |
328 | 0 | pos += 1 |
329 | 0 | cpdata[pos] = trim(cpdata[pos]) |
330 | 0 | if length(cpdata[pos]) < 3 then |
331 | 0 | continue |
332 | end if | |
333 | 0 | if cpdata[pos][1] = ';' then |
334 | 0 | continue -- A comment line |
335 | end if | |
336 | 0 | if cpdata[pos][1] = '-' then |
337 | 0 | exit |
338 | end if | |
339 | ||
340 | 0 | kv = keyvalues(cpdata[pos]) |
341 | 0 | upper_case_SET &= hex_text(kv[1][1]) |
342 | 0 | lower_case_SET &= hex_text(kv[1][2]) |
343 | 0 | end while |
344 | ||
345 | else | |
346 | -- See if it's in the database. | |
347 | 1 | cp_db = locate_file("ecp.dat") |
348 | 1 | integer fh = open(cp_db, "rb") |
349 | 1 | if fh = -1 then |
350 | 0 | return -2 -- Couldn't open DB |
351 | end if | |
352 | 1 | object idx |
353 | 1 | object vers |
354 | 1 | vers = deserialize(fh) -- get the database version |
355 | 1 | if vers[1] = 1 then |
356 | 1 | idx = deserialize(fh) -- get Code Page index offset |
357 | 1 | pos = seek(fh, idx) |
358 | 1 | idx = deserialize(fh) -- get the Code Page Index |
359 | 1 | pos = find(cpname, idx[1]) |
360 | 1 | if pos != 0 then |
361 | 1 | pos = seek(fh, idx[2][pos]) |
362 | 1 | upper_case_SET = deserialize(fh) -- "uppercase" |
363 | 1 | lower_case_SET = deserialize(fh) -- "lowercase" |
364 | 1 | encoding_NAME = deserialize(fh) -- "title" |
365 | end if | |
366 | end if | |
367 | 1 | close(fh) |
368 | ||
369 | end if | |
370 | 1 | return 0 |
371 | end function | |
372 | ||
373 | --** | |
374 | -- Sets the table of lowercase and uppercase characters that is used by | |
375 | -- [[:lower]] and [[:upper]] | |
376 | -- | |
377 | -- Parameters: | |
378 | -- # ##en## : The name of the encoding represented by these character sets | |
379 | -- # ##lc## : The set of lowercase characters | |
380 | -- # ##uc## : The set of upper case characters | |
381 | -- | |
382 | -- | |
383 | -- Comments: | |
384 | -- * ##lc## and ##uc## must be the same length. | |
385 | -- * If no parameters are given, the default ASCII table is set. | |
386 | -- | |
387 | -- Example 1: | |
388 | -- | |
389 | -- set_encoding_properties( "Elvish", "aeiouy", "AEIOUY") | |
390 | -- | |
391 | -- | |
392 | -- Example 2: | |
393 | -- | |
394 | -- set_encoding_properties( "1251") -- Loads a predefined code page. | |
395 | -- | |
396 | -- | |
397 | -- See Also: | |
398 | -- [[:lower]], [[:upper]], [[:get_encoding_properties]] | |
399 | ||
400 | 4 | |
401 | integer res | |
402 | ||
403 | 4 | if length(en) > 0 and length(lc) = 0 and length(uc) = 0 then |
404 | 1 | res = load_code_page(en) |
405 | 1 | if res != 0 then |
406 | 0 | printf(2, "Failed to load code page '%s'. Error # %d\n", {en, res}) |
407 | end if | |
408 | 1 | return |
409 | end if | |
410 | ||
411 | 3 | if length(lc) = length(uc) then |
412 | 3 | if length(lc) = 0 and length(en) = 0 then |
413 | 2 | en = "ASCII" |
414 | end if | |
415 | 3 | lower_case_SET = lc |
416 | 3 | upper_case_SET = uc |
417 | 3 | encoding_NAME = en |
418 | end if | |
419 | 3 | end procedure |
420 | ||
421 | --** | |
422 | -- Gets the table of lowercase and uppercase characters that is used by | |
423 | -- [[:lower]] and [[:upper]] | |
424 | -- | |
425 | -- Parameters: | |
426 | -- none | |
427 | -- | |
428 | -- Returns: | |
429 | -- A **sequence**, containing three items.\\ | |
430 | -- {Encoding_Name, LowerCase_Set, UpperCase_Set} | |
431 | -- | |
432 | -- Example 1: | |
433 | -- | |
434 | -- encode_sets = get_encoding_properties() | |
435 | -- | |
436 | -- | |
437 | -- See Also: | |
438 | -- [[:lower]], [[:upper]], [[:set_encoding_properties]] | |
439 | -- | |
440 | 3 | |
441 | 3 | return {encoding_NAME, lower_case_SET, upper_case_SET} |
442 | end function | |
443 | ||
444 | ||
445 | 101 | ifdef WINDOWS then |
446 | include std/dll.e | |
447 | include std/machine.e | |
448 | include std/types.e | |
449 | atom | |
450 | user32 = open_dll( "user32.dll"), | |
451 | api_CharLowerBuff = define_c_func(user32, "CharLowerBuffA", {C_POINTER, C_INT}, C_INT), | |
452 | api_CharUpperBuff = define_c_func(user32, "CharUpperBuffA", {C_POINTER, C_INT}, C_INT), | |
453 | tm_size = 1024, | |
454 | temp_mem = allocate(1024) | |
455 | ||
456 | function change_case(object x, object api) | |
457 | sequence changed_text | |
458 | integer single_char = 0 | |
459 | integer len | |
460 | ||
461 | if not string(x) then | |
462 | if atom(x) then | |
463 | if x = 0 then | |
464 | return 0 | |
465 | end if | |
466 | x = {x} | |
467 | single_char = 1 | |
468 | else | |
469 | for i = 1 to length(x) do | |
470 | x[i] = change_case(x[i], api) | |
471 | end for | |
472 | return x | |
473 | end if | |
474 | end if | |
475 | if length(x) = 0 then | |
476 | return x | |
477 | end if | |
478 | if length(x) >= tm_size then | |
479 | tm_size = length(x) + 1 | |
480 | free(temp_mem) | |
481 | temp_mem = allocate(tm_size) | |
482 | end if | |
483 | poke(temp_mem, x) | |
484 | len = c_func(api, {temp_mem, length(x)} ) | |
485 | if len < 1 then | |
486 | len = length(x) | |
487 | end if | |
488 | changed_text = peek({temp_mem, len}) | |
489 | if single_char then | |
490 | return changed_text[1] | |
491 | else | |
492 | return changed_text | |
493 | end if | |
494 | end function | |
495 | end ifdef | |
496 | ||
497 | --** | |
498 | -- Convert an atom or sequence to lower case. | |
499 | -- | |
500 | -- Parameters: | |
501 | -- # ##x## : Any Euphoria object. | |
502 | -- | |
503 | -- Returns: | |
504 | -- A **sequence**, the lowercase version of ##x## | |
505 | -- | |
506 | -- Comments: | |
507 | -- * For Windows systems, this uses the current code page for conversion | |
508 | -- * For non-Windows, this only works on ASCII characters. It alters characters in | |
509 | -- the 'A'..'Z' range. If you need to do case conversion with other encodings | |
510 | -- use the [[:set_encoding_properties]] first. | |
511 | -- * ##x## may be a sequence of any shape, all atoms of which will be acted upon. | |
512 | -- | |
513 | -- **WARNING**, When using ASCII encoding, this can also affect floating point | |
514 | -- numbers in the range 65 to 90. | |
515 | -- | |
516 | -- Example 1: | |
517 | -- | |
518 | -- s = lower("Euphoria") | |
519 | -- -- s is "euphoria" | |
520 | -- | |
521 | -- a = lower('B') | |
522 | -- -- a is 'b' | |
523 | -- | |
524 | -- s = lower({"Euphoria", "Programming"}) | |
525 | -- -- s is {"euphoria", "programming"} | |
526 | -- | |
527 | -- | |
528 | -- See Also: | |
529 | -- [[:upper]], [[:proper]], [[:set_encoding_properties]], [[:get_encoding_properties]] | |
530 | 98 | |
531 | -- convert atom or sequence to lower case | |
532 | 98 | if length(lower_case_SET) != 0 then |
533 | 1 | return mapping(x, upper_case_SET, lower_case_SET) |
534 | end if | |
535 | ||
536 | 97 | ifdef WINDOWS then |
537 | return change_case(x, api_CharLowerBuff) | |
538 | elsedef | |
539 | 97 | return x + (x >= 'A' and x <= 'Z') * TO_LOWER |
540 | end ifdef | |
541 | end function | |
542 | ||
543 | --** | |
544 | -- Convert an atom or sequence to upper case. | |
545 | -- | |
546 | -- Parameters: | |
547 | -- # ##x## : Any Euphoria object. | |
548 | -- | |
549 | -- Returns: | |
550 | -- A **sequence**, the uppercase version of ##x## | |
551 | -- | |
552 | -- Comments: | |
553 | -- * For Windows systems, this uses the current code page for conversion | |
554 | -- * For non-Windows, this only works on ASCII characters. It alters characters in | |
555 | -- the 'a'..'z' range. If you need to do case conversion with other encodings | |
556 | -- use the [[:set_encoding_properties]] first. | |
557 | -- * ##x## may be a sequence of any shape, all atoms of which will be acted upon. | |
558 | -- | |
559 | -- **WARNING**, When using ASCII encoding, this can also affect floating point | |
560 | -- numbers in the range 97 to 122. | |
561 | -- | |
562 | -- Example 1: | |
563 | -- | |
564 | -- s = upper("Euphoria") | |
565 | -- -- s is "EUPHORIA" | |
566 | -- | |
567 | -- a = upper('b') | |
568 | -- -- a is 'B' | |
569 | -- | |
570 | -- s = upper({"Euphoria", "Programming"}) | |
571 | -- -- s is {"EUPHORIA", "PROGRAMMING"} | |
572 | -- | |
573 | -- | |
574 | -- See Also: | |
575 | -- [[:lower]], [[:proper]], [[:set_encoding_properties]], [[:get_encoding_properties]] | |
576 | ||
577 | 1135 | |
578 | -- convert atom or sequence to upper case | |
579 | 1135 | if length(upper_case_SET) != 0 then |
580 | 3 | return mapping(x, lower_case_SET, upper_case_SET) |
581 | end if | |
582 | 1132 | ifdef WINDOWS then |
583 | return change_case(x, api_CharUpperBuff) | |
584 | elsedef | |
585 | 1132 | return x - (x >= 'a' and x <= 'z') * TO_LOWER |
586 | end ifdef | |
587 | ||
588 | end function | |
589 | ||
590 | --** | |
591 | -- Convert a text sequence to capitalized words. | |
592 | -- | |
593 | -- Parameters: | |
594 | -- # ##x## : A text sequence. | |
595 | -- | |
596 | -- Returns: | |
597 | -- A **sequence**, the Capitalized Version of ##x## | |
598 | -- | |
599 | -- Comments: | |
600 | -- A text sequence is one in which all elements are either characters or | |
601 | -- text sequences. This means that if a non-character is found in the input, | |
602 | -- it is not converted. However this rule only applies to elements on the | |
603 | -- same level, meaning that sub-sequences could be converted if they are | |
604 | -- actually text sequences. | |
605 | -- | |
606 | -- | |
607 | -- Example 1: | |
608 | -- | |
609 | -- s = proper("euphoria programming language") | |
610 | -- -- s is "Euphoria Programming Language" | |
611 | -- s = proper("EUPHORIA PROGRAMMING LANGUAGE") | |
612 | -- -- s is "Euphoria Programming Language" | |
613 | -- s = proper({"EUPHORIA PROGRAMMING", "language", "rapid dEPLOYMENT", "sOfTwArE"}) | |
614 | -- -- s is {"Euphoria Programming", "Language", "Rapid Deployment", "Software"} | |
615 | -- s = proper({'a', 'b', 'c'}) | |
616 | -- s is {'A', 'b', 'c'} -- "Abc" | |
617 | -- s = proper({'a', 'b', 'c', 3.1472}) | |
618 | -- s is {'a', 'b', 'c', 3.1472} -- Unchanged because it contains a non-character. | |
619 | -- s = proper({"abc", 3.1472}) | |
620 | -- -- s is {"Abc", 3.1472} -- The embedded text sequence is converted. | |
621 | -- | |
622 | -- | |
623 | -- See Also: | |
624 | -- [[:lower]] [[:upper]] | |
625 | ||
626 | 18 | |
627 | -- Converts text to lowercase and makes each word start with an uppercase. | |
628 | integer pos | |
629 | integer inword | |
630 | integer convert | |
631 | sequence res | |
632 | ||
633 | 18 | inword = 0 -- Initially not in a word |
634 | 18 | convert = 1 -- Initially convert text |
635 | 18 | res = x -- Work on a copy of the original, in case we need to restore. |
636 | 18 | for i = 1 to length(res) do |
637 | 214 | if integer(res[i]) then |
638 | 201 | if convert then |
639 | -- Check for upper case | |
640 | 200 | pos = t_upper(res[i]) |
641 | 200 | if pos = 0 then |
642 | -- Not upper, so check for lower case | |
643 | 125 | pos = t_lower(res[i]) |
644 | 125 | if pos = 0 then |
645 | -- Not lower so check for digits | |
646 | -- n.b. digits have no effect on whether it's in a word or not. | |
647 | 27 | pos = t_digit(res[i]) |
648 | 27 | if pos = 0 then |
649 | -- not digit so check for special word chars | |
650 | 20 | pos = t_specword(res[i]) |
651 | 20 | if pos then |
652 | 3 | inword = 1 |
653 | else | |
654 | 17 | inword = 0 |
655 | end if | |
656 | end if | |
657 | else | |
658 | 98 | if inword = 0 then |
659 | -- start of word, so convert only lower to upper. | |
660 | 18 | if pos <= 26 then |
661 | 18 | res[i] = upper(res[i]) -- Convert to uppercase |
662 | end if | |
663 | 18 | inword = 1 -- now we are in a word |
664 | end if | |
665 | end if | |
666 | else | |
667 | 75 | if inword = 1 then |
668 | -- Upper, but as we are in a word convert it to lower. | |
669 | 66 | res[i] = lower(res[i]) -- Convert to lowercase |
670 | else | |
671 | 9 | inword = 1 -- now we are in a word |
672 | end if | |
673 | end if | |
674 | end if | |
675 | else | |
676 | -- A non-integer means this is NOT a text sequence, so | |
677 | -- only convert sub-sequences. | |
678 | 13 | if convert then |
679 | -- Restore any values that might have been converted. | |
680 | 5 | for j = 1 to i-1 do |
681 | 3 | if atom(x[j]) then |
682 | 3 | res[j] = x[j] |
683 | end if | |
684 | 3 | end for |
685 | -- Turn conversion off for the rest of this level. | |
686 | 5 | convert = 0 |
687 | end if | |
688 | ||
689 | 13 | if sequence(res[i]) then |
690 | 10 | res[i] = proper(res[i]) -- recursive conversion |
691 | end if | |
692 | end if | |
693 | 214 | end for |
694 | 18 | return res |
695 | end function | |
696 | ||
697 | --** | |
698 | -- Converts a string containing Key/Value pairs into a set of | |
699 | -- sequences, one per K/V pair. | |
700 | -- | |
701 | -- Parameters: | |
702 | -- # ##source## : a text sequence, containing the representation of the key/values. | |
703 | -- # ##pair_delim## : an object containing a list of elements that delimit one | |
704 | -- key/value pair from the next. The defaults are semi-colon (;) | |
705 | -- and comma (,). | |
706 | -- # ##kv_delim## : an object containing a list of elements that delimit the | |
707 | -- key from its value. The defaults are colon (:) and equal (=). | |
708 | -- # ##quotes## : an object containing a list of elements that can be used to | |
709 | -- enclose either keys or values that contain delimiters or | |
710 | -- whitespace. The defaults are double-quote ("), single-quote (') | |
711 | -- and back-quote (`) | |
712 | -- # ##whitespace## : an object containing a list of elements that are regarded | |
713 | -- as whitespace characters. The defaults are space, tab, new-line, | |
714 | -- and carriage-return. | |
715 | -- # ##haskeys## : an integer containing true or false. The default is true. When | |
716 | -- ##true##, the ##kv_delim## values are used to separate keys from values, but | |
717 | -- when ##false## it is assumed that each 'pair' is actually just a value. | |
718 | -- | |
719 | -- Returns: | |
720 | -- A **sequence**, of pairs. Each pair is in the form {key, value}. | |
721 | -- | |
722 | -- Comments: | |
723 | -- | |
724 | -- String representations of atoms are not converted, either in the key or value part, but returned as any regular string instead. | |
725 | -- | |
726 | -- If ##haskeys## is ##true##, but a substring only holds what appears to be a value, the key | |
727 | -- is synthesized as ##p[n]##, where ##n## is the number of the pair. See example #2. | |
728 | -- | |
729 | -- By default, pairs can be delimited by either a comma or semi-colon ",;" and | |
730 | -- a key is delimited from its value by either an equal or a colon "=:". | |
731 | -- Whitespace between pairs, and between delimiters is ignored. | |
732 | -- | |
733 | -- If you need to have one of the delimiters in the value data, enclose it in | |
734 | -- quotation marks. You can use any of single, double and back quotes, which | |
735 | -- also means you can quote quotation marks themselves. See example #3. | |
736 | -- | |
737 | -- It is possible that the value data itself is a nested set of pairs. To do | |
738 | -- this enclose the value in parentheses. Nested sets can be nested to any level. | |
739 | -- See example #4. | |
740 | -- | |
741 | -- If a sub-list has only data values and not keys, enclose it in either braces | |
742 | -- or square brackets. See example #5. | |
743 | -- If you need to have a bracket as the first character in a data value, prefix | |
744 | -- it with a tilde. Actually a leading tilde will always just be stripped off | |
745 | -- regardless of what it prefixes. See example #6. | |
746 | -- | |
747 | -- Example 1: | |
748 | -- | |
749 | -- s = keyvalues("foo=bar, qwe=1234, asdf='contains space, comma, and equal(=)'") | |
750 | -- -- s is { {"foo", "bar"}, {"qwe", "1234"}, {"asdf", "contains space, comma, and equal(=)"}} | |
751 | -- | |
752 | -- | |
753 | -- Example 2: | |
754 | -- | |
755 | -- s = keyvalues("abc fgh=ijk def") | |
756 | -- -- s is { {"p[1]", "abc"}, {"fgh", "ijk"}, {"p[3]", "def"} } | |
757 | -- | |
758 | -- | |
759 | -- Example 3: | |
760 | -- | |
761 | -- s = keyvalues("abc=`'quoted'`") | |
762 | -- -- s is { {"abc", "'quoted'"} } | |
763 | -- | |
764 | -- | |
765 | -- Example 4: | |
766 | -- | |
767 | -- s = keyvalues("colors=(a=black, b=blue, c=red)") | |
768 | -- -- s is { {"colors", {{"a", "black"}, {"b", "blue"},{"c", "red"}} } } | |
769 | -- s = keyvalues("colors=(black=[0,0,0], blue=[0,0,FF], red=[FF,0,0])") | |
770 | -- -- s is { {"colors", {{"black",{"0", "0", "0"}}, {"blue",{"0", "0", "FF"}},{"red", {"FF","0","0"}}}} } | |
771 | -- | |
772 | -- | |
773 | -- Example 5: | |
774 | -- | |
775 | -- s = keyvalues("colors=[black, blue, red]") | |
776 | -- -- s is { {"colors", { "black", "blue", "red"} } } | |
777 | -- | |
778 | -- | |
779 | -- Example 6: | |
780 | -- | |
781 | -- s = keyvalues("colors=~[black, blue, red]") | |
782 | -- s is { {"colors", "[black, blue, red]"} } | |
783 | -- The following is another way to do the same. | |
784 | -- s = keyvalues("colors=`[black, blue, red]`") | |
785 | -- s is { {"colors", "[black, blue, red]"} } | |
786 | -- | |
787 | ||
788 | 43 | |
789 | object kv_delim = ":=", object quotes = "\"'`", | |
790 | object whitespace = " \t\n\r", integer haskeys = 1) | |
791 | ||
792 | sequence lKeyValues | |
793 | sequence value_ | |
794 | sequence key_ | |
795 | sequence lAllDelim | |
796 | sequence lWhitePair | |
797 | sequence lStartBracket | |
798 | sequence lEndBracket | |
799 | sequence lBracketed | |
800 | integer lQuote | |
801 | integer pos_ | |
802 | integer lChar | |
803 | integer lBPos | |
804 | integer lWasKV | |
805 | ||
806 | 43 | source = trim(source) |
807 | 43 | if length(source) = 0 then |
808 | 1 | return {} |
809 | end if | |
810 | ||
811 | 42 | if atom(pair_delim) then |
812 | 1 | pair_delim = {pair_delim} |
813 | end if | |
814 | 42 | if atom(kv_delim) then |
815 | 1 | kv_delim = {kv_delim} |
816 | end if | |
817 | 42 | if atom(quotes) then |
818 | 1 | quotes = {quotes} |
819 | end if | |
820 | 42 | if atom(whitespace) then |
821 | 1 | whitespace = {whitespace} |
822 | end if | |
823 | ||
824 | 42 | lAllDelim = whitespace & pair_delim & kv_delim |
825 | 42 | lWhitePair = whitespace & pair_delim |
826 | 42 | lStartBracket = "{[(" |
827 | 42 | lEndBracket = "}])" |
828 | ||
829 | 42 | lKeyValues = {} |
830 | 42 | pos_ = 1 |
831 | 42 | while pos_ <= length(source) do |
832 | -- ignore leading whitespace | |
833 | 89 | while pos_ < length(source) do |
834 | 112 | if find(source[pos_], whitespace) = 0 then |
835 | 86 | exit |
836 | end if | |
837 | 26 | pos_ +=1 |
838 | 26 | end while |
839 | ||
840 | -- Get key. Ends at any of unquoted whitespace or unquoted delimiter | |
841 | 89 | key_ = "" |
842 | 89 | lQuote = 0 |
843 | 89 | lChar = 0 |
844 | 89 | lWasKV = 0 |
845 | 89 | if haskeys then |
846 | 50 | while pos_ <= length(source) do |
847 | 239 | lChar = source[pos_] |
848 | 239 | if find(lChar, quotes) != 0 then |
849 | 0 | if lChar = lQuote then |
850 | -- End of quoted span | |
851 | 0 | lQuote = 0 |
852 | 0 | lChar = -1 |
853 | 0 | elsif lQuote = 0 then |
854 | -- Start of quoted span | |
855 | 0 | lQuote = lChar |
856 | 0 | lChar = -1 |
857 | end if | |
858 | ||
859 | 239 | elsif lQuote = 0 and find(lChar, lAllDelim) != 0 then |
860 | 48 | exit |
861 | ||
862 | end if | |
863 | 191 | if lChar > 0 then |
864 | 191 | key_ &= lChar |
865 | end if | |
866 | 191 | pos_ += 1 |
867 | 191 | end while |
868 | ||
869 | -- ignore next whitespace | |
870 | 50 | if find(lChar, whitespace) != 0 then |
871 | 6 | pos_ += 1 |
872 | 6 | while pos_ <= length(source) do |
873 | 24 | lChar = source[pos_] |
874 | 24 | if find(lChar, whitespace) = 0 then |
875 | 6 | exit |
876 | end if | |
877 | 18 | pos_ +=1 |
878 | 18 | end while |
879 | end if | |
880 | else | |
881 | 39 | pos_ -= 1 -- Put back the last char. |
882 | end if | |
883 | ||
884 | 89 | value_ = "" |
885 | 89 | if find(lChar, kv_delim) != 0 or not haskeys then |
886 | ||
887 | 85 | if find(lChar, kv_delim) != 0 then |
888 | 46 | lWasKV = 1 |
889 | end if | |
890 | ||
891 | -- ignore next whitespace | |
892 | 85 | pos_ += 1 |
893 | 85 | while pos_ <= length(source) do |
894 | 90 | lChar = source[pos_] |
895 | 90 | if find(lChar, whitespace) = 0 then |
896 | 85 | exit |
897 | end if | |
898 | 5 | pos_ +=1 |
899 | 5 | end while |
900 | ||
901 | -- Get value. Ends at any of unquoted whitespace or unquoted delimiter | |
902 | 85 | lQuote = 0 |
903 | 85 | lChar = 0 |
904 | 85 | lBracketed = {} |
905 | 85 | while pos_ <= length(source) do |
906 | 769 | lChar = source[pos_] |
907 | 769 | if length(lBracketed) = 0 and find(lChar, quotes) != 0 then |
908 | 35 | if lChar = lQuote then |
909 | -- End of quoted span | |
910 | 15 | lQuote = 0 |
911 | 15 | lChar = -1 |
912 | 20 | elsif lQuote = 0 then |
913 | -- Start of quoted span | |
914 | 15 | lQuote = lChar |
915 | 15 | lChar = -1 |
916 | end if | |
917 | 734 | elsif find(lChar, lStartBracket) > 0 then |
918 | 26 | lBPos = find(lChar, lStartBracket) |
919 | 26 | lBracketed &= lEndBracket[lBPos] |
920 | ||
921 | 708 | elsif length(value_) = 1 and value_[1] = '~' and find(lChar, lStartBracket) > 0 then |
922 | 0 | lBPos = find(lChar, lStartBracket) |
923 | 0 | lBracketed &= lEndBracket[lBPos] |
924 | ||
925 | 708 | elsif length(lBracketed) != 0 and lChar = lBracketed[$] then |
926 | 26 | lBracketed = lBracketed[1..$-1] |
927 | ||
928 | 682 | elsif length(lBracketed) = 0 and lQuote = 0 and find(lChar, lWhitePair) != 0 then |
929 | 45 | exit |
930 | ||
931 | end if | |
932 | ||
933 | 724 | if lChar > 0 then |
934 | 694 | value_ &= lChar |
935 | end if | |
936 | 724 | pos_ += 1 |
937 | 724 | end while |
938 | ||
939 | 85 | if find(lChar, whitespace) != 0 then |
940 | -- ignore next whitespace | |
941 | 6 | pos_ += 1 |
942 | 6 | while pos_ <= length(source) do |
943 | 6 | lChar = source[pos_] |
944 | 6 | if find(lChar, whitespace) = 0 then |
945 | 6 | exit |
946 | end if | |
947 | 0 | pos_ +=1 |
948 | 0 | end while |
949 | end if | |
950 | ||
951 | 85 | if find(lChar, pair_delim) != 0 then |
952 | 39 | pos_ += 1 |
953 | 39 | if pos_ <= length(source) then |
954 | 39 | lChar = source[pos_] |
955 | end if | |
956 | end if | |
957 | end if | |
958 | ||
959 | 89 | if find(lChar, pair_delim) != 0 then |
960 | 0 | pos_ += 1 |
961 | end if | |
962 | ||
963 | 89 | if length(value_) = 0 then |
964 | 4 | if length(key_) = 0 then |
965 | 0 | lKeyValues = append(lKeyValues, {}) |
966 | 0 | continue |
967 | end if | |
968 | ||
969 | 4 | if not lWasKV then |
970 | 4 | value_ = key_ |
971 | 4 | key_ = "" |
972 | end if | |
973 | end if | |
974 | ||
975 | 89 | if length(key_) = 0 then |
976 | 43 | if haskeys then |
977 | 4 | key_ = sprintf("p[%d]", length(lKeyValues) + 1) |
978 | end if | |
979 | end if | |
980 | ||
981 | 89 | if length(value_) > 0 then |
982 | 89 | lChar = value_[1] |
983 | 89 | lBPos = find(lChar, lStartBracket) |
984 | 89 | if lBPos > 0 and value_[$] = lEndBracket[lBPos] then |
985 | 16 | if lChar = '(' then |
986 | 4 | value_ = keyvalues(value_[2..$-1], pair_delim, kv_delim, quotes, whitespace, haskeys) |
987 | else | |
988 | 12 | value_ = keyvalues(value_[2..$-1], pair_delim, kv_delim, quotes, whitespace, 0) |
989 | end if | |
990 | 73 | elsif lChar = '~' then |
991 | 2 | value_ = value_[2 .. $] |
992 | end if | |
993 | end if | |
994 | ||
995 | 89 | key_ = trim(key_) |
996 | 89 | value_ = trim(value_) |
997 | 89 | if length(key_) = 0 then |
998 | 39 | lKeyValues = append(lKeyValues, value_) |
999 | else | |
1000 | 50 | lKeyValues = append(lKeyValues, {key_, value_}) |
1001 | end if | |
1002 | ||
1003 | 89 | end while |
1004 | ||
1005 | 42 | return lKeyValues |
1006 | end function | |
1007 | ||
1008 | --** | |
1009 | -- Escape special characters in a string | |
1010 | -- | |
1011 | -- Parameters: | |
1012 | -- # ##s##: string to escape | |
1013 | -- # ##what##: sequence of characters to escape | |
1014 | -- defaults to escaping a double quote. | |
1015 | -- | |
1016 | -- Returns: | |
1017 | -- An escaped ##sequence## representing ##s##. | |
1018 | -- | |
1019 | -- Example 1: | |
1020 | -- | |
1021 | -- sequence s = escape("John \"Mc\" Doe") | |
1022 | -- puts(1, s) | |
1023 | -- -- output is: John \"Mc\" Doe | |
1024 | -- | |
1025 | -- | |
1026 | -- See Also: | |
1027 | -- [[:quote]] | |
1028 | -- | |
1029 | ||
1030 | 3 | |
1031 | 3 | sequence r = "" |
1032 | ||
1033 | 3 | for i = 1 to length(s) do |
1034 | 40 | if find(s[i], what) then |
1035 | 9 | r &= "\\" |
1036 | end if | |
1037 | 40 | r &= s[i] |
1038 | 40 | end for |
1039 | ||
1040 | 3 | return r |
1041 | end function | |
1042 | ||
1043 | ||
1044 | --** | |
1045 | -- Return a quoted version of the first argument. | |
1046 | -- | |
1047 | -- Parameters: | |
1048 | -- # ##text_in## : The string or set of strings to quote. | |
1049 | -- # ##quote_pair## : A sequence of two strings. The first string is the opening | |
1050 | -- quote to use, and the second string is the closing quote to use. | |
1051 | -- The default is {"\"", "\""} which means that the output will be | |
1052 | -- enclosed by double-quotation marks. | |
1053 | -- # ##esc## : A single escape character. If this is zero or greater, it is | |
1054 | -- used to 'escape' any embedded quote characters and 'esc' characters | |
1055 | -- already in the ##text_in## string. By default no escaping is done. | |
1056 | -- # ##sp## : A list of zero or more special characters. The ##text_in## is only | |
1057 | -- quoted if it contains any of the special characters. The default | |
1058 | -- is "" which means that the ##text_in## is always quoted. | |
1059 | -- | |
1060 | -- Returns: | |
1061 | -- A **sequence**, the quoted version of ##text_in##. | |
1062 | -- | |
1063 | -- Example 1: | |
1064 | -- | |
1065 | -- -- Using the defaults. Output enclosed in double-quotes, no escapes and no specials. | |
1066 | -- s = quote("The small man") | |
1067 | -- 's' now contains '"The small man"' including the double-quote characters. | |
1068 | -- | |
1069 | -- | |
1070 | -- Example 2: | |
1071 | -- | |
1072 | -- s = quote("The small man", {"(", ")"} ) | |
1073 | -- 's' now contains '(The small man)' | |
1074 | -- | |
1075 | -- | |
1076 | -- Example 3: | |
1077 | -- | |
1078 | -- s = quote("The (small) man", {"(", ")"}, '~' ) | |
1079 | -- -- 's' now contains '(The ~(small~) man)' | |
1080 | -- | |
1081 | -- | |
1082 | -- Example 4: | |
1083 | -- | |
1084 | -- s = quote("The (small) man", {"(", ")"}, '~', "#" ) | |
1085 | -- 's' now contains "The (small) man" | |
1086 | -- -- because the input did not contain a '#' character. | |
1087 | -- | |
1088 | -- | |
1089 | -- Example 5: | |
1090 | -- | |
1091 | -- s = quote("The #1 (small) man", {"(", ")"}, '~', "#" ) | |
1092 | -- 's' now contains '(The #1 ~(small~) man)' | |
1093 | -- -- because the input did contain a '#' character. | |
1094 | -- | |
1095 | -- | |
1096 | -- Example 6: | |
1097 | -- | |
1098 | -- -- input is a set of strings... | |
1099 | -- s = quote({"a b c", "def", "g hi"}) | |
1100 | -- -- 's' now contains three quoted strings: '"a b c"', '"def"', and '"g hi"' | |
1101 | -- | |
1102 | -- | |
1103 | -- See Also: | |
1104 | -- [[:escape]] | |
1105 | -- | |
1106 | ||
1107 | 222 | |
1108 | t_text sp = "" ) | |
1109 | 222 | if length(text_in) = 0 then |
1110 | 1 | return text_in |
1111 | end if | |
1112 | ||
1113 | 221 | if atom(quote_pair) then |
1114 | 1 | quote_pair = {{quote_pair}, {quote_pair}} |
1115 | 220 | elsif length(quote_pair) = 1 then |
1116 | 4 | quote_pair = {quote_pair[1], quote_pair[1]} |
1117 | 216 | elsif length(quote_pair) = 0 then |
1118 | 1 | quote_pair = {"\"", "\""} |
1119 | end if | |
1120 | ||
1121 | 221 | if sequence(text_in[1]) then |
1122 | 14 | for i = 1 to length(text_in) do |
1123 | 193 | if sequence(text_in[i]) then |
1124 | 193 | text_in[i] = quote(text_in[i], quote_pair, esc, sp) |
1125 | end if | |
1126 | 193 | end for |
1127 | ||
1128 | 14 | return text_in |
1129 | end if | |
1130 | ||
1131 | -- Only quote the input if it contains any of the items in 'sp' | |
1132 | 207 | for i = 1 to length(sp) do |
1133 | 195 | if find(sp[i], text_in) then |
1134 | 15 | exit |
1135 | end if | |
1136 | ||
1137 | 180 | if i = length(sp) then |
1138 | -- Contains none of them, so just return the input untouched. | |
1139 | 179 | return text_in |
1140 | end if | |
1141 | 1 | end for |
1142 | ||
1143 | 28 | if esc >= 0 then |
1144 | -- If the input already contains a quote, replace them with esc-quote, | |
1145 | -- but make sure that if the input already contains esc-quote that all | |
1146 | -- embedded escapes are replaced with esc-esc first. | |
1147 | 21 | if atom(quote_pair[1]) then |
1148 | 1 | quote_pair[1] = {quote_pair[1]} |
1149 | end if | |
1150 | 21 | if atom(quote_pair[2]) then |
1151 | 1 | quote_pair[2] = {quote_pair[2]} |
1152 | end if | |
1153 | ||
1154 | 21 | if equal(quote_pair[1], quote_pair[2]) then |
1155 | -- Simple case where both open and close quote are the same. | |
1156 | 16 | if match(quote_pair[1], text_in) then |
1157 | 4 | if match(esc & quote_pair[1], text_in) then |
1158 | 1 | text_in = replace_all(text_in, esc, esc & esc) |
1159 | end if | |
1160 | 4 | text_in = replace_all(text_in, quote_pair[1], esc & quote_pair[1]) |
1161 | end if | |
1162 | else | |
1163 | 5 | if match(quote_pair[1], text_in) or |
1164 | match(quote_pair[2], text_in) then | |
1165 | 5 | if match(esc & quote_pair[1], text_in) then |
1166 | 1 | text_in = replace_all(text_in, esc & quote_pair[1], esc & esc & quote_pair[1]) |
1167 | end if | |
1168 | 5 | text_in = replace_all(text_in, quote_pair[1], esc & quote_pair[1]) |
1169 | end if | |
1170 | ||
1171 | 5 | if match(quote_pair[2], text_in) then |
1172 | 5 | if match(esc & quote_pair[2], text_in) then |
1173 | 1 | text_in = replace_all(text_in, esc & quote_pair[2], esc & esc & quote_pair[2]) |
1174 | end if | |
1175 | 5 | text_in = replace_all(text_in, quote_pair[2], esc & quote_pair[2]) |
1176 | end if | |
1177 | end if | |
1178 | end if | |
1179 | ||
1180 | 28 | return quote_pair[1] & text_in & quote_pair[2] |
1181 | ||
1182 | end function | |
1183 | ||
1184 | --** | |
1185 | -- Removes 'quotation' text from the argument. | |
1186 | -- | |
1187 | -- Parameters: | |
1188 | -- # ##text_in## : The string or set of strings to de-quote. | |
1189 | -- # ##quote_pairs## : A set of one or more sub-sequences of two strings, | |
1190 | -- or an atom representing a single character to be used as | |
1191 | -- both the open and close quotes. | |
1192 | -- The first string in each sub-sequence is the opening | |
1193 | -- quote to look for, and the second string is the closing quote. | |
1194 | -- The default is {{{"\"", "\""}}} which means that the output is | |
1195 | -- 'quoted' if it is enclosed by double-quotation marks. | |
1196 | -- # ##esc## : A single escape character. If this is not negative (the default), | |
1197 | -- then this is used to 'escape' any embedded occurrences of the | |
1198 | -- quote characters. In which case the 'escape' character is also | |
1199 | -- removed. | |
1200 | -- | |
1201 | -- Returns: | |
1202 | -- A **sequence**, the original text but with 'quote' strings stripped of quotes. | |
1203 | -- | |
1204 | -- Example 1: | |
1205 | -- | |
1206 | -- -- Using the defaults. | |
1207 | -- s = dequote("\"The small man\"") | |
1208 | -- -- 's' now contains "The small man" | |
1209 | -- | |
1210 | -- | |
1211 | -- Example 2: | |
1212 | -- | |
1213 | -- Using a custom quote pair and an escape character. | |
1214 | -- s = dequote("(The small ?(?) man)", {{"(",")"}}, '?') | |
1215 | -- -- 's' now contains "The small () man" | |
1216 | -- | |
1217 | -- | |
1218 | 8 | |
1219 | ||
1220 | 8 | if length(text_in) = 0 then |
1221 | 1 | return text_in |
1222 | end if | |
1223 | ||
1224 | 7 | if atom(quote_pairs) then |
1225 | 1 | quote_pairs = {{{quote_pairs}, {quote_pairs}}} |
1226 | 6 | elsif length(quote_pairs) = 1 then |
1227 | 2 | quote_pairs = {quote_pairs[1], quote_pairs[1]} |
1228 | 4 | elsif length(quote_pairs) = 0 then |
1229 | 1 | quote_pairs = {{"\"", "\""}} |
1230 | end if | |
1231 | ||
1232 | 7 | if sequence(text_in[1]) then |
1233 | 1 | for i = 1 to length(text_in) do |
1234 | 2 | if sequence(text_in[i]) then |
1235 | 2 | text_in[i] = dequote(text_in[i], quote_pairs, esc) |
1236 | end if | |
1237 | 2 | end for |
1238 | ||
1239 | 1 | return text_in |
1240 | end if | |
1241 | ||
1242 | -- If the text begins and ends with a quote-pair then strip them off and | |
1243 | -- remove the 'escape' from any 'escaped' quote_pairs within the text. | |
1244 | 6 | for i = 1 to length(quote_pairs) do |
1245 | 7 | if length(text_in) >= length(quote_pairs[i][1]) + length(quote_pairs[i][2]) then |
1246 | 7 | if begins(quote_pairs[i][1], text_in) and ends(quote_pairs[i][2], text_in) then |
1247 | 6 | text_in = text_in[1 + length(quote_pairs[i][1]) .. $ - length(quote_pairs[i][2])] |
1248 | 6 | integer pos = 1 |
1249 | 6 | while pos > 0 with entry do |
1250 | 4 | if begins(quote_pairs[i][1], text_in[pos+1 .. $]) then |
1251 | 2 | text_in = text_in[1 .. pos-1] & text_in[pos + 1 .. $] |
1252 | 2 | elsif begins(quote_pairs[i][2], text_in[pos+1 .. $]) then |
1253 | 2 | text_in = text_in[1 .. pos-1] & text_in[pos + 1 .. $] |
1254 | else | |
1255 | 0 | pos += 1 |
1256 | end if | |
1257 | entry | |
1258 | 10 | pos = find_from(esc, text_in, pos) |
1259 | 10 | end while |
1260 | 6 | exit |
1261 | end if | |
1262 | end if | |
1263 | 1 | end for |
1264 | ||
1265 | 6 | return text_in |
1266 | end function | |
1267 | ||
1268 | --** | |
1269 | -- Formats a set of arguments into a string based on a supplied pattern. | |
1270 | -- | |
1271 | -- Parameters: | |
1272 | -- # ##format_pattern## : A sequence: the pattern string that contains zero or more tokens. | |
1273 | -- # ##arg_list## : An object: Zero or more arguments used in token replacement. | |
1274 | -- | |
1275 | -- Returns: | |
1276 | -- A string **sequence**, the original ##format_pattern## but with tokens replaced by | |
1277 | -- corresponding arguments. | |
1278 | -- | |
1279 | -- Comments: | |
1280 | -- The ##format_pattern## string contains text and argument tokens. The resulting string | |
1281 | -- is the same as the format string except that each token is replaced by an | |
1282 | -- item from the argument list. | |
1283 | -- | |
1284 | -- A token has the form **##[Q]##**, where ##Q## stands for zero or more optional qualifier codes. | |
1285 | -- | |
1286 | -- The qualifier, ##Q##, is a set of zero or more codes that modify the default | |
1287 | -- way that the argument is used to replace the token. The default replacement | |
1288 | -- method is to convert the argument to its shortest string representation and | |
1289 | -- use that to replace the token. This may be modified by the following codes, | |
1290 | -- which can occur in any order. | |
1291 | -- |= Qualifier |= Usage | | |
1292 | -- | N | ('N' is an integer) The index of the argument to use| | |
1293 | -- | {id} | Uses the argument that begins with "id=" where "id" \\ | |
1294 | -- is an identifier name. | | |
1295 | -- | %envvar% | Uses the Environment Symbol 'envvar' as an argument | | |
1296 | -- | w | For string arguments, it capitalizes the first\\ | |
1297 | -- letter in each word | | |
1298 | -- | u | For string arguments, it converts it to upper case. | | |
1299 | -- | l | For string arguments, it converts it to lower case. | | |
1300 | -- | < | For numeric arguments, it left justifies it. | | |
1301 | -- | > | For string arguments, it right justifies it. | | |
1302 | -- | c | Centers the argument. | | |
1303 | -- | z | For numbers, it zero fills the left side. | | |
1304 | -- | :S | ('S' is an integer) The maximum size of the\\ | |
1305 | -- resulting field. Also, if 'S' begins with '0' the\\ | |
1306 | -- field will be zero-filled if the argument is an integer| | |
1307 | -- | .N | ('N' is an integer) The number of digits after\\ | |
1308 | -- the decimal point | | |
1309 | -- | + | For positive numbers, show a leading plus sign | | |
1310 | -- | ( | For negative numbers, enclose them in parentheses | | |
1311 | -- | b | For numbers, causes zero to be all blanks | | |
1312 | -- | s | If the resulting field would otherwise be zero\\ | |
1313 | -- length, this ensures that at least one space occurs\\ | |
1314 | -- between this token's field | | |
1315 | -- | t | After token replacement, the resulting string up to this point is trimmed. | | |
1316 | -- | X | Outputs integer arguments using hexadecimal digits. | | |
1317 | -- | B | Outputs integer arguments using binary digits. | | |
1318 | -- | ? | The corresponding argument is a set of two strings. This\\ | |
1319 | -- uses the first string if the previous token's argument is\\ | |
1320 | -- an atom other than 1 or is a zero-length sequence, otherwise it\\ | |
1321 | -- uses the second string. | | |
1322 | -- | [ | Does not use any argument. Outputs a left-square-bracket symbol | | |
1323 | -- | ,X | Insert thousands separators. The 'X' is the character\\ | |
1324 | -- to use. If this is a dot "." then the decimal point\\ | |
1325 | -- is rendered using a comma. Does not apply to zero-filled\\ | |
1326 | -- fields. \\ | |
1327 | -- N.B. if hex or binary output was specified, the \\ | |
1328 | -- separators are every 4 digits otherwise they are \\ | |
1329 | -- every three digits. | | |
1330 | -- | |
1331 | -- Clearly, certain combinations of these qualifier codes do not make sense and in | |
1332 | -- those situations, the rightmost clashing code is used and the others are ignored. | |
1333 | -- | |
1334 | -- Any tokens in the format that have no corresponding argument are simply removed | |
1335 | -- from the result. Any arguments that are not used in the result are ignored. | |
1336 | -- | |
1337 | -- Any sequence argument that is not a string will be converted to its | |
1338 | -- //pretty// format before being used in token replacement. | |
1339 | -- | |
1340 | -- If a token is going to be replaced by a zero-length argument, all white space | |
1341 | -- following the token until the next non-whitespace character is not copied to | |
1342 | -- the result string. | |
1343 | -- | |
1344 | -- Examples: | |
1345 | -- | |
1346 | -- format("Cannot open file '[]' - code []", {"/usr/temp/work.dat", 32}) | |
1347 | -- -- "Cannot open file '/usr/temp/work.dat' - code 32" | |
1348 | -- | |
1349 | -- format("Err-[2], Cannot open file '[1]'", {"/usr/temp/work.dat", 32}) | |
1350 | -- -- "Err-32, Cannot open file '/usr/temp/work.dat'" | |
1351 | -- | |
1352 | -- format("[4w] [3z:2] [6] [5l] [2z:2], [1:4]", {2009,4,21,"DAY","MONTH","of"}) | |
1353 | -- -- "Day 21 of month 04, 2009" | |
1354 | -- | |
1355 | -- format("The answer is [:6.2]%", {35.22341}) | |
1356 | -- -- "The answer is 35.22%" | |
1357 | -- | |
1358 | -- format("The answer is [.6]", {1.2345}) | |
1359 | -- -- "The answer is 1.234500" | |
1360 | -- | |
1361 | -- format("The answer is [,,.2]", {1234.56}) | |
1362 | -- -- "The answer is 1,234.56" | |
1363 | -- | |
1364 | -- format("The answer is [,..2]", {1234.56}) | |
1365 | -- -- "The answer is 1.234,56" | |
1366 | -- | |
1367 | -- format("The answer is [,:.2]", {1234.56}) | |
1368 | -- -- "The answer is 1:234.56" | |
1369 | -- | |
1370 | -- format("[] [?]", {5, {"cats", "cat"}}) | |
1371 | -- -- "5 cats" | |
1372 | -- | |
1373 | -- format("[] [?]", {1, {"cats", "cat"}}) | |
1374 | -- -- "1 cat" | |
1375 | -- | |
1376 | -- format("[<:4]", {"abcdef"}) | |
1377 | -- -- "abcd" | |
1378 | -- | |
1379 | -- format("[>:4]", {"abcdef"}) | |
1380 | -- -- "cdef" | |
1381 | -- | |
1382 | -- format("[>:8]", {"abcdef"}) | |
1383 | -- -- " abcdef" | |
1384 | -- | |
1385 | -- format("seq is []", {{1.2, 5, "abcdef", {3}}}) | |
1386 | -- -- `seq is {1.2,5,"abcdef",{3}}` | |
1387 | -- | |
1388 | -- format("Today is [{day}], the [{date}]", {"date=10/Oct/2012", "day=Wednesday"}) | |
1389 | -- -- "Today is Wednesday, the 10/Oct/2012" | |
1390 | -- | |
1391 | -- | |
1392 | -- See Also: | |
1393 | -- [[:sprintf]] | |
1394 | -- | |
1395 | ||
1396 | 64 | |
1397 | sequence result | |
1398 | integer in_token | |
1399 | integer tch | |
1400 | integer i | |
1401 | integer tstart | |
1402 | integer tend | |
1403 | integer cap | |
1404 | integer align | |
1405 | integer psign | |
1406 | integer msign | |
1407 | integer zfill | |
1408 | integer bwz | |
1409 | integer spacer | |
1410 | integer alt | |
1411 | integer width | |
1412 | integer decs | |
1413 | integer pos | |
1414 | integer argn | |
1415 | integer argl | |
1416 | integer trimming | |
1417 | integer hexout | |
1418 | integer binout | |
1419 | integer tsep | |
1420 | object prevargv | |
1421 | object currargv | |
1422 | sequence idname | |
1423 | object envsym | |
1424 | object envvar | |
1425 | ||
1426 | 64 | if atom(arg_list) then |
1427 | 21 | arg_list = {arg_list} |
1428 | end if | |
1429 | ||
1430 | 64 | result = "" |
1431 | 64 | in_token = 0 |
1432 | ||
1433 | ||
1434 | 64 | i = 0 |
1435 | 64 | tstart = 0 |
1436 | 64 | tend = 0 |
1437 | 64 | argl = 0 |
1438 | 64 | spacer = 0 |
1439 | 64 | prevargv = 0 |
1440 | 64 | while i < length(format_pattern) do |
1441 | 642 | i += 1 |
1442 | 642 | tch = format_pattern[i] |
1443 | 642 | if not in_token then |
1444 | 408 | if tch = '[' then |
1445 | 92 | in_token = 1 |
1446 | 92 | tstart = i |
1447 | 92 | tend = 0 |
1448 | 92 | cap = 0 |
1449 | 92 | align = 0 |
1450 | 92 | psign = 0 |
1451 | 92 | msign = 0 |
1452 | 92 | zfill = 0 |
1453 | 92 | bwz = 0 |
1454 | 92 | spacer = 0 |
1455 | 92 | alt = 0 |
1456 | 92 | width = 0 |
1457 | 92 | decs = -1 |
1458 | 92 | argn = 0 |
1459 | 92 | hexout = 0 |
1460 | 92 | binout = 0 |
1461 | 92 | trimming = 0 |
1462 | 92 | tsep = 0 |
1463 | 92 | idname = "" |
1464 | 92 | envvar = "" |
1465 | 92 | envsym = "" |
1466 | else | |
1467 | 316 | result &= tch |
1468 | end if | |
1469 | else | |
1470 | 234 | switch tch do |
1471 | case ']' then | |
1472 | 91 | in_token = 0 |
1473 | 91 | tend = i |
1474 | ||
1475 | case '[' then | |
1476 | 1 | result &= tch |
1477 | 1 | while i < length(format_pattern) do |
1478 | 10 | i += 1 |
1479 | 10 | if format_pattern[i] = ']' then |
1480 | 1 | in_token = 0 |
1481 | 1 | tstart = 0 |
1482 | 1 | tend = 0 |
1483 | 1 | exit |
1484 | end if | |
1485 | 9 | end while |
1486 | ||
1487 | case 'w', 'u', 'l' then | |
1488 | 5 | cap = tch |
1489 | ||
1490 | case 'b' then | |
1491 | 2 | bwz = 1 |
1492 | ||
1493 | case 's' then | |
1494 | 1 | spacer = 1 |
1495 | ||
1496 | case 't' then | |
1497 | 3 | trimming = 1 |
1498 | ||
1499 | case 'z' then | |
1500 | 19 | zfill = 1 |
1501 | ||
1502 | case 'X' then | |
1503 | 3 | hexout = 1 |
1504 | ||
1505 | case 'B' then | |
1506 | 4 | binout = 1 |
1507 | ||
1508 | case 'c', '<', '>' then | |
1509 | 8 | align = tch |
1510 | ||
1511 | case '+' then | |
1512 | 10 | psign = 1 |
1513 | ||
1514 | case '(' then | |
1515 | 6 | msign = 1 |
1516 | ||
1517 | case '?' then | |
1518 | 4 | alt = 1 |
1519 | ||
1520 | case ':' then | |
1521 | 32 | while i < length(format_pattern) do |
1522 | 67 | i += 1 |
1523 | 67 | tch = format_pattern[i] |
1524 | 67 | pos = find(tch, "0123456789") |
1525 | 67 | if pos = 0 then |
1526 | 32 | i -= 1 |
1527 | 32 | exit |
1528 | end if | |
1529 | 35 | width = width * 10 + pos - 1 |
1530 | 35 | if width = 0 then |
1531 | 1 | zfill = '0' |
1532 | end if | |
1533 | 35 | end while |
1534 | ||
1535 | case '.' then | |
1536 | 11 | decs = 0 |
1537 | 11 | while i < length(format_pattern) do |
1538 | 22 | i += 1 |
1539 | 22 | tch = format_pattern[i] |
1540 | 22 | pos = find(tch, "0123456789") |
1541 | 22 | if pos = 0 then |
1542 | 11 | i -= 1 |
1543 | 11 | exit |
1544 | end if | |
1545 | 11 | decs = decs * 10 + pos - 1 |
1546 | 11 | end while |
1547 | ||
1548 | case '{' then | |
1549 | -- Use a named argument. | |
1550 | 2 | integer sp |
1551 | ||
1552 | 2 | sp = i + 1 |
1553 | 2 | i = sp |
1554 | 2 | while i < length(format_pattern) do |
1555 | 9 | if format_pattern[i] = '}' then |
1556 | 2 | exit |
1557 | end if | |
1558 | 7 | if format_pattern[i] = ']' then |
1559 | 0 | exit |
1560 | end if | |
1561 | 7 | i += 1 |
1562 | 7 | end while |
1563 | 2 | idname = trim(format_pattern[sp .. i-1]) & '=' |
1564 | 2 | if format_pattern[i] = ']' then |
1565 | 0 | i -= 1 |
1566 | end if | |
1567 | ||
1568 | 2 | for j = 1 to length(arg_list) do |
1569 | 3 | if begins(idname, arg_list[j]) then |
1570 | 2 | if argn = 0 then |
1571 | 2 | argn = j |
1572 | 2 | exit |
1573 | end if | |
1574 | end if | |
1575 | 1 | if j = length(arg_list) then |
1576 | 0 | idname = "" |
1577 | 0 | argn = -1 |
1578 | end if | |
1579 | 1 | end for |
1580 | case '%' then | |
1581 | -- Use the environment symbol | |
1582 | 1 | integer sp |
1583 | ||
1584 | 1 | sp = i + 1 |
1585 | 1 | i = sp |
1586 | 1 | while i < length(format_pattern) do |
1587 | 8 | if format_pattern[i] = '%' then |
1588 | 1 | exit |
1589 | end if | |
1590 | 7 | if format_pattern[i] = ']' then |
1591 | 0 | exit |
1592 | end if | |
1593 | 7 | i += 1 |
1594 | 7 | end while |
1595 | 1 | envsym = trim(format_pattern[sp .. i-1]) |
1596 | 1 | if format_pattern[i] = ']' then |
1597 | 0 | i -= 1 |
1598 | end if | |
1599 | ||
1600 | 1 | envvar = getenv(envsym) |
1601 | ||
1602 | 1 | argn = -1 |
1603 | 1 | if atom(envvar) then |
1604 | 0 | envvar = "" |
1605 | end if | |
1606 | ||
1607 | ||
1608 | case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' then | |
1609 | 26 | if argn = 0 then |
1610 | 26 | i -= 1 |
1611 | 26 | while i < length(format_pattern) do |
1612 | 52 | i += 1 |
1613 | 52 | tch = format_pattern[i] |
1614 | 52 | pos = find(tch, "0123456789") |
1615 | 52 | if pos = 0 then |
1616 | 26 | i -= 1 |
1617 | 26 | exit |
1618 | end if | |
1619 | 26 | argn = argn * 10 + pos - 1 |
1620 | 26 | end while |
1621 | end if | |
1622 | ||
1623 | case ',' then | |
1624 | 5 | if i < length(format_pattern) then |
1625 | 5 | i +=1 |
1626 | 5 | tsep = format_pattern[i] |
1627 | end if | |
1628 | ||
1629 | case else | |
1630 | -- ignore it | |
1631 | end switch | |
1632 | ||
1633 | 234 | if tend > 0 then |
1634 | -- Time to replace the token. | |
1635 | 91 | sequence argtext = "" |
1636 | ||
1637 | 91 | if argn = 0 then |
1638 | 62 | argn = argl + 1 |
1639 | end if | |
1640 | 91 | argl = argn |
1641 | ||
1642 | 91 | if argn < 1 or argn > length(arg_list) then |
1643 | 1 | if length(envvar) > 0 then |
1644 | 1 | argtext = envvar |
1645 | 1 | currargv = envvar |
1646 | else | |
1647 | 0 | argtext = "" |
1648 | 0 | currargv ="" |
1649 | end if | |
1650 | else | |
1651 | 90 | if string(arg_list[argn]) then |
1652 | 34 | if length(idname) > 0 then |
1653 | 2 | argtext = arg_list[argn][length(idname) + 1 .. $] |
1654 | else | |
1655 | 32 | argtext = arg_list[argn] |
1656 | end if | |
1657 | ||
1658 | 56 | elsif integer(arg_list[argn]) then |
1659 | 36 | if bwz != 0 and arg_list[argn] = 0 then |
1660 | 2 | argtext = "" |
1661 | 34 | elsif binout = 1 then |
1662 | 4 | argtext = reverse(int_to_bits(arg_list[argn], 32)) + '0' |
1663 | 4 | for ib = 1 to length(argtext) do |
1664 | 76 | if argtext[ib] = '1' then |
1665 | 4 | argtext = argtext[ib .. $] |
1666 | 4 | exit |
1667 | end if | |
1668 | 72 | end for |
1669 | ||
1670 | 30 | elsif hexout = 0 then |
1671 | 27 | argtext = sprintf("%d", arg_list[argn]) |
1672 | 27 | if zfill != 0 and width > 0 then |
1673 | 15 | if length(argtext) > 0 then |
1674 | 15 | if argtext[1] = '-' then |
1675 | 2 | if width > length(argtext) then |
1676 | 2 | argtext = '-' & repeat('0', width - length(argtext)) & argtext[2..$] |
1677 | end if | |
1678 | else | |
1679 | 13 | if width > length(argtext) then |
1680 | 12 | argtext = repeat('0', width - length(argtext)) & argtext |
1681 | end if | |
1682 | end if | |
1683 | else | |
1684 | 0 | argtext = repeat('0', width - length(argtext)) & argtext |
1685 | end if | |
1686 | end if | |
1687 | ||
1688 | 27 | if arg_list[argn] > 0 then |
1689 | 20 | if psign then |
1690 | 2 | if zfill = 0 then |
1691 | 1 | argtext = '+' & argtext |
1692 | 1 | elsif argtext[1] = '0' then |
1693 | 1 | argtext[1] = '+' |
1694 | end if | |
1695 | end if | |
1696 | 7 | elsif arg_list[argn] < 0 then |
1697 | 4 | if msign then |
1698 | 2 | if zfill = 0 then |
1699 | 1 | argtext = '(' & argtext[2..$] & ')' |
1700 | else | |
1701 | 1 | if argtext[2] = '0' then |
1702 | 1 | argtext = '(' & argtext[3..$] & ')' |
1703 | else | |
1704 | 0 | argtext = argtext[2..$] & ')' |
1705 | end if | |
1706 | end if | |
1707 | end if | |
1708 | end if | |
1709 | else | |
1710 | 3 | argtext = sprintf("%x", arg_list[argn]) |
1711 | 3 | if zfill != 0 and width > 0 then |
1712 | 2 | if width > length(argtext) then |
1713 | 2 | argtext = repeat('0', width - length(argtext)) & argtext |
1714 | end if | |
1715 | end if | |
1716 | end if | |
1717 | ||
1718 | 20 | elsif atom(arg_list[argn]) then |
1719 | 15 | if bwz != 0 and arg_list[argn] = 0 then |
1720 | 0 | argtext = "" |
1721 | else | |
1722 | 15 | if hexout then |
1723 | 0 | argtext = sprintf("%x", arg_list[argn]) |
1724 | 0 | if zfill != 0 and width > 0 then |
1725 | 0 | if width > length(argtext) then |
1726 | 0 | argtext = repeat('0', width - length(argtext)) & argtext |
1727 | end if | |
1728 | end if | |
1729 | else | |
1730 | 15 | argtext = trim(sprintf("%15.15g", arg_list[argn])) |
1731 | 15 | if zfill != 0 and width > 0 then |
1732 | 3 | if length(argtext) > 0 then |
1733 | 3 | if width > length(argtext) then |
1734 | 3 | if argtext[1] = '-' then |
1735 | 2 | argtext = '-' & repeat('0', width - length(argtext)) & argtext[2..$] |
1736 | else | |
1737 | 1 | argtext = repeat('0', width - length(argtext)) & argtext |
1738 | end if | |
1739 | end if | |
1740 | else | |
1741 | 0 | argtext = repeat('0', width - length(argtext)) & argtext |
1742 | end if | |
1743 | end if | |
1744 | 15 | if arg_list[argn] > 0 then |
1745 | 11 | if psign then |
1746 | 2 | if zfill = 0 then |
1747 | 1 | argtext = '+' & argtext |
1748 | 1 | elsif argtext[1] = '0' then |
1749 | 1 | argtext[1] = '+' |
1750 | end if | |
1751 | end if | |
1752 | 4 | elsif arg_list[argn] < 0 then |
1753 | 4 | if msign then |
1754 | 2 | if zfill = 0 then |
1755 | 1 | argtext = '(' & argtext[2..$] & ')' |
1756 | else | |
1757 | 1 | if argtext[2] = '0' then |
1758 | 1 | argtext = '(' & argtext[3..$] & ')' |
1759 | else | |
1760 | 0 | argtext = argtext[2..$] & ')' |
1761 | end if | |
1762 | end if | |
1763 | end if | |
1764 | end if | |
1765 | end if | |
1766 | end if | |
1767 | ||
1768 | else | |
1769 | 5 | if alt != 0 and length(arg_list[argn]) = 2 then |
1770 | 4 | object tempv |
1771 | 4 | if atom(prevargv) then |
1772 | 2 | if prevargv != 1 then |
1773 | 1 | tempv = arg_list[argn][1] |
1774 | else | |
1775 | 1 | tempv = arg_list[argn][2] |
1776 | end if | |
1777 | else | |
1778 | 2 | if length(prevargv) = 0 then |
1779 | 1 | tempv = arg_list[argn][1] |
1780 | else | |
1781 | 1 | tempv = arg_list[argn][2] |
1782 | end if | |
1783 | end if | |
1784 | ||
1785 | 4 | if string(tempv) then |
1786 | 4 | argtext = tempv |
1787 | 0 | elsif integer(tempv) then |
1788 | 0 | if bwz != 0 and tempv = 0 then |
1789 | 0 | argtext = "" |
1790 | else | |
1791 | 0 | argtext = sprintf("%d", tempv) |
1792 | end if | |
1793 | ||
1794 | 0 | elsif atom(tempv) then |
1795 | 0 | if bwz != 0 and tempv = 0 then |
1796 | 0 | argtext = "" |
1797 | else | |
1798 | 0 | argtext = trim(sprintf("%15.15g", tempv)) |
1799 | end if | |
1800 | else | |
1801 | 0 | argtext = pretty_sprint( tempv, |
1802 | {2,0,1,1000,"%d","%.15g",32,127,1,0} | |
1803 | ) | |
1804 | end if | |
1805 | else | |
1806 | 1 | argtext = pretty_sprint( arg_list[argn], |
1807 | {2,0,1,1000,"%d","%.15g",32,127,1,0} | |
1808 | ) | |
1809 | end if | |
1810 | end if | |
1811 | 90 | currargv = arg_list[argn] |
1812 | end if | |
1813 | ||
1814 | ||
1815 | 91 | if length(argtext) > 0 then |
1816 | 84 | switch cap do |
1817 | case 'u' then | |
1818 | 1 | argtext = upper(argtext) |
1819 | case 'l' then | |
1820 | 3 | argtext = lower(argtext) |
1821 | case 'w' then | |
1822 | 1 | argtext = proper(argtext) |
1823 | case 0 then | |
1824 | -- do nothing | |
1825 | 79 | cap = cap |
1826 | ||
1827 | case else | |
1828 | 0 | crash("logic error: 'cap' mode in format.") |
1829 | ||
1830 | end switch | |
1831 | ||
1832 | 84 | if atom(currargv) then |
1833 | 49 | if find('e', argtext) = 0 then |
1834 | -- Only applies to non-scientific notation. | |
1835 | 48 | if decs != -1 then |
1836 | 8 | pos = find('.', argtext) |
1837 | 8 | if pos then |
1838 | 7 | if decs = 0 then |
1839 | 1 | argtext = argtext [1 .. pos-1 ] |
1840 | else | |
1841 | 6 | pos = length(argtext) - pos |
1842 | 6 | if pos > decs then |
1843 | 2 | argtext = argtext[ 1 .. $ - pos + decs ] |
1844 | 4 | elsif pos < decs then |
1845 | 1 | argtext = argtext & repeat('0', decs - pos) |
1846 | end if | |
1847 | end if | |
1848 | 1 | elsif decs > 0 then |
1849 | 1 | argtext = argtext & '.' & repeat('0', decs) |
1850 | end if | |
1851 | end if | |
1852 | ||
1853 | end if | |
1854 | end if | |
1855 | ||
1856 | 84 | if align = 0 then |
1857 | 76 | if atom(currargv) then |
1858 | 49 | align = '>' |
1859 | else | |
1860 | 27 | align = '<' |
1861 | end if | |
1862 | end if | |
1863 | ||
1864 | 84 | if atom(currargv) then |
1865 | 49 | if tsep != 0 and zfill = 0 then |
1866 | 5 | integer dpos |
1867 | 5 | integer dist |
1868 | 5 | integer bracketed |
1869 | ||
1870 | 5 | if binout or hexout then |
1871 | 2 | dist = 4 |
1872 | else | |
1873 | 3 | dist = 3 |
1874 | end if | |
1875 | 5 | bracketed = (argtext[1] = '(') |
1876 | 5 | if bracketed then |
1877 | 0 | argtext = argtext[2 .. $-1] |
1878 | end if | |
1879 | 5 | dpos = find('.', argtext) |
1880 | 5 | if dpos = 0 then |
1881 | 2 | dpos = length(argtext) + 1 |
1882 | else | |
1883 | 3 | if tsep = '.' then |
1884 | 1 | argtext[dpos] = ',' |
1885 | end if | |
1886 | end if | |
1887 | 5 | while dpos > dist do |
1888 | 7 | dpos -= dist |
1889 | 7 | if dpos > 1 then |
1890 | 5 | argtext = argtext[1.. dpos - 1] & tsep & argtext[dpos .. $] |
1891 | end if | |
1892 | 7 | end while |
1893 | 5 | if bracketed then |
1894 | 0 | argtext = '(' & argtext & ')' |
1895 | end if | |
1896 | end if | |
1897 | end if | |
1898 | ||
1899 | 84 | if width <= 0 then |
1900 | 52 | width = length(argtext) |
1901 | end if | |
1902 | ||
1903 | ||
1904 | 84 | if width < length(argtext) then |
1905 | 4 | if align = '>' then |
1906 | 1 | argtext = argtext[ $ - width + 1 .. $] |
1907 | 3 | elsif align = 'c' then |
1908 | 2 | pos = length(argtext) - width |
1909 | 2 | if remainder(pos, 2) = 0 then |
1910 | 1 | pos = pos / 2 |
1911 | 1 | argtext = argtext[ pos + 1 .. $ - pos ] |
1912 | else | |
1913 | 1 | pos = floor(pos / 2) |
1914 | 1 | argtext = argtext[ pos + 1 .. $ - pos - 1] |
1915 | end if | |
1916 | else | |
1917 | 1 | argtext = argtext[ 1 .. width] |
1918 | end if | |
1919 | 80 | elsif width > length(argtext) then |
1920 | 7 | if align = '>' then |
1921 | 3 | argtext = repeat(' ', width - length(argtext)) & argtext |
1922 | 4 | elsif align = 'c' then |
1923 | 2 | pos = width - length(argtext) |
1924 | 2 | if remainder(pos, 2) = 0 then |
1925 | 2 | pos = pos / 2 |
1926 | 2 | argtext = repeat(' ', pos) & argtext & repeat(' ', pos) |
1927 | else | |
1928 | 0 | pos = floor(pos / 2) |
1929 | 0 | argtext = repeat(' ', pos) & argtext & repeat(' ', pos + 1) |
1930 | end if | |
1931 | ||
1932 | else | |
1933 | 2 | argtext = argtext & repeat(' ', width - length(argtext)) |
1934 | end if | |
1935 | end if | |
1936 | 84 | result &= argtext |
1937 | ||
1938 | else | |
1939 | 7 | if spacer then |
1940 | 1 | result &= ' ' |
1941 | end if | |
1942 | end if | |
1943 | ||
1944 | 91 | if trimming then |
1945 | 3 | result = trim(result) |
1946 | end if | |
1947 | ||
1948 | 91 | tend = 0 |
1949 | 91 | prevargv = currargv |
1950 | end if | |
1951 | end if | |
1952 | 642 | end while |
1953 | ||
1954 | 64 | return result |
1955 | end function | |
1956 | ||
1957 | --** | |
1958 | -- Get the text associated with the message number in the requested locale. | |
1959 | -- | |
1960 | -- Parameters: | |
1961 | -- # ##MsgNum## : An integer. The message number whose text you are trying to get. | |
1962 | -- # ##LocalQuals## : A sequence. Zero or more locale codes. Default is {}. | |
1963 | -- # ##DBBase##: A sequence. The base name for the database files containing the | |
1964 | -- locale text strings. The default is "teksto". | |
1965 | -- | |
1966 | -- Returns: | |
1967 | -- A string **sequence**, the text associated with the message number and locale.\\ | |
1968 | -- An **integer**, if no associated text can be found. | |
1969 | -- | |
1970 | -- Comments: | |
1971 | -- * This first scans the database(s) linked to the locale codes supplied. | |
1972 | -- * The database name for each locale takes the format of DBBase & "_" & Locale & ".edb" | |
1973 | -- so if the default DBBase is used, and the locales supplied are {"enus", "enau"} | |
1974 | -- the databases scanned are "teksto_enus.edb" and "teksto_enau.edb". | |
1975 | -- The database table name searched is "1" with the key being the message number, | |
1976 | -- and the text is the record data. | |
1977 | -- * If the message is not found in these databases (or the databases don't exist) | |
1978 | -- a database called DBBase & ".edb" is searched. Again the table name is "1" but | |
1979 | -- it first looks for keys with the format {Locale, msgnum} and failing that it | |
1980 | -- looks for keys in the format {"", msgnum}, and if that fails it looks for a | |
1981 | -- key of just the msgnum. | |
1982 | -- | |
1983 | 0 | |
1984 | 0 | integer idx = 1 -- NOTE(review): assigned here but never read in this routine |
1985 | integer db_res -- status code returned by the db_select* calls | |
1986 | object lMsgText -- lookup result; a sequence once some fetch has succeeded | |
1987 | ||
1988 | 0 | db_res = -1 -- placeholder; always reassigned before it is tested |
1989 | 0 | lMsgText = 0 -- an atom here means "nothing found yet" |
1990 | -- First, try each locale-specific database (DBBase & "_" & locale & ".edb"), in the order supplied | |
1991 | 0 | for i = 1 to length(LocalQuals) do |
1992 | 0 | db_res = db_select( locate_file( DBBase & "_" & LocalQuals[i] & ".edb" ), DB_LOCK_NO) |
1993 | 0 | if db_res = DB_OK then |
1994 | 0 | db_res = db_select_table("1") -- messages are looked up in the table named "1" |
1995 | 0 | if db_res = DB_OK then |
1996 | 0 | lMsgText = db_fetch_record(MsgNum) -- key is the bare message number |
1997 | 0 | if sequence(lMsgText) then |
1998 | 0 | exit -- the first locale that yields a sequence wins |
1999 | end if | |
2000 | end if | |
2001 | end if | |
2002 | 0 | end for |
2003 | ||
2004 | -- Next, if nothing was found above, fall back to the generic database (DBBase & ".edb") | |
2005 | 0 | if atom(lMsgText) then |
2006 | 0 | db_res = db_select( locate_file( DBBase & ".edb" ), DB_LOCK_NO) |
2007 | 0 | if db_res = DB_OK then |
2008 | 0 | db_res = db_select_table("1") |
2009 | 0 | if db_res = DB_OK then |
2010 | 0 | for i = 1 to length(LocalQuals) do |
2011 | 0 | lMsgText = db_fetch_record({LocalQuals[i],MsgNum}) -- most specific key: {locale, msgnum} |
2012 | 0 | if sequence(lMsgText) then |
2013 | 0 | exit |
2014 | end if | |
2015 | 0 | end for |
2016 | 0 | if atom(lMsgText) then |
2017 | 0 | lMsgText = db_fetch_record({"",MsgNum}) -- then the locale-neutral key {"", msgnum} |
2018 | end if | |
2019 | 0 | if atom(lMsgText) then |
2020 | 0 | lMsgText = db_fetch_record(MsgNum) -- finally the bare message number |
2021 | end if | |
2022 | end if | |
2023 | end if | |
2024 | end if | |
2025 | ||
2026 | 0 | return lMsgText -- a sequence if some lookup succeeded, otherwise an atom |
2027 | ||
2028 | end function | |
2029 |