Name | Executed | Routines | % | Executed | Lines | % | Unexecuted |
/home/matt/eu/rds/include/std/search.e | 15 | 15 | 100.00% | 218 | 225 | 96.89% | 7 |
Routine | Executed | Lines | Unexecuted | |
vlookup() | 9 | 12 | 75.00% | 3 |
is_in_range() | 19 | 21 | 90.48% | 2 |
binary_search() | 22 | 23 | 95.65% | 1 |
match_replace() | 15 | 16 | 93.75% | 1 |
begins() | 12 | 12 | 100.00% | 0 |
ends() | 12 | 12 | 100.00% | 0 |
find_all() | 8 | 8 | 100.00% | 0 |
find_any() | 6 | 6 | 100.00% | 0 |
find_nested() | 53 | 53 | 100.00% | 0 |
find_replace() | 10 | 10 | 100.00% | 0 |
is_in_list() | 2 | 2 | 100.00% | 0 |
lookup() | 9 | 9 | 100.00% | 0 |
match_all() | 8 | 8 | 100.00% | 0 |
rfind() | 12 | 12 | 100.00% | 0 |
rmatch() | 17 | 17 | 100.00% | 0 |
# | Executed | |
1 | -- (c) Copyright - See License.txt | |
2 | -- | |
3 | namespace search | |
4 | ||
5 | --**** | |
6 | -- == Searching | |
7 | -- **Page Contents** | |
8 | -- | |
9 | -- < | |
10 | ||
11 | include std/error.e | |
12 | include std/types.e | |
13 | ||
14 | --**** | |
15 | -- === Equality | |
16 | -- | |
17 | ||
18 | --**** | |
19 | -- Signature: | |
20 | -- | |
21 | -- | |
22 | -- Description: | |
23 | -- Compare two items returning less than, equal or greater than. | |
24 | -- | |
25 | -- Parameters: | |
26 | -- # ##compared## : the compared object | |
27 | -- # ##reference## : the reference object | |
28 | -- | |
29 | -- Returns: | |
30 | -- An **integer**, | |
31 | -- * 0 ~-- if objects are identical | |
32 | -- * 1 ~-- if ##compared## is greater than ##reference## | |
33 | -- * -1 ~-- if ##compared## is less than ##reference## | |
34 | -- | |
35 | -- Comments: | |
36 | -- Atoms are considered to be less than sequences. Sequences are compared alphabetically | |
37 | -- starting with the first element until a difference is found or one of the sequences is exhausted. Atoms are compared as ordinary reals. | |
38 | -- | |
39 | -- Example 1: | |
40 | -- | |
41 | -- x = compare({1,2,{3,{4}},5}, {2-1,1+1,{3,{4}},6-1}) | |
42 | -- -- identical, x is 0 | |
43 | -- | |
44 | -- | |
45 | -- Example 2: | |
46 | -- | |
47 | -- if compare("ABC", "ABCD") < 0 then -- -1 | |
48 | -- -- will be true: ABC is "less" because it is shorter | |
49 | -- end if | |
50 | -- | |
51 | -- | |
52 | -- Example 3: | |
53 | -- | |
54 | -- x = compare('a', "a") | |
55 | -- -- x will be -1 because 'a' is an atom | |
56 | -- -- while "a" is a sequence | |
57 | -- | |
58 | -- | |
59 | -- See Also: | |
60 | -- [[:equal]], [[:relational operators]], [[:operations on sequences]], [[:sort]] | |
61 | ||
62 | --**** | |
63 | -- Signature: | |
64 | -- | |
65 | -- | |
66 | -- Description: | |
67 | -- Compare two Euphoria objects to see if they are the same. | |
68 | -- | |
69 | -- Parameters: | |
70 | -- # ##left## : one of the objects to test | |
71 | -- # ##right## : the other object | |
72 | -- | |
73 | -- Returns: | |
74 | -- An **integer**, 1 if the two objects are identical, else 0. | |
75 | -- | |
76 | -- Comments: | |
77 | -- This is equivalent to the expression: ##compare(left, right) = 0##. | |
78 | -- | |
79 | -- This routine, like most other built-in routines, is very fast. It does not have any | |
80 | -- subroutine call overhead. | |
81 | -- | |
82 | -- Example 1: | |
83 | -- | |
84 | -- if equal(PI, 3.14) then | |
85 | -- puts(1, "give me a better value for PI!\n") | |
86 | -- end if | |
87 | -- | |
88 | -- | |
89 | -- Example 2: | |
90 | -- | |
91 | -- if equal(name, "George") or equal(name, "GEORGE") then | |
92 | -- puts(1, "name is George\n") | |
93 | -- end if | |
94 | -- | |
95 | -- | |
96 | -- See Also: | |
97 | -- [[:compare]] | |
98 | ||
99 | --**** | |
100 | -- === Finding | |
101 | -- | |
102 | ||
103 | --**** | |
104 | -- Signature: | |
105 | -- | |
106 | -- | |
107 | -- Description: | |
108 | -- Find the first occurrence of a "needle" as an element of a "haystack", starting from position "start".. | |
109 | -- | |
110 | -- Parameters: | |
111 | -- # ##needle## : an object whose presence is being queried | |
112 | -- # ##haystack## : a sequence, which is being looked up for ##needle## | |
113 | -- # ##start## : an integer, the position at which to start searching. Defaults to 1. | |
114 | -- | |
115 | -- Returns: | |
116 | -- An **integer**, 0 if ##needle## is not on ##haystack##, else the smallest index of an | |
117 | -- element of ##haystack## that equals ##needle##. | |
118 | -- | |
119 | -- Comments: | |
120 | -- | |
121 | -- ##find##() and [[:find_from]]() are identical, but you can omit giving ##find##() a starting point. | |
122 | -- | |
123 | -- Example 1: | |
124 | -- | |
125 | -- location = find(11, {5, 8, 11, 2, 3}) | |
126 | -- -- location is set to 3 | |
127 | -- | |
128 | -- | |
129 | -- Example 2: | |
130 | -- | |
131 | -- names = {"fred", "rob", "george", "mary", ""} | |
132 | -- location = find("mary", names) | |
133 | -- -- location is set to 4 | |
134 | -- | |
135 | -- | |
136 | -- See Also: | |
137 | -- [[:find_from]], [[:match]], [[:match_from]], [[:compare]] | |
138 | ||
139 | --**** | |
140 | -- Signature: | |
141 | -- | |
142 | -- | |
143 | -- Description: | |
144 | -- Find the first occurrence of a "needle" as an element of a "haystack". Search starts at a specified index. | |
145 | -- | |
146 | --Parameters: | |
147 | -- # ##needle## : an object whose presence is being queried | |
148 | -- # ##haystack## : a sequence, which is being looked up for ##needle## | |
149 | -- # ##start## : an integer, the index in ##haystack## at which to start searching. | |
150 | -- | |
151 | -- Returns: | |
152 | -- An **integer**, 0 if ##needle## is not on ##haystack## past position ##start##, else the smallest index, not less than ##start##, of an element of ##haystack## that equals ##needle##. | |
153 | -- | |
154 | -- Comments: | |
155 | -- start may have any value from 1 to the length of ##haystack## plus 1. (Analogous to the | |
156 | -- first index of a slice of ##haystack##). | |
157 | -- | |
158 | -- ##find##() and [[:find_from]]() are identical, but you can omit giving ##find##() a starting point. | |
159 | -- | |
160 | -- Example 1: | |
161 | -- | |
162 | -- location = find_from(11, {11, 8, 11, 2, 3}, 2) | |
163 | -- -- location is set to 3 | |
164 | -- | |
165 | -- | |
166 | -- Example 2: | |
167 | -- | |
168 | -- names = {"mary", "rob", "george", "mary", ""} | |
169 | -- location = find_from("mary", names, 3) | |
170 | -- -- location is set to 4 | |
171 | -- | |
172 | -- | |
173 | -- See Also: | |
174 | -- [[:find]], [[:match]], [[:match_from]], [[:compare]] | |
175 | ||
176 | --** | |
177 | -- Find any element from a list inside a sequence. Returns the location of the first hit. | |
178 | -- | |
179 | -- Parameters: | |
180 | -- # ##needles## : a sequence, the list of items to look for | |
181 | -- # ##haystack## : a sequence, in which "needles" are looked for | |
182 | -- # ##start## : an integer, the starting point of the search. Defaults to 1. | |
183 | -- | |
184 | -- Returns: | |
185 | -- An **integer**, the smallest index in ##haystack## of an element of ##needles##, or 0 if no needle is found. | |
186 | -- | |
187 | -- Comments: | |
188 | -- This function may be applied to a string sequence or a complex | |
189 | -- sequence. | |
190 | -- | |
191 | -- Example 1: | |
192 | -- | |
193 | -- location = find_any("aeiou", "John Smith", 3) | |
194 | -- -- location is 8 | |
195 | -- | |
196 | -- | |
197 | -- Example 2: | |
198 | -- | |
199 | -- location = find_any("aeiou", "John Doe") | |
200 | -- -- location is 2 | |
201 | -- | |
202 | -- | |
203 | -- See Also: | |
204 | -- [[:find]], [[:find_from]] | |
205 | ||
206 | 23 | |
207 | 23 | for i = start to length(haystack) do |
208 | 52 | if find(haystack[i],needles) then |
209 | 19 | return i |
210 | end if | |
211 | 33 | end for |
212 | ||
213 | 4 | return 0 |
214 | end function | |
215 | ||
216 | --** | |
217 | -- Find all occurrences of an object inside a sequence, starting at some specified point. | |
218 | -- | |
219 | -- Parameters: | |
220 | -- # ##needle## : an object, what to look for | |
221 | -- # ##haystack## : a sequence to search in | |
222 | -- # ##start## : an integer, the starting index position (defaults to 1) | |
223 | -- | |
224 | -- Returns: | |
225 | -- A **sequence**, the list of all indexes no less than ##start## of elements of ##haystack## that equal ##needle##. This sequence is empty if no match found. | |
226 | -- | |
227 | -- Example 1: | |
228 | -- | |
229 | -- s = find_all('A', "ABCABAB") | |
230 | -- -- s is {1,4,6} | |
231 | -- | |
232 | -- | |
233 | -- See Also: | |
234 | -- [[:find]], [[:match]], [[:match_all]] | |
235 | ||
236 | 24 | |
237 | 24 | sequence ret = {} |
238 | ||
239 | 24 | while start > 0 with entry do |
240 | 41 | ret &= start |
241 | 41 | start += 1 |
242 | entry | |
243 | 65 | start = find_from(needle, haystack, start) |
244 | 65 | end while |
245 | ||
246 | 24 | return ret |
247 | end function | |
248 | ||
249 | public constant | |
250 | 101 | NESTED_ANY=1, |
251 | 101 | NESTED_ALL=2, |
252 | 101 | NESTED_INDEX=4, |
253 | 101 | NESTED_BACKWARD=8 |
254 | ||
255 | ||
256 | --** | |
257 | -- Find any object (among a list) in a sequence of arbitrary shape at arbitrary nesting. | |
258 | -- | |
259 | -- Parameters: | |
260 | -- # ##needle## : an object, either what to look up, or a list of items to look up | |
261 | -- # ##haystack## : a sequence, where to look up | |
262 | -- # ##flags## : options to the function, see Comments section. Defaults to 0. | |
263 | -- # ##routine## : an integer, the routine_id of an user supplied equal function. Defaults to -1. | |
264 | -- | |
265 | -- Returns: | |
266 | -- A possibly empty **sequence**, of results, one for each hit. | |
267 | -- | |
268 | -- Comments: | |
269 | -- Each item in the returned sequence is either a sequence of indexes, or a pair {sequence of indexes, index in ##needle##}. | |
270 | -- | |
271 | -- The following flags are available to fine tune the search: | |
272 | -- * ##NESTED_BACKWARD## ~-- if on ##flags##, search is performed backward. Default is forward. | |
273 | -- * ##NESTED_ALL## ~-- if on ##flags##, all occurrences are looked for. Default is one hit only. | |
274 | -- * ##NESTED_ANY## ~-- if present on ##flags##, ##needle## is a list of items to look for. Not the default. | |
275 | -- * ##NESTED_INDEXES## ~-- if present on ##flags##, an individual result is a pair {position, index | |
276 | -- in ##needle##}. Default is just return the position. | |
277 | -- | |
278 | -- If ##s## is a single index list, or position, from the returned sequence, then ##fetch(haystack, s) = needle##. | |
279 | -- | |
280 | -- If a routine id is supplied, the routine must behave like [[:equal]]() if the NESTED_ANY | |
281 | -- flag is not supplied, and like [[:find]]() if it is. The routine is being passed the current | |
282 | -- ##haystack## item and ##needle##. The returned integer is interpreted as if returned by | |
283 | -- [[:equal]]() or [[:find]](). | |
284 | -- | |
285 | -- If the ##NESTED_ANY## flag is specified, and ##needle## is an atom, then the flag is removed. | |
286 | -- | |
287 | -- Example 1: | |
288 | -- | |
289 | -- sequence s = find_nested(3, {5, {4, {3, {2}}}}) | |
290 | -- -- s is {2 ,2 ,1} | |
291 | -- | |
292 | -- | |
293 | -- Example 2: | |
294 | -- | |
295 | -- sequence s = find_nested({3, 2}, {1, 3, {2,3}}, NESTED_ANY + NESTED_BACKWARD + NESTED_ALL) | |
296 | -- -- s is {{3,2}, {3,1}, {2}} | |
297 | -- | |
298 | -- | |
299 | -- Example 3: | |
300 | -- | |
301 | -- sequence s = find_nested({3, 2}, {1, 3, {2,3}}, NESTED_ANY + NESTED_INDEXES + NESTED_ALL) | |
302 | -- -- s is {{{2}, 1}, {{3, 1}, 2}, {{3, 2}, 1}} | |
303 | -- | |
304 | -- | |
305 | -- See Also: | |
306 | -- [[:find]], [[:rfind]], [[:find_any]], [[:fetch]] | |
307 | ||
308 | 8 | |
309 | 8 | sequence occurrences = {} -- accumulated results |
310 | 8 | integer depth = 0 |
311 | 8 | sequence branches = {}, indexes = {}, last_indexes = {} -- saved states |
312 | 8 | integer last_idx = length(haystack), current_idx = 1, direction = 1 -- assume forward searches more frequent |
313 | object x | |
314 | 8 | integer rc, any = and_bits(flags, NESTED_ANY) |
315 | ||
316 | 8 | if and_bits(flags,NESTED_BACKWARD) then |
317 | 4 | current_idx = last_idx |
318 | 4 | last_idx = 1 |
319 | 4 | direction = -1 |
320 | end if | |
321 | 8 | any = any and sequence(needle) |
322 | ||
323 | 8 | while 1 do -- traverse the whole haystack tree |
324 | 21 | while eu:compare(current_idx, last_idx) != direction do |
325 | 52 | x = haystack[current_idx] |
326 | ||
327 | -- is x what we want? | |
328 | 52 | if rtn_id = NO_ROUTINE_ID then |
329 | 40 | if any then |
330 | 17 | rc = find(x, needle) |
331 | else | |
332 | 23 | rc = equal(x, needle) |
333 | end if | |
334 | else | |
335 | 12 | rc = call_func(rtn_id, {x, needle}) |
336 | end if | |
337 | ||
338 | 52 | if rc then |
339 | -- yes, it is | |
340 | 16 | sequence info |
341 | ||
342 | -- inline head() from sequence.e | |
343 | 16 | if depth < length(indexes) then |
344 | 3 | info = indexes[1..depth] & current_idx |
345 | else | |
346 | 13 | info = indexes & current_idx |
347 | end if | |
348 | ||
349 | 16 | if and_bits(flags, NESTED_INDEX) then |
350 | 4 | info = {info, rc} |
351 | end if | |
352 | 16 | if and_bits(flags, NESTED_ALL) then |
353 | 13 | occurrences = append(occurrences, info) |
354 | else | |
355 | 3 | return info |
356 | end if | |
357 | end if | |
358 | ||
359 | -- either it wasn't, or we keep going | |
360 | 49 | if eu:compare(x, {})=1 then |
361 | -- this is a subtree, search inside | |
362 | -- save state | |
363 | 17 | depth += 1 |
364 | 17 | if length(indexes) < depth then |
365 | 13 | indexes &= current_idx |
366 | 13 | branches = append(branches, haystack) |
367 | 13 | last_indexes &= last_idx |
368 | else | |
369 | 4 | indexes[depth] = current_idx |
370 | 4 | branches[depth] = haystack |
371 | 4 | last_indexes[depth] = last_idx |
372 | end if | |
373 | ||
374 | -- set new state | |
375 | 17 | haystack = x |
376 | 17 | if direction = 1 then |
377 | 10 | current_idx = 1 |
378 | 10 | last_idx = length(haystack) |
379 | else | |
380 | 7 | last_idx = 1 |
381 | 7 | current_idx = length(haystack) |
382 | end if | |
383 | else | |
384 | -- next item | |
385 | 32 | current_idx += direction |
386 | end if | |
387 | 49 | end while |
388 | ||
389 | -- return or backtrack | |
390 | 18 | if depth=0 then |
391 | 5 | return occurrences -- either accumulated results, or {} if none -> ok |
392 | end if | |
393 | ||
394 | -- restore state | |
395 | 13 | haystack = branches[depth] |
396 | 13 | last_idx = last_indexes[depth] |
397 | 13 | current_idx = indexes[depth] + direction |
398 | 13 | depth -= 1 |
399 | 13 | end while |
400 | end function | |
401 | ||
402 | --** | |
403 | -- Find a needle in a haystack in reverse order. | |
404 | -- | |
405 | -- Parameters: | |
406 | -- # ##needle## : an object to search for | |
407 | -- # ##haystack## : a sequence to search in | |
408 | -- # ##start## : an integer, the starting index position (defaults to length(##haystack##)) | |
409 | -- | |
410 | -- Returns: | |
411 | -- An **integer**, 0 if no instance of ##needle## can be found on ##haystack## before | |
412 | -- index ##start##, or the highest such index otherwise. | |
413 | -- | |
414 | -- Comments: | |
415 | -- | |
416 | -- If ##start## is less than 1, it will be added once to length(##haystack##) | |
417 | -- to designate a position counted backwards. Thus, if ##start## is -1, the | |
418 | -- first element to be queried in ##haystack## will be ##haystack##[$-1], | |
419 | -- then ##haystack##[$-2] and so on. | |
420 | -- | |
421 | -- Example 1: | |
422 | -- | |
423 | -- location = rfind(11, {5, 8, 11, 2, 11, 3}) | |
424 | -- -- location is set to 5 | |
425 | -- | |
426 | -- | |
427 | -- Example 2: | |
428 | -- | |
429 | -- names = {"fred", "rob", "rob", "george", "mary"} | |
430 | -- location = rfind("rob", names) | |
431 | -- -- location is set to 3 | |
432 | -- location = rfind("rob", names, -4) | |
433 | -- -- location is set to 2 | |
434 | -- | |
435 | -- | |
436 | -- See Also: | |
437 | -- [[:find]], [[:rmatch]] | |
438 | ||
439 | 23 | |
440 | 23 | integer len = length(haystack) |
441 | ||
442 | 23 | if start = 0 then start = len end if |
443 | 23 | if (start > len) or (len + start < 1) then |
444 | 2 | return 0 |
445 | end if | |
446 | ||
447 | 21 | if start < 1 then |
448 | 1 | start = len + start |
449 | end if | |
450 | ||
451 | 21 | for i = start to 1 by -1 do |
452 | 96 | if equal(haystack[i], needle) then |
453 | 15 | return i |
454 | end if | |
455 | 81 | end for |
456 | ||
457 | 6 | return 0 |
458 | end function | |
459 | ||
460 | --** | |
461 | -- Finds a "needle" in a "haystack", and replace any, or only the first few, occurrences with a replacement. | |
462 | -- | |
463 | -- Parameters: | |
464 | -- | |
465 | -- # ##needle## : an object to search and perhaps replace | |
466 | -- # ##haystack## : a sequence to be inspected | |
467 | -- # ##replacement## : an object to substitute for any (first) instance of ##needle## | |
468 | -- # ##max## : an integer, 0 to replace all occurrences | |
469 | -- | |
470 | -- Returns: | |
471 | -- A **sequence**, the modified ##haystack##. | |
472 | -- | |
473 | -- Comments: | |
474 | -- Replacements will not be made recursively on the part of ##haystack## that was already changed. | |
475 | -- | |
476 | -- If ##max## is 0 or less, any occurrence of ##needle## in ##haystack## will be replaced by ##replacement##. Otherwise, only the first ##max## occurrences are. | |
477 | -- | |
478 | -- Example 1: | |
479 | -- | |
480 | -- s = find_replace('b', "The batty book was all but in Canada.", 'c', 0) | |
481 | -- -- s is "The catty cook was all cut in Canada." | |
482 | -- | |
483 | -- | |
484 | -- Example 2: | |
485 | -- | |
486 | -- s = find_replace('/', "/euphoria/demo/unix", '\\', 2) | |
487 | -- -- s is "\\euphoria\\demo/unix" | |
488 | -- | |
489 | -- | |
490 | -- Example 3: | |
491 | -- | |
492 | -- s = find_replace("theater", { "the", "theater", "theif" }, "theatre") | |
493 | -- -- s is { "the", "theatre", "theif" } | |
494 | -- | |
495 | -- | |
496 | -- See Also: | |
497 | -- [[:find]], [[:replace]], [[:match_replace]] | |
498 | ||
499 | 5 | |
500 | integer max=0) | |
501 | ||
502 | 5 | integer posn = 0 |
503 | 5 | while posn != 0 entry do |
504 | 10 | haystack[posn] = replacement |
505 | 10 | max -= 1 |
506 | 10 | if max = 0 then |
507 | 1 | exit |
508 | end if | |
509 | entry | |
510 | 14 | posn = find_from(needle, haystack, posn + 1) |
511 | 14 | end while |
512 | ||
513 | 5 | return haystack |
514 | end function | |
515 | ||
516 | --** | |
517 | -- Finds a "needle" in a "haystack", and replace any, or only the first few, occurrences with a replacement. | |
518 | -- | |
519 | -- Parameters: | |
520 | -- | |
521 | -- # ##needle## : an object to search and perhaps replace | |
522 | -- # ##haystack## : a sequence to be inspected | |
523 | -- # ##replacement## : an object to substitute for any (first) instance of ##needle## | |
524 | -- # ##max## : an integer, 0 to replace all occurrences | |
525 | -- | |
526 | -- Returns: | |
527 | -- A **sequence**, the modified ##haystack##. | |
528 | -- | |
529 | -- Comments: | |
530 | -- Replacements will not be made recursively on the part of ##haystack## that was already changed. | |
531 | -- | |
532 | -- If ##max## is 0 or less, any occurrence of ##needle## in ##haystack## will be replaced by ##replacement##. Otherwise, only the first ##max## occurrences are. | |
533 | -- | |
534 | -- If either ##needle## or ##replacement## are atoms they will be treated as if you had passed in a | |
535 | -- length-1 sequence containing the said atom. | |
536 | -- | |
537 | -- Example 1: | |
538 | -- | |
539 | -- s = match_replace("the", "the cat ate the food under the table", "THE", 0) | |
540 | -- -- s is "THE cat ate THE food under THE table" | |
541 | -- | |
542 | -- | |
543 | -- Example 2: | |
544 | -- | |
545 | -- s = match_replace("the", "the cat ate the food under the table", "THE", 2) | |
546 | -- -- s is "THE cat ate THE food under the table" | |
547 | -- | |
548 | -- | |
549 | -- Example 3: | |
550 | -- | |
551 | -- s = match_replace('/', "/euphoria/demo/unix", '\\', 2) | |
552 | -- -- s is "\\euphoria\\demo/unix" | |
553 | -- | |
554 | -- | |
555 | -- See Also: | |
556 | -- [[:find]], [[:replace]], [[:regex:find_replace]], [[:find_replace]] | |
557 | ||
558 | 72 | |
559 | integer max=0) | |
560 | integer posn, needle_len, replacement_len | |
561 | ||
562 | 72 | if atom(needle) then |
563 | 1 | needle = {needle} |
564 | end if | |
565 | 72 | if atom(replacement) then |
566 | 0 | replacement = {replacement} |
567 | end if | |
568 | ||
569 | 72 | needle_len = length(needle) |
570 | 72 | replacement_len = length(replacement) |
571 | ||
572 | 72 | posn = match(needle, haystack) |
573 | 72 | while posn do |
574 | 22 | haystack = haystack[1..posn-1] & replacement & |
575 | haystack[posn + needle_len .. $] | |
576 | 22 | posn = match_from(needle, haystack, posn + replacement_len) |
577 | ||
578 | 22 | max -= 1 |
579 | ||
580 | 22 | if max = 0 then |
581 | 1 | exit |
582 | end if | |
583 | 21 | end while |
584 | ||
585 | 72 | return haystack |
586 | end function | |
587 | ||
588 | --** | |
589 | -- Finds a "needle" in an ordered "haystack". Start and end point can be given for the search. | |
590 | -- | |
591 | -- Parameters: | |
592 | -- # ##needle## : an object to look for | |
593 | -- # ##haystack## : a sequence to search in | |
594 | -- # ##start_point## : an integer, the index at which to start searching. Defaults to 1. | |
595 | -- # ##end_point## : an integer, the end point of the search. Defaults to 0, ie search to end. | |
596 | -- | |
597 | -- Returns: | |
598 | -- An **integer**, either: | |
599 | -- # a positive integer ##i##, which means ##haystack[i]## equals ##needle##. | |
600 | -- # a negative integer, ##-i##, with ##i## between adjusted start and end | |
601 | -- points. This means that ##needle## is not in the searched slice of | |
602 | -- ##haystack##, but would be at index ##i## if it were there. | |
603 | -- # a negative integer ##-i## with ##i## out of the searched range. This | |
604 | -- means than ##needle##might be either below the start point if ##i## | |
605 | -- is below the start point, or above the end point if ##i## is. | |
606 | -- | |
607 | -- Comments: | |
608 | -- * If ##end_point## is not greater than zero, it is added to | |
609 | -- length(##haystack##) once only. Then, the end point of the search is | |
610 | -- adjusted to length(haystack) if out of bounds. | |
611 | -- * The start point is adjusted to 1 if below 1. | |
612 | -- * The way this function returns is very similar to what [[:db_find_key]] | |
613 | -- does. They use variants of the same algorithm. The latter is all the | |
614 | -- more efficient as ##haystack## is long. | |
615 | -- * ##haystack## is assumed to be in ascending order. Results are undefined | |
616 | -- if it is not. | |
617 | -- * If duplicate copies of ##needle## exist in the range searched on | |
618 | -- ##haystack##, any of the possible contiguous indexes may be returned. | |
619 | -- | |
620 | -- See Also: | |
621 | -- [[:find]], [[:db_find_key]] | |
622 | ||
623 | 105 | |
624 | integer end_point = 0) | |
625 | integer lo, hi, mid, c -- works up to 1.07 billion records | |
626 | ||
627 | 105 | lo = start_point |
628 | 105 | if end_point <= 0 then |
629 | 85 | hi = length(haystack) + end_point |
630 | else | |
631 | 20 | hi = end_point |
632 | end if | |
633 | 105 | if lo<1 then |
634 | 0 | lo=1 |
635 | end if | |
636 | 105 | if lo > hi and length(haystack) > 0 then |
637 | 1 | hi = length(haystack) |
638 | end if | |
639 | 105 | mid = start_point |
640 | 105 | c = 0 |
641 | 105 | while lo <= hi do |
642 | 553 | mid = floor((lo + hi) / 2) |
643 | 553 | c = eu:compare(needle, haystack[mid]) |
644 | 553 | if c < 0 then |
645 | 211 | hi = mid - 1 |
646 | 342 | elsif c > 0 then |
647 | 280 | lo = mid + 1 |
648 | else | |
649 | 62 | return mid |
650 | end if | |
651 | 491 | end while |
652 | -- return the position it would have, if inserted now | |
653 | 43 | if c > 0 then |
654 | 10 | mid += 1 |
655 | end if | |
656 | 43 | return -mid |
657 | end function | |
658 | ||
659 | --**** | |
660 | -- === Matching | |
661 | -- | |
662 | ||
663 | --**** | |
664 | -- Signature: | |
665 | -- | |
666 | -- | |
667 | -- Description: | |
668 | -- Try to match a "needle" against some slice of a "haystack", starting at position "start". | |
669 | -- | |
670 | -- Parameters: | |
671 | -- # ##needle## : a sequence whose presence as a "substring" is being queried | |
672 | -- # ##haystack## : a sequence, which is being looked up for ##needle## as a sub-sequence | |
673 | -- # ##start## : an integer, the point from which matching is attempted. Defaults to 1. | |
674 | -- | |
675 | -- Returns: | |
676 | -- An **integer**, 0 if no slice of ##haystack## is ##needle##, else the smallest index at which such a slice starts. | |
677 | -- | |
678 | -- Comments: | |
679 | -- | |
680 | -- ##match##() and [[:match_from]]() are identical, but you can omit giving ##match##() a starting point. | |
681 | -- | |
682 | -- Example 1: | |
683 | -- | |
684 | -- location = match("pho", "Euphoria") | |
685 | -- -- location is set to 3 | |
686 | -- | |
687 | -- | |
688 | -- See Also: | |
689 | -- [[:find]], [[:find_from]], [[:compare]], [[:match_from]], [[:wildcard:is_match]] | |
690 | ||
691 | --**** | |
692 | -- Signature: | |
693 | -- | |
694 | -- | |
695 | -- Description: | |
696 | -- Try to match a "needle" against some slice of a "haystack", starting from some index. | |
697 | -- | |
698 | -- Parameters: | |
699 | -- # ##needle## : an sequence whose presence as a sub-sequence is being queried | |
700 | -- # ##haystack## : a sequence, which is being looked up for ##needle## as a sub-sequence | |
701 | -- # ##start## : an integer, the index in ##haystack## at which to start searching. | |
702 | -- | |
703 | -- Returns: | |
704 | -- An **integer**, 0 if no slice of ##haystack## with lower index at least ##start## is ##needle##, else the smallest such index. | |
705 | -- | |
706 | -- Comments: | |
707 | -- ##start## may have any value from 1 to the length of ##haystack## plus 1. (Just like the first | |
708 | -- index of a slice of ##haystack##.) | |
709 | -- | |
710 | -- ##match##() and [[:match_from]]() are identical, but you can omit giving ##match##() a starting point. | |
711 | -- | |
712 | -- Example 1: | |
713 | -- | |
714 | -- location = match_from("pho", "phoEuphoria", 4) | |
715 | -- -- location is set to 6 | |
716 | -- | |
717 | -- | |
718 | -- See Also: | |
719 | -- [[:find]], [[:find_from]], [[:match]], [[:compare]], [[:wildcard:is_match]], [[:regex:find]] | |
720 | ||
721 | --** | |
722 | -- Match all items of haystack in needle. | |
723 | -- | |
724 | -- Parameters: | |
725 | -- # ##needle## : a sequence, what to look for | |
726 | -- # ##haystack## : a sequence to search in | |
727 | -- # ##start## : an integer, the starting index position (defaults to 1) | |
728 | -- | |
729 | -- Returns: | |
730 | -- A **sequence**, of integers, the list of all lower indexes, not less than ##start##, of all slices in ##haystack## that equal ##needle##. The list may be empty. | |
731 | -- | |
732 | -- Example 1: | |
733 | -- | |
734 | -- s = match_all("the", "the dog chased the cat under the table.") | |
735 | -- -- s is {1,16,30} | |
736 | -- | |
737 | -- | |
738 | -- See Also: | |
739 | -- [[:match]], [[:regex:find_all]] [[:find]], [[:find_all]] | |
740 | ||
741 | 4 | |
742 | 4 | sequence ret = {} |
743 | ||
744 | 4 | while start > 0 with entry do |
745 | 5 | ret &= start |
746 | 5 | start += length(needle) |
747 | entry | |
748 | 9 | start = match_from(needle, haystack, start) |
749 | 9 | end while |
750 | ||
751 | 4 | return ret |
752 | end function | |
753 | ||
754 | --** | |
755 | -- Try to match a needle against some slice of a haystack in reverse order. | |
756 | -- | |
757 | -- Parameters: | |
758 | -- # ##needle## : a sequence to search for | |
759 | -- # ##haystack## : a sequence to search in | |
760 | -- # ##start## : an integer, the starting index position (defaults to length(##haystack##)) | |
761 | -- | |
762 | -- Returns: | |
763 | -- An **integer**, either 0 if no slice of ##haystack## starting before | |
764 | -- ##start## equals ##needle##, else the highest lower index of such a slice. | |
765 | -- | |
766 | -- Comments: | |
767 | -- If ##start## is less than 1, it will be added once to length(##haystack##) | |
768 | -- to designate a position counted backwards. Thus, if ##start## is -1, the | |
769 | -- first element to be queried in ##haystack## will be ##haystack##[$-1], | |
770 | -- then ##haystack##[$-2] and so on. | |
771 | -- | |
772 | -- Example 1: | |
773 | -- | |
774 | -- location = rmatch("the", "the dog ate the steak from the table.") | |
775 | -- -- location is set to 28 (3rd 'the') | |
776 | -- location = rmatch("the", "the dog ate the steak from the table.", -11) | |
777 | -- -- location is set to 13 (2nd 'the') | |
778 | -- | |
779 | -- | |
780 | -- See Also: | |
781 | -- [[:rfind]], [[:match]] | |
782 | ||
783 | 41 | |
784 | integer len, lenX | |
785 | ||
786 | 41 | len = length(haystack) |
787 | 41 | lenX = length(needle) |
788 | ||
789 | 41 | if lenX = 0 then |
790 | 1 | return 0 |
791 | 40 | elsif (start > len) or (len + start < 1) then |
792 | 2 | return 0 |
793 | end if | |
794 | ||
795 | 38 | if start < 1 then |
796 | 1 | start = len + start |
797 | end if | |
798 | ||
799 | 38 | if start + lenX - 1 > len then |
800 | 36 | start = len - lenX + 1 |
801 | end if | |
802 | ||
803 | 38 | lenX -= 1 |
804 | ||
805 | 38 | for i=start to 1 by -1 do |
806 | 530 | if equal(needle, haystack[i..i + lenX]) then |
807 | 7 | return i |
808 | end if | |
809 | 523 | end for |
810 | ||
811 | 31 | return 0 |
812 | end function | |
813 | ||
814 | ||
815 | --** | |
816 | -- Test whether a sequence is the head of another one. | |
817 | -- | |
818 | -- Parameters: | |
819 | -- # ##sub_text## : an object to be looked for | |
820 | -- # ##full_text## : a sequence, the head of which is being inspected. | |
821 | -- | |
822 | -- Returns: | |
823 | -- An **integer**, 1 if ##sub_text## begins ##full_text##, else 0. | |
824 | -- | |
825 | -- Example 1: | |
826 | -- | |
827 | -- s = begins("abc", "abcdef") | |
828 | -- -- s is 1 | |
829 | -- s = begins("bcd", "abcdef") | |
830 | -- -- s is 0 | |
831 | -- | |
832 | -- | |
833 | -- See Also: | |
834 | -- [[:ends]], [[:head]] | |
835 | ||
836 | 29 | |
837 | 29 | if length(full_text) = 0 then |
838 | 1 | return 0 |
839 | end if | |
840 | ||
841 | 28 | if atom(sub_text) then |
842 | 2 | if equal(sub_text, full_text[1]) then |
843 | 1 | return 1 |
844 | else | |
845 | 1 | return 0 |
846 | end if | |
847 | end if | |
848 | ||
849 | 26 | if length(sub_text) > length(full_text) then |
850 | 3 | return 0 |
851 | end if | |
852 | ||
853 | 23 | if equal(sub_text, full_text[1.. length(sub_text)]) then |
854 | 14 | return 1 |
855 | else | |
856 | 9 | return 0 |
857 | end if | |
858 | end function | |
859 | ||
860 | --** | |
861 | -- Test whether a sequence ends another one. | |
862 | -- | |
863 | -- Parameters: | |
864 | -- # ##sub_text## : an object to be looked for | |
865 | -- # ##full_text## : a sequence, the tail of which is being inspected. | |
866 | -- | |
867 | -- Returns: | |
868 | -- An **integer**, 1 if ##sub_text## ends ##full_text##, else 0. | |
869 | -- | |
870 | -- Example 1: | |
871 | -- | |
872 | -- s = ends("def", "abcdef") | |
873 | -- -- s is 1 | |
874 | -- s = begins("bcd", "abcdef") | |
875 | -- -- s is 0 | |
876 | -- | |
877 | -- | |
878 | -- See Also: | |
879 | -- [[:begins]], [[:tail]] | |
880 | ||
881 | 17 | |
882 | 17 | if length(full_text) = 0 then |
883 | 1 | return 0 |
884 | end if | |
885 | ||
886 | 16 | if atom(sub_text) then |
887 | 2 | if equal(sub_text, full_text[$]) then |
888 | 1 | return 1 |
889 | else | |
890 | 1 | return 0 |
891 | end if | |
892 | end if | |
893 | ||
894 | 14 | if length(sub_text) > length(full_text) then |
895 | 3 | return 0 |
896 | end if | |
897 | ||
898 | 11 | if equal(sub_text, full_text[$ - length(sub_text) + 1 .. $]) then |
899 | 9 | return 1 |
900 | else | |
901 | 2 | return 0 |
902 | end if | |
903 | end function | |
904 | ||
905 | --** | |
906 | -- Tests to see if the ##item## is in a range of values supplied by ##range_limits## | |
907 | -- | |
908 | -- Parameters: | |
909 | -- # ##item## : The object to test for. | |
910 | -- # ##range_limits## : A sequence of two or more elements. The first is assumed | |
911 | -- to be the smallest value and the last is assumed to be the highest value. | |
912 | -- # ##boundries##: a sequence. This determines if the range limits are inclusive | |
913 | -- or not. Must be one of "[]" (the default), "[)", "(]", or | |
914 | -- "()". | |
915 | -- | |
916 | -- Returns: | |
917 | -- An **integer**, 0 if ##item## is not in the ##range_limits## otherwise it returns 1. | |
918 | -- | |
919 | -- Comments: | |
920 | -- * In ##boundries###, square brackets mean //inclusive// and round brackets | |
921 | -- mean //exclusive//. Thus "[]" includes both limits in the range, while | |
922 | -- "()" excludes both limits. And, "[)" includes the lower limit and excludes | |
923 | -- the upper limits while "(]" does the reverse. | |
924 | -- | |
925 | -- Example 1: | |
926 | -- | |
927 | -- if is_in_range(2, {2, 75}) then | |
928 | -- procA(user_data) -- Gets run (both limits included) | |
929 | -- end if | |
930 | -- if is_in_range(2, {2, 75}, "(]") then | |
931 | -- procA(user_data) -- Does not get run | |
932 | -- end if | |
933 | -- | |
934 | ||
935 | 15 | |
936 | 15 | if length(range_limits) < 2 then |
937 | 2 | return 0 |
938 | end if | |
939 | ||
940 | 13 | switch boundries do |
941 | case "()" then | |
942 | 2 | if eu:compare(item, range_limits[1]) <= 0 then |
943 | 1 | return 0 |
944 | end if | |
945 | 1 | if eu:compare(item, range_limits[$]) >= 0 then |
946 | 1 | return 0 |
947 | end if | |
948 | ||
949 | case "[)" then | |
950 | 2 | if eu:compare(item, range_limits[1]) < 0 then |
951 | 0 | return 0 |
952 | end if | |
953 | 2 | if eu:compare(item, range_limits[$]) >= 0 then |
954 | 1 | return 0 |
955 | end if | |
956 | ||
957 | case "(]" then | |
958 | 2 | if eu:compare(item, range_limits[1]) <= 0 then |
959 | 1 | return 0 |
960 | end if | |
961 | 1 | if eu:compare(item, range_limits[$]) > 0 then |
962 | 0 | return 0 |
963 | end if | |
964 | ||
965 | case else | |
966 | 7 | if eu:compare(item, range_limits[1]) < 0 then |
967 | 1 | return 0 |
968 | end if | |
969 | 6 | if eu:compare(item, range_limits[$]) > 0 then |
970 | 1 | return 0 |
971 | end if | |
972 | end switch | |
973 | ||
974 | 7 | return 1 |
975 | end function | |
976 | ||
977 | --** | |
978 | -- Tests to see if the ##item## is in a list of values supplied by ##list## | |
979 | -- | |
980 | -- Parameters: | |
981 | -- # ##item## : The object to test for. | |
982 | -- # ##list## : A sequence of elements that ##item## could be a member of. | |
983 | -- | |
984 | -- Returns: | |
985 | -- An **integer**, 0 if ##item## is not in the ##list##, otherwise | |
986 | -- it returns 1. | |
987 | -- | |
988 | -- Example 1: | |
989 | -- | |
990 | -- if is_in_list(user_data, {100, 45, 2, 75, 121}) then | |
991 | -- procA(user_data) | |
992 | -- end if | |
993 | -- | |
994 | ||
995 | 6 | |
996 | 6 | return (find(item, list) != 0) |
997 | end function | |
998 | ||
999 | --** | |
1000 | -- If the supplied item is in the source list, this returns the corresponding | |
1001 | -- element from the target list. | |
1002 | -- Parameters: | |
1003 | -- # ##find_item##: an object that might exist in ##source_list##. | |
1004 | -- # ##source_list##: a sequence that might contain ##pITem##. | |
1005 | -- # ##target_list##: a sequence from which the corresponding item will be returned. | |
1006 | -- # ##def_value##: an object (defaults to zero). This is returned when ##find_item## | |
1007 | -- is not in ##source_list## **and** ##target_list## is not longer than ##source_list##. | |
1008 | -- | |
1009 | -- Returns: | |
1010 | -- an object | |
1011 | -- * If ##find_item## is found in ##source_list## then this is the corresponding element | |
1012 | -- from ##target_list## | |
1013 | -- * If ##find_item## is not in ##source_list## then if ##target_list## is longer than ##source_list## | |
1014 | -- then the last item in ##target_list## is returned otherwise ##def_value## is returned. | |
1015 | -- | |
1016 | -- Examples: | |
1017 | -- | |
1018 | -- lookup('a', "cat", "dog") --> 'o' | |
1019 | -- lookup('d', "cat", "dogx") --> 'x' | |
1020 | -- lookup('d', "cat", "dog") --> 0 | |
1021 | -- lookup('d', "cat", "dog", -1) --> -1 | |
1022 | -- lookup("ant", {"ant","bear","cat"}, {"spider","seal","dog","unknown"}) --> "spider" | |
1023 | -- lookup("dog", {"ant","bear","cat"}, {"spider","seal","dog","unknown"}) --> "unknown" | |
1024 | -- | |
1025 | -- | |
1026 | 9 | |
1027 | integer lPosn | |
1028 | ||
1029 | 9 | lPosn = find(find_item, source_list) |
1030 | 9 | if lPosn then |
1031 | 3 | if lPosn <= length(target_list) then |
1032 | 2 | return target_list[lPosn] |
1033 | else | |
1034 | 1 | return def_value |
1035 | end if | |
1036 | ||
1037 | 6 | elsif length(target_list) > length(source_list) then |
1038 | -- Return the default built into the target list | |
1039 | 3 | return target_list[$] |
1040 | ||
1041 | else | |
1042 | -- Return the supplied default | |
1043 | 3 | return def_value |
1044 | end if | |
1045 | ||
1046 | end function | |
1047 | ||
1048 | --** | |
1049 | -- If the supplied item is in a source grid column, this returns the corresponding | |
1050 | -- element from the target column. | |
1051 | -- Parameters: | |
1052 | -- # ##find_item##: an object that might exist in ##source_col##. | |
1053 | -- # ##grid_data##: a 2D grid sequence that might contain ##pITem##. | |
1054 | -- # ##source_col##: an integer. The column number to look for ##find_item##. | |
1055 | -- # ##target_col##: an integer. The column number from which the corresponding | |
1056 | -- item will be returned. | |
1057 | -- # ##def_value##: an object (defaults to zero). This is returned when ##find_item## | |
1058 | -- is not found in the ##source_col## column, or if found but the target column | |
1059 | -- does not exist. | |
1060 | -- | |
1061 | -- Comments: | |
1062 | -- * If a row in the grid is actually a single atom, the row is ignored. | |
1063 | -- * If a row's length is less than the ##source_col##, the row is ignored. | |
1064 | -- | |
1065 | -- Returns: | |
1066 | -- an object | |
1067 | -- * If ##find_item## is found in the ##source_col## column then this is the corresponding element | |
1068 | -- from the ##target_col## column. | |
1069 | -- | |
1070 | -- Examples: | |
1071 | -- | |
1072 | -- sequence grid | |
1073 | -- grid = { | |
1074 | -- {"ant", "spider", "mortein"}, | |
1075 | -- {"bear", "seal", "gun"}, | |
1076 | -- {"cat", "dog", "ranger"}, | |
1077 | -- $ | |
1078 | -- } | |
1079 | -- vlookup("ant", grid, 1, 2, "?") --> "spider" | |
1080 | -- vlookup("ant", grid, 1, 3, "?") --> "mortein" | |
1081 | -- vlookup("seal", grid, 2, 3, "?") --> "gun" | |
1082 | -- vlookup("seal", grid, 2, 1, "?") --> "bear" | |
1083 | -- vlookup("mouse", grid, 2, 3, "?") --> "?" | |
1084 | -- | |
1085 | -- | |
1086 | 28 | |
1087 | integer lPosn | |
1088 | ||
1089 | 28 | for i = 1 to length(grid_data) do |
1090 | 306 | if atom(grid_data[i]) then |
1091 | 0 | continue |
1092 | end if | |
1093 | 306 | if length(grid_data[i]) < source_col then |
1094 | 0 | continue |
1095 | end if | |
1096 | ||
1097 | 306 | if equal(find_item, grid_data[i][source_col]) then |
1098 | 27 | if length(grid_data[i]) < target_col then |
1099 | 0 | return def_value |
1100 | end if | |
1101 | 27 | return grid_data[i][target_col] |
1102 | end if | |
1103 | 279 | end for |
1104 | ||
1105 | 1 | return def_value |
1106 | ||
1107 | end function | |
1108 |