COVERAGE SUMMARY
FILE SUMMARY
Name | Executed | Routines | % | Executed | Lines | % | Unexecuted
/home/matt/eu/rds/include/std/net/url.e | 4 | 4 | 100.00% | 119 | 129 | 92.25% | 10
ROUTINE SUMMARY
Routine | Executed | Lines | % | Unexecuted
parse() | 56 | 61 | 91.80% | 5
decode() | 14 | 18 | 77.78% | 4
parse_querystring() | 29 | 30 | 96.67% | 1
encode() | 15 | 15 | 100.00% | 0
LINE COVERAGE DETAIL
#Executed
1
--****
2
-- == URL handling
3
--
4
5
namespace url
6
7
include std/get.e
8
include std/map.e
9
10
--****
11
-- === Parsing
12
--
13
142
constant PAIR_SEP = {'&', ';'}, HEX_SIG = '%', WHITESPACE = '+', VALUE_SEP = '='
15
16
--**
17
-- Parse a query string into a map
18
--
19
-- Parameters:
20
-- # ##query_string##: Query string to parse
21
--
22
-- Returns:
23
-- [[:map]] containing the key/value pairs
24
--
25
-- Example 1:
26
--
27
-- map qs = parse_querystring("name=John&age=18")
28
-- printf(1, "%s is %s years old\n", { map:get(qs, "name"), map:get(qs, "age") })
29
--
30
--
31
322
33
atom i, char
34
object tmp
352
sequence charbuf, fieldbuf, fname=""
362
map:map the_map = map:new()
37
382
if atom(query_string) then
390
return the_map
40
end if
41
422
charbuf = {} fieldbuf = {} i = 1
432
while i <= length(query_string) do
4457
char = query_string[i] -- character we're working on
4557
if equal(char, HEX_SIG) then
461
tmp = value("#" & query_string[i+1] & query_string[i+2])
471
charbuf &= tmp[2]
481
i += 3
4956
elsif equal(char, WHITESPACE) then
501
charbuf &= " "
511
i += 1
5255
elsif equal(char, VALUE_SEP) then
535
fname = charbuf
545
charbuf = {}
555
i += 1
5650
elsif find(char, PAIR_SEP) then
573
map:put(the_map, fname, charbuf)
583
fname = {}
593
charbuf = {}
603
i += 1
61
else
6247
charbuf &= char
6347
i += 1
64
end if
6557
end while
66
672
if length(fname) then
682
map:put(the_map, fname, charbuf)
69
end if
70
712
return the_map
72
end function
73
742
public enum URL_PROTOCOL, URL_HOSTNAME, URL_PORT, URL_PATH, URL_USER, URL_PASSWORD,
752
URL_QUERY_STRING
76
77
--**
78
-- Parse a URL returning its various elements.
79
--
80
-- Parameters:
81
-- # ##url##: URL to parse
82
-- # ##querystring_also##: Parse the query string into a map also?
83
--
84
-- Returns:
85
-- A multi-element sequence containing:
86
-- # protocol
87
-- # host name
88
-- # port
89
-- # path
90
-- # user name
91
-- # password
92
-- # query string
93
--
94
-- Or, zero if the URL could not be parsed.
95
--
96
-- Notes:
97
-- If the host name, port, path, username, password or query string are not part of the
98
-- URL they will be returned as an integer value of zero.
99
--
100
-- Example 1:
101
--
102
-- sequence parsed = parse("http://user:pass@www.debian.org:80/index.html?name=John&age=39")
103
-- -- parsed is
104
-- -- {
105
-- -- "http",
106
-- -- "www.debian.org",
107
-- -- 80,
108
-- -- "/index.html",
109
-- -- "user",
110
-- -- "pass",
111
-- -- "name=John&age=39"
112
-- -- }
113
--
114
--
115
1169
1179
sequence protocol = ""
118
object host_name, path, user_name, password, query_string
119
integer port
120
121
-- Set the defaults for some optional values
1229
host_name = 0
1239
port = 0
1249
path = 0
1259
user_name = 0
1269
password = 0
1279
query_string = 0
128
1299
integer pos = find(':', url)
1309
if not pos then
1310
return 0
132
end if
133
1349
protocol = url[1..pos - 1]
1359
pos += 1
136
137
-- Can have a maximum of 2 // before we move into the hostname or possibly
138
-- the path (http://john.com) or (file:///home/jeremy/hello.txt)
1399
if url[pos] = '/' then
1408
pos += 1
141
end if
1429
if url[pos] = '/' then
1438
pos += 1
144
end if
1459
if url[pos] = '/' then
146
-- We do not have a username, password, host or port, we have moved right into
147
-- the path area of the URL. Let's jump ahead
1480
goto "parse_path"
149
end if
150
1519
integer at = find('@', url)
1529
if not at then
153
-- We do not have a user or password, skip ahead to parsing the domain
1546
goto "parse_domain"
155
end if
156
1573
integer password_colon = find(':', url, pos)
1583
if password_colon > 0 and password_colon < at then
159
-- We have a password too!
1601
user_name = url[pos..password_colon-1]
1611
password = url[password_colon+1..at-1]
162
else
163
-- Just a user name
1642
user_name = url[pos..at-1]
165
end if
166
1673
pos = at + 1
168
169
label "parse_domain"
170
1719
integer qs_start = find('?', url, pos)
1729
integer first_slash = find('/', url, pos)
1739
integer port_colon = find(':', url, pos)
174
1759
if port_colon then
176
-- We can easily read the host until the port colon
1773
host_name = url[pos..port_colon-1]
178
else
179
-- No port colon: find where the host name ends using the first slash or the query string start
1806
if not first_slash then
181
-- there is no path, thus we must parse to either the query string begin
182
-- or the string end
1834
if not qs_start then
1842
host_name = url[pos..$]
185
else
1862
host_name = url[pos..qs_start-1]
187
end if
188
else
189
-- Ok, we can read up to the first slash
1902
host_name = url[pos..first_slash-1]
191
end if
192
end if
193
1949
if port_colon then
1953
integer port_end = 0
196
1973
if first_slash then
1983
port_end = first_slash - 1
1990
elsif qs_start then
2000
port_end = qs_start - 1
201
else
2020
port_end = length(url)
203
end if
204
2053
port = defaulted_value(url[port_colon+1..port_end], 0)
206
end if
207
208
-- Increment the position to the next element to parse
2099
if first_slash then
2105
pos = first_slash
2114
elsif qs_start then
2122
pos = qs_start
213
else
214
-- Nothing more to parse
2152
goto "parse_done"
216
end if
217
218
label "parse_path"
219
2207
if not qs_start then
2213
path = url[pos..$]
2223
goto "parse_done"
223
end if
224
225
-- Avoid getting a path when there is none.
2264
if pos != qs_start then
2272
path = url[pos..qs_start - 1]
228
end if
229
2304
pos = qs_start
231
232
label "parse_query_string"
233
2344
query_string = url[qs_start + 1..$]
235
2364
if querystring_also and length(query_string) then
2371
query_string = parse_querystring(query_string)
238
end if
239
240
label "parse_done"
2419
return { protocol, host_name, port, path, user_name, password, query_string }
242
end function
243
244
--****
245
-- === URL encoding and decoding
246
--
247
248
-- TODO: This is causing a creole parsing problem
249
-- HTML form data is usually URL-encoded to package it into a GET or POST submission.
250
-- In a nutshell, here's how you URL-encode the name-value pairs of the form data:
251
-- # Convert all "unsafe" characters in the names and values to "%xx", where "xx" is the ascii
252
-- value of the character, in hex. "Unsafe" characters include =, &, %, +, non-printable
253
-- characters, and any others you want to encode-- there's no danger in encoding too many
254
-- characters. For simplicity, you might encode all non-alphanumeric characters.
255
-- A big nono is \n and \r chars in POST data.
256
-- # Change all spaces to pluses.
257
-- # String the names and values together with = and &, like
258
-- name1=value1&name2=value2&name3=value3
259
-- # This string is your message body for POST submissions, or the query string for GET submissions.
260
--
261
-- For example, if a form has a field called "name" that's set to "Lucy", and a field called "neighbors"
262
-- that's set to "Fred & Ethel", the URL-encoded form data would be:
263
--
264
-- name=Lucy&neighbors=Fred+%26+Ethel <<== note no \n or \r
265
--
266
-- with a length of 34.
267
268
constant
2692
alphanum = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz01234567890",
2702
hexnums = "0123456789ABCDEF"
271
272
--**
273
-- Converts all non-alphanumeric characters in a string to their
274
-- percent-sign hexadecimal representation, or plus sign for
275
-- spaces.
276
--
277
-- Parameters:
278
-- # ##what## : the string to encode
279
-- # ##spacecode## : what to insert in place of a space
280
--
281
-- Returns:
282
-- A **sequence**, the encoded string.
283
--
284
-- Comments:
285
-- ##spacecode## defaults to ##+## as it is more correct, however, some sites
286
-- want ##%20## as the space encoding.
287
--
288
-- Example 1:
289
--
290
-- puts(1, encode("Fred & Ethel"))
291
-- -- Prints "Fred+%26+Ethel"
292
--
293
--
294
-- See Also:
295
-- [[:decode]]
296
--
297
2982
2992
sequence encoded = ""
3002
object junk = "", junk1, junk2
301
3022
for idx = 1 to length(what) do
30326
if find(what[idx],alphanum) then
30420
encoded &= what[idx]
305
3066
elsif equal(what[idx],' ') then
3074
encoded &= spacecode
308
3092
elsif 1 then
3102
junk = what[idx]
3112
junk1 = floor(junk / 16)
3122
junk2 = floor(junk - (junk1 * 16))
3132
encoded &= "%" & hexnums[junk1+1] & hexnums[junk2+1]
314
end if
31526
end for
316
3172
return encoded
318
end function
319
320
--**
321
-- Convert all encoded entities to their decoded counterparts
322
--
323
-- Parameters:
324
-- # ##what##: what value to decode
325
--
326
-- Returns:
327
-- A decoded sequence
328
--
329
-- Example 1:
330
--
331
-- puts(1, decode("Fred+%26+Ethel"))
332
-- -- Prints "Fred & Ethel"
333
--
334
--
335
-- See Also:
336
-- [[:encode]]
337
--
338
3392
3402
integer k = 1
341
3422
while k <= length(what) do
34326
if what[k] = '+' then
3442
what[k] = ' ' -- '+' is a special case: it decodes to a space
34524
elsif what[k] = '%' then
3464
if k = length(what) then
347
-- strip empty percent sign
3480
what = what[1..k-1] & what[k+1 .. $]
3494
elsif k+1 = length(what) then
3500
what[k] = value("#0" & what[k+1])
3510
what[k] = what[k][2]
3520
what = what[1..k] & what[k+2 .. $]
353
else
3544
what[k] = value("#" & what[k+1..k+2])
3554
what[k] = what[k][2]
3564
what = what[1..k] & what[k+3 .. $]
357
end if
358
else
359
-- do nothing if it is a regular char ('0' or 'A' or etc)
360
end if
361
36226
k += 1
36326
end while
364
3652
return what
366
end function