COVERAGE SUMMARY

FILE SUMMARY

Name	Executed	Routines	%	Executed	Lines	%	Unexecuted
/home/matt/eu/rds/include/std/stats.e	21	21	100.00%	277	278	99.64%	1

ROUTINE SUMMARY

Routine	Executed	Lines		Unexecuted
movavg()	29	30	96.67%	1
avedev()	21	21	100.00%	0
average()	7	7	100.00%	0
central_moment()	6	6	100.00%	0
count()	4	4	100.00%	0
emovavg()	14	14	100.00%	0
geomean()	19	19	100.00%	0
harmean()	19	19	100.00%	0
kurtosis()	10	10	100.00%	0
largest()	16	16	100.00%	0
massage()	5	5	100.00%	0
median()	10	10	100.00%	0
mode()	12	12	100.00%	0
range()	20	20	100.00%	0
raw_frequency()	22	22	100.00%	0
skewness()	7	7	100.00%	0
small()	5	5	100.00%	0
smallest()	16	16	100.00%	0
stdev()	19	19	100.00%	0
sum()	9	9	100.00%	0
sum_central_moments()	2	2	100.00%	0

LINE COVERAGE DETAIL

#	Executed
1		-- (c) Copyright - See License.txt
2		--
3		namespace stats
4
5		--****
6		-- == Statistics
7		-- Page Contents
8		--
9		-- <>
10		--
11		-- === Routines
12
13		include std/math.e
14		include std/sort.e
15		include std/sequence.e
16
17
18		--**
19		-- Determines the k-th smallest value from the supplied set of numbers.
20		--
21		-- Parameters:
22		-- # ##data_set## : The list of values from which the smallest value is chosen.
23		-- # ##ordinal_idx## : The relative index of the desired smallest value.
24		--
25		-- Returns:
26		-- A sequence, {The k-th smallest value, its index in the set}
27		--
28		-- Comments:
29		-- ##small##() is used to return a value based on its size relative to
30		-- all the other elements in the sequence. When ##ordinal_idx## is 1, the smallest value is returned. Use ##ordinal_idx = length(data_set)## to return the highest.
31		--
32		-- If ##ordinal_idx## is less than one, or greater than the length of ##data_set##,
33		-- an empty sequence is returned.
34		--
35		-- The set of values does not have to be in any particular order. The values may be any Euphoria object.
36		--
37		-- Example 1:
38		--
39		-- ? small( {4,5,6,8,5,4,3,"text"}, 3 ) -- Ans: {4,1} (The 3rd smallest value)
40		-- ? small( {4,5,6,8,5,4,3,"text"}, 1 ) -- Ans: {3,7} (The 1st smallest value)
41		-- ? small( {4,5,6,8,5,4,3,"text"}, 7 ) -- Ans: {8,4} (The 7th smallest value)
42		-- ? small( {"def", "qwe", "abc", "try"}, 2 ) -- Ans: {"def", 1} (The 2nd smallest value)
43		-- ? small( {1,2,3,4}, -1) -- Ans: {} -- no-value
44		-- ? small( {1,2,3,4}, 10) -- Ans: {} -- no-value
45		--
46		--
47
48	4	public function small(sequence data_set, integer ordinal_idx)
49		sequence lSortedData
50
51	4	if ordinal_idx < 1 or ordinal_idx > length(data_set) then
52	3	return {}
53		end if
54
55	1	lSortedData = sort(data_set)
56
57	1	return {lSortedData[ordinal_idx], find(lSortedData[ordinal_idx], data_set)}
58		end function
59
60		--**
61		-- Returns the largest of the data points that are atoms.
62		--
63		-- Parameters:
64		-- # ##data_set## : a list of 1 or more numbers among which you want the largest.
65		--
66		-- Returns:
67		-- An object, either of:
68		-- * an atom (the largest value) if there is at least one atom item in the set\\
69		-- * ##{}## if there //is// no largest value.
70		--
71		-- Comments:
72		-- Any ##data_set## element which is not an atom is ignored.
73		--
74		-- Example 1:
75		--
76		-- ? largest( {7,2,8,5,6,6,4,8,6,6,3,3,4,1,8,"text"} ) -- Ans: 8
77		-- ? largest( {"just","text"} ) -- Ans: {}
78		--
79		--
80		-- See also:
81		-- [[:range]]
82		--
83	3	public function largest(object data_set)
84		atom result_, temp_
85		integer lFoundAny
86	3	if atom(data_set) then
87	1	return data_set
88		end if
89	2	lFoundAny = 0
90	2	for i = 1 to length(data_set) do
91	18	if atom(data_set[i]) then
92	15	temp_ = data_set[i]
93	15	if lFoundAny then
94	14	if temp_ > result_ then
95	1	result_ = temp_
96		end if
97		else
98	1	result_ = temp_
99	1	lFoundAny = 1
100		end if
101		end if
102	18	end for
103	2	if lFoundAny = 0 then
104	1	return {}
105		end if
106	1	return result_
107		end function
108
109		--**
110		-- Returns the smallest of the data points.
111		--
112		-- Parameters:
113		-- # ##data_set## : A list of 1 or more numbers for which you want the smallest.
114		-- Note: only atom elements are included and any sub-sequences
115		-- elements are ignored.
116		--
117		-- Returns:
118		-- An object, either of:
119		-- * an atom (the smallest value) if there is at least one atom item in the set\\
120		-- * ##{}## if there //is// no smallest value.
121		--
122		-- Comments:
123		-- Any ##data_set## element which is not an atom is ignored.
124		--
125		-- Example 1:
126		--
127		-- ? smallest( {7,2,8,5,6,6,4,8,6,6,3,3,4,1,8,"text"} ) -- Ans: 1
128		-- ? smallest( {"just","text"} ) -- Ans: {}
129		--
130		--
131		-- See also:
132		-- [[:range]]
133	3	public function smallest(object data_set)
134		atom result_, temp_
135		integer lFoundAny
136	3	if atom(data_set) then
137	1	return data_set
138		end if
139	2	lFoundAny = 0
140	2	for i = 1 to length(data_set) do
141	18	if atom(data_set[i]) then
142	15	temp_ = data_set[i]
143	15	if lFoundAny then
144	14	if temp_ < result_ then
145	2	result_ = temp_
146		end if
147		else
148	1	result_ = temp_
149	1	lFoundAny = 1
150		end if
151		end if
152	18	end for
153	2	if lFoundAny = 0 then
154	1	return {}
155		end if
156	1	return result_
157		end function
158
159		--**
160		-- Determines a number of //range// statistics for the data set.
161		--
162		-- Parameters:
163		-- # ##data_set## : a list of 1 or more numbers for which you want the range data.
164		--
165		-- Returns:
166		-- A sequence, empty if no atoms were found, else like {Lowest, Highest, Range, Mid-range}
167		--
168		-- Comments:
169		-- Any sequence element in ##data_set## is ignored.
170		--
171		-- Example 1:
172		--
173		-- ? range( {7,2,8,5,6,6,4,8,6,16,3,3,4,1,8,"text"} ) -- Ans: {1, 16, 15, 8.5}
174		--
175		--
176		-- See also:
177		-- [[:smallest]] [[:largest]]
178		--
179	3	public function range(object data_set)
180		sequence result_
181		atom temp_
182	3	integer lFoundAny = 0
183
184	3	if atom(data_set) then
185	1	data_set = {data_set}
186		end if
187
188	3	for i = 1 to length(data_set) do
189	17	if atom(data_set[i]) then
190	16	temp_ = data_set[i]
191	16	if lFoundAny then
192	14	if temp_ < result_[1] then
193	2	result_[1] = temp_
194	12	elsif temp_ > result_[2] then
195	2	result_[2] = temp_
196		end if
197		else
198	2	result_ = {temp_, temp_, 0, 0}
199	2	lFoundAny = 1
200		end if
201		end if
202	17	end for
203	3	if lFoundAny = 0 then
204	1	return {}
205		end if
206	2	result_[3] = result_[2] - result_[1]
207	2	result_[4] = (result_[1] + result_[2]) / 2
208	2	return result_
209		end function
210
211		--****
212		-- Enums used to influence the results of some of these functions.
213
214		public enum
215		--**
216		-- The supplied data is the entire population.
217	21	ST_FULLPOP,
218
219		--**
220		-- The supplied data is only a random sample of the population.
221	21	ST_SAMPLE
222
223		public enum
224		--**
225		-- The supplied data consists of only atoms.
226	21	ST_ALLNUM,
227
228		--**
229		-- Any sub-sequences (eg. strings) in the supplied data are ignored.
230	21	ST_IGNSTR,
231
232		--**
233		-- Any sub-sequences (eg. strings) in the supplied data are assumed to
234		-- have the value zero.
235	21	ST_ZEROSTR,
236
237		$
238
239	151	function massage(sequence data_set, object subseq_opt)
240	151	switch subseq_opt do
241		case ST_IGNSTR then
242	16	return remove_subseq(data_set, SEQ_NOALT)
243
244		case ST_ZEROSTR then
245	3	return remove_subseq(data_set, 0)
246
247		case else
248	132	return data_set
249		end switch
250		end function
251
252		--**
253		-- Returns the standard deviation based on the population.
254		--
255		-- Parameters:
256		-- # ##data_set## : a list of 1 or more numbers for which you want the estimated standard deviation.
257		-- # ##subseq_opt## : an object. When this is ST_ALLNUM (the default) it
258		-- means that ##data_set## is assumed to contain no sub-sequences otherwise this
259		-- gives instructions about how to treat sub-sequences. See comments for details.
260		-- # ##population_type## : an integer. ST_SAMPLE (the default) assumes that ##data_set## is a random
261		-- sample of the total population. ST_FULLPOP means that ##data_set## is the
262		-- entire population.
263		--
264		-- Returns:
265		-- An atom, the estimated standard deviation.
266		-- An empty sequence means that there is no meaningful data to calculate from.
267		--
268		-- Comments:
269		-- ##stdev##() is a measure of how values are different from the average.
270		--
271		-- The numbers in ##data_set## can either be the entire population of values or
272		-- just a random subset. You indicate which in the ##population_type## parameter. By default
273		-- ##data_set## represents a sample and not the entire population. When using this
274		-- function with sample data, the result is an //estimated// standard deviation.
275		--
276		-- If the data can contain sub-sequences, such as strings, you need to let the
277		-- function know about this; otherwise it assumes every value in ##data_set## is
278		-- a number. If that is not the case then the function will crash. So it is
279		-- important that if it can possibly contain sub-sequences that you tell this
280		-- function what to do with them. Your choices are to ignore them or assume they
281		-- have the value zero. To ignore them, use ST_IGNSTR as the ##subseq_opt## parameter
282		-- value otherwise use ST_ZEROSTR. However, if you know that ##data_set## only
283		-- contains numbers use the default ##subseq_opt## value, ST_ALLNUM.
284		-- Note It is faster if the data only contains numbers.
285		--
286		-- The equation for standard deviation is (for ST_SAMPLE the divisor is N-1 instead of N):
287		-- {{{
288		-- stdev(X) ==> SQRT(SUM(SQ(X{1..N} - MEAN)) / (N))
289		-- }}}
290		--
291		-- Example 1:
292		--
293		-- ? stdev( {4,5,6,7,5,4,3,7} ) -- Ans: 1.457737974
294		-- ? stdev( {4,5,6,7,5,4,3,7} ,, ST_FULLPOP) -- Ans: 1.363589014
295		-- ? stdev( {4,5,6,7,5,4,3,"text"} , ST_IGNSTR) -- Ans: 1.345185418
296		-- ? stdev( {4,5,6,7,5,4,3,"text"}, ST_IGNSTR, ST_FULLPOP ) -- Ans: 1.245399698
297		-- ? stdev( {4,5,6,7,5,4,3,"text"} , ST_ZEROSTR) -- Ans: 2.121320344
298		-- ? stdev( {4,5,6,7,5,4,3,"text"}, ST_ZEROSTR, ST_FULLPOP ) -- Ans: 1.984313483
299		--
300		--
301		-- See also:
302		-- [[:average]], [[:avedev]]
303		--
304
305	39	public function stdev(sequence data_set, object subseq_opt = ST_ALLNUM, integer population_type = ST_SAMPLE)
306		atom lSum
307		atom lMean
308		integer lCnt
309
310	39	data_set = massage(data_set, subseq_opt)
311
312	39	lCnt = length(data_set)
313
314	39	if lCnt = 0 then
315	6	return {}
316		end if
317	33	if lCnt = 1 then
318	6	return 0
319		end if
320
321	27	lSum = 0
322	27	for i = 1 to length(data_set) do
323	23535	lSum += data_set[i]
324	23535	end for
325
326	27	lMean = lSum / lCnt
327	27	lSum = 0
328	27	for i = 1 to length(data_set) do
329	23535	lSum += power(data_set[i] - lMean, 2)
330	23535	end for
331
332	27	if population_type = ST_SAMPLE then
333	25	lCnt -= 1
334		end if
335
336	27	return power(lSum / lCnt, 0.5)
337		end function
338
339		--**
340		-- Returns the average of the absolute deviations of data points from their mean.
341		--
342		-- Parameters:
343		-- # ##data_set## : a list of 1 or more numbers for which you want the mean of the absolute deviations.
344		-- # ##subseq_opt## : an object. When this is ST_ALLNUM (the default) it
345		-- means that ##data_set## is assumed to contain no sub-sequences otherwise this
346		-- gives instructions about how to treat sub-sequences. See comments for details.
347		-- # ##population_type## : an integer. ST_SAMPLE (the default) assumes that ##data_set## is a random
348		-- sample of the total population. ST_FULLPOP means that ##data_set## is the
349		-- entire population.
350		--
351		-- Returns:
352		-- An atom , the deviation from the mean.\\
353		-- An empty sequence, means that there is no meaningful data to calculate from.
354		--
355		-- Comments:
356		-- ##avedev##() is a measure of the variability in a data set. Its statistical
357		-- properties are less well behaved than those of the standard deviation, which is
358		-- why it is used less.
359		--
360		-- The numbers in ##data_set## can either be the entire population of values or
361		-- just a random subset. You indicate which in the ##population_type## parameter. By default
362		-- ##data_set## represents a sample and not the entire population. When using this
363		-- function with sample data, the result is an //estimated// deviation.
364		--
365		-- If the data can contain sub-sequences, such as strings, you need to let the
366		-- function know about this; otherwise it assumes every value in ##data_set## is
367		-- a number. If that is not the case then the function will crash. So it is
368		-- important that if it can possibly contain sub-sequences that you tell this
369		-- function what to do with them. Your choices are to ignore them or assume they
370		-- have the value zero. To ignore them, use ST_IGNSTR as the ##subseq_opt## parameter
371		-- value otherwise use ST_ZEROSTR. However, if you know that ##data_set## only
372		-- contains numbers use the default ##subseq_opt## value, ST_ALLNUM.
373		-- Note It is faster if the data only contains numbers.
374		--
375		-- The equation for absolute average deviation is (for ST_SAMPLE the divisor is N-1 instead of N)~:
376		-- {{{
377		-- avedev(X) ==> SUM( ABS(X{1..N} - MEAN(X)) ) / N
378		-- }}}
379		--
380		-- Example 1:
381		--
382		-- ? avedev( {7,2,8,5,6,6,4,8,6,6,3,3,4,1,8,7} ) -- Ans: 1.966666667
383		-- ? avedev( {7,2,8,5,6,6,4,8,6,6,3,3,4,1,8,7},, ST_FULLPOP ) -- Ans: 1.84375
384		-- ? avedev( {7,2,8,5,6,6,4,8,6,6,3,3,4,1,8,"text"}, ST_IGNSTR ) -- Ans: 1.99047619
385		-- ? avedev( {7,2,8,5,6,6,4,8,6,6,3,3,4,1,8,"text"}, ST_IGNSTR,ST_FULLPOP ) -- Ans: 1.857777778
386		-- ? avedev( {7,2,8,5,6,6,4,8,6,6,3,3,4,1,8,"text"}, ST_ZEROSTR ) -- Ans: 2.225
387		-- ? avedev( {7,2,8,5,6,6,4,8,6,6,3,3,4,1,8,"text"}, ST_ZEROSTR, ST_FULLPOP ) -- Ans: 2.0859375
388		--
389		--
390		-- See also:
391		-- [[:average]], [[:stdev]]
392		--
393
394	4	public function avedev(sequence data_set, object subseq_opt = ST_ALLNUM, integer population_type = ST_SAMPLE)
395		atom lSum
396		atom lMean
397		integer lCnt
398
399	4	data_set = massage(data_set, subseq_opt)
400
401	4	lCnt = length(data_set)
402
403	4	if lCnt = 0 then
404	2	return {}
405		end if
406	2	if lCnt = 1 then
407	1	return 0
408		end if
409	1	lSum = 0
410
411	1	for i = 1 to length(data_set) do
412	15	lSum += data_set[i]
413	15	end for
414
415	1	lMean = lSum / lCnt
416	1	lSum = 0
417	1	for i = 1 to length(data_set) do
418	15	if data_set[i] > lMean then
419	8	lSum += data_set[i] - lMean
420		else
421	7	lSum += lMean - data_set[i]
422		end if
423	15	end for
424
425	1	if population_type = ST_SAMPLE then
426	1	lCnt -= 1
427		end if
428	1	return lSum / lCnt
429		end function
430
431		--**
432		-- Returns the sum of all the atoms in an object.
433		--
434		-- Parameters:
435		-- # ##data_set## : Either an atom or a list of numbers to sum.
436		-- # ##subseq_opt## : an object. When this is ST_ALLNUM (the default) it
437		-- means that ##data_set## is assumed to contain no sub-sequences otherwise this
438		-- gives instructions about how to treat sub-sequences. See comments for details.
439		--
440		-- Returns:
441		-- An atom, the sum of the set.
442		--
443		-- Comments:
444		-- ##sum##() is used as a measure of the magnitude of a sequence of positive values.
445		--
446		-- If the data can contain sub-sequences, such as strings, you need to let the
447		-- function know about this; otherwise it assumes every value in ##data_set## is
448		-- a number. If that is not the case then the function will crash. So it is
449		-- important that if it can possibly contain sub-sequences that you tell this
450		-- function what to do with them. Your choices are to ignore them or assume they
451		-- have the value zero. To ignore them, use ST_IGNSTR as the ##subseq_opt## parameter
452		-- value otherwise use ST_ZEROSTR. However, if you know that ##data_set## only
453		-- contains numbers use the default ##subseq_opt## value, ST_ALLNUM.
454		-- Note It is faster if the data only contains numbers.
455		--
456		-- The equation is~:
457		--
458		-- {{{
459		-- sum(X) ==> SUM( X{1..N} )
460		-- }}}
461		--
462		-- Example 1:
463		--
464		-- ? sum( {7,2,8.5,6,6,-4.8,6,6,3.341,-8,"text"}, ST_ZEROSTR ) -- Ans: 32.041
465		--
466		--
467		-- See also:
468		-- [[:average]]
469
470	36	public function sum(object data_set, object subseq_opt = ST_ALLNUM)
471		atom result_
472	36	if atom(data_set) then
473	1	return data_set
474		end if
475
476	35	data_set = massage(data_set, subseq_opt)
477	35	result_ = 0
478	35	for i = 1 to length(data_set) do
479	23720	result_ += data_set[i]
480	23720	end for
481
482	35	return result_
483		end function
484
485		--**
486		-- Returns the count of all the atoms in an object.
487		--
488		-- Parameters:
489		-- # ##data_set## : either an atom or a list.
490		-- # ##subseq_opt## : an object. When this is ST_ALLNUM (the default) it
491		-- means that ##data_set## is assumed to contain no sub-sequences otherwise this
492		-- gives instructions about how to treat sub-sequences. See comments for details.
493		--
494		-- Comments:
495		-- This returns the number of numbers in ##data_set##
496		--
497		-- If the data can contain sub-sequences, such as strings, you need to let the
498		-- function know about this; otherwise it assumes every value in ##data_set## is
499		-- a number. If that is not the case then the function will crash. So it is
500		-- important that if it can possibly contain sub-sequences that you tell this
501		-- function what to do with them. Your choices are to ignore them or assume they
502		-- have the value zero. To ignore them, use ST_IGNSTR as the ##subseq_opt## parameter
503		-- value otherwise use ST_ZEROSTR. However, if you know that ##data_set## only
504		-- contains numbers use the default ##subseq_opt## value, ST_ALLNUM.
505		-- Note It is faster if the data only contains numbers.
506		--
507		-- Returns:
508		--
509		-- An integer, the number of atoms in the set. When ##data_set## is an atom, 1 is returned.
510		--
511		-- Example 1:
512		--
513		-- ? count( {7,2,8.5,6,6,-4.8,6,6,3.341,-8,"text"}, ST_IGNSTR ) -- Ans: 10
514		-- ? count( {"cat", "dog", "lamb", "cow", "rabbit"}, ST_IGNSTR ) -- Ans: 0 (no atoms)
515		-- ? count( 5 ) -- Ans: 1
516		--
517		--
518		-- See also:
519		-- [[:average]], [[:sum]]
520
521	13	public function count(object data_set, object subseq_opt = ST_ALLNUM)
522	13	if atom(data_set) then
523	1	return 1
524		end if
525
526	12	return length(massage(data_set, subseq_opt))
527
528		end function
529
530
531		--**
532		-- Returns the average (mean) of the data points.
533		--
534		-- Parameters:
535		-- # ##data_set## : A list of 1 or more numbers for which you want the mean.
536		-- # ##subseq_opt## : an object. When this is ST_ALLNUM (the default) it
537		-- means that ##data_set## is assumed to contain no sub-sequences otherwise this
538		-- gives instructions about how to treat sub-sequences. See comments for details.
539		--
540		--
541		-- Returns:
542		-- An object,
543		-- * ##{}## (the empty sequence) if there are no atoms in the set.
544		-- * an atom (the mean) if there are one or more atoms in the set.
545		--
546		-- Comments:
547		--
548		-- ##average##() is the theoretical probable value of a randomly selected item from the set.
549		--
550		-- The equation for average is:
551		--
552		-- {{{
553		-- average(X) ==> SUM( X{1..N} ) / N
554		-- }}}
555		--
556		-- If the data can contain sub-sequences, such as strings, you need to let the
557		-- function know about this; otherwise it assumes every value in ##data_set## is
558		-- a number. If that is not the case then the function will crash. So it is
559		-- important that if it can possibly contain sub-sequences that you tell this
560		-- function what to do with them. Your choices are to ignore them or assume they
561		-- have the value zero. To ignore them, use ST_IGNSTR as the ##subseq_opt## parameter
562		-- value otherwise use ST_ZEROSTR. However, if you know that ##data_set## only
563		-- contains numbers use the default ##subseq_opt## value, ST_ALLNUM.
564		-- Note It is faster if the data only contains numbers.
565		--
566		-- Example 1:
567		--
568		-- ? average( {7,2,8,5,6,6,4,8,6,6,3,3,4,1,8,"text"}, ST_IGNSTR ) -- Ans: 5.13333333
569		--
570		--
571		-- See also:
572		-- [[:geomean]], [[:harmean]], [[:movavg]], [[:emovavg]]
573		--
574	30	public function average(object data_set, object subseq_opt = ST_ALLNUM)
575
576	30	if atom(data_set) then
577	1	return data_set
578		end if
579
580	29	data_set = massage(data_set, subseq_opt)
581
582	29	if length(data_set) = 0 then
583	1	return {}
584		end if
585	28	return sum(data_set) / length(data_set)
586		end function
587
588		--**
589		-- Returns the geometric mean of the atoms in a sequence.
590		--
591		-- Parameters:
592		-- # ##data_set## : the values to take the geometric mean of.
593		-- # ##subseq_opt## : an object. When this is ST_ALLNUM (the default) it
594		-- means that ##data_set## is assumed to contain no sub-sequences otherwise this
595		-- gives instructions about how to treat sub-sequences. See comments for details.
596		--
597		-- Returns:
598		--
599		-- An atom, the geometric mean of the atoms in ##data_set##.
600		-- If there is no atom to take the mean of, 1 is returned.
601		--
602		-- Comments:
603		--
604		-- The geometric mean of ##N## atoms is the N-th root of their product. Signs are ignored.
605		--
606		-- This is useful to compute average growth rates.
607		--
608		-- If the data can contain sub-sequences, such as strings, you need to let the
609		-- function know about this; otherwise it assumes every value in ##data_set## is
610		-- a number. If that is not the case then the function will crash. So it is
611		-- important that if it can possibly contain sub-sequences that you tell this
612		-- function what to do with them. Your choices are to ignore them or assume they
613		-- have the value zero. To ignore them, use ST_IGNSTR as the ##subseq_opt## parameter
614		-- value otherwise use ST_ZEROSTR. However, if you know that ##data_set## only
615		-- contains numbers use the default ##subseq_opt## value, ST_ALLNUM.
616		-- Note It is faster if the data only contains numbers.
617		--
618		-- Example 1:
619		--
620		-- ? geomean({3, "abc", -2, 6}, ST_IGNSTR) -- prints out power(36,1/3) = 3.30192724889462669
621		-- ? geomean({1,2,3,4,5,6,7,8,9,10}) -- = 4.528728688
622		--
623		--
624		-- See Also:
625		-- [[:average]]
626
627	6	public function geomean(object data_set, object subseq_opt = ST_ALLNUM)
628	6	atom prod_ = 1.0
629		integer count_
630
631	6	if atom(data_set) then
632	1	return data_set
633		end if
634
635	5	data_set = massage(data_set, subseq_opt)
636
637	5	count_ = length(data_set)
638	5	if count_ = 0 then
639	1	return 1
640		end if
641	4	if count_ = 1 then
642	1	return data_set[1]
643		end if
644
645	3	for i = 1 to length(data_set) do
646	9	atom x = data_set[i]
647
648	9	if x = 0 then
649	1	return 0
650		else
651	8	prod_ *= x
652		end if
653
654	8	end for
655
656	2	if prod_ < 0 then
657	1	return power(-prod_, 1/count_)
658		else
659	1	return power(prod_, 1/count_)
660		end if
661
662		end function
663
664		--**
665		-- Returns the harmonic mean of the atoms in a sequence.
666		--
667		-- Parameters:
668		-- # ##data_set## : the values to take the harmonic mean of.
669		-- # ##subseq_opt## : an object. When this is ST_ALLNUM (the default) it
670		-- means that ##data_set## is assumed to contain no sub-sequences otherwise this
671		-- gives instructions about how to treat sub-sequences. See comments for details.
672		--
673		-- Returns:
674		--
675		-- An atom, the harmonic mean of the atoms in ##data_set##.
676		--
677		-- Comments:
678		-- The harmonic mean is the inverse of the average of their inverses.
679		--
680		-- This is useful in engineering to compute equivalent capacities and resistances.
681		--
682		-- If the data can contain sub-sequences, such as strings, you need to let the
683		-- function know about this; otherwise it assumes every value in ##data_set## is
684		-- a number. If that is not the case then the function will crash. So it is
685		-- important that if it can possibly contain sub-sequences that you tell this
686		-- function what to do with them. Your choices are to ignore them or assume they
687		-- have the value zero. To ignore them, use ST_IGNSTR as the ##subseq_opt## parameter
688		-- value otherwise use ST_ZEROSTR. However, if you know that ##data_set## only
689		-- contains numbers use the default ##subseq_opt## value, ST_ALLNUM.
690		-- Note It is faster if the data only contains numbers.
691		--
692		-- Example 1:
693		--
694		-- ? harmean({3, "abc", -2, 6}, ST_IGNSTR) -- = 0.
695		-- ? harmean({2, 3, 4}) -- 3 / (1/2 + 1/3 + 1/4) = 2.769230769
696		--
697		--
698		-- See Also:
699		-- [[:average]]
700
701	3	public function harmean(sequence data_set, object subseq_opt = ST_ALLNUM)
702		integer count_
703
704	3	data_set = massage(data_set, subseq_opt)
705
706	3	count_ = length(data_set)
707	3	if count_ = 1 then
708	1	return data_set[1]
709		end if
710
711	2	atom y = 0
712	2	atom z = 1
713	2	for i = 1 to count_ do
714	3	atom x = 1
715	3	z *= data_set[i]
716	3	for j = 1 to count_ do
717	9	if j != i then
718	6	x *= data_set[j]
719		end if
720	9	end for
721	3	y += x
722	3	end for
723
724	2	if y = 0 then
725	1	return 0
726		end if
727
728	1	return count_ * z / y
729		end function
730
731		--**
732		-- Returns the average (mean) of the data points for overlapping periods. This
733		-- can be either a simple or weighted moving average.
734		--
735		-- Parameters:
736		-- # ##data_set## : a list of 1 or more numbers for which you want a moving average.
737		-- # ##period_delta## : an object, either
738		-- * an integer representing the size of the period, or
739		-- * a list of weightings to apply to the respective period positions.
740		--
741		-- Returns:
742		-- A sequence, either the requested averages or ##{}## if the Data sequence is empty or
743		-- the supplied period is less than one.
744		--
745		-- If a list of weights was supplied, the result is a weighted average; otherwise, it is a simple average.
746		--
747		-- Comments:
748		--
749		-- A moving average is used to smooth out a set of data points over a period.\\
750		-- For example, given a period of 5:
751		-- # the first returned element is the average
752		-- of the first five data points [1..5],
753		-- # the second returned element is
754		-- the average of the second five data points [2..6], \\and so on \\until
755		-- the last returned value is the average of the last 5 data points
756		-- [$-4 .. $].
757		--
758		-- When ##period_delta## is an atom, it is rounded down to the width of the average. When it is a
759		-- sequence, the width is its length. If there are not enough data points, zeroes are inserted.
760		--
761		-- Note that only atom elements are included and any sub-sequence elements are ignored.
762		--
763		-- Example 1:
764		--
765		-- ? movavg( {7,2,8,5,6,6,4,8,6,6,3,3,4,1,8}, 10 )
766		-- -- Ans: {5.8, 5.4, 5.5, 5.1, 4.7, 4.9}
767		-- ? movavg( {7,2,8,5,6}, 2 )
768		-- -- Ans: {4.5, 5, 6.5, 5.5}
769		-- ? movavg( {7,2,8,5,6}, {0.5, 1.5} )
770		-- -- Ans: {3.25, 6.5, 5.75, 5.75}
771		--
772		--
773		-- See also:
774		-- [[:average]]
775		--
776	10	public function movavg(object data_set, object period_delta)
777		sequence result_
778		integer lLow
779		integer lHigh
780		integer j
781		integer n
782
783	10	if atom(data_set) then
784	2	data_set = {data_set}
785
786	8	elsif count(data_set) = 0 then
787	1	return data_set
788		end if
789
790	9	if atom(period_delta) then
791	7	if floor(period_delta) < 1 then
792	2	return {}
793		end if
794	5	period_delta = repeat(1, floor(period_delta))
795		end if
796
797	7	if length(data_set) < length(period_delta) then
798	1	data_set = repeat(0, length(period_delta) - length(data_set)) & data_set
799		end if
800	7	lLow = 1
801	7	lHigh = length(period_delta)
802	7	result_ = repeat(0, length(data_set) - length(period_delta) + 1)
803	7	while lHigh <= length(data_set) do
804	26	j = 1
805	26	n = 0
806	26	for i = lLow to lHigh do
807	147	if atom(data_set[i]) then
808	147	result_[lLow] += data_set[i] * period_delta[j]
809	147	n += 1
810		end if
811	147	j += 1
812	147	end for
813	26	if n > 0 then
814	26	result_[lLow] /= n
815		else
816	0	result_[lLow] = 0
817		end if
818
819	26	lLow += 1
820	26	lHigh += 1
821	26	end while
822
823	7	return result_
824		end function
825
826		--**
827		-- Returns the exponential moving average of a set of data points.
828		--
829		-- Parameters:
830		-- # ##data_set## : a list of 1 or more numbers for which you want a moving average.
831		-- # ##smoothing_factor## : an atom, the smoothing factor, typically between 0 and 1.
832		--
833		-- Returns:
834		-- A sequence, made of the requested averages, or ##{}## if ##data_set##
835		-- is empty.
836		--
837		-- Comments:
838		--
839		-- A moving average is used to smooth out a set of data points over a period.
840		--
841		-- The formula used is:\\
842		-- : ##Y,,i,, = Y,,i-1,, + F * (X,,i,, - Y,,i-1,,)##
843		--
844		-- Note that only atom elements are included and any sub-sequences elements are ignored.
845		--
846		-- The smoothing factor controls how data is smoothed. 0 smooths everything to the average of ##data_set##, and 1 means no smoothing at all.
847		--
848		-- Any value for ##smoothing_factor## outside the 0.0..1.0 range causes ##smoothing_factor##
849		-- to be set to the periodic factor ##(2/(N+1))##.
850		--
851		-- Example 1:
852		--
853		-- ? emovavg( {7,2,8,5,6}, 0.75 )
854		-- -- Ans: {6.65,3.1625,6.790625,5.44765625,5.861914063}
855		-- ? emovavg( {7,2,8,5,6}, 0.25 )
856		-- -- Ans: {5.95,4.9625,5.721875,5.54140625,5.656054688}
857		-- ? emovavg( {7,2,8,5,6}, -1 )
858		-- -- Ans: {6.066666667,4.711111111,5.807407407,5.538271605,5.69218107}
859		--
860		--
861		-- See also:
862		-- [[:average]]
863
864	4	public function emovavg(object data_set, atom smoothing_factor)
865		atom lPrev
866
867	4	if atom(data_set) then
868	1	data_set = {data_set}
869
870	3	elsif count(data_set) = 0 then
871	1	return data_set
872		end if
873
874	3	if smoothing_factor < 0 or smoothing_factor > 1 then
875	1	smoothing_factor = (2 / (count(data_set) + 1))
876		end if
877
878	3	lPrev = average(data_set)
879	3	for i = 1 to length(data_set) do
880	11	if atom(data_set[i]) then
881	11	data_set[i] = (data_set[i] - lPrev) * smoothing_factor + lPrev
882	11	lPrev = data_set[i]
883		end if
884	11	end for
885	3	return data_set
886		end function
887
888		--**
889		-- Returns the mid point of the data points.
890		--
891		-- Parameters:
892		-- # ##data_set## : a list of 1 or more numbers for which you want the median.
893		-- # ##subseq_opt## : an object. When this is ST_ALLNUM (the default) it
894		-- means that ##data_set## is assumed to contain no sub-sequences otherwise this
895		-- gives instructions about how to treat sub-sequences. See comments for details.
896		--
897		-- Returns:
898		-- An object, either ##{}## if there are no items in the set, or an atom (the median) otherwise.
899		--
900		-- Comments:
901		--
902		-- ##median##() is the item for which half the items are below it and half
903		-- are above it.
904		--
905		-- All elements are included; any sequence elements are assumed to have the value zero.
906		--
907		-- The equation for median is:
908		--
909		-- {{{
910		-- median(X) ==> sort(X)[floor((N + 1) / 2)]
911		-- }}}
912		--
913		-- If the data can contain sub-sequences, such as strings, you need to let the
914		-- function know about this; otherwise it assumes every value in ##data_set## is
915		-- a number. If that is not the case then the function will crash. So it is
916		-- important that if it can possibly contain sub-sequences that you tell this
917		-- function what to do with them. Your choices are to ignore them or assume they
918		-- have the value zero. To ignore them, use ST_IGNSTR as the ##subseq_opt## parameter
919		-- value otherwise use ST_ZEROSTR. However, if you know that ##data_set## only
920		-- contains numbers use the default ##subseq_opt## value, ST_ALLNUM.
921		-- Note It is faster if the data only contains numbers.
922		--
923		-- Example 1:
924		--
925		-- ? median( {7,2,8,5,6,6,4,8,6,6,3,3,4,1,8,4} ) -- Ans: 5
926		--
927		--
928		-- See also:
929		-- [[:average]], [[:geomean]], [[:harmean]], [[:movavg]], [[:emovavg]]
930		--
931
932	4	public function median(object data_set, object subseq_opt = ST_ALLNUM)
933
934	4	if atom(data_set) then
935	1	return data_set -- a lone atom is returned as-is
936		end if
937
938	3	data_set = massage(data_set, subseq_opt) -- apply the caller's sub-sequence handling option
939
940	3	if length(data_set) = 0 then
941	1	return data_set -- nothing left after massage(): return the empty sequence
942		end if
943
944	2	if length(data_set) < 3 then
945	1	return data_set[1] -- NOTE(review): for two items this is the first item as supplied (unsorted), not a midpoint - confirm intended
946		end if
947	1	data_set = sort(data_set)
948	1	return data_set[ floor((length(data_set) + 1) / 2) ] -- for an even number of items this picks the lower of the two middle items
949
950		end function
951
952		--**
953		-- Returns the frequency of each unique item in the data set.
954		--
955		-- Parameters:
956		-- # ##data_set## : a list of 1 or more numbers for which you want the frequencies.
957		-- # ##subseq_opt## : an object. When this is ST_ALLNUM (the default) it
958		-- means that ##data_set## is assumed to contain no sub-sequences otherwise this
959		-- gives instructions about how to treat sub-sequences. See comments for details.
960		--
961		-- Returns:
962		-- A sequence. This will contain zero or more 2-element sub-sequences. The
963		-- first element is the frequency count and the second element is the data item
964		-- that was counted. The returned values are in descending order, meaning that
965		-- the highest frequencies are at the beginning of the returned list.
966		--
967		-- Comments:
968		-- If the data can contain sub-sequences, such as strings, you need to let
969		-- the function know about this otherwise it assumes every value in ##data_set## is
970		-- a number. If that is not the case then the function will crash. So it is
971		-- important that if it can possibly contain sub-sequences that you tell this
972		-- function what to do with them. Your choices are to ignore them or assume they
973		-- have the value zero. To ignore them, use ST_IGNSTR as the ##subseq_opt## parameter
974		-- value otherwise use ST_ZEROSTR. However, if you know that ##data_set## only
975		-- contains numbers use the default ##subseq_opt## value, ST_ALLNUM.
976		-- Note It is faster if the data only contains numbers.
977		--
978		-- Example 1:
979		--
980		-- ? raw_frequency("the cat is the hatter")
981		--
982		-- This returns
983		-- {{{
984		-- {
985		-- {5,116},
986		-- {4,32},
987		-- {3,104},
988		-- {3,101},
989		-- {2,97},
990		-- {1,115},
991		-- {1,114},
992		-- {1,105},
993		-- {1,99}
994		-- }
995		-- }}}
996		--
997
998	5	public function raw_frequency(object data_set, object subseq_opt = ST_ALLNUM)
999
1000		sequence lCounts -- {count, item} pairs, one slot per possible distinct item
1001		sequence lKeys -- distinct items seen so far, at the same positions as in lCounts
1002	5	integer lNew = 0 -- number of distinct items registered so far
1003		integer lPos
1004	5	integer lMax = -1 -- highest slot of lCounts that has been claimed
1005
1006	5	if atom(data_set) then
1007	1	return {{1,data_set}} -- a lone atom occurs exactly once
1008		end if
1009
1010	4	data_set = massage(data_set, subseq_opt)
1011
1012	4	if length(data_set) = 0 then
1013	1	return {{1,data_set}} -- NOTE(review): an empty set is reported as one occurrence of {} - confirm this is intended
1014		end if
1015	3	lCounts = repeat({0,0}, length(data_set))
1016	3	lKeys = repeat(0, length(data_set))
1017	3	for i = 1 to length(data_set) do
1018	53	lPos = find(data_set[i], lKeys) -- NOTE(review): lKeys is pre-filled with 0, so a data item equal to 0 matches an unused slot and is never registered as new - confirm
1019	53	if lPos = 0 then
1020	24	lNew += 1
1021	24	lPos = lNew -- first sighting: claim the next free slot
1022	24	lCounts[lPos][2] = data_set[i]
1023	24	lKeys[lPos] = data_set[i]
1024	24	if lPos > lMax then
1025	24	lMax = lPos
1026		end if
1027		end if
1028	53	lCounts[lPos][1] += 1
1029	53	end for
1030	3	return sort(lCounts[1..lMax], DESCENDING) -- drop the unclaimed slots, then order the {count, item} pairs descending
1031
1032		end function
1033
1034		--**
1035		-- Returns the most frequent point(s) of the data set.
1036		--
1037		-- Parameters:
1038		-- # ##data_set## : a list of 1 or more numbers for which you want the mode.
1039		-- # ##subseq_opt## : an object. When this is ST_ALLNUM (the default) it
1040		-- means that ##data_set## is assumed to contain no sub-sequences otherwise this
1041		-- gives instructions about how to treat sub-sequences. See comments for details.
1042		--
1043		-- Returns:
1044		-- A sequence. The list of modal items in the data set.
1045		--
1046		-- Comments:
1047		--
1048		-- It is possible for the ##mode##() to return more than one item when more than
1049		-- one item in the set has the same highest frequency count.
1050		--
1051		-- If the data can contain sub-sequences, such as strings, you need to let
1052		-- the function know about this otherwise it assumes every value in ##data_set## is
1053		-- a number. If that is not the case then the function will crash. So it is
1054		-- important that if it can possibly contain sub-sequences that you tell this
1055		-- function what to do with them. Your choices are to ignore them or assume they
1056		-- have the value zero. To ignore them, use ST_IGNSTR as the ##subseq_opt## parameter
1057		-- value otherwise use ST_ZEROSTR. However, if you know that ##data_set## only
1058		-- contains numbers use the default ##subseq_opt## value, ST_ALLNUM.
1059		-- Note It is faster if the data only contains numbers.
1060		--
1061		-- Example 1:
1062		--
1063		-- ? mode( {7,2,8,5,6,6,4,8,6,6,3,3,4,1,8,4} ) -- Ans: {6}
1064		-- ? mode( {8,2,8,5,6,6,4,8,6,6,3,3,4,1,8,4} ) -- Ans: {8,6}
1065		--
1066		--
1067		-- See also:
1068		-- [[:average]], [[:geomean]], [[:harmean]], [[:movavg]], [[:emovavg]]
1069		--
1070
1071	3	public function mode(sequence data_set, object subseq_opt = ST_ALLNUM)
1072
1073		sequence lCounts -- frequency table produced by raw_frequency()
1074		sequence lRes -- modal items collected so far
1075
1076	3	data_set = massage(data_set, subseq_opt)
1077
1078	3	if not length( data_set ) then
1079	1	return {} -- no data, so no mode
1080		end if
1081
1082	2	lCounts = raw_frequency(data_set, subseq_opt) -- NOTE(review): raw_frequency() calls massage() again with the same option
1083
1084	2	lRes = {lCounts[1][2]} -- the first table entry is always taken as a mode
1085	2	for i = 2 to length(lCounts) do
1086	3	if lCounts[i][1] < lCounts[1][1] then
1087	2	exit -- count has dropped below the first entry's count: stop collecting
1088		end if
1089	1	lRes = append(lRes, lCounts[i][2]) -- same count as the first entry: also a mode
1090	1	end for
1091
1092	2	return lRes
1093
1094		end function
1095
1096		--**
1097		-- Returns the distance between a supplied value and the mean, to some supplied
1098		-- order of magnitude. This is used to get a measure of the //shape// of a
1099		-- data set.
1100		--
1101		-- Parameters:
1102		-- # ##data_set## : a list of 1 or more numbers whose mean is used.
1103		-- # ##datum##: either a single value or a list of values for which you require
1104		-- the central moments.
1105		-- # ##order_mag##: An integer. This is the order of magnitude required. Usually
1106		-- a number from 1 to 4, but can be anything.
1107		-- # ##subseq_opt## : an object. When this is ST_ALLNUM (the default) it
1108		-- means that ##data_set## is assumed to contain no sub-sequences otherwise this
1109		-- gives instructions about how to treat sub-sequences. See comments for details.
1110		--
1111		-- Returns:
1112		-- An object. The same data type as ##datum##. This is the set of calculated
1113		-- central moments.
1114		--
1115		-- Comments:
1116		--
1117		-- For each of the items in ##datum##, its central moment is calculated as ...
1118		-- {{{
1119		-- CM = power( ITEM - AVG, MAGNITUDE)
1120		-- }}}
1121		--
1122		-- If the data can contain sub-sequences, such as strings, you need to let
1123		-- the function know about this otherwise it assumes every value in ##data_set## is
1124		-- a number. If that is not the case then the function will crash. So it is
1125		-- important that if it can possibly contain sub-sequences that you tell this
1126		-- function what to do with them. Your choices are to ignore them or assume they
1127		-- have the value zero. To ignore them, use ST_IGNSTR as the ##subseq_opt## parameter
1128		-- value otherwise use ST_ZEROSTR. However, if you know that ##data_set## only
1129		-- contains numbers use the default ##subseq_opt## value, ST_ALLNUM.
1130		-- Note It is faster if the data only contains numbers.
1131		--
1132		-- Example 1:
1133		--
1134		-- ? central_moment("the cat is the hatter", "the",1) --> {23.14285714, 11.14285714, 8.142857143}
1135		-- ? central_moment("the cat is the hatter", 't',2) --> 535.5918367
1136		-- ? central_moment("the cat is the hatter", 't',3) --> 12395.12536
1137		--
1138		--
1139		-- See also:
1140		-- [[:average]]
1141		--
1142	8	public function central_moment(sequence data_set, object datum, integer order_mag = 1, object subseq_opt = ST_ALLNUM)
1143
1144		atom lMean -- average of the massaged data set
1145
1146	8	data_set = massage(data_set, subseq_opt)
1147
1148	8	if length(data_set) = 0 then
1149	1	return 0 -- empty data set: 0 is returned whatever the shape of datum
1150		end if
1151
1152	7	lMean = average(data_set)
1153
1154	7	return power( datum - lMean, order_mag) -- datum is used as passed in; only data_set went through massage()
1155
1156		end function
1157
1158		--**
1159		-- Returns sum of the central moments of each item in a data set.
1160		--
1161		-- Parameters:
1162		-- # ##data_set## : a list of 1 or more numbers whose mean is used.
1163		-- # ##order_mag##: An integer. This is the order of magnitude required. Usually
1164		-- a number from 1 to 4, but can be anything.
1165		-- # ##subseq_opt## : an object. When this is ST_ALLNUM (the default) it
1166		-- means that ##data_set## is assumed to contain no sub-sequences otherwise this
1167		-- gives instructions about how to treat sub-sequences. See comments for details.
1168		--
1169		-- Returns:
1170		-- An atom. The total of the central moments calculated for each of the
1171		-- items in ##data_set##.
1172		--
1173		-- Comments:
1174		-- If the data can contain sub-sequences, such as strings, you need to let
1175		-- the function know about this otherwise it assumes every value in ##data_set## is
1176		-- a number. If that is not the case then the function will crash. So it is
1177		-- important that if it can possibly contain sub-sequences that you tell this
1178		-- function what to do with them. Your choices are to ignore them or assume they
1179		-- have the value zero. To ignore them, use ST_IGNSTR as the ##subseq_opt## parameter
1180		-- value otherwise use ST_ZEROSTR. However, if you know that ##data_set## only
1181		-- contains numbers use the default ##subseq_opt## value, ST_ALLNUM.
1182		-- Note It is faster if the data only contains numbers.
1183		--
1184		-- Example 1:
1185		--
1186		-- ? sum_central_moments("the cat is the hatter", 1) --> -8.526512829e-14
1187		-- ? sum_central_moments("the cat is the hatter", 2) --> 19220.57143
1188		-- ? sum_central_moments("the cat is the hatter", 3) --> -811341.551
1189		-- ? sum_central_moments("the cat is the hatter", 4) --> 56824083.71
1190		--
1191		--
1192		-- See also:
1193		-- [[:central_moment]], [[:average]]
1194		--
1195	7	public function sum_central_moments(object data_set, integer order_mag = 1, object subseq_opt = ST_ALLNUM) -- NOTE(review): data_set is an object here but central_moment() declares it as a sequence
1196	7	return sum( central_moment(data_set, data_set, order_mag, subseq_opt) ) -- data_set serves as both the population and the list of datum values
1197		end function
1198
1199		--**
1200		-- Returns a measure of the asymmetry of a data set. Usually the data_set is a
1201		-- probability distribution but it can be anything. This value is used to assess
1202		-- how suitable the data set is in representing the required analysis. It can
1203		-- help detect if there are too many extreme values in the data set.
1204		--
1205		-- Parameters:
1206		-- # ##data_set## : a list of 1 or more numbers whose mean is used.
1207		-- # ##subseq_opt## : an object. When this is ST_ALLNUM (the default) it
1208		-- means that ##data_set## is assumed to contain no sub-sequences otherwise this
1209		-- gives instructions about how to treat sub-sequences. See comments for details.
1210		--
1211		-- Returns:
1212		-- An atom. The skewness measure of the data set.
1213		--
1214		-- Comments:
1215		-- Generally speaking, a negative return indicates that most of the values are
1216		-- lower than the mean, while positive values indicate that most values are
1217		-- greater than the mean. However this might not be the case when there are a few
1218		-- extreme values on one side of the mean.
1219		--
1220		-- The larger the magnitude of the returned value, the more the data is skewed
1221		-- in that direction.
1222		--
1223		-- A returned value of zero indicates that the mean and median values are identical
1224		-- and that the data is symmetrical.
1225		--
1226		--
1227		-- If the data can contain sub-sequences, such as strings, you need to let
1228		-- the function know about this otherwise it assumes every value in ##data_set## is
1229		-- a number. If that is not the case then the function will crash. So it is
1230		-- important that if it can possibly contain sub-sequences that you tell this
1231		-- function what to do with them. Your choices are to ignore them or assume they
1232		-- have the value zero. To ignore them, use ST_IGNSTR as the ##subseq_opt## parameter
1233		-- value otherwise use ST_ZEROSTR. However, if you know that ##data_set## only
1234		-- contains numbers use the default ##subseq_opt## value, ST_ALLNUM.
1235		-- Note It is faster if the data only contains numbers.
1236		--
1237		-- Example 1:
1238		--
1239		-- ? skewness("the cat is the hatter") --> -1.296820819
1240		-- ? skewness("thecatisthehatter") --> 0.1029393238
1241		--
1242		--
1243		-- See also:
1244		-- [[:kurtosis]]
1245		--
1246	4	public function skewness(object data_set, object subseq_opt = ST_ALLNUM)
1247
1248	4	if atom(data_set) then
1249	1	return data_set -- a lone atom is returned unchanged
1250		end if
1251
1252	3	data_set = massage(data_set, subseq_opt)
1253
1254	3	if length(data_set) = 0 then
1255	1	return data_set -- empty set: the empty sequence is returned
1256		end if
1257	2	return sum_central_moments(data_set, 3) / ((length(data_set) - 1) * power(stdev(data_set), 3)) -- NOTE(review): no guard for a zero divisor (one item, or stdev() of 0); kurtosis() returns {1} when stdev() is 0
1258
1259		end function
1260
1261		--**
1262		-- Returns a measure of the spread of values in a dataset when compared to a
1263		-- //normal// probability curve.
1264		--
1265		-- Parameters:
1266		-- # ##data_set## : a list of 1 or more numbers whose kurtosis is required.
1267		-- # ##subseq_opt## : an object. When this is ST_ALLNUM (the default) it
1268		-- means that ##data_set## is assumed to contain no sub-sequences otherwise this
1269		-- gives instructions about how to treat sub-sequences. See comments for details.
1270		--
1271		-- Returns:
1272		-- An object. If this is an atom it is the kurtosis measure of the data set.
1273		-- Otherwise it is a sequence containing an error integer. The return value {0}
1274		-- indicates that an empty dataset was passed, {1} indicates that the standard
1275		-- deviation is zero (all values are the same).
1276		--
1277		-- Comments:
1278		-- Generally speaking, a negative return indicates that most of the values are
1279		-- further from the mean, while positive values indicate that most values are
1280		-- nearer to the mean.
1281		--
1282		-- The larger the magnitude of the returned value, the more the data is 'peaked'
1283		-- or 'flatter' in that direction.
1284		--
1285		-- If the data can contain sub-sequences, such as strings, you need to let
1286		-- the function know about this otherwise it assumes every value in ##data_set## is
1287		-- a number. If that is not the case then the function will crash. So it is
1288		-- important that if it can possibly contain sub-sequences that you tell this
1289		-- function what to do with them. Your choices are to ignore them or assume they
1290		-- have the value zero. To ignore them, use ST_IGNSTR as the ##subseq_opt## parameter
1291		-- value otherwise use ST_ZEROSTR. However, if you know that ##data_set## only
1292		-- contains numbers use the default ##subseq_opt## value, ST_ALLNUM.
1293		-- Note It is faster if the data only contains numbers.
1294		--
1295		-- Example 1:
1296		--
1297		-- ? kurtosis("thecatisthehatter") --> -1.737889192
1298		--
1299		--
1300		-- See also:
1301		-- [[:skewness]]
1302		--
1303	4	public function kurtosis(object data_set, object subseq_opt = ST_ALLNUM)
1304		atom sd -- standard deviation of the massaged data set
1305
1306	4	if atom(data_set) then
1307	1	return data_set -- a lone atom is returned unchanged
1308		end if
1309	3	data_set = massage(data_set, subseq_opt)
1310	3	if length(data_set) = 0 then
1311	1	return {0} -- error code: empty data set
1312		end if
1313	2	sd = stdev(data_set)
1314	2	if sd = 0 then
1315	1	return {1} -- error code: standard deviation is zero
1316		end if
1317
1318	1	return (sum_central_moments(data_set, 4) / ((length(data_set) - 1) * power(stdev(data_set), 4))) - 3 -- NOTE(review): stdev() is evaluated again here although sd already holds it
1319
1320		end function