(* File : raum.sml SCCS : "%Z%20%E% %M% %I%" Author : Richard A. O'Keefe Defines: Reverse-Append-Unit-eMpty "lists" This is a data structure where append and reverse are O(1) operations and bulk operations are O(n). It is NOT intended to make hd, tl, nth, take, drop, or splitAt fast. Nor is it supposed to make length fast, although by adding By adding a size field to AP & RV this could make length O(1) and it could make nth faster, but the md column in the table below shows nth would still be O(n), not O(log n). *) structure Raum : RAUM = struct datatype 'x raum (* represent a sequence as a tree *) = U0 (* [] *) | U1 of 'x (* [x1] *) | U2 of 'x * 'x (* [x1,x2] *) | U3 of 'x * 'x * 'x (* [x1,x2,x3] *) | AP of 'x raum * 'x raum (* a @ b *) | RV of 'x raum * 'x raum (* rev(a @ b) *) (* n fn md sdd ml sdl ms sds 0 1 0.00 0.000 0.00 0.000 0.00 0.000 1 1 1.00 0.000 1.00 0.000 2.00 0.000 2 1 1.00 0.000 1.00 0.000 1.50 0.000 3 1 1.00 0.000 1.00 0.000 1.33 0.000 4 3 2.00 0.000 2.00 0.000 2.25 0.000 5 8 2.75 0.463 2.38 0.518 2.60 0.370 6 23 3.48 0.593 2.61 0.722 2.82 0.395 7 68 4.18 0.711 2.75 0.887 2.96 0.406 8 207 4.82 0.873 2.85 1.006 3.05 0.402 9 644 5.43 1.009 2.93 1.096 3.13 0.392 10 2040 6.01 1.146 2.99 1.167 3.18 0.380 11 6558 6.57 1.277 3.03 1.224 3.22 0.368 12 21343 7.10 1.404 3.08 1.272 3.26 0.356 13 70186 7.61 1.526 3.11 1.312 3.29 0.345 14 232864 8.11 1.642 3.14 1.346 3.32 0.334 15 778550 8.58 1.755 3.17 1.377 3.34 0.325 16 2620459 9.04 1.864 3.19 1.403 3.36 0.316 17 8872074 9.49 1.970 3.21 1.427 3.37 0.307 18 30195288 9.93 2.073 3.23 1.448 3.39 0.299 19 103246502 10.35 2.172 3.25 1.467 3.40 0.292 n = number of elements fn = number of raums with n elements md = mean depth of those fn raums sdd = standard deviation of depth of those raums ml = mean depth to leftmost node (cost of hd/tl) sdl = standard deviation of depth to leftmost node ms = mean space per element under a WAM-like model sds = standard deviation of space per element We see that md ~ 0.56 n (1 ± 0.20), so raums are pretty long and stringy, NOT nice and logarithmically bushy. Don't let your intuitions about random binary search trees mislead you! These trees are intended to make @, rev, and bulk operations cheap, NOT nth. Although it looks as though hd & tl should be fast. These figures are actually somewhat pessimistic; they list the number of shapes and the properties of shapes assuming a dumb constructor (AP). In fact we use a smart constructor (cat) that tries to keep raums a bit shallower and tighter packed. *) (*---------------- Analogues of top level functions ----------------*) (* empty => an empty raum -- O(1) *) val empty = U0 (* singleton x => a raum containing just x -- O(1) *) val singleton = U1 (* cons x xs ~> singleton x @ xs add x at the front of xs -- O(1) *) fun cons x (U0) = U1 x | cons x (U1 y) = U2 (x,y) | cons x (U2 (y,z)) = U3 (x,y,z) | cons x (xs as U3 _) = AP (U1 x, xs) | cons x (AP (U1 y,b)) = AP (U2 (x,y), b) | cons x (AP (U2 (y,z),b)) = AP (U3 (x,y,z), b) | cons x (xs as AP _) = AP (U1 x, xs) | cons x (RV (a,U1 y)) = RV (a, U2 (y,x)) | cons x (RV (a,U2 (y,z))) = RV (a, U3 (y,z,x)) | cons x (xs as RV _) = AP (U1 x, xs) (* snoc x xs ~> xs @ singleton x add x at the rear of xs -- O(1) *) fun snoc x (U0) = U1 x | snoc x (U1 y) = U2 (y,x) | snoc x (U2 (y,z)) = U3 (y,z,x) | snoc x (xs as U3 _) = AP (xs, U1 x) | snoc x (AP (a, U1 y)) = AP (a, U2 (y,x)) | snoc x (AP (a, U2 (y,z))) = AP (a, U3 (y,z,x)) | snoc x (xs as AP _) = AP (xs, U1 x) | snoc x (RV (U1 y,b)) = RV (U2 (x,y), b) | snoc x (RV (U2 (y,z),b)) = RV (U3 (x,y,z), b) | snoc x (xs as RV _) = AP (xs, U1 x) (* fromList [x1,...,xn] => a raum containing x1 .... xn -- O(n) *) fun fromList [] = U0 | fromList [x] = U1 x | fromList [x,y] = U2 (x,y) | fromList [x,y,z] = U3 (x,y,z) | fromList (x::y::z::r) = AP (U3 (x,y,z), fromList r) (* toList r => a list with the same elements as r (toList o fromList) is the identity on lists, but (fromList o toList) is not in general an identity on raums as the shapes will differ. -- O(n) *) fun toList a = let fun aux (U0) r = r | aux (U1 x) r = x::r | aux (U2 (x,y)) r = x::y::r | aux (U3 (x,y,z)) r = x::y::z::r | aux (AP (a,b)) r = aux a (aux b r) | aux (RV (a,b)) r = r_aux b (r_aux a r) and r_aux (U0) r = r | r_aux (U1 x) r = x::r | r_aux (U2 (x,y)) r = y::x::r | r_aux (U3 (x,y,z)) r = z::y::x::r | r_aux (AP (a,b)) r = r_aux b (r_aux a r) | r_aux (RV (a,b)) r = aux a (aux b r) in aux a [] end (* app f (fromList [x1,...,xn]) = app f [x1,...,xn] = f x1; ...; f xn !! Take this equation literally; the calls to f are done left to right. -- O(n) + n calls to f *) fun app _ U0 = () | app f (U1 x) = f x | app f (U2 (x,y)) = (f x; f y) | app f (U3 (x,y,z)) = (f x; f y; f z) | app f (AP (a,b)) = (app f a; app f b) | app f (RV (a,b)) = (r_app f b; r_app f a) and r_app _ U0 = () | r_app f (U1 x) = f x | r_app f (U2 (x,y)) = (f y; f x) | r_app f (U3 (x,y,z)) = (f z; f y; f x) | r_app f (AP (a,b)) = (r_app f b; r_app f a) | r_app f (RV (a,b)) = (app f a; app f b) (* cat a b => a raum with all the elements of a followed by all the elements of b -- O(1) *) fun cat U0 b = b | cat a U0 = a | cat (U1 x) (U1 y) = U2 (x,y) | cat (U1 x) (U2 (y,z)) = U3 (x,y,z) | cat (U2 (x,y)) (U1 z) = U3 (x,y,z) | cat a b = AP (a,b) (* consider using cat (U1 x) (U0) = U1 x cat (U1 x) (U1 y) = U2 (x,y) cat (U1 x) (U2 (y,z)) = U3 (x,y,z) cat (U1 x) (AP (U1 y,b)) = AP (U2 (x,y),b) cat (U1 x) (AP (U2 (y,z),b)) = AP (U3 (x,y,z),b) cat (U1 x) (RV (a,U1 y)) = RV (a, U2 (y,x)) cat (U1 x) (RV (a,U2 (y,z))) = RV (a, U3 (x,z,y)) cat (U2 (x,y)) (U0) = U2 (x,y) cat (U2 (x,y)) (U1 z) = U3 (x,y,z) cat (U2 (x,y)) (AP (U1 z,b)) = AP (U3 (x,y,z),b) cat (U2 (x,y)) (RV (a,U1 z)) = RV (a, U3 (z,y,x)) cat a (U0) = a cat a b = AP (a,b) *) fun op@(a,b) = cat a b (* foldl f e (fromList [x1,...,xn]) = f(xn, ... f(x1, e) ...) The value depends on the order of the function calls; f(xi, ...) is called before f(xj, ...) if and only if i < j. -- O(n) + n calls to f *) fun foldl f e (U0) = e | foldl f e (U1 x) = f(x, e) | foldl f e (U2 (x,y)) = f(y, f(x, e)) | foldl f e (U3 (x,y,z)) = f(z, f(y, f(x, e))) | foldl f e (AP (a,b)) = foldl f (foldl f e a) b | foldl f e (RV (a,b)) = r_foldl f (r_foldl f e b) a and r_foldl f e (U0) = e | r_foldl f e (U1 x) = f(x, e) | r_foldl f e (U2 (x,y)) = f(x, f(y, e)) | r_foldl f e (U3 (x,y,z)) = f(x, f(y, f(z, e))) | r_foldl f e (AP (a,b)) = r_foldl f (r_foldl f e b) a | r_foldl f e (RV (a,b)) = foldl f (foldl f e a) b (* foldr f e (fromList [x1,...,xn]) = f(x1 ... f(xn, e) ...) The value depends on the order of the function calls; f(xi, ...) is called before f(xj, ...) if and only if i > j. -- O(n) + n calls to f *) fun foldr f e (U0) = e | foldr f e (U1 x) = f(x, e) | foldr f e (U2 (x,y)) = f(x, f(y, e)) | foldr f e (U3 (x,y,z)) = f(x, f(y, f(z, e))) | foldr f e (AP (a,b)) = foldr f (foldr f e b) a | foldr f e (RV (a,b)) = r_foldr f (r_foldr f e a) b and r_foldr f e (U0) = e | r_foldr f e (U1 x) = f(x, e) | r_foldr f e (U2 (x,y)) = f(y, f(x, e)) | r_foldr f e (U3 (x,y,z)) = f(z, f(y, f(x, e))) | r_foldr f e (AP (a,b)) = r_foldr f (r_foldr f e a) b | r_foldr f e (RV (a,b)) = foldr f (foldr f e b) a (* hd (fromList [x1,...,xn]) = x1 hd empty = exception -- O(n) in the worst case last (fromList [x1,...,xn]) = xn last empty = exception -- O(n) in the worst case *) fun hd (U0) = raise List.Empty | hd (U1 x) = x | hd (U2 (x,_)) = x | hd (U3 (x,_,_)) = x | hd (AP (a,_)) = hd a | hd (RV (_,b)) = last b and last (U0) = raise List.Empty | last (U1 x) = x | last (U2 (_,x)) = x | last (U3 (_,_,x)) = x | last (AP (_,b)) = last b | last (RV (a,_)) = hd a (* length (fromList [x1,...,xn]) = n -- O(n) *) fun length (U0) = 0 | length (U1 _) = 1 | length (U2 _) = 2 | length (U3 _) = 3 | length (AP (a,b)) = length a + length b | length (RV (a,b)) = length a + length b (* map f (fromList xs) = fromList (map xs) !! The order in which the calls to f are made is NOT SPECIFIED. -- O(n) + n calls to f *) fun map _ (U0) = U0 | map f (U1 x) = U1 (f x) | map f (U2 (x,y)) = U2 (f x, f y) | map f (U3 (x,y,z)) = U3 (f x, f y, f z) | map f (AP (a,b)) = AP (map f a, map f b) | map f (RV (a,b)) = RV (map f a, map f b) (* null (fromList xs) = null xs -- O(1) *) fun null U0 = true | null _ = false (* rev r => a raum with the elements of r in reverse order -- O(1) *) fun rev (U0) = U0 | rev (U1 x) = U1 x | rev (U2 (x,y)) = U2 (y,x) | rev (U3 (x,y,z)) = U3 (z,y,x) | rev (AP (a,b)) = RV (a,b) | rev (RV (a,b)) = AP (a,b) (* revCat a b = rev (cat a b) This reverses BOTH parts; revAppend reverses just a. -- O(1) *) fun revCat U0 b = b | revCat a U0 = a | revCat (U1 x) (U1 y) = U2 (y,x) | revCat (U1 x) (U2 (y,z)) = U3 (z,y,x) | revCat (U2 (x,y)) (U1 z) = U3 (z,y,x) | revCat a b = RV (a,b) (* tl r => a raum with all the elements of r except the first tl empty => an exception -- O(n) in the worst case init r => a raum with all the elements of r except the first init empty => an exception -- O(n) in the worst case *) fun tl (U0) = raise List.Empty | tl (U1 _) = U0 | tl (U2 (_,y)) = U1 y | tl (U3 (_,y,z)) = U2 (y,z) | tl (AP (AP (a,b), c)) = cat (tl a) (AP (b,c)) | tl (AP (a,b)) = cat (tl a) b | tl (RV (a,b)) = revCat a (init b) and init (U0) = raise List.Empty | init (U1 _) = U0 | init (U2 (x,_)) = U1 x | init (U3 (x,y,_)) = U2 (x,y) | init (AP (a, AP (b,c))) = cat (AP (a,b)) (init c) | init (AP (a,b)) = cat a (init b) | init (RV (a,b)) = revCat (tl a) b (* ---------------- analogues of List functions ---------------- *) (* all p (fromList [x1,...,xn]) = all p [x1,...,xn] !! The value depends on the order in which the calls to p are made !! so the order is defined; take the equation above literally. -- O(n) + n calls to p *) fun all _ U0 = true | all p (U1 x) = p x | all p (U2 (x,y)) = p x andalso p y | all p (U3 (x,y,z)) = p x andalso p y andalso p z | all p (AP (a,b)) = all p a andalso all p b | all p (RV (a,b)) = r_all p b andalso r_all p a and r_all _ U0 = true | r_all p (U1 x) = p x | r_all p (U2 (x,y)) = p y andalso p x | r_all p (U3 (x,y,z)) = p z andalso p y andalso p x | r_all p (AP (a,b)) = r_all p b andalso r_all p a | r_all p (RV (a,b)) = all p a andalso all p b (* collate c (r1,r2) => three-way comparison !! Done the hard way right now because two-raum is too hard. -- O(|r1| + |r2|) *) fun collate c (r1,r2) = List.collate c (toList r1, toList r2) (* concat [r1,...,rn] = r1@...@rn -- O(|result|) *) fun concat [] = U0 | concat (b :: bs) = cat b (concat bs) datatype 'x partway (* result of searching part of a collection *) = FOUND of 'x (* found what we wanted, here it is *) | MORE of int (* checked this many items with no luck *) fun f_drop 0 a = FOUND a | f_drop n (U0) = MORE n | f_drop n (U1 _) = if n > 1 then MORE (n-1) else FOUND U0 | f_drop n (U2 (_,y)) = if n > 2 then MORE (n-2) else FOUND (if n = 2 then U0 else U1 y) | f_drop n (U3 (_,y,z)) = if n > 3 then MORE (n-3) else FOUND (if n = 3 then U0 else if n = 2 then U1 z else U2 (y,z)) | f_drop n (AP (a,b)) = (case f_drop n a of FOUND a' => FOUND (cat a' b) | MORE n' => f_drop n' b) | f_drop n (RV (a,b)) = (case r_drop n b of FOUND b' => FOUND (revCat a b') | MORE n' => r_drop n' a) and r_drop 0 a = FOUND a | r_drop n (U0) = MORE n | r_drop n (U1 _) = if n > 1 then MORE (n-1) else FOUND U0 | r_drop n (U2 (x,_)) = if n > 2 then MORE (n-2) else FOUND (if n = 2 then U0 else U1 x) | r_drop n (U3 (x,y,_)) = if n > 3 then MORE (n-3) else FOUND (if n = 3 then U0 else if n = 2 then U1 x else U2 (x,y)) | r_drop n (AP (a,b)) = (case r_drop n b of FOUND b' => FOUND (revCat a b') | MORE n' => r_drop n' a) | r_drop n (RV (a,b)) = (case f_drop n a of FOUND a' => FOUND (cat a' b) | MORE n' => f_drop n' b) (* drop (r,n) = r with the first n elements removed exception if n < 0 or n > length r -- O(|r|) in the worst case *) fun drop (r,n) = if n < 0 then raise General.Subscript else case f_drop n r of FOUND r' => r' | MORE _ => raise General.Subscript (* exists p (fromList [x1,...,xn]) = exists p [x1,...,xn] !! The value depends on the order in which the calls to p are made !! so the order is defined; take the equation above literally. -- O(n) + n calls to p *) fun exists _ (U0) = false | exists p (U1 x) = p x | exists p (U2 (x,y)) = p x orelse p y | exists p (U3 (x,y,z)) = p x orelse p y orelse p z | exists p (AP (a,b)) = exists p a orelse exists p b | exists p (RV (a,b)) = r_exists p b orelse r_exists p a and r_exists _ (U0) = false | r_exists p (U1 x) = p x | r_exists p (U2 (x,y)) = p y orelse p x | r_exists p (U3 (x,y,z)) = p z orelse p y orelse p y | r_exists p (AP (a,b)) = r_exists p b orelse r_exists p a | r_exists p (RV (a,b)) = exists p a orelse exists p b (* filter p (fromList [x1,...,xn]) = fromList (filter p [x1,...,xn]) !! Do not take this literally; the result is equivalent but need NOT !! be the same shape, and the calls to p may be in any order. -- O(n) + n calls to p *) fun filter _ U0 = U0 | filter p (U1 x) = if p x then U1 x else U0 | filter p (U2 (x,y)) = if p x then if p y then U2 (x,y) else U1 x else if p y then U1 y else U0 | filter p (U3 (x,y,z)) = if p x then if p y then if p z then U3 (x,y,z) else U2 (x,y) else if p z then U2 (x,z) else U1 x else if p y then if p z then U2 (y,z) else U1 y else if p z then U1 z else U0 | filter p (AP (a,b)) = cat (filter p a) (filter p b) | filter p (RV (a,b)) = revCat (filter p a) (filter p b) (* find p (fromList [x1,...,xn]) = find [x1,...,xn] !! The value depends on the order in which the calls to p are made, !! so the order is defined; take the equation above literally. -- O(n) + n calls to p *) fun find _ (U0) = NONE | find p (U1 x) = if p x then SOME x else NONE | find p (U2 (x,y)) = if p x then SOME x else if p y then SOME y else NONE | find p (U3 (x,y,z)) = if p x then SOME x else if p y then SOME y else if p z then SOME z else NONE | find p (AP (a,b)) = (case find p a of NONE => find p b | res => res) | find p (RV (a,b)) = (case r_find p b of NONE => r_find p a | res => res) and r_find _ (U0) = NONE | r_find p (U1 x) = if p x then SOME x else NONE | r_find p (U2 (x,y)) = if p y then SOME y else if p x then SOME x else NONE | r_find p (U3 (x,y,z)) = if p z then SOME z else if p y then SOME y else if p x then SOME x else NONE | r_find p (AP (a,b)) = (case r_find p b of NONE => r_find p a | res => res) | r_find p (RV (a,b)) = (case find p a of NONE => find p b | res => res) (* getItem empty = NONE getItem r = SOME (hd r, tl r) otherwise -- O(n) worst case *) fun getItem (U0) = NONE | getItem (U1 x) = SOME (x, U0) | getItem (U2 (x,y)) = SOME (x, U1 y) | getItem (U3 (x,y,z)) = SOME (x, U2 (y,z)) | getItem (AP (a,b)) = SOME (hd a, cat (tl a) b) | getItem (RV (a,b)) = SOME (last b, revCat a (init b)) (* mapPartial f (fromList [x1,...,xn]) = fromList (mapPartial f [x1,...,xn]) !! Do not take this literally; the result is equivalent but need NOT !! be the same shape, and the calls to f may be in any order. -- O(n) + n calls to f *) fun mapPartial f (U0) = U0 | mapPartial f (U1 x) = (case f x of NONE => U0 | SOME x' => U1 x') | mapPartial f (U2 (x,y)) = (case (f x, f y) of (NONE, NONE) => U0 | (NONE, SOME y') => U1 y' | (SOME x',NONE) => U1 x' | (SOME x',SOME y') => U2 (x',y')) | mapPartial f (U3 (x,y,z)) = (case (f x, f y, f z) of (NONE, NONE, NONE ) => U0 | (NONE, NONE, SOME z') => U1 z' | (NONE ,SOME y',NONE) => U1 y' | (NONE, SOME y',SOME z') => U2 (y',z') | (SOME x',NONE, NONE ) => U1 x' | (SOME x',NONE, SOME z') => U2 (x',z') | (SOME x',SOME y',NONE) => U2 (x',y') | (SOME x',SOME y',SOME z') => U3 (x',y',z')) | mapPartial f (AP (a,b)) = cat (mapPartial f a) (mapPartial f b) | mapPartial f (RV (a,b)) = revCat (mapPartial f a) (mapPartial f b) fun f_nth n U0 = MORE n | f_nth n (U1 x) = if n > 0 then MORE (n-1) else FOUND x | f_nth n (U2 (x,y)) = if n > 1 then MORE (n-2) else FOUND (if n = 0 then x else y) | f_nth n (U3 (x,y,z)) = if n > 2 then MORE (n-3) else FOUND (if n = 0 then x else if n = 1 then y else z) | f_nth n (AP (a,b)) = (case f_nth n a of MORE n' => f_nth n' b | res => res) | f_nth n (RV (a,b)) = (case r_nth n b of MORE n' => r_nth n' a | res => res) and r_nth n U0 = MORE n | r_nth n (U1 x) = if n > 0 then MORE (n-1) else FOUND x | r_nth n (U2 (x,y)) = if n > 1 then MORE (n-2) else FOUND (if n = 0 then x else y) | r_nth n (U3 (x,y,z)) = if n > 2 then MORE (n-3) else FOUND (if n = 0 then x else if n = 1 then y else z) | r_nth n (AP (a,b)) = (case r_nth n b of MORE n' => r_nth n' a | res => res) | r_nth n (RV (a,b)) = (case f_nth n a of MORE n' => f_nth n' b | res => res) (* nth(fromList [x1,...,xn], k) = nth([x1,...,xn], k) -- O(n) in the worst case *) fun nth(a, k) = if k < 0 then raise General.Subscript else case f_nth k a of FOUND x => x | MORE _ => raise General.Subscript (* partition p (fromList xs) = (fromList ts, fromList fs) where (ts, fs) = partition p xs !! Do not take this literally; the result is equivalent but need NOT !! be the same shape, and the calls to p may be in any order. -- O(n) + n calls to p *) fun partition p (U0) = (U0, U0) | partition p (U1 x) = if p x then (U1 x, U0) else (U0, U1 x) | partition p (U2 (x,y)) = if p x then if p y then (U2 (x,y), U0) else (U1 x, U1 y) else if p y then (U1 y, U1 x) else (U0, U2 (x,y)) | partition p (U3 (x,y,z)) = if p x then if p y then if p z then (U3 (x,y,z), U0) else (U2 (x,y), U1 z) else if p z then (U2 (x,z), U1 y) else (U1 x, U2 (y,z)) else if p y then if p z then (U2 (y,z), U1 x) else (U1 y, U2 (x,z)) else if p z then (U1 z, U2 (x,y)) else (U0, U3 (x,y,z)) | partition p (AP (a,b)) = let val (ta,fa) = partition p a val (tb,fb) = partition p b in (cat ta tb, cat fa fb) end | partition p (RV (a,b)) = let val (ta,fa) = partition p a val (tb,fb) = partition p b in (revCat ta tb, revCat fa fb) end (* revAppend (fromList xs, fromList ys) = fromList (revAppend xs ys), up to shapes as usual. -- O(1) *) fun revAppend (a,b) = cat (rev a) b (* tabulate = fromList o List.tabulate and so defined -- O(n) *) fun tabulate(n, f) = if n < 0 then raise General.Size else let fun tab 0 d = U0 | tab 1 d = U1 (f d) | tab 2 d = U2 (f d, f (d+1)) | tab 3 d = U3 (f d, f (d+1), f (d+2)) | tab n d = let val m = n div 2 in AP (tab m d, tab (n-m) (d+m)) end in tab n 0 end datatype ('x,'y) complete = COMPLETE of 'x | INCOMPLETE of 'y (* f_take : int -> 'x raum -> ('x raum, 'x raum * int) complete *) fun f_take 0 _ = COMPLETE(U0) | f_take n U0 = INCOMPLETE(U0, n) | f_take n (U1 x) = if n > 1 then INCOMPLETE(U1 x, n-1) else COMPLETE(U1 x) | f_take n (U2 (x,y)) = if n > 2 then INCOMPLETE(U2 (x,y), n-2) else COMPLETE(if n = 2 then U2 (x,y) else U1 x) | f_take n (U3 (x,y,z)) = if n > 3 then INCOMPLETE(U3 (x,y,z), n-3) else COMPLETE(if n = 3 then U3 (x,y,z) else if n = 2 then U2 (x,y) else U1 x) | f_take n (AP (a,b)) = (case f_take n a of COMPLETE a' => COMPLETE a' | INCOMPLETE (a',n') => (case f_take n' b of COMPLETE b' => COMPLETE(cat a' b') | INCOMPLETE(b',n'') => INCOMPLETE(cat a' b', n''))) | f_take n (RV (a,b)) = (case r_take n b of COMPLETE b' => COMPLETE(rev b') | INCOMPLETE (b',n') => (case r_take n' a of COMPLETE a' => COMPLETE(cat b' a') | INCOMPLETE(a',n'') => INCOMPLETE(cat b' a', n''))) and r_take 0 _ = COMPLETE(U0) | r_take n U0 = INCOMPLETE(U0, n) | r_take n (U1 x) = if n > 1 then INCOMPLETE(U1 x, n-1) else COMPLETE(U1 x) | r_take n (U2 (x,y)) = if n > 2 then INCOMPLETE(U2 (x,y), n-2) else COMPLETE(if n = 2 then U2 (x,y) else U1 x) | r_take n (U3 (x,y,z)) = if n > 3 then INCOMPLETE(U3 (x,y,z), n-3) else COMPLETE(if n = 3 then U3 (x,y,z) else if n = 2 then U2 (x,y) else U1 x) | r_take n (AP (a,b)) = (case r_take n b of COMPLETE b' => COMPLETE(rev b') | INCOMPLETE (b',n') => (case r_take n' a of COMPLETE a' => COMPLETE(cat b' a') | INCOMPLETE(a',n'') => INCOMPLETE(cat b' a', n''))) | r_take n (RV (a,b)) = (case f_take n a of COMPLETE a' => COMPLETE a' | INCOMPLETE (a',n') => (case f_take n' b of COMPLETE b' => COMPLETE(cat a' b') | INCOMPLETE(b',n'') => INCOMPLETE(cat a' b', n''))); (* take(fromList [x1,...,xn], k) = fromList (take([x1,...,xn], k)) as usual, up to shape; exception of k < 0 or k >= n. -- O(n) *) fun take(r, k) = if k < 0 then raise General.Subscript else case f_take k r of COMPLETE r' => r' | INCOMPLETE _ => raise General.Subscript (* Functions copied from Haskell *) (* concatMap f [r1,...,rn] = f r1@ ... @ f rn NB: input is a _list_, not a _raum_ -- O(n) + n calls to f *) fun concatMap _ [] = U0 | concatMap f (b :: bs) = cat (f b) (concatMap f bs) fun f_drop_w p U0 = NONE | f_drop_w p (U1 x) = if not (p x) then SOME (U1 x) else NONE | f_drop_w p (U2 (x,y)) = if not (p x) then SOME (U2 (x,y)) else if not (p y) then SOME (U1 y) else NONE | f_drop_w p (U3 (x,y,z)) = if not (p x) then SOME (U3 (x,y,z)) else if not (p y) then SOME (U2 (y,z)) else if not (p z) then SOME (U1 z) else NONE | f_drop_w p (AP (a,b)) = (case f_drop_w p a of NONE => f_drop_w p b | SOME a' => SOME (cat a' b)) | f_drop_w p (RV (a,b)) = (case r_drop_w p b of NONE => r_drop_w p a | SOME b' => SOME (cat b' (rev a))) and r_drop_w p U0 = NONE | r_drop_w p (U1 x) = if not (p x) then SOME (U1 x) else NONE | r_drop_w p (U2 (x,y)) = if not (p y) then SOME (U2 (y,x)) else if not (p y) then SOME (U1 y) else NONE | r_drop_w p (U3 (x,y,z)) = if not (p z) then SOME (U3 (z,y,x)) else if not (p y) then SOME (U2 (z,y)) else if not (p x) then SOME (U1 x) else NONE | r_drop_w p (AP (a,b)) = (case r_drop_w p b of NONE => r_drop_w p a | SOME b' => SOME(cat b' (rev a))) | r_drop_w p (RV (a,b)) = (case f_drop_w p a of NONE => f_drop_w p b | SOME a' => SOME(cat a' (rev b))) (* dropWhile p (fromList [x1,...,xn]) = fromList (dropWhile p [x1,...,xn]) as usual, up to shape, where fun dropWhile p (x::xs) = if p x then (x::xs) else x :: dropWhile p xs | dropWhile _ [] = [] -- O(n) worst case, O(|result|) expected. *) fun dropWhile p r = (case f_drop_w p r of NONE => U0 | SOME f => f) (* elem(x, (fromList [x1,...,xn])) => true iff one of x1,...,xn = x There isn't any List.elem that I can see. -- O(n) *) fun elem (x, r) = exists (fn y => x = y) r (* notElem (x, ys) is the opposite of elem (x, ys). -- O(n) *) fun notElem (x, r) = not (elem (x, r)) (* f_findx : ('x -> bool) -> int -> 'x raum -> (int,int) complete *) fun f_findx p n (U0) = INCOMPLETE n | f_findx p n (U1 x) = if p x then COMPLETE n else INCOMPLETE (n+1) | f_findx p n (U2 (x,y)) = if p x then COMPLETE n else if p y then COMPLETE (n+1) else INCOMPLETE (n+2) | f_findx p n (U3 (x,y,z)) = if p x then COMPLETE n else if p y then COMPLETE (n+1) else if p z then COMPLETE (n+2) else INCOMPLETE (n+3) | f_findx p n (AP (a,b)) = (case f_findx p n a of COMPLETE n' => COMPLETE n' | INCOMPLETE n' => f_findx p n' b) | f_findx p n (RV (a,b)) = (case r_findx p n b of COMPLETE n' => COMPLETE n' | INCOMPLETE n' => r_findx p n' a) and r_findx p n (U0) = INCOMPLETE n | r_findx p n (U1 x) = if p x then COMPLETE n else INCOMPLETE (n+1) | r_findx p n (U2 (x,y)) = if p y then COMPLETE n else if p x then COMPLETE (n+1) else INCOMPLETE (n+2) | r_findx p n (U3 (x,y,z)) = if p z then COMPLETE n else if p y then COMPLETE (n+1) else if p x then COMPLETE (n+2) else INCOMPLETE (n+3) | r_findx p n (AP (a,b)) = (case r_findx p n b of COMPLETE n' => COMPLETE n' | INCOMPLETE n' => r_findx p n' a) | r_findx p n (RV (a,b)) = (case f_findx p n a of COMPLETE n' => COMPLETE n' | INCOMPLETE n' => f_findx p n' b) (* elemIndex x xs => SOME i if elem x xs and nth i xs = x or NONE if x does not occur in xs -- O(i) if found, O(n) if not *) fun elemIndex(x,xs) = case f_findx (fn y => x = y) 0 xs of COMPLETE n => SOME n | INCOMPLETE _ => NONE (* findIndex p xs => SOME i if find p xs succeeds or NONE if not(exists p xs) -- O(i) + i calls to p if found, O(p)+p if not *) fun findIndex p xs = case f_findx p 0 xs of COMPLETE n => SOME n | INCOMPLETE _ => NONE (* mapAccumL : (('a*'x) -> ('a*'y)) -> 'a -> 'x raum -> ('a * 'y raum) a cross between map and foldl. -- O(n) + n calls to f *) fun mapAccumL f a xs = let fun step (x, (a, r)) = let val (a', y) = f(a, x) in (a', snoc y r) end in foldl step (a, U0) xs end (* List version: fun mapAccumL _ s [] = (s, []) | mapAccumL f s (x:xs) = let val (s', y) = f s x val (s'', ys) = mapAccumL f s' xs in (s'', y::ys) end *) (* mapAccumR : (('a*'x) -> ('a*'y)) -> 'a -> 'x raum -> ('a * 'y raum) a cross between map and foldr. -- O(n) + n calls to f *) fun mapAccumR f a xs = let fun step (x, (a, r)) = let val (a', y) = f(a, x) in (a', cons y r) end in foldr step (a, U0) xs end (* List version: fun mapAccumR _ s [] = (s, []) | mapAccumR f s (x:xs) = let val (s',ys) = mapAccumR f s xs val (s'',y) = f s' x in (s'', y::ys) end *) (* fold1 f (fromList [x1,...,xn]) = all the x's combined using f _in no particular order_. The function f must be commutative and associative, like maximum, minimum, addition, multiplication, set union, set intersection, and so on. For this function it is an error for the raum argument to be empty, becuase the caller does not provide a identity. Use this for maximum, minimum, intersection, &c where there is no such identity. -- O(n) + n-1 calls to f *) fun fold1 _ (U0) = raise List.Empty | fold1 _ (U1 x) = x | fold1 f (U2 (x,y)) = f(x,y) | fold1 f (U3 (x,y,z)) = f(x, f(y, z)) | fold1 f (AP (a,b)) = f(fold1 f a, fold1 f b) | fold1 f (RV (a,b)) = f(fold1 f a, fold1 f b) (* fold f e (fromList [x1,...,xn]) is like fold1 but requires that e be the identity of f, f(x,e) = f(e,x) = x. Use this for sum, product, set union, anything where there is an identity you can provide. -- O(n) + n-1 calls to f *) fun fold _ e (U0) = e | fold f _ r = fold1 f r (* groupBy p (fromList [x1,...,xn]) ~> List.map fromList (List.groupBy [x1,....,xn]) except that SML's List module is missing groupBy. The answer is a list of raums, each of them a sequence as long as possible subject to p(x,x), ..., p(x,x). I would prefer to do this without using toList, and it can be done using foldr quite easily, but that turns over more memory than this version. al -- O(n) + n-1 calls to p. *) fun groupBy p xs = case toList xs of [] => [] | (x::xs) => let fun loop (y::ys) x r = if p(x, y) then loop ys y (cons y r) else rev r :: loop ys y (U1 y) | loop [] _ r = [rev r] in loop xs x (U1 x) end fun group xs = groupBy (op =) xs (* inits (fromList xs) = List.map fromList (List.inits xs) Given a raum, it returns a list of all its prefixes. The question here is whether to return a raum or a list. A list seemed to make more sense (inits, tails should agree). -- O(n) *) fun inits xs = xs :: (if null xs then [] else inits (init xs)) (* intercalate x [y1,...,yn] = y1 @ x @ ... @ x @ yn intercalate _ [] = [] This function is inside the module only because it is so in Haskell; it may serve as an example of using raums. -- O(|result|) *) fun intercalate _ [] = empty | intercalate _ [y] = y | intercalate g (y::ys) = y @ (g @ intercalate g ys) (* intersperse x (fromList [y1,...,yn]) ~> fromList [y1,x,...,x,yn] intersperse _ empty = empty This function is inside the module only because it is so in Haskell; it may serve as an example of using raums. -- O(|result|) *) fun intersperse g ys = case foldr (fn (y, NONE) => SOME (singleton y) | (y, SOME r) => SOME (cons y (cons g r))) NONE ys of NONE => empty | SOME r => r (* lookup x pairs returns SOME v where the first matching pair in pairs is (x,v), or NONE if there's no such pair. -- O(n) + n calls to = *) fun lookup x pairs = case find (fn (k,_) => k = x) pairs of SOME (_,v) => SOME v | NONE => NONE fun maximumBy c r = fold1 (fn (x,y) => case c(x, y) of LESS => x | _ => y) r fun minimumBy c r = fold1 (fn (x,y) => case c(x, y) of LESS => y | _ => x) r (* product (fromList xs) = List.product xs except that List.product does not exist in SML. In Haskell this would be :: Num t => Raum t -> r *) fun product r = fold ( Int.* ) 1 r (* replicate (n, x) = tabulate (n, fn _ => x) However, this is done in log time and log space. -- O(lg n) *) fun replicate (n, x) = let fun rep 1 = U1 x | rep 2 = U2 (x,x) | rep 3 = U3 (x,x,x) | rep m = let val a = rep (n div 2) val b = AP (a, a) in if m mod 2 = 0 then b else AP (U1 x,b) end in if n > 0 then rep n else if n = 0 then U0 else raise General.Size end (* splitAt (xs, n) = (take(xs,n), drop(xs,n)) This could be done more efficiently, but right now correctness is the aim. -- O(n) *) fun splitAt (xs, n) = (take (xs, n), drop(xs, n)) (* stripPrefix a ab = SOME b if ab = a@b, NONE otherwise. This is tricky. It does NOT stop at the first mismatch. -- O(|a|+|ab|) I think, but haven't proved. *) fun stripPrefix a ab = let fun step (y, SOME xs) = if hd xs = y then SOME (tl xs) else NONE | step (_, NONE) = NONE in foldl step (SOME ab) a end (* isPrefixOf a ab is true iff there's a b such that ab ~ a@b *) fun isPrefixOf(a, ab) = (case stripPrefix a ab of NONE => false | SOME _ => true) (* stripSuffix a ab = SOME b if ab = a@b, NONE otherwise. This is tricky. It does NOT stop at the first mismatch. -- O(|a|+|ab|) I think, but haven't proved. *) fun stripSuffix a ab = let fun step (y, SOME xs) = if last xs = y then SOME (init xs) else NONE | step (_, NONE) = NONE in foldl step (SOME ab) a end (* isSuffixOf a ab is true iff there's a b such that ab ~ a@b *) fun isSuffixOf(a, ab) = (case stripSuffix a ab of NONE => false | SOME _ => true) (* isInfixOf a bac is true iff there are b, c such that bac~b@a@c *) fun isInfixOf(a, bac) = isPrefixOf(a, bac) orelse ( not (null bac) andalso isInfixOf(a, tl bac)) (* sum (fromList xs) = List.sum xs except that List.sum does not exist in SML. In Haskell this would be :: Num t => Raum t -> r *) fun sum r = fold ( Int.+ ) 0 r (* tails (fromList xs) = List.map fromList (List.tails xs) Given a raum, it returns a list of all its suffixes.. The question here is whether to return a raum or a list. A list seemed to make more sense (inits, tails should agree). -- O(n) *) fun tails xs = xs :: (if null xs then [] else tails (tl xs)) fun f_take_w p U0 = INCOMPLETE U0 | f_take_w p (U1 x) = if not (p x) then COMPLETE U0 else INCOMPLETE (U1 x) | f_take_w p (U2 (x,y)) = if not (p x) then COMPLETE U0 else if not (p y) then COMPLETE (U1 x) else INCOMPLETE (U2 (x,y)) | f_take_w p (U3 (x,y,z)) = if not (p x) then COMPLETE U0 else if not (p y) then COMPLETE (U1 x) else if not (p z) then COMPLETE (U2 (x,y)) else INCOMPLETE (U3 (x,y,z)) | f_take_w p (AP (a,b)) = (case f_take_w p a of COMPLETE a' => COMPLETE a' | INCOMPLETE a' => (case f_take_w p b of COMPLETE b' => COMPLETE(cat a' b') | INCOMPLETE b' => INCOMPLETE (cat a' b'))) | f_take_w p (RV (a,b)) = (case r_take_w p b of COMPLETE b' => COMPLETE(rev b') | INCOMPLETE b' => (case r_take_w p a of COMPLETE a' => COMPLETE(cat b' a') | INCOMPLETE a' => INCOMPLETE(cat b' a'))) and r_take_w p U0 = INCOMPLETE U0 | r_take_w p (U1 x) = if not (p x) then COMPLETE U0 else INCOMPLETE (U1 x) | r_take_w p (U2 (x,y)) = if not (p y) then COMPLETE U0 else if not (p x) then COMPLETE (U1 y) else INCOMPLETE (U2 (y,x)) | r_take_w p (U3 (x,y,z)) = if not (p z) then COMPLETE U0 else if not (p y) then COMPLETE (U1 z) else if not (p x) then COMPLETE (U2 (z,y)) else INCOMPLETE (U3 (z,y,x)) | r_take_w p (AP (a,b)) = (case r_take_w p b of COMPLETE b' => COMPLETE(rev b') | INCOMPLETE b' => (case r_take_w p a of COMPLETE a' => COMPLETE(cat b' a') | INCOMPLETE a' => INCOMPLETE (cat b' a'))) | r_take_w p (RV (a,b)) = (case f_take_w p a of COMPLETE a' => COMPLETE a' | INCOMPLETE a' => (case f_take_w p b of COMPLETE b' => COMPLETE(cat a' b') | INCOMPLETE b' => INCOMPLETE(cat a' b'))); (* takeWhile p (fromList [x1,...,xn]) = fromList (takeWhile p [x1,...,xn]) as usual, up to shape, where fun takeWhile p (x::xs) = if p x then x :: takeWhile p xs else takeWhile p xs | takeWhile _ [] = [] -- O(n) worst case, O(|result|) expected. *) fun takeWhile p r = case f_take_w p r of COMPLETE r' => r' | INCOMPLETE r' => r' (* span p xs = (takeWhile p xs, dropWhile p xs) !! This code calls p twice as often as it needs to, !! and MUST BE REWRITTEN. -- O(|result|) + |result| calls to p. *) fun span p xs = (takeWhile p xs, dropWhile p xs) (* break -- see span *) fun break p xs = span (fn x => not (p x)) xs (* unfoldr gen init ~> fromList (List.unfoldr gen init) where the sadly non-existent list version of unfoldr is fun unfoldr gen state = case gen state of NONE => [] | SOME (x, state') => x :: unfold gen state' *) fun unfoldr f y0 = (case f y0 of NONE => U0 | SOME (x1,y1) => (case f y1 of NONE => U1 x1 | SOME (x2,y2) => (case f y2 of NONE => U2 (x1,x2) | SOME (x3,y3) => cat (U3 (x1,x2,x3)) (unfoldr f y3)))) (* Omitted functions from Haskell's Data.List, with reasons. transpose : 'x list list -> 'x list list It's not clear what to convert this to. Should list list be taken as list raum, raum list, or raum raum? subsequences : 'x list -> 'x list list permutations : 'x list -> 'x list list These would be 'x raum -> 'x raum list, but there are too many answers. They work OK in a lazy language, where they really return a stream of subsequences, but not in a strict one. foldl1' The distinction between foldl1 and foldl1' only exists in a lazy language. and, or In a lazy language these are viable alternatives to (all, exists) and don't evaluate more of the sequence than they need to. In a strict one, not so. If really needed, and = all (fn x => x) or = exists (fn x => x) maximum, minimum SML lacks typeclasses, and there's no way to plug these into the established overloadings of < . scanl, scanl1, scanr, scanr1 Useful, and I'd like to do them. Just haven't done so yet. iterate, repeat, cycle A lazy language can work with "infinite" lists. SML is strict. Using the smart constructors means that raums are spine strict even in lazy languages. elemIndices, findIndices Should do these. zip, zip3, zip4, zip5, zip6, zip7 zipWith, zipWith3, zipWith4, zipWith5, zipWith6, zipWith7 It is tricky and not as efficient as one might hope to iterate over two raums in parallel, for the simple reason that two raums the same length may have radically different shapes. In the code above, collate gives up and converts to lists stripPrefix uses a method I _think_ may be asymptotically efficient, but I'm not sure. Let's suppose there were an *efficient* zipWith. zip xs ys = zipWith (fn pair => pair) zipWith3 f xs ys zs = zipWith (fn (x,(y,z)) => f(x,y,z)) xs (zip ys zs) and so it goes; we could do all of them. lines, words, unlines, unwords Haskell Strings are lists; SML strings are not lists. nub, nubBy The definitions fun nubBy p l = let fun aux (x:xs) rs = if List.find (p x) rs then aux xs rs else x :: aux xs rs | aux [] _ = [] in aux l [] end fun nub l = nubBy (fn x => fn y => x = y) could be adapted. However, these functions are O(n**2). delete, deleteBy Could be done using a removeFirst p xs function. \\, deleteFirstsBy Quadratic. union, unionBy Quadratic. intersect, intersectBy Quadratic sort, insert No typeclasses, no obvious way to plug into < overloading. sortBy, insertBy sortBy c xs = fromList (List.sortBy c (toList xs)) does the job, where in fact List.sortBy lives somewhere else, so I'd need to provide that too. insertBy could be done, but I can't be bothered. *) end