Data.Vector.modify with nested vectors - haskell

I have a vector nested inside another. I want to use modify to update this matrix in place. So I use it for the inner vector, but do I also need to use it for the outer?

My suggestion from the comments still stands, if you do not need to operate on a ragged array then the usual rectangular array implementation is better. Here is a short list of drawbacks of vector of vectors:
performance penalty: the outer vector has to be boxed (which means an extra pointer indirection)
safety: you can't guarantee the same length of all rows
operating on ragged arrays is cumbersome
Nevertheless question still stands: how would you modify a vector of vectors in place. Below I'll provide an example function, which uses mutation to reverse rows of a ragged array and another function that reverses both rows and columns. Difference is that in the former we only mutate elements of each row, while in the latter we also mutate the outer boxed vector that corresponds to rows themselves:
{-# LANGUAGE RankNTypes #-}
import Control.Monad as M
import Control.Monad.ST
import Prelude as P
import Data.Vector as V
import Data.Vector.Generic.Mutable as VGM
import Data.Vector.Mutable as VM
import Data.Vector.Primitive as VP
import Data.Vector.Primitive.Mutable as VPM
raggedModifyRows ::
VP.Prim a
=> (forall s. V.Vector (VPM.MVector s a) -> ST s ())
-> V.Vector (VP.Vector a)
-> V.Vector (VP.Vector a)
raggedModifyRows action arr = runST $ do
-- thaw will create a copy of each row, so they can be safely modified
mvs <- V.mapM VP.thaw arr
action mvs
-- We are freezing mutated copies, so it is safe to use unsafeFreeze here too
V.mapM VP.unsafeFreeze mvs
raggedModify ::
VP.Prim a
=> (forall s. VM.MVector s (VPM.MVector s a) -> ST s ())
-> V.Vector (VP.Vector a)
-> V.Vector (VP.Vector a)
raggedModify action arr = runST $ do
arr' <- V.mapM VP.thaw arr
-- mapM already created a copy of a boxed vector, so we can use unsafeThaw
mv <- V.unsafeThaw arr'
action mv
v <- V.unsafeFreeze mv
V.mapM VP.unsafeFreeze v
generateMatrix ::
Prim a => (Int, Int) -> ((Int, Int) -> a) -> V.Vector (VP.Vector a)
generateMatrix (m, n) f = V.generate m $ \ i -> VP.generate n $ \j -> f (i, j)
generateRagged ::
Prim a => V.Vector Int -> ((Int, Int) -> a) -> V.Vector (VP.Vector a)
generateRagged v f = V.imap (\ i n -> VP.generate n $ \j -> f (i, j)) v
reverseST :: (VGM.MVector v a) => v s a -> ST s ()
reverseST mv =
let n = VGM.length mv
in M.forM_ [0 .. (n `div` 2) - 1] $ \j -> VGM.swap mv j (n - j - 1)
reverseRaggedRows :: Prim a => V.Vector (VP.Vector a) -> V.Vector (VP.Vector a)
reverseRaggedRows = raggedModifyRows $ \rows -> V.forM_ rows reverseST
reverseRagged :: Prim a => V.Vector (VP.Vector a) -> V.Vector (VP.Vector a)
reverseRagged =
raggedModify $ \mrows -> do
let reverse' i = VM.read mrows i >>= reverseST
let m = VM.length mrows
M.forM_ [0 .. (m `div` 2) - 1] $ \i -> do
reverse' i
VM.swap mrows i (m - i - 1)
reverse' i
M.when (odd m) $ reverse' (m `div` 2)
Which can be used as follows:
λ> m = generateMatrix (3, 4) $ \(i, j) -> i+j
λ> m
[[0,1,2,3],[1,2,3,4],[2,3,4,5]]
λ> reverseRaggedRows m
[[3,2,1,0],[4,3,2,1],[5,4,3,2]]
λ> reverseRagged m
[[5,4,3,2],[4,3,2,1],[3,2,1,0]]
λ> m = generateRagged (V.fromList [1,2,3]) $ \(i, j) -> i+j
λ> m
[[0],[1,2],[2,3,4]]
λ> reverseRaggedRows m
[[0],[2,1],[4,3,2]]
λ> reverseRagged m
[[4,3,2],[2,1],[0]]
Alternatively we could have used Data.Vector.modify to operate on the outer vector or map a destructive action that uses modify across all rows. There are all sorts of ways to go about it, depends on what you are trying to achieve, for example:
λ> m = generateRagged (V.fromList [1,2,3]) $ \(i, j) -> i+j
λ> V.map (VP.modify reverseST) m
[[0],[2,1],[4,3,2]]
λ> V.modify reverseST (V.map (VP.modify reverseST) m)
[[4,3,2],[2,1],[0]]
I did recommend using massiv for regular multidimensional arrays. Therefore here is also an example of how to achieve the same with withMArrayST:
{-# LANGUAGE FlexibleContexts #-}
import Control.Monad as M
import Data.Massiv.Array as A
reverseMatrix :: Mutable r Ix2 e => Array r Ix2 e -> Array r Ix2 e
reverseMatrix arr =
withMArrayST arr $ \marr -> do
let Sz2 m n = msize marr
ix2#(m2 :. n2) = m `div` 2 :. n `div` 2
A.forM_ (0 ..: ix2) $ \ix#(i :. j) -> do
A.swapM_ marr ix (m - i - 1 :. n - j - 1)
A.swapM_ marr (i :. n - j - 1) (m - i - 1 :. j)
when (odd m) $ A.forM_ (0 ..: n2) $ \ j ->
A.swapM_ marr (m2 :. j) (m2 :. n - j - 1)
when (odd n) $ A.forM_ (0 ..: m2) $ \ i ->
A.swapM_ marr (i :. n2) (m - i - 1 :. n2)
Which can be used as follows:
λ> a = makeArrayR P Seq (Sz2 3 4) $ \ (i :. j) -> i + j
λ> a
Array P Seq (Sz (3 :. 4))
[ [ 0, 1, 2, 3 ]
, [ 1, 2, 3, 4 ]
, [ 2, 3, 4, 5 ]
]
λ> reverseMatrix a
Array P Seq (Sz (3 :. 4))
[ [ 5, 4, 3, 2 ]
, [ 4, 3, 2, 1 ]
, [ 3, 2, 1, 0 ]
]

Related

Could not deduce (Dim n0)

I want to build a Hilbert matrix using the linear package and convert it to a list of lists. While this seems an easy task the type level constraints come into my way:
import Linear
import Linear.V
import Data.Vector qualified as V
-- | Outer (tensor) product of two vectors
outerWith :: (Functor f, Functor g, Num a) => (a -> a -> a) -> f a -> g a -> f (g a)
{-# INLINABLE outerWith #-}
outerWith f a b = fmap (\x -> fmap (f x) b) a
hilbertV :: forall a n. (Fractional a, Dim n) => Integer -> V n (V n a)
hilbertV n =
let v = V $ V.fromList $ fromIntegral <$> [1..n]
w = V $ V.fromList $ fromIntegral <$> [0..n-1]
in luInv $ outerWith (+) w v
listsFromM :: V n (V n a) -> [[a]]
listsFromM m = vToList (vToList <$> m)
vToList :: V n a -> [a]
vToList = V.toList . toVector
hilbertL :: forall a. (Fractional a) => Integer -> [[a]]
hilbertL n = listsFromM (hilbertV n)
When doing this the following error arises in the last line hilbertL n = listsFromM (hilbertV n):
bench/Solve.hs:28:26: error:
• Could not deduce (Dim n0) arising from a use of ‘hilbertV’
from the context: Fractional a
bound by the type signature for:
hilbertL :: forall a. Fractional a => Integer -> [[a]]
at bench/Solve.hs:27:1-56
The type variable ‘n0’ is ambiguous
These potential instances exist:
three instances involving out-of-scope types
instance GHC.TypeNats.KnownNat n => Dim n -- Defined in ‘Linear.V’
instance Data.Reflection.Reifies s Int =>
Dim (Linear.V.ReifiedDim s)
-- Defined in ‘Linear.V’
instance forall k (n :: k) a. Dim n => Dim (V n a)
-- Defined in ‘Linear.V’
• In the first argument of ‘listsFromM’, namely ‘(hilbertV n)’
In the expression: listsFromM (hilbertV n)
In an equation for ‘hilbertL’: hilbertL n = listsFromM (hilbertV n)
How can i get this to compile?
First, the type of HilbertV is unsafe. You shouldn't pass in an Integer size if size should be determined from the type! I think you want this:
{-# LANGUAGE TypeApplications, UnicodeSyntax #-}
hilbertV :: ∀ a n. (Fractional a, Dim n) => V n (V n a)
hilbertV = luInv $ outerWith (+) w v
where v = V $ V.fromList $ fromIntegral <$> [1..n]
w = V $ V.fromList $ fromIntegral <$> [0..n-1]
n = reflectDim #n []
(The [] just fills the proxy argument with the most concise way to generate a value-less functor input, since it is easier to pass in the type information with -XTypeApplications.)
In fact, I'd avoid even passing around n twice at all. Instead, why not factor out the marginal generation:
hilbertV :: ∀ a n. (Fractional a, Dim n) => V n (V n a)
hilbertV = luInv $ outerWith (+) w v
where v = fromIntegral <$> enumFinFrom 1
w = fromIntegral <$> enumFinFrom 0
enumFinFrom :: ∀ n a . (Enum a, Dim n) => a -> V n a
enumFinFrom ini = V . V.fromList $ take (reflectDim #n []) [ini..]
Now, for hilbertL the problem is that you have a dependent type size. The trick to deal with that are Rank2-quantified functions; linear offers reifyDim/reifyVector etc. for the purpose.
hilbertL :: ∀ a . Fractional a => Int -> [[a]]
hilbertL n = reifyDim n hilbertL'
where hilbertL' :: ∀ n p . Dim n => p n -> [[a]]
hilbertL' _ = listsFromM $ hilbertV #n
Alternatively, you could also change hilbertV to take a proxy argument for the size and then just hand that in. I've always found this a bit ugly, but it's actually more compact in this case:
hilbertV :: ∀ a n p . (Fractional a, Dim n) => p n -> V n (V n a)
hilbertV np = luInv $ outerWith (+) w v
where v = V $ V.fromList $ fromIntegral <$> [1..n]
w = V $ V.fromList $ fromIntegral <$> [0..n-1]
n = reflectDim np
hilbertL :: ∀ a . Fractional a => Int -> [[a]]
hilbertL n = reifyDim n (\np -> listsFromM $ hilbertV np)

Can't match tuple inside of foldl

I have the following code, which should™ convert an excel column type its corresponding number. For example AA into 27 and AB into 28:
import Data.Char (ord)
import Data.List (foldl1')
columnToNumber :: String -> Int
columnToNumber s = foldl1' (\acc (i, v) -> acc + 26 ^ i * v) (values s)
where values s = zip (reverse [0..(length s)]) ((\c -> ord c - 64) <$> s)
The idea is to take the string "AA" convert it to the corresponding numbers
["A", "A"] -> [1, 1]
and zip it with the base so from right to left 26^0, 26^1, 26^2, and so on.
zip [1, 0] [1, 1] -> [(1, 1), (0, 1)]
That way the result of the fold would be
26^1 * 1 + 26^0 * 1 = 27
Unfortunately, I'm getting the following errors and I'm not sure why:
ExcelSheetColumn.hs:7:34:
Couldn't match expected type ‘Int’
with actual type ‘(Integer, Int)’
In the pattern: (i, v)
In the first argument of ‘foldl1'’, namely
‘(\ acc (i, v) -> acc + 26 ^ i * v)’
In the expression:
foldl1' (\ acc (i, v) -> acc + 26 ^ i * v) (values s)
ExcelSheetColumn.hs:7:63:
Couldn't match type ‘(Int, Int)’ with ‘Int’
Expected type: [Int]
Actual type: [(Int, Int)]
In the second argument of ‘foldl1'’, namely ‘(values s)’
In the expression:
foldl1' (\ acc (i, v) -> acc + 26 ^ i * v) (values s)
Could someone help me out?
to get it compiling you actually just have to switch foldl1' to foldl' and add the starting accumulator:
import Data.Char (ord)
import Data.List (foldl')
columnToNumber :: String -> Int
columnToNumber s = foldl' (\acc (i, v) -> acc + 26 ^ i * v) 0 (values s)
where values s = zip (reverse [0..(length s)]) ((\c -> ord c - 64) <$> s)
if you add the suggestion Free_D made (start at length s - 1):
columnToNumber :: String -> Int
columnToNumber s = foldl' (\acc (i, v) -> acc + 26 ^ i * v) 0 (values s)
where values s = zip (reverse [0..(length s -1)]) ((\c -> ord c - 64) <$> s)
you get the desired results:
λ> columnToNumber "AA"
27
λ> columnToNumber "AB"
28
I don't know if you actually gonna need this but hey why not:
what you probably don't like is that columnToNumber "A1" is 11 - to fix this you need to tread digits differently from letters:
columnToNumber :: String -> Int
columnToNumber s = foldl' (\acc (i, v) -> acc + 26 ^ i * v) 0 (values s)
where values s = zip (reverse [0..(length s -1)]) (parse <$> s)
parse c
| c >= '0' && c <= '9' = ord c - ord '0'
| otherwise = ord c - 64
Looking at the definition of foldl1', it has to take two things that are of the same type and produce something similar
*Main Data.List> :t foldl1'
foldl1' :: (a -> a -> a) -> [a] -> a
But foldl is what you want:
*Main Data.List> :t foldl
foldl :: Foldable t => (b -> a -> b) -> b -> t a -> b
So essentially this:
import Data.Char (ord, toUpper)
columnToNumber :: String -> Int
columnToNumber s = foldl (\acc (i, v) -> acc + 26 ^ i * v) 0 $ values s where
values s = zip [l - 1, l - 2 ..0] ((\c -> (ord.toUpper) c - 64) <$> s) where
l = length s

SHA-1 in haskell producing wrong hashes

I wrote a program to perform SHA-1 in haskell, and while it does produce hashes, they do not match with the ones produced by other SHA-1 programs
Example: cat
hashes to: b5be86bc8bccfc24b01b093228ebb96fc92fa804 but is supposed to hash to 9d989e8d27dc9e0ec3389fc855f142c3d40f0c50
My code is:
(old code omitted)
I have no idea what is wrong. Can someone tell me where I made a mistake?
Edit:
I fixed the stuff that was pointed out, however it is still not working. It works correctly up until the inner loop.
I cleaned up the code so the functions for the inner loop are available as f1, f2 and f3
cat now interestingly hashes to ebe6c9fa1afa0ef5a0ca80bab251fd41cc29127e.
Code:
import Data.Word
import Data.Bits
import Data.Char (ord, intToDigit)
import Data.Binary (encode, decode)
import Numeric (showHex, showIntAtBase)
import System.IO (stdin)
import Data.Sequence ((<|), (|>))
import qualified Data.Sequence as S
import qualified Data.ByteString.Lazy as B
type Quintuple32 = (Word32, Word32, Word32, Word32, Word32)
addQuintuple (a, b, c, d, e) (f, g, h, i, j) =
(a + f, b + g, c + h, d + i, e + j)
shower :: Quintuple32 -> String
shower (a, b, c, d, e) = concatMap (`showHex` "") [a, b, c, d, e]
hash :: Int -> S.Seq Word32 -> Quintuple32 -> Quintuple32
hash i w h#(a, b, c, d, e)
| i < 20 = hash (i + 1) w (newhash (f1 h + k1))
| i < 40 = hash (i + 1) w (newhash (f2 h + k2))
| i < 60 = hash (i + 1) w (newhash (f3 h + k3))
| i < 80 = hash (i + 1) w (newhash (f2 h + k4))
| otherwise = h
where (k1, k2, k3, k4) = (0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6)
newhash a' = (rotate a 5 + a' + e + (w `S.index` i), a, rotate b 30, c, d)
f1 :: Quintuple32 -> Word32
f1 (_, b, c, _, _) = (b .&. c) .|. (complement b .&. c)
f2 :: Quintuple32 -> Word32
f2 (_, b, c, d, _) = b `xor` c `xor` d
f3 :: Quintuple32 -> Word32
f3 (_, b, c, d, _) = (b .&. c) .|. (b .&. d) .|. (c .&. d)
starting :: Quintuple32
starting = (0x67452301
, 0xEFCDAB89
, 0x98BADCFE
, 0x10325476
, 0xC3D2E1F0)
hasher :: Quintuple32 -> S.Seq Word32 -> Quintuple32
hasher acc x = addQuintuple acc (hash 0 (extend x) acc)
process :: B.ByteString -> Quintuple32
process = foldl hasher starting . chunks . pad
extend :: S.Seq Word32 -> S.Seq Word32
extend = extend' 16
extend' :: Int -> S.Seq Word32 -> S.Seq Word32
extend' 80 a = a
extend' i a = extend' (i + 1) (a |> xored)
where xored = rotate ((a `S.index` (i - 3)) `xor`
(a `S.index` (i - 8)) `xor`
(a `S.index` (i - 14)) `xor`
(a `S.index` (i - 16))) 1
toBytes :: String -> B.ByteString
toBytes = B.pack . map (fromIntegral . ord)
splitEvery n xs
| B.null xs = S.empty
| otherwise = B.take n xs <| splitEvery n (B.drop n xs)
chunks :: B.ByteString -> [S.Seq Word32]
chunks xs
| B.null xs = []
| otherwise = x : chunks (B.drop 64 xs)
where x = fmap decode (splitEvery 4 (B.take 64 xs))
pad :: B.ByteString -> B.ByteString
pad xs = B.append (add0 $ add1 xs) length64
where length64 = encode (fromIntegral (8 * B.length xs) :: Word64)
add1 :: B.ByteString -> B.ByteString
add1 = flip B.append (B.singleton 128)
add0 :: B.ByteString -> B.ByteString
add0 xs
| modulo /= 448 = add0 $ B.append xs (B.singleton 0)
| otherwise = xs
where modulo = (B.length xs * 8) `rem` 512
Also, a small question: is something like (a, b) = (8, 9) an acceptable thing to do to set multiple variables?
Oh, another one of these!
Two errors jump out at me immediately:
pad :: B.ByteString -> B.ByteString
pad xs = B.append (add0 $ add1 xs) length64
where length64 = encode (fromIntegral (B.length xs) :: Word64)
Notice the length you append is supposed to be the bit length, not the byte length.
add1 :: B.ByteString -> B.ByteString
add1 = flip B.append (B.singleton 255)
Notice 255 /= 0b10000000 and the pad is supposed to be the later.
In general you debug these by 1) going over the spec again and again. 2) Comparing to another implementation, such as Adam Wick's SHA package, and comparing for equality at as fine grained level as possible.
EDIT: There are two more bugs, basically transcription errors. Look around a bit and shout if you're still stuck.

How to take an 2D slice with Lens

import qualified Data.Vector as V
import Control.Lens
import Data.Vector.Lens
v = V.fromList [V.fromList [1..3], V.fromList [4..6], V.fromList [7..9]]
1D slice (for example) :
*Main> v ^. sliced 1 2
fromList [fromList [4,5,6],fromList [7,8,9]]
2D sclice: What should I write to get this result?
*Main> v ^. sliced 1 2 {- ??????? -} sliced 0 2 -- Or not so?
V.fromList [V.fromList [4,5], V.fromList [7,8]]
This should do it
insliced :: Int -> Int -> Lens' (V.Vector (V.Vector a)) (V.Vector (V.Vector a))
insliced i n f m = f (V.map (V.slice i n) m)
<&> V.zipWith (\a b -> a V.// zip [i..i+n-1] (V.toList b)) m
then
λ v ^. sliced 1 2 . insliced 0 2
fromList [fromList [4,5],fromList [7,8]]
This has similar requirements to sliced to be valid.
It's worth mentioning there's a general version of this called column from linear. It can't be used with Vector because Vectors aren't Representable (because because it's size isn't statically known). But with something like V3:
λ V3 (V3 1 2 3) (V3 4 5 6) (V3 7 8 9) ^. _yz . column _xy
V2 (V2 4 5) (V2 7 8)
You could write your own (less safe) version for vectors:
vcolumn :: ALens' a b -> Lens' (V.Vector a) (V.Vector b)
vcolumn l f m = f (V.map (^# l) m) <&> V.zipWith (\a b -> a & l #~ b) m

Memoizing a function of type [Integer] -> a

My problem is how to efficiently memoize an expensive function f :: [Integer] -> a that is defined for all finite lists of integers and has the property f . sort = f?
My typical use case is that given a list as of integers I need to obtain the values f (a:as) for various Integer a, so I'd like to build up simultaneously a directed labelled graph whose vertices are pairs of an Integer list and its function value. An edge labelled by a from (as, f as) to (bs, f bs) exists if and only if a:as = bs.
Stealing from a brilliant answer by Edward Kmett I simply copied
{-# LANGUAGE BangPatterns #-}
data Tree a = Tree (Tree a) a (Tree a)
instance Functor Tree where
fmap f (Tree l m r) = Tree (fmap f l) (f m) (fmap f r)
index :: Tree a -> Integer -> a
index (Tree _ m _) 0 = m
index (Tree l _ r) n = case (n - 1) `divMod` 2 of
(q,0) -> index l q
(q,1) -> index r q
nats :: Tree Integer
nats = go 0 1
where go !n !s = Tree (go l s') n (go r s')
where l = n + s
r = l + s
s' = s * 2
and adapted his idea to my problem as
-- directed graph labelled by Integers
data Graph a = Graph a (Tree (Graph a))
instance Functor Graph where
fmap f (Graph a t) = Graph (f a) (fmap (fmap f) t)
-- walk the graph following the given labels
walk :: Graph a -> [Integer] -> a
walk (Graph a _) [] = a
walk (Graph _ t) (x:xs) = walk (index t x) xs
-- graph of all finite integer sequences
intSeq :: Graph [Integer]
intSeq = Graph [] (fmap (\n -> fmap (n:) intSeq) nats)
-- could be replaced by Data.Strict.Pair
data StrictPair a b = StrictPair !a !b
deriving Show
-- f = sum modified according to Edward's idea (the real function is more complicated)
g :: ([Integer] -> StrictPair Integer [Integer]) -> [Integer] -> StrictPair Integer [Integer]
g mf [] = StrictPair 0 []
g mf (a:as) = StrictPair (a+x) (a:as)
where StrictPair x y = mf as
g_graph :: Graph (StrictPair Integer [Integer])
g_graph = fmap (g g_m) intSeq
g_m :: [Integer] -> StrictPair Integer [Integer]
g_m = walk g_graph
This works OK, but as the function f is independent of the order of the occurring integers (but not of their counts) there should be only one vertex in the graph for all integer lists equal up to ordering.
How do I achieve this?
How about just defining g_m' = g_m . sort, i.e. you simply sort the input list first before calling your memoized function?
I have a feeling this is the best you can do since if you want your memoized graph to consist of only sorted paths someone is going to have to look at all of the elements of the list before constructing the path.
Depending on what your input lists look like it might be helpful to transform them in a way which makes the trees branch less. For instance, you might try sorting and taking differences:
original input list: [8,3,14,8,5]
sorted: [3,3,8,8,14]
diffed: [3,0,5,0,6] -- use this as the key
The transformation is a bijection, and the trees branch less because there are smaller numbers involved.
You can use a bit different approach.
There is a trick in proof that a finite product of countable sets is countable:
We can map the sequence [a1, ..., an] to Nat by product . zipWith (^) primes: 2 ^ a1 * 3 ^ a2 * 5 ^ a3 * ... * primen ^ an.
To avoid problems with sequences with zero at the end, we can increase the last index.
As the sequence is ordered, we can exploit the property as user5402 mentioned.
The benefit of using the tree, is that you can increase branching to speed-up traversal. OTOH prime trick could make indexes quite big, but hopefully some tree paths will just be unexplored (remain as thunks).
{-# LANGUAGE BangPatterns #-}
-- Modified from Kmett's answer:
data Tree a = Tree a (Tree a) (Tree a) (Tree a) (Tree a)
instance Functor Tree where
fmap f (Tree x a b c d) = Tree (f x) (fmap f a) (fmap f b) (fmap f c) (fmap f d)
index :: Tree a -> Integer -> a
index (Tree x _ _ _ _) 0 = x
index (Tree _ a b c d) n = case (n - 1) `divMod` 4 of
(q,0) -> index a q
(q,1) -> index b q
(q,2) -> index c q
(q,3) -> index d q
nats :: Tree Integer
nats = go 0 1
where
go !n !s = Tree n (go a s') (go b s') (go c s') (go d s')
where
a = n + s
b = a + s
c = b + s
d = c + s
s' = s * 4
toList :: Tree a -> [a]
toList as = map (index as) [0..]
-- Primes -- https://www.haskell.org/haskellwiki/Prime_numbers
-- Generation and factorisation could be done much better
minus (x:xs) (y:ys) = case (compare x y) of
LT -> x : minus xs (y:ys)
EQ -> minus xs ys
GT -> minus (x:xs) ys
minus xs _ = xs
primes = 2 : sieve [3..] primes
where
sieve xs (p:ps) | q <- p*p , (h,t) <- span (< q) xs =
h ++ sieve (t `minus` [q, q+p..]) ps
addToLast :: [Integer] -> [Integer]
addToLast [] = []
addToLast [x] = [x + 1]
addToLast (x:xs) = x : addToLast xs
subFromLast :: [Integer] -> [Integer]
subFromLast [] = []
subFromLast [x] = [x - 1]
subFromLast (x:xs) = x : subFromLast xs
addSubProp :: [NonNegative Integer] -> Property
addSubProp xs = xs' === subFromLast (addToLast xs')
where xs' = map getNonNegative xs
-- Trick from user5402 answer
toDiffList :: [Integer] -> [Integer]
toDiffList = toDiffList' 0
where toDiffList' _ [] = []
toDiffList' p (x:xs) = x - p : toDiffList' x xs
fromDiffList :: [Integer] -> [Integer]
fromDiffList = fromDiffList' 0
where fromDiffList' _ [] = []
fromDiffList' p (x:xs) = p + x : fromDiffList' (x + p) xs
diffProp :: [Integer] -> Property
diffProp xs = xs === fromDiffList (toDiffList xs)
listToInteger :: [Integer] -> Integer
listToInteger = product . zipWith (^) primes . addToLast
integerToList :: Integer -> [Integer]
integerToList = subFromLast . impl primes 0
where impl _ _ 0 = []
impl _ 0 1 = []
impl _ k 1 = [k]
impl (p:ps) k n = case n `divMod` p of
(n', 0) -> impl (p:ps) (k + 1) n'
(_, _) -> k : impl ps 0 n
listProp :: [NonNegative Integer] -> Property
listProp xs = xs' === integerToList (listToInteger xs')
where xs' = map getNonNegative xs
toIndex :: [Integer] -> Integer
toIndex = listToInteger . toDiffList
fromIndex :: Integer -> [Integer]
fromIndex = fromDiffList . integerToList
-- [1,0] /= [0]
-- Decreasing sequence!
doesntHold :: [NonNegative Integer] -> Property
doesntHold xs = xs' === fromIndex (toIndex xs')
where xs' = map getNonNegative xs
holds :: [NonNegative Integer] -> Property
holds xs = xs' === fromIndex (toIndex xs')
where xs' = sort $ map getNonNegative xs
g :: ([Integer] -> Integer) -> [Integer] -> Integer
g mg = g' . sort
where g' [] = 0
g' (x:xs) = x + sum (map mg $ tails xs)
g_tree :: Tree Integer
g_tree = fmap (g faster_g' . fromIndex) nats
faster_g' :: [Integer] -> Integer
faster_g' = index g_tree . toIndex
faster_g = faster_g' . sort
On my machine fix g [1..22] feels slow, when faster_g [1..40] is still blazing fast.
Addition: if we have bounded set (with indexes 0..n-1) , we can encode it as: a0 * n^0 + a1 * n^1 ....
We can encode any Integer as binary list, e.g. 11 is [1, 1, 0, 1] (least bit first).
Then if we separate integers in the list with 2, we get sequence of bounded values.
As bonus we can take the sequence of 0, 1, 2 digits and compress it to binary using e.g. Huffman encoding, as 2 is much rarer than 0 or 1. But this might be overkill.
With this trick, indexes stay much smaller and the space probably is better packed.
{-# LANGUAGE BangPatterns #-}
-- From Kment's answer:
import Data.Function (fix)
import Data.List (sort, tails)
import Data.List.Split (splitOn)
import Test.QuickCheck
{-- Tree definition as before --}
-- 0, 1, 2
newtype N3 = N3 { unN3 :: Integer }
deriving (Eq, Show)
instance Arbitrary N3 where
arbitrary = elements $ map N3 [ 0, 1, 2 ]
-- Integer <-> N3
coeffs3 :: [Integer]
coeffs3 = coeffs' 1
where coeffs' n = n : coeffs' (n * 3)
listToInteger :: [N3] -> Integer
listToInteger = sum . zipWith f coeffs3
where f n (N3 m) = n * m
listFromInteger :: Integer -> [N3]
listFromInteger 0 = []
listFromInteger n = case n `divMod` 3 of
(q, m) -> N3 m : listFromInteger q
listProp :: [N3] -> Property
listProp xs = (null xs || last xs /= N3 0) ==> xs === listFromInteger (listToInteger xs)
-- Integer <-> N2
-- 0, 1
newtype N2 = N2 { unN2 :: Integer }
deriving (Eq, Show)
coeffs2 :: [Integer]
coeffs2 = coeffs' 1
where coeffs' n = n : coeffs' (n * 2)
integerToBin :: Integer -> [N2]
integerToBin 0 = []
integerToBin n = case n `divMod` 2 of
(q, m) -> N2 m : integerToBin q
integerFromBin :: [N2] -> Integer
integerFromBin = sum . zipWith f coeffs2
where f n (N2 m) = n * m
binProp :: NonNegative Integer -> Property
binProp (NonNegative n) = n === integerFromBin (integerToBin n)
-- unsafe!
n3ton2 :: N3 -> N2
n3ton2 = N2 . unN3
n2ton3 :: N2 -> N3
n2ton3 = N3 . unN2
-- [Integer] <-> [N3]
integerListToN3List :: [Integer] -> [N3]
integerListToN3List = concatMap (++ [N3 2]) . map (map n2ton3 . integerToBin)
integerListFromN3List :: [N3] -> [Integer]
integerListFromN3List = init . map (integerFromBin . map n3ton2) . splitOn [N3 2]
n3ListProp :: [NonNegative Integer] -> Property
n3ListProp xs = xs' === integerListFromN3List (integerListToN3List xs')
where xs' = map getNonNegative xs
-- Trick from user5402 answer
-- Integer <-> Sorted Integer
toDiffList :: [Integer] -> [Integer]
toDiffList = toDiffList' 0
where toDiffList' _ [] = []
toDiffList' p (x:xs) = x - p : toDiffList' x xs
fromDiffList :: [Integer] -> [Integer]
fromDiffList = fromDiffList' 0
where fromDiffList' _ [] = []
fromDiffList' p (x:xs) = p + x : fromDiffList' (x + p) xs
diffProp :: [Integer] -> Property
diffProp xs = xs === fromDiffList (toDiffList xs)
---
toIndex :: [Integer] -> Integer
toIndex = listToInteger . integerListToN3List . toDiffList
fromIndex :: Integer -> [Integer]
fromIndex = fromDiffList . integerListFromN3List . listFromInteger
-- [1,0] /= [0]
-- Decreasing sequence! doesn't terminate in this case
doesntHold :: [NonNegative Integer] -> Property
doesntHold xs = xs' === fromIndex (toIndex xs')
where xs' = map getNonNegative xs
holds :: [NonNegative Integer] -> Property
holds xs = xs' === fromIndex (toIndex xs')
where xs' = sort $ map getNonNegative xs
g :: ([Integer] -> Integer) -> [Integer] -> Integer
g mg = g' . sort
where g' [] = 0
g' (x:xs) = x + sum (map mg $ tails xs)
g_tree :: Tree Integer
g_tree = fmap (g faster_g' . fromIndex) nats
faster_g' :: [Integer] -> Integer
faster_g' = index g_tree . toIndex
faster_g = faster_g' . sort
Second addition:
I quickly benchmarked graph and binary sequence approach for my g with:
main :: IO ()
main = do
n <- read . head <$> getArgs
print $ faster_g [100, 110..n]
And the results are:
% time ./IntegerMemo 1000
1225560638892526472150132981770
./IntegerMemo 1000 0.19s user 0.01s system 98% cpu 0.200 total
% time ./IntegerMemo 2000
3122858113354873680008305238045814042010921833620857170165770
./IntegerMemo 2000 1.83s user 0.05s system 99% cpu 1.888 total
% time ./IntegerMemo 2500
4399449191298176980662410776849867104410434903220291205722799441218623242250
./IntegerMemo 2500 3.74s user 0.09s system 99% cpu 3.852 total
% time ./IntegerMemo 3000
5947985907461048240178371687835977247601455563536278700587949163642187584269899171375349770
./IntegerMemo 3000 6.66s user 0.13s system 99% cpu 6.830 total
% time ./IntegerMemoGrap 1000
1225560638892526472150132981770
./IntegerMemoGrap 1000 0.10s user 0.01s system 97% cpu 0.113 total
% time ./IntegerMemoGrap 2000
3122858113354873680008305238045814042010921833620857170165770
./IntegerMemoGrap 2000 0.97s user 0.04s system 98% cpu 1.028 total
% time ./IntegerMemoGrap 2500
4399449191298176980662410776849867104410434903220291205722799441218623242250
./IntegerMemoGrap 2500 2.11s user 0.08s system 99% cpu 2.202 total
% time ./IntegerMemoGrap 3000
5947985907461048240178371687835977247601455563536278700587949163642187584269899171375349770
./IntegerMemoGrap 3000 3.33s user 0.09s system 99% cpu 3.452 total
Looks like that graph version is faster by constant factor of 2. But they seem to have same time complexity :)
Looks like my problem is solved by simply replacing intSeq in the definition of g_graph by a monotone version:
-- replace vertexes for non-monotone integer lists by the according monotone one
monoIntSeq :: Graph [Integer]
monoIntSeq = f intSeq
where f (Graph as t) | as == sort as = Graph as $ fmap f t
| otherwise = fetch monIntSeq $ sort as
-- extract the subgraph after following the given labels
fetch :: Graph a -> [Integer] -> Graph a
fetch g [] = g
fetch (Graph _ t) (x:xs) = fetch (index t x) xs
g_graph :: Graph (StrictPair Integer [Integer])
g_graph = fmap (g g_m) monoIntSeq
Many thanks to all (especially user5402 and Oleg) for the help!
Edit: I still have the problem that the memory consumption is to high for my typical use case which can be described by following a path like this:
p :: [Integer]
p = map f [1..]
where f n | n `mod` 6 == 0 = n `div` 6
| n `mod` 3 == 0 = n `div` 3
| n `mod` 2 == 0 = n `div` 2
| otherwise = n
A slight improvement is to define the monotone integer sequences directly like this:
-- extract the subgraph after following the given labels (right to left)
fetch :: Graph a -> [Integer] -> Graph a
fetch = foldl' step
where step (Graph _ t) n = index t n
-- walk the graph following the given labels (right to left)
walk :: Graph a -> [Integer] -> a
walk g ns = a
where Graph a _ = fetch g ns
-- all monotone falling integer sequences
monoIntSeqs :: Graph [Integer]
monoIntSeqs = Graph [] $ fmap (flip f monoIntSeqs) nats
where f n (Graph ns t) | null ns = Graph (n:ns) $ fmap (f n) t
| n >= head ns = Graph (n:ns) $ fmap (f n) t
| otherwise = fetch monoIntSeqs (insert' n ns)
insert' = insertBy (comparing Down)
But at the end I might just use the original integer sequences without identification, identify nodes now and then explicitly and avoid keeping a reference to g_graph etc to let the garbage collection clean up as the program proceeds.
Reading the functional pearl Trouble Shared is Trouble Halved by Richard Bird and Ralf Hinze, I understood how to implement, what I was looking for two years ago (again based on Edward Kmett's trick):
{-# LANGUAGE BangPatterns #-}
import Data.Function (fix)
data Tree a = Tree (Tree a) a (Tree a)
deriving Show
instance Functor Tree where
fmap f (Tree l m r) = Tree (fmap f l) (f m) (fmap f r)
index :: Tree a -> Integer -> a
index (Tree _ m _) 0 = m
index (Tree l _ r) n = case (n - 1) `divMod` 2 of
(q,0) -> index l q
(q,1) -> index r q
nats :: Tree Integer
nats = go 0 1
where go !n !s = Tree (go l s') n (go r s')
where l = n + s
r = l + s
s' = s * 2
data IntSeqTree a = IntSeqTree a (Tree (IntSeqTree a))
val :: IntSeqTree a -> a
val (IntSeqTree a _) = a
step :: Integer -> IntSeqTree t -> IntSeqTree t
step n (IntSeqTree _ ts) = index ts n
intSeqTree :: IntSeqTree [Integer]
intSeqTree = fix $ create []
where create p x = IntSeqTree p $ fmap (extend x) nats
extend x n = case span (>n) (val x) of
([], p) -> fix $ create (n:p)
(m, p) -> foldr step intSeqTree (m ++ n:p)
instance Functor IntSeqTree where
fmap f (IntSeqTree a t) = IntSeqTree (f a) (fmap (fmap f) t)
In my use case I have hundreds or thousands of similar integer sequences (of length few hundred entries) that are generated incrementally. So for me this way is cheaper than sorting the sequences before looking up the function value (which I will access by using fmap on intSeqTree).

Resources