Related
I want to build a Hilbert matrix using the linear package and convert it to a list of lists. While this seems an easy task the type level constraints come into my way:
import Linear
import Linear.V
import Data.Vector qualified as V
-- | Outer (tensor) product of two vectors
outerWith :: (Functor f, Functor g, Num a) => (a -> a -> a) -> f a -> g a -> f (g a)
{-# INLINABLE outerWith #-}
outerWith f a b = fmap (\x -> fmap (f x) b) a
hilbertV :: forall a n. (Fractional a, Dim n) => Integer -> V n (V n a)
hilbertV n =
let v = V $ V.fromList $ fromIntegral <$> [1..n]
w = V $ V.fromList $ fromIntegral <$> [0..n-1]
in luInv $ outerWith (+) w v
listsFromM :: V n (V n a) -> [[a]]
listsFromM m = vToList (vToList <$> m)
vToList :: V n a -> [a]
vToList = V.toList . toVector
hilbertL :: forall a. (Fractional a) => Integer -> [[a]]
hilbertL n = listsFromM (hilbertV n)
When doing this the following error arises in the last line hilbertL n = listsFromM (hilbertV n):
bench/Solve.hs:28:26: error:
• Could not deduce (Dim n0) arising from a use of ‘hilbertV’
from the context: Fractional a
bound by the type signature for:
hilbertL :: forall a. Fractional a => Integer -> [[a]]
at bench/Solve.hs:27:1-56
The type variable ‘n0’ is ambiguous
These potential instances exist:
three instances involving out-of-scope types
instance GHC.TypeNats.KnownNat n => Dim n -- Defined in ‘Linear.V’
instance Data.Reflection.Reifies s Int =>
Dim (Linear.V.ReifiedDim s)
-- Defined in ‘Linear.V’
instance forall k (n :: k) a. Dim n => Dim (V n a)
-- Defined in ‘Linear.V’
• In the first argument of ‘listsFromM’, namely ‘(hilbertV n)’
In the expression: listsFromM (hilbertV n)
In an equation for ‘hilbertL’: hilbertL n = listsFromM (hilbertV n)
How can i get this to compile?
First, the type of HilbertV is unsafe. You shouldn't pass in an Integer size if size should be determined from the type! I think you want this:
{-# LANGUAGE TypeApplications, UnicodeSyntax #-}
hilbertV :: ∀ a n. (Fractional a, Dim n) => V n (V n a)
hilbertV = luInv $ outerWith (+) w v
where v = V $ V.fromList $ fromIntegral <$> [1..n]
w = V $ V.fromList $ fromIntegral <$> [0..n-1]
n = reflectDim #n []
(The [] just fills the proxy argument with the most concise way to generate a value-less functor input, since it is easier to pass in the type information with -XTypeApplications.)
In fact, I'd avoid even passing around n twice at all. Instead, why not factor out the marginal generation:
hilbertV :: ∀ a n. (Fractional a, Dim n) => V n (V n a)
hilbertV = luInv $ outerWith (+) w v
where v = fromIntegral <$> enumFinFrom 1
w = fromIntegral <$> enumFinFrom 0
enumFinFrom :: ∀ n a . (Enum a, Dim n) => a -> V n a
enumFinFrom ini = V . V.fromList $ take (reflectDim #n []) [ini..]
Now, for hilbertL the problem is that you have a dependent type size. The trick to deal with that are Rank2-quantified functions; linear offers reifyDim/reifyVector etc. for the purpose.
hilbertL :: ∀ a . Fractional a => Int -> [[a]]
hilbertL n = reifyDim n hilbertL'
where hilbertL' :: ∀ n p . Dim n => p n -> [[a]]
hilbertL' _ = listsFromM $ hilbertV #n
Alternatively, you could also change hilbertV to take a proxy argument for the size and then just hand that in. I've always found this a bit ugly, but it's actually more compact in this case:
hilbertV :: ∀ a n p . (Fractional a, Dim n) => p n -> V n (V n a)
hilbertV np = luInv $ outerWith (+) w v
where v = V $ V.fromList $ fromIntegral <$> [1..n]
w = V $ V.fromList $ fromIntegral <$> [0..n-1]
n = reflectDim np
hilbertL :: ∀ a . Fractional a => Int -> [[a]]
hilbertL n = reifyDim n (\np -> listsFromM $ hilbertV np)
I have the following structure:
y = [
fromList([("c", 1 ::Int)]),
fromList([("c", 5)]),
fromList([("d", 20)])
]
I can use this to update every "c":
y & mapped . at "c" . mapped %~ (+ 1)
-- [fromList [("c",2)], fromList [("c",6)], fromList [("d",20)]]
So the third entry is basically just ignored. But I want is for the operation to fail.
Only update, iff all the maps contain the key "c".
So I want:
y & mysteryOp
-- [fromList [("c",1)], fromList [("c",5)], fromList [("d",20)]]
-- fail because third entry does not contain "c" as key
I think I know which functions to use here:
over
-- I want to map the content of the list
mapped
-- map over the structure and transform to [(Maybe Int)]
traverse
-- I need to apply the operation, which will avoid
at "c"
-- I need to index into the key "c"
I just don't know how to combine them
Here's a couple of alternative approaches seeing as you like lenses;
Using laziness to delay deciding whether or not to make the changes,
f y = res
where (All c, res) = y
& each %%~ (at "c" %%~ (Wrapped . is _Just &&& fmap (applyWhen c succ)))
Or deciding upfront whether to make the changes,
f' y = under (anon y $ anyOf each (nullOf $ ix "c")) (mapped . mapped . ix "c" +~ 1) y
I don't see a way to write it as a simple composition of lens combinators, but this is a traversal that you can write from scratch. It should either traverse all values of "c" keys if every map contains such a key or else traverse no values.
We can start with a helper function to "maybe" update a map with a new key value, failing in the Maybe monad if the key doesn't exist. For reasons that will become apparent, we want to allow the update to occur in an arbitrary functor. That is, we want a function:
maybeUpdate :: (Functor f, Ord k) => k -> (v -> f v) -> Map k v -> Maybe (f (Map k v))
Is that signature clear? We check for the key k. If the key is found, we'll return Just an updated map with the key's corresponding value v updated in the f functor. Otherwise, if the key is not found, we return Nothing. We can write this pretty clearly in monad notation, though we need the ApplicativeDo extension if we only want to use Functor f constraint:
maybeUpdate :: (Functor f, Ord k) => k -> (v -> f v) -> Map k v -> Maybe (f (Map k v))
maybeUpdate k f m = do -- in Maybe monad
v <- m ^. at k
return $ do -- in "f" functor
a <- f v
return $ m & at k .~ Just a
Alternatively, these "monadic actions" are really just functor actions, so this definition can be used:
maybeUpdate' k f m =
m ^. at k <&> \v -> f v <&> \a -> m & at k .~ Just a
That's the hard part. Now, the traversal is pretty straightforward. We start with the signature:
traverseAll :: (Ord k) => k -> Traversal' [Map k v] v
traverseAll k f maps =
The idea is that this traversal starts by traversing the list of maps over the Maybe applicative using the maybeUpdate helper:
traverse (maybeUpdate k f) maps :: Maybe [f (Map k v)]
If this traversal succeeds (returns Just a list), then all keys were found, and we can sequence the f applicative actions:
sequenceA <$> traverse (maybeUpdate k f) maps :: Maybe (f [Map k v])
Now, we just use maybe to return the original list if the traversal fails:
traverseAll k f maps = maybe (pure maps) id (sequenceA <$> traverse (maybeUpdate k f) maps)
Now, with:
y :: [Map String Int]
y = [
fromList([("c", 1 ::Int)]),
fromList([("c", 5)]),
fromList([("d", 20)])
]
y2 :: [Map String Int]
y2 = [
fromList([("c", 1 ::Int)]),
fromList([("c", 5)]),
fromList([("d", 20),("c",6)])
]
we have:
> y & traverseAll "c" %~ (1000*)
[fromList [("c",1)],fromList [("c",5)],fromList [("d",20)]]
> y2 & traverseAll "c" %~ (1000*)
[fromList [("c",1000)],fromList [("c",5000)],fromList [("c",6000),("d",20)]]
Full disclosure: I was not able to construct traverseAll like that from scratch. I started with the much stupider "traversal" in the implicit identity applicative:
traverseAllC' :: (Int -> Int) -> [Map String Int] -> [Map String Int]
traverseAllC' f xall = maybe xall id (go xall)
where go :: [Map String Int] -> Maybe [Map String Int]
go (x:xs) = case x !? "c" of
Just a -> (Map.insert "c" (f a) x:) <$> go xs
Nothing -> Nothing
go [] = Just []
and once I got that up and running, I simplified it, made the Identity explicit:
traverseAllC_ :: (Int -> Identity Int) -> [Map String Int] -> Identity [Map String Int]
and converted it to a general applicative.
Anyway, here's the code:
{-# LANGUAGE ApplicativeDo #-}
{-# LANGUAGE RankNTypes #-}
import Data.Map (Map, fromList)
import Control.Lens
y :: [Map [Char] Int]
y = [
fromList([("c", 1 ::Int)]),
fromList([("c", 5)]),
fromList([("d", 20)])
]
y2 :: [Map [Char] Int]
y2 = [
fromList([("c", 1 ::Int)]),
fromList([("c", 5)]),
fromList([("d", 20),("c",6)])
]
traverseAll :: (Ord k) => k -> Traversal' [Map k v] v
traverseAll k f maps = maybe (pure maps) id (sequenceA <$> traverse (maybeUpdate k f) maps)
maybeUpdate :: (Functor f, Ord k) => k -> (v -> f v) -> Map k v -> Maybe (f (Map k v))
maybeUpdate k f m = do
v <- m ^. at k
return $ do
a <- f v
return $ m & at k .~ Just a
maybeUpdate' :: (Functor f, Ord k) => k -> (v -> f v) -> Map k v -> Maybe (f (Map k v))
maybeUpdate' k f m =
m ^. at k <&> \v -> f v <&> \a -> m & at k .~ Just a
main = do
print $ y & traverseAll "c" %~ (1000*)
print $ y2 & traverseAll "c" %~ (1000*)
I have a vector nested inside another. I want to use modify to update this matrix in place. So I use it for the inner vector, but do I also need to use it for the outer?
My suggestion from the comments still stands, if you do not need to operate on a ragged array then the usual rectangular array implementation is better. Here is a short list of drawbacks of vector of vectors:
performance penalty: the outer vector has to be boxed (which means an extra pointer indirection)
safety: you can't guarantee the same length of all rows
operating on ragged arrays is cumbersome
Nevertheless question still stands: how would you modify a vector of vectors in place. Below I'll provide an example function, which uses mutation to reverse rows of a ragged array and another function that reverses both rows and columns. Difference is that in the former we only mutate elements of each row, while in the latter we also mutate the outer boxed vector that corresponds to rows themselves:
{-# LANGUAGE RankNTypes #-}
import Control.Monad as M
import Control.Monad.ST
import Prelude as P
import Data.Vector as V
import Data.Vector.Generic.Mutable as VGM
import Data.Vector.Mutable as VM
import Data.Vector.Primitive as VP
import Data.Vector.Primitive.Mutable as VPM
raggedModifyRows ::
VP.Prim a
=> (forall s. V.Vector (VPM.MVector s a) -> ST s ())
-> V.Vector (VP.Vector a)
-> V.Vector (VP.Vector a)
raggedModifyRows action arr = runST $ do
-- thaw will create a copy of each row, so they can be safely modified
mvs <- V.mapM VP.thaw arr
action mvs
-- We are freezing mutated copies, so it is safe to use unsafeFreeze here too
V.mapM VP.unsafeFreeze mvs
raggedModify ::
VP.Prim a
=> (forall s. VM.MVector s (VPM.MVector s a) -> ST s ())
-> V.Vector (VP.Vector a)
-> V.Vector (VP.Vector a)
raggedModify action arr = runST $ do
arr' <- V.mapM VP.thaw arr
-- mapM already created a copy of a boxed vector, so we can use unsafeThaw
mv <- V.unsafeThaw arr'
action mv
v <- V.unsafeFreeze mv
V.mapM VP.unsafeFreeze v
generateMatrix ::
Prim a => (Int, Int) -> ((Int, Int) -> a) -> V.Vector (VP.Vector a)
generateMatrix (m, n) f = V.generate m $ \ i -> VP.generate n $ \j -> f (i, j)
generateRagged ::
Prim a => V.Vector Int -> ((Int, Int) -> a) -> V.Vector (VP.Vector a)
generateRagged v f = V.imap (\ i n -> VP.generate n $ \j -> f (i, j)) v
reverseST :: (VGM.MVector v a) => v s a -> ST s ()
reverseST mv =
let n = VGM.length mv
in M.forM_ [0 .. (n `div` 2) - 1] $ \j -> VGM.swap mv j (n - j - 1)
reverseRaggedRows :: Prim a => V.Vector (VP.Vector a) -> V.Vector (VP.Vector a)
reverseRaggedRows = raggedModifyRows $ \rows -> V.forM_ rows reverseST
reverseRagged :: Prim a => V.Vector (VP.Vector a) -> V.Vector (VP.Vector a)
reverseRagged =
raggedModify $ \mrows -> do
let reverse' i = VM.read mrows i >>= reverseST
let m = VM.length mrows
M.forM_ [0 .. (m `div` 2) - 1] $ \i -> do
reverse' i
VM.swap mrows i (m - i - 1)
reverse' i
M.when (odd m) $ reverse' (m `div` 2)
Which can be used as follows:
λ> m = generateMatrix (3, 4) $ \(i, j) -> i+j
λ> m
[[0,1,2,3],[1,2,3,4],[2,3,4,5]]
λ> reverseRaggedRows m
[[3,2,1,0],[4,3,2,1],[5,4,3,2]]
λ> reverseRagged m
[[5,4,3,2],[4,3,2,1],[3,2,1,0]]
λ> m = generateRagged (V.fromList [1,2,3]) $ \(i, j) -> i+j
λ> m
[[0],[1,2],[2,3,4]]
λ> reverseRaggedRows m
[[0],[2,1],[4,3,2]]
λ> reverseRagged m
[[4,3,2],[2,1],[0]]
Alternatively we could have used Data.Vector.modify to operate on the outer vector or map a destructive action that uses modify across all rows. There are all sorts of ways to go about it, depends on what you are trying to achieve, for example:
λ> m = generateRagged (V.fromList [1,2,3]) $ \(i, j) -> i+j
λ> V.map (VP.modify reverseST) m
[[0],[2,1],[4,3,2]]
λ> V.modify reverseST (V.map (VP.modify reverseST) m)
[[4,3,2],[2,1],[0]]
I did recommend using massiv for regular multidimensional arrays. Therefore here is also an example of how to achieve the same with withMArrayST:
{-# LANGUAGE FlexibleContexts #-}
import Control.Monad as M
import Data.Massiv.Array as A
reverseMatrix :: Mutable r Ix2 e => Array r Ix2 e -> Array r Ix2 e
reverseMatrix arr =
withMArrayST arr $ \marr -> do
let Sz2 m n = msize marr
ix2#(m2 :. n2) = m `div` 2 :. n `div` 2
A.forM_ (0 ..: ix2) $ \ix#(i :. j) -> do
A.swapM_ marr ix (m - i - 1 :. n - j - 1)
A.swapM_ marr (i :. n - j - 1) (m - i - 1 :. j)
when (odd m) $ A.forM_ (0 ..: n2) $ \ j ->
A.swapM_ marr (m2 :. j) (m2 :. n - j - 1)
when (odd n) $ A.forM_ (0 ..: m2) $ \ i ->
A.swapM_ marr (i :. n2) (m - i - 1 :. n2)
Which can be used as follows:
λ> a = makeArrayR P Seq (Sz2 3 4) $ \ (i :. j) -> i + j
λ> a
Array P Seq (Sz (3 :. 4))
[ [ 0, 1, 2, 3 ]
, [ 1, 2, 3, 4 ]
, [ 2, 3, 4, 5 ]
]
λ> reverseMatrix a
Array P Seq (Sz (3 :. 4))
[ [ 5, 4, 3, 2 ]
, [ 4, 3, 2, 1 ]
, [ 3, 2, 1, 0 ]
]
I wrote a program to perform SHA-1 in haskell, and while it does produce hashes, they do not match with the ones produced by other SHA-1 programs
Example: cat
hashes to: b5be86bc8bccfc24b01b093228ebb96fc92fa804 but is supposed to hash to 9d989e8d27dc9e0ec3389fc855f142c3d40f0c50
My code is:
(old code omitted)
I have no idea what is wrong. Can someone tell me where I made a mistake?
Edit:
I fixed the stuff that was pointed out, however it is still not working. It works correctly up until the inner loop.
I cleaned up the code so the functions for the inner loop are available as f1, f2 and f3
cat now interestingly hashes to ebe6c9fa1afa0ef5a0ca80bab251fd41cc29127e.
Code:
import Data.Word
import Data.Bits
import Data.Char (ord, intToDigit)
import Data.Binary (encode, decode)
import Numeric (showHex, showIntAtBase)
import System.IO (stdin)
import Data.Sequence ((<|), (|>))
import qualified Data.Sequence as S
import qualified Data.ByteString.Lazy as B
type Quintuple32 = (Word32, Word32, Word32, Word32, Word32)
addQuintuple (a, b, c, d, e) (f, g, h, i, j) =
(a + f, b + g, c + h, d + i, e + j)
shower :: Quintuple32 -> String
shower (a, b, c, d, e) = concatMap (`showHex` "") [a, b, c, d, e]
hash :: Int -> S.Seq Word32 -> Quintuple32 -> Quintuple32
hash i w h#(a, b, c, d, e)
| i < 20 = hash (i + 1) w (newhash (f1 h + k1))
| i < 40 = hash (i + 1) w (newhash (f2 h + k2))
| i < 60 = hash (i + 1) w (newhash (f3 h + k3))
| i < 80 = hash (i + 1) w (newhash (f2 h + k4))
| otherwise = h
where (k1, k2, k3, k4) = (0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6)
newhash a' = (rotate a 5 + a' + e + (w `S.index` i), a, rotate b 30, c, d)
f1 :: Quintuple32 -> Word32
f1 (_, b, c, _, _) = (b .&. c) .|. (complement b .&. c)
f2 :: Quintuple32 -> Word32
f2 (_, b, c, d, _) = b `xor` c `xor` d
f3 :: Quintuple32 -> Word32
f3 (_, b, c, d, _) = (b .&. c) .|. (b .&. d) .|. (c .&. d)
starting :: Quintuple32
starting = (0x67452301
, 0xEFCDAB89
, 0x98BADCFE
, 0x10325476
, 0xC3D2E1F0)
hasher :: Quintuple32 -> S.Seq Word32 -> Quintuple32
hasher acc x = addQuintuple acc (hash 0 (extend x) acc)
process :: B.ByteString -> Quintuple32
process = foldl hasher starting . chunks . pad
extend :: S.Seq Word32 -> S.Seq Word32
extend = extend' 16
extend' :: Int -> S.Seq Word32 -> S.Seq Word32
extend' 80 a = a
extend' i a = extend' (i + 1) (a |> xored)
where xored = rotate ((a `S.index` (i - 3)) `xor`
(a `S.index` (i - 8)) `xor`
(a `S.index` (i - 14)) `xor`
(a `S.index` (i - 16))) 1
toBytes :: String -> B.ByteString
toBytes = B.pack . map (fromIntegral . ord)
splitEvery n xs
| B.null xs = S.empty
| otherwise = B.take n xs <| splitEvery n (B.drop n xs)
chunks :: B.ByteString -> [S.Seq Word32]
chunks xs
| B.null xs = []
| otherwise = x : chunks (B.drop 64 xs)
where x = fmap decode (splitEvery 4 (B.take 64 xs))
pad :: B.ByteString -> B.ByteString
pad xs = B.append (add0 $ add1 xs) length64
where length64 = encode (fromIntegral (8 * B.length xs) :: Word64)
add1 :: B.ByteString -> B.ByteString
add1 = flip B.append (B.singleton 128)
add0 :: B.ByteString -> B.ByteString
add0 xs
| modulo /= 448 = add0 $ B.append xs (B.singleton 0)
| otherwise = xs
where modulo = (B.length xs * 8) `rem` 512
Also, a small question: is something like (a, b) = (8, 9) an acceptable thing to do to set multiple variables?
Oh, another one of these!
Two errors jump out at me immediately:
pad :: B.ByteString -> B.ByteString
pad xs = B.append (add0 $ add1 xs) length64
where length64 = encode (fromIntegral (B.length xs) :: Word64)
Notice the length you append is supposed to be the bit length, not the byte length.
add1 :: B.ByteString -> B.ByteString
add1 = flip B.append (B.singleton 255)
Notice 255 /= 0b10000000 and the pad is supposed to be the later.
In general you debug these by 1) going over the spec again and again. 2) Comparing to another implementation, such as Adam Wick's SHA package, and comparing for equality at as fine grained level as possible.
EDIT: There are two more bugs, basically transcription errors. Look around a bit and shout if you're still stuck.
For just a 25mb file the memory usage is constant at 792mb! I thought it had to do with my usage
from list, but moving certain parts of the code for vector (the arrays where fft is applied, for example) didn't change how much memory being used at all!
{-# LANGUAGE OverloadedStrings,BangPatterns #-}
import qualified Data.Attoparsec.Char8 as Ap
import Data.Attoparsec
import Control.Monad
import Control.Applicative
--import Control.DeepSeq (force)
import System.IO
import System.Environment
import Data.List (zipWith4,unzip4,zip4,foldl')
import Data.Bits
import Data.Complex
import Data.String (fromString)
import Data.ByteString.Internal
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as Bl
import qualified Data.Vector.Unboxed as Vu
import qualified Statistics.Transform as St
{-
I run a test on a collection of data from a file
[(1,t),(2,t),(3,t),(4,t),(5,t)]
- - -
| - - -
| | - - -
| | |
[y++t, n, y++t]
To do that, I use splitN to create a list of list
[[(1,t),(2,t),(3,t)],[(2,t),(3,t),(4,t)],[(3,t),(4,t),(5,t)]]
Map a serie of functions to determine a value for each inner collection,
and return when an event happened.
-}
data FourD b a = FourD a a a b
instance Functor (FourD c) where
fmap f (FourD x y z d) = FourD (f x) (f y) (f z) d
mgrav_per_bit = [ 18, 36, 71, 143, 286, 571, 1142 ]
--Converting raw data to mg
aToG :: Int -> Double
aToG a = fromIntegral . sign $ uresult
where
twocomp = if a>128
then 256-a
else a
uresult = sum $ zipWith (*) mgrav_per_bit (map (fromEnum . testBit twocomp) [0..7])
sign = if a > 128
then negate
else id
--Data is (int,int,int,time)
--Converted to (St.CD^3,Bytestring) in place of maping afterwards.
parseAcc :: Parser (FourD B.ByteString St.CD)
parseAcc = do Ap.char '('
x <- fmap ((:+0) . aToG) Ap.decimal
Ap.char ','
y <- fmap ((:+0) . aToG) Ap.decimal
Ap.char ','
z <- fmap ((:+0) . aToG) Ap.decimal
Ap.char ','
time <- takeTill (== 41)
Ap.char ')'
return $! FourD x y z time
--applies parseAcc to many lines, fails at the end of file (Need to add a newline)
parseFile = many $ parseAcc <* (Ap.endOfInput <|> Ap.endOfLine)
readExpr input = case parse parseFile input of
Done b val -> val
Partial p -> undefined
Fail a b c -> undefined
unType (FourD x y d z) = (x ,y ,d ,z)
-- Breaks a list of FourD into smaller lists, apply f and g to those lists, then filter the result based if an even happened or not
amap :: (Num c, Ord c) => ([a] -> [c]) -> ([d] -> [ByteString]) -> [FourD d a] -> [Bl.ByteString]
amap f g = (uncurry4 (zipWith4 (filterAcc))). map4 f g . unzip4 . map (unType)
where map4 f g (a,b,c,d) = (f a,f b,f c,g d)
uncurry4 f (a,b,c,d) = f a b c d
-- before i had map filterAcc,outside amap. Tried to fuse everything to eliminate intermediaries
-- An event is detected if x > 50
filterAcc x y z t = if x > 50
then (Bl.pack . B.unpack) $ "yes: " `B.append` t
else ""
-- split [St.CD] in [(Vector St.CD)], apply fft to each, and compress to a single value.
-- Core of the application
fftAcross :: [St.CD] -> [Int]
fftAcross = map (floor . noiseEnergy . St.fft) . splitN 32
-- how the value is determined (sum of all magnitudes but the first one)
noiseEnergy :: (RealFloat a, Vu.Unbox a) => Vu.Vector (Complex a) -> a
noiseEnergy x = (Vu.foldl' (\b a-> b+(magnitude a)) 0 (Vu.drop 1 x))/32
-- how the values are split in (Vector St.CD), if lenght > 32, takes 32, otherwhise I'm done
splitN :: Vu.Unbox a => Int -> [a] -> [Vu.Vector a]
splitN n x = helper x
where
helper x = if atLeast n x
then (Vu.take n (Vu.fromList x)) : (helper (drop 1 x) )
else []
-- Replacing the test by atLeast in place of a counter (that compared to length x,calculated once) reduced the behaviour that memory usage was constant.
-- this is replicated so the behaviour of splitN happens on the time part of FourD, Can't use the same since there is no Vector Bytestring instance
splitN2 n x = helper x
where
helper x = if atLeast n x
then (head x) : (helper (drop 1 x))
else []
atLeast :: Int -> [a] -> Bool
atLeast 0 _ = True
atLeast _ [] = False
atLeast n (_:ys) = atLeast (n-1) ys
main = do
filename <- liftM head getArgs
filehandle <- openFile "results.txt" WriteMode
contents <- liftM readExpr $ B.readFile filename
Bl.hPutStr (filehandle) . Bl.unlines . splitAndApplyAndFilter $ contents where
splitAndApplyAndFilter = amap fftAcross (splitN2 32)
Edit: after some refactoring, fusing some maps, reducing length, I managed to get this working at 400~ with a 25mb input file. Still, on a 100mb, it takes 1.5gb.
The program is intended to determine if a certain event happened ina point of time, for that it requries a collection of values (im using 32 atm), runs a fft in it, sum those values and see if passes a threshold. If yes, print the time to a file.
http://db.tt/fT8kXPKz for a 25mb testfile
I found the solution due a topic in reddit about the same problem!
Parsing with Haskell and Attoparsec
The great majority of my problem was caused by the fact attoparsec is strict and haskell data are rather large (so a 100mb text file can be actually much more in run time)
The other half was that profiling doubles the memory use, and I didn't account for that.
After changing the parser to be lazy, my program uses 120mb in place of 800mb (when input size is 116mb), so sucess!
In case this interest someone, here is the relevant piece of code change:
readExpr input = case parse (parseAcc<*(Ap.endOfLine<*Ap.endOfInput<|>Ap.endOfLine)) input of
Done b val -> val : readExpr b
Partial e -> []
Fail _ _ c -> error c
The full code:
{-# LANGUAGE OverloadedStrings,BangPatterns #-}
import qualified Data.Attoparsec.Char8 as Ap
import Data.Attoparsec
import Control.Monad
import Control.Applicative
--import Control.DeepSeq (force)
import System.IO
import System.Environment
import Data.List (zipWith4,unzip4,zip4,foldl')
import Data.Bits
import Data.Complex
import Data.String (fromString)
import Data.ByteString.Internal
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as Bl
import qualified Data.Vector.Unboxed as Vu
import qualified Statistics.Transform as St
{-
I run a test on a collection of data from a file
[(1,t),(2,t),(3,t),(4,t),(5,t)]
- - -
| - - -
| | - - -
| | |
[y++t, n, y++t]
To do that, I use splitN to create a list of list
[[(1,t),(2,t),(3,t)],[(2,t),(3,t),(4,t)],[(3,t),(4,t),(5,t)]]
Map a serie of functions to determine a value for each inner collection,
and return when an event happened.
-}
data FourD b a = FourD a a a b
instance Functor (FourD c) where
fmap f (FourD x y z d) = FourD (f x) (f y) (f z) d
mgrav_per_bit = [ 18, 36, 71, 143, 286, 571, 1142 ]
--Converting raw data to mg
aToG :: Int -> Double
aToG a = fromIntegral . sign $ uresult
where
twocomp
| a>128 = 256-a
| otherwise = a
uresult = sum $ zipWith (*) mgrav_per_bit (map (fromEnum . testBit twocomp) [0..7])
sign
| a > 128 = negate
| otherwise = id
--Data is (int,int,int,time)
--Converted to (St.CD^3,Bytestring) in place of maping afterwards.
parseAcc :: Parser (FourD B.ByteString St.CD)
parseAcc = do Ap.char '('
x <- fmap ((:+0) . aToG) Ap.decimal -- Parse, transform to mg, convert to complex
Ap.char ','
y <- fmap ((:+0) . aToG) Ap.decimal
Ap.char ','
z <- fmap ((:+0) . aToG) Ap.decimal
Ap.char ','
time <- takeTill (== 41)
Ap.char ')'
return $! FourD x y z time
--applies parseAcc to many lines, fails at the end of file (Need to add a newline)
parseFile = many $ parseAcc <* (Ap.endOfInput <|> Ap.endOfLine)
readExpr input = case parse (parseAcc<*(Ap.endOfLine<*Ap.endOfInput<|>Ap.endOfLine)) input of
Done b val -> val : readExpr b
Partial e -> []
Fail _ _ c -> error c
unType (FourD x y d z) = (x ,y ,d ,z)
-- Breaks a list of FourD into smaller lists, apply f and g to those lists, then filter the result based if an even happened or not
amap :: (Num c, Ord c) => ([a] -> [c]) -> ([d] -> [ByteString]) -> [FourD d a] -> [ByteString]
amap f g = (uncurry4 (zipWith4 (filterAcc))). map4 f g . unzip4 . map (unType)
where map4 f g (a,b,c,d) = (f a,f b,f c,g d)
uncurry4 f (a,b,c,d) = f a b c d
-- before i had map filterAcc,outside amap. Tried to fuse everything to eliminate intermediaries
-- An event is detected if x > 50
filterAcc x y z t
| x > 50 = t
| otherwise = ""
-- split [St.CD] in [(Vector St.CD)], apply fft to each, and compress to a single value.
-- Core of the application
fftAcross :: [St.CD] -> [Int]
fftAcross = map (floor . noiseEnergy . St.fft) . splitN 32
-- how the value is determined (sum of all magnitudes but the first one)
noiseEnergy :: (RealFloat a, Vu.Unbox a) => Vu.Vector (Complex a) -> a
noiseEnergy x = (Vu.foldl' (\b a-> b+(magnitude a)) 0 (Vu.drop 1 x))/32
-- how the values are split in (Vector St.CD), if lenght > 32, takes 32, otherwhise I'm done
splitN :: Vu.Unbox a => Int -> [a] -> [Vu.Vector a]
splitN n x = helper x
where
helper x
| atLeast n x = (Vu.take n (Vu.fromList x)) : (helper (drop 1 x) )
| otherwise = []
-- Replacing the test by atLeast in place of a counter (that compared to length x,calculated once) reduced the behaviour that memory usage was constant.
-- this is replicated so the behaviour of splitN happens on the time part of FourD, Can't use the same since there is no Vector Bytestring instance
splitN2 n x = helper x
where
helper x
| atLeast n x = (head x) : (helper (drop 1 x))
| otherwise = []
atLeast :: Int -> [a] -> Bool
atLeast 0 _ = True
atLeast _ [] = False
atLeast n (_:ys) = atLeast (n-1) ys
intervalFinder :: [ByteString]->[B.ByteString]
intervalFinder x = helper x ""
where
helper (x:xs) ""
| x /= "" = ("Start Time: " `B.append` x `B.append` "\n"):(helper xs x)
| otherwise = helper xs ""
helper (x:xs) y
| x == "" = ( "End Time: "`B.append` y `B.append` "\n\n" ):(helper xs "")
| otherwise = helper xs x
helper _ _ = []
main = do
filename <- liftM head getArgs
filehandle <- openFile "results.txt" WriteMode
contents <- liftM readExpr $ B.readFile filename
Bl.hPutStr (filehandle) . Bl.fromChunks . intervalFinder . splitAndApplyAndFilter $ contents
hClose filehandle
where
splitAndApplyAndFilter = amap fftAcross (splitN2 32)
--contents <- liftM ((map ( readExpr )) . B.lines) $ B.readFile filename
{- *Main> let g = liftM ((amap fftAcross (splitN2 32)) . readExpr) $ B.readFile "te
stpattern2.txt"
-}
-- B.hPutStrLn (filehandle) . B.unlines . map (B.pack . show ) . amap (map (floor .quare) . (filter (/=[])) . map ( (drop 1) . (map (/32)) . fft ) . splitN 32) . map ( fmap(fromIntegral . aToG)) . map readExpr $ contents