How GHC forces evaluation in multithreaded applications?

How GHC forces evaluation in multithreaded applications? - haskell

For example: I have a quite simple memoised implementation of fibonacci sequence,
which I request in multiple threads:
{-# LANGUAGE BangPatterns #-}
module Main where
import Control.Concurrent
import Control.DeepSeq
import System.Environment (getArgs)
import System.IO.Unsafe (unsafePerformIO)
data Tree a = Tree (Tree a) a (Tree a)
instance Functor Tree where
fmap f (Tree l m r) = Tree (fmap f l) (f m) (fmap f r)
index :: Tree a -> Int -> a
index (Tree _ m _) 0 = m
index (Tree l _ r) n = case (n - 1) `divMod` 2 of
(q,0) -> index l q
(q,_) -> index r q
nats :: Tree Int
nats = go 0 1
where go !n !s = Tree (go l s') n (go r s')
where l = n + s
r = l + s
s' = s * 2
fib :: (Int -> Integer) -> Int -> Integer
fib _ 0 = 0
fib _ 1 = 1
fib f n = f (n - 1) + f (n - 2)
fib_tree :: Tree Integer
fib_tree = fmap (fib fastfib) nats
fastfib :: Int -> Integer
fastfib = index fib_tree
writeMutex :: MVar ()
writeMutex = unsafePerformIO (newMVar ())
fibIO :: Int -> IO ()
fibIO n = let fibn = fastfib n
in deepseq fibn $ do takeMVar writeMutex
putStrLn (show n ++ " " ++ show fibn)
putMVar writeMutex ()
children :: MVar [MVar ()]
children = unsafePerformIO (newMVar [])
waitForChildren :: IO ()
waitForChildren = do
cs <- takeMVar children
case cs of
[] -> return ()
m:ms -> do
putMVar children ms
takeMVar m
waitForChildren
forkChild :: IO () -> IO ThreadId
forkChild io = do
mvar <- newEmptyMVar
childs <- takeMVar children
putMVar children (mvar:childs)
forkFinally io (\_ -> putMVar mvar ())
main' :: [Int] -> IO ()
main' = mapM_ (forkChild . fibIO)
main :: IO ()
main = do
nargs <- fmap read `fmap` getArgs :: IO [Int]
main' nargs
waitForChildren
And when compiled with -threaded I can run it:
% time ./concur 10 10 10 10 10 10 10 +RTS -N4
10 55
10 55
10 55
10 55
10 55
10 55
10 55
./concur 10 10 10 10 10 10 10 +RTS -N4 0.00s user 0.00s system 82% cpu 0.007 total
And as expected if I have single large argument, or many, the execution time is the same.
I'm interested how evaluation of thunks in memoised tree is performed, on low-level?

When one thread evaluates a thunk, the chunk is locked, and other threads block on it (aka black hole). See Haskell on a Shared-Memory Multiprocessor paper for details.

Related

Why is my parallel code even slower than without parallelism?

I followed Simon Marlow's book on parallel Haskell (Chapter 1) using rpar/rseq .
Below is the code (Solving the Squid Game bridge simulation):
{-# LANGUAGE FlexibleContexts #-}
import Control.DeepSeq (force)
import Control.Exception (evaluate)
import Control.Parallel.Strategies
import Data.Array.IO
( IOUArray,
getAssocs,
newListArray,
readArray,
writeArray,
)
import Data.Functor ((<&>))
import System.Environment (getArgs)
import System.Random (randomRIO)
game ::
Int -> -- number of steps
Int -> -- number of glass at each step
Int -> -- number of players
IO Int -- return the number of survivors
game totalStep totalGlass = go 1 totalGlass
where
go currentStep currentGlass numSurvivors
| numSurvivors == 0 || currentStep > totalStep = return numSurvivors
| otherwise = do
r <- randomRIO (1, currentGlass)
if r == 1
then go (currentStep + 1) totalGlass numSurvivors
else go currentStep (currentGlass - 1) (numSurvivors - 1)
simulate :: Int -> IO Int -> IO [(Int, Int)]
simulate n game =
(newListArray (0, 16) (replicate 17 0) :: IO (IOUArray Int Int))
>>= go 1
>>= getAssocs
where
go i marr
| i <= n = do
r <- game
readArray marr r >>= writeArray marr r . (+ 1)
go (i + 1) marr
| otherwise = return marr
main1 :: IO ()
main1 = do
[n, steps, glassNum, playNum] <- getArgs <&> Prelude.map read
res <- simulate n (game steps glassNum playNum)
mapM_ print res
main2 :: IO ()
main2 = do
putStrLn "Running main2"
[n, steps, glassNum, playNum] <- getArgs <&> Prelude.map read
res <- runEval $ do
r1 <- rpar $ simulate (div n 2) (game steps glassNum playNum) >>= evaluate . force
r2 <- rpar $ simulate (div n 2) (game steps glassNum playNum) >>= evaluate . force
rseq r1
rseq r2
return $
(\l1 l2 -> zipWith (\e1 e2 -> (fst e1, snd e1 + snd e2)) l1 l2)
<$> r1
<*> r2
mapM_ print res
main = main2
For main2, I've compiled using:
ghc -O2 -threaded ./squid.hs
and run as:
./squid 10000000 18 2 16 +RTS -N2
I can't understand why main1 is faster than main2 while main2 has parallelism in it.
Could anyone give me some comments on my code as to whether this is the correct use of parallelism?
Update:
Here's the updated version (the new random is quite cumbersome to use):
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}
import Control.Monad.ST (ST, runST)
import Control.Parallel.Strategies (rpar, rseq, runEval)
import Data.Array.ST
( STUArray,
getAssocs,
newListArray,
readArray,
writeArray,
)
import Data.Functor ((<&>))
import System.Environment (getArgs)
import System.Random (StdGen)
import System.Random.Stateful
( StdGen,
applySTGen,
mkStdGen,
runSTGen,
uniformR,
)
game ::
Int -> -- number of steps
Int -> -- number of glass at each step
Int -> -- number of players
StdGen ->
ST s (Int, StdGen) -- return the number of survivors
game ns ng = go 1 ng
where
go
!cs -- current step number
!cg -- current glass number
!ns -- number of survivors
!pg -- pure generator
| ns == 0 || cs > ns = return (ns, pg)
| otherwise = do
let (r, g') = runSTGen pg (applySTGen (uniformR (1, cg)))
if r == 1
then go (cs + 1) ng ns g'
else go cs (cg - 1) (ns - 1) g'
simulate :: Int -> (forall s. StdGen -> ST s (Int, StdGen)) -> [(Int, Int)]
simulate n game =
runST $
(newListArray (0, 16) (replicate 17 0) :: ST s1 (STUArray s1 Int Int))
>>= go 1 (mkStdGen n)
>>= getAssocs
where
go !i !g !marr
| i <= n = do
(r, g') <- game g
readArray marr r >>= writeArray marr r . (+ 1)
go (i + 1) g' marr
| otherwise = return marr
main :: IO ()
main = do
[n, steps, glassNum, playNum] <- getArgs <&> Prelude.map read
let res = runEval $ do
r1 <- rpar $ simulate (div n 2 - 1) (game steps glassNum playNum)
r2 <- rpar $ simulate (div n 2 + 1) (game steps glassNum playNum)
rseq r1
rseq r2
return $ zipWith (\e1 e2 -> (fst e1, snd e1 + snd e2)) r1 r2
mapM_ print res
Update 2:
Use pure code and the elapsed time is down to 7 seconds.
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}
import Control.Monad.ST ( runST, ST )
import Control.Parallel ( par, pseq )
import Data.Array.ST
( getAssocs, newListArray, readArray, writeArray, STUArray )
import Data.Functor ((<&>))
import System.Environment (getArgs)
import System.Random (StdGen, uniformR, mkStdGen)
game ::
Int -> -- number of total steps
Int -> -- number of glass at each step
Int -> -- number of players
StdGen ->
(Int, StdGen) -- return the number of survivors
game ts ng = go 1 ng
where
go
!cs -- current step number
!cg -- current glass number
!ns -- number of survivors
!pg -- pure generator
| ns == 0 || cs > ts = (ns, pg)
| otherwise = do
let (r, g') = uniformR (1, cg) pg
if r == 1
then go (cs + 1) ng ns g'
else go cs (cg - 1) (ns - 1) g'
simulate :: Int -> (StdGen -> (Int, StdGen)) -> [(Int, Int)]
simulate n game =
runST $
(newListArray (0, 16) (replicate 17 0) :: ST s1 (STUArray s1 Int Int))
>>= go 1 (mkStdGen n)
>>= getAssocs
where
go !i !g !marr
| i <= n = do
let (r, g') = game g
readArray marr r >>= writeArray marr r . (+ 1)
go (i + 1) g' marr
| otherwise = return marr
main :: IO ()
main = do
[n, steps, glassNum, playNum] <- getArgs <&> Prelude.map read
let r1 = simulate (div n 2 - 1) (game steps glassNum playNum)
r2 = simulate (div n 2 + 1) (game steps glassNum playNum)
res = zipWith (\e1 e2 -> (fst e1, snd e1 + snd e2)) r1 r2
res' = par r1 (pseq r2 res)
mapM_ print res'

You aren't actually using any parallelism. You write
r1 <- rpar $ simulate (div n 2) (game steps glassNum playNum) >>= evaluate . force
This sparks a thread to evaluate an IO action, not to run it. That's not useful.
Since your simulate is essentially pure, you should convert it from IO to ST s by swapping in the appropriate array types, etc. Then you can rpar (runST $ simulate ...) and actually do work in parallel. I don't think the force invocations are useful/appropriate in context; they'll free the arrays sooner, but at significant cost.

Exit from State Monad Loop

I am playing around with the State monad and queues. At the moment I have the following code:
{-# LANGUAGE ViewPatterns, FlexibleContexts #-}
module Main where
import Criterion.Main
import Control.Monad.State.Lazy
import Data.Maybe (fromJust)
import Data.Sequence ((<|), ViewR ((:>)))
import qualified Data.Sequence as S
--------------------------------------------------------
data Queue a = Queue { enqueue :: [a], dequeue :: [a] }
deriving (Eq, Show)
-- adds an item
push :: a -> Queue a -> Queue a
push a q = Queue (a:enqueue q) (dequeue q)
pop :: Queue a -> Maybe (a, Queue a)
pop q = if null (dequeue q) then
go $ Queue [] (reverse (enqueue q))
else
go q
where go (Queue _ []) = Nothing
go (Queue en (x:de)) = Just (x, Queue en de)
queueTst :: Int -> Queue Int -> Queue Int
queueTst 0 q = q
queueTst n q | even n = queueTst (n - 1) (push (100 + n) q)
| otherwise = queueTst (n - 1)
(if popped == Nothing then q
else snd (fromJust popped))
where popped = pop q
-------------------------------------------------------------
pushS :: a -> S.Seq a -> S.Seq a
pushS a s = a <| s
pushS' :: a -> State (S.Seq a) (Maybe a)
pushS' a = do
s <- get
put (a <| s)
return Nothing
pushS'' :: a -> State (S.Seq a) (Maybe a)
pushS'' a = get >>= (\g -> put (a <| g)) >> return Nothing
popS :: S.Seq a -> Maybe (a, S.Seq a)
popS (S.viewr -> S.EmptyR) = Nothing
popS (S.viewr -> s:>r) = Just (r,s)
popS' :: State (S.Seq a) (Maybe a)
popS' = do
se <- get
let sl = popS'' se
put $ snd sl
return $ fst sl
where popS'' (S.viewr -> S.EmptyR) = (Nothing, S.empty)
popS'' (S.viewr -> beg:>r) = (Just r, beg)
queueTstS :: Int -> S.Seq Int -> S.Seq Int
queueTstS 0 s = s
queueTstS n s | even n = queueTstS (n - 1) (pushS (100 + n) s)
| otherwise = queueTstS (n - 1)
(if popped == Nothing then s
else snd (fromJust popped))
where popped = popS s
queueTstST :: Int -> State (S.Seq Int) (Maybe Int)
queueTstST n =
if (n > 0) then
if even n then
pushS' (100 + n) >> queueTstST (n - 1)
else
popS' >> queueTstST (n - 1)
else return Nothing
main :: IO ()
main = defaultMain
[ bench "Twin Queue" $ whnf (queueTst 550) (Queue [500,499..1] [])
, bench "Sequence Queue" $ whnf (queueTstS 550) (S.fromList [500,499..1])
, bench "State Queue" $ whnf
(runState (queueTstST 550)) (S.fromList [500,499..1])
]
That's a bit of code but really the only functions that are relevant here are main and queueTstST. is there a way to exit queueTstST while retaining the last "Maybe value" rather than with "Nothing"?

queueTstST :: Int -> State (S.Seq Int) (Maybe Int)
queueTstST n =
if (n > 1) then
if even n then
pushS' (100 + n) >> queueTstST (n - 1)
else
popS' >> queueTstST (n - 1)
else popS'

You can remember the last value if you add an argument to your recursive function.
queueTstST :: Int -> State (S.Seq Int) (Maybe Int)
queueTstST n = go n Nothing
where
go :: Int -> Maybe Int -> State (S.Seq Int) (Maybe Int)
go n v =
if (n > 1)
then if even n
then pushS' (100 + n) >> go (n - 1) Nothing
else popS' >>= go (n - 1)
else return v

Haskell STM : How to store ThreadID as per their execution sequence

In the following program Fibonacci number is generated from a given integer (generated randomly) and that value is stored into a TVar. As the execution time for generating the Fibonacci is different for different number, thus threads will not run sequentially. I want to store theadID, may be in a list, to check their execution pattern.
Please help me. Thanks in advance.
module Main
where
import Control.Parallel
import Control.Concurrent.STM
import Control.Concurrent
import System.Random
import Control.Monad
import Data.IORef
import System.IO
nfib :: Int -> Int
nfib n | n <= 2 = 1
| otherwise = par n1 (pseq n2 (n1 + n2 ))
where n1 = nfib (n-1)
n2 = nfib (n-2)
type TInt = TVar Int
updateNum :: TInt -> Int -> STM()
updateNum n v = do x1 <- readTVar n
let y = nfib v
x2 <- readTVar n
if x1 == x2
then writeTVar n y
else retry
updateTransaction :: TInt -> Int -> IO ()
updateTransaction n v = do atomically $ updateNum n v
incR :: IORef Int -> Int -> IO ()
incR r x = do { v <- readIORef r;
writeIORef r (v - x) }
main :: IO ()
main = do
n <- newTVarIO 10
r <- newIORef 40;
forM_ [1..10] (\i -> do
incR r i
;v <- readIORef r
;forkIO (updateTransaction n v)
)
I want to store [TreadID,FibNo] into a List for all the threads as per their execution. Suppose T1 has executed Fib30, T2 Fib35, T3->32 and T4->40. And if the commit sequence of threads like T1,T3, T2 and T4 then I want to store T1-35,T3-32,t2-35,t4-40 in a list.
Edit:
As suggested by #MathematicalOrchid, I have modified updateTrasaction as follows:-
updateTransaction :: MVar [(ThreadId, Int)] -> TInt -> Int -> IO ()
updateTransaction mvar n v = do
tid <- myThreadId
atomically $ updateNum n v
list <- takeMVar mvar
putMVar mvar $ list ++ [(tid, v)]
Now I am trying to print the values from that list in main
main :: IO ()
main = do
...
...
m <- newEmptyMVar
...
...
mv <- readMVar m
putStrLn ("ThreadId, FibVal : " ++ " = " ++ (show mv))
At the time of execution. MVar values couldn't be read and generates error
Exception: thread blocked indefinitely in an MVar operation
What to do? Thank in advance.

Did you want something like
updateTransaction :: TInt -> Int -> IO ()
updateTransaction n v = do
tid <- myThreadId
putStrLn $ "Start " ++ show tid
atomically $ updateNum n v
putStrLn $ "End " ++ show tid
Or perhaps something like
updateTransaction :: TInt -> Int -> IO ThreadId
updateTransaction n v = do
atomically $ updateNum n v
myThreadId
and change forM_ to forM?
Also, this part:
do
x1 <- readTVar n
...
x2 <- readTVar n
if x1 == x2 ...
If x1 /= x2 then GHC will automatically abort and restart your transaction. You do not need to manually check this yourself. Indeed, the else-branch can never execute. That's kind of the point of STM; it will appear to your transaction that nobody else changes the data you're looking at, so you don't ever have to worry about concurrent writes.
Edit: If you want to record the actual order in which the transactions committed, you're going to need some more inter-thread communication. Obviously you could do that with STM, but just for a list of stuff, maybe this could work?
updateTransaction :: MVar [(ThreadId, Int)] -> TInt -> Int -> IO ()
updateTransaction mvar n v = do
tid <- myThreadId
fib <- atomically $ updateNum n v
list <- takeMVar mvar
putMVar mvar $ list ++ [(tid, fib)]
(Obviously you have to make updateNum return the number it calculated.)

Mutually recursive IO definitions

I can write the following:
f :: [Int] -> [Int]
f x = 0:(map (+1) x)
g :: [Int] -> [Int]
g x = map (*2) x
a = f b
b = g a
main = print $ take 5 a
And things work perfectly fine (ideone).
However, lets say I want g to do something more complex than multiply by 2, like ask the user for a number and add that, like so:
g2 :: [Int] -> IO [Int]
g2 = mapM (\x -> getLine >>= (return . (+x) . read))
How do I then, well, tie the knot?
Clarification:
Basically I want the list of Ints from f to be the input of g2 and the list of Ints from g2 to be the input of f.

The effectful generalization of lists is ListT:
import Control.Monad
import Pipes
f :: ListT IO Int -> ListT IO Int
f x = return 0 `mplus` fmap (+ 1) x
g2 :: ListT IO Int -> ListT IO Int
g2 x = do
n <- x
n' <- lift (fmap read getLine)
return (n' + n)
a = f b
b = g2 a
main = runListT $ do
n <- a
lift (print n)
mzero
You can also implement take like functionality with a little extra code:
import qualified Pipes.Prelude as Pipes
take' :: Monad m => Int -> ListT m a -> ListT m a
take' n l = Select (enumerate l >-> Pipes.take n)
main = runListT $ do
n <- take' 5 a
lift (print n)
mzero
Example session:
>>> main
0
1<Enter>
2
2<Enter>
3<Enter>
7
4<Enter>
5<Enter>
6<Enter>
18
7<Enter>
8<Enter>
9<Enter>
10<Enter>
38
You can learn more about ListT by reading the pipes tutorial, specifically the section on ListT.

haskell Convert IO Int to Int System.Random.MWC

I would like to convert an IO Int to Int from System.Random.MWC, using unsafePerformIO. It does work in ghci:
Prelude System.Random.MWC System.IO.Unsafe> let p = unsafePerformIO(uniformR (0, 30) gen :: IO Int)
Prelude System.Random.MWC System.IO.Unsafe> p
11
Prelude System.Random.MWC System.IO.Unsafe> :t p
p :: Int
However in GHC
import System.Random.MWC
import System.IO.Unsafe
main :: IO()
main = do
gen <-createSystemRandom
print $! s 30 gen
s :: Int-> GenIO -> Int
s !k g = unsafePerformIO(uniformR (0, k - 1) g)
it returns
ghc: panic! (the 'impossible' happened)
(GHC version 7.6.3 for i386-unknown-linux):
make_exp (App _ (Coercion _))
Please report this as a GHC bug: http://www.haskell.org/ghc/reportabug

There's really no need for unsafePerformIO here. Just change the type of s to return IO Int and use do-notation or the bind operator to feed the result to print.
s :: Int -> GenIO -> IO Int
s k g = uniformR (0, k - 1) g
main :: IO ()
main = do
gen <- createSystemRandom
x <- s 30 gen
print x
or
main = do
gen <- createSystemRandom
print =<< s 30 gen
or
main = print =<< s 30 =<< createSystemRandom

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

How GHC forces evaluation in multithreaded applications? - haskell

When one thread evaluates a thunk, the chunk is locked, and other threads block on it (aka black hole). See Haskell on a Shared-Memory Multiprocessor paper for details.

Related

Why is my parallel code even slower than without parallelism?

Exit from State Monad Loop

Haskell STM : How to store ThreadID as per their execution sequence

Mutually recursive IO definitions

haskell Convert IO Int to Int System.Random.MWC

Categories

Resources