LZW routine in Haskell using Monads - haskell

I'm trying to implement LZW compression in Haskell using Monads, here is my code so far with test cases:
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FlexibleContexts #-}
import Control.Monad.State
import Control.Monad.Writer
import Data.Char (chr, ord)
import Data.List (isPrefixOf, maximumBy)
import Data.Function
import Test.QuickCheck
type Dictionary = [String]
dictionary :: Dictionary
dictionary = [[chr x] | x <- [0..127]]
test_dictionary =
[ map ord (concat dictionary) == [0..127]
, all (\str -> length str == 1) dictionary
]
prefixes :: String -> Dictionary -> [(Int, String)]
prefixes str dict = [(x, dict!!x) | x <- [0..length dict - 1], isPrefixOf (dict!!x) str]
test_prefixes =
[ prefixes "" dictionary == []
, prefixes "appletree" [] == []
, prefixes "appletree" ["ap", "apple", "tree", "pear"] == [(0, "ap"), (1, "apple")]
, prefixes "babe" dictionary == [(98, "b")]
]
longest :: [(Int, String)] -> (Int, String)
longest prefs = maximumBy (compare `on` (\(x,y) -> length y)) prefs
test_longest =
[ longest [(30, "a"), (20, "abc"), (15, "ab")] == (20, "abc")
, longest [(30, "a"), (20, "abc"), (15, "abc")] == (15, "abc")
]
instance MonadState Dictionary ((->) Dictionary) where
get = \s -> s
munch :: MonadState Dictionary m => String -> m (Int, String, String)
munch str = do
dict <- get
let longst = longest (prefixes str dict)
return (fst longst, snd longst, [str!!x | x <- [length (snd longst)..length str - 1]])
test_munch =
[ evalState (munch "a") ["a"] == (0, "a", "")
, evalState (munch "appletree") ["a"] == (0, "a", "ppletree")
, evalState (munch "peach") ["a", "ba", "b"] == (1, "ba", "be")
]
instance MonadState m (StateT Dictionary ((->) m)) where
append :: MonadState Dictionary m => String -> String -> m ()
append s "" = return ()
append s w = do
dict <- get
let newWord = s ++ (take 1 w)
if (notElem newWord dict)
then do
put (dict++[newWord])
else return ()
test_append =
[ execState (append "a" "") [] == []
, execState (append "a" "") dictionary == dictionary
, execState (append "a" "bc") [] == ["ab"]
, execState (append "a" "bc") ["ab"] == ["ab"]
]
encode :: String -> WriterT [Int] (State Dictionary) ()
encode "" = return ()
encode w = do
dict <- get
let (a, b, c) = (munch w) dict
if length dict < 256
then do
tell [a]
put ((append b c) dict)
encode c
else return ()
test_encode =
[ evalState (execWriterT (encode "")) [] == []
, evalState (execWriterT (encode "aaa")) ["a"] == [0, 1]
, evalState (execWriterT (encode "aaaa")) ["a"] == [0, 1, 0]
, evalState (execWriterT (encode "aaaaa")) ["a"] == [0, 1, 1]
, evalState (execWriterT (encode "abababab")) ["a", "b"] == [0, 1, 2, 4, 1]
, evalState (execWriterT (encode "aaabbbccc")) dictionary
== [97, 128, 98, 130, 99, 132]
]
decode :: [Int] -> WriterT String (State Dictionary) ()
decode [] = return ()
decode [x] = do
dict <- get
tell (dict!!x)
decode (x:xs) = do
dict <- get
let f = dict!!x
let s = if(length dict > head xs)
then dict!!head xs
else f
tell f
put (append f s) dict
decode xs
test_decode =
[ evalState (execWriterT (decode [])) [] == []
, evalState (execWriterT (decode [0])) ["a"] == "a"
, evalState (execWriterT (decode [0, 1, 1, 0])) ["a", "b"] == "abba"
, evalState (execWriterT (decode [0, 1, 2, 0])) ["a", "b"] == "ababa"
, evalState (execWriterT (decode [0, 1, 2, 4, 1])) ["a", "b"] == "abababab"
, evalState (execWriterT (decode [97, 128, 98, 130, 99, 132])) dictionary
== "aaabbbccc"
]
compress :: String -> [Int]
compress w = evalState (execWriterT (encode w)) dictionary
test_compress =
[ compress "" == []
, compress "a" == [97]
, compress "aaa" == [97, 128]
, compress "aaabbbccc" == [97, 128, 98, 130, 99, 132]
]
decompress :: [Int] -> String
decompress list = evalState (execWriterT (decode list)) dictionary
test_decompress =
[ decompress [] == ""
, decompress [97] == "a"
, decompress [97, 128] == "aaa"
, decompress [97, 128, 98, 130, 99, 132] == "aaabbbccc"
]
prop_compressDecompress :: String -> Bool
prop_compressDecompress w = do
let tmp = [chr (div (ord x) 2) | x <- w]
decompress (compress tmp) == tmp
compressFile :: FilePath -> FilePath -> IO ()
compressFile source target = do
s <- readFile source
let compressed = compress s
let chars = [chr x | x <- compressed]
writeFile target chars
decompressFile :: FilePath -> FilePath -> IO ()
decompressFile source target = do
s <- readFile source
let code = [ord x | x <- s]
let decompressed = decompress code
writeFile target decompressed
allTests = [test_dictionary, test_prefixes, test_longest, test_munch, test_append
,test_encode
--, test_decode, test_compress, test_decompress
]
main = do
--quickCheck prop_compressDecompress
print (allTests, and (concat allTests))
With this code I get the following error (referring to the use of put in "encode" and "decode" functions) :
Main.hs#80:13-80:16 No instance for (MonadState () (StateT Dictionary Data.Functor.Identity.Identity)) arising from a use of put
I've tried to define this instance but the best I could achieve is a "functional dependencies conflict between instance declarations" error.
I know there is more simple solutions without Monads but I have to use them, also the types of the functions are not to be modified.
Could you give me any help about what I'm doing wrong here?

You don't need to write any instances. You're simply using append and munch in a wrong way.
append has type MonadState Dictionary m => String -> String -> m ().
If f and s are strings, then append f s yields a state-modifying action that eventually returns a ().
put has type MonadState s m => s -> m (). put replaces the current state with the s argument.
In the light of this, put (append f s) dict makes little sense. You're supposed to supply put a single argument. And you don't have to do anything with dict there; a core point of using the State monad is that the state is left implicit, and there is no need to pass it around.
Also, append f s by itself already updates the state. So what you want here is simply append f s, instead of put (append x y) dict.
There's a similar error in encode with munch; it has a single String argument, so (munch w) dict is erroneous. Again, no need to touch dict. Also, because munch w yields a monadic result, you must bind the result with <- instead of let. So, you should replace let (a, b, c) = (much w dict) with (a, b, c) <- munch w.

put ((append b c) dict)
looks wrong. append is already a monadic action, so it should be enough to use
append b c

Related

Haskell - Tree Recursion - Out Of Memory

The following code with any real-logic "hollowed out" still runs out of memory when compiled on GHC 7.10.3 with the -O flag. I do not understand why a simple tree recursion with at-most a stack-depth of 52 (number of cards in a standard deck) needs so much memory. I tried using seq on the result variables, but that did not help. Could someone take a look and let me know why the memory usage is so high, and what can I do to avoid it?
import qualified Data.Map.Strict as M
type Card = (Int, Char)
compute_rank_multiplicity_map :: [Card] -> M.Map Int Int
compute_rank_multiplicity_map cards = M.fromList [(x, x) | (x, _) <- cards]
determine_hand :: [Card] -> (Int, [(Int, Int)])
determine_hand [] = error "Card list is empty!"
determine_hand cards = (0, mult_rank_desc_list)
where rank_mult_map = compute_rank_multiplicity_map cards
mult_rank_desc_list = M.toDescList rank_mult_map
check_kicker_logic :: [Card] -> (Int, Int)
check_kicker_logic cards =
let first_cards = take 5 cards
second_cards = drop 5 cards
first_hand#(f_h, f_mrdl) = determine_hand first_cards
second_hand#(s_h, s_mrdl) = determine_hand second_cards
in if (first_hand > second_hand) || (first_hand < second_hand) -- is there a clear winner?
then if (f_h == s_h) && (head f_mrdl) == (head s_mrdl) -- do we need kicker logic?
then (1, 1)
else (0, 1)
else (0, 0)
card_deck :: [Card]
card_deck = [(r, s) | r <- [2 .. 14], s <- ['C', 'D', 'H', 'S']]
need_kicker_logic :: [Card] -> (Int, Int)
need_kicker_logic cards = visit_subset cards (length cards) [] 0 (0, 0)
where visit_subset a_cards num_a_cards picked_cards num_picked_cards result#(num_kicker_logic, num_clear_winners)
| num_cards_needed == 0 = (num_kicker_logic + nkl, num_clear_winners + ncw)
| num_cards_needed > num_a_cards = result
| otherwise = let result_1 = visit_subset (tail a_cards)
(num_a_cards - 1)
picked_cards
num_picked_cards
result
result_2 = visit_subset (tail a_cards)
(num_a_cards - 1)
((head a_cards) : picked_cards)
(num_picked_cards + 1)
result_1
in result_2
where num_cards_needed = 10 - num_picked_cards
(nkl, ncw) = check_kicker_logic picked_cards
main :: IO ()
main =
do
putStrLn $ show $ need_kicker_logic card_deck

Haskell Processing text from a file

Hi Guys,
1. What do I want to do?
I get a 1-lined file with text
"Bangabang [Just 3, Nothing, Just 1, Nothing] [Nothing, Nothing, Nothing, Nothing] [Nothing, Nothing, Just 4, Nothing] [Nothing, Just 3, Nothing, Nothing]"
I want to read this text from a file and convert it to:
[[Just 3, Nothing, Just 1, Nothing], [Nothing, Nothing, Nothing, Nothing], [Nothing, Nothing, Just 4, Nothing], [Nothing, Just 3, Nothing, Nothing]]
Which is a [[Maybe Integer]] type.
2. What have I already done?
I can modify normal String to Maybe Integer
My String:
xxx = "Bangabang [Just 3, Nothing, Just 1, Nothing] [Nothing, Nothing, Nothing, Nothing] [Nothing, Nothing, Just 4, Nothing] [Nothing, Just 3, Nothing, Nothing]"
after executing stripChars ",]" $ drop 10 xxx I get:
"Just 31 Nothing Just 1 Nothing [Nothing Nothing Nothing Nothing [Nothing Nothing Just 4 Nothing [Nothing Just 3 Nothing Nothing"
after next commands map (splitOn " ") $ splitOn "[" I have:
[["Just","31","Nothing","Just","1","Nothing",""],["Nothing","Nothing","Nothing","Nothing",""],["Nothing","Nothing","Just","4","Nothing",""],["Nothing","Just","3","Nothing","Nothing"]]
Now I have to cut off that empty strings "" using cleany
And finally change [[String]] to [[Maybe Integer]] using cuty
[[Just 31,Nothing,Just 1,Nothing],[Nothing,Nothing,Nothing,Nothing],[Nothing,Nothing,Just 4,Nothing],[Nothing,Just 3,Nothing,Nothing]]
That is what I wanted to have!
3. The problem is...
...how can I execute this method:
parse xxx = cuty $ cleany $ map (splitOn " ") $ splitOn "[" $ stripChars ",]" $ drop 10 xxx
on text read from file (which is IO String type)?
This is my first Haskell project, so my functions may reinvent the wheel or do worse things :/
Used functions:
main do
text <- readFile "test.txt"
let l = lines
map parse . l
-- deletes unwanted characters from a String
stripChars :: String -> String -> String
stripChars = filter . flip notElem
-- converts String to Maybe a
maybeRead :: Read a => String -> Maybe a
maybeRead s = case reads s of
[(x,"")] -> Just x
_ -> Nothing
-- convert(with subfunction conv, because I don't know how to make it one function)
conv:: [String] -> [Maybe Integer]
conv[] = []
conv(x:xs) = if x == "Just" then conv xs
else maybeRead x: conv xs
convert:: [[String]] -> [[Maybe Integer]]
convert[] = []
convert(x:xs) = conv x : convert xs
-- cleany (with subfunction clean, because I don't know how to make it one function)
clean :: [String] -> [String]
clean [] = []
clean (x:xs) = if x == "" then clean xs
else x : clean xs
cleany :: [[String]] -> [[String]]
cleany [] = []
cleany (x:xs) = clean x : cleany xs
I'll assume you're ok with a parser that does zero to minimal error checking. Haskell has great libraries for parsing, and later I'll amend my answer with some alternatives you should look at.
Instead of using splitOn I would recommend writing these functions:
takeList :: String -> (String, String)
-- returns the match text and the text following the match
-- e.g. takeList " [1,2,3] ..." returns ("[1,2,3]", " ...")
takeLists :: String -> [String]
-- parses a sequence of lists separated by spaces
-- into a list of matches
I'll leave takeList as an exercise. I like to use span and break from Data.List for these kinds of simple parsers.
In terms of takeList, here is how you might write takeLists:
takeLists :: String -> [ String ]
takeLists str =
let s1 = dropWhile (/= '[') str
in if null s1
then []
else let (s2,s3) = takeList s1
in s2 : takeLists s3
For example, takeLists " [123] [4,5,6] [7,8] " will return:
[ "[123]", "[4,5,6]", "[7,8]" ]
Finally, to convert each string in this list to Haskell values, just use read.
answer :: [ [Int] ]
answer = map read (takeLists " [123] [4,5,6] [7,8] ")
Update
Using the ReadP and ReadS parsers available in the base libraries:
import Text.ParserCombinators.ReadP
bang :: ReadP [[Maybe Int]]
bang = do string "Bangabang"
skipSpaces
xs <- sepBy1 (readS_to_P reads) skipSpaces
eof
return xs
input = "Bangabang [Just 3, Nothing, Just 1, Nothing] [Nothing, Nothing, Nothing, Nothing] [Nothing, Nothing, Just 4, Nothing] [Nothing, Just 3, Nothing, Nothing]"
runParser p input = case (readP_to_S p) input of
[] -> error "no parses"
((a,_):_) -> print a
example = runParser bang input
You can use directly Read instance.
data Bangabang = Bangabang [Maybe Integer]
[Maybe Integer]
[Maybe Integer]
[Maybe Integer] deriving (Read, Show)
now, you can use all Read machinery (read, reads, readIO, ...), inferred from types. E.g.
readBangabang :: String -> Bangabang
readBangabang = read
If data came from file
readFile "foo.txt" >>= print . readBangabang

All possible combinations of Three-valued logic values

Is there an algorithm to lead all possible combinations of given amount of three-valued logic values?
For example, F(2) should return this list:
t t
t u
t f
u t
u u
u f
f t
f u
f f
The function would look like this (in Haskell):
data Tril = FALSE | NULL | TRUE
all :: Int -> [[Tril]]
all amount = ???
all1 :: [Tril]
all1 = join (all 1)
all2 :: [(Tril, Tril)]
all2 = map (\[f, s] -> (f, s)) (all 2)
all3 :: [(Tril, Tril, Tril)]
all3 = map (\[f, s, t] -> (f, s, t)) (all 3)
You can do this very simply as a list comprehension:
all2 = [ (v1, v2) | v1 <- [FALSE, TRUE, NULL], v2 <- [FALSE, TRUE, NULL] ]
You can write it equivalently as a monadic do-block:
all2 = do
v1 <- [FALSE, TRUE, NULL]
v2 <- [FALSE, TRUE, NULL]
return (v1, v2)
And that gives us an idea for how we can write the variable-size one:
all 0 = [[]] -- Note: Empty list with one empty item.
all n = do
v <- [FALSE, TRUE, NULL]
vs <- all (n-1)
return (v:vs)
As it turns out — and this is slightly mind-bending — this is the net effect of the replicateM function. It takes a monadic action, does it N times, and gathers the results together.
all n = replicateM n [FALSE, TRUE, NULL]
replicateM does exactly that:
> import Control.Monad
> replicateM 2 [1,2,3]
[[1,1],[1,2],[1,3],[2,1],[2,2],[2,3],[3,1],[3,2],[3,3]]
Hence,
all :: Int -> [[Tril]]
all amount = replicateM amount [FALSE,NULL,TRUE]
I'd suggest to pick anouther name, since all is already taken by Prelude.all.

Comparing and counting string correlations with Haskell

I'm working on a pretty complicated (complicated for me, at least) function that I'd like to use to count the number of times a word in a list corresponds with a word in a database.
An example using random words:
let input = [("InputName", ["dog", "cat", "cat"...]), ...]
let database = ["dog", "cat", "badger"...]
After several hours of mental gymnastics, I came up with this hideous function that almost works. I've simplified it so it'll make sense in the context of this example:
findMatches input database = [ (snd x, wordCount (snd x)) | x <- input ]
where
wordCount ys = sum[ if y `elem` database then 1 else 0 | y <- ys ]
My goal, my hope, my wish would be to have an output that reads:
[("dog", 1), ("cat", 2), ("badger", 0)]
Any suggestions or nudges in the right direction would be appreciated.
EDIT
I finally made a function that works. catWordCount counts the number of times a database entry appears in an input. I'm working on a better implementation using fold.
let input = words "5 4 10 0 1 9 1"
let database = [("C1", words "1 2 3 4 5"), ("C2", words "6 7 8 9 10")]
catwordCount input database
catWordCount fs zs = [ (fst f, inputSearch (snd f)) | f <- fs ]
where
inputSearch gs = [ (g, wordCount [g]) | g <- gs ]
wordCount hs = sum[ if h == z then 1 else 0 | h <- hs, z <- zs ]
And the output:
(["C1", [("1",2),("2",0),("3",0),("4",1),("5",1)])
(["C2", [("6",0),("7",0),("8",0),("9",1),("10",1)])
You can keep a Map of counts that you update for each item. Since you don't want to include items from the input list that are not in the database, if I understood correctly,
alter :: Ord k => (Maybe a -> Maybe a) -> k -> Map k a -> Map k a
is a good way to do that. The supplied key k is looked up, and if it's present, the argument to the update function will we Just value, otherwise it will be Nothing. If the result of the update function is Nothing, the key will be deleted from the Map (or not added, if it wasn't present), if the result is Just something, the key will be associated with something in the altered Map.
So you start with a Map mapping every item to 0,
m0 :: Map String Int
m0 = fromList $ zip database (repeat 0)
to update, you want to increment the count if the item is in database, and not change anything otherwise,
incr :: Maybe Int -> Maybe Int
incr (Just n) = Just (n+1)
incr Nothing = Nothing
or, shorter, incr = fmap (+1) using the Functor instance of Maybe.
Then the resulting map is simply
finalMap :: Map String Int
finalMap = foldl (flip $ alter incr) m0 $ snd input
and if you want a list rather than a Map, just call assocs or toList on finalMap.
It might not be exactly what you are looking for, but try this:
import Data.List
countMatches :: [(String, [String])] -> [(String, Int)]
countMatches = map (\l -> (head l, length l)) . group . sort . concat . map snd
Hopefully the function compositions are not too confusing. I'll go over it step by step. Say you run this function with input
[("", ["a", "b"]), ("", ["b", "c", "x", "a"]), ("", ["y", "b", "z"])]
After map snd it is
[["a", "b"], ["b", "c", "x", "a"], ["y", "b", "z"]]
After concat,
["a", "b", "b", "c", "x", "a", "y", "b", "z"]
After sort,
["a", "a", "b", "b", "b", "c", "x", "y", "z"]
After group,
[["a", "a"], ["b", "b", "b"], ["c"], ["x"], ["y"], ["z"]]
And finally map (\l -> (head l, length l)) produces
[("a", 2), ("b", 3), ("c", 1), ("x", 1), ("y", 1), ("z", 1)]

Haskell Hash table

I am trying to build a smallish haskell app that will translate a few key phrases from english to french.
First, i have a list of ordered pairs of strings that represent and english word/phrase followed by the french translations:
icards = [("the", "le"),("savage", "violent"),("work", "travail"),
("wild", "sauvage"),("chance", "occasion"),("than a", "qu'un")...]
next i have a new data:
data Entry = Entry {wrd, def :: String, len :: Int, phr :: Bool}
deriving Show
then i use the icards to populate a list of Entrys:
entries :: [Entry]
entries = map (\(x, y) -> Entry x y (length x) (' ' `elem` x)) icards
for simplicity, i create a new type that will be [Entry] called Run.
Now, i want to create a hash table based on the number of characters in the english word. This will be used later to speed up searchings. So i want to create a function called runs:
runs :: [Run]
runs = --This will run through the entries and return a new [Entry] that has all of the
words of the same length grouped together.
I also have:
maxl = maximum [len e | e <- entries]
It just so happens that Hackage has a hashmap package! I'm going to create a small data type based on that HashMap, which I will call a MultiMap. This is a typical trick: it's just a hash map of linked lists. I'm not sure what the correct name for MultiMap actually is.
import qualified Data.HashMap as HM
import Data.Hashable
import Prelude hiding (lookup)
type MultiMap k v = HM.Map k [v]
insert :: (Hashable k, Ord k) => k -> a -> MultiMap k a -> MultiMap k a
insert k v = HM.insertWith (++) k [v]
lookup :: (Hashable k, Ord k) => k -> MultiMap k a -> [a]
lookup k m = case HM.lookup k m of
Nothing -> []
Just xs -> xs
empty :: MultiMap k a
empty = HM.empty
fromList :: (Hashable k, Ord k) => [(k,v)] -> MultiMap k v
fromList = foldr (uncurry insert) empty
I mimicked only the essentials of a Map: insert, lookup, empty, and fromList. Now it is quite easy to turn entries into a MutliMap:
data Entry = Entry {wrd, def :: String, len :: Int, phr :: Bool}
deriving (Show)
icards = [("the", "le"),("savage", "violent"),("work", "travail"),
("wild", "sauvage"),("chance", "occasion"),("than a", "qu'un")]
entries :: [Entry]
entries = map (\(x, y) -> Entry x y (length x) (' ' `elem` x)) icards
fromEntryList :: [Entry] -> MutiMap Int Entry
fromEntryList es = fromList $ map (\e -> (len e, e)) es
Loading that up into ghci, we can now lookup a list of entries with a given length:
ghci> let m = fromEntryList entries
ghci> lookup 3 m
[Entry {wrd = "the", def = "le", len = 3, phr = False}]
ghci> lookup 4 m
[Entry {wrd = "work", def = "travail", len = 4, phr = False},
Entry {wrd = "wild", def = "sauvage", len = 4, phr = False}]
(Note that this lookup is not the one defined in Prelude.) You could similarly use the English word as a key.
-- import Data.List (find) -- up with other imports
fromEntryList' :: [Entry] -> MultiMap String Entry
fromEntryList' es = fromList $ map (\e -> (wrd e, e)) es
eLookup :: String -> MultiMap String Entry -> Maybe Entry
eLookup str m = case lookup str m of
[] -> Nothing
xs -> find (\e -> wrd e == str) xs
Testing...
ghci> let m = fromEntryList' entries
ghci> eLookup "the" m
Just (Entry {wrd = "the", def = "le", len = 3, phr = False})
ghci> eLookup "foo" m
Nothing
Notice how in eLookup we first perform the Map lookup in order to determine if anything has been placed in that slot. Since we are using a hash set, we need to remember that two different Strings might have the same hash code. So in the event that the slot is not empty, we perform a find on the linked list there to see if any of the entries there actually match the correct English word. If you are interested in performance, you should consider using Data.Text instead of String.
groupBy and sortBy are both in Data.List.
import Data.List
import Data.Function -- for `on`
runs :: [Run]
runs = f 0 $ groupBy ((==) `on` len) $ sortBy (compare `on` len) entries
where f _ [] = []
f i (r # (Entry {len = l} : _) : rs) | i == l = r : f (i + 1) rs
f i rs = [] : f (i + 1) rs
Personally, I would use a Map instead
import qualified Data.Map as M
runs :: M.Map String Entry
runs = M.fromList $ map (\entry -> (wrd entry, entry)) entries
and lookup directly by English word instead of a two step length-of-English-word and then English-word process.

Resources