My use of Haskell's Text.JSON considered ugly? - json

What I am trying to do is really simple.
I'd like to convert the following JSON, which I'm getting from an external source:
[{"symbol": "sym1", "description": "desc1"}
{"symbol": "sym1", "description": "desc1"}]
into the following types:
data Symbols = Symbols [Symbol]
type Symbol = (String, String)
I ended up writing the following code using Text.JSON:
instance JSON Symbols where
readJSON (JSArray arr) = either Error (Ok . Symbols) $ resultToEither (f arr [])
where
f ((JSObject obj):vs) acc = either Error (\x -> f vs (x:acc)) $ resultToEither (g (fromJSObject obj) [])
f [] acc = Ok $ reverse acc
f _ acc = Error "Invalid symbol/description list"
g ((name, JSString val):vs) acc = g vs ((name, fromJSString val):acc)
g [] acc = valg acc
g _ acc = Error "Invalid symbol/description record"
valg xs = case (sym, desc) of
(Nothing, _) -> Error "Record is missing symbol"
(_, Nothing) -> Error "Record is missing description"
(Just sym', Just desc') -> Ok (sym', desc')
where
sym = lookup "symbol" xs
desc = lookup "description" xs
showJSON (Symbols syms) = JSArray $ map f syms
where
f (sym, desc) = JSObject $ toJSObject [("symbol", JSString $ toJSString sym),
("description", JSString $ toJSString desc)]
This has got to the the most inelegant Haskell I've ever written. readJSON just doesn't look right. Sure, showJSON is substantially shorter, but what is up with this JSString $ toJSString and JSObject $ toJSObject stuff I am forced to put in here? And resultToEither?
Am I using Text.JSON wrong? Is there a better way?
Okay this is more like it. I've gotten readJSON down to the following thanks to the clarifications and ideas from Roman and Grazer. At every point it will detect an incorrectly formatted JSON and output an error instead of throwing an exception.
instance JSON Symbols where
readJSON o = fmap Symbols (readJSON o >>= mapM f)
where
f (JSObject o) = (,) <$> valFromObj "symbol" o <*> valFromObj "description" o
f _ = Error "Unable to read object"

Could you please change the title to something more precise? From "Haskell's Text.JSON considered ugly …" to something like "My code using Text.JSON considered ugly..."
Half of your code consists of explicit recursion -- why do you need it? From a quick look something like mapM should suffice.
Update: sample code
instance JSON Symbols where
readJSON (JSArray arr) = fmap Symbols (f arr)
f = mapM (\(JSObject obj) -> g . fromJSObject $ obj)
g = valg . map (\(name, JSString val) -> (name, fromJSString val))
valg xs = case (sym, desc) of
(Nothing, _) -> Error "Record is missing symbol"
(_, Nothing) -> Error "Record is missing description"
(Just sym', Just desc') -> Ok (sym', desc')
where
sym = lookup "symbol" xs
desc = lookup "description" xs

Rearranging a little from Roman's nice solution. I think this may be a little more readable.
instance JSON Symbols where
readJSON o = fmap Symbols (readJSON o >>= mapM f)
where
f (JSObject o) = let l = fromJSObject o
in do s <- jslookup "symbol" l
d <- jslookup "description" l
return (s,d)
f _ = Error "Expected an Object"
jslookup k l = maybe (Error $ "missing key : "++k) readJSON (lookup k l)

Related

Haskell JSON parser not parsing objects

I have been building this small JSON parser from scratch and I can't get an object to parse for some reason.
Code:
import Data.Char
import Control.Monad
import Control.Applicative
import Control.Monad (liftM, ap)
newtype Parser a = Parser (String -> [(String, a)])
parse :: Parser a -> (String -> [(String, a)])
parse (Parser p) = p
item :: Parser Char
item = Parser (\s ->
case s of
[] -> []
(x:xs) -> [(xs,x)])
failure :: Parser a
failure = Parser (\ts -> [])
produce :: a -> Parser a --parse (item >>= produce) "hello"
produce x = Parser (\ts -> [(ts, x)])
instance Applicative Parser where
pure x = produce x
Parser pf <*> Parser px = Parser (\ts -> [ (ts'', f x )| (ts', f) <- pf ts,
(ts'', x) <- px ts'] )
instance Functor Parser where
fmap f (Parser px) = Parser (\ts -> [ (ts', f x) | (ts', x) <- px ts])
instance Monad Parser where
--return :: a -> Parser a
return = produce
--(>>=) :: Parser a -> (a -> Parser b) -> Parser b
(Parser px) >>= f = Parser (\ts ->
concat [parse (f x) ts' | (ts', x) <- px ts])
satisfy :: (Char -> Bool) -> Parser Char
satisfy p = item >>= (\c ->
if p c then
produce c
else failure)
char :: Char -> Parser Char
char c = satisfy (c == )
string :: String -> Parser String --parse (string "hello") "hello"
string [] = produce []
string (c:cs) = char c >>= (\c' ->
string cs >>= (\cs' ->
produce (c:cs)))
instance Alternative Parser where
empty = failure
(<|>) = orElse
many p = some p <|> produce []
some p = (:) <$> p <*> many p
orElse :: Parser a -> Parser a -> Parser a
orElse (Parser px) (Parser py) = Parser (\ts ->
case px ts of
[] -> py ts
xs -> xs)
---------------Parsec bits---------------------------
oneOf :: [Char] -> Parser Char
oneOf s = satisfy (flip elem s)
noneOf :: [Char] -> Parser Char
noneOf cs = satisfy (\c -> not (elem c cs))
sepBy :: Parser a -> Parser String -> Parser [a]
sepBy p sep = sepBy1 p sep <|> return []
sepBy1 :: Parser a -> Parser String -> Parser [a]
sepBy1 p sep = do{ x <- p
; xs <- many (sep >> p)
; return (x:xs)
}
-------------------------------------------------------
data Value = StrJson String
| IntJson Int
| BoolJson Bool
| ObjectJson [Pair]
| ArrayJson [Value]
| NullJson
deriving (Eq, Ord, Show)
type Pair = (String, Value)
type NullJson = String
tok :: String -> Parser String
tok t = string t <* whitespace
whitespace :: Parser ()
whitespace = many (oneOf " \t") *> pure ()
var :: Parser Char
var = oneOf ['A' .. 'Z'] <* whitespace
val :: Parser Value
val = IntJson <$> jIntParser
<|> NullJson <$ tok "null"
<|> BoolJson <$> jBoolParser
<|> StrJson <$> jStrParser
<|> ArrayJson <$> jArrParser
<|> ObjectJson <$> jObjParser
jStrParser :: Parser String
jStrParser = some (noneOf ("\n\r\"=[]{},")) <* whitespace
jIntParser :: Parser Int
jIntParser = (some (oneOf ['0' .. '9']) >>= produce . read) <* whitespace
jBoolParser :: Parser Bool
jBoolParser = ((string "False" *> produce False) <|> (string "True" *> produce True))
jObjParser :: Parser [Pair]
jObjParser = do
char '{'
jp <- jPairParser `sepBy1` (tok ",")
char '}'
produce jp
jPairParser :: Parser (String, Value)
jPairParser = do
jStr <- jStrParser
tok ":"
jVal <- val
produce (jStr, jVal)
jArrParser :: Parser [Value]
jArrParser = do
char '['
jArr <- val `sepBy1` (tok ",")
char ']'
produce jArr
When I run my parser with "parse jObjParser "{asd:asd}"" it will fail and when I go further and run with "parse jPairParser "asd:asd"" it will also fail. So I assume the pair parser is the problem but I can't work out why. I'm probably just being dumb so any help would be very much appreciated, Thanks in advance.
First of all let me point out that a lot of the functions in your sample code are already available in many parser combinator packages such as parsec, attoparsec or trifecta - depending on your particular needs. Not to mention Aeson and such. But that is not much of an answer so I will assume you are doing a sort of coding excercise and are not using those on purpose.
My best guess by glancing at your code is that the problem is here:
jStrParser :: Parser String
jStrParser = some (noneOf ("\n\r\"=[]{},")) <* whitespace
And here:
jPairParser :: Parser (String, Value)
jPairParser = do
jStr <- jStrParser
tok ":"
jVal <- val
produce (jStr, jVal)
jStrParser is greedy and it will eat through ":". jPairParser will then fail at tok ":" because ":" it has already been consumed.
Basically, your problem is in jStrParser. It accepts "asd:asd". But is wrong. Secondly, your jStrParser isn't correct, because, it must accept only strings that begin on '"' and end on '"'.
So, you can fix like this:
readS_to_Parser :: ReadS a -> Parser a
readS_to_Parser r = Parser (map swap . r)
jStrParser = readS_to_Parser reads <* whitespace

Parsing JSON without having to define "data" class

I have a bunch of such the requests to the servers which returns JSON:
MyJsonData = MyJsonData { field1 :: String, field2 :: String }
d <- (Aeson.eitherDecode <$> simpleHttp "https://someUrl.com") :: IO (Either String MyJsonData)
print d
MyJsonData2 = MyJsonData2 { field12 :: String, field22 :: String }
d2 <- (Aeson.eitherDecode <$> simpleHttp "https://someUrl2.com") :: IO (Either String MyJsonData2)
print d2
Of course, all the servers return similar but not exact data in terms of the JSON representation. I need to obtain the values only from only a few of these fields, I don't need them all. I'd like to be able to do something like the following:
-- not valid Haskell code!
(d1, d2) <- (Aeson.eitherDecode <$> simpleHttp "https://someUrl.com") :: IO (Either String (_ _ _ fieldINeed _ _ fieldIneed2))
print d1
print d2
(d3, d4) <- (Aeson.eitherDecode <$> simpleHttp "https://someUrl2.com") :: IO (Either String (_ _ fieldINeed3 fieldIneed4 _ _ _))
print d3
print d4
Or something similar. The idea is get rid of necessity to define dataS MyJsonData, MyJsonData2 and so on. Is this possible?
Something like this should work:
output <- Aeson.eitherDecode rawData :: IO (Either String Aeson.Value)
case output of
Right jsonValue -> case jsonValue of
(Aeson.Object jsonObject) -> case (HashMap.lookup "someKey" jsonObject, HashMap.lookup "anotherKey" jsonObject) of
(Just val, Just val2) -> -- Your code here
_ -> error "Couldn't get both keys"
_ -> error "Unexpected JSON"
Left errorMsg -> error $ "Error in parsing: " ++ errorMsg
Basically, a JSON object is just a HashMap that you can manipulate and a JSON array is just a vector.
If you have a bunch of keys you can just map the HashMap.lookup over an array of keys and then run a sequence on the list to get what you want.
case (sequence $ map (\k -> HashMap.lookup k jsonObject) ["key1", "key2", "key3"]) of
Just x -> -- Your code here
Nothing -> error "Some key missing"
Note that Data.Aeson.Value is instance of FromJSON. So you can decode response to Value and then extract only what you need.
For example
{-# LANGUAGE OverloadedStrings #-}
import Data.Aeson
import Data.Aeson.Types
import Control.Monad
parser1 :: Value -> Parser (String, String)
parser1 (Object o) = do
str1 <- o .: "str1"
str2 <- o .: "str2"
return (str1, str2)
parser1 _ = mzero
parser2 :: Value -> Parser (String, String)
parser2 (Object o) = do
str1 <- o .: "string1"
str2 <- o .: "string2"
return (str1, str2)
parser2 _ = mzero
main :: IO ()
main = do
let json1 = object [
"str1" .= ("world" :: String),
"str2" .= ("12" :: String),
"other" .= (12.5 :: Float)
]
let json2 = object [
"string1" .= ("world" :: String),
"string2" .= ("12" :: String),
"other" .= (12.5 :: Float)
]
print $ parseEither parser1 json1
print $ parseEither parser2 json2
ADD:
You can parameterize parser with field names:
parser :: (Text, Text) -> Value -> Parser (String, String)
parser (f1, f2) (Object o) = do
str1 <- o .: f1
str2 <- o .: f2
return (str1, str2)
parser _ _ = mzero
...
print $ parseEither (parser ("str1", "str2")) json1
print $ parseEither (parser ("string1", "string2")) json2

Parsing nested JSON with "random" integer keys using aeson

I am using aeson library for generating and parsing json-files for my custom Graph type. Here are type definitions.
type Id = Int
type Edge = (Id, Id)
type Scenario = [Id]
data Point = Point Int Int
data Vertex = Vertex {-# UNPACK #-}!Id {-# UNPACK #-}!Point deriving (Show)
data Graph = Graph Id [Vertex] Scenario deriving (Show)
Actually I am working with Eulerian and semi-Eulerian graphs, all vertices of which have positions in 2D-space. In a nutshell Graph uses Data.Graph, but this is not related to my problem. Every graph has it's own ID to quickly identify it among many others.
Here is an example of json-file, containing info about my graph:
{
"id": 1,
"vertices": {
"3": {
"y": 12,
"x": 0
},
"2": {
"y": 16,
"x": 24
},
"1": {
"y": 12,
"x": 10
}
},
"scenario": [
1,
2,
3,
1
]
}
So, here is my implementation of toJSON function:
import qualified Data.Text as T
instance ToJSON Graph where
toJSON (Graph id v s) = object [ "vertices" .= object (map vertexToPair v)
, "scenario" .= s
, "id" .= id
]
where
vertexToPair :: Vertex -> (T.Text, Value)
vertexToPair (Vertex id (Point x y)) =
(T.pack $ show id) .= object [ "x" .= x, "y" .= y]
But I actually have a problem with parsing back from json-file. The main problem is the fact, that we don't know how much vertices has particular Graph, so it can't be hard-coded. Here is my first attempt to write parseJSON function:
instance FromJSON Graph where
parseJSON (Object v) = do
i <- parseJSON =<< v .: "id"
vs <- parseJSON =<< v .: "vertices"
sc <- parseJSON =<< v .: "scenario"
maybeReturn ((buildGraph i sc) <$> (parseVertices vs 1))
where
parseVertices :: Value -> Int -> Maybe [Vertex]
-- parseVertices (Object o) i = ???
parseVertices _ _ = Just []
buildGraph :: Int -> Scenario -> [Vertex] -> Graph
buildGraph i sc vertices = Graph i vertices sc
maybeReturn Nothing = mzero
maybeReturn (Just x) = return x
parseJSON _ = mzero
Actually I thought that I can start counting from 1 and get vertices while program still parses every next i. But this is not good choice because minimal vertex id is not always 1, and sometimes next vertex id differs from current by more then 1. Is it even possible to parse such data? Anyway, I stuck even with a simplest case of this problem (when vertex ids start from 1 and are incremented using (+1)).
Alright. This is how I can get max and min vertex id:
import qualified Data.Text.Read as TR
import qualified Data.Foldable as Foldable
minID :: [Either T.Text Int] -> Int
minID = Foldable.maximum
maxID :: [Either T.Text Int] -> Int
maxID = Foldable.minimum
ids :: Object -> [Either T.Text Int]
ids o = map ((fmap fst) . TR.decimal) (M.keys o)
All signatures are not generalised, but this is just example.
I will try tomorrow once again to solve this simple case of problem. Anyway, main question still needs an answer :)
The edit to your answer shows you understood how to solve your immediate problem. Still, you can make your code a lot clearer by avoiding most of the explicit list manipulation needed to build the vertexes. The plan is:
Define a FromJSON instance for Point;
Use it to define a FromJSON instance for Vertex. That would go rather like the Rule instance in other answer to the question you linked to, except that, since you want to use the object keys as IDs, the case statement there would become something like:
case M.toList (o :: Object) of
[(rawID, rawPoint)] -> Vertex (TR.decimal rawId) <$> parseJSON rawPoint
_ -> fail "Rule: unexpected format"
Finally, your existing FromJSON Graph instance will, I believe, work straight away if you change the (inferred) type of vs to [Vertex], given the instance FromJSON a => FromJSON [a]. Therefore, you won't need parseVertices anymore.
If you have control over the JSON structure, it might make sense to simplify things even further by making the vertex IDs a field alongside x and y, removing one level of nesting.
Update: An implementation of the instances, based on the one you added to your answer:
instance FromJSON Point where
parseJSON (Object v) = liftM2 Point (v .: "x") (v .: "y")
parseJSON _ = fail "Bad point"
instance FromJSON [Vertex] where
parseJSON j = case j of
(Object o) -> mapM parseVertex $ M.toList o
_ -> fail "Bad vertices"
where
parseVertex (rawID, rawPoint) = do
let eID = TR.decimal rawID
liftM2 Vertex (either (fail "Bad vertex id") (return . fst) eID) $
parseJSON rawPoint
instance FromJSON Graph where
parseJSON (Object v) = do
i <- parseJSON =<< v .: "id"
vs <- parseJSON =<< v .: "vertices"
sc <- parseJSON =<< v .: "scenario"
return $ Graph i vs sc
parseJSON _ = fail "Bad graph"
(Get the implementation as a runnable example)
The differences to your version are:
You do not need to define an instance for [Graph]; if you define the Graph instance aeson will handle the lists (i.e. JS arrays) automatically (note that the FromJSON documentation mentions a FromJSON a => FromJSON [a] instance. Unfortunately we cannot do the same (at least not as easily) with [Vertex], given that the vertex IDs are keys and not part of the values.
I Added fail cases for the pattern match failures, in order to get more informative error messages.
On your observation about creating the vertices from Either values: your solution was pretty reasonable. I only refactored it using either (from Data.Either) in order to supply a custom error message.
It is worth mentioning that liftM2 (or liftM3, etc.) code tends to look nicer if written using applicative style. For instance, the interesting case in the Point instance might become:
parseJSON (Object v) = Point <$> v .: "x" <*> v .: "y"
I just implemented solution for simple case. Here is the source code:
lookupE :: Value -> Text -> Either String Value
lookupE (Object obj) key = case H.lookup key obj of
Nothing -> Left $ "key " ++ show key ++ " not present"
Just v -> Right v
loopkupE _ _ = Left $ "not an object"
(.:*) :: (FromJSON a) => Value -> [Text] -> Parser a
(.:*) value = parseJSON <=< foldM ((either fail return .) . lookupE) value
instance FromJSON Graph where
parseJSON (Object v) = do
i <- parseJSON =<< v .: "id"
vs <- parseJSON =<< v .: "vertices"
sc <- parseJSON =<< v .: "scenario"
buildGraph i sc <$> concat <$> parseVertices vs
where
parseVertices v#(Object o) = parseFromTo minID maxID v
where
minID = unpackIndex $ Foldable.minimum ids
maxID = unpackIndex $ Foldable.maximum ids
unpackIndex eitherI = case eitherI of
Right i -> i
Left e -> error e
ids = map ((fmap fst) . TR.decimal) (M.keys o)
parseVertex i v = do
p1 <- v .:* [(T.pack $ show i), "x"]
p2 <- v .:* [(T.pack $ show i), "y"]
return $ vertex i p1 p2
parseFromTo i j v | i == j = return []
| otherwise = do
vertex <- parseVertex i v
liftM2 (:) (return [vertex]) (parseFromTo (i + 1) j v)
buildGraph :: Int -> Scenario -> [Vertex] -> Graph
buildGraph i sc vertices = Graph i vertices sc
parseJSON _ = mzero
Function lookupE and (.:*) are from Petr Pudlák's answer.
I don't really like this implementation of parseJSON function. But it works in cases when my vertices have ids with delta 1. I know that I could not extracted value from Foldable.minimum ids and Foldable.maximum ids, but it has brought me to the monad hell (a little one).
So here is an example of json-file, after parsing of which we got Nothing:
{
"id": 1,
"vertices": {
"3": {
"y": 12,
"x": 0
},
"2": {
"y": 16,
"x": 24
},
"1": {
"y": 12,
"x": 10
}
},
"scenario": [
1,
2,
3,
1
]
}
So I leave this question opened for now.
Update
Oh, I just saw my mistake. I already have all keys. :)
ids = map ((fmap fst) . TR.decimal) (M.keys o)
Now I leave this question opened for few days more. Maybe someone will improve my solution.
Update 2
Thanks to duplode, I made code more clear and readable.
Here is the source:
instance FromJSON Point where
parseJSON (Object v) = liftM2 Point (v .: "x") (v .: "y")
instance FromJSON [Vertex] where
parseJSON (Object o) = mapM parseVertex $ M.toList o
where
parseVertex (rawID, rawPoint) = Vertex (fromRight . (fmap fst) . TR.decimal $ rawID) <$> parseJSON rawPoint
instance FromJSON Graph where
parseJSON (Object v) = do
i <- parseJSON =<< v .: "id"
vs <- parseJSON =<< v .: "vertices"
sc <- parseJSON =<< v .: "scenario"
return $ Graph i vs sc
instance FromJSON [Graph] where
parseJSON (Object o) = mapM parseGraph $ M.toList o
where
parseGraph (_, rawGraph) = parseJSON rawGraph
And I don't need any helper functions to extract nested values.
BTW, I don't know any better way to create Vertex rather then Vertex (fromRight . (fmap fst) . TR.decimal $ rawID) <$> parseJSON rawPoint. liftM2 can't be used because second argument has type Either a b, but third has type Parser c. Can't combine :)

json parsing in haskell part 2 - Non-exhaustive patterns in lambda

This is actually in continuation of the question I asked a few days back. I took the applicative functors route and made my own instances.
I need to parse a huge number of json statements all in a file, one line after the other. An example json statement is something like this -
{"question_text": "How can NBC defend tape delaying the Olympics when everyone has
Twitter?", "context_topic": {"followers": 21, "name": "NBC Coverage of the London
Olympics (July & August 2012)"}, "topics": [{"followers": 2705,
"name": "NBC"},{"followers": 21, "name": "NBC Coverage of the London
Olympics (July & August 2012)"},
{"followers": 17828, "name": "Olympic Games"},
{"followers": 11955, "name": "2012 Summer Olympics in London"}],
"question_key": "AAEAABORnPCiXO94q0oSDqfCuMJ2jh0ThsH2dHy4ATgigZ5J",
"__ans__": true, "anonymous": false}
sorry for the json formatting. It got bad
I have about 10000 such json statements and I need to parse them. The code I have written is
something like this -
parseToRecord :: B.ByteString -> Question
parseToRecord bstr = (\(Ok x) -> x) decodedObj where decodedObj = decode (B.unpack bstr) :: Result Question
main :: IO()
main = do
-- my first line in the file tells how many json statements
-- are there followed by a lot of other irrelevant info...
ts <- B.getContents >>= return . fst . fromJust . B.readInteger . head . B.lines
json_text <- B.getContents >>= return . tail . B.lines
let training_data = take (fromIntegral ts) json_text
let questions = map parseToRecord training_data
print $ questions !! 8922
This code gives me a runtime error Non-exhaustive patterns in lambda. The error references to \(Ok x) -> x in the code. By hit and trial, I came to the conclusion that the program works ok till the 8921th index and fails on the 8922th iteration.
I checked the corresponding json statement and tried to parse it standalone by calling the function on it and it works. However, it doesn't work when I call map. I don't really understand what is going on. Having learnt a little bit of haskell in "learn haskell for a great good", I wanted to dive into a real world programming project but seem to have got stuck here.
EDIT :: complete code is as follows
{-# LANGUAGE BangPatterns #-}
{-# OPTIONS_GHC -O2 -optc-O2 #-}
{-# OPTIONS_GHC -fno-warn-incomplete-uni-patterns #-}
import qualified Data.ByteString.Lazy.Char8 as B
import Data.Maybe
import NLP.Tokenize
import Control.Applicative
import Control.Monad
import Text.JSON
data Topic = Topic
{ followers :: Integer,
name :: String
} deriving (Show)
data Question = Question
{ question_text :: String,
context_topic :: Topic,
topics :: [Topic],
question_key :: String,
__ans__ :: Bool,
anonymous :: Bool
} deriving (Show)
(!) :: (JSON a) => JSObject JSValue -> String -> Result a
(!) = flip valFromObj
instance JSON Topic where
-- Keep the compiler quiet
showJSON = undefined
readJSON (JSObject obj) =
Topic <$>
obj ! "followers" <*>
obj ! "name"
readJSON _ = mzero
instance JSON Question where
-- Keep the compiler quiet
showJSON = undefined
readJSON (JSObject obj) =
Question <$>
obj ! "question_text" <*>
obj ! "context_topic" <*>
obj ! "topics" <*>
obj ! "question_key" <*>
obj ! "__ans__" <*>
obj ! "anonymous"
readJSON _ = mzero
isAnswered (Question _ _ _ _ status _) = status
isAnonymous (Question _ _ _ _ _ status) = status
parseToRecord :: B.ByteString -> Question
parseToRecord bstr = handle decodedObj
where handle (Ok k) = k
handle (Error e) = error (e ++ "\n" ++ show bstr)
decodedObj = decode (B.unpack bstr) :: Result Question
--parseToRecord bstr = (\(Ok x) -> x) decodedObj where decodedObj = decode (B.unpack bstr) :: Result Question
main :: IO()
main = do
ts <- B.getContents >>= return . fst . fromJust . B.readInteger . head . B.lines
json_text <- B.getContents >>= return . tail . B.lines
let training_data = take (fromIntegral ts) json_text
let questions = map parseToRecord training_data
let correlation = foldr (\x acc -> if (isAnonymous x == isAnswered x) then (fst acc + 1, snd acc + 1) else (fst acc, snd acc + 1)) (0,0) questions
print $ fst correlation
here's the data which can be given as input to the executable. I'm using ghc 7.6.3. If the program name is ans.hs, I followed these steps.
$ ghc --make ans.hs
$ ./ans < path/to/the/file/sample/answered_data_10k.in
thanks a lot!
The lambda function (\(Ok x) -> x) is partial in that it will only be able to match objects that were successfully decoded. If you are experiencing this, it indicates that your JSON parser is failing to parse a record, for some reason.
Making the parseToRecord function more informative would help you find the error. Try actually reporting the error, rather than reporting a failed pattern match.
parseToRecord :: B.ByteString -> Question
parseToRecord bstr = handle decodedObj
where handle (Ok k) = k
handle (Error e) = error e
decodedObj = decode (B.unpack bstr) :: Result Question
If you want more help, it might be useful to include the parser code.
Update
Based on your code and sample JSON, it looks like your code is first failing
when it encounters a null in the context_topic field of your JSON.
Your current code cannot handle a null, so it fails to parse. My fix would
be something like the following, but you could come up with other ways to
handle it.
data Nullable a = Null
| Full a
deriving (Show)
instance JSON a => JSON (Nullable a) where
showJSON Null = JSNull
showJSON (Full a) = showJSON a
readJSON JSNull = Ok Null
readJSON c = Full `fmap` readJSON c
data Question = Question
{ question_text :: String,
context_topic :: Nullable Topic,
topics :: [Topic],
question_key :: String,
__ans__ :: Bool,
anonymous :: Bool
} deriving (Show)
It also seems to fail on line 9002, where there is a naked value of "1000" on
that line, and it seems that several JSON values after that line lack the
'__ans__' field.
I would have suggestion to use Maybe in order to parse the null values:
data Question = Question
{ question_text :: String
, context_topic :: Maybe Topic
, topics :: [Topic]
, question_key :: String
, __ans__ :: Bool
, anonymous :: Bool
} deriving (Show)
And then change the readJSON function as follows (in addition, the missing ans-fields can be fixed by returning False on an unsuccessful parsing attempt):
instance JSON Question where
-- Keep the compiler quiet
showJSON = undefined
readJSON (JSObject obj) = Question <$>
obj ! "question_text" <*>
(fmap Just (obj ! "context_topic") <|> return Nothing) <*>
obj ! "topics" <*>
obj ! "question_key" <*>
(obj ! "__ans__" <|> return False) <*>
obj ! "anonymous"
readJSON _ = mzero
After getting rid of the 1000 in line 9000-something (like sabauma mentioned), I got 4358 as result. So maybe these slight changes are enough?

Why pattern matching does not throw exception in Maybe monad

My question is simple. Why wrong pattern matching does not throw exception in Maybe monad. For clarity :
data Task = HTTPTask {
getParams :: [B.ByteString],
postParams :: [B.ByteString],
rawPostData :: B.ByteString
} deriving (Show)
tryConstuctHTTPTask :: B.ByteString -> Maybe Task
tryConstuctHTTPTask str = do
case decode str of
Left _ -> fail ""
Right (Object trie) -> do
Object getP <- DT.lookup (pack "getParams") trie
Object postP <- DT.lookup (pack "postParams") trie
String rawData <- DT.lookup (pack "rawPostData") trie
return $ HTTPTask [] [] rawData
Look at tryConstuctHTTPTask function. I think that when the pattern does not match (for example "Object getP") we must get something like "Prelude.Exception", instead i get the "Nothing". I like this behavior but i am not understand why.
Thanks.
Doing pattern <- expression in a do-block, will call fail when the pattern does not match. So it is equivalent to doing
expression >>= \x ->
case x of
pattern -> ...
_ -> fail
Since fail is defined as Nothing in the Maybe monad, you get Nothing for failed pattern matches using <-.