XML Parsing - Erlang - json

I want to parse XML strings to erlang list and then to JSON.
Example Input :
<?xml version="1.0" encoding="UTF-8"?>
<!--some message here-->
<start>
<data>
<number id="333">test message</number>
<data>current date</data>
</data>
<mass>
<client>35</client>
<address>lattitude</address>
<code>3454343</code>
<foo tipo="casa">Some text message 2</foo>
<product>TEST</product>
</mass>
</start>
Output should be:
{
"start": {
"data": {
"number": {
"#id": "333",
"#text": "test message"
},
"data": "current date"
},
"mass": {
"client": "35",
"address": "lattitude",
"code": "3454343",
"foo": {
"#tipo": "casa",
"#text": "Some text message 2"
},
"product": "TEST"
}
}
}
I am trying to use erlsom:simple_form(Xml).
and getting :
{ok,{"start",[],
[{"data",[],
[{"number",[{"id","333"}],["test message"]},
{"data",[],["current date"]}]},
{"mass",[],
[{"client",[],["35"]},
{"address",[],["lattitude"]},
{"code",[],["3454343"]},
{"foo",[{"tipo","casa"}],["Some text message 2"]},
{"product",[],["TEST"]}]}]},
[]}
Now I want to delete these empty attrs. Is there any simple way to do this?
thanks in advance.
UPDATE: Make it work w/ solution from
Erlang xml to tuples and lists
BUT Getting
{"start",
[{"data",
[{"number","test message"},{"data","current date"}]},
{"mass",
[{"client","35"},
{"address","lattitude"},
{"code","3454343"},
{"foo","Some text message 2"},
{"product","TEST"}]}]}
the [{"id","333"}] and [{"tipo","casa"}] attribute lists are missing :(

The output of your simple parsing is in a set format: {Node, Attributes, Children}, so you can write a simple parser that turns that structure you have into a nested proplist. With that, you can either use mochijson or jsx to turn that proplist into a JSON string.
-module(transform).
-export([test/0]).

%% Demo entry point: convert the erlsom simple-form sample in data/0
%% into a nested proplist suitable for mochijson/jsx encoding.
test() -> parse(data()).

%% Convert one simple-form node {Tag, Attributes, Children} into a
%% one-element proplist [{Tag, Value}]. A node with no attributes and a
%% single character-data child collapses to {Tag, Text}; otherwise the
%% value is a proplist of "#"-prefixed attributes followed by children.
parse({Node, [], [Value]}) when is_list(Value) ->
    [{Node, Value}];
parse({Node, [], Children}) ->
    [{Node, children_to_struct(Children, [])}];
parse({Node, Attributes, Children}) ->
    [{Node, attributes_to_struct(Attributes, []) ++ children_to_struct(Children, [])}].

%% Fold child nodes into a proplist; character data becomes a "#text"
%% pair. Accumulates by prepending and reversing once at the end, which
%% is O(n) instead of the O(n^2) of repeated '++' appends.
children_to_struct([], Acc) ->
    lists:reverse(Acc);
children_to_struct([Text | Rest], Acc) when is_list(Text) ->
    %% Character data may appear anywhere among siblings (mixed content),
    %% not only as the final child.
    children_to_struct(Rest, [{"#text", Text} | Acc]);
children_to_struct([Child | Rest], Acc) when is_tuple(Child) ->
    %% parse/1 returns a one-element proplist; reverse/2 prepends it.
    children_to_struct(Rest, lists:reverse(parse(Child), Acc)).

%% Turn [{Key, Value}] attributes into [{"#Key", Value}], preserving order.
attributes_to_struct([], Acc) ->
    lists:reverse(Acc);
attributes_to_struct([{K, V} | T], Acc) ->
    attributes_to_struct(T, [{"#" ++ K, V} | Acc]).

%% Sample output of erlsom:simple_form/1 for the XML in the question.
data() ->
{"start",[],
[{"data",[],
[{"number",[{"id","333"}],["test message"]},
{"data",[],["current date"]}]},
{"mass",[],
[{"client",[],["35"]},
{"address",[],["lattitude"]},
{"code",[],["3454343"]},
{"foo",[{"tipo","casa"}],["Some text message 2"]},
{"product",[],["TEST"]}]}]}.
Running it in the shell with mochijson:
Eshell V7.3 (abort with ^G)
1> c(transform).
{ok,transform}
2> T = transform:test().
[{"start",
[{"data",
[{"number",[{"#id","333"},{"#text","test message"}]},
{"data","current date"}]},
{"mass",
[{"client","35"},
{"address","lattitude"},
{"code","3454343"},
{"foo",[{"#tipo","casa"},{"#text","Some text message 2"}]},
{"product","TEST"}]}]}]
3>
4> iolist_to_binary(mochijson2:encode(T)).
<<"{\"start\":{\"data\":{\"number\":{\"#id\":[51,51,51],\"#text\":[116,101,115,116,32,109,101,115,115,97,103,101]},\"data\":{\"#text"...>>

i suggest to use jiffy for JSON and exml for XML.
jiffy and exml are implemented with native code (NIFs), which makes them very fast.
Clone and compile them.
Before compiling them, you should install g++ and libexpat-dev.
Example:
-module(test).
-export([convert/1]).
-include("exml/include/exml.hrl"). %% provides the #xmlel{} record

%% Parse a binary XML document with exml and render it as a JSON binary
%% with jiffy. Attributes become "#Name" keys and character data becomes
%% "#text", so <foo tipo="casa">x</foo> yields {"foo":{"#tipo":"casa","#text":"x"}}.
convert(XML) when erlang:is_binary(XML) ->
    {ok, Root} = exml:parse(XML),
    jiffy:encode({[element_to_json(Root)]}).

%% An element without attributes holding a single run of character data
%% collapses to {Name, Text}; anything else becomes a JSON object built
%% from its attributes followed by its children.
element_to_json(#xmlel{name = Name, attrs = [], children = [{xmlcdata, Text}]}) ->
    {Name, Text};
element_to_json(#xmlel{name = Name, attrs = Attrs, children = Children}) ->
    Pairs = [attr_to_json(Attr) || Attr <- Attrs]
         ++ [child_to_json(Child) || Child <- Children],
    {Name, {Pairs}}.

%% Prefix attribute names with '#' so they cannot collide with tag names.
attr_to_json({Name, Value}) ->
    {<<$#, Name/binary>>, Value}.

%% Character data maps to the reserved "#text" key; nested elements recurse.
child_to_json({xmlcdata, Text}) ->
    {<<"#text">>, Text};
child_to_json(#xmlel{} = El) ->
    element_to_json(El).
In the shell:
p#jahanbakhsh ~/Projects/test $ ls
exml jiffy test.erl
p#jahanbakhsh ~/Projects/test $ erl -pa jiffy/ebin exml/ebin
Erlang/OTP 19 [erts-8.2.2] [source-1ca84a4] [64-bit] [smp:4:4] [async-threads:10] [hipe] [kernel-poll:false]
Eshell V8.2.2 (abort with ^G)
1> c(test).
{ok,test}
2> XML = <<"<start><data><number id=\"333\">test message</number><data>current date</data></data><mass><client>35</client><address>lattitude</address><code>3454343</code><foo tipo=\"casa\">Some text message 2</foo><product>TEST</product></mass></start>">>.
<<"<start><data><number id=\"333\">test message</number><data>current date</data></data><mass><client>35</client><address"...>>
3> test:convert(XML).
<<"{\"start\":{\"data\":{\"number\":{\"#id\":\"333\",\"#text\":\"test message\"},\"data\":\"current date\"},\"mass\":{\"client\":\"35\",\"addres"...>>
4> io:format("~s~n", [test:convert(XML)]).
{"start":{"data":{"number":{"#id":"333","#text":"test message"},"data":"current date"},"mass":{"client":"35","address":"lattitude","code":"3454343","foo":{"#tipo":"casa","#text":"Some text message 2"},"product":"TEST"}}}
ok
5>

Related

How to use Aeson to get a vector of strings inside a deep JSON object?

Let's say I want to use Aeson to parse the following JSON object:
{
"data": [
[
"data",
"more data"
],
[
"data",
"more data"
]
],
"error": {
"code": ""
}
}
I can create the records for the JSON objects, then create the instances to parse the pieces out like the documentation describes. But, I'm really only interested in the Vector Text that's inside data. Is there a more direct way to get at this than creating the records? It's not obvious how to create the Parser that gets me this directly.
It appears that there is an Aeson tutorial documenting exactly this problem: Parsing without creating extra types
In your case, data has arrays of arrays, so I'm not sure if you want a Vector (Vector Text) or flatten all of it into one array, but adapting from the documentation:
-- Pull only the "data" field out of the top-level object, ignoring the
-- rest (e.g. the "error" object); fails with the given label otherwise.
justData :: Value -> Parser (Vector (Vector Text))
justData = withObject "structure with data" $ \o -> o .: "data"
-- Flattened variant: 'join' collapses Vector (Vector Text) to Vector Text.
justDataFlat :: Value -> Parser (Vector Text)
justDataFlat value = fmap join (justData value)
Also note that if your structure is deeper, like this:
{
"data": {
"deep": [
"data",
"more data"
]
}
}
you can use .: more than once:
-- Reach two levels deep by chaining (.:): first fetch the "data" object,
-- then its "deep" field. The 'do' keyword is required here — without it,
-- the monadic bind (step1 <- ...) inside the lambda is a syntax error.
deeperData :: Value -> Parser (Vector Text)
deeperData = withObject "structure with deeper data" $ \o -> do
    step1 <- o .: "data"
    step1 .: "deep"

JSON Decoder in Elm 0.18

In Elm 0.18, I would like to build a JSON decoder for the following examples:
case 1:
{"metadata": {"signatures":[{"metadata": {"code": "1234"}},
{"metadata": {"code": "5678"}}]}}
-> { code = Just "1234" }
case 2:
{"metadata": {"signatures":[]}}
-> { code = Nothing }
case 3:
{"metadata": {"signatures":[{"metadata": null}]}}
-> { code = Nothing }
This is what I got working, but it fails for case 3.
-- Result record: the first signature's code, if one exists.
type alias Code = { code : Maybe String }
let
js = """{"metadata": {"signatures":[{"metadata": {"code": "1234"}},
{"metadata": {"code": "5678"}}]}}"""
-- Decode one signature's nested metadata.code string.
dec1 =
Decode.at [ "metadata", "code" ] Decode.string
-- Decode the signatures array; empty list -> Nothing, else first code.
-- NOTE(review): fails on case 3 ({"metadata": null}) because dec1
-- requires a string at metadata.code in every element.
dec0 =
Decode.list dec1
|> Decode.andThen
(\v ->
if List.isEmpty v then
Decode.succeed Nothing
else
Decode.succeed <| List.head v
)
-- Top-level decoder; optionalAt defaults to Nothing when the path is absent.
dec =
decode Code
|> optionalAt [ "metadata", "signatures" ] dec0 Nothing
expected =
Ok { code = Just "1234" }
in
Decode.decodeString dec js
|> Expect.equal expected
A workaround would be to import all the data to the model and then obtain the info from the model, but I prefer to avoid adding unnecessary data into my model. How can I improve this?
A more simplified approach could use Json.Decode.index to force the decoding at index zero as a string if it exists, which will fail otherwise, so you can use Json.Decode.maybe to return Nothing on failure.
dec0 =
Decode.maybe (Decode.index 0 dec1)

F# JSON Type Provider, do not serialize null values

Background
I am using the FSharp.Data JSON Type Provider with a sample that has an array of objects that may have different properties. Here is an illustrative example:
// The sample must be a compile-time literal so the type provider can
// inspect it at compile time.
[<Literal>]
let sample = """
{ "input": [
{ "name": "Mickey" },
{ "year": 1928 }
]
}
"""
// Generated from the sample: Input exposes optional Name and Year
// properties because each array element carries a different field.
type InputTypes = JsonProvider< sample >
The JSON Type Provider creates an Input type which has both an Optional Name and an Optional Year property. That works well.
Problem
When I try to pass an instance of this to the web service, I do something like this:
InputTypes.Root(
[|
InputTypes.Input(Some("Mouse"), None)
InputTypes.Input(None, Some(2028))
|]
)
The web service is receiving the following and choking on the nulls.
{
"input": [
{
"name": "Mouse",
"year": null
},
{
"name": null,
"year": 2028
}
]
}
What I Tried
I find that this works:
InputTypes.Root(
[|
InputTypes.Input(JsonValue.Parse("""{ "name": "Mouse" }"""))
InputTypes.Input(JsonValue.Parse("""{ "year": 2028 }"""))
|]
)
It sends this:
{
"input": [
{
"name": "Mouse"
},
{
"year": 2028
}
]
}
However, on my real project, the structures are larger and would require a lot more conditional JSON string building. It kind of defeats the purpose.
Questions
Is there a way to cause the JSON Type Provider to not serialize null properties?
Is there a way to cause the JSON Type Provider to not serialize empty arrays?
As a point of comparison, the Newtonsoft.JSON library has a NullValueHandling attribute.
I don't think there is an easy way to get the JSON formatting in F# Data to drop the null fields - I think the type does not clearly distinguish between what is null and what is missing.
You can fix that by writing a helper function to drop all null fields:
// Recursively strip every null-valued field from a JsonValue tree so the
// serialized output omits them entirely instead of emitting "field": null.
let rec dropNullFields json =
    match json with
    | JsonValue.Record fields ->
        // Keep only non-null fields, cleaning each kept value in turn.
        fields
        |> Array.choose (fun (key, value) ->
            match value with
            | JsonValue.Null -> None
            | _ -> Some (key, dropNullFields value))
        |> JsonValue.Record
    | JsonValue.Array items ->
        // Arrays are preserved; their elements are cleaned recursively.
        JsonValue.Array (Array.map dropNullFields items)
    | other ->
        // Scalars (strings, numbers, booleans) pass through unchanged.
        other
Now you can do the following and get the desired result:
let json =
InputTypes.Root(
[|
InputTypes.Input(Some("Mouse"), None)
InputTypes.Input(None, Some(2028))
|]
)
json.JsonValue |> dropNullFields |> sprintf "%O"

Elixir: find by value prefix in nested JSON

I'm trying to find URLs in a nested JSON response and map them. My function so far looks like this:
# NOTE(review): this is the failing version quoted in the question.
# Two problems visible here:
#  * 'https' (single quotes) is an Erlang charlist, but
#    String.starts_with?/2 expects a binary ("https") — hence the
#    "no function clause matching in String.starts_with?/2" error below.
#  * Enum.find only walks the top-level key/value pairs of the decoded
#    map, so nested maps/lists are handed to String.starts_with?/2 whole.
def list(env, id) do
Service.get_document(env, id)
|> Poison.decode!
|> Enum.find(fn {_key, val} -> String.starts_with?(val, 'https') end)
end
The JSON looks roughly like this:
"stacks": [
{
"boxes": [
{
"content": "https://ddd.cloudfront.net/photos/uploaded_images/000/001/610/original/1449447147677.jpg?1505956120",
"box": "photo"
}
]
}
],
"logo": "https://ddd.cloudfront.net/users/cmyk_banners/000/000/002/original/banner_CMYK.jpg?1397201875"
So URLs can have any key, and be at any level.
With that code I get this error:
no function clause matching in String.starts_with?/2
Anyone got a better way to find in JSON responses?
You'll have to use recursive function for this, which handles three types of data:
For map, it recurses over all its values.
For list, it recurses over all its elements.
For string, it selects strings that start with "https"
Here's a simple implementation which accepts a term and a string to check with starts_with?:
defmodule A do
  # Collect every string in `term` (recursing through maps and lists)
  # that starts with `prefix`. Results accumulate most-recent-first.
  def recursive_starts_with(term, prefix, found \\ [])

  # Leaf case: keep the string only when it carries the prefix.
  def recursive_starts_with(text, prefix, found) when is_binary(text) do
    case String.starts_with?(text, prefix) do
      true -> [text | found]
      false -> found
    end
  end

  # Maps: only the values can hold URLs, so recurse over them.
  def recursive_starts_with(map, prefix, found) when is_map(map) do
    map
    |> Map.values()
    |> Enum.reduce(found, &recursive_starts_with(&1, prefix, &2))
  end

  # Lists: recurse over each element, threading the accumulator.
  def recursive_starts_with(list, prefix, found) when is_list(list) do
    Enum.reduce(list, found, &recursive_starts_with(&1, prefix, &2))
  end
end
data = %{
"stacks" => [
%{
"boxes" => [
%{
"content" => "https://ddd.cloudfront.net/photos/uploaded_images/000/001/610/original/1449447147677.jpg?1505956120",
"box" => "photo"
}
]
}
],
"logo" => "https://ddd.cloudfront.net/users/cmyk_banners/000/000/002/original/banner_CMYK.jpg?1397201875"
}
data |> A.recursive_starts_with("https") |> IO.inspect
Output:
["https://ddd.cloudfront.net/photos/uploaded_images/000/001/610/original/1449447147677.jpg?1505956120",
"https://ddd.cloudfront.net/users/cmyk_banners/000/000/002/original/banner_CMYK.jpg?1397201875"]

Erlang jsx:encode remove escape backslash

I'm calling a Api Service that has the following json requirement:
{
"user": {
"userid": "123456"
},
"access_token": "ABCDEFGHIJKLMPNOPQRST"
}
I'm doing the following in my code:
MyUser = {<<"uid">>, <<"MyId-1">>},
Body = json_body_([{{<<"user">>, MyUser},{<<"access_token">>, <<?ORGANIZATION_ACCESS_TOKEN>>}}]),
Body1 = lists:map(fun erlang:tuple_to_list/1, Body),
io:format("Body in start : ~n~p~n", [Body1]).
%% Encode a proplist of {Key, Value} pairs as a JSON binary via jsx,
%% passing every key and value through sanitize_/1 first.
json_body_(ParamList) ->
    json_body__(ParamList, []).

%% Walk the pairs, sanitising as we go; prepend then reverse once at the
%% end so the encoded object keeps the original key order.
json_body__([{Key, Value} | Rest], Acc) ->
    json_body__(Rest, [{sanitize_(Key), sanitize_(Value)} | Acc]);
json_body__([], Acc) ->
    jsx:encode(lists:reverse(Acc)).

%% Placeholder hook for key/value scrubbing; currently the identity.
sanitize_(Term) ->
    Term.
When I apply jsx:encode to "Body1" the result is:
[{\"user\":{\"uid\":\"My-id-1234\"},\"access_token\":\"12345678ff4089\"}]
How can I get rid of the escape "\"?
Your string doesn't contain any \. Since you printed using ~p, Erlang escaped every double quote in the string to make the final output valid Erlang code. You can verify this by printing using ~s instead.
1> S = "{\"foo\": \"bar\"}".
"{\"foo\": \"bar\"}"
2> io:format("~p~n", [S]).
"{\"foo\": \"bar\"}"
ok
3> io:format("~s~n", [S]).
{"foo": "bar"}
ok