I'm trying to use Excel Power Query to import some JSON that looks like the following (but much bigger, with more fields, etc.):
example-records.json
{
"records": {
"record_id_1": {
"file_no": "5792C",
"loads": {
"load_id_1": {
"docket_no": "3116115"
},
"load_id_2": {
"docket_no": "3116118"
},
"load_id_3": {
"docket_no": "3208776"
}
}
},
"record_id_2": {
"file_no": "5645C",
"loads": {
"load_id_4": {
"docket_no": "2000527155"
},
"load_id_5": {
"docket_no": "2000527156"
},
"load_id_6": {
"docket_no": "2000527146"
}
}
}
}
}
I want to get a table like the following at the load_id / docket level. A row per load_id
What I've tried
Clicking buttons in power query UI I get the following.
The problem is I can't include a file_no column and this doesn't work when there are lots of load ids.
// First attempt, generated with the Power Query UI: yields one row per record/load pair with its docket_no, but no file_no column.
let
Source = Json.Document(File.Contents("H:\Software\Site Apps\example-records.json")),
records = Source[records],
// One row per record id: [Name] holds the id, [Value] the record body.
#"Converted to Table" = Record.ToTable(records),
#"Expanded Value" = Table.ExpandRecordColumn(#"Converted to Table", "Value", {"file_no", "loads"}, {"Value.file_no", "Value.loads"}),
// file_no is dropped here, which is why it is missing from the result.
// NOTE(review): presumably removed so that the unpivot below only sees the load columns - confirm.
#"Removed Columns" = Table.RemoveColumns(#"Expanded Value",{"Value.file_no"}),
// Every load id is listed by name, so this step has to be edited whenever the data contains other load ids.
#"Expanded Value.loads" = Table.ExpandRecordColumn(#"Removed Columns", "Value.loads", {"load_id_1", "load_id_2", "load_id_3", "load_id_4", "load_id_5", "load_id_6"}, {"Value.loads.load_id_1", "Value.loads.load_id_2", "Value.loads.load_id_3", "Value.loads.load_id_4", "Value.loads.load_id_5", "Value.loads.load_id_6"}),
// Turn the per-load columns into rows, keeping [Name] (the record id) as the key column.
#"Unpivoted Columns" = Table.UnpivotOtherColumns(#"Expanded Value.loads", {"Name"}, "Attribute", "Value"),
#"Expanded Value1" = Table.ExpandRecordColumn(#"Unpivoted Columns", "Value", {"docket_no"}, {"Value.docket_no"})
in
#"Expanded Value1"
You can use
let
    // Flatten the document with the JSON helper function.
    Flattened = JSON(Json.Document(File.Contents("c:\temp\example.json"))),
    // Keep only the "Name.1" and "Name.3" navigation columns plus the value itself.
    KeyColumns = Table.SelectColumns(Flattened, {"Name.1", "Name.3", "Value"}),
    // Rows with no "Name.3" entry copy their value into "Custom"; all other rows get null.
    WithParentValue = Table.AddColumn(
        KeyColumns,
        "Custom",
        (row) => if row[Name.3] = null then row[Value] else null
    ),
    // Carry the last non-null "Custom" value down onto the rows beneath it.
    CarriedDown = Table.FillDown(WithParentValue, {"Custom"}),
    // Finally keep only the rows that do have a "Name.3" entry.
    RowsWithThirdLevel = Table.SelectRows(CarriedDown, (row) => row[Name.3] <> null)
in
    RowsWithThirdLevel
This relies on a function that I named JSON. It was written by Imke Feldmann (https://www.thebiccountant.com/2018/06/17/automatically-expand-all-fields-from-a-json-document-in-power-bi-and-power-query/) and is reproduced below:
// Table.JsonExpandAll (name taken from the documentation record below).
// Expands a JSON document level by level and returns the non-null scalar values in a [Value] column,
// together with navigation columns ("Name.<n>", "Level", "Sort", "SortBy", "Dots").
let
func = (JSON) =>
let
Source = JSON,
// Accept raw JSON (parsed here) or an already parsed value (used as-is when parsing fails).
ParseJSON = try Json.Document(Source) otherwise Source,
// Start table: a record becomes Name/Value rows; anything else is treated as a list whose items all get Name = 0.
TransformForTable =
if Value.Is(ParseJSON, type record) then
Record.ToTable(ParseJSON)
else
#table(
{"Name", "Value"},
List.Zip({List.Repeat({0}, List.Count(ParseJSON)), ParseJSON})
),
AddSort = Table.Buffer(Table.AddColumn(TransformForTable, "Sort", each 0)),
// Level-by-level expansion: each pass sets the scalar rows aside ("Finished") and unfolds the
// remaining records/lists/tables one level deeper ("Next"). List.Skip drops the seed state.
LG = List.Skip(
List.Generate(
() => [Next = AddSort, Counter = 1, AddIndex = #table({"Sort"}, {{""}})],
// Stop once the state built from the sentinel table (Sort = "End") is reached.
each [AddIndex]{0}[Sort] <> "End",
each [
// Append the row position to the dotted "Sort" path of the rows handed over by the previous pass.
AddIndex = Table.AddIndexColumn([Next], "Index", 0, 1),
MergeSort = Table.CombineColumns(
Table.TransformColumnTypes(
AddIndex,
{{"Sort", type text}, {"Index", type text}},
"en-GB"
),
{"Sort", "Index"},
Combiner.CombineTextByDelimiter(".", QuoteStyle.None),
"Sort"
),
PJson = Table.TransformColumns(
MergeSort,
{{"Value", each try Json.Document(_) otherwise _}}
),
// Classify every value: Record / List / Table still need expanding, everything else is "other".
AddType = Table.AddColumn(
PJson,
"Type",
each
if Value.Is([Value], type record) then
"Record"
else if Value.Is([Value], type list) then
"List"
else if Value.Is([Value], type table) then
"Table"
else
"other"
),
AddStatus = Table.AddColumn(
AddType,
"Status",
each if [Type] = "other" then "Finished" else "Unfinished"
),
Finished = Table.SelectRows(AddStatus, each ([Status] = "Finished")),
Unfinished = Table.SelectRows(AddStatus, each ([Status] = "Unfinished")),
// Wrap a record in a one-item list so that records and lists can both go through ExpandListColumn.
AddNext = Table.AddColumn(
Unfinished,
"Next",
each if [Type] = "Record" then {[Value]} else [Value]
),
RemoveCols = Table.RemoveColumns(AddNext, {"Value", "Type", "Status"}),
ExpandNext = Table.ExpandListColumn(RemoveCols, "Next"),
AddIndex2 = Table.AddIndexColumn(ExpandNext, "Index", 0, 1),
MergeSort2 = Table.CombineColumns(
Table.TransformColumnTypes(
AddIndex2,
{{"Sort", type text}, {"Index", type text}},
"en-GB"
),
{"Sort", "Index"},
Combiner.CombineTextByDelimiter(".", QuoteStyle.None),
"Sort"
),
// Records become Name/Value tables; values convertible to text become one-cell tables; anything else is kept as-is.
TransformRecord = Table.TransformColumns(
MergeSort2,
{
{
"Next",
each try
Record.ToTable(_)
otherwise
try
if Value.Is(Text.From(_), type text) then
#table({"Value"}, {{_}})
else
_
otherwise
_
}
}
),
FilterOutNulls = Table.SelectRows(TransformRecord, each [Next] <> null),
// Nothing left to expand -> emit the "End" sentinel; otherwise expand the nested tables into
// "Name.<Counter>" / "Value" columns, or simply rename "Next" to "Value".
Next =
if Table.IsEmpty(FilterOutNulls) then
#table({"Sort"}, {{"End"}})
else if Value.Is(FilterOutNulls[Next]{0}, type table) = true then
Table.ExpandTableColumn(
FilterOutNulls,
"Next",
{"Name", "Value"},
{"Name." & Text.From([Counter]), "Value"}
)
else
Table.RenameColumns(FilterOutNulls, {{"Next", "Value"}}),
Counter = [Counter] + 1
],
// Emit only the scalar rows of each pass, tagged with a Level derived from the pass counter.
each Table.AddColumn([Finished], "Level", (x) => _[Counter] - 2)
)
),
// NOTE(review): Check is not referenced by any later step in this query.
Check = LG{2},
Combine = Table.Combine(LG),
Clean = Table.RemoveColumns(Combine, {"Status", "Type"}),
Trim = Table.TransformColumns(Clean, {{"Sort", each Text.Trim(_, "."), type text}}),
// Dynamic Padding for the sort-column so that it sorts by number in text strings
SelectSort = Table.SelectColumns(Trim, {"Sort"}),
SplitSort = Table.AddColumn(
SelectSort,
"Custom",
each List.Transform(try Text.Split([Sort], ".") otherwise {}, Number.From)
),
ToTable = Table.AddColumn(
SplitSort,
"Splitted",
each Table.AddIndexColumn(Table.FromColumns({[Custom]}), "Pos", 1, 1)
),
ExpandTable = Table.ExpandTableColumn(ToTable, "Splitted", {"Column1", "Pos"}),
// Per position in the dotted path, find the largest number; its digit count is the padding width for that position.
GroupPos = Table.Group(
ExpandTable,
{"Pos"},
{{"All", each _, type table}, {"Max", each List.Max([Column1]), type text}}
),
Digits = Table.AddColumn(GroupPos, "Digits", each Text.Length(Text.From([Max]))),
FilteredDigits = List.Buffer(Table.SelectRows(Digits, each ([Digits] <> null))[Digits]),
// SortBy: the Sort path with every segment left-padded with zeros to the width computed for its position.
SortNew = Table.AddColumn(
Trim,
"SortBy",
each Text.Combine(
List.Transform(
List.Zip({Text.Split([Sort], "."), List.Positions(Text.Split([Sort], "."))}),
each Text.PadStart(_{0}, FilteredDigits{_{1}}, "0")
),
"."
)
),
FilterNotNull = Table.SelectRows(SortNew, each ([Value] <> null)),
// Put Value, Level, Sort and SortBy first, followed by all remaining columns.
Reorder = Table.ReorderColumns(
FilterNotNull,
{"Value", "Level", "Sort", "SortBy"}
& List.Difference(
Table.ColumnNames(FilterNotNull),
{"Value", "Level", "Sort", "SortBy"}
)
),
// Dots: for every row, the list of column names that start with "Name".
Dots = Table.AddColumn(
#"Reorder",
"Dots",
each List.Select(Table.ColumnNames(#"Reorder"), (l) => Text.StartsWith(l, "Name"))
),
// This sort is just to view in the query editor. When loaded to the data model it will not be kept. Use "Sort by column" in the data model instead.
Sort = Table.Sort(Dots, {{"SortBy", Order.Ascending}})
in
Sort,
documentation = [
Documentation.Name = " Table.JsonExpandAll ",
Documentation.Description
= " Dynamically expands the <Json> Record and returns values in one column and additional columns to navigate. ",
Documentation.LongDescription
= " Dynamically expands the <Json> Record and returns values in one column and additional columns to navigate. Input can be JSON in binary format or the already parsed JSON. ",
Documentation.Category = " Table ",
Documentation.Version = " 1.2: Added column [Dots] (22/02/2019)",
Documentation.Author = " Imke Feldmann: www.TheBIccountant.com . ",
Documentation.Examples = {[Description = " ", Code = " ", Result = " "]}
]
in
// Attach the documentation record to the function's type as metadata.
Value.ReplaceType(func, Value.ReplaceMetadata(Value.Type(func), documentation))
I managed to solve it with an added custom column; this is the step that enables the expansion to one load id per row:
#"Added Custom" = Table.AddColumn(#"Expanded Value", "Custom", each Record.ToTable([Value.loads]))
let
    // Parse the file and pick the top-level "records" object.
    JsonRoot = Json.Document(File.Contents("H:\Software\Site Apps\example-records.json")),
    RecordsById = JsonRoot[records],
    // One row per record id: [Name] holds the id, [Value] the record body.
    RecordRows = Record.ToTable(RecordsById),
    RecordFields = Table.ExpandRecordColumn(
        RecordRows,
        "Value",
        {"file_no", "loads"},
        {"Value.file_no", "Value.loads"}
    ),
    // Key step: turn each "loads" record into a nested Name/Value table so that it can be
    // expanded into rows without listing the load ids by name.
    #"Added Custom" = Table.AddColumn(
        RecordFields,
        "Custom",
        (row) => Record.ToTable(row[Value.loads])
    ),
    WithoutLoadsRecord = Table.RemoveColumns(#"Added Custom", {"Value.loads"}),
    // One row per load id.
    LoadRows = Table.ExpandTableColumn(
        WithoutLoadsRecord,
        "Custom",
        {"Name", "Value"},
        {"Custom.Name", "Custom.Value"}
    ),
    WithDocket = Table.ExpandRecordColumn(
        LoadRows,
        "Custom.Value",
        {"docket_no"},
        {"Custom.Value.docket_no"}
    ),
    // Final, friendly column names.
    Result = Table.RenameColumns(
        WithDocket,
        {
            {"Name", "record_id"},
            {"Value.file_no", "file_no"},
            {"Custom.Name", "load_id"},
            {"Custom.Value.docket_no", "docket_no"}
        }
    )
in
    Result
I have RDD[Row] :
|---itemId----|----Country-------|---Type----------|
| 11 | US | Movie |
| 11 | US | TV |
| 101 | France | Movie |
How do I group by itemId so that I can save the result as a list of JSON, where each row of the resulting RDD is a separate JSON object:
{"itemId" : 11,
"Country": {"US" :2 },"Type": {"Movie" :1 , "TV" : 1} },
{"itemId" : 101,
"Country": {"France" :1 },"Type": {"Movie" :1} }
RDD :
I tried :
import com.mapping.data.model.MappingUtils
import com.mapping.data.model.CountryInfo
val mappingPath = "s3://.../"
val input = sc.textFile(mappingPath)
The input is a list of JSON documents, one per line. I map each line to the POJO class CountryInfo using MappingUtils, which takes care of the JSON parsing and conversion:
// Parse every input line into a CountryInfo, pair it with its item id,
// and gather the (itemId, CountryInfo) pairs into a single map via collectAsMap.
val MappingsList = input.map(x=> {
val countryInfo = MappingUtils.getCountryInfoString(x);
(countryInfo.getItemId(), countryInfo)
}).collectAsMap
MappingsList: scala.collection.Map[String,com.mapping.data.model.CountryInfo]
// Unwraps the CountryInfo held in the Option.
// NOTE(review): there is no `case None`, so an absent value is not handled by this match.
def showCountryInfo(x: Option[CountryInfo]) = x match {
case Some(s) => s
}
// Build one Row(itemId, country, type) per event, looking the country info up in MappingsList.
// The query now has the FROM keyword that was missing between the column and the table name.
val events = sqlContext.sql("select itemId from EventList")
val itemList = events.map(row => {
  // Read the column by name: the query selects a single column, so position 1 does not exist.
  val itemId = row.getAs[String]("itemId")
  // Unwrap the lookup with showCountryInfo (the helper defined in this snippet).
  val countryInfo = showCountryInfo(MappingsList.get(itemId))
  // The placeholder country "unknown" is reported as "US".
  val country = if (countryInfo.getCountry() == "unknown") "US" else countryInfo.getCountry()
  // `type` is a reserved word in Scala, so the local value is called tpe.
  val tpe = countryInfo.getType()
  Row(itemId, country, tpe)
})
Can someone let me know how I can achieve this?
Thank You!
I can't afford the extra time to complete this, but can give you a start.
The idea is that you aggregate the RDD[Row] down into a single Map that represents your JSON structure. Aggregation is a fold that requires two function parameters:
seqOp How to fold a collection of elements into the target type
combOp How to merge two of the target types.
The tricky part comes in combOp while merging, as you need to accumulate the counts of values seen in the seqOp. I have left this as an exercise, as I have a plane to catch! Hopefully someone else can fill in the gaps if you have trouble.
// One input row: item id, country and type.
case class Row(id: Int, country: String, tpe: String)

// Aggregates an RDD[Row] into Map[itemId -> (count per country, count per type)].
def foo: Unit = {
  // value -> number of occurrences
  type Counts = Map[String, Int]
  // item id -> (country counts, type counts)
  type Summary = Map[Int, (Counts, Counts)]

  val rows: RDD[Row] = ???

  // Adds the counts of `b` onto `a`, key by key.
  def mergeCounts(a: Counts, b: Counts): Counts =
    b.foldLeft(a) { case (acc, (key, n)) => acc.updated(key, acc.getOrElse(key, 0) + n) }

  // Folds a single row into the summary built so far.
  def seqOp(acc: Summary, r: Row): Summary =
    acc.get(r.id) match {
      // First row for this id: start both counters at 1 (note `->`, which builds the key/value pair).
      case None => acc.updated(r.id, (Map(r.country -> 1), Map(r.tpe -> 1)))
      case Some((countries, types)) =>
        val countries_ = countries.updated(r.country, countries.getOrElse(r.country, 0) + 1)
        val types_ = types.updated(r.tpe, types.getOrElse(r.tpe, 0) + 1)
        acc.updated(r.id, (countries_, types_))
    }

  val z: Summary = Map.empty

  // Merges two partial summaries. The fold starts from `r` so that ids present only in `r`
  // are kept; ids present on both sides have their counters added together.
  def combOp(l: Summary, r: Summary): Summary =
    l.foldLeft(r) { case (acc, (id, (countries, types))) =>
      acc.get(id) match {
        case None => acc.updated(id, (countries, types))
        case Some((otherCountries, otherTypes)) =>
          acc.updated(id, (mergeCounts(otherCountries, countries), mergeCounts(otherTypes, types)))
      }
    }

  // aggregate takes the zero value and the two functions in separate parameter lists.
  val summaryMap = rows.aggregate(z)(seqOp, combOp)
}
In specs2 you can match an array for elements like this:
// Sample document: a root object whose "products" key holds an array of product objects.
val json = """{"products":[{"name":"shirt","price":10, "ids":["1", "2", "3"]},{"name":"shoe","price":5}]}"""
// Combines a matcher on the "name" entry with a matcher on the "price" entry.
def aProductWith(name: Matcher[JsonType], price: Matcher[JsonType]): Matcher[String] =
/("name").andHave(name) and /("price").andHave(price)
// Applies allOf(products) to whatever sits under the "products" key.
def haveProducts(products: Matcher[String]*): Matcher[String] =
/("products").andHave(allOf(products:_*))
// Usage: one product matcher per expected product; the first also checks its "ids" entry.
json must haveProducts(
aProductWith(name = "shirt", price = 10) and /("ids").andHave(exactly("1", "2", "3")),
aProductWith(name = "shoe", price = 5)
)
(Example taken from here: http://etorreborre.github.io/specs2/guide/SPECS2-3.0/org.specs2.guide.Matchers.html)
How do I do the same thing i.e. match the contents of products if products is a root element in the json? What should haveProducts look like?
val json = """[{"name":"shirt","price":10, "ids":["1", "2", "3"]},{"name":"shoe","price":5}]"""
You can replace /("products").andHave(allOf(products:_*)) with have(allOf(products:_*)) like this:
// Sample document: here the product array is the root element itself.
val json = """[{"name":"shirt","price":10, "ids":["1", "2", "3"]},{"name":"shoe","price":5}]"""
// Combines a matcher on the "name" entry with a matcher on the "price" entry.
def aProductWith(name: Matcher[JsonType], price: Matcher[JsonType]): Matcher[String] =
/("name").andHave(name) and /("price").andHave(price)
// No /("products") selector: allOf(products) is applied through have(...) directly.
def haveProducts(products: Matcher[String]*): Matcher[String] = have(allOf(products:_*))
json must haveProducts(
aProductWith(name = "shirt", price = 10) and /("ids").andHave(exactly("1", "2", "3")),
aProductWith(name = "shoe", price = 5)
)
I'm trying to move the following query to Linq-to-sql, is it possible?
-- Outer query (B): returns the selected loan numbers ordered by LoanNo.
-- NOTE(review): #Percent, #LoanNo and #Order look like placeholders substituted before execution -- confirm.
select * from (
-- Keep the top #Percent percent of the rows of A (ties included), ordered by LoanNo in #Order direction.
Select top (#Percent) percent with ties *
from(
-- A: distinct loan numbers from CHE with Channel 'LINX' and Doc 'MTG' or 'MOD', at or above '#LoanNo',
-- that have no matching RecordingInfo row (left join + "Rec.LoanNo is null").
Select distinct
LoanNumber as LoanNo
From CHE
Left Join RecordingInfo as Rec
On CHE.LoanNumber = Rec.LoanNo
Where Channel = 'LINX'
and CHE.Doc in ('MTG','MOD')
and Rec.LoanNo is null
and LoanNumber >= '#LoanNo'
) A
order by LoanNo #Order
) B
order by LoanNo
I have not seen any way to do WITH TIES in LINQ.
I think something like this will work for you.
/// <summary>
/// Groups an ordered query by <paramref name="groupByExpression"/>, keeps the leading
/// <paramref name="percent"/> percent of those groups and returns all of their rows,
/// so rows that share a key are never split apart.
/// </summary>
/// <param name="query">The already ordered source query.</param>
/// <param name="groupByExpression">Selects the key that defines a tie (normally the sort key).</param>
/// <param name="percent">Percentage of key groups to keep, e.g. 50 for one half.</param>
/// <returns>The rows of the selected groups, flattened back into a single sequence.</returns>
/// <remarks>
/// The percentage is applied to the number of distinct keys, not to the number of rows.
/// A fractional group count is rounded up.
/// </remarks>
public static IQueryable<T> TopPercentWithTies<T, TKey>(this IOrderedQueryable<T> query, Expression<Func<T, TKey>> groupByExpression, double percent)
{
    var groupedQuery = query.GroupBy(groupByExpression);

    // Count() * percent / 100 is a double and cannot be assigned to an int implicitly;
    // round up explicitly so that a partial group still counts as one.
    int numberToTake = (int)System.Math.Ceiling(groupedQuery.Count() * percent / 100);

    return groupedQuery.Take(numberToTake).SelectMany(t => t);
}
I only tested it with IEnumerable, so I don't know for sure that it'll work properly with IQueryable. I also sorted the list before calling TopPercentWithTies().
Here's the code I used to test it.
// Demo: sort the sample people by age (oldest first), then keep the top 50 percent
// of the age groups together with every person sharing one of those ages.
int percent = 50;

var people = new[]
{
    new { Age = 99, Name = "Adam" },
    new { Age = 99, Name = "Andrew" },
    new { Age = 89, Name = "Bob" },
    new { Age = 50, Name = "Cecil" },
    new { Age = 50, Name = "Doug" },
    new { Age = 50, Name = "Everett" },
    new { Age = 35, Name = "Frank" },
    new { Age = 25, Name = "Greg" },
    new { Age = 15, Name = "Hank" }
};

// The extension method expects an ordered query, so sort before calling it.
var oldestFirst = people.AsQueryable().OrderByDescending(p => p.Age);
var topHalf = oldestFirst.TopPercentWithTies(p => p.Age, percent);

// Print one line per selected person.
foreach (var selected in topHalf)
{
    Console.WriteLine(selected);
}
Hope it helps or at least gets you in the right direction. You may want to tweak the logic for calculating numberToTake.