compare two json files and match entries with the same value - json

I need to compare two JSON files on the keys b, c, d and e.
If all of these keys match for an entry in JSON file 1 and an entry in JSON file 2,
I need to find the delta between the platform_time values (key a in the samples below) of the two entries,
and then delete those entries from JSON file 1 and JSON file 2.
(both json files have 10000000000 entries ):
So here we should match :
1) one[0] and two[1]
2) one[1] and two[0]
Data json one and json two:
first file -
"one": [
{
"a" : "2022-09-12 00:00:00.000",
"b" : "apple",
"c" : "1",
"d" : "2022-09-11 23:59:59.997",
"e" : 88
},
{
"a" : "2022-09-12 00:00:00.000",
"b" : "orange",
"c" : "2",
"d" : "2022-09-11 23:59:59.997",
"e" : 87
},
{
"a" : "2022-09-12 00:00:10.001",
"b" : "apple",
"c" : "6",
"d" : "2022-09-11 23:59:59.997",
"e" : 88
},...]
second file -
"two": [
{
"a" : "2022-09-12 00:00:30.000",
"b" : "orange",
"c" : "2",
"d" : "2022-09-11 23:59:59.997",
"e" : 87
},
{
"a" : "2022-09-12 00:00:10.001",
"b" : "apple",
"c" : "1",
"d" : "2022-09-11 23:59:59.997",
"e" : 88
},
{
"a" : "2022-09-12 00:00:30.000",
"b" : "orange",
"c" : "200",
"d" : "2021-09-11 23:59:59.997",
"e" : 81
},...
]
I started with something like this, but iterating over all elements takes too much time.
Could you please help me optimize my code?
import datetime
import json
import numpy as np
import random
lst_in_seconds = []
f = open('one_all.json')
one = json.load(f)
f.close()
f1 = open('two_all.json')
two = json.load(f1)
f1.close()
counter_one_better = 0
counter_two_better = 0
counter_the_same = 0
for k in range(10000000000):
for i in range(10000000000):
if one['one'][k]['b'] == two['two'][i]['b'] and one['one'][k]['e'] == two['two'][i]['e']
and one['one'][k]['amount'] == two['two'][i]['amount']
and one['one'][k]['d'] == two['two'][i]['d']
and one['one'][k]['c'] == two['two'][i]['c']:
if (one['one'][k]['a']) < (two['two'][i]['a']):
# one better than two
delt_one = datetime.datetime.strptime((one['one'][k]['a']), '%Y-%m-%d %H:%M:%S.%f')
delt_two = datetime.datetime.strptime((two['two'][i]['a']), '%Y-%m-%d %H:%M:%S.%f')
delta = delt_two - delt_one
diff_in_seconds = delta.total_seconds()
lst_in_seconds.append(diff_in_seconds)
counter_one_better += 1
two['two'][i]['b'] = random.randint(0,100000)
break
elif (one['one'][k]['a']) == (two['two'][i]['a']):
# same
delt_one = datetime.datetime.strptime((one['one'][k]['a']), '%Y-%m-%d %H:%M:%S.%f')
delt_two = datetime.datetime.strptime((two['two'][i]['a']), '%Y-%m-%d %H:%M:%S.%f')
delta = delt_two - delt_one
diff_in_seconds = delta.total_seconds()
lst_in_seconds.append(diff_in_seconds)
counter_the_same += 1
two['two'][i]['b'] = random.randint(0,100000)
break
elif (one['one'][k]['a']) > (two['two'][i]['a']):
delt_one = datetime.datetime.strptime((one['one'][k]['a']), '%Y-%m-%d %H:%M:%S.%f')
delt_two = datetime.datetime.strptime((two['two'][i]['a']), '%Y-%m-%d %H:%M:%S.%f')
delta = delt_one - delt_two
diff_in_seconds = delta.total_seconds()
diff_in_seconds_to_str = float(('-' + str(diff_in_seconds)))
lst_in_seconds.append(diff_in_seconds_to_str)
counter_two_better += 1
two['two'][i]['b'] = random.randint(0,100000)
break
#print('counter_the_same',counter_the_same,'count')
#print('counter_one_better',counter_one_better,'count')
#print('counter_two_better',counter_two_better,'count','\n')
print('one better than two in ', round((counter_one_better / (counter_two_better+counter_one_better+counter_the_same))*100,4),'% case')
print('the same ', round((counter_the_same / (counter_two_better+counter_one_better+counter_the_same))*100,4),'% case')
print('two better than one in ', round((counter_two_better / (counter_two_better+counter_one_better+counter_the_same))*100,4),'% case','\n')

You can reduce the time taken to compare the files by pre-processing the two files to dictionaries with keys of the values to be compared. Then for each entry in one, you can look up the entries in two which have the same b,c,d, and e values and compare the times. Note that you can make your code a lot more "DRY" by noting that the only difference in the three branches of the if is which counter is updated.
from collections import defaultdict
import datetime
import random

# Sample data; in real use these two dicts come from json.load() on the files.
one = {
    "one": [
        {
            "a": "2022-09-12 00:00:00.000",
            "b": "apple",
            "c": "1",
            "d": "2022-09-11 23:59:59.997",
            "e": 88,
        },
        {
            "a": "2022-09-12 00:00:00.000",
            "b": "orange",
            "c": "2",
            "d": "2022-09-11 23:59:59.997",
            "e": 87,
        },
        {
            "a": "2022-09-12 00:00:10.001",
            "b": "apple",
            "c": "6",
            "d": "2022-09-11 23:59:59.997",
            "e": 88,
        },
    ]
}
two = {
    "two": [
        {
            "a": "2022-09-12 00:00:30.000",
            "b": "orange",
            "c": "2",
            "d": "2022-09-11 23:59:59.997",
            "e": 87,
        },
        {
            "a": "2022-09-12 00:00:10.001",
            "b": "apple",
            "c": "1",
            "d": "2022-09-11 23:59:59.997",
            "e": 88,
        },
        {
            "a": "2022-09-12 00:00:30.000",
            "b": "orange",
            "c": "200",
            "d": "2021-09-11 23:59:59.997",
            "e": 81,
        },
    ]
}

# Two entries are the same record when all of these keys are equal.
compare_keys = ['b', 'c', 'd', 'e']
# Key holding the timestamp whose difference is measured for matched entries.
value_key = 'a'
# Key overwritten with a random number to mark a `two` entry as matched.
set_key = 'b'
time_format = '%Y-%m-%d %H:%M:%S.%f'

# Index both inputs by the tuple of compared values, so that matching is a
# dict lookup instead of a nested scan over both lists.
ones = defaultdict(list)
for o in one['one']:
    ones[tuple(o[k] for k in compare_keys)].append(o[value_key])

twos = defaultdict(list)
for t in two['two']:
    twos[tuple(t[k] for k in compare_keys)].append(
        {k: t[k] for k in (value_key, set_key)}
    )

counter_one_better = 0
counter_two_better = 0
counter_the_same = 0
lst_in_seconds = []

for k, o in ones.items():
    t = twos.get(k)
    if t is None:
        # No entry in `two` shares these key values.
        continue
    # Every `one` timestamp in the group is compared with every `two` entry
    # in the same group.
    for o1 in o:
        # Parsed once per `one` entry; it does not change in the inner loop.
        delt_one = datetime.datetime.strptime(o1, time_format)
        for t2 in t:
            delt_two = datetime.datetime.strptime(t2[value_key], time_format)
            # delta = two - one: positive means `one` has the earlier
            # timestamp, which the question counts as "one better".
            diff_in_seconds = (delt_two - delt_one).total_seconds()
            lst_in_seconds.append(diff_in_seconds)
            counter_one_better += diff_in_seconds > 0
            counter_the_same += diff_in_seconds == 0
            counter_two_better += diff_in_seconds < 0
            # Mark the `two` entry as matched, as the original code did.
            t2[set_key] = random.randint(0, 100000)

print(counter_one_better, counter_the_same, counter_two_better)
print(lst_in_seconds)

# Reconstruct the `two` dict: the compared key values come from the group key,
# then the stored entry overrides `set_key` and adds `value_key`.
new_two = {
    'two': [
        {**dict(zip(compare_keys, k)), **v}
        for k, group in twos.items()
        for v in group
    ]
}

# Output (for your sample data):
# counter_one_better, counter_the_same, counter_two_better
# 2 0 0
# lst_in_seconds
# [10.001, 30.0]
# new_two
{
"two": [
{
"b": 4459,
"c": "2",
"d": "2022-09-11 23:59:59.997",
"e": 87,
"a": "2022-09-12 00:00:30.000"
},
{
"b": 93855,
"c": "1",
"d": "2022-09-11 23:59:59.997",
"e": 88,
"a": "2022-09-12 00:00:10.001"
},
{
"b": "orange",
"c": "200",
"d": "2021-09-11 23:59:59.997",
"e": 81,
"a": "2022-09-12 00:00:30.000"
}
]
}

Related

how to extract and modify inner array objects with parent object data in jq

We are trying to format a JSON document similar to this:
[
{"id": 1,
"type": "A",
"changes": [
{"id": 12},
{"id": 13}
],
"wanted_key": "good",
"unwanted_key": "aaa"
},
{"id": 2,
"type": "A",
"unwanted_key": "aaa"
},
{"id": 3,
"type": "B",
"changes": [
{"id": 31},
{"id": 32}
],
"unwanted_key": "aaa",
"unwanted_key2": "aaa"
},
{"id": 4,
"type": "B",
"unwanted_key3": "aaa"
},
null,
null,
{"id": 7}
]
into something like this:
[
{
"id": 1,
"type": "A",
"wanted_key": true # every record must have this key/value
},
{
"id": 12, # note: this was in the "changes" property of record id 1
"type": "A", # type should be the same type than record id 1
"wanted_key": true
},
{
"id": 13, # note: this was in the "changes" property of record id 1
"type": "A", # type should be the same type than record id 1
"wanted_key": true
},
{
"id": 2,
"type": "A",
"wanted_key": true
},
{
"id": 3,
"type": "B",
"wanted_key": true
},
{
"id": 31, # note: this was in the "changes" property of record id 3
"type": "B", # type should be the same type than record id 3
"wanted_key": true
},
{
"id": 32, # note: this was in the "changes" property of record id 3
"type": "B", # type should be the same type than record id 3
"wanted_key": true
},
{
"id": 4,
"type": "B",
"wanted_key": true
},
{
"id": 7,
"type": "UNKN", # records without a type should have this type
"wanted_key": true
}
]
So far, I've been able to:
remove null records
obtain the keys we need with their default
give records without a type a default type
What we are missing:
from records having a changes key, create new records with the type of their parent record
join all records in a single array
Unfortunately we are not entirely sure how to proceed... Any help would be appreciated.
So far our jq goes like this:
del(..|nulls) | map({id, type: (.type // "UNKN"), wanted_key: (true)}) | del(..|nulls)
Here's our test code:
https://jqplay.org/s/eLAWwP1ha8P
The following should work:
map(select(values))
| map(., .type as $type | (.changes[]? + {$type}))
| map({id, type: (.type // "UNKN"), wanted_key: true})
Only select non-null values
Return the original items followed by their inner changes array (+ outer type)
Extract 3 properties for output
Multiple map calls can usually be combined, so this becomes:
map(
select(values)
| ., (.type as $type | (.changes[]? + {$type}))
| {id, type: (.type // "UNKN"), wanted_key: true}
)
Another option without variables:
map(
select(values)
| ., .changes[]? + {type}
| {id, type: (.type // "UNKN"), wanted_key: true}
)
# or:
map(select(values))
| map(., .changes[]? + {type})
| map({id, type: (.type // "UNKN"), wanted_key: true})
or even with a separate normalization step for the unknown type:
map(select(values))
| map(.type //= "UNKN")
| map(., .changes[]? + {type})
| map({id, type, wanted_key: true})
# condensed to a single line:
map(select(values) | .type //= "UNKN" | ., .changes[]? + {type} | {id, type, wanted_key: true})
Explanation:
Select only non-null values from the array
If type is not set, create the property with value "UNKN"
Produce the original array items, followed by their nested changes elements extended with the parent type
Reshape objects to only contain properties id, type, and wanted_key.
Here's one way:
map(
select(values)
| (.type // "UNKN") as $type
| ., .changes[]?
| {id, $type, wanted_key: true}
)
[
{
"id": 1,
"type": "A",
"wanted_key": true
},
{
"id": 12,
"type": "A",
"wanted_key": true
},
{
"id": 13,
"type": "A",
"wanted_key": true
},
{
"id": 2,
"type": "A",
"wanted_key": true
},
{
"id": 3,
"type": "B",
"wanted_key": true
},
{
"id": 31,
"type": "B",
"wanted_key": true
},
{
"id": 32,
"type": "B",
"wanted_key": true
},
{
"id": 4,
"type": "B",
"wanted_key": true
},
{
"id": 7,
"type": "UNKN",
"wanted_key": true
}
]
Demo
Something like below should work
map(
select(type == "object") |
( {id}, {id : ( .changes[]? .id )} ) +
{ type: (.type // "UNKN"), wanted_key: true }
)
jq play - demo

Customize JSON retrieved from mysql

Lets say this is my database table
id ProductID color size
1 abc red L
2 abc green M
3 abc yellow S
4 def purple L
5 def brown M
6 def pink S
Now I am fetching the data using my SQL queries, but in the response I want my JSON in this structure:
{
"status": true,
"message": "All Product Logs has been fetched Successfully",
"products": [
{
"id": "1",
"ProductID": "abc",
"colors": [
"red",
"green",
"yellow",
],
"sizes": [
"L",
"M",
"S",
]
},
{
"id": "2",
"ProductID": "def",
"colors": [
"purple",
"brown",
"pink",
],
"sizes": [
"L",
"M",
"S",
]
}
]
}
And this is what I do, but it doesn't make sense:
if ($response) {
$JSONDataArray=[];
$ColorDataArray=[];
$SizeDataArray=[];
while($row = mysqli_fetch_array($response)){
$ColorDataArray[]=array($row['color']);
$SizeDataArray[]=array($row['size']);
$JSONDataArray[]=array('productid' =>$row['productid'],'color' => $ColorDataArray,'sizes' => $SizeDataArray);
}
echo json_encode(['status'=>true,'message'=>'All Products has been fetched Successfully','products'=>$JSONDataArray]);
}
Any kind of help would be appreciated. What do you think: should I change my database structure, or should I change my query? I simply use a SELECT * query without any WHERE clause.
One option is to use the JSON_ARRAYAGG function:
SELECT JSON_PRETTY(
CONCAT(
'{"status": true, ',
'"message": "All Product Logs has been fetched Successfully", ',
'"products": [',
(
SELECT
GROUP_CONCAT(`der`.`json`)
FROM (
SELECT
JSON_OBJECT(
'ProductID', `ProductID`,
'colors', JSON_ARRAYAGG(`color`),
'sizes', JSON_ARRAYAGG(`size`)
) `json`
FROM
`tbl`
GROUP BY
`ProductID`
) `der`
),
']}'
)
) `json_response`;
See dbfiddle.
Keep in mind: GROUP_CONCAT: The result is truncated to the maximum length that is given by the group_concat_max_len system variable.

Parse JSON Nested SubValues in Powershell to Table

I converted the JSON string to Powershell in v5. The original json string is below:
# Sample input: a JSON array of objects, each with an "id" and a "Members" array.
# A single-quoted here-string must open with @' at the end of a line and
# close with '@ at the start of a line.
$j = @'
[{
"id": "1",
"Members": [
"A",
"B",
"C"
]
}, {
"id": "2",
"Members": [
"A",
"C"
]
}, {
"id": "3",
"Members": [
"A",
"D"
]
}]
'@
# Parse the JSON text into PowerShell objects.
$json = $j | ConvertFrom-Json
I would like the result set to look like the result set below. Eventually I will export to SQL:
id Members
----- --------
1 A
1 B
1 C
2 A
2 C
3 A
3 D
try this
# Emit one row per (id, member) pair from the parsed records.
$json | ForEach-Object {
    # Capture the record id: inside the calculated properties below,
    # $_ refers to the individual member, not the enclosing record.
    $id = $_.id
    # @{n=...; e=...} is a calculated property (n = name, e = expression).
    $_.members | Select-Object @{n='id';e={$id}}, @{n='members';e={$_}}
}

how to get projections in mongodb with group operator

I have a table with columns Column1 & Column2 which looks like this
Column1 Column2
A 1
A 2
A 3
B 2
B 4
B 6
If I perform following SQL in MYSQL
SELECT Column1, Column2, count(*) from Table group by Column1;
Result is
Column1 Column2 Count(*)
A 1 3
B 2 3
I want to execute similar query on MONGODB
I tried
QUERY1: db.table.aggregate({$group: {_id:"$Column1", count:{$sum:1}} })
QUERY2: db.table.aggregate({$project: {column1:1, column2:1}}, {$group: {_id:"$Column1", count:{$sum:1}} })
However the result for Query2 is same as Query1, It seems like you can not populate fields other than mentioned in $group column.
Is there a way to populate other fields in mongodb along with $group operator ?
I'm not sure I read the mysql query correctly, and I don't understand why this is particularly useful, but $first seems to accomplish the same thing.
However, as mentioned in the $first documentation, the outcome depends on the sorting so you should include a sorting criterion.
Data (column names shortened for brevity)
> db.foo.insert({"C1" : "A", "C2" : 1});
> db.foo.insert({"C1" : "A", "C2" : 2});
> db.foo.insert({"C1" : "A", "C2" : 3});
> db.foo.insert({"C1" : "B", "C2" : 2});
> db.foo.insert({"C1" : "B", "C2" : 4});
> db.foo.insert({"C1" : "B", "C2" : 6});
Aggregation Query
> db.foo.aggregate({$group: {_id:"$C1", C2: { $first: "$C2" }, count:{$sum:1}} })
Results
{
"result" : [
{
"_id" : "B",
"C2" : 2,
"count" : 3
},
{
"_id" : "A",
"C2" : 1,
"count" : 3
}
],
"ok" : 1
}

How to write Linq query for a Json file?

I need to write a Linq query in a specific format for a json file. There are 3 tables in the Database.
Student, with Student Id and name.
Subject, with Subject Id and name.
Results, with Student Id, Subject Id, Result and Date of Result.
This is my Query to get Results
var studentQuery = (from c in db.Students
join f in db.Results
on c.ID equals f.StudentID
join d in db.Subjects
on f.SubjectID equals d.ID
where c.ID == Convert.ToInt32(DropDownList1.SelectedValue)
select new newResult{
ID = f.StudentID,
Date = convertDateToUTC(f.Date.Value),
Name = c.Name.Trim(),
Result1 = f.Result1.Value,
Type = d.Subject1.Trim()
})
.ToList();
return studentQuery;
But I wish to return the query results in the format below:
[ { "name": "Denis P6 Results", "type": "English", "data": [ [ 1325431800000, 54 ], [ 1325691000000, 65 ], [ 1325950200000, 73 ], [ 1326209400000, 60 ] ] },
{ "name": "Denis P6 Results", "type": "Maths", "data": [ [ 1325518200000, 76 ], [ 1325777400000, 81 ], [ 1326036600000, 80 ], [ 1326295800000, 70 ] ] },
{ "name": "Denis P6 Results", "type": "Science", "data": [ [ 1325604600000, 80 ], [ 1325863800000, 75 ], [ 1326123000000, 69 ], [ 1326382200000, 68 ] ] },
{ "name": "Mak P6 Results", "type": "English", "data": [ [ 1325431800000, 57 ], [ 1325691000000, 49 ], [ 1325950200000, 66 ], [ 1326209400000, 70 ] ] },
{ "name": "Mak P6 Results", "type": "Maths", "data": [ [ 1325518200000, 80 ], [ 1325777400000, 83 ], [ 1326036600000, 85 ], [ 1326295800000, 79 ] ] },
{ "name": "Mak P6 Results", "type": "Science", "data": [ [ 1325604600000, 67 ], [ 1325863800000, 69 ], [ 1326123000000, 66 ], [ 1326382200000, 62 ] ] } ]
I really need some help as I've been searching for days. I am a total newbie at this.
Thanks for any help.
Let's break this down into each of the steps you're trying to achieve:
Group the results, by person and subject.
Render these into JSON.
For the first bit, we can do something like
var groupedResults = from result in db.Results
group result by new { result.StudentID, result.SubjectID } into grouping
select new { grouping.Key, grouping };
var resultsWithName = from result in groupedResults
join student in db.Students on result.Key.StudentID equals student.ID
join subject in db.Subjects on result.Key.SubjectID equals subject.ID
select new
{
result.Key,
student.Name,
subject.Type,
result.grouping
};
resultsWithName now contains a sequence of name and subject pairs, together with a sequence of the results. This is the first bit done.
You could use a library for generating the JSON, but this isn't too bad to do by hand:
var jsonRendered = resultsWithName.Select(g =>
string.Format("{{ \"name\" : \"{0} Results\", \"type\" : \"{1}\", \"data\" : {2} }}",
g.Name,
g.Type,
"[ " + string.Join(", ", g.grouping.Select(r => string.Format("[ {0}, {1} ]", r.Date, r.Mark))) + " ]"));
Here, we iterate over all the grouped results (i.e. for each person and each subject), then create a single string containing the results for that subject.
I haven't used quite the same names for some of the variables as you, but hopefully it's clear how to adapt this for your code.
JavaScriptSerializer serializer = new JavaScriptSerializer();
var json = serializer.Serialize(resultsWithName);
Just use the built in .NET json serializer. No need to make it too complicated.