MySQL 8 Json document merge data from multiple rows - mysql

I have JSON documents stored in multiple rows, as follows:
Row #1
{
"data": {
"level": 1,
"name": "xyz",
"property": "value",
"children": [
{
"level": 2,
"name": "xyz-1"
}
]
}
}
Row #2
{
"data": {
"children": [
{
"level": 2,
"name": "xyz-2"
},
{
"level": 2,
"name": "xyz-3"
}
]
}
}
Row #3
{
"data": {
"children": [
{
"level": 2,
"name": "xyz-4"
}
]
}
}
I want to use MySQL 8's JSON_MERGE_PRESERVE so that I get this result:
{
"data": {
"level": 1,
"name": "xyz",
"property": "value",
"children": [
{
"level": 2,
"name": "xyz-1"
},
{
"level": 2,
"name": "xyz-2"
},
{
"level": 2,
"name": "xyz-3"
},
{
"level": 2,
"name": "xyz-4"
}
]
}
}
I've tried
-- Merge three literal JSON documents into a single document.
-- Each argument is one source row; the "children" arrays found under the same
-- key are concatenated in argument order.
SELECT
    JSON_MERGE_PRESERVE(
        '{ "data": { "level": 1, "name": "xyz", "property": "value", "children": [ { "level": 2, "name": "xyz-1" } ] } }',
        '{ "data": { "children": [ { "level": 2, "name": "xyz-2" }, { "level": 2, "name": "xyz-3" } ] } }',
        '{ "data": { "children": [ { "level": 2, "name": "xyz-4" } ] } }'
    ) AS json;
But I want to SELECT the JSON data from the table and merge it, with something like this:
[Not working]
-- Failing attempt, kept as-is to reproduce error 1582 (incorrect parameter count).
-- JSON_MERGE_PRESERVE is evaluated once per row of `a` and receives a single
-- argument here; it needs at least two documents per call and does not
-- aggregate values across rows.
SELECT JSON_MERGE_PRESERVE(a.data_json) from
(SELECT data_json FROM data_table
WHERE name = 'abc') as a
The error message is
Error Code: 1582. Incorrect parameter count in the call to native function 'JSON_MERGE_PRESERVE'

One of the many options you can evaluate and use is a CTE (Common Table Expression); see section 13.2.13, WITH (Common Table Expressions), of the MySQL manual:
-- Merge the JSON documents of all selected rows into one document.
-- JSON_MERGE_PRESERVE is not an aggregate, so the rows are folded one at a
-- time by a recursive CTE.
WITH RECURSIVE
-- Number the rows 1..n in `id` order so the recursion does not depend on the
-- `id` values being contiguous or starting at 1. To merge only a subset of
-- the rows, add a WHERE clause here (for example WHERE `name` = 'abc').
-- The CAST fixes the column type to JSON: the column types of a recursive CTE
-- are taken from its non-recursive part, so a text column could otherwise be
-- too narrow for the merged result.
`numbered` AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY `id`) AS `row`,
        CAST(`data_json` AS JSON) AS `data_json`
    FROM
        `data_table`
),
-- After step k, `data_json` holds the merge of rows 1..k.
-- NOTE: merging more rows than cte_max_recursion_depth (1000 by default)
-- requires raising that session variable first.
`cte` AS (
    SELECT
        `row`,
        `data_json`
    FROM
        `numbered`
    WHERE
        `row` = 1
    UNION ALL
    SELECT
        `numbered`.`row`,
        JSON_MERGE_PRESERVE(`cte`.`data_json`, `numbered`.`data_json`) AS `data_json`
    FROM
        `cte`
    INNER JOIN
        `numbered` ON `numbered`.`row` = `cte`.`row` + 1
)
-- The row with the highest number carries the merge of every input row.
SELECT
    JSON_PRETTY(`data_json`)
FROM
    `cte`
ORDER BY
    `row` DESC
LIMIT 1;
See dbfiddle.

Related

find on id and append value to json parameter

I have the following data frame, df1:
A B C
123 B1 C1
456 B2 C2
And data frame df2:
A
[
{
"id": "123",
"details": {
"id": "123",
"color": null,
"param_1": {
"name": "mike"
},
"location": "US",
"items": [
{
"item_1": "#227858",
"offer_id": null,
"item_details": {
"detials_1": [{ "notes": "other:", "quantity": 1 }]
}
}
],
"version": 1,
}
}
]
[
{
"id": "456",
"details": {
"id": "456",
"color": null,
"param_1": {
"name": "james"
},
"location": "KR",
"items": [
{
"item_1": "#2221",
"offer_id": null,
"item_details": {
"detials_1": [{ "notes": "other", "quantity": 1 }]
}
}
],
"version": 2,
}
}
]
I want to find all values in df1[A] inside the JSON found inside df2[A] under the first instance of the id parameter. Once found, I want to replace the NULL values inside the color parameter with the df1[B] and offer_id with df1[C].
The output should create a new column with the appended values:
df2[B]:
[
{
"id": "123",
"details": {
"id": "123",
"color": B1,
"param_1": {
"name": "mike"
},
"location": "US",
"items": [
{
"item_1": "#227858",
"offer_id": C1,
"item_details": {
"detials_1": [{ "notes": "other:", "quantity": 1 }]
}
}
],
"version": 1,
}
}
]
[
{
"id": "456",
"details": {
"id": "456",
"color": B2,
"param_1": {
"name": "james"
},
"location": "KR",
"items": [
{
"item_1": "#2221",
"offer_id": C2,
"item_details": {
"detials_1": [{ "notes": "other", "quantity": 1 }]
}
}
],
"version": 2,
}
}
]
I just started researching how to approach this, but I need guidance on the most efficient way. Any insight would be greatly appreciated.

How to match on multiple fields per array item in elastic search

I am trying to create an elastic search query to match multiple fields inside of an object inside of an array.
For example, the Elastic Search structure I am querying against is similar to the following:
"hits": [
{
"_index": "titles",
"_type": "title",
...
"_source": {
...
"genres": [
{
"code": "adventure",
"priority": 1
},
{
"code": "action",
"priority": 2
},
{
"code": "horror",
"priority": 3
}
],
...
},
...
]
And what I am trying to do is match on titles with specific genre/priority pairings. For example, I am trying to match all titles with code=action and priority=1, but my query is returning too many results. The above title is hit during this example due to the fact that the genre list contains both a genre with code=action AND another genre that matches priority=1. My query is similar to the following:
"query": {
"bool": {
"filter": [
{
"bool": {
"must":[
{"term": {
"genres.code": {
"value": "action",
"boost": 1.0
}
}},
{"term": {
"genres.priority": {
"value": 1,
"boost": 1.0
}
}}
]
}
},
...
}
Is there any way to form the query in order to match a title with a single genre containing both priority=1 AND code=action?
I have recreated your problem. I added the following mapping
PUT titles
{
"mappings": {
"title": {
"properties": {
"author": {
"type": "text"
},
"genres": {
"type": "nested"
}
}
}
}
}
Then I added values to the index. This was what was inserted
"hits": {
"total": 3,
"max_score": 1,
"hits": [
{
"_index": "titles",
"_type": "title",
"_id": "2",
"_score": 1,
"_source": {
"author": "Author 1",
"genres": [
{
"code": "adventure",
"priority": 2
},
{
"code": "action",
"priority": 3
},
{
"code": "horror",
"priority": 1
}
]
}
},
{
"_index": "titles",
"_type": "title",
"_id": "1",
"_score": 1,
"_source": {
"author": "Author 2",
"genres": [
{
"code": "adventure",
"priority": 3
},
{
"code": "action",
"priority": 1
},
{
"code": "horror",
"priority": 2
}
]
}
},
{
"_index": "titles",
"_type": "title",
"_id": "3",
"_score": 1,
"_source": {
"author": "Author 3",
"genres": [
{
"code": "adventure",
"priority": 3
},
{
"code": "action",
"priority": 1
},
{
"code": "horror",
"priority": 2
}
]
}
}
]
}
My query is:
GET titles/title/_search
{
"query": {
"nested": {
"path": "genres",
"query": {
"bool": {
"must": [
{
"term": {
"genres.code": {
"value": "horror"
}
}
},
{
"term": {
"genres.priority": {
"value": 1
}
}
}
]
}
}
}
}
}
The query returns
"_source": {
"author": "Author 1",
"genres": [
{
"code": "adventure",
"priority": 2
},
{
"code": "action",
"priority": 3
},
{
"code": "horror",
"priority": 1
}
]
}
This title is the only one that has code = 'horror' and priority = 1.

PostgreSQL JSON query

I have json stored in a column (oid) with the following structure:
{
"fullName": "test test",
"personDetails": {
"address": "Advisor",
"phoneNumber": "clare.railton#heptonstalls.co.uk"
},
"id": "6765788-yt67",
"submittedDocument": {
"answers": [
{
"questionId": "2",
"responses": [
{
"value": "123456"
}
]
},
{
"questionId": "2.1",
"responses": [
{
"IdA": 1,
"IdB": 1,
"value": "false"
},
{
"IdA": 1,
"IdB": 2,
"value": "false"
},
{
"IdA": 1,
"IdB": 3,
"value": "false"
},
{
"IdA": 1,
"IdB": 4,
"value": "true"
}
]
}
]
},
"date": "2018-11-22",
"PeriodId": 123456
}
How would I get the value of the response to all question numbers? I have managed to get the JSON structure from the oid column using the lo_get function, but I am struggling to capture the values I need.
Many thanks
Are you sure you want to store the JSON data in a large object?
Postgres has native JSON column types that can be used in tables:
-- Demo table: one JSON document per row, stored in a native json column.
CREATE TABLE test_json (id INTEGER PRIMARY KEY, json json);
-- Sample document from the question. The target columns are listed explicitly
-- so the insert does not depend on the table's column order.
INSERT INTO test_json (id, json) VALUES (1, '{
"fullName": "test test",
"personDetails": {
"address": "Advisor",
"phoneNumber": "clare.railton#heptonstalls.co.uk"
},
"id": "6765788-yt67",
"submittedDocument": {
"answers": [
{
"questionId": "2",
"responses": [
{
"value": "123456"
}
]
},
{
"questionId": "2.1",
"responses": [
{
"IdA": 1,
"IdB": 1,
"value": "false"
},
{
"IdA": 1,
"IdB": 2,
"value": "false"
},
{
"IdA": 1,
"IdB": 3,
"value": "false"
},
{
"IdA": 1,
"IdB": 4,
"value": "true"
}
]
}
]
},
"date": "2018-11-22",
"PeriodId": 123456
}');
-- One output row per response: the question it belongs to and the response value.
-- json_array_elements() expands a JSON array into rows. Calling it in FROM
-- (as a LATERAL join) keeps the parent answer in scope, so questionId can be
-- returned next to each value instead of being discarded.
-- ->> returns text, so values come back without JSON quoting.
-- WITH ORDINALITY exposes each element's array position, which makes the
-- output order deterministic.
SELECT
    answer.item ->> 'questionId' AS question_id,
    response.item ->> 'value' AS response_value
FROM test_json AS t
CROSS JOIN LATERAL json_array_elements(t.json -> 'submittedDocument' -> 'answers')
    WITH ORDINALITY AS answer (item, position)
CROSS JOIN LATERAL json_array_elements(answer.item -> 'responses')
    WITH ORDINALITY AS response (item, position)
ORDER BY
    t.id,
    answer.position,
    response.position;
See:
https://www.postgresql.org/docs/current/functions-json.html

Elastic Nested query - display only the first 2 inner hits

How do I change my query to display only the first 5 orders within the orderbook?
My data is structured like this. Order is a nested type.
Orderbook
|_ Orders
This is my query
GET /orderindex/_search
{
"size": 10,
"query": {
"term": { "_type": "orderbook" }
}
}
And this is the result
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 10,
"max_score": 1,
"hits": [
{
"_index": "orderindex",
"_type": "orderbook",
"_id": "1",
"_score": 1,
"_source": {
"id": 1,
"exchange": "Exch1",
"label": "USD/CAD",
"length": 40,
"timestamp": "5/16/2018 4:33:31 AM",
"orders": [
{
"pair1": "USD",
"total": 0.00183244,
"quantity": 61,
"orderbookId": 0,
"price": 0.00003004,
"exchange": "Exch1",
"id": 5063,
"label": "USD/CAD",
"pair2": "CAD",
},
{
"pair1": "USD",
"total": 0.0231154,
"quantity": 770,
"orderbookId": 0,
"price": 0.00003002,
"exchange": "Exch1",
"id": 5064,
"label": "USD/CAD",
"pair2": "CAD",
},
...
..
.
Also, how do I query two specific orderbooks by their label names and retrieve only the first 2 orders of each?
I am now sending the query below, but the problem is that it returns the orderbooks including all of their orders, and only after that the 2 inner hits. How do I return only the 2 inner hits, without all the orders that come with the orderbook from the first part of the query?
GET /orderindex/_search
{
"query": {
"bool": {
"must": [
{
"term": { "_type": "orderbook" }
},
{
"nested": {
"path": "orders",
"query": {
"match_all": {}
},
"inner_hits": {
"size": 2
}
}
}
]
}
}}
Inner hits support the following options:
size
The maximum number of hits to return per inner_hits. By default the
top three matching hits are returned.
This basically means that you can do it with a query similar to this one:
{
"query": {
"bool": {
"must": [
{
# placeholder clause: replace it with your own query that selects the orderbook(s)
"term": {
"user": "kimchy"
}
},
{
"nested": {
"path": "orders",
"query": {
"match_all": {}
},
"inner_hits": {
"size": 3 # maximum number of inner hits (nested orders) returned per orderbook
}
}
}
]
}
}
}
You may also want to filter _source in the results, like this:
"_source": {
"includes": [ "obj1.*", "obj2.*" ],
"excludes": [ "*.description" ]
}
In your case, it could be useful to exclude orders.* so that the full list of orders is not returned with each orderbook.
More information on this one - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-inner-hits.html

Returning term count for a single document using the terms facet in elastic search

Say I have the following search query...
POST /topics/_search
{
"fields": [
"topic_attachment",
"topic_replies",
"topic_status"
],
"query" : {
"filtered" : {
"query" : {
"term" : {
"_id" : "5478"
}
}
}
},
"facets": {
"text": {
"terms": {
"field": "text",
"size": 10,
"order": "count"
}
}
}
}
The result of this search is the following.
{
"took": 93,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "topics",
"_type": "full-topic",
"_id": "5478",
"_score": 1,
"fields": {
"topic_replies": 1141,
"topic_status": 0,
"topic_attachment": false
}
}
]
},
"facets": {
"text": {
"_type": "terms",
"missing": 0,
"total": 8058,
"other": 8048,
"terms": [
{
"term": "ω",
"count": 1
},
{
"term": "œyouâ",
"count": 1
},
{
"term": "œyou",
"count": 1
},
{
"term": "œwhisperedâ",
"count": 1
},
{
"term": "œwalt",
"count": 1
},
{
"term": "œunderstandingâ",
"count": 1
},
{
"term": "œtieâ",
"count": 1
},
{
"term": "œthe",
"count": 1
},
{
"term": "œpersonally",
"count": 1
},
{
"term": "œnappiesâ",
"count": 1
}
]
}
}
}
Each term has a count of exactly 1. Why is this? I know the text from this document has more than one term in common. Is this because the term count only increments once per document? If so how do I count a term more than once from a single document?
That's the document count, not the term frequency. Luckily with the new aggregations module (replacement for facets introduced in 1.0.Beta2) count has been renamed to doc_count to clarify what it is.