How to Bulk Upload Complex JSON to MySQL - mysql

I have a JSON file that I am trying to bulk upload to MySQL. The file is around 50 GB. Is there a simple method to get all of the data into MySQL? I tried watching videos on YouTube on how to do this, but all of the tutorials were for very simple JSON data that doesn't have nested data like this. Any help would be amazing. Here is a sample so you can see the structure of it:
{
"PatentData": [
{
"patentCaseMetadata": {
"applicationNumberText": {
"value": "16315092",
"electronicText": "16315092"
},
"filingDate": "2019-07-03",
"applicationTypeCategory": "Utility",
"partyBag": {
"applicantBagOrInventorBagOrOwnerBag": [
{
"applicant": [
{
"contactOrPublicationContact": [
{
"name": { "personNameOrOrganizationNameOrEntityName": [ { "organizationStandardName": { "content": [ "SEB S.A." ] } } ] },
"cityName": "ECULLY",
"geographicRegionName": {
"value": "",
"geographicRegionCategory": "STATE"
},
"countryCode": "FR"
}
]
}
]
},
{
"inventorOrDeceasedInventor": [
{
"contactOrPublicationContact": [
{
"name": {
"personNameOrOrganizationNameOrEntityName": [
{
"personStructuredName": {
"firstName": "Johan",
"middleName": "",
"lastName": "SABATTIER"
}
}
]
},
"cityName": "Mornant",
"geographicRegionName": {
"value": "",
"geographicRegionCategory": "STATE"
The end goal is to have the JSON file in a MySQL database in the following format:
Name | Address | State | Country ... | Abstract
Tim - 23 North- TX - US ... | The tissue...
Tom - 33 North- TX - US ... | The engineer...
Kim - 78 North- TX - US ... | The lung...
Bob - 123 North- TX - US ... | The tissue...
Rob - 93 North- TX - US ... | The scope...

Related

Extract a subset of a deeply nested JSON and print only the key/value pairs I am interested in

I have a deeply nested JSON file.
I want to extract and parse only the subset I am interested in; in my case, all of the content under the 'node' key.
How can I:
extract the subset of this JSON file that contains "edges[].node" (edges is the 'parent' key of node)
in the 'node' section, I am interested in the key:value pairs of
.url,
.headline.default, (*this one is 'grandchild' of key 'node'*)
.firstPublished
I want to keep only the above 3 items inside the 'node' key.
How can I print out the slimmed-down version of the JSON file that I need?
A nice-to-have option: can I still keep the structure (the full path that leads from the JSON root key to the embedded 'node' subset I am interested in)?
Here is the jqplay-myjson (full content of my JSON file).
I have also tried to attach the full content here:
{
"data": {
"legacyCollection": {
"longDescription": "The latest news, analysis and investigations from Europe.",
"section": {
"name": "world",
"url": "/section/world"
},
"collectionsPage": {
"stream": {
"pageInfo": {
"hasNextPage": true,
"__typename": "PageInfo"
},
"__typename": "AssetsConnection",
"edges": [
{
"node": {
"url": "https://www.nytimes.com/video/world/europe/100000008323381/icc-war-crimes-ukraine.html",
"firstPublished": "2022-04-27T23:28:33.241Z",
"headline": {
"default": "I.C.C. Joins Investigation of War Crimes in Ukraine",
"__typename": "CreativeWorkHeadline"
},
"summary": "Karim Khan, the chief prosecutor of the International Criminal Court, said that his organization would participate in a joint effort — with Ukraine, Poland and Lithuania — to investigate war crimes committed since Russia’s invasion.",
"promotionalMedia": {
"__typename": "Image",
"id": "SW1hZ2U6bnl0Oi8vaW1hZ2UvYTY3MTVhNDUtZDE0NS01OWZjLThkZWItNzYxMWViN2UyODhk"
},
"embedded": false
},
"__typename": "AssetsEdge"
},
{
"node": {
"__typename": "Article",
"url": "https://www.nytimes.com/2022/04/27/sports/soccer/chelsea-sale-roman-abramovich.html",
"firstPublished": "2022-04-27T19:42:17.000Z",
"typeOfMaterials": [
"News"
],
"archiveProperties": {
"lede": "",
"__typename": "ArticleArchiveProperties"
},
"headline": {
"default": "Endgame Nears in Bidding for Chelsea F.C.",
"__typename": "CreativeWorkHeadline"
},
"summary": "The American bank selling the English soccer team on behalf of its Russian owner could name its preferred suitor by the end of the week. But the drama isn’t over.",
"translations": []
},
"__typename": "AssetsEdge"
}
],
"totalCount": 52559
}
},
"sourceId": "100000004047788",
"tagline": "",
"__typename": "LegacyCollection"
}
}
}
Here is the command I have (jqplay Demo):
# Restrict each edge's `node` object to the listed top-level keys.
# The closing parenthesis of with_entries(...) was missing in the pasted command.
.data.legacyCollection.collectionsPage.stream.edges[].node|= with_entries(select([.key]|inside(["default","url","firstPublished"])))
And here is the output I got
{
"data": {
"legacyCollection": {
"longDescription": "The latest news, analysis and investigations from Europe.",
"section": {
"name": "world",
"url": "/section/world"
},
"collectionsPage": {
"stream": {
"pageInfo": {
"hasNextPage": true,
"__typename": "PageInfo"
},
"__typename": "AssetsConnection",
"edges": [
{
"node": {
"url": "https://www.nytimes.com/video/world/europe/100000008323381/icc-war-crimes-ukraine.html",
"firstPublished": "2022-04-27T23:28:33.241Z"
},
"__typename": "AssetsEdge"
},
{
"node": {
"url": "https://www.nytimes.com/2022/04/27/sports/soccer/chelsea-sale-roman-abramovich.html",
"firstPublished": "2022-04-27T19:42:17.000Z"
},
"__typename": "AssetsEdge"
}
],
"totalCount": 52559
}
},
"sourceId": "100000004047788",
"tagline": "",
"__typename": "LegacyCollection"
}
}
}
Here is the output I expect to have
{
"data": {
"legacyCollection": {
"collectionsPage": {
"stream": {
"edges": [
{
"node": {
"url": "https://www.nytimes.com/video/world/europe/100000008323381/icc-war-crimes-ukraine.html",
"firstPublished": "2022-04-27T23:28:33.241Z"
}
},
{
"node": {
"url": "https://www.nytimes.com/2022/04/27/sports/soccer/chelsea-sale-roman-abramovich.html",
"firstPublished": "2022-04-27T19:42:17.000Z"
}
}
]
}
}
}
}
}
Here's a (somewhat) declarative solution:
# Build the slimmed-down edges array first: for every edge keep only `node`,
# and inside it only url, firstPublished and headline.default.
(.data.legacyCollection.collectionsPage.stream.edges
| map( {node: (.node
| {url,
firstPublished,
headline: {default: .headline.default} })})) as $edges
# Re-create the enclosing path by hand so the output keeps the original nesting.
| {data: {
legacyCollection: {
collectionsPage: {
stream: {
# `$edges` inside an object constructor is shorthand for `edges: $edges`.
$edges
}
}
}
}
}
Here's one way to make the selection while ensuring that the structure is preserved. This solution may be of interest because
it can easily be adapted for use with jq's "--stream" option.
# True when the input array begins with the elements of $head.
def array_startswith($head): .[: $head|length] == $head;
# Remember the whole input so values can be fetched by path inside the reduce.
. as $in
| ["data", "legacyCollection", "collectionsPage", "stream", "edges"] as $head
| ($head|length) as $len
# Visit only paths of the form $head + [<array index>, "node", ...]:
# index $len is the position inside `edges`, index 1+$len must be "node".
| reduce (paths
| select( array_startswith($head) and .[1+$len] == "node" )) as $p
# Start from null; setpath creates the intermediate objects/arrays as needed.
(null;
# Keep node.url and node.firstPublished (path length $len + 3) ...
if ((($p|length) == $len + 3) and ($p[-1] | IN("url", "firstPublished")))
# ... and node.headline.default (path length $len + 4).
or ((($p|length) == $len + 4) and $p[-2:] == ["headline", "default"])
# Copy the value found at that path in the original input.
then setpath($p; $in | getpath($p))
else .
end)

Change type of input from string to date, MongoDB

I'm trying to convert the inspection_date field from string to date for every object inside my DB.
Every object is built like this one.
"name": "$1 STORE",
"address": "5573 ROSEMEAD BLVD",
"city": "TEMPLE CITY",
"zipcode": "91780",
"state": "California",
"violations": [{
"inspection_date": "2015-09-29",
"description": " points ... violation_status\n62754 1 ... OUT OF COMPLIANCE\n62755 1 ... OUT OF COMPLIANCE\n62756 2 ... OUT OF COMPLIANCE\n\n[3 rows x 5 columns]",
"risk": "Risk 3 (Low)"
}, {
"inspection_date": "2016-08-18",
"description": " points ... violation_status\n338879 2 ... OUT OF COMPLIANCE\n\n[1 rows x 5 columns]",
"risk": "Risk 3 (Low)"
} //could be more than 2 or less then 2 object inside violations array//]}
How can I convert all of the inspection_date fields without doing it by hand, one by one?
As suggested by @turivishal, you have to make use of the $map and $dateFromString operators.
// Produces each document with every violations[].inspection_date converted
// from a "%Y-%m-%d" string into a date value.
// NOTE(review): aggregate() presumably only returns the converted documents;
// confirm whether the result must be written back (e.g. an update that takes
// a pipeline, or a $merge/$out stage) for the change to persist.
db.collection.aggregate([
{
"$addFields": {
// Overwrite the violations array with a transformed copy of itself.
"violations": {
"$map": {
"input": "$violations",
"in": {
// Keep all fields of the current element; only inspection_date is replaced.
"$mergeObjects": [
"$$this",
{
"inspection_date": {
"$dateFromString": {
"dateString": "$$this.inspection_date",
"format": "%Y-%m-%d",
// Yield null for unparseable or missing dates instead of raising an error.
"onError": null,
"onNull": null
}
}
}
],
},
}
}
}
},
])
Mongo Playground Sample Execution

How do I create a jq query that extracts data from 2 separate levels of a JSON file?

I have a JSON file that looks something like this:
{
"people": {
"company": "Acme",
"department": "Dev",
"perks": {
"eat": "pizza",
"drink": "beer",
"play": "twister"
},
"names": [{
"last_name": "Smith",
"first_names": [{
"name": "Bill"
},
{
"name": "Alice"
},
{
"name": "Mary"
}
]
},
{
"last_name": "Brown",
"first_names": [{
"name": "Gil"
},
{
"name": "Bob"
},
{
"name": "Mary"
}
]
},
{
"last_name": "Sanchez",
"first_names": [{
"name": "Gil"
},
{
"name": "Jose"
},
{
"name": "Marlena"
}
]
}
]
}
}
The output I'm looking for is:
Acme
Dev
twister
Smith, Bill
Smith, Alice
Smith, Mary
Brown, Gil
Brown, Bob
Brown, Mary
Sanchez, Gil
Sanchez, Jose
Sanchez, Marlena
I have the jq query that gets the names:
jq -r '.people | .names[] | "\(.last_name), \(.first_names[].name)"'
And I have the query that gets me the first 3 lines (Acme, Dev, twister):
jq -r '.people | .company, .department, .perks.play'
But when I try to combine them (in too many ways to list here!), I get an error. I don't know how to combine these to get the query to walk the first level below ".people" and then the level below ".people.names[]" (all in one query).
Simply use the "," operator to join the two queries, e.g.
.people
| (.company, .department, .perks.play),
(.names[] | "\(.last_name), \(.first_names[].name)")

How do I structure JSON? [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 4 years ago.
Improve this question
I have this data sample that I need to put into a JSON format.
What's the best way/structure to do that? If it helps, I'll be developing an angular product selection tool for this.
Item 1: Federation Phaser
Options:
| FORM FACTOR | PRICE |
| Compact | $545 |
| Pistol Grip | $600 |
Item 2: Sith Lightsaber
Options:
| BLADE COLOR | BLADE COUNT | PRICE |
| Red | Single | $1000 |
| Red | Double | $1750 |
| Blue | Single | $1125 |
| Blue | Double | $1875 |
| Green | Single | $1250 |
JSON is formed by name/value pairs and surrounded by curly braces {}. The name/value pairs are separated by commas and the values themselves can be JSON objects or arrays.
Example 1 (Simple):
{
"fruit1": "apple",
"fruit2": "pear"
}
Example 2 (more complex):
{
"fruitBasket1": { "fruit1": "apple", "fruit2": "pear"},
"fruitBasket2": { "fruit1": "grape", "fruit2": "orange"}
}
For your example, you could construct the JSON as follows with an array:
{
"item1": {
"name": "Federation Phaser",
"options": [
{
"form": "compact",
"price": "$545"
},
{
"form": "Pistol Grip",
"price": "$600"
}
]
},
"item2": {
"name": "Sith Lightsaber",
"options": [
{
"bladeColor": "red",
"count": "single",
"price": "$1000"
},
{
"bladeColor": "blue",
"count": "double",
"price": "$1875"
}
]
}
}
If you want to have a variable number of "items" you could put them into an array too. For example:
{
"items": [
{
"name": "Federation Phaser",
"options": [{
"form": "compact",
"price": "$545"
},
{
"form": "Pistol Grip",
"price": "$600"
}
]
},
{
"name": "Sith Lightsaber",
"options": [{
"bladeColor": "red",
"count": "single",
"price": "$1000"
},
{
"bladeColor": "blue",
"count": "double",
"price": "$1875"
}
]
}
]
}

GraphQL filter data by key values (e.g. users age must be 31)

I'm new to GraphQL and I'm stuck on a rather simple problem which I've yet to find documentation for.
The only thing I'm trying to do is get a list of users from a JSON that have their age key set to 31. I can easily get all the users by doing:
{
user {
name
age
}
}
But I only want users that are aged 31 and what I think I need to do, which doesn't work, is:
{
user(age: 31) {
name
age
}
}
I just get the same results, so what do I need to do to enable the age: 31 "filter"?
Schema
var MyGraphQLSchema = buildSchema(`
type Query {
user(age: Int): [User]
}
type User {
id: Int
name: String
age: Int
}
`);
JSON Data
var root = {
"user": [
{
"id": 1,
"name": "binni",
"age": 31
},
{
"id": 2,
"name": "viddi",
"age": 31
},
{
"id": 3,
"name": "mási",
"age": 32
}
]
}
GraphQL Server
graphqlHTTP({
schema: MyGraphQLSchema,
rootValue: root,
graphiql: true
})
I'm using graphql.js and koa-graphql (which is similar to express-graphql).
Query Results
{
"data": {
"user": [
{
"name": "binni",
"age": 31
},
{
"name": "viddi",
"age": 31
},
{
"name": "mási",
"age": 32
}
]
}
}
You need to implement user as an ECMAScript function, instead of as an array.
// Sample data served by the `user` resolver. The `id` values mirror the
// question's data so the schema's `id: Int` field resolves to a value.
var users = [
  {
    "id": 1,
    "name": "binni",
    "age": 31
  },
  {
    "id": 2,
    "name": "viddi",
    "age": 31
  },
  {
    "id": 3,
    "name": "mási",
    "age": 32
  }
]

// Root value handed to graphqlHTTP. `user` must be a function so that it
// receives the field arguments; a plain array is returned as-is and any
// `age` argument is ignored.
var root = {
  /**
   * Resolver for `user(age: Int): [User]`.
   *
   * @param {{age?: (number|null)}} args - Field arguments from the query.
   * @returns {Array<Object>} All users when `age` is omitted or null,
   *   otherwise only the users whose age equals `age`.
   */
  "user": ({age}) =>
    // Loose `== null` matches both null and undefined: an omitted optional
    // argument arrives as undefined, which `=== null` would miss and which
    // would then filter every user out.
    age == null
      ? users
      : users.filter(user => user.age === age)
}
See http://graphql.org/graphql-js/passing-arguments/ for reference.