Parse the following JSON using google apps script - json

-- Hi everyone, for several days now I have been trying to parse the following JSON using Google Apps Script.
[
{
"NOMBRE": "ViejosNoUsarEl Quebrachal",
"ACTIVO": false,
"CODIGO": "ViejosNoUsarQUEB",
"CALLE": null,
"NUMERO": null,
"PROVINCIA": "Jujuy",
"LOCALIDAD": "EL MORRO",
"ZONA": null,
"SUPERFICIE": 3900,
"CODIGOEXTERNO": ""
},
{
"NOMBRE": "ViejoNoUsarSanta Teresa",
"ACTIVO": false,
"CODIGO": "ViejoNoUsarST",
"CALLE": null,
"NUMERO": null,
"PROVINCIA": "San Luis",
"LOCALIDAD": "Villa MercedesOLD",
"ZONA": "Oeste",
"SUPERFICIE": 3700,
"CODIGOEXTERNO": ""
},
{
"NOMBRE": "ViejosNoUsarGil",
"ACTIVO": false,
"CODIGO": "ViejosNoUsarGIL",
"CALLE": null,
"NUMERO": null,
"PROVINCIA": "Cordoba",
"LOCALIDAD": "9 DE JULIO",
"ZONA": "Oeste",
"SUPERFICIE": 200,
"CODIGOEXTERNO": ""
},
{
"NOMBRE": "ViejosNoUsarDon Manuel",
"ACTIVO": false,
"CODIGO": "ViejosNoUsarDM",
"CALLE": null,
"NUMERO": null,
"PROVINCIA": "Cordoba",
"LOCALIDAD": "9 DE JULIO",
"ZONA": "Oeste",
"SUPERFICIE": 400,
"CODIGOEXTERNO": ""
}
]
The GET response is giving me the JSON as I posted it.
Using Google Apps Script I want to add to a Google sheet as many rows as there are objects in the array.
In this case there would be 4 google sheet rows. I want to parse only the values of the properties.
As an example, the first row would look like this:
ViejosNoUsarEl Quebrachal | false | ViejosNoUsarQUEB | null | null | Jujuy | EL MORRO | null | 3900 |
I want to focus this question on the parsing matter, not on adding the rows to the Google sheet yet.
The problem is that I can't get the dot notation to extract the values I want.
For example, Logger.log(response.provincia); prints "Information null".

Modification points:
From your showing sample data and For example, Logger.log(response.provincia); prints "Information null"., I thought that the reason for your issue is due to that you are trying to retrieve the values from an array using response.provincia. In this case, it is required to be response[i].PROVINCIA. i is the index of an array. If you want to retrieve the value of "PROVINCIA" of the 1st element of the array, you can use response[0].PROVINCIA. From your showing data, provincia is required to be PROVINCIA. When response[0].provincia is run, undefined is returned. Please be careful about this.
When you want to retrieve the values like ViejosNoUsarEl Quebrachal | false | ViejosNoUsarQUEB | null | null | Jujuy | EL MORRO | null | 3900 | in order, in this case, the values are retrieved by preparing the keys in order.
When these points are reflected in a sample script, it becomes as follows.
Sample script:
// Column order for each spreadsheet row; values are pulled in this order.
const keys = ["NOMBRE", "ACTIVO", "CODIGO", "CALLE", "NUMERO", "PROVINCIA", "LOCALIDAD", "ZONA", "SUPERFICIE", "CODIGOEXTERNO"];

// Sample of the parsed GET response: an array of record objects.
const response = [
  {
    "NOMBRE": "ViejosNoUsarEl Quebrachal",
    "ACTIVO": false,
    "CODIGO": "ViejosNoUsarQUEB",
    "CALLE": null,
    "NUMERO": null,
    "PROVINCIA": "Jujuy",
    "LOCALIDAD": "EL MORRO",
    "ZONA": null,
    "SUPERFICIE": 3900,
    "CODIGOEXTERNO": ""
  },
  {
    "NOMBRE": "ViejoNoUsarSanta Teresa",
    "ACTIVO": false,
    "CODIGO": "ViejoNoUsarST",
    "CALLE": null,
    "NUMERO": null,
    "PROVINCIA": "San Luis",
    "LOCALIDAD": "Villa MercedesOLD",
    "ZONA": "Oeste",
    "SUPERFICIE": 3700,
    "CODIGOEXTERNO": ""
  },
  {
    "NOMBRE": "ViejosNoUsarGil",
    "ACTIVO": false,
    "CODIGO": "ViejosNoUsarGIL",
    "CALLE": null,
    "NUMERO": null,
    "PROVINCIA": "Cordoba",
    "LOCALIDAD": "9 DE JULIO",
    "ZONA": "Oeste",
    "SUPERFICIE": 200,
    "CODIGOEXTERNO": ""
  },
  {
    "NOMBRE": "ViejosNoUsarDon Manuel",
    "ACTIVO": false,
    "CODIGO": "ViejosNoUsarDM",
    "CALLE": null,
    "NUMERO": null,
    "PROVINCIA": "Cordoba",
    "LOCALIDAD": "9 DE JULIO",
    "ZONA": "Oeste",
    "SUPERFICIE": 400,
    "CODIGOEXTERNO": ""
  }
];

// Build a 2-D array: one row per object, one cell per key, in key order.
// The result is directly usable with Range.setValues().
const values = [];
for (const record of response) {
  const row = [];
  for (const key of keys) {
    row.push(record[key]);
  }
  values.push(row);
}
console.log(values)
When this script is run, the values are returned as the 2-dimensional array. This can be used for putting to the Spreadsheet using setValues.
Reference:
map()

Related

BigQuery JSON element extraction

I have a table in BigQuery with a JSON column, see below.
doc_id
data
222
{...}
333
{...}
The data JSON column looks like the IDs are set as headers.
{
"1675223776617": {
"author": "aaa",
"new": "2023-02-01",
"old": null,
"property": "asd",
"sender": "wew"
},
"1675223776618": {
"author": "aaa",
"new": true,
"old": null,
"property": "asd",
"sender": "ewew"
},
"1675223776619": {
"author": "bbb",
"new": "ySk2btk7",
"old": null,
"property": "qwe",
"sender": "yyy"
}
}
I would like to extract this JSON into this format using SQL in BigQuery.
Note, the header id isn't defined in the JSON.
doc_id
id
author
new
old
property
sender
222
1675223776617
aaa
2023-02-01
null
asd
wew
222
1675223776618
aaa
true
null
asd
ewew
222
1675223776619
bbb
ySk2btk7
null
qwe
yyy
I tried using the JSON_EXTRACT function without any success.
You might consider below approach using javascript UDF.
-- Flattens a JSON document whose top-level keys are ids and whose values are
-- records, returning one STRUCT per (id, record) pair with the key copied
-- into the struct's `id` field.
CREATE TEMP FUNCTION flatten_json(json STRING)
RETURNS ARRAY<STRUCT<id STRING, author STRING, new STRING, old STRING, property STRING, sender STRING>>
LANGUAGE js AS """
const result = [];
for (const [key, value] of Object.entries(JSON.parse(json))) {
  value["id"] = key;   // surface the object's key as the struct's id column
  result.push(value);
}
return result;
""";
WITH sample_table AS (
SELECT 222 doc_id, '''{
"1675223776617": {
"author": "aaa",
"new": "2023-02-01",
"old": null,
"property": "asd",
"sender": "wew"
},
"1675223776618": {
"author": "aaa",
"new": true,
"old": null,
"property": "asd",
"sender": "ewew"
},
"1675223776619": {
"author": "bbb",
"new": "ySk2btk7",
"old": null,
"property": "qwe",
"sender": "yyy"
}
}''' data
)
-- The JSON column is aliased `data` above, so pass `data` (not `json`) to the
-- UDF; UNNEST yields one output row per top-level key of the document.
SELECT doc_id, flattened.*
FROM sample_table, UNNEST(flatten_json(data)) flattened;
Query results

VBA Json Parse response with JsonConverter

Posting request API to Statistics Canada.
The Response string I'm getting is complex (at least to me) and I can not extract any value from it.
Tried many syntax possible to only get the "cansimId" without success.
My goal would be to list (array loop) the "dimensionPositionId":2 ("Principal statistics") and get all ("memberNameEn")
Many thanks! :)
Here's a sample from the response. Too big to post it all here +100K.txt:
[
{
"status": "SUCCESS",
"object": {
"responseStatusCode": 0,
"productId": "16100047",
"cansimId": "304-0014",
"cubeTitleEn": "Manufacturers' sales, inventories, orders and inventory to sales ratios, by industry (dollars unless otherwise noted)",
"cubeTitleFr": "Stocks, ventes, commandes et rapport des stocks sur les ventes pour les industries manufacturières, selon l'industrie (dollars sauf indication contraire)",
"cubeStartDate": "1992-01-01",
"cubeEndDate": "2021-08-01",
"frequencyCode": 6,
"nbSeriesCube": 2798,
"nbDatapointsCube": 935808,
"releaseTime": "2021-10-14T08:30",
"archiveStatusCode": "2",
"archiveStatusEn": "CURRENT - a cube available to the public and that is current",
"archiveStatusFr": "ACTIF - un cube qui est disponible au public et qui est toujours mise a jour",
"subjectCode": [
"1699",
"230402",
"330303",
"451003"
],
"surveyCode": [
"2101"
],
"dimension": [
{
"dimensionPositionId": 1,
"dimensionNameEn": "Geography",
"dimensionNameFr": "Géographie",
"hasUom": false,
"member": [
{
"memberId": 1,
"parentMemberId": null,
"memberNameEn": "Canada",
"memberNameFr": "Canada",
"classificationCode": "11124",
"classificationTypeCode": "1",
"geoLevel": 0,
"vintage": 2016,
"terminated": 0,
"memberUomCode": null
}
]
},
{
"dimensionPositionId": 2,
"dimensionNameEn": "Principal statistics",
"dimensionNameFr": "Les statistiques principales",
"hasUom": true,
"member": [
{
"memberId": 1,
"parentMemberId": null,
"memberNameEn": "Sales of goods manufactured (shipments)",
"memberNameFr": "Ventes de biens fabriqués (livraisons)",
"classificationCode": null,
"classificationTypeCode": null,
"geoLevel": null,
"vintage": null,
"terminated": 0,
"memberUomCode": 81
},
{
"memberId": 2,
"parentMemberId": null,
"memberNameEn": "New orders, estimated values of orders received during month",
"memberNameFr": "Nouvelles commandes, valeur estimative des commandes reçues durant le mois",
"classificationCode": null,
"classificationTypeCode": null,
"geoLevel": null,
"vintage": null,
"terminated": 0,
"memberUomCode": 81
},
{
"memberId": 3,
"parentMemberId": null,
"memberNameEn": "Unfilled orders, estimated values of orders at end of month",
"memberNameFr": "Commandes en carnet, valeur estimative des commandes à la fin du mois",
"classificationCode": null,
"classificationTypeCode": null,
"geoLevel": null,
"vintage": null,
"terminated": 0,
"memberUomCode": 81
},
{
"memberId": 4,
"parentMemberId": null,
"memberNameEn": "Raw materials, fuel, supplies, components, estimated values at end of month",
"memberNameFr": "Matières premières, combustibles, fournitures et composantes, valeur estimative à la fin du mois",
"classificationCode": null,
"classificationTypeCode": null,
"geoLevel": null,
"vintage": null,
"terminated": 0,
"memberUomCode": 81
},
{
"memberId": 5,
"parentMemberId": null,
"memberNameEn": "Goods or work in process, estimated values at end of month",
"memberNameFr": "Biens en cours de fabrication ou travaux en cours, valeur estimative à la fin du mois",
"classificationCode": null,
"classificationTypeCode": null,
"geoLevel": null,
"vintage": null,
"terminated": 0,
"memberUomCode": 81
},
{
"memberId": 6,
"parentMemberId": null,
"memberNameEn": "Finished goods manufactured, estimated values at end of month",
"memberNameFr": "Produits finis fabriqués, valeur estimative à la fin du mois",
"classificationCode": null,
"classificationTypeCode": null,
"geoLevel": null,
"vintage": null,
"terminated": 0,
"memberUomCode": 81
},
{
"memberId": 7,
"parentMemberId": null,
"memberNameEn": "Total inventory, estimated values of total inventory at end of the month",
"memberNameFr": "Total des stocks, valeur estimative des stocks à la fin du mois",
"classificationCode": null,
"classificationTypeCode": null,
"geoLevel": null,
"vintage": null,
"terminated": 0,
"memberUomCode": 81
},
{
"memberId": 8,
"parentMemberId": null,
"memberNameEn": "Ratio of total inventory to sales",
"memberNameFr": "Rapport du total des stocks aux ventes",
"classificationCode": null,
"classificationTypeCode": null,
"geoLevel": null,
"vintage": null,
"terminated": 0,
"memberUomCode": 270
},
{
"memberId": 9,
"parentMemberId": null,
"memberNameEn": "Ratio of finished goods to sales",
"memberNameFr": "Rapport des produits finis aux ventes",
"classificationCode": null,
"classificationTypeCode": null,
"geoLevel": null,
"vintage": null,
"terminated": 0,
"memberUomCode": 270
}
]
},
And here's the code:
' POST a getCubeMetadata request to the Statistics Canada WDS API and
' parse the JSON response with VBA-JSON (JsonConverter).
Dim Request As New MSXML2.XMLHTTP60
Dim Body As String
Dim Json As Object
' Request body is the JSON text [{"productId":16100047}]; Chr(34) embeds the quotes.
Body = "[{" & Chr(34) & "productId" & Chr(34) & ":16100047}]"
' Third argument False = synchronous request.
Request.Open "POST", "https://www150.statcan.gc.ca/t1/wds/rest/getCubeMetadata", False
Request.SetRequestHeader "Content-Type", "application/json"
Request.send Body
Set Json = JsonConverter.ParseJson(Request.ResponseText)
' These fail because the response's top level is an ARRAY, not an object
' (see the accepted fix below: index the array first, e.g. Json(1)("status")).
'Debug.print Json("status") 'Error !!!!
'Debug.Print Json("cansimId") 'Error!!!!
'Debug.print Json("object")("cansimId") 'Error!!!!
Thanks, Tomalak, you were right!
One thing... All my arrays seem to be 'Option Base 1' (although not specified!)
So Json(1)("status") is now working.
Finally got what I needed:
' Write the members of the "Principal statistics" dimension (id + French
' name) to the first worksheet, one member per row.
Dim ColStatPrinc As Collection
Dim StatPrinc As Dictionary
Dim x As Long   ' next worksheet row to write (was undeclared: fails under Option Explicit)

x = 1
' VBA-JSON collections are 1-based, so Json(1) is the first (only) element
' of the top-level response array; dimension(2) is "Principal statistics".
Set ColStatPrinc = Json(1)("object")("dimension")(2)("member")
With Sheets(1)
    For Each StatPrinc In ColStatPrinc
        .Cells(x, 1) = StatPrinc("memberId")
        .Cells(x, 2) = StatPrinc("memberNameFr")
        x = x + 1
    Next StatPrinc
End With

In sequelize bulkCreate timestamps are not updating

I am using bulkCreate and update
const item = await models.Gsdatatab.bulkCreate(gsdatamodel,{updateOnDuplicate: ["SCRIP","LTP","OHL","ORB15","ORB30","PRB","CAMARILLA"]});
I see the timestamps (createdAt and updatedAt) are not getting updated in the DB after the update. Do I need to explicitly pass those two in the bulkCreate to get them updated each time there is an update, or is there an option I am missing? Also the id is getting incremented while rows are getting updated. I don't want the id column to auto-increment in case of an update.
I am using the extended model creation for defining the model
The following was run using
MySQL Server version: 8.0.25 MySQL Community Server
Sequelize version 6.6.5
Summary
Timestamps:
The values returned from the .bulkCreate method can be misleading. You will need to query for the items after doing a bulkUpdate to find the new values. To quote the sequelize docs for version 6:
The success handler is passed an array of instances, but please notice
that these may not completely represent the state of the rows in the
DB. This is because MySQL and SQLite do not make it easy to obtain
back automatically generated IDs and other default values in a way
that can be mapped to multiple records. To obtain Instances for the
newly created values, you will need to query for them again.
Also, to update the updatedAt column, it will need to be included in the array parameter for updateOnDuplicate. Otherwise, it will not receive a new timestamp.
Non-sequential primary keys: The next auto_increment value for the MySQL primary key appears to be incremented when an update is being done. I'm not really sure if there's a way to prevent this from happening. However, it is still possible to insert rows that have primary keys which have been skipped over by the auto_increment mechanism. Also, according to another answer on stackoverflow concerning non-sequential primary keys, there should be no impact on efficiency. As an alternative, bulkCreate statements could be separated into two groups, one for inserts and one for updates, which could then be done separately using sequelize. The downside is that there would be extra queries to determine whether incoming data already exists in the database in order to decide between inserts versus updates.
Here's a code sample:
// Demo: shows two quirks of Sequelize bulkCreate with updateOnDuplicate on
// MySQL — (1) the returned instances can misreport createdAt/updatedAt and
// untouched columns (re-query the DB for the truth), and (2) the table's
// auto_increment counter is bumped by upserts, producing id gaps.
let {
Sequelize,
DataTypes,
} = require('sequelize')
async function run () {
  // Credentials come from the environment; every SQL statement is logged.
  let sequelize = new Sequelize(process.env.DB_NAME, process.env.DB_USER, process.env.DB_PASSWORD, {
host: 'localhost',
dialect: 'mysql',
logging: console.log
})
  // Minimal model with the default createdAt/updatedAt timestamp columns.
  let Item = sequelize.define('item', {
name: DataTypes.STRING,
age: DataTypes.INTEGER
}, {
tableName: 'items',
schema: 'agw_queries'
})
  // Drop and recreate the table so each run starts from a clean slate.
  await sequelize.sync({ force: true })
  // Helper: pause so successive updatedAt values visibly differ.
  let wait = sec => new Promise( res => setTimeout(res, sec * 1000));
  // Plain insert of two rows.
  let items = await Item.bulkCreate([{ name: 'mickey', age: 32 }, { name: 'minnie', age: 30 }])
console.log()
console.log('These values are returned upon creation.')
console.log()
console.log(JSON.stringify(items, null, 2))
console.log()
console.log('These values are returned after a subsequent query.')
console.log()
  // Re-query: the DB, not the instances returned above, is the source of truth.
  let r = await Item.findAll({})
console.log(JSON.stringify(r, null, 2))
console.log()
console.log('Waiting two seconds ...')
console.log()
await wait(2)
console.log('These values are returned after an update.')
console.log()
  // Upsert: ids 1 and 2 are updated in place; 'goofy' is a fresh insert.
  // Note updatedAt must be listed in updateOnDuplicate or it keeps its old value.
  items = await Item.bulkCreate(
[
{ id: 1, name: 'mickey mouse', age: 33 },
{ id: 2, name: 'minnie mouse', age: 31 },
{ name: 'goofy', age: 37 }
],
{ updateOnDuplicate: [ 'name', 'updatedAt' ] })
console.log(JSON.stringify(items, null, 2))
console.log()
console.log('These values are returned after another subsequent query.')
console.log()
  // The query shows `age` was NOT updated (not in updateOnDuplicate) even
  // though the instances returned by bulkCreate claim it was.
  r = await Item.findAll({})
console.log(JSON.stringify(r, null, 2))
console.log()
console.log('Waiting two seconds ...')
console.log()
await wait(2)
console.log('These values are returned after an update.')
console.log()
  // Second upsert round: demonstrates the auto_increment gap for 'donald duck'.
  items = await Item.bulkCreate(
[
{ id: 1, name: 'mickey t. mouse', age: 33 },
{ id: 2, name: 'minerva mouse', age: 31 },
{ name: 'donald duck', age: 32 }
],
{ updateOnDuplicate: [ 'name', 'updatedAt' ] })
console.log(JSON.stringify(items, null, 2))
console.log()
console.log('These values are returned after another subsequent query.')
console.log()
r = await Item.findAll({})
console.log(JSON.stringify(r, null, 2))
  // Always release the connection pool so the process can exit.
  await sequelize.close()
}
run()
And here's the output
Executing (default): DROP TABLE IF EXISTS `items`;
Executing (default): DROP TABLE IF EXISTS `items`;
Executing (default): CREATE TABLE IF NOT EXISTS `items` (`id` INTEGER NOT NULL auto_increment , `name` VARCHAR(255), `age` INTEGER, `createdAt` DATETIME NOT NULL, `updatedAt` DATETIME NOT NULL, PRIMARY KEY (`id`)) ENGINE=InnoDB;
Executing (default): SHOW INDEX FROM `items`
Executing (default): INSERT INTO `items` (`id`,`name`,`age`,`createdAt`,`updatedAt`) VALUES (NULL,'mickey',32,'2021-09-06 12:17:44','2021-09-06 12:17:44'),(NULL,'minnie',30,'2021-09-06 12:17:44','2021-09-06 12:17:44');
These values are returned upon creation.
[
{
"id": 1,
"name": "mickey",
"age": 32,
"createdAt": "2021-09-06T12:17:44.042Z",
"updatedAt": "2021-09-06T12:17:44.042Z"
},
{
"id": 2,
"name": "minnie",
"age": 30,
"createdAt": "2021-09-06T12:17:44.042Z",
"updatedAt": "2021-09-06T12:17:44.042Z"
}
]
These values are returned after a subsequent query.
Executing (default): SELECT `id`, `name`, `age`, `createdAt`, `updatedAt` FROM `items` AS `item`;
[
{
"id": 1,
"name": "mickey",
"age": 32,
"createdAt": "2021-09-06T12:17:44.000Z",
"updatedAt": "2021-09-06T12:17:44.000Z"
},
{
"id": 2,
"name": "minnie",
"age": 30,
"createdAt": "2021-09-06T12:17:44.000Z",
"updatedAt": "2021-09-06T12:17:44.000Z"
}
]
Waiting two seconds ...
These values are returned after an update.
Executing (default): INSERT INTO `items` (`id`,`name`,`age`,`createdAt`,`updatedAt`) VALUES (1,'mickey mouse',33,'2021-09-06 12:17:46','2021-09-06 12:17:46'),(2,'minnie mouse',31,'2021-09-06 12:17:46','2021-09-06 12:17:46'),(NULL,'goofy',37,'2021-09-06 12:17:46','2021-09-06 12:17:46') ON DUPLICATE KEY UPDATE `name`=VALUES(`name`),`updatedAt`=VALUES(`updatedAt`);
[
{
"id": 1,
"name": "mickey mouse",
"age": 33,
"createdAt": "2021-09-06T12:17:46.174Z",
"updatedAt": "2021-09-06T12:17:46.174Z"
},
{
"id": 2,
"name": "minnie mouse",
"age": 31,
"createdAt": "2021-09-06T12:17:46.174Z",
"updatedAt": "2021-09-06T12:17:46.174Z"
},
{
"id": 5,
"name": "goofy",
"age": 37,
"createdAt": "2021-09-06T12:17:46.174Z",
"updatedAt": "2021-09-06T12:17:46.174Z"
}
]
These values are returned after another subsequent query.
Executing (default): SELECT `id`, `name`, `age`, `createdAt`, `updatedAt` FROM `items` AS `item`;
[
{
"id": 1,
"name": "mickey mouse",
"age": 32,
"createdAt": "2021-09-06T12:17:44.000Z",
"updatedAt": "2021-09-06T12:17:46.000Z"
},
{
"id": 2,
"name": "minnie mouse",
"age": 30,
"createdAt": "2021-09-06T12:17:44.000Z",
"updatedAt": "2021-09-06T12:17:46.000Z"
},
{
"id": 3,
"name": "goofy",
"age": 37,
"createdAt": "2021-09-06T12:17:46.000Z",
"updatedAt": "2021-09-06T12:17:46.000Z"
}
]
Waiting two seconds ...
These values are returned after an update.
Executing (default): INSERT INTO `items` (`id`,`name`,`age`,`createdAt`,`updatedAt`) VALUES (1,'mickey t. mouse',33,'2021-09-06 12:17:48','2021-09-06 12:17:48'),(2,'minerva mouse',31,'2021-09-06 12:17:48','2021-09-06 12:17:48'),(NULL,'donald duck',32,'2021-09-06 12:17:48','2021-09-06 12:17:48') ON DUPLICATE KEY UPDATE `name`=VALUES(`name`),`updatedAt`=VALUES(`updatedAt`);
[
{
"id": 1,
"name": "mickey t. mouse",
"age": 33,
"createdAt": "2021-09-06T12:17:48.258Z",
"updatedAt": "2021-09-06T12:17:48.258Z"
},
{
"id": 2,
"name": "minerva mouse",
"age": 31,
"createdAt": "2021-09-06T12:17:48.258Z",
"updatedAt": "2021-09-06T12:17:48.258Z"
},
{
"id": 8,
"name": "donald duck",
"age": 32,
"createdAt": "2021-09-06T12:17:48.258Z",
"updatedAt": "2021-09-06T12:17:48.258Z"
}
]
These values are returned after another subsequent query.
Executing (default): SELECT `id`, `name`, `age`, `createdAt`, `updatedAt` FROM `items` AS `item`;
[
{
"id": 1,
"name": "mickey t. mouse",
"age": 32,
"createdAt": "2021-09-06T12:17:44.000Z",
"updatedAt": "2021-09-06T12:17:48.000Z"
},
{
"id": 2,
"name": "minerva mouse",
"age": 30,
"createdAt": "2021-09-06T12:17:44.000Z",
"updatedAt": "2021-09-06T12:17:48.000Z"
},
{
"id": 3,
"name": "goofy",
"age": 37,
"createdAt": "2021-09-06T12:17:46.000Z",
"updatedAt": "2021-09-06T12:17:46.000Z"
},
{
"id": 6,
"name": "donald duck",
"age": 32,
"createdAt": "2021-09-06T12:17:48.000Z",
"updatedAt": "2021-09-06T12:17:48.000Z"
}
]

Parsing JSON without key names to retrieve a column

I am loading json from data.gov that does not have key names for the values in the json data, e.g. below: the metadata is available separately.
I am able to load the json into a variant column, but cannot see how to parse and query for specific columns, e.g. Frankford below - I have tried JSONcol:data[0] which returns the entire entry, but am unable to see how to specify column 4, say.
{
data: [ [ "row-ea6u~fkaa~32ry", "0B8F94EE5292", 0, 1486063689, null, 1486063689, null, "{ }", "410", "21206", "Frankford", "2", "NORTHEASTERN", [ "{\"address\": \"4509 BELAIR ROAD\", \"city\": \"Baltimore\", \"state\": \"MD\", \"zip\": \"\"}", null, null, null, true ], null, null, null ]]
}
The following code is used to create and load the snowflake table:
-- Load the raw JSON into a single VARIANT column.
create or replace table snowpipe.public.snowtable(jsontext variant);
-- Snowflake stages are referenced with '@' (the '#' in the original post is
-- a formatting artifact and is not valid stage syntax).
copy into snowpipe.public.snowtable
from @snowpipe.public.snowstage
file_format = (type = 'JSON')
Not exactly sure how your variant data looks once you have loaded it, but I experimented on a variant via PARSE_JSON for your object. I had to double the backslashes to make it valid SQL.
-- Probe: parse the raw text into a VARIANT, then drill in with the colon
-- path operator (:), [] array indexing, and array_slice().
select
PARSE_JSON('{ data: [ [ "row-ea6u~fkaa~32ry", "0B8F94EE5292", 0, 1486063689, null, 1486063689, null, "{ }", "410", "21206", "Frankford", "2", "NORTHEASTERN", [ "{\\"address\\": \\"4509 BELAIR ROAD\\", \\"city\\": \\"Baltimore\\", \\"state\\": \\"MD\\", \\"zip\\": \\"\\"}", null, null, null, true ], null, null, null ]]}') as j
,j:data as jd                        -- the outer "data" array
,jd[0] as jd0                        -- the first (only) inner row array
,jd0[3] as jd0_3                     -- one column of that row (0-based index 3)
,array_slice(j:data[0],3,5) as jd0_3to4  -- columns 3..4 (end index is exclusive)
;
shows that you can use [0] notation to index arrays, and thus get the results:
J: { "data": [ [ "row-ea6u~fkaa~32ry", "0B8F94EE5292", 0, 1486063689, null, 1486063689, null, "{ }", "410", "21206", "Frankford", "2", "NORTHEASTERN", [ "{\"a...
JD: [ [ "row-ea6u~fkaa~32ry", "0B8F94EE5292", 0, 1486063689, null, 1486063689, null, "{ }", "410", "21206", "Frankford", "2", "NORTHEASTERN", [ "{\"address\": \"4509 BELAIR ROAD\", \"city\": \"...
JD0: [ "row-ea6u~fkaa~32ry", "0B8F94EE5292", 0, 1486063689, null, 1486063689, null, "{ }", "410", "21206", "Frankford", "2", "NORTHEASTERN", [ "{\"address\": \"4509 BELAIR ROAD\", \"city\": \"Baltimore\", \"state\": \"MD\", \"...
JD0_3: 1486063689
JD0_3TO4: [ 1486063689, null ]
so if you have unknown amount of first level elements in data that you want to access, then use LATERAL FLATTEN like so:
-- When the number of inner rows in "data" is unknown, explode them with
-- LATERAL FLATTEN: one result row per inner array.
WITH data as (
select PARSE_JSON('{ data: [ [ "row-1", "0B8", 0 ],["row-2", "F94", 2],
["row-3", "EE5", 4]]}') as j
)
select f.value[0]::text as row_name      -- element 0 of each inner array
,f.value[1]::text as serial_number       -- element 1
,f.value[2]::number as num               -- element 2
from data d,
lateral flatten(input=> d.j:data) f;     -- f.value is one inner array per row
gives:
ROW_NAME SERIAL_NUMBER NUM
row-1 0B8 0
row-2 F94 2
row-3 EE5 4

Converting 1-to-many json into csv

I'm trying to parse json output from an API call. The output has an array of orders, and each order has an array of items. I want to parse the output such that I have a single CSV output of each individual item with its parent order ID.
So if a single order contains multiple items, I need the orderID repeated for each item in its order. I've read the jq documentation and dozens of samples, and I've tried some trial and error for hours. I'm SO confused as to how to do this.
I'm struggling very much with the jq parsing syntax. None of the examples are really helping, and I'm just confused. Here's the basics:
curl -s 'https://api.site.com/orders?page=1&pageSize=10' | jq '.'
A sample of the json is below.
{
"orders": [
{
"orderId": 217356098,
"items": [
{
"orderItemId": 327010821,
"lineItemKey": "1",
"sku": "AJC-C10S",
"name": "TestDescription",
"imageUrl": null,
"weight": null,
"quantity": 2,
"unitPrice": 106.85,
"taxAmount": null,
"shippingAmount": null,
"warehouseLocation": null,
"options": [],
"productId": null,
"fulfillmentSku": null,
"adjustment": false,
"upc": null,
"createDate": "2016-11-09T02:11:28.307",
"modifyDate": "2016-11-09T02:11:28.307"
},
{
"orderItemId": 327010822,
"lineItemKey": "1",
"sku": "AJC-C106",
"name": "AnotherTestDescription",
"imageUrl": null,
"weight": null,
"quantity": 2,
"unitPrice": 106.85,
"taxAmount": null,
"shippingAmount": null,
"warehouseLocation": null,
"options": [],
"productId": null,
"fulfillmentSku": null,
"adjustment": false,
"upc": null,
"createDate": "2016-11-09T02:11:28.307",
"modifyDate": "2016-11-09T02:11:28.307"
}
]
},
],
"total": 359934,
"page": 1,
"pages": 179968
}
Expected output (without column headers of course):
orderId,orderItemId,sku,name
217356098,327010821,"JC-C10S","TestDescription"
217356098,327010822,"JC-C106","AnotherTestDescription"
As you can see, each item has its own line, but if they came from the same order, the orderId should be repeated on each line.
How can I do this?
With the -r command-line option, the following jq filter:
.orders[]                               # one stream element per order
| .orderId as $oid                      # remember the parent order's id
| .items[]                              # then one element per item in it
| [$oid, .orderItemId, .sku, .name]     # row: order id repeated per item
| @csv                                  # render as CSV ('#' starts a comment in jq, so the original "#csv" was a no-op; the builtin format string is @csv; use with jq -r)
produces the desired output.
If there's any chance that any of the selected values might be [], then consider adding a line like the following immediately before the last line above:
| map_values(if . == [] then "NONE" else . end)
Thanks! That worked with a slight alteration:
.orders[]
| .orderId as $oid
| .items[]                                        # each item is now the context,
| [$oid, .orderItemId, .sku, .name | tostring]    # so use .orderItemId, not .items.orderItemId (which would yield null); tostring applies to every element
| @csv                                            # jq's CSV formatter is @csv, not "#csv" ('#' begins a comment)