Inserting data from a for loop into a MySQL database

I am fetching data from an API and extracting part of it. The data comes as nested dictionaries and lists, and I used a nested for loop to extract the variables. I want to insert the result into a MySQL DB, but I'm not sure how, since some of the columns will receive a different number of values. For example, cars could be 1, 2, 3 or 4.
All the vehicle_id values fetched should be inserted into a single column all_vehicles; I am not sure how to do this either.
datetime_received = datetime.now()
car_dealer_id = 11
int_id = 8
dealer_name = 'XXX'

for car in cars:
    code = car['Code']
    start_date = car['RDate']
    end_date = car['RDate']
    for portion in car['Consists']['Portions']:
        location = portion['Location']
        for consist in portion['Consist']:
            ext_id = consist['ExtId']
            for vehicle in consist['Vehicles']:
                vehicle_id = vehicle['Id']

sql = """
    INSERT INTO table
    (`datetime_received`, `car_dealer_id`, `ind_id`, `dealer_name`, `code`, `start_date`, `start_time`, `end_date`, `location`, `ext_id`, `all_vehilces`)
    VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s')"""
cursor.executemany(sql, data)
connection.commit()
connection.close()
Data:
cars = {
    "Consists": {
        "Portions": [
            {
                "Consist": [
                    {
                        "ext_id": "755411",
                        "Position": "0",
                        "Vehicles": [
                            {"Id": "92", "Position": "1"},
                            {"Id": "921", "Position": "2"},
                            {"Id": "932", "Position": "3"},
                            {"Id": "34", "Position": "4"},
                            {"Id": "92", "Position": "5"}
                        ]
                    }
                ],
                "Location": "ATA"
            }
        ],
        "Updated": "2022-07-21T04:25:08.0000000+01:00"
    },
    "Code": "575",
    "RDate": "2022-07-21T08:25:00.0000000+01:00",
    "RunDate": "2022-07-21T00:00:00.0000000+01:00",
}
EDITED: Thanks to Barmar, I managed to insert the values.
I have one final value to add to data[]. Based on the ext_id value I receive, I have a function that returns the corresponding my_system_id. I want to insert the my_system_id values as well, but I am not calling the function from the correct place, so it is not being inserted into the DB table.
Here is the function:
def get_my_system_id(ext_id):
    cursor = db_conn.cursor()
    sql = """SELECT my_system_id FROM table
             WHERE ext_id = %s"""
    data = (ext_id,)
    cursor.execute(sql, data)
    id_row = cursor.fetchone()
    if id_row is not None:
        my_id = id_row[0]
        return my_id
    else:
        return None

Use ','.join() to combine all the vehicle IDs into a comma-delimited list.
In the prepared statement, %s should not be quoted. You also only had 10 placeholders, but you're inserting into 11 columns.
With the edit: add a call to get_my_system_id(ext_id) in the loop, and add that value to the data list.
data = []
for car in cars:
    code = car['Code']
    start_date, start_time = car['RDate'].split('T')
    end_date = car['RDate']
    for portion in car['Consists']['Portions']:
        location = portion['Location']
        for consist in portion['Consist']:
            ext_id = consist['ExtId']
            vehicle_ids = ','.join(v['Id'] for v in consist['Vehicles'])
            system_id = get_my_system_id(ext_id)
            if not system_id:
                print(f"No system ID found for ext_id = {ext_id}, skipping")
                continue
            data.append((datetime_received, car_dealer_id, int_id, dealer_name, code, start_date, start_time, end_date, location, ext_id, system_id, vehicle_ids))

sql = """
    INSERT INTO table
    (`datetime_received`, `car_dealer_id`, `ind_id`, `dealer_name`, `code`, `start_date`, `start_time`, `end_date`, `location`, `ext_id`, `my_system_id`, `all_vehicles`)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
cursor.executemany(sql, data)
connection.commit()
connection.close()
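For reference, here is a hypothetical table definition this INSERT would match. The column names come from the statement above; the table name and every type are assumptions:

-- Hypothetical DDL matching the INSERT above; table name and types are assumptions.
CREATE TABLE car_dealer_feed (
    datetime_received DATETIME,
    car_dealer_id     INT,
    ind_id            INT,
    dealer_name       VARCHAR(100),
    code              VARCHAR(20),
    start_date        DATE,
    start_time        VARCHAR(30),   -- '08:25:00.0000000+01:00' does not fit a plain TIME
    end_date          VARCHAR(40),   -- the full RDate timestamp string
    location          VARCHAR(10),
    ext_id            VARCHAR(20),
    my_system_id      INT,
    all_vehicles      VARCHAR(255)   -- comma-delimited vehicle IDs, e.g. '92,921,932,34,92'
);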

Related

JSON object: Query a value from unknown node based on a condition

I'm trying to query two values (DISCOUNT_TOTAL and ITEM_TOTAL) from a JSON object in a PostgreSQL database. Take the following query as reference:
SELECT
    mt.customer_order,
    totals -> 0 -> 'amount' -> 'centAmount' AS DISCOUNT_TOTAL,
    totals -> 1 -> 'amount' -> 'centAmount' AS ITEM_TOTAL
FROM
    my_table mt,
    to_jsonb(my_table.my_json -> 'data' -> 'order' -> 'totals') totals
WHERE
    mt.customer_order IN ('1000001', '1000002')
The query works just fine. The big problem is that, for some reason out of my control, the values DISCOUNT_TOTAL and ITEM_TOTAL sometimes change their positions in the JSON object from one customer_order to another. So I cannot point at totals -> 0 -> 'amount' -> 'centAmount' assuming that it contains the value for type: DISCOUNT_TOTAL (the same goes for type: ITEM_TOTAL). Is there any workaround to get the correct centAmount for each type?
Use a path query instead of hardcoding the array positions:
with sample (jdata) as (
    values ('{
        "data": {
            "order": {
                "email": "something",
                "totals": [
                    {
                        "type": "ITEM_TOTAL",
                        "amount": {
                            "centAmount": 14990
                        }
                    },
                    {
                        "type": "DISCOUNT_TOTAL",
                        "amount": {
                            "centAmount": 6660
                        }
                    }
                ]
            }
        }
    }'::jsonb)
)
select jsonb_path_query_first(
           jdata,
           '$.data.order.totals[*] ? (@.type == "DISCOUNT_TOTAL").amount.centAmount'
       ) as discount_total,
       jsonb_path_query_first(
           jdata,
           '$.data.order.totals[*] ? (@.type == "ITEM_TOTAL").amount.centAmount'
       ) as item_total
from sample;
db<>fiddle here
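Applied to the table from the question, the same jsonpath approach might look like the sketch below. It reuses the column names from the question and assumes my_json is castable to jsonb:

SELECT
    mt.customer_order,
    jsonb_path_query_first(
        mt.my_json::jsonb,
        '$.data.order.totals[*] ? (@.type == "DISCOUNT_TOTAL").amount.centAmount'
    ) AS discount_total,
    jsonb_path_query_first(
        mt.my_json::jsonb,
        '$.data.order.totals[*] ? (@.type == "ITEM_TOTAL").amount.centAmount'
    ) AS item_total
FROM my_table mt
WHERE mt.customer_order IN ('1000001', '1000002');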
EDIT: In case your PostgreSQL version does not support json path queries, you can do it by expanding the array into rows and then doing a pivot by case and sum:
with sample (order_id, jdata) as (
    values (1, '{
        "data": {
            "order": {
                "email": "something",
                "totals": [
                    {
                        "type": "ITEM_TOTAL",
                        "amount": {
                            "centAmount": 14990
                        }
                    },
                    {
                        "type": "DISCOUNT_TOTAL",
                        "amount": {
                            "centAmount": 6660
                        }
                    }
                ]
            }
        }
    }'::jsonb)
)
select order_id,
       sum(
           case
               when el->>'type' = 'DISCOUNT_TOTAL' then (el->'amount'->>'centAmount')::int
               else 0
           end
       ) as discount_total,
       sum(
           case
               when el->>'type' = 'ITEM_TOTAL' then (el->'amount'->>'centAmount')::int
               else 0
           end
       ) as item_total
from sample
cross join lateral jsonb_array_elements(jdata->'data'->'order'->'totals') as a(el)
group by order_id;
db<>fiddle here
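A slightly terser equivalent on 9.4+ swaps the CASE expressions for aggregate FILTER clauses (my variant, not part of the original answer):

select order_id,
       max((el->'amount'->>'centAmount')::int)
           filter (where el->>'type' = 'DISCOUNT_TOTAL') as discount_total,
       max((el->'amount'->>'centAmount')::int)
           filter (where el->>'type' = 'ITEM_TOTAL') as item_total
from sample
cross join lateral jsonb_array_elements(jdata->'data'->'order'->'totals') as a(el)
group by order_id;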

Split JSON into columns in a dynamic way in BigQuery

I have the following JSON:
{
    "rewards": {
        "reward_1": {
            "type": "type 1",
            "amount": "amount 1"
        },
        "reward_2": {
            "type": "type 2",
            "amount": "amount 2"
        },
        "reward_3": {
            "type": "type 3",
            "amount": "amount 3"
        },
        "reward_4": {
            "type": "type 4",
            "amount": "amount 4"
        }
    }
}
This JSON is dynamic and I don't necessarily know how many rewards it will contain; here it's 4, but it could be 2 or 8, etc.
I want to write a query in BigQuery that parses those values dynamically, without knowing how many of them exist, and then splits them into columns (one column per reward).
Thank you!
Hope these are helpful.
Since the JSON is dynamic, the first step is to find the maximum reward sequence number (I've used a regular expression and a max_reward UDF).
Then, extract each reward from the rewards JSON field in an iterative way.
Lastly, turn the result into wide form using a PIVOT query.
If you want a more generic solution, you need to use BigQuery dynamic SQL to generate the PIVOT columns; I've hard-coded them in the query (see the sketch at the end of this answer):
('reward_1', 'reward_2', 'reward_3', 'reward_4')
query:
CREATE TEMP TABLE sample AS
SELECT 1 AS id, '{"rewards": { "reward_1": { ... ' AS json -- put your json here
UNION ALL
SELECT 2 AS id, '{"rewards": { "reward_1": { ... ' AS json -- put another json here
;
CREATE TEMP FUNCTION extract_reward(json STRING, seq INT64)
RETURNS STRUCT<type STRING, amount STRING>
LANGUAGE js AS """
  return JSON.parse(json)['reward_' + seq];
""";
CREATE TEMP FUNCTION max_reward(arr ARRAY<STRING>) AS ((
  SELECT MAX(CAST(v AS INT64)) FROM UNNEST(arr) v
));
SELECT * FROM (
  SELECT id,
         'reward_' || seq AS reward,
         extract_reward(FORMAT('%t', JSON_QUERY(json, '$.rewards')), seq) AS value
  FROM sample, UNNEST(GENERATE_ARRAY(1, max_reward(REGEXP_EXTRACT_ALL(json, r'"reward_([0-9]+)"')))) seq
) PIVOT (ANY_VALUE(value) FOR reward IN ('reward_1', 'reward_2', 'reward_3', 'reward_4'));
To split each reward STRUCT into separate type and amount columns:
SELECT * FROM (
  SELECT id,
         'reward_' || seq || '_' || IF(offset = 0, 'type', 'amount') AS reward,
         value
  FROM sample,
       UNNEST(GENERATE_ARRAY(1, max_reward(REGEXP_EXTRACT_ALL(json, r'"reward_([0-9]+)"')))) seq,
       UNNEST([extract_reward(FORMAT('%t', JSON_QUERY(json, '$.rewards')), seq)]) pair,
       UNNEST([pair.type, pair.amount]) value WITH OFFSET
) PIVOT (ANY_VALUE(value) FOR reward IN ('reward_1_type', 'reward_2_type', 'reward_3_type', 'reward_4_type', 'reward_1_amount', 'reward_2_amount', 'reward_3_amount', 'reward_4_amount'));
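As for the dynamic-SQL variant mentioned above, here is a rough sketch. It assumes it runs as a BigQuery script in the same session as the first query, so sample, extract_reward, and max_reward are in scope; if temporary functions turn out not to be visible to EXECUTE IMMEDIATE in your environment, they would need to be created as persistent UDFs instead:

-- Sketch: derive the PIVOT column list from the data, then run the query dynamically.
-- DECLARE must appear before the other statements of the script.
DECLARE reward_cols STRING;

SET reward_cols = (
  SELECT STRING_AGG(DISTINCT FORMAT("'reward_%s'", n), ', ')
  FROM sample, UNNEST(REGEXP_EXTRACT_ALL(json, r'"reward_([0-9]+)"')) n
);

EXECUTE IMMEDIATE FORMAT("""
  SELECT * FROM (
    SELECT id,
           'reward_' || seq AS reward,
           extract_reward(FORMAT('%%t', JSON_QUERY(json, '$.rewards')), seq) AS value
    FROM sample, UNNEST(GENERATE_ARRAY(1, max_reward(REGEXP_EXTRACT_ALL(json, r'"reward_([0-9]+)"')))) seq
  ) PIVOT (ANY_VALUE(value) FOR reward IN (%s))
""", reward_cols);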

Using JSON_VALUE + JSON_QUERY to create new JSON

I was wondering if it would be possible to reshape JSON and return it as JSON. I have JSON which, in simplified form, looks like:
Name
Details
    fieldId
    fieldValue
Other
    Id
    Value
And would like to return:
Name
Details
    fieldId
    fieldValue
I can return Name and Details with JSON_VALUE and JSON_QUERY but would like it as one combined JSON field.
create table #test (
    [id] int,
    [json] varchar(max)
);

insert into #test (id, json) values (1, '{
    "Name": "Test 1",
    "Details": [
        {
            "fieldId": "100",
            "fieldValue": "ABC"
        }],
    "Other": [
        {
            "Id": "1",
            "Value": "ABC"
        }]
}');

insert into #test (id, json) values (2, '{
    "Name": "Test 2",
    "Details": [
        {
            "fieldId": "101",
            "fieldValue": "ABCD"
        }],
    "Other": [
        {
            "Id": "2",
            "Value": "ABCD"
        }]
}');
select id, JSON_VALUE(json, '$.Name'), JSON_QUERY(json, '$.Details')
from #test
As an additional option, you may parse the JSON with OPENJSON() and an explicit schema (column definitions) and then build the new JSON again:
SELECT
    id,
    newjson = (
        SELECT Name, Details
        FROM OPENJSON(json) WITH (
            Name varchar(max) '$.Name',
            Details nvarchar(max) '$.Details' AS JSON
        )
        FOR JSON PATH, WITHOUT_ARRAY_WRAPPER
    )
FROM #test
And the same approach with JSON_VALUE() and JSON_QUERY():
SELECT
    id,
    newjson = (
        SELECT JSON_VALUE(json, '$.Name') AS [Name], JSON_QUERY(json, '$.Details') AS [Details]
        FOR JSON PATH, WITHOUT_ARRAY_WRAPPER
    )
FROM #test
One solution is to use JSON_MODIFY to re-construct the JSON:
SELECT
    id,
    JSON_MODIFY(
        JSON_MODIFY(
            '{}',
            '$.Name',
            JSON_VALUE(json, '$.Name')
        ),
        '$.Details',
        JSON_QUERY(json, '$.Details')
    ) AS new_json
FROM #test
An alternative would be to delete the Other node using JSON_MODIFY, but you have to know the name of the node(s) to remove.
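That deletion approach might look like the following sketch (my illustration of the suggestion above; in lax mode, setting a path to NULL with JSON_MODIFY deletes the key):

SELECT
    id,
    -- setting '$.Other' to NULL removes that node, leaving Name and Details
    JSON_MODIFY(json, '$.Other', NULL) AS new_json
FROM #test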

Querying nested hash of hashes using PostgreSQL

I have a json datatype field to store complex data. JSON data looks like this hash:
{
    "0" => {
        "origin" => {},
        "diff" => {
            "type" => "type_1",
            ...
        }
    },
    "1" => {
        "origin" => {
            "type" => "type_2",
            ...
        },
        "diff" => {
            ...
        }
    },
    ...
}
I've tried to transform the JSON into an array to get rid of these index keys, but it did not help me.
WITH data_values AS (
    SELECT id, array_to_json(array(SELECT t.v FROM json_each_text(data) AS t(k, v))) AS array_data
    FROM event_logs
)
SELECT * FROM data_values
WHERE array_data->'origin'->>'type' = 'type_3' OR array_data->'diff'->>'type' = 'type_3'
Also, I had an idea to use json_object_keys and iterate over the top-level keys to find the necessary key/value pair, but I'm a newbie with PostgreSQL and got stuck solving it that way.
My PostgreSQL version is 11, so jsonpath is not available to me.
Table definition example:
CREATE TABLE event_logs (
    id integer,
    data json,
    created_at timestamp without time zone
);
INSERT INTO event_logs (id, data)
VALUES
(1, '{"0": {"origin": {}, "diff": {"type": "type_1"}}, "1": {"origin": {"type": "type_1"}, "diff": {}}}'),
(2, '{"0": {"origin": {}, "diff": {"type": "type_2"}}, "1": {"origin": {}, "diff": {"type": "type_3"}}}'),
(3, '{"0": {"origin": {}, "diff": {"type": "type_3"}}, "1": {"origin": {"type": "type_2"}, "diff": {}}}')
Important note: there can be a different count of top-level keys.
I want to find records by the key/value pair (for example, type = 'type_3'). It should select records with ID 2 and 3.
Can you help me to do it right?
Storing the objects as an array instead of an integer-indexed object is a good idea, but that doesn't let you skip this level with the -> operators. (Only jsonpath can do that).
You will need to use the json_each iteration inside your WHERE clauses:
SELECT * FROM event_logs
WHERE EXISTS (
    SELECT *
    FROM json_each(data)
    WHERE value->'origin'->>'type' = 'type_3'
       OR value->'diff'->>'type' = 'type_3'
);
(If you had used an array, json_each would become json_array_elements).
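For illustration, that array variant might look like this hypothetical query (only valid if the data column held a JSON array of {origin, diff} objects, which is not how the table above is defined):

-- hypothetical: assumes `data` holds a JSON array instead of an integer-keyed object
SELECT * FROM event_logs
WHERE EXISTS (
    SELECT *
    FROM json_array_elements(data) AS elem
    WHERE elem->'origin'->>'type' = 'type_3'
       OR elem->'diff'->>'type' = 'type_3'
);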

postgresql - query to build up json

Running: PostgreSQL 9.6.2
I have data stored in a table that is in the form of a key/value pair. The "key" is actually the path of a json object, each one being a property. So for example if the key was "cogs","props1","value", then the json object would be like so:
{
    "cogs": {
        "props1": {
            "value": 100
        }
    }
}
I'd like to somehow reconstruct a json object via a SQL query if possible. Here is the test data set:
drop table if exists test_table;
CREATE TABLE test_table
(
    id serial,
    file_id integer NOT NULL,
    key character varying[],
    value character varying,
    status character varying
)
WITH (
    OIDS = FALSE
)
TABLESPACE pg_default;
insert into test_table (file_id, key, value, status)
values (1, '{"cogs","description"}', 'some awesome cog', 'approved');
insert into test_table (file_id, key, value, status)
values (1, '{"cogs","display"}', 'Giant Cog', null);
insert into test_table (file_id, key, value, status)
values (1, '{"cogs","props1","value"}', '100', 'not verified');
insert into test_table (file_id, key, value, status)
values (1, '{"cogs","props1","id"}', 26, 'approved');
insert into test_table (file_id, key, value, status)
values (1, '{"cogs","props1","dimensions"}', '{"200", "300"}', null);
insert into test_table (file_id, key, value, status)
values (1, '{"cogs","props2","value"}', '200', 'not verified');
insert into test_table (file_id, key, value, status)
values (1, '{"cogs","props2","id"}', 27, 'approved');
insert into test_table (file_id, key, value, status)
values (1, '{"cogs","props2","dimensions"}', '{"700", "800"}', null);
insert into test_table (file_id, key, value, status)
values (1, '{"widgets","description"}', 'some awesome widget', 'approved');
insert into test_table (file_id, key, value, status)
values (1, '{"widgets","display"}', 'Giant Widget', null);
insert into test_table (file_id, key, value, status)
values (1, '{"widgets","props1","value"}', '100', 'not verified');
insert into test_table (file_id, key, value, status)
values (1, '{"widgets","props1","id"}', 28, 'approved');
insert into test_table (file_id, key, value, status)
values (1, '{"widgets","props1","dimensions"}', '{"200", "300"}', null);
insert into test_table (file_id, key, value, status)
values (1, '{"widgets","props2","value"}', '200', 'not verified');
insert into test_table (file_id, key, value, status)
values (1, '{"widgets","props2","id"}', 29, 'approved');
insert into test_table (file_id, key, value, status)
values (1, '{"widgets","props2","dimensions"}', '{"900", "1000"}', null);
The output I'm looking for is in this format:
{
"cogs": {
"description": "some awesome cog",
"display": "Giant Cog",
"props1": {
"value": 100,
"id": 26,
"dimensions": [200, 300]
},
"props2": {
"value": 200,
"id": 27,
"dimensions": [700, 800]
}
},
"widgets": {
"description": "some awesome widget",
"display": "Giant Widget",
"props1": {
"value": 100,
"id": 28,
"dimensions": [200, 300]
},
"props2": {
"value": 200,
"id": 29,
"dimensions": [900, 1000]
}
}
}
Some issues I'm facing:
The "value" column can hold text, numbers, and arrays. For whatever reason, the server-side code using knex.js is storing an array of integers (e.g., [100, 300]) in Postgres in the following format: {"100","300"}. I need to ensure I extract this as an array of integers as well.
I'd like to make this as dynamic as possible, perhaps with a recursive procedure to figure out the depth of each "key" path, rather than hard-coding array lookup values.
json_object_agg works well to group properties into a single object, but it breaks when hitting a null value. So if the "key" column has only two elements (e.g., "cogs","description") and I attempt to aggregate up an array of length three (e.g., "cogs","props1","value"), it will break unless I filter on only arrays of length 3.
Preserve the ordering of the input. @klin's solution below is amazing and gets me 95% of the way there; however, I failed to mention that the input ordering should also be preserved...
A dynamic solution needs some work.
First, we need a function to convert a text array and a value to a jsonb object.
create or replace function keys_to_object(keys text[], val text)
returns jsonb language plpgsql as $$
declare
    i int;
    rslt jsonb = to_jsonb(val);
begin
    for i in select generate_subscripts(keys, 1, true) loop
        rslt := jsonb_build_object(keys[i], rslt);
    end loop;
    return rslt;
end $$;
select keys_to_object(array['key', 'subkey', 'subsub'], 'value');
keys_to_object
------------------------------------------
{"key": {"subkey": {"subsub": "value"}}}
(1 row)
Next, another function to merge jsonb objects (see Merging JSONB values in PostgreSQL).
create or replace function jsonb_merge(a jsonb, b jsonb)
returns jsonb language sql as $$
select
    jsonb_object_agg(
        coalesce(ka, kb),
        case
            when va isnull then vb
            when vb isnull then va
            when jsonb_typeof(va) <> 'object' or jsonb_typeof(vb) <> 'object' then vb
            else jsonb_merge(va, vb)
        end
    )
from jsonb_each(a) e1(ka, va)
full join jsonb_each(b) e2(kb, vb) on ka = kb
$$;
select jsonb_merge('{"key": {"subkey1": "value1"}}', '{"key": {"subkey2": "value2"}}');
jsonb_merge
-----------------------------------------------------
{"key": {"subkey1": "value1", "subkey2": "value2"}}
(1 row)
Finally, let's create an aggregate based on the above function,
create aggregate jsonb_merge_agg(jsonb)
(
    sfunc = jsonb_merge,
    stype = jsonb
);
and we are done:
select jsonb_pretty(jsonb_merge_agg(keys_to_object(key, translate(value, '{}"', '[]'))))
from test_table;
jsonb_pretty
----------------------------------------------
{ +
"cogs": { +
"props1": { +
"id": "26", +
"value": "100", +
"dimensions": "[200, 300]" +
}, +
"props2": { +
"id": "27", +
"value": "200", +
"dimensions": "[700, 800]" +
}, +
"display": "Giant Cog", +
"description": "some awesome cog" +
}, +
"widgets": { +
"props1": { +
"id": "28", +
"value": "100", +
"dimensions": "[200, 300]" +
}, +
"props2": { +
"id": "29", +
"value": "200", +
"dimensions": "[900, 1000]" +
}, +
"display": "Giant Widget", +
"description": "some awesome widget"+
} +
}
(1 row)
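One gap remains relative to the requested output: numbers and dimension arrays come back as JSON strings ("100", "[200, 300]") rather than as numbers and arrays. A possible tweak, my adaptation rather than part of the answer above, is to let keys_to_object try parsing the translated value as jsonb and fall back to plain text:

-- Sketch: values that parse as JSON (numbers, arrays) keep their type;
-- anything else (plain text such as 'Giant Cog') is stored as a string.
create or replace function keys_to_object(keys text[], val text)
returns jsonb language plpgsql as $$
declare
    i int;
    rslt jsonb;
begin
    begin
        rslt := translate(val, '{}"', '[]')::jsonb;  -- '100' -> 100, '{"200","300"}' -> [200, 300]
    exception when others then
        rslt := to_jsonb(val);                       -- not valid JSON: keep as text
    end;
    for i in select generate_subscripts(keys, 1, true) loop
        rslt := jsonb_build_object(keys[i], rslt);
    end loop;
    return rslt;
end $$;

-- The final aggregation then no longer needs the translate() call:
select jsonb_pretty(jsonb_merge_agg(keys_to_object(key, value)))
from test_table;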