Parsing JSON in SQL Server 2017 (Clearbit API call)

I'm pulling data into a database on my local server with API calls via the Clearbit provider. Everything was fine parsing the data with SQL Server 2017 until I hit a bump.
I'll go straight to an example for easier understanding.
This is an example of an API call output in JSON:
{
"id": "384dfe0d-5bba-445e-a390-2d946dc84a12",
"name": "Honeywell",
"legalName": "Honeywell International Inc",
"domain": "honeywell.com",
"domainAliases": [
"honeywell.at",
"honeywell.it",
"evohome.info",
"wifithermostat.com",
"emsaviation.com",
"mytotalconnect.com",
"honeywell.nl",
"honeywell.co.za",
"honeywell.com.au",
"honeywell.ca",
"alliedsignal.com",
"emsdss.com",
"primusepic.com",
"alarmnet-me.com",
"lebow.com",
"honeywell.ie",
"honeywell.jp",
"honeywell.com.br",
"trendcontrol.co.uk",
"honeywellforjaguar.co.uk",
"aviaso.com",
"skyforce.co.uk",
"newenglandinstruments.com",
"honeywell.fi",
"alarmnet.com",
"skyconnect.com",
"skyforceuk.com",
"securitex.com",
"missionready.com",
"honeywellaerospace.com",
"formation.com",
"aclon.com",
"electrocorp.com",
"ultrak.com",
"satcom1.com",
"hsmpats.com",
"myaerospace.com",
"emsglobaltracking.com",
"fascocontrols.com",
"honeywellnow.com",
"bendixbrakes.com",
"elmwoodsensors.com",
"ovationselect.com",
"honeywellbusinessaviation.com",
"iflyaspire.com",
"btrinc.com",
"honeywellspecialtymaterials.com",
"magneticsensors.com",
"activeye.com",
"egarrett.com",
"novar-eds.com",
"aviaso.co.uk",
"chadwick-helmuth.com",
"datainstruments.com",
"lebowproducts.com",
"honeywell-produktkatalog.de",
"honeywellforjaguar.com",
"hobbs-corp.com",
"emsgt.com",
"honeywellaes.com",
"honeywellbuildingsolutions.com",
"satcom1.aero",
"honeywell-building-solutions.de",
"lifesafetydistribution.com",
"godirect.com",
"garrettbulletin.com",
"yourhomeexpert.com",
"aerospacetrading.com",
"sensorsystems.com",
"wifithermostat.info",
"honeywell-fachseminare.de",
"hobbscorporation.com",
"kcl.hu",
"honeywell.sk",
"esser.info",
"inertialsensor.com",
"sensotec.com",
"notifier.com",
"honeywellgreer.com",
"smartact.de",
"honeywellfire.com",
"iris-systems.com",
"honeywell.ru",
"lxei.com",
"thermalswitch.com",
"hightempsolutions.com",
"aubetech.com",
"honeywell-haustechnik.de",
"careersathoneywell.com",
"garrettbyhoneywell.com",
"honeywell.in",
"honeywell.cn",
"honeywell.com.mx",
"kcp.com",
"satamatics.com",
"myflite.com"
],
"site": {
"title": "Honeywell",
"h1": null,
"metaDescription": " We are blending products with software solutions to link people and businesses to the information they need to be more efficient, safer and connected. ",
"metaAuthor": null,
"phoneNumbers": [
"+1 877-271-8620",
"+1 800-633-3991",
"+1 877-841-2840",
"+1 480-353-3020",
"+1 973-455-3388",
"+1 973-204-9621",
"+32 2 728 20 45",
"+32 476 20 90 19",
"+44 7794 007289",
"+86 21 2219 6509"
],
"emailAddresses": [
"domains#honeywell.com",
"HoneywellPrivacy#honeywell.com",
"rob.ferris#honeywell.com",
"ilse.schouteden#honeywell.com",
"chris.martin2#honeywell.com",
"Anahi.Espinosa#honeywell.com",
"lydia.lu#honeywell.com",
"madhavi.jha#Honeywell.com",
"Steven.Brecken#Honeywell.com",
"Steve.Brecken#Honeywell.com",
"Eugene.Tan#Honeywell.com"
]
},
"category": {
"sector": "Consumer Discretionary",
"industryGroup": "Automobiles & Components",
"industry": "Automotive",
"subIndustry": "Automotive",
"sicCode": "3714",
"naicsCode": null
},
"tags": [
"Automotive",
"Enterprise",
"B2B",
"Electrical"
],
"description": " We are blending products with software solutions to link people and businesses to the information they need to be more efficient, safer and connected. ",
"foundedYear": 1936,
"location": "115 Tabor Rd, Morris Plains, NJ 07950, USA",
"timeZone": "America/New_York",
"utcOffset": -4,
"geo": {
"streetNumber": "115",
"streetName": "Tabor Road",
"subPremise": null,
"city": "Morris Plains",
"postalCode": "07950",
"state": "New Jersey",
"stateCode": "NJ",
"country": "United States",
"countryCode": "US",
"lat": 40.8358456,
"lng": -74.4771042
},
"logo": "https://logo.clearbit.com/honeywell.com",
"facebook": {
"handle": "293855263965203",
"likes": null
},
"linkedin": {
"handle": "company/honeywell"
},
"twitter": {
"handle": "HoneywellNow",
"id": "257492733",
"bio": "Please visit us over at #Honeywell.",
"followers": 2322,
"following": 1,
"location": "Morris Plains, NJ",
"site": "https:",
"avatar":
},
"crunchbase": {
"handle": "organization/honeywell"
},
"emailProvider": false,
"type": "public",
"ticker": "HON",
"phone": "+1 973-455-2000",
"metrics": {
"alexaUsRank": 6045,
"alexaGlobalRank": 18053,
"googleRank": null,
"employees": 51779,
"employeesRange": "1000+",
"marketCap": 102920000000,
"raised": null,
"annualRevenue": 39302000000,
"fiscalYearEnd": 12
},
"indexedAt": "2017-07-11T23:00:41.115Z",
"tech": [
"crazy_egg",
"google_analytics",
"google_tag_manager",
"asp_net",
"mouseflow",
"marketo",
"go_squared",
"microsoft_exchange_online",
"outlook",
"recaptcha"
],
"parent": {
"domain": null
},
"similarDomains": [
"abb-livingspace.com",
"alerton.com",
"gereports.com",
"honeywellprocess.com",
"honeywelluk.com",
"johnsoncontrols.com",
"jpinstruments.com",
"lenel.com",
"maxitrol.com",
"nucalgon.com",
"schneider-electric.us",
"siemens.com"
]
}
If you look at the example above you will see "domainAliases": [...],
and that is the part of the JSON I still need to parse.
This is the SQL parsing query I already have:
SELECT *
, JSON_VALUE(JSONData,'$.name') AS CompanyName
, JSON_VALUE(JSONData,'$.category.sector') AS CategorySector
, JSON_VALUE(JSONData, '$.category.industryGroup') AS CategoryIndustryGroup
, JSON_VALUE(JSONData, '$.category.industry') AS CategoryIndustry
, JSON_VALUE(JSONData, '$.category.subIndustry') AS CategorySubIndustry
, JSON_VALUE(JSONData, '$.category.sicCode') AS CategorySicCode
, JSON_VALUE(JSONData, '$.category.naicsCode') AS CategoryNaicsCode
, JSON_VALUE(JSONData, '$.metrics.employees') AS EmployeesNumber
, JSON_VALUE(JSONData, '$.metrics.employeesRange') AS EmployeesRange
, JSON_VALUE(JSONData, '$.metrics.marketCap') AS MarketCap
, JSON_VALUE(JSONData, '$.metrics.annualRevenue') AS AnnualRevenue
    , JSON_QUERY(JSONData, '$.similarDomains') AS SimilarDomains -- JSON_VALUE returns NULL for arrays; JSON_QUERY returns the array as JSON text
FROM Domains;
I want this data ("domainAliases") to be stored in other table as the data in the upper query (I know that the parse query I already have is only a SELECT query but I also have an UPDATE version of the query).
Here is an example picture of how the finished product in a new table, same database should look. The left column is called Company Name, the 2nd column is called Domain Aliases:
Now WHERE is the JSON data stored? I have it stored in a Column called JSONData, tablename: Domains and all this is in a database called Domainbank. JSONData datatype is nvarchar(max).
I need the data to be grouped by the name of the company and next to the company name there should be aliases domain just like the picture example shows. Now keep in mind that I will run this query for 10k+ JSONDatas and the new table that is going to be created will be super huge but as long as it is all grouped by the company name with all the alias domains it should be good. Some of the JSONDatas did not return the API call in the correct format because they either didn't find the data or something else went wrong, so If the query doesnt find anyting under the "domainAliases": [...] or if it doesn't even find the "domainAliases": [...] then I don't need the company to appear on the new table.
So short recap: let's make a new table (Let's call it AliasDomains), find the data under "domainAliases": [...] also pull the company name out JSON_VALUE(JSONData,'$.name') AS CompanyName, Store the data in the new table as the picture example higher in the post and then group by CompanyName.

From your post I am not completely clear on what your question is, but I assume it is how to write a SQL statement to accomplish the above.
First of all, I'd say you should not worry about the GROUP BY in the insert; do the GROUP BY when retrieving data out of the table.
Having said that, you can quite easily accomplish what you want with a SELECT from the Domains table together with a CROSS APPLY OPENJSON, like so:
INSERT INTO AliasDomains(CompanyName, DomainAliases)
SELECT JSON_VALUE(JSONData, '$.name'), value
FROM Domains
CROSS APPLY OPENJSON (JSONData, '$.domainAliases')
EDIT: I should probably add that value in the above statement is returned by OPENJSON, i.e. it references the values of the path you asked for (in this case domainAliases).
Hope this helps?!
Niels

Related

How to parse or work with a JSON POST request to Oracle relational data table

I am building a .NET Core web API using Dapper and Oracle 19c. The application will receive a POST request similar to the one below and needs to return a value (Salary) from the same table. It needs to loop over the JSON and return the salary, filtering on name, id, and year that match relational data in an Employees table, which also contains the salary and other information for each employee. I am new to Oracle and especially to working with JSON. I tried to use JSON_TABLE, but can't get that to work. What is an easy way to do this?
Request
POST
{
"Employees": [
{
"EMPLOYEE_ID": "100",
"FIRST_NAME":"Steven",
"LAST_NAME": "King",
"HIRE_DATE": "17-JUN-03"
},
{
"EMPLOYEE_ID": "101",
"FIRST_NAME":"Neena",
"LAST_NAME": "Kochar",
"HIRE_DATE": "21-SEP-05"
},
{
"EMPLOYEE_ID": "104",
"FIRST_NAME":"Bruce",
"LAST_NAME": "Ernst",
"HIRE_DATE": "21-MAY-07"
}
]
}
Response
{
"Employees": [
{
"SALARY": "100000",
"STATUS":"SUCCESS"
},
{
"SALARY": "100000",
"STATUS":"SUCCESS"
},
{
"SALARY": "100000",
"STATUS":"SUCCESS"
}
]
}
I tried something like the query below and get a "column ambiguously defined" error at line 2, column 8.
I've tried some other variations of this, but I think I'm using JSON_TABLE wrong and maybe trying to do something that can't be done with JSON functions in Oracle 19c. I'm not sure of the best way to approach this and am having trouble making sense of the Oracle documentation and articles. I'm also fairly new to APIs, but I can easily do a simple GET request to this table with Dapper and return employee information in JSON.
SELECT *
FROM EMPLOYEES e
JOIN EMPLOYEES e ON e.EMPLOYEE_ID IN(
SELECT jt.* FROM JSON_TABLE(
'{
"Payees": [
{
"EMPLOYEE_ID": "100",
"FIRST_NAME":"Steven",
"LAST_NAME": "King",
"HIRE_DATE": "17-JUN-03"
}
]
},
'COLUMNS(EMPLOYEE_ID VARCHAR2(20) PATH '$.EMPLOYEE_ID')) AS jt
);
Final solution:
select e.salary
from EMPLOYEES e
where e.EMPLOYEE_ID in (
    select jt.*
    from JSON_TABLE(
           q'~{ "Payees": [ { "EMPLOYEE_ID": "100", "FIRST_NAME":"Steven", "LAST_NAME": "King", "HIRE_DATE": "17-JUN-03" } ] }~',
           '$.Payees[*]'
           COLUMNS(EMPLOYEE_ID VARCHAR2(20) PATH '$.EMPLOYEE_ID')
         ) jt
);
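The final solution only matches on EMPLOYEE_ID. Since the request should also be matched on name, the same JSON_TABLE call can project more columns and drive a join. The sketch below is an assumption layered on top of the final solution: :payload is a hypothetical bind variable holding the POSTed JSON (for example a Dapper parameter), and the path follows the Employees array from the request rather than the Payees sample.
SELECT e.EMPLOYEE_ID, e.SALARY
FROM EMPLOYEES e
JOIN JSON_TABLE(
       :payload,                -- hypothetical bind variable with the request body
       '$.Employees[*]'
       COLUMNS (
         EMPLOYEE_ID VARCHAR2(20) PATH '$.EMPLOYEE_ID',
         FIRST_NAME  VARCHAR2(50) PATH '$.FIRST_NAME',
         LAST_NAME   VARCHAR2(50) PATH '$.LAST_NAME'
       )
     ) jt
  ON  e.EMPLOYEE_ID = jt.EMPLOYEE_ID
 AND  e.FIRST_NAME  = jt.FIRST_NAME
 AND  e.LAST_NAME   = jt.LAST_NAME;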

Bigquery Get json key name

I have a BigQuery table with a column that contains a JSON string. Within the JSON there may be a key called "person", "corp", or "sme". I want to run a query that returns which of the possible keys exists in the JSON and stores it in a new column.
Below is the data from the column 'class'; each value is one long string in BQ. The first-level key name can be 'corp', 'sme', or 'person' (see the examples below).
Example 1
{
"corp": {
"address": {
"city": "London",
"countryCode": "gb",
"streetAddress": [
"Fairlop road"
],
"zip": "e111bn"
},
"cin": 1234567420,
"title": "Demo Corp"
}
}
Example 2
{
"person": {
"address": {
"city": "Madrid",
"countryCode": "es",
"streetAddress": [
"Some street 1"
],
"zip": "z1123ab"
},
"cin": 1234567411,
"title": "Demo Person"
}
}
I've tried using the json_xxx functions, but they require specifying the json_path. I'm interested in fetching the json_path name to create a new column (cust_type) which lists corp, sme, or person for each row.
example  cust_type
1        corp
2        person
This is my first question, so please bear with me! Thanks.
You can also use a function to extract the first-level keys, whatever they are.
CREATE TEMP FUNCTION json_keys(input STRING) RETURNS ARRAY<STRING> LANGUAGE js AS """
return Object.keys(JSON.parse(input))
""";
SELECT json_keys(json_text) AS cust_type
FROM UNNEST([
'{"corp": {"address": {"city": "London","countryCode": "gb","streetAddress": ["Fairlop road"],"zip": "e111bn"},"cin": 1234567420,"title": "Demo Corp"}}',
'{"person": {"address": {"city": "Madrid","countryCode": "es","streetAddress": ["Some street 1"],"zip": "z1123ab"},"cin": 1234567411,"title": "Demo Person"}}'
]) AS json_text;
output: cust_type is ["corp"] for the first row and ["person"] for the second.
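Since the question asks for a single cust_type value per row rather than an array, the first (and only) top-level key can be taken out of the UDF result. The table and column names below (mydataset.mytable, class) are assumptions based on the question, not real objects:
CREATE TEMP FUNCTION json_keys(input STRING) RETURNS ARRAY<STRING> LANGUAGE js AS """
return Object.keys(JSON.parse(input))
""";
SELECT
  class,
  -- first top-level key: 'corp', 'sme' or 'person'
  json_keys(class)[SAFE_OFFSET(0)] AS cust_type
FROM `mydataset.mytable`;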
Maybe we can use the JSON_EXTRACT function and look to see if the field exists (is not null). An example test might be:
SELECT CASE
WHEN JSON_EXTRACT(json_text, '$.corp') is not null then 'corp'
WHEN JSON_EXTRACT(json_text, '$.person') is not null then 'person'
WHEN JSON_EXTRACT(json_text, '$.sme') is not null then 'sme'
END AS cust_type
FROM UNNEST([
'{"corp": {"address": {"city": "London","countryCode": "gb","streetAddress": ["Fairlop road"],"zip": "e111bn"},"cin": 1234567420,"title": "Demo Corp"}}',
'{"person": {"address": {"city": "Madrid","countryCode": "es","streetAddress": ["Some street 1"],"zip": "z1123ab"},"cin": 1234567411,"title": "Demo Person"}}'
]) AS json_text;
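If the goal is to persist cust_type as a new column next to the original data, either approach can feed a CREATE TABLE AS statement; the destination and source table names here are placeholders:
CREATE OR REPLACE TABLE `mydataset.mytable_with_cust_type` AS
SELECT
  t.*,
  CASE
    WHEN JSON_EXTRACT(t.class, '$.corp')   IS NOT NULL THEN 'corp'
    WHEN JSON_EXTRACT(t.class, '$.person') IS NOT NULL THEN 'person'
    WHEN JSON_EXTRACT(t.class, '$.sme')    IS NOT NULL THEN 'sme'
  END AS cust_type
FROM `mydataset.mytable` AS t;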

BQ: How to UNNEST into new table

I'm exporting billing data from Google Cloud Platform to BigQuery (BQ).
The task at hand is to build a query that UNNESTs the relevant data into a new 'flat' table.
The structure of the data in BQ is this:
[{
"billing_account_id": "01234-1778EC-123456",
"service": {
"id": "2062-016F-44A2",
"description": "Maps"
},
"sku": {
"id": "5D8F-0D17-AAA2",
"description": "Google Maps"
},
"usage_start_time": "2018-11-05 14:45:00 UTC",
"usage_end_time": "2018-11-05 15:00:00 UTC",
"project": {
"id": null,
"name": null,
"labels": []
},
"labels": [],
"system_labels": [],
"location": null,
"export_time": "2018-11-05 21:54:09.779 UTC",
"cost": "5.0",
"currency": "EUR",
"currency_conversion_rate": "0.87860000000017424",
"usage": {
"amount": "900.0",
"unit": "seconds",
"amount_in_pricing_units": "0.00034674063800277393",
"pricing_unit": "month"
},
"credits": "-1.25",
"invoice": {
"month": "201811"
}
},
I wish to schedule a job that builds a new table every day with just this schema
billing_account_id, usage_start_time, usage_end_time, cost, credit_amount
So far I'm at this:
select billing_account_id, usage_start_time, usage_end_time, cost, credits AS CREDITS from clientBilling.gcp_billing_export_v1_XXXX , UNNEST(credits);
But in the results, credits are still nested and not 'flat' as I need. Any input is welcome, thanks! :)
credits is an array of structs (each struct being "name, amount") - a "repeated" record in BigQuery - so you have to first unnest the array and then reference the struct member you want.
Thus:
1. UNNEST the credits record
2. Alias the credits.amount struct member as credit_amount
SELECT
billing_account_id,
usage_start_time,
usage_end_time,
cost,
credit.amount as credit_amount
FROM
`optimum-rock-145719.billing_export.gcp_billing_export_v1*`,
UNNEST(credits) as credit
This will return a result table with the credits.amount values exposed as credit_amount. You were doing step 1, but not step 2, and were ignoring the unnested field in your SELECT clause.
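Since the stated goal is a daily job that builds a new flat table with just those five columns, the same SELECT can be wrapped in CREATE OR REPLACE TABLE and run as a BigQuery scheduled query. The destination table name below is a placeholder; the source is the export table from the answer:
CREATE OR REPLACE TABLE `my_project.billing.flat_billing_export` AS
SELECT
  billing_account_id,
  usage_start_time,
  usage_end_time,
  cost,
  credit.amount AS credit_amount
FROM
  `optimum-rock-145719.billing_export.gcp_billing_export_v1*`,
  UNNEST(credits) AS credit;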

RethinkDb query current date time between two date columns

Sorry if this is really a basic/noob question, but I am really new to RethinkDB, coming from SQL.
(1)
Below is the query in SQL that I wanted to convert to RethinkDB. This may be really simple, but I cannot get it right.
SQL equivalent:
select *
from reservations
where
room_id = 'b1a7ddd3-ddfd-4624-8e85-79b47fb19f99' and
now() between reservation_start and reservation_end
RethinkDb Query (with error):
r.db("myDb").table("reservations").filter(function(doc){
return
doc("room_id").eq("b1a7ddd3-ddfd-4624-8e85-79b47fb19f99")
.and( r.now().between(doc("reservation_start ").date(), doc("reservation_end").date()) )
}
)
I just want to return the reservations scheduled for today, or ongoing ones that have already started but not yet finished.
(2)
Reservations have a column or field for attendees which is a list/array of name/email:
attendees: [
{"name": "Attendee 1", "email": "attendee1#test.com"},
{"name": "Attendee 2", "email": "attendee2#test.com"},
{"name": "Attendee 3", "email": "attendee3#test.com"},
]
I would like to add a filter to check that an email exists in the list of attendees.
It is like querying for: the email attendee2@test.com has a reservation today for room 101.
If querying by attendee email is not possible or complicated, I don't mind, as I can do that check in my application. What matters is the RethinkDB equivalent of now() between dateColumnStart and dateColumnEnd.
Update: Added sample data stored in DB (RethinkDB)
{
"attendees": [
{
"email": dummyUser101#gmail.com, »
"name": "Dummy User 101"
} ,
{
"email": dummyUser102#gmail.com, »
"name": "Dummy User 102"
}
] ,
"id": "45qum29cel0cm4ejl2obi6pttj" ,
"room_id": "7cc8e51d-e3fa-4d84-b7e6-9ebf8975754a" ,
"reservation_end": "2018-11-23T02:00:00" , //10AM (GMT8)
"reservation_start": "2018-11-19T00:00:00" , //8AM (GMT8)
"details": "Week event 8AM-10AM Test"
}
{
"attendees": [
{
"email": dummyUser103#gmail.com, »
"name": "Dummy User 103"
} ,
{
"email": dummyUser101#gmail.com, »
"name": "Dummy User 101"
} ,
{
"email": dummyUser102#gmail.com, »
"name": "Dummy User 102"
}
] ,
"id": "6ejq8h6tvlpnjiskvt4kthfmss_20181123T060000Z" ,
"room_id": "7cc8e51d-e3fa-4d84-b7e6-9ebf8975754a" ,
"reservation_end": "2018-11-23T07:00:00" , //3PM (GMT8)
"reservation_start": "2018-11-23T06:00:00" , //2PM (GMT8)
"details": "Test Reservation"
}
Thanks!
(1)
You cannot use between in the filter. between fetches all documents between two keys. See the docs here: https://www.rethinkdb.com/api/javascript/between/
What you need is the "during" keyword.
r.db("myDB").table("reservations").filter(function(doc){
return doc("room_id").eq("b1a7ddd3-ddfd-4624-8e85-79b47fb19f99")
.and( r.now().during(
r.ISO8601(doc("reservation_start"),{defaultTimezone:"+08:00"}),
r.ISO8601(doc("reservation_end"),{defaultTimezone:"+08:00"})))
}
)

Access deeper elements of a JSON using postgresql 9.4

I want to be able to access deeper elements of a JSON stored in the field json in a PostgreSQL database. For example, I would like to be able to access the elements along the path states->events->time in the JSON provided below. Here is the PostgreSQL query I'm using:
SELECT
data#>> '{userId}' as user,
data#>> '{region}' as region,
data#>>'{priorTimeSpentInApp}' as priotTimeSpentInApp,
data#>>'{userAttributes, "Total Friends"}' as totalFriends
from game_json
WHERE game_name LIKE 'myNewGame'
LIMIT 1000
and here is an example record from the json field
{
"region": "oh",
"deviceModel": "inHouseDevice",
"states": [
{
"events": [
{
"time": 1430247045.176,
"name": "Session Start",
"value": 0,
"parameters": {
"Balance": "40"
},
"info": ""
},
{
"time": 1430247293.501,
"name": "Mission1",
"value": 1,
"parameters": {
"Result": "Win ",
"Replay": "no",
"Attempt Number": "1"
},
"info": ""
}
]
}
],
"priorTimeSpentInApp": 28989.41467999999,
"country": "CA",
"city": "vancouver",
"isDeveloper": true,
"time": 1430247044.414,
"duration": 411.53,
"timezone": "America/Cleveland",
"priorSessions": 47,
"experiments": [],
"systemVersion": "3.8.1",
"appVersion": "14312",
"userId": "ef617d7ad4c6982e2cb7f6902801eb8a",
"isSession": true,
"firstRun": 1429572011.15,
"priorEvents": 69,
"userAttributes": {
"Total Friends": "0",
"Device Type": "Tablet",
"Social Connection": "None",
"Item Slots Owned": "12",
"Total Levels Played": "0",
"Retention Cohort": "Day 0",
"Player Progression": "0",
"Characters Owned": "1"
},
"deviceId": "ef617d7ad4c6982e2cb7f6902801eb8a"
}
That SQL query works, except that it doesn't give me any return values for totalFriends (i.e. data#>>'{userAttributes, "Total Friends"}' as totalFriends). I assume that part of the problem is that events falls within square brackets (I don't know what that indicates in the JSON format) as opposed to curly braces, but I'm also unable to extract values from the userAttributes key.
I would appreciate it if anyone could help me.
I'm sorry if this question has been asked elsewhere. I'm so new to postgresql and even json that I'm having trouble coming up with the proper terminology to find the answers to this (and related) questions.
You should definitely familiarize yourself with the basics of JSON
and with the JSON functions and operators in Postgres.
In the second source, pay attention to the operators -> and ->>.
General rule: use -> to get a json object, ->> to get a json value as text.
Using these operators you can rewrite your query in a way that returns the correct value of 'Total Friends':
select
data->>'userId' as user,
data->>'region' as region,
data->>'priorTimeSpentInApp' as priotTimeSpentInApp,
data->'userAttributes'->>'Total Friends' as totalFriends
from game_json
where game_name like 'myNewGame';
JSON objects in square brackets are elements of a JSON array.
An array may have many elements, and they are accessed by index.
JSON arrays are indexed from 0 (the first element of an array has index 0).
Example:
select
data->'states'->0->'events'->1->>'name'
from game_json
where game_name like 'myNewGame';
-- returns "Mission1"
This did help me
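If what you actually need is every states->events->time value rather than one element picked by index, the arrays can be expanded with json_array_elements (available for the json type in 9.4). This is a sketch against the same assumed game_json table and data column:
select
  data->>'userId' as user_id,
  ev->>'name'     as event_name,
  ev->>'time'     as event_time
from game_json,
     -- each states element, then each events element inside it (implicit LATERAL)
     json_array_elements(data->'states') as st,
     json_array_elements(st->'events')   as ev
where game_name like 'myNewGame';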