Cannot use json_agg with ST_AsGeoJSON to create geojson in PostgreSQL - json

I am trying to recreate a structure like the following to get a geojson as a query result in PostgreSQL/PostGIS
the structure
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [
0.0,
0.0
]
},
"properties": {
"title": "Unnamed",
"marker-color": "#B9EB14"
}
},
.....other features here
]
}
My query, based on json_build_object and json_agg is this
-- Attempts to build a single GeoJSON FeatureCollection from selected rows of contexts.
SELECT
json_build_object(
'type', 'FeatureCollection',
-- NOTE: the parenthesised list handed to json_agg is a row constructor, not a
-- JSON object, so it is serialised with generated keys (f1..f4 in the output below).
'features', json_agg((
'type', 'Feature',
-- The ST_AsGeoJSON result ends up embedded as an escaped string (f4 in the output below).
'geometry', ST_AsGeoJSON(t.geometry)
))
)
FROM (
SELECT
type, id,
-- Transform the geometry to SRID 4326 before serialising it.
ST_Transform(geom, 4326) as geometry
FROM contexts where id in (1,2,3)
) AS t
The output is wrong; this is what I get:
"{""type"" : ""FeatureCollection"", ""features"" :
[{""f1"":""type"",""f2"":""Feature"",""f3"":""geometry"",""f4"":""
{\""type\"":\""MultiPolygon\"",\""coordinates\"":[[[[22.7151924380517,37.9825351476175],
[22.7151917624093,37.9825355387348],.... ]]]}""}]}"
How can I get the right output?
Thanks

Related

Using Recursive feature while Flattening in Snowflake

I have a JSON string that needs to be parsed in order to retrieve particular values. Here is an example I am working with:
{
"assignable_type": "SHIPMENT",
"rule": {
"rules": [
{
"meta_data": {},
"rules": [
{
"op": "IN",
"target": "CLIENT_FID",
"type": "ARRAY_VALUE_ASSERTION",
"values": [
"flx::core:client:dbid/64171",
"flx::core:client:dbid/76049",
"flx::core:client:dbid/34040",
"flx::core:client:dbid/61806"
]
}
],
"type": "AND"
}
],
"type": "OR"
},
"type": "USER_DEFINED"
}
The goal is to get the values when "target":"CLIENT_FID".
Expected Output for this JSON file should be ;
["flx::core:client:dbid/64171",
"flx::core:client:dbid/76049",
"flx::core:client:dbid/34040",
"flx::core:client:dbid/61806"]
Here, as we can see rules is a list of dictionaries, and we can have nested lists as seen in the example.
Similarly, we have other JSON file of following type;
{
"assignable_type": "SHIPMENT",
"rule": {
"rules": [
{
"meta_data": {},
"rules": [
{
"op": "IN",
"target": "PORT_OF_ENTRY_FID",
"type": "ARRAY_VALUE_ASSERTION",
"values": [
"flx::core:port:dbid/566788",
"flx::core:port:dbid/566931",
"flx::core:port:dbid/561482"
]
}
],
"type": "AND"
},
{
"meta_data": {},
"rules": [
{
"op": "IN",
"target": "PORT_OF_LOADING_FID",
"type": "ARRAY_VALUE_ASSERTION",
"values": [
"flx::core:port:dbid/561465"
]
},
{
"op": "IN",
"target": "SHIPMENT_MODE",
"type": "ARRAY_VALUE_ASSERTION",
"values": [
0
]
},
{
"op": "IN",
"target": "CLIENT_FID",
"type": "ARRAY_VALUE_ASSERTION",
"values": [
"flx::core:client:dbid/28169"
]
}
],
"type": "AND"
}
],
"type": "OR"
},
"type": "USER_DEFINED"
}
For the second example,
the expected output should be:
["flx::core:client:dbid/28169"]
As seen, we may need to read the values at different depths in the file. In order to address this issue, I used the following code:
/* first convert the string to a JSON object in cte1;
   list_elem holds the parsed value found at path 'rule.rules' */
with cte1 as (
select to_json(json_string) as json_rep,
parse_json(json_extract_path_text(json_rep, 'rule.rules')) as list_elem
from table 1),
/* cte2: flatten list_elem recursively, keep only OBJECT-typed elements whose text
   matches the client_fid pattern, and extract each element's 'target' entry */
cte2 as (select split_array,
json_extract_path_text(split_array, 'target') as target_client
from (
select json_rep,
list_elem,
t.value as split_array,
typeof(split_array) as obj_type,
index
from cte1,
table(flatten(cte1.list_elem, recursive=>true)) as t) temp /* use recursive feature */
/* NOTE(review): this substring match presumably also keeps enclosing objects whose text
   contains the nested rule, which would be why the exact target filter in the final
   select is still needed - confirm */
where split_array ilike '%"target":"client_fid"%' /* filter for those rows containing this string */
and obj_type='OBJECT')
/* final result: the 'values' entry of each element whose own target is exactly CLIENT_FID */
select
split_array,
json_extract_path_text(split_array, 'values') as client_values
from cte2
where target_client='CLIENT_FID'; /* filter the rows where we have the dictionary containing client fid */
In order to address the issue of varying depth at which client_fid is found we're recursing while flattening the string into rows. The output which is obtained for both of above inputs is provided below,
For the first String we get the actual output in variable client_values as
["flx::core:client:dbid/64171",
"flx::core:client:dbid/76049",
"flx::core:client:dbid/34040",
"flx::core:client:dbid/61806"]
Similarly, for the second string we get the actual output as
["flx::core:client:dbid/28169"]
As seen, the code appears to produce the correct output, but filtering on target_client='CLIENT_FID' in the final query feels like a very hacky approach. Is there a better way to retrieve the client FID values when the depth at which they appear in the input can vary?
Help is appreciated.

Extract value of Tags from cloudTrail logs using Athena

I am trying to query cloudtrail logs using Athena. My goal is to find specific instances and extract them with their Tags.
The query I am using is:
-- Instance id and the full tag list for RunInstances/StartInstances events whose
-- request parameters mention 'mytest1', limited to the given "timestamp" range.
SELECT
    eventTime,
    awsRegion,
    json_extract(responseelements, '$.instancesSet.items[0].instanceId') AS instanceId,
    json_extract(responseelements, '$.instancesSet.items[0].tagSet.items') AS TAGS
FROM cloudtrail_logs_PP
WHERE eventName IN ('RunInstances', 'StartInstances')
    AND requestparameters LIKE '%mytest1%'
    AND "timestamp" BETWEEN '2021/09/01' AND '2021/10/01'
ORDER BY eventTime;
Using this query - I am able to get all Tags under one column.
Output of query
I want to extract only specific tags and need help with that. How can I extract only a specific tag?
I tried enhancing my query with json_extract(responseelements, '$.instancesSet.items[0].tagSet.items[0]'), but the order of the tags differs between logs, so I can't rely on an index position.
My json file in S3 is something like below:
{
"eventVersion": "1",
"eventTime": "2022-05-27T18:44:29Z",
"eventName": "RunInstances",
"awsRegion": "us-east-1",
"requestParameters": {
"instancesSet": {
"items": [{
"imageId": "ami-1234545",
"keyName": "DDKJKD"
}]
},
"instanceType": "m5.2xlarge",
"monitoring": {
"enabled": false
},
"hibernationOptions": {
"configured": false
}
},
"responseElements": {
"instancesSet": {
"items": [{
"tagSet": {
"items": [ {
"key": "11",
"value": "DS"
}, {
"key": "1",
"value": "A"
}]
}]
}
}
}

How to store a json value in a postgres column

I have a table with two columns: one column should store an int and the other should store JSON.
Here is the data which I want to store in the table:
id,polygon
1,"{""type"": ""Feature"",
""properties"": {
""stroke"": ""#555555"",
""stroke-width"": 2,
""stroke-opacity"": 1,
""fill"": ""#00aa22"",
""fill-opacity"": 0.5
},
""geometry"": {
""type"": ""Polygon"",
""coordinates"": [
[
[-76.97021484375,
40.17887331434696
],
[-74.02587890625,
39.842286020743394
],
[-73.4326171875,
41.713930073371294
],
[-76.79443359375,
41.94314874732696
],
[-76.97021484375,
40.17887331434696
]
]
]
}
}"
I tried storing it in Postgres as follows:
-- Attempted insert: the JSON text is wrapped in an extra pair of double quotes and every
-- inner double quote is doubled, exactly as in the CSV-style sample data above.
insert into gjl_polygon values(1,'"{""type"":
""Feature"",""properties"": {""stroke"": ""#555555"",""stroke-
width"": 2,""stroke-opacity"": 1,""fill"": ""#00aa22"",""fill-
opacity"": 0.5},""geometry"": {""type"":
""Polygon"",""coordinates"":
[[[-76.97021484375,40.17887331434696],[-74.02587890625,
39.842286020743394 ],[-73.4326171875, 41.713930073371294],
[-76.79443359375,41.94314874732696],
[-76.97021484375,40.17887331434696]]]}}"');
I got the following error,
Expecting ':' delimiter: line 1 column 4 (char 3)
The problem with your code is that the double quotes are doubled (and the whole value is wrapped in an extra pair of double quotes). Try editing it like this:
{
"type": "Feature",
"properties": {
"stroke": "#555555",
"stroke-width": 2,
"stroke-opacity": 1,
"fill": "#00aa22",
"fill-opacity": 0.5
},
"geometry": {
"type": "Polygon",
"coordinates": [
[
[-76.97021484375,
40.17887331434696
],
[-74.02587890625,
39.842286020743394
],
[-73.4326171875,
41.713930073371294
],
[-76.79443359375,
41.94314874732696
],
[-76.97021484375,
40.17887331434696
]
]
]
}
}
The JSON above is a valid JSON string and it should work as expected.

Hive Sql Query To get Json Object from Json Array

I have a json inside 'content' column in the following format:
{ "identifier": [
{
"type": {
"coding": [
{
"code": "MRN",
}
]
},
"value": "181"
},
{
"type": {
"coding": [
{
"code": "PID",
}
]
},
"value": "5d3669b0"
},
{
"type": {
"coding": [
{
"code": "IPN",
}
]
},
"value": "41806"
}
]}
I have to run an hive query to get the "value" of the code which is equal to "MRN".
I have written the following query, but it's not giving the value as expected:
-- Attempt that addresses the identifier array without any array subscript.
SELECT get_json_object(content, '$.identifier.value') AS Mrn
FROM Doctor
WHERE get_json_object(content, '$.identifier.type.coding.code') LIKE '%MRN%'
I don't want to give a particular array position like:
-- Variant that reads only the first element ([0]) of the identifier array.
SELECT get_json_object(content, '$.identifier[0].value') AS Mrn
FROM Doctor
WHERE get_json_object(content, '$.identifier[0].type.coding.code') LIKE '%MRN%'
As the json gets created randomly and the position is not fixed always.
Use the [*] wildcard instead of a fixed array index:
-- Variant that uses the [*] wildcard instead of a fixed position in the identifier array.
-- NOTE(review): [*] presumably returns the values of every array element, not only the
-- one whose code is MRN - confirm against the Hive get_json_object documentation.
SELECT get_json_object(content, '$.identifier[*].value') AS Mrn
FROM Doctor
WHERE get_json_object(content, '$.identifier[*].type.coding.code') LIKE '%MRN%'

AWS Athena - Querying JSON - Searching for Values

I have nested JSON files on S3 and am trying to query them with Athena.
However, I am having trouble querying the nested JSON values.
My JSON file looks like this:
{
"id": "17842007980192959",
"acount_id": "17841401243773780",
"stats": [
{
"name": "engagement",
"period": "lifetime",
"values": [
{
"value": 374
}
],
"title": "Engagement",
"description": "Total number of likes and comments on the media object",
"id": "17842007980192959/insights/engagement/lifetime"
},
{
"name": "impressions",
"period": "lifetime",
"values": [
{
"value": 11125
}
],
"title": "Impressions",
"description": "Total number of times the media object has been seen",
"id": "17842007980192959/insights/impressions/lifetime"
},
{
"name": "reach",
"period": "lifetime",
"values": [
{
"value": 8223
}
],
"title": "Reach",
"description": "Total number of unique accounts that have seen the media object",
"id": "17842007980192959/insights/reach/lifetime"
},
{
"name": "saved",
"period": "lifetime",
"values": [
{
"value": 0
}
],
"title": "Saved",
"description": "Total number of unique accounts that have saved the media object",
"id": "17842007980192959/insights/saved/lifetime"
}
],
"import_date": "2017-12-04"
}
What I'm trying to do is to query the "stats" field value for a specific name (for example, name=engagement).
So ideally something like:
SELECT id, account_id, stats.values.value WHERE stats.name='engagement'
AWS example: https://docs.aws.amazon.com/athena/latest/ug/searching-for-values.html
Any help would be appreciated.
You can query the JSON with the following table definition:
-- External table over the JSON files at the given S3 location, read via the OpenX JSON SerDe.
CREATE EXTERNAL TABLE test (
    id string,
    acount_id string,
    stats array<struct<
        name:string,
        period:string,
        values:array<struct<value:string>>,
        title:string
    >>
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
LOCATION 's3://bucket/';
Now, the value column is available through the following unnesting:
-- One output row per (stats entry, values entry) pair, restricted to the 'engagement' stat.
SELECT
    id,
    acount_id,
    stat.name,
    x.value
FROM test
CROSS JOIN UNNEST(test.stats) AS st(stat)
CROSS JOIN UNNEST(stat."values") AS valx(x)
WHERE stat.name = 'engagement';