how to parse complex JSON in perl - json

I have the following JSON content and I am trying to parse it using Perl (use JSON qw( decode_json );).
here you have the complete JSON content,
{
"page": {
"currentPage": 1,
"totalPages": 1,
"pageSize": 45
},
"networkelements": [
{
"name": "aRestPM",
"alias": "",
"networkAddresses": [
{
"address": "135.244.37.111",
"port": 443,
"restResourceId": "135.244.37.111:443",
"connectionProtocol": "ADAPTER_MANAGED"
},
{
"address": "10.206.198.182",
"port": 443,
"restResourceId": "10.206.198.182:443",
"connectionProtocol": "ADAPTER_MANAGED"
}
],
"adapter": {
"name": "Simple RestService V1.0",
"version": "19.0.SR5a-STD-0.50_PM-SNAPSHOT"
},
"vendor": "Simple",
"model": "RestService",
"version": "V1.0",
"creationTime": "2022-04-07T20:32:22.368-0000",
"agentGroupName": "DefaultAgentGroup",
"connectivityState": "IN_CONTACT_CONDITIONAL",
"interfaces": [
{
"name": "REST",
"networkAddresses": [
{
"address": "135.244.37.111",
"port": 443
},
{
"address": "10.206.198.182",
"port": 443
}
],
"sessionCredentialAAAServer": "AAA",
"sessionCredential": {
"userId": "dummy"
},
"adminState": "ON_DEMAND",
"sessionAddresses": {},
"additionalAttributes": {}
}
],
"customAttributes": {},
"managementState": "MANAGED",
"emsServer": "No"
},
{
"name": "aRestPM_443",
"alias": "",
"networkAddresses": [
{
"address": "10.154.74.91",
"port": 443,
"restResourceId": "10.154.74.91:443",
"connectionProtocol": "ADAPTER_MANAGED"
}
],
"adapter": {
"name": "Simple RestService V1.0",
"version": "19.0.SR5a-STD-0.50_PM-SNAPSHOT"
},
"vendor": "Simple",
"model": "RestService",
"version": "V1.0",
"creationTime": "2022-05-16T13:06:01.928-0000",
"agentGroupName": "DefaultAgentGroup",
"connectivityState": "IN_CONTACT_CONDITIONAL",
"interfaces": [
{
"name": "REST",
"networkAddresses": [
{
"address": "10.154.74.91",
"port": 443
}
],
"sessionCredential": {
"userId": "dummy"
},
"adminState": "ON_DEMAND",
"sessionAddresses": {},
"additionalAttributes": {}
}
],
"customAttributes": {},
"managementState": "MANAGED",
"emsServer": "No"
}
]
}
I need to print the name of each entry in the networkelements array, together with the address and port of each entry in its networkAddresses array.
the given json is in $keycloak_response
$decoded = decode_json($keycloak_response);
my @networkelements = @{ $decoded->{'networkelements'} };
my @net = @{ $decoded->{"networkelements"}[0]{"networkAddresses"} };
#print Dumper \@net;
foreach my $ne ( @networkelements ) {
    $ne_name = $ne->{"name"};
    # NOTE: $ne is already a decoded hash reference, not JSON text; this is the call that fails.
    $decoded2 = decode_json($ne);
    my @net = @{ $decoded2->{"networkAddresses"} };
    foreach my $ne ( @net) {
        print $ne_name . "," . $ne->{"address"} . "," . $ne->{"port"} . "\n";
    }
}
expected result is
aRestPM,135.244.37.111,443
aRestPM,10.206.198.182,443
aRestPM_443,10.154.74.91,443
I would really appreciate any advice.
Thanks

decode_json( $ne ) is wrong since $ne doesn't contain JSON.
# Walk every network element and print one "name,address,port" line per address.
# The array references are dereferenced in place with @{ ... }, so no copies are made.
for my $networkelement (@{ $decoded->{ networkelements } }) {
    my $name = $networkelement->{ name };
    for my $networkAddress (@{ $networkelement->{ networkAddresses } }) {
        my $address = $networkAddress->{ address };
        my $port = $networkAddress->{ port };
        say join ",", $name, $address, $port;
    }
}
You created needless copies of arrays, which I avoided. And I properly scoped my variables. Always use use strict; use warnings;!

JSON data should be decoded only once and you get fully restored structure of the data.
Reference: from_json
There is no need to create intermediary variables; you have full direct access to the data of interest -- use it to your advantage.
Using map gives you access to the elements of an array, which allows you to avoid an explicit loop.
use strict;
use warnings;
use feature 'say';
use JSON;
# Slurp the whole __DATA__ section in one read by locally undefining the record separator.
my $json = do { local $/; <DATA> };
# Decode once; $data then holds the complete nested structure.
my $data = from_json($json);
for my $e ( @{$data->{'networkelements'}} ) {
    # The postfix slice ->@{...} pulls address and port out of each address hash
    # (postfix dereferencing is available without a feature flag from Perl 5.24).
    say join ',', $e->{name},$_->@{qw/address port/} for @{$e->{networkAddresses}};
}
exit 0;
__DATA__
{
"page": {
"currentPage": 1,
"totalPages": 1,
"pageSize": 45
},
"networkelements": [
{
"name": "aRestPM",
"alias": "",
"networkAddresses": [
{
"address": "135.244.37.111",
"port": 443,
"restResourceId": "135.244.37.111:443",
"connectionProtocol": "ADAPTER_MANAGED"
},
{
"address": "10.206.198.182",
"port": 443,
"restResourceId": "10.206.198.182:443",
"connectionProtocol": "ADAPTER_MANAGED"
}
],
"adapter": {
"name": "Simple RestService V1.0",
"version": "19.0.SR5a-STD-0.50_PM-SNAPSHOT"
},
"vendor": "Simple",
"model": "RestService",
"version": "V1.0",
"creationTime": "2022-04-07T20:32:22.368-0000",
"agentGroupName": "DefaultAgentGroup",
"connectivityState": "IN_CONTACT_CONDITIONAL",
"interfaces": [
{
"name": "REST",
"networkAddresses": [
{
"address": "135.244.37.111",
"port": 443
},
{
"address": "10.206.198.182",
"port": 443
}
],
"sessionCredentialAAAServer": "AAA",
"sessionCredential": {
"userId": "dummy"
},
"adminState": "ON_DEMAND",
"sessionAddresses": {},
"additionalAttributes": {}
}
],
"customAttributes": {},
"managementState": "MANAGED",
"emsServer": "No"
},
{
"name": "aRestPM_443",
"alias": "",
"networkAddresses": [
{
"address": "10.154.74.91",
"port": 443,
"restResourceId": "10.154.74.91:443",
"connectionProtocol": "ADAPTER_MANAGED"
}
],
"adapter": {
"name": "Simple RestService V1.0",
"version": "19.0.SR5a-STD-0.50_PM-SNAPSHOT"
},
"vendor": "Simple",
"model": "RestService",
"version": "V1.0",
"creationTime": "2022-05-16T13:06:01.928-0000",
"agentGroupName": "DefaultAgentGroup",
"connectivityState": "IN_CONTACT_CONDITIONAL",
"interfaces": [
{
"name": "REST",
"networkAddresses": [
{
"address": "10.154.74.91",
"port": 443
}
],
"sessionCredential": {
"userId": "dummy"
},
"adminState": "ON_DEMAND",
"sessionAddresses": {},
"additionalAttributes": {}
}
],
"customAttributes": {},
"managementState": "MANAGED",
"emsServer": "No"
}
]
}
Output
aRestPM,135.244.37.111,443
aRestPM,10.206.198.182,443
aRestPM_443,10.154.74.91,443
Note: printf can generate aligned output
# Left-justify the name in 12 columns and the address in 15 so the fields line up.
printf "%-12s %-15s %d\n", $e->{name},$_->{address},$_->{port} for @{$e->{networkAddresses}};
Output
aRestPM 135.244.37.111 443
aRestPM 10.206.198.182 443
aRestPM_443 10.154.74.91 443

Related

Inserting a Complex Nested JSON Column in MySQL

Here is my use case :-
I am trying to get the deployment details in a JSON format using :
kubectl get deployment -o json depl_name
and inserting result back to a column: meta_data in MySQL. The column data type is json . But the insert statement is failing with error :-
ERROR 3140 (22032): Invalid JSON text: "Missing a comma or '}' after an object member." at position 1035 in value for column
Here is my entire JSON :-
{
"uuid": {
"view": "demoBoard",
"demo": [
{
"serviceName": "wordpress-backend",
"configurations": {
"ec2_iam": {
"user": [],
"roles": null,
"permissions": null
}
},
"deployment_config": {
"apiVersion": "apps/v1",
"kind": "Deployment",
"metadata": {
"annotations": {
"deployment.kubernetes.io/revision": "6",
"kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"apps/v1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"app\":\"wordpress-backend\",\"wordpress_app_id\":\"w26\"},\"name\":\"wordpress-backend\",\"namespace\":\"wordpress\"},\"spec\":{\"selector\":{\"matchLabels\":{\"app\":\"wordpress-backend\"}},\"template\":{\"metadata\":{\"labels\":{\"app\":\"wordpress-backend\",\"wordpress_app_id\":\"w26\"}},\"spec\":{\"containers\":[{\"envFrom\":[{\"configMapRef\":{\"name\":\"wordpress-backend-config\"}}],\"image\":\"docker-image\",\"imagePullPolicy\":\"IfNotPresent\",\"name\":\"wordpress-backend\",\"ports\":[{\"containerPort\":8000}],\"resources\":{},\"volumeMounts\":[{\"mountPath\":\"/tmp/me/cloud\",\"name\":\"my-key\"}]}],\"imagePullSecrets\":[{\"name\":\"my-json\"}],\"volumes\":[{\"name\":\"my-cloud-key\",\"secret\":{\"defaultMode\":123,\"secretName\":\"my-key\"}}]}}}}\n"
},
"creationTimestamp": "2022-09-12T13:56:34Z",
"generation": 7,
"labels": {
"app": "wordpress-backend",
"wordpress_app_id": "w26"
},
"name": "wordpress-backend",
"namespace": "wordpress",
"resourceVersion": "v2",
"uid": "0da99b29"
},
"spec": {
"progressDeadlineSeconds": 600,
"replicas": 1,
"revisionHistoryLimit": 10,
"selector": {
"matchLabels": {
"app": "wordpress-backend"
}
},
"strategy": {
"rollingUpdate": {
"maxSurge": "25%",
"maxUnavailable": "25%"
},
"type": "RollingUpdate"
},
"template": {
"metadata": {
"creationTimestamp": null,
"labels": {
"app": "wordpress-backend",
"wordpress_app_id": "267"
}
},
"spec": {
"containers": [
{
"envFrom": [
{
"configMapRef": {
"name": "wordpress-backend-config"
}
}
],
"image": "docker.io/my-image",
"imagePullPolicy": "IfNotPresent",
"name": "wordpress-backend",
"ports": [
{
"containerPort": 8000,
"protocol": "TCP"
}
],
"resources": {},
"terminationMessagePath": "/dev/termination-log",
"terminationMessagePolicy": "File",
"volumeMounts": [
{
"mountPath": "/my/path/cloud",
"name": "my-key"
}
]
}
],
"dnsPolicy": "ClusterFirst",
"imagePullSecrets": [
{
"name": "my-key"
}
],
"restartPolicy": "Always",
"schedulerName": "default-scheduler",
"securityContext": {},
"terminationGracePeriodSeconds": 30,
"volumes": [
{
"name": "my-key",
"secret": {
"defaultMode": 123,
"secretName": "sampleKeyName"
}
}
]
}
}
},
"status": {
"availableReplicas": 1,
"conditions": [
{
"lastTransitionTime": "2022-09-29T15:11:14Z",
"lastUpdateTime": "2022-09-29T15:11:14Z",
"message": "Deployment has minimum availability.",
"reason": "MinimumReplicasAvailable",
"status": "True",
"type": "Available"
},
{
"lastTransitionTime": "2022-09-12T14:20:35Z",
"lastUpdateTime": "2022-09-30T14:13:08Z",
"message": "ReplicaSet \"wordpress-backend-abc123\" has successfully progressed.",
"reason": "NewReplicaSetAvailable",
"status": "True",
"type": "Progressing"
}
],
"observedGeneration": 7,
"readyReplicas": 1,
"replicas": 1,
"updatedReplicas": 1
}
}
}
]
}
}
I guess, because of escape sequence in below line causing the failure :-
"message": "ReplicaSet \"wordpress-backend-abc123\" has successfully progressed.", tried removing that, but no luck.

Unable to parse JSON data for Oracle Integration Cloud using JQ

I need help parsing the JSON data received from Oracle Integration Cloud. The expected output is shown below, along with the command I am trying to use.
JQ command
jq '[{id: .id},{integrations: [.integrations[]|{code: .code, version: .version, dependencies: .dependencies|{connections: .connections[]|{id: .id, status: .status}}, .dependencies|{lookups: .lookups}}]}]' output.json
Error :
jq: error: syntax error, unexpected FIELD (Unix shell quoting issues?) at , line 1:
[{id: .id},{integrations: [.integrations[]|{code: .code, version: .version, dependencies: .dependencies|{connections: .connections[]|{id: .id, status: .status}}, .dependencies|{lookups: .lookups}}]}]
Note : If i run below command to fetch only connections data it works fine
jq '[{id: .id},{integrations: [.integrations[]|{code: .code, version: .version, dependencies: .dependencies|{connections: .connections[]|{id: .id, status: .status}}}]}]' output.json
Expected Output:
[
{
"id": "SAMPLE_PACKAGE"
},
{
"integrations": [
{
"code": "HELLO_INTEGRATION",
"version": "01.00.0000",
"dependencies": {
"connections": {
"id": "HELLO_WORLD1",
"status": "CONFIGURED"
}
}
},
{
"code": "HELLO_INTEGRATIO_LOOKUP",
"version": "01.00.0000",
"dependencies": {
"connections": {
"id": "HELLO_WORLD1",
"status": "CONFIGURED"
},
"lookups": {
"name": "COMMON_LOOKUP_VARIABLES",
"status": "CONFIGURED"
}
}
},
{
"code": "HI_INTEGRATION",
"version": "01.00.0000",
"dependencies": {
"connections": {
"id": "HELLO_WORLD1",
"status": "CONFIGURED"
}
}
}
]
}
]
output.json file contains
{
"bartaType": "DEVELOPED",
"countOfIntegrations": 3,
"id": "SAMPLE_PACKAGE",
"integrations": [
{
"code": "HELLO_INTEGRATION",
"dependencies": {
"connections": [
{
"id": "HELLO_WORLD1",
"lockedFlag": false,
"name": "Hello World1",
"role": "SOURCE",
"status": "CONFIGURED",
"type": "rest",
"usage": 6
}
]
},
"description": "",
"eventSubscriptionFlag": false,
"filmstrip": [
{
"code": "HELLO_WORLD1",
"iconUrl": "/images/rest/rest_icon_46.png",
"name": "Hello World1",
"role": "SOURCE",
"status": "CONFIGURED"
}
],
"id": "HELLO_INTEGRATION|01.00.0000",
"lockedFlag": false,
"name": "HELLO_INTEGRATION",
"pattern": "Orchestration",
"patternDescription": "Map Data",
"payloadTracingEnabledFlag": true,
"publishFlag": false,
"scheduleApplicable": false,
"scheduleDefined": false,
"status": "ACTIVATED",
"style": "FREEFORM",
"styleDescription": "Orchestration",
"tempCopyExists": false,
"tracingEnabledFlag": true,
"version": "01.00.0000",
"warningMsg": "ACTIVATE_PUBLISH_NO_CONN"
},
{
"code": "HELLO_INTEGRATIO_LOOKUP",
"dependencies": {
"connections": [
{
"id": "HELLO_WORLD1",
"lockedFlag": false,
"name": "Hello World1",
"role": "SOURCE",
"status": "CONFIGURED",
"type": "rest",
"usage": 6
}
],
"lookups": [
{
"lockedFlag": false,
"name": "COMMON_LOOKUP_VARIABLES",
"status": "CONFIGURED",
"usage": 1
}
]
},
"description": "",
"eventSubscriptionFlag": false,
"filmstrip": [
{
"code": "HELLO_WORLD1",
"iconUrl": "/images/rest/rest_icon_46.png",
"name": "Hello World1",
"role": "SOURCE",
"status": "CONFIGURED"
}
],
"id": "HELLO_INTEGRATIO_LOOKUP|01.00.0000",
"lockedFlag": false,
"name": "HELLO_INTEGRATION_LOOKUP",
"pattern": "Orchestration",
"patternDescription": "Map Data",
"payloadTracingEnabledFlag": true,
"publishFlag": false,
"scheduleApplicable": false,
"scheduleDefined": false,
"status": "ACTIVATED",
"style": "FREEFORM",
"styleDescription": "Orchestration",
"tempCopyExists": false,
"tracingEnabledFlag": true,
"version": "01.00.0000",
"warningMsg": "ACTIVATE_PUBLISH_NO_CONN"
},
{
"code": "HI_INTEGRATION",
"dependencies": {
"connections": [
{
"id": "HELLO_WORLD1",
"lockedFlag": false,
"name": "Hello World1",
"role": "SOURCE",
"status": "CONFIGURED",
"type": "rest",
"usage": 6
}
]
},
"description": "",
"eventSubscriptionFlag": false,
"filmstrip": [
{
"code": "HELLO_WORLD1",
"iconUrl": "/images/rest/rest_icon_46.png",
"name": "Hello World1",
"role": "SOURCE",
"status": "CONFIGURED"
}
],
"id": "HI_INTEGRATION|01.00.0000",
"lockedFlag": false,
"name": "HI_INTEGRATION",
"pattern": "Orchestration",
"patternDescription": "Map Data",
"payloadTracingEnabledFlag": true,
"publishFlag": false,
"scheduleApplicable": false,
"scheduleDefined": false,
"status": "ACTIVATED",
"style": "FREEFORM",
"styleDescription": "Orchestration",
"tempCopyExists": false,
"tracingEnabledFlag": true,
"version": "01.00.0000",
"warningMsg": "ACTIVATE_PUBLISH_NO_CONN"
}
],
"isCloneAllowed": false,
"isViewAllowed": false,
"name": "SAMPLE_PACKAGE",
"type": "DEVELOPED"
}
The problem is that the lookups key is not always present so, you cannot use the [] on it. So, instead you can use the map function and provide a default before piping to the map function like below
# Build a two-element array: the package id, then a trimmed view of every integration.
[
{ id: .id },
{
integrations: [
.integrations[]|{
id: .id,
code: .code,
dependencies: {
# `// []` substitutes an empty array when the key is missing, so map() always receives an array;
# `[0]` then takes the first mapped entry, which is null when the array is empty.
connections: (.dependencies.connections//[]|map({id,status}))[0],
lookups: (.dependencies.lookups//[]|map({name,status}))[0]
}
}
]
}
]
The (.dependencies.lookups//[]|map({name,status}))[0] has the effect of passing an empty array to the map function which results in a null value when accessing the first element.
See in action https://jqplay.org/s/zQBkHtnzOd1
The provided jq statement works fine when each array holds a single element, but if an array contains multiple elements it only fetches the first one. I also updated the dependencies object to capture all the arrays (connections, lookups, certificates, libraries, integrations).
Below is the modified one. Please suggest for any better options.
# Build a two-element array: the package id, then a trimmed view of every integration.
[
{ id: .id },
{
integrations: [
.integrations[]|{
id: .id,
code: .code,
dependencies: {
# Each list is kept whole (no [0]); `// []` supplies an empty array when the key is missing.
connections: (.dependencies.connections//[]|map({id,status})),
lookups: (.dependencies.lookups//[]|map({name,status})),
certificates: (.dependencies.certificates//[]|map({id,status})),
libraries: (.dependencies.libraries//[]|map({code,status,version})),
integrations: (.dependencies.integrations//[]|map({code,version}))
}
}
]
}
# del(...) removes every value equal to [] anywhere in the result, dropping the defaulted lists.
]|del(..|select(.==[]))
Note: To remove the empty arrays del function is added which is giving the below output :
[
{
"id": "SAMPLE_PACKAGE"
},
{
"integrations": [
{
"id": "HELLO_INTEGRATION|01.00.0000",
"code": "HELLO_INTEGRATION",
"dependencies": {
"connections": [
{
"id": "HELLO_WORLD1",
"status": "CONFIGURED"
},
{
"id": "HELLO_WORLD2",
"status": "CONFIGURED"
}
]
}
},
{
"id": "HELLO_INTEGRATIO_LOOKUP|01.00.0000",
"code": "HELLO_INTEGRATIO_LOOKUP",
"dependencies": {
"connections": [
{
"id": "HELLO_WORLD1",
"status": "CONFIGURED"
}
],
"lookups": [
{
"name": "COMMON_LOOKUP_VARIABLES",
"status": "CONFIGURED"
}
]
}
},
{
"id": "HI_INTEGRATION|01.00.0000",
"code": "HI_INTEGRATION",
"dependencies": {
"connections": [
{
"id": "HELLO_WORLD1",
"status": "CONFIGURED"
}
]
}
}
]
}
]

How can I extract subdomains from a json file?

I have a long list of JSON files. I want to extract the subdomain of harvard.edu, which is in the host field ("host": "ceonlineb2b.hms.harvard.edu"), using bash. I would be happy if anyone can help out. Below is only a snippet of the JSON file.
{
"data": {
"total_items": 3,
"offset": 0,
"limit": 1,
"items": [
{
"name": "ceonlineb2b.hms.harvard.edu",
"alexa": null,
"cert_summary": null,
"dns_records": {
"A": [
"3.221.168.206",
"54.174.253.3"
],
"AAAA": null,
"CAA": null,
"CNAME": [
"hms-moodleb2b-prod.cabem.com"
],
"MX": null,
"NS": null,
"SOA": null,
"TXT": null,
"SPF": null,
"updated_at": "2021-05-14T23:12:43.332816923Z"
},
"hosts_enrichment": [
{
"ip": "3.221.168.206",
"as_num": 14618,
"as_org": "amazon-aes",
"isp": "amazon.com",
"city_name": "ashburn",
"country": "united states",
"country_iso_code": "us",
"location": {
"lat": 39.0481,
"lon": -77.4728
}
},
{
"ip": "54.174.253.3",
"as_num": 14618,
"as_org": "amazon-aes",
"isp": "amazon.com",
"city_name": "ashburn",
"country": "united states",
"country_iso_code": "us",
"location": {
"lat": 39.0481,
"lon": -77.4728
}
}
],
"http_extract": {
"cookies": [
{
"domain": "",
"expire": "0001-01-01T00:00:00Z",
"http_only": true,
"key": "MoodleSession",
"max_age": 0,
"path": "/",
"security": true,
"value": "tqhmqc4muk513sad1bmnl3kocj"
}
],
"description": "",
"emails": null,
"final_redirect_url": {
"full_uri": "https://ceonlineb2b.hms.harvard.edu/login/index.php",
"host": "ceonlineb2b.hms.harvard.edu",
"path": "/login/index.php"
},
"extracted_at": "2020-10-04T20:55:26.043777194Z",
"favicon_sha256": "",
"http_headers": [
{
"name": "date",
"value": "Sun, 04 Oct 2020 20:55:25 GMT"
},
{
"name": "content-type",
"value": "text/html; charset=utf-8"
},
{
"name": "server",
"value": "Apache/2.4.46 () OpenSSL/1.0.2k-fips"
},
{
"name": "x-powered-by",
"value": "PHP/7.2.24"
},
{
"name": "content-language",
"value": "en"
},
{
"name": "content-script-type",
"value": "text/javascript"
},
{
"name": "content-style-type",
"value": "text/css"
},
{
"name": "x-ua-compatible",
"value": "IE=edge"
},
{
"name": "cache-control",
"value": "private, pre-check=0, post-check=0, max-age=0, no-transform"
},
{
"name": "pragma",
"value": "no-cache"
},
{
"name": "expires",
"value": ""
},
{
"name": "accept-ranges",
"value": "none"
},
{
"name": "set-cookie",
"value": "MoodleSession=tqhmqc4muk513sad1bmnl3kocj; path=/; secure;HttpOnly;Secure;SameSite=None"
}
],
"http_status_code": 200,
"links": [
{
"anchor": "Forgotten your username or password?",
"url": "https://ceonlineb2b.hms.harvard.edu/login/forgot_password.php",
"url_host": "ceonlineb2b.hms.harvard.edu"
},
{
"anchor": "Privacy Statement",
"url": "/local/staticpage/view.php?page=privacy-statement",
"url_host": ""
},
{
"anchor": "Terms of Service",
"url": "/local/staticpage/view.php?page=terms-of-service",
"url_host": ""
},
{
"anchor": "Copyright Information",
"url": "/local/staticpage/view.php?page=copyright-information",
"url_host": ""
}
],
"meta_tags": [
{
"name": "keywords",
"value": "moodle, HMS Postgraduate Courses: Log in to the site"
},
{
"name": "format-detection",
"value": "telephone=no"
},
{
"name": "robots",
"value": "noindex"
},
{
"name": "viewport",
"value": "width=device-width, initial-scale=1.0"
}
],
"robots_txt": "",
"scripts": [
"https://ceonlineb2b.hms.harvard.edu/theme/yui_combo.php?rollup/3.17.2/yui-moodlesimple-min.js",
"https://ceonlineb2b.hms.harvard.edu/lib/javascript.php/1589465014/lib/javascript-static.js",
"https://ceonlineb2b.hms.harvard.edu/lib/javascript.php/1589465014/lib/requirejs/require.min.js",
"https://ceonlineb2b.hms.harvard.edu/theme/javascript.php/hms/1589465013/footer"
],
"styles": [
"https://ceonlineb2b.hms.harvard.edu/theme/yui_combo.php?rollup/3.17.2/yui-moodlesimple-min.css",
"https://ceonlineb2b.hms.harvard.edu/theme/styles.php/hms/1589465013_1/all"
],
"title": "HMS Postgraduate Courses: Log in to the site"
},
"is_CNAME": null,
"is_MX": null,
"is_NS": null,
"is_PTR": null,
"is_subdomain": true,
"name_without_suffix": "ceonlineb2b.hms.harvard",
"updated_at": "2021-05-16T10:25:01.59086376Z",
"user_scan_at": null,
"whois_parsed": null,
"security_score": {
"score": 100
},
"cve_list": null,
"technologies": [
{
"name": "Moodle",
"version": ""
},
{
"name": "RequireJS",
"version": ""
}
],
"trackers": null,
"organizations": null
}
]
}
}
For json parsing on bash, I recommend checking out jq. It's lightweight and versatile.
We can use the -r flag to output only values.
--raw-output / -r:
With this option, if the filter's result is a string then it will be written directly to standard output rather than being formatted as a JSON string with quotes.
The structure of the JSON you provided has the subdomain at .data.items[].http_extract.final_redirect_url.host
{
"data": {
"items": [
{
"http_extract": {
"final_redirect_url": {
"full_uri": "https://ceonlineb2b.hms.harvard.edu/login/index.php",
"host": "ceonlineb2b.hms.harvard.edu",
"path": "/login/index.php"
},
...
I've saved your json to a file, se.json
Example extracting full domain with jq
# -r prints each matched string without JSON quotes; .data.items[] visits every item in the array.
jq -r '.data.items[].http_extract.final_redirect_url.host' se.json
Output
ceonlineb2b.hms.harvard.edu
To extract the subdomain, just perform a search/replace using sub().
sub(regex; tostring) sub(regex; string; flags)
Emit the string obtained by replacing the first match of regex in the input string with tostring, after interpolation. tostring should be a jq string, and may contain references to named captures. The named captures are, in effect, presented as a JSON object (as constructed by capture) to tostring, so a reference to a captured variable named "x" would take the form: "\(.x)".
Extracting subdomain using jq
# The first argument of sub() is a regex: escape the dots (a bare "." matches any character)
# and anchor with "$" so that only the literal ".hms.harvard.edu" suffix is stripped.
jq -r '.data.items[].http_extract.final_redirect_url.host | sub("\\.hms\\.harvard\\.edu$";"")' se.json
Output
ceonlineb2b

POWERSHELL - How to access multilevel child elements in JSON file with condtion

Can someone please send me a solution or a link, for PowerShell 5 and 7, showing how I can access child elements when a specific condition is fulfilled in a JSON file that I have as output.json? I haven't found it on the net.
I want to retrieve value of the "children" elements if type element has value FILE and to put that into some list. So final result should be [test1.txt,test2.txt]
Thank you!!!
{
"path": {
"components": [
"Packages"
],
"parent": "",
"name": "Packages",
},
"children": {
"values": [
{
"path": {
"components": [
"test1.txt"
],
"parent": "",
"name": "test1.txt",
},
"type": "FILE",
"size": 405
},
{
"path": {
"components": [
"test2.txt"
],
"parent": "",
"name": "test2.txt",
},
"type": "FILE",
"size": 409
},
{
"path": {
"components": [
"FOLDER"
],
"parent": "",
"name": "FOLDER",
},
"type": "DIRECTORY",
"size": 1625
}
]
"start": 0
}
}
1.) The JSON is incorrect; I assume that this is the correct version:
{
"path": {
"components": [
"Packages"
],
"parent": "",
"name": "Packages"
},
"children": {
"values": [
{
"path": {
"components": [
"test1.txt"
],
"parent": "",
"name": "test1.txt"
},
"type": "FILE",
"size": 405
},
{
"path": {
"components": [
"test2.txt"
],
"parent": "",
"name": "test2.txt"
},
"type": "FILE",
"size": 409
},
{
"path": {
"components": [
"FOLDER"
],
"parent": "",
"name": "FOLDER"
},
"type": "DIRECTORY",
"size": 1625
}
],
"start": 0
}
}
2.) The structure is not absolutely clear, but for your example this seems to me to be the correct solution:
# Parse the JSON text into objects.
$element = $json | ConvertFrom-Json
# Start from an empty array; @() is PowerShell's array literal.
$result = @()
# Append the path name of every child entry whose type is FILE.
$element.children.values | foreach {
    if ($_.type -eq 'FILE') { $result += $_.path.name }
}
# Emit the collected names as JSON.
$result | ConvertTo-Json
Be aware that the construct $result += $_.path.name is fine for up to ~10k items, but for very large collections it gets very slow and you need to use an ArrayList instead. https://adamtheautomator.com/powershell-arraylist/

jq json filter and keep original structure

I am really new to the command jq, and I am trying do some filtering to remove blocks of data that I don't want/need.
Here is an example of my JSON structure:
{
"BackupCfg": [
{
"type": "filesystem",
"repository": "trunk",
"url": "test.example.com",
"port": "394",
"cfg": [
{
"Default": "true",
"ID": "trunk00",
"Paths": [
"/etc",
"/home",
"/var",
"/usr/local",
"/opt",
"/root"
],
"Cron": "33 0 * * *"
}
]
},
{
"type": "filesystem",
"repository": "trunk02",
"url": "test.example.com",
"port": "394",
"cfg": [
{
"ID": "trunk01",
"Paths": [
"/opt/example",
"/opt/var_example"
],
"Cron": "*/30 0-23 * * *"
}
]
},
{
"type": "database",
"repository": "trunk-db",
"url": "test.example.com",
"port": "399",
"cfg": [
{
"Default": "true",
"ID": "trunk00",
"db_type": "mysql",
"db_hostname": "localhost",
"db_port": "3306",
"db_user": "root",
"db_pwd": "password",
"databases": [],
"Cron": "40 0 * * *"
},
{
"ID": "trunk01",
"db_type": "mysql",
"db_hostname": "localhost",
"db_port": "3307",
"db_user": "riit",
"db_pwd": "passwird",
"databases": [],
"Cron": "33 3 * * *"
},
{
"Default": "false",
"ID": "trunk02",
"db_type": "postgres",
"db_hostname": "localhost",
"db_port": "3308",
"db_user": "ruut",
"db_pwd": "passwurd",
"databases": [],
"Cron": "0 10 * * *"
}
]
}
]
}
I want to filter this in order to have only the "type": "filesystem", and get the following output:
{
"BackupCfg": [
{
"type": "filesystem",
"repository": "trunk",
"url": "test.example.com",
"port": "394",
"cfg": [
{
"Default": "true",
"ID": "trunk00",
"Paths": [
"/etc",
"/home",
"/var",
"/usr/local",
"/opt",
"/root"
],
"Cron": "33 0 * * *"
}
]
},
{
"type": "filesystem",
"repository": "trunk02",
"url": "test.example.com",
"port": "394",
"cfg": [
{
"ID": "trunk01",
"Paths": [
"/opt/example",
"/opt/var_example"
],
"Cron": "*/30 0-23 * * *"
}
]
}
]
}
I have tried some commands like
jq '.[][] | select(.type | contains("filesystem"))'
But it destroys the original structure.
I have searched around, and found lots of example, but lots doesn't work, or doesn't give me what I need.
Has someone any ideas?
If someone has also any good learning website in order to understand jq, that would be awesome!
Thanks in advance!
jq solution:
# |= updates .BackupCfg in place, so the enclosing object is kept and only the entries
# whose type equals "filesystem" remain in the array.
jq '.BackupCfg |= map(select(.type == "filesystem"))' file.json
The output:
{
"BackupCfg": [
{
"type": "filesystem",
"repository": "trunk",
"url": "test.example.com",
"port": "394",
"cfg": [
{
"Default": "true",
"ID": "trunk00",
"Paths": [
"/etc",
"/home",
"/var",
"/usr/local",
"/opt",
"/root"
],
"Cron": "33 0 * * *"
}
]
},
{
"type": "filesystem",
"repository": "trunk02",
"url": "test.example.com",
"port": "394",
"cfg": [
{
"ID": "trunk01",
"Paths": [
"/opt/example",
"/opt/var_example"
],
"Cron": "*/30 0-23 * * *"
}
]
}
]
}
https://stedolan.github.io/jq/manual/v1.5/#select(boolean_expression)