Remove comma from between JSON events - json

I was hoping to get Splunk to break up the content of the pages into events, but it's unable to. I'm trying to sed the comma that's in between the events out, but it's not going well. This is a portion of the json coming in
"last_updated":"2017-02-28T17:56:19Z"},{"id":588699,"name":null,...
and this is the sed line I'm trying
sed -e "s/},{/}+{/" -e "s/}[^}]*$/}/" secunia.txt | tr "+" "\n"
I've put it outside my for loop in the script that barmar helped with, but it's not pulling out the ,. What am I missing?
Here is some of the data:
{"id":588699,"name":null,"status":{"id":2963,"name":"Handled"},"priority":{"id":2873,"name":"Urgent"},"queue":{"id":2144,"name":"Default"},"description":null,"assigned_to":{"id":4120,"username":"user4#company.com"},"asset_list":{"id":4777,"name":"Info Security Threat_Splunk"},"advisory":{"id":199003,"advisory_identifier":"SA74447","title":"Blue Coat Security Analytics Multiple Vulnerabilities","released":"2016-12-21T15:24:53Z","modified_date":"2016-12-21T15:24:53Z","criticality":2,"criticality_description":"Highly critical","solution_status":4,"solution_status_description":"Partial Fix","where":1,"where_description":"From remote","cvss_score":10.0,"cvss_vector":"(AV:N/AC:L/Au:N/C:C/I:C/A:C/E:U/RL:TF/RC:C)","type":0,"is_zero_day":false},"created":"2016-12-21T15:33:09Z","pretty_id":79,"custom_score":null,"last_updated":"2016-12-21T15:40:28Z"},{"id":584252,"name":null,"status":{"id":2963,"name":"Handled"},"priority":{"id":2873,"name":"Urgent"},"queue":{"id":2144,"name":"Default"},"description":null,"assigned_to":{"id":4118,"username":"user3#company.com"},"asset_list":{"id":4657,"name":"PSS Middleware Environment"},"advisory":{"id":195840,"advisory_identifier":"SA73221","title":"Oracle Solaris Multiple Third Party Components Multiple Vulnerabilities","released":"2016-10-19T14:20:02Z","modified_date":"2016-12-19T14:42:30Z","criticality":2,"criticality_description":"Highly critical","solution_status":2,"solution_status_description":"Vendor Patched","where":1,"where_description":"From remote","cvss_score":10.0,"cvss_vector":"(AV:N/AC:L/Au:N/C:C/I:C/A:C/E:U/RL:OF/RC:C)","type":0,"is_zero_day":false},"created":"2016-12-20T13:43:24Z","pretty_id":76,"custom_score":null,"last_updated":"2017-01-11T19:47:09Z"}

Try this command -
sed -e "s/,//g" -e "s/}{/}\n{/" -e "s/}[^}]*$/}/" f

Replacing the event separator works here, this assumes it does not occur elsewhere in the input though. For example:
sed 's/},{/}\n{/' secunia.txt | jq -s .
Or with portable sed:
sed 's/},{/}\
{/' secunia.txt | jq -s .
Output:
[
{
"id": 588699,
"name": null,
"status": {
"id": 2963,
"name": "Handled"
},
"priority": {
"id": 2873,
"name": "Urgent"
},
"queue": {
"id": 2144,
"name": "Default"
},
"description": null,
"assigned_to": {
"id": 4120,
"username": "user4#company.com"
},
"asset_list": {
"id": 4777,
"name": "Info Security Threat_Splunk"
},
"advisory": {
"id": 199003,
"advisory_identifier": "SA74447",
"title": "Blue Coat Security Analytics Multiple Vulnerabilities",
"released": "2016-12-21T15:24:53Z",
"modified_date": "2016-12-21T15:24:53Z",
"criticality": 2,
"criticality_description": "Highly critical",
"solution_status": 4,
"solution_status_description": "Partial Fix",
"where": 1,
"where_description": "From remote",
"cvss_score": 10,
"cvss_vector": "(AV:N/AC:L/Au:N/C:C/I:C/A:C/E:U/RL:TF/RC:C)",
"type": 0,
"is_zero_day": false
},
"created": "2016-12-21T15:33:09Z",
"pretty_id": 79,
"custom_score": null,
"last_updated": "2016-12-21T15:40:28Z"
},
{
"id": 584252,
"name": null,
"status": {
"id": 2963,
"name": "Handled"
},
"priority": {
"id": 2873,
"name": "Urgent"
},
"queue": {
"id": 2144,
"name": "Default"
},
"description": null,
"assigned_to": {
"id": 4118,
"username": "user3#company.com"
},
"asset_list": {
"id": 4657,
"name": "PSS Middleware Environment"
},
"advisory": {
"id": 195840,
"advisory_identifier": "SA73221",
"title": "Oracle Solaris Multiple Third Party Components Multiple Vulnerabilities",
"released": "2016-10-19T14:20:02Z",
"modified_date": "2016-12-19T14:42:30Z",
"criticality": 2,
"criticality_description": "Highly critical",
"solution_status": 2,
"solution_status_description": "Vendor Patched",
"where": 1,
"where_description": "From remote",
"cvss_score": 10,
"cvss_vector": "(AV:N/AC:L/Au:N/C:C/I:C/A:C/E:U/RL:OF/RC:C)",
"type": 0,
"is_zero_day": false
},
"created": "2016-12-20T13:43:24Z",
"pretty_id": 76,
"custom_score": null,
"last_updated": "2017-01-11T19:47:09Z"
}
]

Related

Return the value, if another value exists using jq

I have a json fragment like below. I want to return the value of the name key together with the value of the version key if the version key has a value. The expected output is "name": "value" , "version" : "value"
It is better to have a solution with grep or jq.
node ~/wappalyzer/src/drivers/npm/cli.js https://youtube.com | jq .
Result:
{
"urls": {
"https://youtube.com/": {
"status": 301
},
"https://www.youtube.com/": {
"status": 200
}
},
"technologies": [
{
"slug": "youtube",
"name": "YouTube",
"description": "YouTube is a video sharing service where users can create their own profile, upload videos, watch, like and comment on other videos.",
"confidence": 100,
"version": null,
"icon": "YouTube.png",
"website": "http://www.youtube.com",
"cpe": null,
"categories": [
{
"id": 14,
"slug": "video-players",
"name": "Video players"
}
],
"rootPath": true
},
{
"slug": "polymer",
"name": "Polymer",
"description": null,
"confidence": 100,
"version": "3.5.0",
"icon": "Polymer.png",
"website": "http://polymer-project.org",
"cpe": null,
"categories": [
{
"id": 12,
"slug": "javascript-frameworks",
"name": "JavaScript frameworks"
}
],
"rootPath": true
},
{
"slug": "google-ads",
"name": "Google Ads",
"description": "Google Ads is an online advertising platform developed by Google.",
"confidence": 100,
"version": null,
"icon": "Google Ads.svg",
"website": "https://ads.google.com",
"cpe": null,
"categories": [
{
"id": 36,
"slug": "advertising",
"name": "Advertising"
}
]
},
{
"slug": "hammer-js",
"name": "Hammer.js",
"description": null,
"confidence": 100,
"version": "2.0.2",
"icon": "Hammer.js.png",
"website": "https://hammerjs.github.io",
"cpe": null,
"categories": [
{
"id": 59,
"slug": "javascript-libraries",
"name": "JavaScript libraries"
}
],
"rootPath": true
},
{
"slug": "google-font-api",
"name": "Google Font API",
"description": "Google Font API is a web service that supports open-source font files that can be used on your web designs.",
"confidence": 100,
"version": null,
"icon": "Google Font API.png",
"website": "http://google.com/fonts",
"cpe": null,
"categories": [
{
"id": 17,
"slug": "font-scripts",
"name": "Font scripts"
}
],
"rootPath": true
},
{
"slug": "recaptcha",
"name": "reCAPTCHA",
"description": "reCAPTCHA is a free service from Google that helps protect websites from spam and abuse.",
"confidence": 100,
"version": null,
"icon": "reCAPTCHA.svg",
"website": "https://www.google.com/recaptcha/",
"cpe": null,
"categories": [
{
"id": 16,
"slug": "security",
"name": "Security"
}
]
},
{
"slug": "google-ads-conversion-tracking",
"name": "Google Ads Conversion Tracking",
"description": "Google Ads Conversion Tracking is a free tool that shows you what happens after a customer interacts with your ads.",
"confidence": 100,
"version": null,
"icon": "Google.svg",
"website": "https://support.google.com/google-ads/answer/1722022",
"cpe": null,
"categories": [
{
"id": 10,
"slug": "analytics",
"name": "Analytics"
}
]
},
{
"slug": "hsts",
"name": "HSTS",
"description": "HTTP Strict Transport Security (HSTS) informs browsers that the site should only be accessed using HTTPS.",
"confidence": 100,
"version": null,
"icon": "default.svg",
"website": "https://www.rfc-editor.org/rfc/rfc6797#section-6.1",
"cpe": null,
"categories": [
{
"id": 16,
"slug": "security",
"name": "Security"
}
],
"rootPath": true
},
{
"slug": "webpack",
"name": "webpack",
"description": "Webpack is an open-source JavaScript module bundler.",
"confidence": 50,
"version": null,
"icon": "webpack.svg",
"website": "https://webpack.js.org/",
"cpe": null,
"categories": [
{
"id": 19,
"slug": "miscellaneous",
"name": "Miscellaneous"
}
]
},
{
"slug": "pwa",
"name": "PWA",
"description": "Progressive Web Apps (PWAs) are web apps built and enhanced with modern APIs to deliver enhanced capabilities, reliability, and installability while reaching anyone, anywhere, on any device, all with a single codebase.",
"confidence": 100,
"version": null,
"icon": "PWA.svg",
"website": "https://web.dev/progressive-web-apps/",
"cpe": null,
"categories": [
{
"id": 19,
"slug": "miscellaneous",
"name": "Miscellaneous"
}
],
"rootPath": true
},
{
"slug": "open-graph",
"name": "Open Graph",
"description": "Open Graph is a protocol that is used to integrate any web page into the social graph.",
"confidence": 100,
"version": null,
"icon": "Open Graph.png",
"website": "https://ogp.me",
"cpe": null,
"categories": [
{
"id": 19,
"slug": "miscellaneous",
"name": "Miscellaneous"
}
],
"rootPath": true
},
{
"slug": "module-federation",
"name": "Module Federation",
"description": "Module Federation is a webpack technology for dynamically loading parts of other independently deployed builds.",
"confidence": 50,
"version": null,
"icon": "Module Federation.png",
"website": "https://webpack.js.org/concepts/module-federation/",
"cpe": null,
"categories": [
{
"id": 19,
"slug": "miscellaneous",
"name": "Miscellaneous"
}
]
},
{
"slug": "http-3",
"name": "HTTP/3",
"description": "HTTP/3 is the third major version of the Hypertext Transfer Protocol used to exchange information on the World Wide Web.",
"confidence": 100,
"version": null,
"icon": "HTTP3.svg",
"website": "https://httpwg.org/",
"cpe": null,
"categories": [
{
"id": 19,
"slug": "miscellaneous",
"name": "Miscellaneous"
}
],
"rootPath": true
}
]
}
I expect this:
Polymer 3.5.0
Hammer.js 2.0.2
With the alternative operator //, you can default to something else if the input happens to be falsy. With the empty function, you can simply discard that input.
jq -r '.[] | "\(.name) \(.version // empty)"'
Polymer 3.5.0
Hammer.js 2.0.2
Demo
An alternate to #pmf's solution, making the same assumptions about the structure of the data: select the objects that have a "truthy" version:
jq -r '.[] | select(.version) | "\(.name) \(.version)"' file.json
With the corrected input data, you want
jq -r '.technologies[] | select(.version) | "\(.name) \(.version)"' file.json
# .....^^^^^^^^^^^^^^^
Same change for #pmf's solution.

Check if a key exists and return another key

I need help with jq syntax on how to return the Gitlab job ID if it contains an artifact. The JSON output looks like this (removed a lot of unrelated info from it and added [...]):
[{
"id": 3219589880,
"status": "success",
"stage": "test",
"name": "job_with_no_artifact",
"ref": "main",
"tag": false,
"coverage": null,
"allow_failure": false,
"created_at": "2022-10-24T18:21:25.119Z",
"started_at": "2022-10-24T18:21:25.986Z",
"finished_at": "2022-10-24T18:21:38.464Z",
"duration": 12.478682,
"queued_duration": 0.499786,
"user": {
"id": 123456789,
[...]
},
"commit": {
"id": "5e0e1f287d20daf2036a3ca71c656dce55999265",
[...]
"pipeline": {
"id": 123456789,
[...]
"project": {
"ci_job_token_scope_enabled": false
},
"artifacts": [],
"runner": {
"id": 12270859,
[...]
},
"artifacts_expire_at": null,
"tag_list": []
}, {
"id": 3219589878,
"status": "success",
"stage": "test",
"name": "create_artifact_job_2",
"ref": "main",
"tag": false,
"coverage": null,
"allow_failure": false,
"created_at": "2022-10-24T18:21:25.111Z",
"started_at": "2022-10-24T18:21:25.922Z",
"finished_at": "2022-10-24T18:21:39.090Z",
"duration": 13.168405,
"queued_duration": 0.464364,
"user": {
"id": 123456789,
[...]
},
"commit": {
"id": "5e0e1f287d20daf2036a3ca71c656dce55999265",
[...]
},
"pipeline": {
"id": 675641982,
[...],
"project": {
"ci_job_token_scope_enabled": false
},
"artifacts_file": {
"filename": "artifacts.zip",
"size": 223
},
"artifacts": [{
"file_type": "archive",
"size": 223,
"filename": "artifacts.zip",
"file_format": "zip"
}, {
"file_type": "metadata",
"size": 153,
"filename": "metadata.gz",
"file_format": "gzip"
}],
"runner": {
"id": 12270845,
[...]
},
"artifacts_expire_at": "2022-10-25T18:21:35.859Z",
"tag_list": []
}, {
"id": 3219589876,
"status": "success",
"stage": "test",
"name": "create_artifact_job_1",
"ref": "main",
"tag": false,
"coverage": null,
"allow_failure": false,
"created_at": "2022-10-24T18:21:25.103Z",
"started_at": "2022-10-24T18:21:25.503Z",
"finished_at": "2022-10-24T18:21:41.407Z",
"duration": 15.904028,
"queued_duration": 0.098837,
"user": {
"id": 123456789,
[...]
},
"commit": {
"id": "5e0e1f287d20daf2036a3ca71c656dce55999265",
[...]
},
"pipeline": {
"id": 123456789,
[...]
},
"web_url": "WEB_URL",
"project": {
"ci_job_token_scope_enabled": false
},
"artifacts_file": {
"filename": "artifacts.zip",
"size": 217
},
"artifacts": [{
"file_type": "archive",
"size": 217,
"filename": "artifacts.zip",
"file_format": "zip"
}, {
"file_type": "metadata",
"size": 152,
"filename": "metadata.gz",
"file_format": "gzip"
}],
"runner": {
"id": 12270857,
},
"artifacts_expire_at": "2022-10-25T18:21:37.808Z",
"tag_list": []
}]
I've been trying to do either of the following using jQ:
Either:
Check if artifacts_file key exists in each iteration and if it does return the (job) id (so .[].id)
Check if artifacts array is empty in each iteration and if it is empty return the (job) id.
In both cases I'm able to do the first part but I am not sure how to return the .id key.
Related stackoverflow questions that I've been trying to utilize and adapt to my case:
jq - return array value if its length is not null
How to check for presence of 'key' in jq before iterating over the values
What I have so far: jq '[.[].artifacts[]|select(length > 0)] | .[]' which returns all the artifacts found (but it doesn't contain the .id of the job).
Checking the existence of a field using has:
.[] | select(has("artifacts_file")).id
3219589878
3219589876
Demo
Checking if a field is an empty array by comparing it to []:
.[] | select(.artifacts == []).id
3219589880
Demo

Json parsing losgstash

i can't parse this json with logstash... someone could help me?
seems like the way it is parsed can't be readed by logstash.
there is a ruby code to parse this?
I cannot extract the fields nested in the square brackets
[
{
"capacity": 0,
"created_at": "2021-04-06T16:18:34+02:00",
"decisions": [
{
"duration": "22h16m4.141220361s",
"id": 842,
"origin": "CAPI",
"scenario": "crowdsecurity/http-bad-user-agent",
"scope": "ip",
"simulated": false,
"type": "ban",
"value": "3.214.184.223/32"
},
.
.
.
{
"duration": "22h16m4.195897491s",
"id": 904,
"origin": "CAPI",
"scenario": "crowdsecurity/http-backdoors-attempts",
"scope": "ip",
"simulated": false,
"type": "ban",
"value": "51.68.11.195/32"
}
],
"events": null,
"events_count": 0,
"id": 12,
"labels": null,
"leakspeed": "",
"machine_id": "N/A",
"message": "",
"scenario": "update : +63/-0 IPs",
"scenario_hash": "",
"scenario_version": "",
"simulated": false,
"source": {
"scope": "Community blocklist",
"value": ""
},
"start_at": "2021-04-06 16:18:34.750588276 +0200 +0200",
"stop_at": "2021-04-06 16:18:34.750588717 +0200 +0200"
}
]
Require JSON
JSON.parse(yourString)
Would likely be what you're looking for.
The module is described here

Parse and filter complex JSON Response in Python (the easy compute method)

I have a list of dictionaries (basically JSON Response of an endpoint)
I would need to Parse this 16000 lines of json objects and fitler the documents/objects which match criteria that
whose leaf element/field : statusInfo/status in not "UP" and of those filtered objects, just return "name" , "serviceUrl","status"
example :
"ADMIN-V1" "http://aws-ec2.aws.com:4435" "Warning"
I have been researching about JSONPath module , but there is no good documentation about it, and I could not find any easier way.
Any guidance is highly appreciated.
here is a snippet from long 16000 lines of JSON response.
[
{
"id": "9c108ec5",
"name": "USER-V2",
"managementUrl": "http://aws-ec2.aws.com:5784/",
"healthUrl": "http://aws-ec2.aws.com:5784/health",
"serviceUrl": "http://aws-ec2.aws.com:5784/",
"statusInfo": {
"status": "UP",
"timestamp": 1566663146681,
"details": {
"description": " Eureka Discovery Client",
"status": "UP"
}
},
"source": "discovery",
"metadata": {},
"info": {
"component": "user",
"description": "User REST Resource",
"version": "2.2.1",
"git": {
"commit": {
"time": "07/27/2018 # 15:06:55 CDT",
"id": "b2a1b37"
},
"branch": "refs/tags/v2.2.1"
}
}
},
{
"id": "1a381f20",
"name": "ADMIN-V1",
"managementUrl": "http://aws-ec2.aws.com:4435/",
"healthUrl": "http://aws-ec2.aws.com:4435/health",
"serviceUrl": "http://aws-ec2.aws.com:4435/",
"statusInfo": {
"status": "Warning",
"timestamp": 1566663146682,
"details": {
"description": "Spring Cloud Eureka Discovery Client",
"status": "Warning"
}
},
"source": "discovery",
"metadata": {},
"info": {
"description": "Exchange Admin REST Resource",
"api": {
"version": "1.2.1",
"name": "admin",
"link": "https://app.swaggerhub.com/apis/AWSExchange/admin/1.2.1"
},
"implementation": "admin",
"version": "1.1.0",
"git": {
"commit": {
"time": "01/04/2019 # 15:36:48 UTC",
"id": "39d5551"
},
"branch": "refs/tags/v1.1.0"
}
}
}
]
If your json file contains one big array, you'll want to stream that file in truncating out the array. Then use fromstream/1 to rebuild the objects and filtering them out as you go.
I don't have a representative file to test out the performance myself, but give this a try:
$ jq --stream -n 'fromstream(1|truncate_stream(inputs))
| select(.statusInfo.status != "UP")
| .name, .serviceUrl, .statusInfo.status
' input.json

Parsing Git Json with Regular Express

I am taking a Github json file and parsing it with Java's regular expression library JsonPath. I am having a problem parsing arrays that do not have labels.
I need to send a email every time a particular file is changed in our repository.
Here is the Git Json:
{
"trigger": "push",
"payload": {
"type": "GitPush",
"before": "xxxxxxxx",
"after": "yyyyyyyy",
"branch": "branch-name",
"ref": "refs/heads/branch-name",
"repository": {
"id": 42,
"name": "repo",
"title": "repo",
"type": "GitRepository"
},
"beanstalk_user": {
"type": "Owner",
"id": 42,
"login": "username",
"email": "user#example.org",
"name": "Name Surname"
},
"commits": [
{
"type": "GitCommit",
"id": "ffffffff",
"message": "Important changes.",
"branch": "branch-name",
"author": {
"name": "Name Surname",
"email": "user#example.org"
},
"beanstalk_user": {
"type": "Owner",
"id": 42,
"login": "username",
"email": "user#example.org",
"name": "Name Surname"
},
"changed_files": {
"added": [
"NEWFILE",
],
"deleted": [
"Gemfile",
"NEWFILE"
],
"modified": [
"README.md",
"NEWFILE"
],
"copied": [
]
},
"changeset_url": "https://subdomain.github.com/repository-name/changesets/ffffffff",
"committed_at": "2014/08/18 13:30:29 +0000",
"parents": [
"afafafaf"
]
}
]
}
}
This is the expression I am using: to get the commits
$..changed_files
This return the whole changed files part but I can not explicitly choose the name "NEWFILE"
I tried
$..changed_files.*[?(#.added == "NEWFILE")]
$..changed_files.*[?(#.*== "NEWFILE")]
It just returns a empty array.
I just want it to return Newfile and what type of change. Any Ideas?
You can use the following JsonPath to retrieve the commits which list "NEWFILE" as an added file :
$.payload.commits[?(#.changed_files.added.indexOf("NEWFILE") != -1)]