Convert txt to json file - json

I have a text file, need to convert that into JSON, please help
RULE_NAME{
RULE 1
SOURCE
DESTINAION
PORT
POROTCOL
RULE 2
SOURCE
DESTINAION
PORT
POROTCOL
RULE 3
....
}
Need json format to be like this
Local{
01
SOURCE : SAG
DESTINAION : any
PORT :02
POROTCOL: icmp
04
SOURCE : SAG
DESTINAION :any
PORT: any
POROTCOL : tcp
}
bk1-2-internal{
02
SOURCE : any
DESTINAION : SoftLY
PORT :any
POROTCOL: any
28
SOURCE : 119.111.126.115/18
DESTINAION :129.37.164.74/30
PORT: 112
POROTCOL : udpt
}
My text file looks like this:
>Local = 01 : SAG = any = 02 = tcp
>Local = 04 : SAG = any = any = tcp
>bk1-2-internal = 2 : any = SoftLY = any = any
>bk1-2-internal = 28 : 119.111.126.115/18 = 129.37.164.74/30 = 112 = udpt
Goes upto 200 more lines with same format
i tried the code below but does not get expected structure. please check and assist in getting expected output, please feel free to contact at your own space, need to close this by today, please help
#filename = '/home/AY.txt'
import json
# the file to be converted
filename = '/home/ay/txt'
# resultant dictionary
dict1 = {}
# fields in the sample file
fields =['name', 'source', 'destination', 'port' 'protocol']
with open(filename) as fh:
# count variable for employee id creation
l = 1
for line in fh:
# reading line by line from the text file
description = list( line.strip().split(None, 4))
# for output see below
print(description)
# for automatic creation of id for each employee
sno =' '+str(l)
# loop variable
i = 0
# intermediate dictionary
dict2 = {}
while i<len(fields):
# creating dictionary for each employee
dict2[fields[i]]= description[i]
i = i + 1
# appending the record of each employee to
# the main dictionary
dict1[sno]= dict2
l = l + 1
# creating json file
out_file = open("/home/task.json", "w")
json.dump(dict1, out_file, indent = 4)
out_file.close()

Code
import os
import json
def json_from_log(path):
'''
Convert log to json text file as follows
Steps:
Create dictionary from log file
Deserialize dictionary to json
Output file same name as input but with suffix of json
'''
outfile = os.path.splitext(path)[0] + '.json' # same as input but with json extension
with open(path, 'r') as fread, open(outfile, 'w') as outfile:
result = {}
for line in fread:
line = line.strip()
rule, address = line.split(':')
rule = rule[1:-1] # skip '>', and drop trailing space ata end of string
address = address.lstrip() # Drop leading space
rule_id, rule_number = rule.split(' = ')
source, destination, port, protocol = address.split(' = ')
port = int(port) if port.isnumeric() else port
# Add nested ditionaries if they don't exist for key
result.setdefault(rule_id, {})
result[rule_id].setdefault(rule_number, {})
# Place in dictionary
result[rule_id][rule_number].update({"SOURCE":source,
"DESTINAION": destination,
"PORT": port,
"PROTOCOL": protocol})
json.dump(result, outfile, indent = 1) # indent = 1 allows "pretty" output
Usage
json_from_log('test3.txt')
Input File: test3.txt
>Local = 01 : SAG = any = 02 = tcp
>Local = 04 : SAG = any = any = tcp
>bk1-2-internal = 2 : any = SoftLY = any = any
>bk1-2-internal = 28 : 119.111.126.115/18 = 129.37.164.74/30 = 112 = udpt
Output File: test3.json
{
"Local": {
"01": {
"SOURCE": "SAG",
"DESTINAION": "any",
"PORT": 2,
"PROTOCOL": "tcp"
},
"04": {
"SOURCE": "SAG",
"DESTINAION": "any",
"PORT": "any",
"PROTOCOL": "tcp"
}
},
"bk1-2-internal": {
"2": {
"SOURCE": "any",
"DESTINAION": "SoftLY",
"PORT": "any",
"PROTOCOL": "any"
},
"28": {
"SOURCE": "119.111.126.115/18",
"DESTINAION": "129.37.164.74/30",
"PORT": 112,
"PROTOCOL": "udpt"
}
}
}

Related

Creating json with RUBY looping through SQL Server table

This is a followup to this question:
Ruby create JSON from SQL Server
I was able to create nested arrays in JSON. But I'm struggling with looping through records and appending a file with each record. Also how would I add a root element just at the top of the json and not on each record. "aaSequences" needs to be at the top just once... I also need a comma between each record.
here is my code so far
require 'pp'
require 'tiny_tds'
require 'awesome_print'
require 'json'
class Document
def initialize strategy
#document = strategy
#load helper functions
load "helpers_ruby.rb"
#set environment 'dev', 'qa', or 'production'
load "envconfig_ruby.rb"
end
def StartUP
#document.StartUP
end
def getseqrecord
#document.getseqrecord
end
end
class GetSqlaaSequence
def StartUP
##system "clear" ##linux
system "cls" ##Windows
# create connection to db
$connReportingDB = createReportingxxSqlConn($ms_sql_host, $ms_sql_user, $ms_sql_password, $ms_sql_dbname)
##$currentDateTime = DateTime.now
##pp 'def StartUP ran at: '+$currentDateTime.to_s
end
def getseqrecord
# get the aaaaSequences data
#result = $connReportingDB.execute("SELECT
[jsonFile]
,[id]
,[title]
,[authorIds]
,[name]
,[aminoAcids]
,[schemaId]
,[registryId]
,[namingStrategy]
FROM tablename
")
$aaSequences = Array.new
#i = 0
#result.each do |aaSequence|
jsonFile = aaSequence['jsonFile']
id = aaSequence['id']
title = aaSequence['title']
authorIds = aaSequence['authorIds']
name = aaSequence['name']
aminoAcids = aaSequence['aminoAcids']
schemaId = aaSequence['schemaId']
registryId = aaSequence['registryId']
namingStrategy = aaSequence['namingStrategy']
##end
#hash = Hash[
"jsonFile", jsonFile,
"id", id,
"title", title,
"authorIds", authorIds,
"name", name,
"aminoAcids", aminoAcids,
"schemaId", schemaId,
"registryId", registryId,
"namingStrategy", namingStrategy
]
#filename = jsonFile
jsonFileOutput0 = {:"#{title}" => [{:authorIds => ["#{authorIds}"],:aminoAcids => "#{aminoAcids}",:name => "#{name}",:schemaId => "#{schemaId}",:registryId => "#{registryId}",:namingStrategy => "#{namingStrategy}"}]}
jsonFileOutput = JSON.pretty_generate(jsonFileOutput0)
File.open(jsonFile,"a") do |f|
f.write(jsonFileOutput)
####ad the comma between records...Not sure if this is the best way to do it...
# File.open(jsonFile,"a") do |f|
# f.write(',')
# end
end
$aaSequences[#i] = #hash
#i = #i + 1
###createReportingSqlConn.close
end
end
end
Document.new(GetSqlaaSequence.new).StartUP
#get aaSequences and create json files
Document.new(GetSqlaaSequence.new).getseqrecord
here is a sample of the json it creates so far...
{
"aaSequences": [
{
"authorIds": [
"fff_fdfdfdfd"
],
"aminoAcids": "aminoAcids_data",
"name": "fdfdfddf-555_1",
"schemaId": "5555fdfd5",
"registryId": "5fdfdfdf",
"namingStrategy": "NEW_IDS"
}
]
}{
"aaSequences": [
{
"authorIds": [
"fff_fdfdfdfd"
],
"aminoAcids": "aminoAcids_data",
"name": "fdfdfddf-555_2",
"schemaId": "5555fdfd5",
"registryId": "5fdfdfdf",
"namingStrategy": "NEW_IDS"
}
]
}
and here is an example of what I need it to look like
{
"aaSequences": [
{
"authorIds": [
"authorIds_data"
],
"aminoAcids": "aminoAcids_data",
"name": "name_data",
"schemaId": "schemaId_data",
"registryId": "registryId_data",
"namingStrategy": "namingStrategy_data"
},
{
"authorIds": [
"authorIds_data"
],
"aminoAcids": "aminoAcids_data",
"name": "name_data",
"schemaId": "schemaId_data",
"registryId": "registryId_data",
"namingStrategy": "namingStrategy_data"
}
]
}
You can just do the whole thing in SQL using FOR JSON.
Unfortunately, arrays are not possible using this method. There are anumber of hacks, but the easiest one in your situation is to just append to [] using JSON_MODIFY
SELECT
authorIds = JSON_MODIFY('[]', 'append $', a.authorIds),
[aminoAcids],
[name],
[schemaId],
[registryId],
[namingStrategy]
FROM aaSequences a
FOR JSON PATH, ROOT('aaSequences');
db<>fiddle

Python- Issue parsing multi-layered API JSON into CSV

I'm trying to parse the NIH grant API and am running into a complex layering issue. In the JSON output below, I've been able to navigate into the "results" section which contains all the fields I want, except some are layered within another dictionary. What I'm trying to do is get the JSON data within "full_study_section", "organization", and "project_num_split" to be in the same layer as "appl_id", "contact_pi_name", "fiscal_year", and so forth. This post was helpful but I'm not quite sure how to level the layers through iteration.
{
"meta":{
"limit":25,
"offset":0,
"properties":{},
"search_id":null,
"sort_field":"project_start_date",
"sort_order":"desc",
"sorted_by_relevance":false,
"total":78665
},
"results":[
{
"appl_id":10314644,
"contact_pi_name":"BROCATO, EMILY ROSE",
"fiscal_year":2021,
"full_study_section":{
"group_code":"32",
"name":"Special Emphasis Panel[ZAA1 GG (32)]",
"sra_designator_code":"GG",
"sra_flex_code":"",
"srg_code":"ZAA1",
"srg_flex":""
},
"organization":{
"city":null,
"country":null,
"dept_type":"PHARMACOLOGY",
"external_org_id":353201,
"fips_country_code":null,
"org_city":"RICHMOND",
"org_country":"UNITED STATES",
"org_duns":[
"105300446"
],
"org_fips":"US",
"org_ipf_code":"353201",
"org_name":"VIRGINIA COMMONWEALTH UNIVERSITY",
"org_state":"VA",
"org_state_name":null,
"org_zipcode":"232980568"
},
"project_end_date":null,
"project_num":"1F31AA029259-01A1",
"project_num_split":{
"activity_code":"F31",
"appl_type_code":"1",
"full_support_year":"01A1",
"ic_code":"AA",
"serial_num":"029259",
"suffix_code":"A1",
"support_year":"01"
},
"project_start_date":"2022-03-07T05:00:00Z",
"subproject_id":null
},
Code:
import requests
import json
import csv
params = {
"criteria":
{
"fiscal_years":[2021]
},
"include_fields": [
"ApplId","ContactPiName","FiscalYear",
"OrgCountry","AllText",
"FullStudySection","Organization","ProjectEndDate",
"ProjectNum","ProjectNumSplit","ProjectStartDate","SubprojectId"
],
"offset":0,
"limit":25,
"sort_field":"project_start_date",
"sort_order":"desc"
}
response = requests.post("https://api.reporter.nih.gov/v2/projects/search", json = params)
#print(response.status_code)
#print(response.text)
resdecode = json.loads(response.text)
#print(json.dumps(resdecode, sort_keys=True, indent=4, separators=(',', ':')))
data = resdecode["results"]
#print(json.dumps(data, sort_keys=True, indent=4, separators=(',', ':')))
pns = resdecode["results"][0]["project_num_split"]
#print(json.dumps(pns, sort_keys=True, indent=4, separators=(',', ':')))
# for item in data:
# appl_id = item.get("appl_id")
# print(appl_id)
writerr = csv.writer(open('C:/Users/nkmou/Desktop/Venture/Tech Opportunities/NIH.csv', 'w', newline = ''))
count = 0
for row in resdecode:
if count == 0:
header = resdecode.keys()
writerr.writerow(header)
count += 1
writerr.writerow(row)
writerr.close()
In order to move the items under full_study_section, organization and project_num_split to same level as appl_id, contact_pi_name and fiscal_year you will have to loop through each of the results and recreate those key value pairs for those three dicts and then remove the full_study_section, organization and project_num_split keys once done. Below code should work as you expected.
import requests
import json
import csv
params = {
"criteria":
{
"fiscal_years":[2021]
},
"include_fields": [
"ApplId","ContactPiName","FiscalYear",
"OrgCountry","AllText",
"FullStudySection","Organization","ProjectEndDate",
"ProjectNum","ProjectNumSplit","ProjectStartDate","SubprojectId"
],
"offset":0,
"limit":25,
"sort_field":"project_start_date",
"sort_order":"desc"
}
response = requests.post("https://api.reporter.nih.gov/v2/projects/search", json = params)
resdecode = json.loads(response.text)
data = resdecode["results"]
for item in data:
x = ["full_study_section","organization","project_num_split"]
for i in x:
for key, value in item[i].items():
item[key] = value
del item[i]
with open('C:/Users/nkmou/Desktop/Venture/Tech Opportunities/NIH.csv', 'w', newline = '') as f:
writer = csv.writer(f)
count = 0
for row in data:
if count == 0:
header = row.keys()
writer.writerow(header)
count =+ 1
writer.writerow(row.values())
You can move the items to the required level and remove the dict.
import json
import pprint
pp = pprint
file = open("test.json")
jsonData = json.load(file)
full_study_section = jsonData['results'][0]['full_study_section']
organization = jsonData['results'][0]['organization']
project_num_split = jsonData['results'][0]['project_num_split']
jsonData['results'][0].update(full_study_section)
jsonData['results'][0].update(project_num_split)
jsonData['results'][0].update(organization)
jsonData['results'][0].pop('full_study_section')
jsonData['results'][0].pop('project_num_split')
jsonData['results'][0].pop('organization')
pp.pprint(jsonData)
Output:
{u'meta': {u'limit': 25,
u'offset': 0,
u'properties': {},
u'search_id': None,
u'sort_field': u'project_start_date',
u'sort_order': u'desc',
u'sorted_by_relevance': False,
u'total': 78665},
u'results': [{u'activity_code': u'F31',
u'appl_id': 10314644,
u'appl_type_code': u'1',
u'city': None,
u'contact_pi_name': u'BROCATO, EMILY ROSE',
u'country': None,
u'dept_type': u'PHARMACOLOGY',
u'external_org_id': 353201,
u'fips_country_code': None,
u'fiscal_year': 2021,
u'full_support_year': u'01A1',
u'group_code': u'32',
u'ic_code': u'AA',
u'name': u'Special Emphasis Panel[ZAA1 GG (32)]',
u'org_city': u'RICHMOND',
u'org_country': u'UNITED STATES',
u'org_duns': [u'105300446'],
u'org_fips': u'US',
u'org_ipf_code': u'353201',
u'org_name': u'VIRGINIA COMMONWEALTH UNIVERSITY',
u'org_state': u'VA',
u'org_state_name': None,
u'org_zipcode': u'232980568',
u'project_end_date': None,
u'project_num': u'1F31AA029259-01A1',
u'project_start_date': u'2022-03-07T05:00:00Z',
u'serial_num': u'029259',
u'sra_designator_code': u'GG',
u'sra_flex_code': u'',
u'srg_code': u'ZAA1',
u'srg_flex': u'',
u'subproject_id': None,
u'suffix_code': u'A1',
u'support_year': u'01'}]}

Issue with output while using python and jinja2 to generate grafana json panels for dashboards

I am working on some code to generate dashboard json files specific for Solarwinds.
I need to generate a set of panels to monitor all critical links in the solarwinds.
The issue I am facing is in the Grafana Text Panel that should present the description of each link. There are no queries, I am suing a csv file a generated with all the links relevant information.
Python code to load the csv file
# Python source to read csv fileSource csv file
## load list with csv data
with open('crit_links.csv', newline='') as f:
reader = csv.reader(f)
data = list(reader)
Sample line of the csv file
crilinks.csv sample
businesshq;17;SWLINKS-bus;121;Unit: 1 Slot: 0 Port: 2 Gbit - Level · L2L_HLD-area_comp;111.111.111.110;103;OSPF;FALSE;02/03/20 05:19;;;;
Python code that generates the panels list for the dashboard
grid_Xa = [0,6,8,10]
grid_Xb = [12,18,20,22]
for i, row in enumerate(data):
rowlist = str(row).split(';')
if i == 0:
titlerow = rowlist #row of titles froms list columns dos items da lista
grid_y = initial_y
continue
if i % 2 == 1:
for x in grid_Xa:
if x == 0:
panelsList.append(createTextPanel(rowlist[0],rowlist[2],rowlist[4],x,grid_y, i+1))
continue
createTextPanel python function
def createTextPanel(siteName,nodeName,interfaceName,grid_X,grid_Y,g_id):
template = jenv.get_or_select_template('p-text.json.jinja')
return template.render( site=siteName, node=nodeName,interface=interfaceName ,grid_x = grid_X, grid_y = grid_Y,id=g_id)
jinja template:
{
"content": "Site: " + {{site}} + "Node: " + {{node}} + "Interface: " + {{interface}},
"gridPos": {
"h": 3,
"w": 6,
"x": {{ grid_x }},
"y": {{ grid_y }}
},...}
Problem:
The {{site}} string in the output.json is appearing with [' and this is crashing the quotes
output json:
{
"content": "Site: " + ['businessh1 + "Node: " + SWLINKS-bus + "Interface: " + Unit: 1 Slot: 0 Port: 2 Gbit - Level · L2L_HLD-area_comp,
"gridPos": {
"h": 3,
"w": 6,
"x": 0,
"y": 3
},
...}
My intention was that the content: parameter of the output look like this:
"output": "Site: businessh1 Node: SWLINKS-bus Interface: Unit: 1 Slot: 0 Port: 2 Gbit - Level · L2L_HLD-area_comp..."
Thanks!
I was able to solve this issue changing the following:
From:
# Python source to read csv fileSource csv file
## load list with csv data
with open('crit_links.csv', newline='') as f:
reader = csv.reader(f)
data = list(reader)
To:
# load list with csv data
with open('crit_links.csv') as f:
data = list(csv.reader(f, delimiter=','))

json to lua with multiple stings backslash and dot

Hello i'm trying to use Json from my washer with lua. It's for visualizing the samsung in Domoitcz.
A part of the Json what i get from https://api.smartthings.com/v1/devices/abcd-1234-abcd is:
"main": {
"washerJobState": {
"value": "wash"
},
"mnhw": {
"value": "1.0"
},
"data": {
"value": "{
\"payload\":{
\"x.com.samsung.da.state\":\"Run\",\"x.com.samsung.da.delayEndTime\":\"00:00:00\",\"x.com.samsung.da.remainingTime\":\"01:34:00\",\"if\":[\"oic.if.baseline\",\"oic.if.a\"],\"x.com.samsung.da.progressPercentage\":\"2\",\"x.com.samsung.da.supportedProgress\":[\"None\",\"Wash\",\"Rinse\",\"Spin\",\"Finish\"],\"x.com.samsung.da.progress\":\"Wash\",\"rt\":[\"x.com.samsung.da.operation\"]}}"
},
"washerRinseCycles": {
"value": "3"
},
"switch": {
"value": "on"
},
if i use in my script
local switch = item.json.main.switch.value
I got the valua on or off and i can use it for showing the status of the washer.
i'm trying to find out how to get the "data"value in my script, there are more items with dots en backslhases:
local remainingTime = rt.data.value.payload['x.com.samsung.da.remainingTime']
or
local remainingTime = rt.data.value['\payload']['\x.com.samsung.da.remainingTime']
i tried some more opions with 'or // , "" but always got a nill value.
Can someone explain me how to get:
\"x.com.samsung.da.remainingTime\":\"01:34:00\"
\"x.com.samsung.da.progressPercentage\":\"2\",
All the " , \, x., ar confusing me
Below is my script to test where i only left the Json log (Dzvents Lua Based) i get an error:
dzVents/generated_scripts/Samsung_v3.lua:53: attempt to index a nil value (global 'json') i don't heave any idea how te use/adjust my code for decode the string.
local json = require"json" -- the JSON library
local outer = json.decode(your_JSON_string)
local rt = outer.main
local inner = json.decode(rt.data.value)
local remainingTime = inner.payload['x.com.samsung.da.remainingTime']
local API = 'API'
local Device = 'Device'
local LOGGING = true
--Define dz Switches
local WM_STATUS = 'WM Status' --Domoitcz virtual switch ON/Off state Washer
return
{
on =
{
timer =
{
'every 1 minutes', -- just an example to trigger the request
},
httpResponses =
{
'trigger', -- must match with the callback passed to the openURL command
},
},
logging =
{
level = domoticz.LOG_DEBUG ,
},
execute = function(dz, item)
local wm_status = dz.devices(WM_STATUS)
if item.isTimer then
dz.openURL({
url = 'https://api.smartthings.com/v1/devices/'.. Device .. '/states',
headers = { ['Authorization'] = 'Bearer '.. API },
method = 'GET',
callback = 'trigger', -- see httpResponses above.
})
end
if (item.isHTTPResponse) then
if item.ok then
if (item.isJSON) then
rt = item.json.main
-- outer = json.decode'{"payload":{"x.com.samsung.da.state":"Run","x.com.samsung.da.delayEndTime":"00:00:00","x.com.samsung.da.remainingTime":"00:40:00","if":["oic.if.baseline","oic.if.a"],"x.com.samsung.da.progressPercentage":"81","x.com.samsung.da.supportedProgress":["None","Weightsensing","Wash","Rinse","Spin","Finish"],"x.com.samsung.da.progress":"Rinse","rt":["x.com.samsung.da.operation"]}}
inner = json.decode(rt.data.value)
-- local remainingTime = inner.payload['x.com.samsung.da.remainingTime']
dz.utils.dumpTable(rt) -- this will show how the table is structured
-- dz.utils.dumpTable(inner)
local washerSpinLevel = rt.washerSpinLevel.value
-- local remainingTime = inner.payload['x.com.samsung.da.remainingTime']
dz.log('Debuggg washerSpinLevel:' .. washerSpinLevel, dz.LOG_DEBUG)
dz.log('Debuggg remainingTime:' .. remainingTime, dz.LOG_DEBUG)
-- dz.log('Resterende tijd:' .. remainingTime, dz.LOG_INFO)
-- dz.log(dz.utils.fromJSON(item.data))
-- end
elseif LOGGING == true then
dz.log('There was a problem handling the request', dz.LOG_ERROR)
dz.log(item, dz.LOG_ERROR)
end
end
end
end
}
This is a weird construction: a serialized JSON inside a normal JSON.
This means you have to invoke deserialization twice:
local json = require"json" -- the JSON library
local outer = json.decode(your_JSON_string)
local rt = outer.main
local inner = json.decode(rt.data.value)
local remainingTime = inner.payload['x.com.samsung.da.remainingTime']

how to parse more complex human-oriented text output to machine-friently style?

This is the question about how to parse "unparseable" output into json, or to something easily consumable as json. This is "little" bit behind trivial stuff, so I'd like to know, how do you solve these things in principle, it's not about this specific example only. But example:
We have this command, which shows data about audio inputs:
pacmd list-sink-inputs
it prints something like this:
2 sink input(s) available.
index: 144
driver: <protocol-native.c>
flags:
state: RUNNING
sink: 4 <alsa_output.pci-0000_05_00.0.analog-stereo>
volume: front-left: 15728 / 24% / -37.19 dB, front-right: 15728 / 24% / -37.19 dB
balance 0.00
muted: no
current latency: 70.48 ms
requested latency: 210.00 ms
sample spec: float32le 2ch 44100Hz
channel map: front-left,front-right
Stereo
resample method: copy
module: 13
client: 245 <MPlayer>
properties:
media.name = "UNREAL! Tetris Theme on Violin and Guitar-TnDIRr9C83w.webm"
application.name = "MPlayer"
native-protocol.peer = "UNIX socket client"
native-protocol.version = "32"
application.process.id = "1543"
application.process.user = "mmucha"
application.process.host = "vbDesktop"
application.process.binary = "mplayer"
application.language = "C"
window.x11.display = ":0"
application.process.machine_id = "720184179caa46f0a3ce25156642f7a0"
application.process.session_id = "2"
module-stream-restore.id = "sink-input-by-application-name:MPlayer"
index: 145
driver: <protocol-native.c>
flags:
state: RUNNING
sink: 4 <alsa_output.pci-0000_05_00.0.analog-stereo>
volume: front-left: 24903 / 38% / -25.21 dB, front-right: 24903 / 38% / -25.21 dB
balance 0.00
muted: no
current latency: 70.50 ms
requested latency: 210.00 ms
sample spec: float32le 2ch 48000Hz
channel map: front-left,front-right
Stereo
resample method: speex-float-1
module: 13
client: 251 <MPlayer>
properties:
media.name = "Trombone Shorty At Age 13 - 2nd Line-k9YUi3UhEPQ.webm"
application.name = "MPlayer"
native-protocol.peer = "UNIX socket client"
native-protocol.version = "32"
application.process.id = "2831"
application.process.user = "mmucha"
application.process.host = "vbDesktop"
application.process.binary = "mplayer"
application.language = "C"
window.x11.display = ":0"
application.process.machine_id = "720184179caa46f0a3ce25156642f7a0"
application.process.session_id = "2"
module-stream-restore.id = "sink-input-by-application-name:MPlayer"
very nice. But we don't want to show user all of this, we just want to show index (id of input), application.process.id, application.name and media.name, in some reasonable format. It would be great to parse it somehow to json, but even if I preprocess it somehow, the jq is way beyond my capabilities and quite complex. I tried multiple approaches using jq, with regex or without, but I wasn't able to finish it. And I guess we cannot rely on order or presence of all fields.
I was able to get the work "done", but it's messy, inefficient, and namely expects no semicolons in media name or app name. Not acceptable solution, but this is the only thing I was able to bring to the "end".
incorrect solution:
cat exampleOf2Inputs |
grep -e "index: \|application.process.id = \|application.name = \|media.name = " |
sed "s/^[ \t]*//;s/^\([^=]*\) = /\1: /" |
tr "\n" ";" |
sed "s/$/\n/;s/index:/\nindex:/g" |
tail -n +2 |
while read A; do
index=$(echo $A|sed "s/^index: \([0-9]*\).*/\1/");
pid=$(echo $A|sed 's/^.*application\.process\.id: \"\([0-9]*\)\".*$/\1/');
appname=$(echo $A|sed 's/^.*application\.name: \"\([^;]*\)\".*$/\1/');
medianame=$(echo $A|sed 's/^.*media\.name: \"\([^;]*\)\".*$/\"\1\"/');
echo "pid=$pid index=$index appname=$appname medianame=$medianame";
done
~ I grepped the interessant part, replaced newlines with semicolon, split to multiple lines, and just extract the data multiple times using sed. Crazy.
Here the output is:
pid=1543 index=144 appname=MPlayer medianame="UNREAL! Tetris Theme on Violin and Guitar-TnDIRr9C83w.webm"
pid=2831 index=145 appname=MPlayer medianame="Trombone Shorty At Age 13 - 2nd Line-k9YUi3UhEPQ.webm"
which is easily convertable to any format, but the question was about json, so to:
[
{
"pid": 1543,
"index": 144,
"appname": "MPlayer",
"medianame": "UNREAL! Tetris Theme on Violin and Guitar-TnDIRr9C83w.webm"
},
{
"pid": 2831,
"index": 145,
"appname": "MPlayer",
"medianame": "Trombone Shorty At Age 13 - 2nd Line-k9YUi3UhEPQ.webm"
}
]
Now I'd like to see, please, how are these things done correctly.
If the input is as reasonable as shown in the Q, the following approach that only uses jq should be possible.
An invocation along the following lines is assumed:
jq -nR -f parse.jq input.txt
def parse:
def interpret:
if . == null then .
elif startswith("\"") and endswith("\"")
then .[1:-1]
else tonumber? // .
end;
(capture( "(?<key>[^\t:= ]*)(: | = )(?<value>.*)" ) // null)
| if . then .value = (.value | interpret) else . end
;
# Construct one object for each "segment"
def construct(s):
[ foreach (s, 0) as $kv (null;
if $kv == 0 or $kv.index
then .complete = .accumulator | .accumulator = $kv
else .complete = null | .accumulator += $kv
end;
.complete // empty ) ]
;
construct(inputs | parse | select(.) | {(.key):.value})
| map( {pid: .["application.process.id"],
index,
appname: .["application.name"],
medianame: .["media.name"]} )
With the example input, the output would be:
[
{
"pid": "1543",
"index": 144,
"appname": "MPlayer",
"medianame": "UNREAL! Tetris Theme on Violin and Guitar-TnDIRr9C83w.webm"
},
{
"pid": "2831",
"index": 145,
"appname": "MPlayer",
"medianame": "Trombone Shorty At Age 13 - 2nd Line-k9YUi3UhEPQ.webm"
}
]
Brief explanation
parse parses one line. It assumes that whitespace (blank and tab characters) on each line before the key name can be ignored.
construct is responsible for grouping the lines (presented as a stream of key-value single-key objects) corresponding to a particular value of “index”. It produces an array of objects, one for each value of “index”.
I don't know about "correctly", but this is what I'd do:
pacmd list-sink-inputs | awk '
BEGIN { print "[" }
function print_record() {
if (count++) {
print " {"
printf " %s,\n", print_number("pid")
printf " %s,\n", print_number("index")
printf " %s,\n", print_string("appname")
printf " %s\n", print_string("medianame")
print " },"
}
delete record
}
function print_number(key) { return sprintf("\"%s\": %d", key, record[key]) }
function print_string(key) { return sprintf("\"%s\": \"%s\"", key, record[key]) }
function get_quoted_value() {
if (match($0, /[^"]+"$/))
return substr($0, RSTART, RLENGTH-1)
else
return "?"
}
$1 == "index:" { print_record(); record["index"] = $2 }
$1 == "application.process.id" { record["pid"] = get_quoted_value() }
$1 == "application.name" { record["appname"] = get_quoted_value() }
$1 == "media.name" { record["medianame"] = get_quoted_value() }
END { print_record(); print "]" }
' |
tac | awk '/},$/ && !seen++ {sub(/,$/,"")} 1' | tac
where the tac|awk|tac line removes the trailing comma from the last JSON object in the list.
[
{
"pid": 1543,
"index": 144,
"appname": "MPlayer",
"medianame": "UNREAL! Tetris Theme on Violin and Guitar-TnDIRr9C83w.webm"
},
{
"pid": 2831,
"index": 145,
"appname": "MPlayer",
"medianame": "Trombone Shorty At Age 13 - 2nd Line-k9YUi3UhEPQ.webm"
}
]
You could just pipe your output into:
sed -E '
s/pid=([0-9]+) index=([0-9]+) appname=([^ ]+) medianame=(.*)/{"pid": \1, "index": \2, "appname": "\3", "medianame": \4},/
1s/^/[/
$s/,$/]/
' | jq .