single-line json split - json

I have a json file which is complicated and I am trying to split it to multiple files. Is there an approach to achieve this
My input file
{"Ename":"xyz LLC","entity_type":"Third Party","version":"1.0.0","in_network":[{"neg_arr":"ffs","name":"PULMONARY EDEMA & RESPIRATORY FAILURE","billing_type":"MS-DRG","billing_version":"2022","billing_code":"189","description":"PULMONARY EDEMA & RESPIRATORY FAILURE","negotiated_rates":[{"negotiated_prices":[{"negotiated_type":"negotiated","negotiated_rate":14678.75,"expiration_date":"9999-12-31","service_code":["21"],"billing_class":"institutional"}],"provider_groups":[{"npi":[1538244918],"tin":{"type":"ein","value":"610444707"}}]}]},{"neg_arr":"ffs","name":"STERILE SUPPLY","billing_type":"RC","billing_version":"2022","billing_code":"272","description":"MEDICAL/SURGICAL SUPPLIES AND DEVICES - STERILE SUPPLY","negotiated_rates":[{"negotiated_prices":[{"negotiated_type":"negotiated","negotiated_rate":9.03,"expiration_date":"9999-12-31","service_code":["23"],"billing_class":"institutional"}],"provider_groups":[{"npi":[1902862824],"tin":{"type":"ein","value":"610510934"}}]}]}}
Expected output file1.json
"Ename":"xyz LLC",
"entity_type":"Third Party",
"version":"1.0.0",
"in_network":[{"neg_arr":"ffs","name":"PULMONARY RESPIRATORY FAILURE","billing_type":"MS-DRG","billing_version":"2022","billing_code":"189","description":"PULMONARY EDEMA & RESPIRATORY FAILURE","negotiated_rates":[{"negotiated_prices":[{"negotiated_type":"negotiated","negotiated_rate":18.75,"expiration_date":"9999-12-31","service_code":["21"],"billing_class":"institutional"}],"provider_groups":[{"npi":[211],"tin":{"type":"ein","value":"322"}}]}]}
Expected output file2.json
"Ename":"xyz LLC",
"entity_type":"Third Party",
"version":"1.0.0",
"in_network":[{"neg_arr":"ffs","name":"STERILE SUPPLY","billing_type":"RC","billing_version":"2022","billing_code":"272","description":"MEDICAL - STERILE SUPPLY","negotiated_rates":[{:[{"negotiated_type":"negotiated","negotiated_rate":9.03,"expiration_date":"9999-12-31","service_code":["23"],"billing_class":"institutional"}],"provider_groups":[{"npi":[18824],"tin":{"type":"ein","value":"6934"}}]}]}]

Related

How to do a two-step lookup using jq?

My bank provides a very poor excuse for transaction export which makes doing reconciling receipts for taxes way more difficult than it needs be, fortunately their web site is an SPA that is chock-full of rich json formatted data.
Having extracted all of the transaction data for a year, I'd now like to transform that data into filenames (date - cat - desc - amt.pdf) to use for receipt scans to help speed up the reconciling process.
I'm close to being able to do this but a doing a double lookup has stumped me and I could use the help of someone more well versed in jq.
Sample transaction data
Here's a sample set of the data (many fields omitted for privacy).
export START_YEAR=2020
cat ${START_YEAR}-transaction.json | jq -c -r \
'{ transaction_date: .transaction_date | capture("(?<x>[0-9,-]{10})") | join("-"), id: .id, category_id: .category_id, description: .description }' \
| tail -4
{"transaction_date":"2020-01-01","id":365707178,"category_id":113,"description":"PAYPAL *JOES DC"}
{"transaction_date":"2020-01-02","id":366592530,"category_id":84,"description":"SANSOTEI RAMEN"}
{"transaction_date":"2020-01-02","id":365963458,"category_id":84,"description":"RURU RESTAURANT"}
{"transaction_date":"2020-01-09","id":367661534,"category_id":211,"description":"PAYMENT - THANK YOU"}
{"transaction_date":"2020-01-12","id":368954730,"category_id":35,"description":"PEARSON PARKING T3"}
{"transaction_date":"2020-03-03","id":383492950,"category_id":84,"description":"WVRST"}
{"transaction_date":"2020-03-18","id":387334378,"category_id":113,"description":"WWW.ALIEXPRESS.COM LONDON GBR"}
{"transaction_date":"2020-03-27","id":389293681,"category_id":91,"description":"PAYPAL *CORGI TECH GBR"}
{"transaction_date":"2020-10-09","id":445515569,"category_id":52,"description":"GOODLIFE FITNESS"}
{"transaction_date":"2020-10-27","id":448912859,"category_id":211,"description":"Credit Card Payment"}
{"transaction_date":"2020-12-18","id":466667441,"category_id":64,"description":"KICKSTARTER: A TECHNOL SHAM SHUI PO KOW"}
{"transaction_date":"2020-12-31","id":469463176,"category_id":4,"description":"Interest Paid"}
Lookup #1: category id to translation tag
First the category_id is looked up in following categories data set to extract out a category.key which is a translation tag (i.e. category_id 113 is electronicsComputers)
categories.json (extracted from angular app file - app.dc68cc71460569631f28.js)
{"1":{"id":1,"icon":"icon-income","key":"income","parentCategoryId":null,"color":"#609b02"},"2":{"id":2,"icon":"icon-cheque","key":"payrollorEmploymentIncome","parentCategoryId":1},"4":{"id":4,"icon":"icon-interest","key":"interestCapitalIncome","parentCategoryId":1},"5":{"id":5,"icon":"icon-income-tax-94","key":"taxRefund","parentCategoryId":1},"13":{"id":13,"icon":"icon-home","key":"home","parentCategoryId":null,"color":"#335400"},"14":{"id":14,"icon":"r","key":"groceries","parentCategoryId":116},"16":{"id":16,"icon":"icon-lightbulb","key":"electricityHeatingUtilities","parentCategoryId":13},"17":{"id":17,"icon":"icon-home-insurance","key":"homeInsurancePremiums","parentCategoryId":307},"18":{"id":18,"icon":"icon-tv","key":"tVPhoneInternet","parentCategoryId":13},"20":{"id":20,"icon":"icon-cleaning","key":"homeCleaning","parentCategoryId":13},"21":{"id":21,"icon":"icon-rent-paid","key":"rentPaid","parentCategoryId":13},"23":{"id":23,"icon":"s","key":"furnishingsAppliancesDécor","parentCategoryId":13},"24":{"id":24,"icon":"icon-art","key":"flowersCandlesArtSmallItems","parentCategoryId":64},"27":{"id":27,"icon":"icon-mortgage","key":"mortgage","parentCategoryId":13},"28":{"id":28,"icon":"icon-property-taxes","key":"propertyTaxes","parentCategoryId":13},"29":{"id":29,"icon":"c","key":"homeImprovementsRepairs","parentCategoryId":13},"32":{"id":32,"icon":"icon-car","key":"carsTransportation","parentCategoryId":null,"color":"#0064d8"},"33":{"id":33,"icon":"icon-auto-insurance","key":"autoInsurancePremiums","parentCategoryId":307},"35":{"id":35,"icon":"icon-car","key":"parking","parentCategoryId":32},"36":{"id":36,"icon":"icon-gas","key":"gasFuel","parentCategoryId":32},"37":{"id":37,"icon":"icon-wrench","key":"autoMaintenanceExpenses","parentCategoryId":32},"38":{"id":38,"icon":"icon-auto-loan","key":"autoLoanLeasePayment","parentCategoryId":32},"39":{"id":39,"icon":"icon-bus","key":"taxisPublicTransportation","parentCategoryId":32},"40":{"id":40,"icon":"icon-family","key":"family","parentCategoryId":null,"color":"#335400"},"41":{"id":41,"icon":"icon-baby","key":"childcare","parentCategoryId":40},"42":{"id":42,"icon":"icon-childrens-clothes","key":"children'sClothing","parentCategoryId":40},"43":{"id":43,"icon":"icon-childrens-toys","key":"children'sToys","parentCategoryId":40},"46":{"id":46,"icon":"icon-paddleball","key":"children'sRecreationalActivities","parentCategoryId":40},"48":{"id":48,"icon":"icon-health-and-beauty","key":"healthBeauty","parentCategoryId":null,"color":"#048476"},"49":{"id":49,"icon":"l","key":"drugStoresandPharmacies","parentCategoryId":48},"50":{"id":50,"icon":"icon-health-insurance","key":"lifeHealthInsurancePremiums","parentCategoryId":307},"51":{"id":51,"icon":"icon-bandaid","key":"doctorsHealthcare","parentCategoryId":48},"52":{"id":52,"icon":"icon-sports","key":"sports","parentCategoryId":70},"54":{"id":54,"icon":"icon-loans-taxes-fines","key":"loansTaxesandFines","parentCategoryId":null,"color":"#4c7b02"},"55":{"id":55,"icon":"icon-fines","key":"fines","parentCategoryId":54},"59":{"id":59,"icon":"icon-bank-fees","key":"bankFeesServiceFees","parentCategoryId":54},"61":{"id":61,"icon":"icon-media","key":"subscriptionsMedia","parentCategoryId":64},"64":{"id":64,"icon":"icon-shopping","key":"shoppingServices","parentCategoryId":null,"color":"#7a18c2"},"65":{"id":65,"icon":"icon-clothing","key":"clothingShoes","parentCategoryId":64},"66":{"id":66,"icon":"icon-heart","key":"beautyProducts","parentCategoryId":48},"67":{"id":67,"icon":"icon-scissors","key":"barberHairdressingSalon","parentCategoryId":48},"69":{"id":69,"icon":"icon-iron","key":"dryCleaningClothingRepairs","parentCategoryId":64},"70":{"id":70,"icon":"icon-leisure","key":"leisure","parentCategoryId":null,"color":"#7a18c2"},"71":{"id":71,"icon":"icon-fast-food","key":"fastFoodQuickService","parentCategoryId":116},"72":{"id":72,"icon":"icon-spa","key":"spaMassagePersonalCare","parentCategoryId":48},"76":{"id":76,"icon":"icon-membership","key":"memberships","parentCategoryId":70},"79":{"id":79,"icon":"icon-beer","key":"barsPubsNightclubs","parentCategoryId":70},"81":{"id":81,"icon":"icon-martini","key":"alcoholBars","parentCategoryId":116},"82":{"id":82,"icon":"icon-champagne","key":"tobaccoAlcohol","parentCategoryId":64},"84":{"id":84,"icon":"icon-restaurant","key":"restaurants","parentCategoryId":116},"86":{"id":86,"icon":"icon-education","key":"education","parentCategoryId":null,"color":"#02685d"},"87":{"id":87,"icon":"icon-student-loans","key":"studentLoanPayment","parentCategoryId":86},"88":{"id":88,"icon":"icon-graduation","key":"tuitionCourses","parentCategoryId":86},"89":{"id":89,"icon":"icon-papers","key":"schoolBooksMaterialsStationery","parentCategoryId":86},"91":{"id":91,"icon":"icon-gift","key":"gifts","parentCategoryId":64},"95":{"id":95,"icon":"icon-party","key":"partiesCelebrations","parentCategoryId":64},"96":{"id":96,"icon":"icon-plane","key":"vacationTravel","parentCategoryId":null,"color":"#0064d8"},"97":{"id":97,"icon":"icon-train","key":"transportationCarRental","parentCategoryId":96},"98":{"id":98,"icon":"d","key":"hotelsAccommodation","parentCategoryId":96},"99":{"id":99,"icon":"icon-surfboard","key":"recreationEntertainmentonVacation","parentCategoryId":96},"100":{"id":100,"icon":"icon-alcoholic-drink","key":"foodLivingExpensesonVacation","parentCategoryId":96},"101":{"id":101,"icon":"icon-savings","key":"investmentsSavings","parentCategoryId":null,"color":"#38629c"},"102":{"id":102,"icon":"u","key":"regularSavings","parentCategoryId":101},"104":{"id":104,"icon":"icon-donations-and-charities","key":"charityDonations","parentCategoryId":null,"color":"#bc2e8d"},"105":{"id":105,"icon":"icon-pets","key":"pets","parentCategoryId":40},"106":{"id":106,"icon":"icon-trailer","key":"summerHome/Cottage/Trailer","parentCategoryId":13},"828":{"id":828,"icon":"icon-condo-fees","key":"condoFees","parentCategoryId":13},"830":{"id":830,"icon":"icon-tangerine-rewards","key":"tangerineMoneyBackRewards","parentCategoryId":1},"831":{"id":831,"icon":"icon-uncategorized-expenses","key":"uncategorizedExpenses","parentCategoryId":null,"color":"#757575"},"832":{"id":832,"icon":"u","key":"goalSavings","parentCategoryId":101},"833":{"id":833,"icon":"u","key":"recipes","parentCategoryId":101},"113":{"id":113,"icon":"icon-computer","key":"electronicsComputers","parentCategoryId":64},"114":{"id":114,"icon":"icon-jewelry","key":"jewelleryAccessories","parentCategoryId":64},"115":{"id":115,"icon":"icon-toll","key":"tolls","parentCategoryId":32},"116":{"id":116,"icon":"p","key":"foodDining","parentCategoryId":null,"color":"#9339d4"},"117":{"id":117,"icon":"icon-carwash","key":"carCleaning","parentCategoryId":32},"118":{"id":118,"icon":"icon-rattle","key":"supportPayment","parentCategoryId":40},"121":{"id":121,"icon":"icon-sin","key":"socialInsurance","parentCategoryId":1},"124":{"id":124,"icon":"icon-loan","key":"loanPayments","parentCategoryId":54},"129":{"id":129,"icon":"icon-pacifier","key":"childcareProducts","parentCategoryId":40},"131":{"id":131,"icon":"icon-emergency-savings","key":"emergencyFund","parentCategoryId":101},"143":{"id":143,"icon":"icon-other-pension","key":"otherPension","parentCategoryId":1},"145":{"id":145,"icon":"icon-rental-income","key":"rentalIncome","parentCategoryId":1},"199":{"id":199,"icon":"icon-compass","key":"outdoorActivities","parentCategoryId":70},"202":{"id":202,"icon":"icon-paintbrush","key":"artsCraftsMusic","parentCategoryId":70},"210":{"id":210,"icon":"icon-exclude-from-budget","key":"excludefromBudget","parentCategoryId":null,"color":"#d50179"},"211":{"id":211,"icon":"icon-credit-card-payment","key":"creditCardPayments","parentCategoryId":210},"212":{"id":212,"icon":"icon-transfers","key":"transfersBetweenownAccounts","parentCategoryId":210},"216":{"id":216,"icon":"icon-cash-withdrawal","key":"aBMOtherCashWithdrawals","parentCategoryId":831},"258":{"id":258,"icon":"icon-dice","key":"lotteriesGambling","parentCategoryId":70},"259":{"id":259,"icon":"icon-glasses","key":"eyeCare","parentCategoryId":48},"268":{"id":268,"icon":"icon-uncategorized-income","key":"uncategorizedIncome","parentCategoryId":1},"283":{"id":283,"icon":"icon-income-tax-89","key":"incomeTaxPaid","parentCategoryId":54},"289":{"id":289,"icon":"icon-headphones","key":"musicandApps","parentCategoryId":64},"292":{"id":292,"icon":"icon-savings-transfers","key":"savingsAccountTransactions","parentCategoryId":210},"298":{"id":298,"icon":"icon-pocket-money","key":"allowancePocketMoneyEtc","parentCategoryId":40},"307":{"id":307,"icon":"icon-insurance","key":"insurance","parentCategoryId":null,"color":"#9b53cb"},"315":{"id":315,"icon":"icon-retirement-savings","key":"retirementSavings","parentCategoryId":101},"801":{"id":801,"icon":"icon-support-income","key":"supportIncome","parentCategoryId":1},"802":{"id":802,"icon":"icon-coffee","key":"coffeeShops","parentCategoryId":116},"803":{"id":803,"icon":"icon-desk-1","key":"homeOffice","parentCategoryId":13},"804":{"id":804,"icon":"icon-dependants","key":"dependants","parentCategoryId":40},"805":{"id":805,"icon":"icon-tooth","key":"dental","parentCategoryId":48},"806":{"id":806,"icon":"icon-bottle","key":"nursingCareFacilities","parentCategoryId":48},"807":{"id":807,"icon":"icon-sports-apparel","key":"sportsApparel","parentCategoryId":64},"808":{"id":808,"icon":"icon-pens","key":"schoolSupplies","parentCategoryId":64},"809":{"id":809,"icon":"m","key":"entertainment","parentCategoryId":70},"810":{"id":810,"icon":"icon-school-bus","key":"fieldtripsMiscellaneousExpenses","parentCategoryId":86},"811":{"id":811,"icon":"icon-alcoholic-drink","key":"travelAgenciesTourOperators","parentCategoryId":96},"812":{"id":812,"icon":"icon-signpost","key":"souvenirsGifts","parentCategoryId":96},"813":{"id":813,"icon":"icon-securities","key":"securities","parentCategoryId":101},"814":{"id":814,"icon":"icon-retirement-savings","key":"rRSPs/RSPs","parentCategoryId":101},"815":{"id":815,"icon":"icon-savings","key":"tFSAs","parentCategoryId":101},"816":{"id":816,"icon":"icon-trust-fund","key":"trustFund","parentCategoryId":101},"817":{"id":817,"icon":"icon-education","key":"rESPs","parentCategoryId":101},"818":{"id":818,"icon":"icon-travel-insurance","key":"travelInsurancePremiums","parentCategoryId":307},"819":{"id":819,"icon":"icon-marine-insurance","key":"marineInsurancePremiums","parentCategoryId":307},"820":{"id":820,"icon":"icon-pet-insurance","key":"petInsurancePremiums","parentCategoryId":307},"821":{"id":821,"icon":"icon-charities","key":"charities","parentCategoryId":104},"822":{"id":822,"icon":"icon-donations","key":"donations","parentCategoryId":104},"823":{"id":823,"icon":"icon-credit-card-other","key":"nonTangerineCreditCardPayments","parentCategoryId":831},"824":{"id":824,"icon":"icon-weights","key":"gymSports","parentCategoryId":48},"826":{"id":826,"icon":"icon-tangerine-loan-payment","key":"tangerineLoanPayments","parentCategoryId":210},"827":{"id":827,"icon":"icon-tangerine-mortgage-payment","key":"tangerineMortgagePayments","parentCategoryId":210},"829":{"id":829,"icon":"icon-interest-charge","key":"interestCharge","parentCategoryId":54},"112":{"id":112,"icon":"icon-question2","key":"uncategorized","parentCategoryId":null,"color":"#757575"}}
Lookup #2: translation tag to natural language description
Next the translation tag obtained from step #1 (i.e. electronicsComputers) is looked up in a file called en_CA.json (or fr_CA.json if using the site in French), this gives the desired natural language description: Electronics & Computers
{"OMITTED_OTHER_STUFF":{},"global":{"OMITTED_OTHER_STUFF":{},"transactionsCategories":{"income":"Income","payrollorEmploymentIncome":" Payroll or Employment Income","interestCapitalIncome":"Interest & Capital Income","taxRefund":"Tax Refund","home":"Home","groceries":" Groceries","electricityHeatingUtilities":" Electricity & Heating (Utilities)","homeInsurancePremiums":" Home Insurance Premiums","tVPhoneInternet":"TV, Phone & Internet","homeCleaning":"Home Cleaning","rentPaid":"Rent Paid","furnishingsAppliancesDécor":"Furnishings, Appliances & Décor","flowersCandlesArtSmallItems":"Flowers, Candles, Art & Small Items","mortgage":"Mortgage","propertyTaxes":"Property Taxes","homeImprovementsRepairs":"Home Improvements & Repairs","carsTransportation":"Cars & Transportation","autoInsurancePremiums":"Auto Insurance Premiums","parking":" Parking","gasFuel":"Gas & Fuel","autoMaintenanceExpenses":"Auto Maintenance & Expenses ","autoLoanLeasePayment":"Auto Loan & Lease Payment","taxisPublicTransportation":"Taxis & Public Transportation","family":"Family","childcare":" Childcare","children'sClothing":"Children's Clothing","children'sToys":"Children's Toys","children'sRecreationalActivities":"Children's Recreational Activities","healthBeauty":"Health & Beauty","drugStoresandPharmacies":" Drug Stores and Pharmacies","lifeHealthInsurancePremiums":"Life & Health Insurance Premiums","doctorsHealthcare":"Doctors & Healthcare","sports":"Sports","loansTaxesandFines":"Loans, Taxes, and Fines","fines":" Fines","bankFeesServiceFees":"Bank Fees & Service Fees","subscriptionsMedia":"Subscriptions & Media","shoppingServices":"Shopping & Services","clothingShoes":"Clothing & Shoes","beautyProducts":"Beauty Products","barberHairdressingSalon":"Barber & Hairdressing Salon","dryCleaningClothingRepairs":"Dry Cleaning & Clothing Repairs","leisure":"Leisure ","fastFoodQuickService":"Fast Food (Quick Service)","spaMassagePersonalCare":"Spa, Massage & Personal Care","memberships":" Memberships","barsPubsNightclubs":"Bars, Pubs & Nightclubs ","alcoholBars":"Alcohol & Bars ","tobaccoAlcohol":"Tobacco & Alcohol","restaurants":"Restaurants ","education":"Education","studentLoanPayment":" Student Loan Payment","tuitionCourses":"Tuition & Courses","schoolBooksMaterialsStationery":"School Books, Materials & Stationery","gifts":"Gifts","partiesCelebrations":"Parties & Celebrations","vacationTravel":"Vacation & Travel","transportationCarRental":" Transportation & Car Rental","hotelsAccommodation":"Hotels & Accommodation","recreationEntertainmentonVacation":"Recreation & Entertainment on Vacation","foodLivingExpensesonVacation":"Food & Living Expenses on Vacation","investmentsSavings":"Investments & Savings","regularSavings":" Regular Savings","goalSavings":"Goals","charityDonations":"Charity & Donations","pets":"Pets","summerHome/Cottage/Trailer":"Summer Home/ Cottage/Trailer ","uncategorizedExpenses":"Miscellaneous Expenses","electronicsComputers":"Electronics & Computers","jewelleryAccessories":"Jewellery & Accessories","tolls":"Tolls","foodDining":"Food & Dining","carCleaning":"Car Cleaning","supportPayment":"Support Payment","socialInsurance":"Payments from Government","loanPayments":"Loan Payments","childcareProducts":"Childcare Products","emergencyFund":"Emergency Fund","otherPension":"Other Pension","rentalIncome":"Rental Income","outdoorActivities":"Outdoor Activities ","artsCraftsMusic":"Arts, Crafts & Music","excludefromBudget":"Exclude from Budget","creditCardPayments":" Credit Card Payments","transfersBetweenownAccounts":"Transfers Between own Accounts","aBMOtherCashWithdrawals":" ABM & Other Cash Withdrawals","uncategorizedTransfers":"Uncategorized Transfers","lotteriesGambling":"Lotteries & Gambling","eyeCare":"Eye Care","uncategorizedIncome":"Miscellaneous Income","incomeTaxPaid":"Income Tax Paid","musicandApps":"Music and Apps","savingsAccountTransactions":"Savings Account Transactions","allowancePocketMoneyEtc":"Allowance, Pocket Money, Etc.","insurance":"Insurance","retirementSavings":"Retirement Savings","supportIncome":"Support Income","coffeeShops":"Coffee Shops","homeOffice":"Home Office","dependants":"Dependants","dental":"Dental","nursingCareFacilities":"Nursing & Care Facilities","sportsApparel":"Sports Apparel","schoolSupplies":"School Supplies","entertainment":"Entertainment","fieldtripsMiscellaneousExpenses":"Fieldtrips & Miscellaneous Expenses","travelAgenciesTourOperators":"Travel Agencies & Tour Operators","souvenirsGifts":"Souvenirs & Gifts ","securities":"Securities","rRSPs/RSPs":"RRSPs/RSPs","tFSAs":"TFSAs","trustFund":"Trust Fund","rESPs":"RESPs","travelInsurancePremiums":"Travel Insurance Premiums","marineInsurancePremiums":"Marine Insurance Premiums","petInsurancePremiums":"Pet Insurance Premiums","charities":" Charities","donations":"Donations","gymSports":"Gym & Sports","uncategorized":"Uncategorized","other":"Other","interestCharge":"Interest Charge","tangerineLoanPayments":"Tangerine Loan Payments","tangerineMortgagePayments":"Tangerine Mortgage Payments","tangerineMoneyBackRewards":"Tangerine Money-Back Rewards","condoFees":"Condo Fees","nonTangerineCreditCardPayments":"Non-Tangerine Credit Card Payments","recipes":"Money Rules"}}}
I believe the logic would be something like this:
jq --argfile cat_lookup categories.json --argfile cat_word en_CA.json \
'if $cat_lookup.transactionsCategories.id == .category_id then .cat_desc = $cat_word[$cat_lookup.key] end' \
${START_YEAR}-transaction.json | head
jq: error: syntax error, unexpected end (Unix shell quoting issues?) at <top-level>, line 1:
if $cat_lookup.transactionsCategories.id == .category_id then .cat_desc = $cat_word[$cat_lookup.key] end
jq: error: Possibly unterminated 'if' statement at <top-level>, line 1:
if $cat_lookup.transactionsCategories.id == .category_id then .cat_desc = $cat_word[$cat_lookup.key] end
jq: 2 compile errors
Obviously this fails because my syntax is bad and there needs to be a search each for of all $cat_lookup.transactionsCategories and $cat_word values to match everything up...
what is the best way to accomplish that with jq?
Actually it's fairly simple references:
jq --argfile cat_lookup categories.json --argfile cat_word en_CA.json '
.category_key = $cat_lookup[.category_id | tostring].key
| .category_desc = $cat_word.global.transactionsCategories[.category_key]
' ${START_YEAR}-transaction.json
producing
{
"transaction_date": "2020-01-01",
"id": 365707178,
"category_id": 113,
"description": "PAYPAL *JOES DC",
"category_key": "electronicsComputers",
"category_desc": "Electronics & Computers"
}
{
"transaction_date": "2020-01-02",
"id": 366592530,
"category_id": 84,
"description": "SANSOTEI RAMEN",
"category_key": "restaurants",
"category_desc": "Restaurants "
}
...
Or in one step (without storing the category key):
jq --argfile cat_lookup categories.json --argfile cat_word en_CA.json '
.category_desc = $cat_word.global.transactionsCategories[
$cat_lookup[.category_id | tostring].key
]
' ${START_YEAR}-transaction.json
Note: The jq manual discourages you from using --argfile and suggests to use --slurpfile instead. This would slightly change the way you have to access your variables, as they would become an array containing the actual object as single element. Thus, with --slurpfile use $cat_lookup[0] and $cat_word[0] instead of $cat_lookup and $cat_word, respectively.

AWS DynamoDB Issues adding values to existing table

I have already created a table called Sensors and identified Sensor as the hash key. I am trying to add to the table with my .json file. The items in my file look like this:
{
"Sensor": "Room Sensor",
"SensorDescription": "Turns on lights when person walks into room",
"ImageFile": "rmSensor.jpeg",
"SampleRate": "1000",
"Locations": "Baltimore, MD"
}
{
"Sensor": "Front Porch Sensor",
"SensorDescription": " ",
"ImageFile": "fpSensor.jpeg",
"SampleRate": "2000",
"Locations": "Los Angeles, CA"
}
There's 20 different sensors in the file. I was using the following command:
aws dynamodb batch-write-item \
--table-name Sensors \
--request-items file://sensorList.json \
--returned-consumed-capacity TOTAL
I get the following error:
Error parsing parameter '--request-items': Invalid JSON: Extra data: line 9 column 1 (char 189)
I've tried adding --table name Sensors to the cl and it says Unknown options: --table-name, Sensors. I've tried put-item and a few others. I am trying to understand what my errors are, what I need to change in my .json if anything, and what I need to change in my cl. Thanks!
Your input file is not a valid json. You are missing a comma to separate both objects, and you need to enclose everything with brackets [ ..... ]

Add a COMMA after a CURLY Brace For JSON Format

Hello there i am working on a dataset but its not formatted correctly. It's missing its Square brackets and comma after every object
Example:
{"is_sarcastic": 1, "headline": "thirtysomething scientists unveil doomsday clock of hair loss", "article_link": "something"}
{"is_sarcastic": 0, "headline": "dem rep. totally nails why congress is falling short on gender, racial equality", "article_link": "somethingelse"}
I want to format it such that it turns to this:
[{"is_sarcastic": 1, "headline": "thirtysomething scientists unveil doomsday clock of hair loss", "article_link": "something"},
{"is_sarcastic": 0, "headline": "dem rep. totally nails why congress is falling short on gender, racial equality", "article_link": "somethingelse"}]
I am using Python 3.x to achieve this task.
you can run the following python script, it'll output a file containing the output you desire.
import json
dataJson = []
with open('data.json') as f:
for jsonObj in f:
dataDict = json.loads(jsonObj)
dataJson.append(dataDict)
#print (dataJson)
with open('data2.json', 'w') as jsonfile:
json.dump(dataJson, jsonfile)
where data.json is the name of the file containing the dataset

How to split very large JSON file in R?

I am currently working with Amazon User Metadata of size roughly around 9 GB. I have this data in .json format and want to read in R, due to such huge volume of data I want to divide the files in small chunks (either by size or by number of elements). If you can help me out how can I do this in R. The format of file is as follows:
{
"asin": "0000031852",
"title": "Girls Ballet Tutu Zebra Hot Pink",
"price": 3.17,
"imUrl": "http://ecx.images-amazon.com/images/I/51fAmVkTbyL._SY300_.jpg",
"related":
{
"also_bought": ["B00JHONN1S", "B002BZX8Z6", "B00D2K1M3O", "0000031909", "B00613WDTQ", "B00D0WDS9A", "B00D0GCI8S", "0000031895", "B003AVKOP2", "B003AVEU6G", "B003IEDM9Q", "B002R0FA24", "B00D23MC6W", "B00D2K0PA0", "B00538F5OK", "B00CEV86I6", "B002R0FABA", "B00D10CLVW", "B003AVNY6I", "B002GZGI4E", "B001T9NUFS", "B002R0F7FE", "B00E1YRI4C", "B008UBQZKU", "B00D103F8U", "B007R2RM8W"],
"also_viewed": ["B002BZX8Z6", "B00JHONN1S", "B008F0SU0Y", "B00D23MC6W", "B00AFDOPDA", "B00E1YRI4C", "B002GZGI4E", "B003AVKOP2", "B00D9C1WBM", "B00CEV8366", "B00CEUX0D8", "B0079ME3KU", "B00CEUWY8K", "B004FOEEHC", "0000031895", "B00BC4GY9Y", "B003XRKA7A", "B00K18LKX2", "B00EM7KAG6", "B00AMQ17JA", "B00D9C32NI", "B002C3Y6WG", "B00JLL4L5Y", "B003AVNY6I", "B008UBQZKU", "B00D0WDS9A", "B00613WDTQ", "B00538F5OK", "B005C4Y4F6", "B004LHZ1NY", "B00CPHX76U", "B00CEUWUZC", "B00IJVASUE", "B00GOR07RE", "B00J2GTM0W", "B00JHNSNSM", "B003IEDM9Q", "B00CYBU84G", "B008VV8NSQ", "B00CYBULSO", "B00I2UHSZA", "B005F50FXC", "B007LCQI3S", "B00DP68AVW", "B009RXWNSI", "B003AVEU6G", "B00HSOJB9M", "B00EHAGZNA", "B0046W9T8C", "B00E79VW6Q", "B00D10CLVW", "B00B0AVO54", "B00E95LC8Q", "B00GOR92SO", "B007ZN5Y56", "B00AL2569W", "B00B608000", "B008F0SMUC", "B00BFXLZ8M"],
"bought_together": ["B002BZX8Z6"]
},
"salesRank": {"Toys & Games": 211836},
"brand": "Coxlures",
"categories": [["Sports & Outdoors", "Other Sports", "Dance"]]
}

Replace single quotes in double quotes in brackets

I must modify a file json. I must replace the single quotes in double quotes but I can't use the following command sed -i -r "s/'/\"/g" file because in the file there are more single quotes that I don't change.
The following code is an example of string:
"categories": [['Clothing, Shoes & Jewelry', 'Girls'], ['Clothing, Shoes & Jewelry', 'Novelty, Costumes & More', 'Costumes & Accessories', 'More Accessories', 'Kids & Baby']]
The desided result should be:
"categories": [["Clothing, Shoes & Jewelry", "Girls"], ["Clothing, Shoes & Jewelry", "Novelty, Costumes & More", "Costumes & Accessories", "More Accessories", "Kids & Baby"]]
sample file:
{"categories": [['Movies & TV', 'Movies']], "title": "Understanding Seizures and Epilepsy DVD"},
{"title": "Who on Earth is Tom Baker?", "salesRank": {"Books": 3843450}, "categories": [['Books']]},
{"categories": [['Clothing, Shoes & Jewelry', 'Girls'], ['Clothing, Shoes & Jewelry', 'Novelty, Costumes & More', 'Costumes & Accessories', 'More Accessories', 'Kids & Baby']], "description": "description, "title": "Mog's Kittens", "salesRank": {"Books": 1760368}}},
{"description": "Three Dr. Suess' Puzzles", "brand": "Dr. Seuss", "categories": [['Toys & Games', 'Puzzles', 'Jigsaw Puzzles']]},
I used a regular expression but the problem is that I don't know how many element are in brackets. So I would a way for replace all single quotes in the brackets, this is a perfect way, but I can not find the solution.
#!/usr/bin/perl -w
use strict;
# read each line from stdin
while (my $l=<>) {
chomp($l); # remove newline char
# split: get contents of innermost square brackets
my #a=split(/(\[[^][]*\])/,$l);
foreach my $i (#a) {
# replace quotes iff innermost square brackets
if ($i=~/^\[/) { $i=~s/'/"/g; }
}
# join and print
print join('',#a)."\n";
}
I found a way to do that, using python.
Note that the json stream you provided is not recognized by python json because of single quotes (and also some copy/paste problems, missing quotes, I fixed that).
My solution is using fully the python libraries, I doubt you can do the same with sed, that's why I provide it despite the fact you didn't mention that technology.
I read the data using ast.literal_eval since it's a list of dictionaries with the exact python syntax. Single quotes are not a problem for ast
I write the data using json.dump. It writes the data using double quotes.
Note that I write it in a "fake" file (i.e. a string with I/O write method to "fool" the json serializer).
Here's a standalone snippet that works:
import io
foo = """[{"categories": [['Movies & TV', 'Movies']], "title": "Understanding Seizures and Epilepsy DVD"},
{"title": "Who on Earth is Tom Baker?", "salesRank": {"Books": 3843450}, "categories": [['Books']]},
{"categories": [['Clothing, Shoes & Jewelry', 'Girls'], ['Clothing, Shoes & Jewelry', 'Novelty, Costumes & More', 'Costumes & Accessories', 'More Accessories', 'Kids & Baby']], "description": "description", "title": "Mog's Kittens", "salesRank": {"Books": 1760368}},
{"description": "Three Dr. Suess' Puzzles",
"brand": "Dr. Seuss", "categories": [['Toys & Games', 'Puzzles', 'Jigsaw Puzzles']]}
]"""
fp = io.StringIO()
json_data=ast.literal_eval(foo)
json.dump(json_data,fp)
print(fp.getvalue())
result:
[{"categories": [["Movies & TV", "Movies"]], "title": "Understanding Seizures and Epilepsy DVD"}, {"salesRank": {"Books": 3843450}, "categories": [["Books"]], "title": "Who on Earth is Tom Baker?"}, {"description": "description", "salesRank": {"Books": 1760368}, "categories": [["Clothing, Shoes & Jewelry", "Girls"], ["Clothing, Shoes & Jewelry", "Novelty, Costumes & More", "Costumes & Accessories", "More Accessories", "Kids & Baby"]], "title": "Mog's Kittens"}, {"brand": "Dr. Seuss", "description": "Three Dr. Suess' Puzzles", "categories": [["Toys & Games", "Puzzles", "Jigsaw Puzzles"]]}]
Here's a full script taking 2 parameters (input file & output file) and performing the conversion. You can use this script within your already existing bash scripts if you're not comfortable with python (save that in fix_quotes.py for instance):
import ast,json,sys
input_file = sys.argv[1]
output_file = sys.argv[2]
with open(input_file,"r") as fr:
json_data=ast.literal_eval(fr.read())
with open(output_file,"w") as fw:
json.dump(json_data,fw)