Creating JSON with Ruby looping through SQL Server table

This is a follow-up to this question:
Ruby create JSON from SQL Server
I was able to create nested arrays in JSON, but I'm struggling with looping through records and appending a file with each record. Also, how would I add a root element just at the top of the JSON and not on each record? "aaSequences" needs to be at the top just once... I also need a comma between each record.
Here is my code so far:
require 'pp'
require 'tiny_tds'
require 'awesome_print'
require 'json'

class Document
  def initialize strategy
    @document = strategy
    # load helper functions
    load "helpers_ruby.rb"
    # set environment 'dev', 'qa', or 'production'
    load "envconfig_ruby.rb"
  end

  def StartUP
    @document.StartUP
  end

  def getseqrecord
    @document.getseqrecord
  end
end

class GetSqlaaSequence
  def StartUP
    ##system "clear" ##linux
    system "cls" ##Windows
    # create connection to db
    $connReportingDB = createReportingxxSqlConn($ms_sql_host, $ms_sql_user, $ms_sql_password, $ms_sql_dbname)
    ##$currentDateTime = DateTime.now
    ##pp 'def StartUP ran at: '+$currentDateTime.to_s
  end

  def getseqrecord
    # get the aaSequences data
    @result = $connReportingDB.execute("SELECT
      [jsonFile]
      ,[id]
      ,[title]
      ,[authorIds]
      ,[name]
      ,[aminoAcids]
      ,[schemaId]
      ,[registryId]
      ,[namingStrategy]
      FROM tablename
    ")
    $aaSequences = Array.new
    @i = 0
    @result.each do |aaSequence|
      jsonFile = aaSequence['jsonFile']
      id = aaSequence['id']
      title = aaSequence['title']
      authorIds = aaSequence['authorIds']
      name = aaSequence['name']
      aminoAcids = aaSequence['aminoAcids']
      schemaId = aaSequence['schemaId']
      registryId = aaSequence['registryId']
      namingStrategy = aaSequence['namingStrategy']

      @hash = Hash[
        "jsonFile", jsonFile,
        "id", id,
        "title", title,
        "authorIds", authorIds,
        "name", name,
        "aminoAcids", aminoAcids,
        "schemaId", schemaId,
        "registryId", registryId,
        "namingStrategy", namingStrategy
      ]
      @filename = jsonFile
      jsonFileOutput0 = {:"#{title}" => [{:authorIds => ["#{authorIds}"], :aminoAcids => "#{aminoAcids}", :name => "#{name}", :schemaId => "#{schemaId}", :registryId => "#{registryId}", :namingStrategy => "#{namingStrategy}"}]}
      jsonFileOutput = JSON.pretty_generate(jsonFileOutput0)
      File.open(jsonFile, "a") do |f|
        f.write(jsonFileOutput)
        #### add the comma between records... Not sure if this is the best way to do it...
        # File.open(jsonFile,"a") do |f|
        #   f.write(',')
        # end
      end
      $aaSequences[@i] = @hash
      @i = @i + 1
      ###createReportingSqlConn.close
    end
  end
end

Document.new(GetSqlaaSequence.new).StartUP
# get aaSequences and create json files
Document.new(GetSqlaaSequence.new).getseqrecord
Here is a sample of the JSON it creates so far...
{
  "aaSequences": [
    {
      "authorIds": [
        "fff_fdfdfdfd"
      ],
      "aminoAcids": "aminoAcids_data",
      "name": "fdfdfddf-555_1",
      "schemaId": "5555fdfd5",
      "registryId": "5fdfdfdf",
      "namingStrategy": "NEW_IDS"
    }
  ]
}{
  "aaSequences": [
    {
      "authorIds": [
        "fff_fdfdfdfd"
      ],
      "aminoAcids": "aminoAcids_data",
      "name": "fdfdfddf-555_2",
      "schemaId": "5555fdfd5",
      "registryId": "5fdfdfdf",
      "namingStrategy": "NEW_IDS"
    }
  ]
}
And here is an example of what I need it to look like:
{
  "aaSequences": [
    {
      "authorIds": [
        "authorIds_data"
      ],
      "aminoAcids": "aminoAcids_data",
      "name": "name_data",
      "schemaId": "schemaId_data",
      "registryId": "registryId_data",
      "namingStrategy": "namingStrategy_data"
    },
    {
      "authorIds": [
        "authorIds_data"
      ],
      "aminoAcids": "aminoAcids_data",
      "name": "name_data",
      "schemaId": "schemaId_data",
      "registryId": "registryId_data",
      "namingStrategy": "namingStrategy_data"
    }
  ]
}

You can just do the whole thing in SQL using FOR JSON.
Unfortunately, arrays are not possible using this method. There are a number of hacks, but the easiest one in your situation is to just append to [] using JSON_MODIFY:
SELECT
  authorIds = JSON_MODIFY('[]', 'append $', a.authorIds),
  [aminoAcids],
  [name],
  [schemaId],
  [registryId],
  [namingStrategy]
FROM aaSequences a
FOR JSON PATH, ROOT('aaSequences');
db<>fiddle
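If you'd rather keep the assembly on the Ruby side instead, the usual fix for the "root element once, commas between records" problem is to collect all rows into one array and serialize once at the end, rather than appending a complete JSON document per row. A minimal sketch, assuming @result is the TinyTds result set from your getseqrecord and aaSequences.json is a hypothetical output path:

require 'json'

aa_records = []
@result.each do |aaSequence|
  # one hash per row; keys mirror the output shown in the question
  aa_records << {
    :authorIds      => [aaSequence['authorIds']],
    :aminoAcids     => aaSequence['aminoAcids'],
    :name           => aaSequence['name'],
    :schemaId       => aaSequence['schemaId'],
    :registryId     => aaSequence['registryId'],
    :namingStrategy => aaSequence['namingStrategy']
  }
end

# a single write: the generator emits the one "aaSequences" root and all the commas
File.write('aaSequences.json', JSON.pretty_generate(:aaSequences => aa_records))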

Related

Pulling specific Parent/Child JSON data with Python

I'm having a difficult time figuring out how to pull specific information from a json file.
So far I have this:
# Import json library
import json

# Open json database file
with open('jsondatabase.json', 'r') as f:
    data = json.load(f)

# assign variables from json data and convert to usable information
identifier = data['ID']
identifier = str(identifier)

name = data['name']
name = str(name)

# Collect data from user to compare with data in json file
print("Please enter your numerical identifier and name: ")
user_id = input("Numerical identifier: ")
user_name = input("Name: ")

if user_id == identifier and user_name == name:
    print("Your inputs matched. Congrats.")
else:
    print("Your inputs did not match our data. Please try again.")
And that works great for a simple JSON file like this:
{
    "ID": "123",
    "name": "Bobby"
}
But ideally I need to create a more complex JSON file and can't find deeper information on how to pull specific information from something like this:
{
    "Parent": [
        {
            "Parent_1": [
                {
                    "Name": "Bobby",
                    "ID": "123"
                }
            ],
            "Parent_2": [
                {
                    "Name": "Linda",
                    "ID": "321"
                }
            ]
        }
    ]
}
Here is an example that you might be able to pick apart.
You could either:
Make a custom de-jsonify object_hook as shown below and do something with it. There is a good tutorial here.
Just gobble up the whole dictionary that you get without a custom de-jsonify and drill down into it and make a list or set of the results. (not shown)
Example:
import json
from collections import namedtuple

data = '''
{
    "Parents":
    [
        {
            "Name": "Bobby",
            "ID": "123"
        },
        {
            "Name": "Linda",
            "ID": "321"
        }
    ]
}
'''

Parent = namedtuple('Parent', ['name', 'id'])

def dejsonify(json_str: dict):
    if json_str.get("Name"):
        parent = Parent(json_str.get('Name'), int(json_str.get('ID')))
        return parent
    return json_str

res = json.loads(data, object_hook=dejsonify)
print(res)

# then we can do whatever... if you need lookups by name/id,
# we could put the result into a dictionary
all_parents = {(p.name, p.id): p for p in res['Parents']}

lookup_from_input = ('Bobby', 123)
print(f'found match: {all_parents.get(lookup_from_input)}')
Result:
{'Parents': [Parent(name='Bobby', id=123), Parent(name='Linda', id=321)]}
found match: Parent(name='Bobby', id=123)

Replace and access values in nested hash/json by path in Ruby

Asking for advice: what would be, in your opinion, the best and simplest solution to replace and access values in a nested hash or JSON by a path in a variable, using Ruby?
For example, imagine I have JSON or a hash with this kind of structure:
{
  "name": "John",
  "address": {
    "street": "street 1",
    "country": "country1"
  },
  "phone_numbers": [
    {
      "type": "mobile",
      "number": "234234"
    },
    {
      "type": "fixed",
      "number": "2342323423"
    }
  ]
}
And I would like to access or change the fixed number by a path which could be specified in a variable like this: "phone_numbers/1/number" (the separator does not matter in this case).
This solution is necessary to retrieve values from the JSON/hash and sometimes replace them by specifying the path to them. I found some solutions which can find a value by key, but that wouldn't work here, as there are hashes/JSON where the same key name appears in multiple places.
I saw this one: https://github.com/chengguangnan/vine , but it does not work when the payload is like this, as it is not a hash in this case:
[
  {
    "value": "test1"
  },
  {
    "value": "test2"
  }
]
Hope you have some great ideas how to solve this problem.
Thank you!
EDIT:
So I tried the code below with this data:
x = JSON.parse('[
  {
    "value":"test1"
  },
  {
    "value":"test2"
  }
]')

y = JSON.parse('{
  "name":"John",
  "address":{
    "street":"street 1",
    "country":"country1"
  },
  "phone_numbers":[
    {
      "type":"mobile",
      "number":"234234"
    },
    {
      "type":"fixed",
      "number":"2342323423"
    }
  ]
}')

p x
p y.to_h
p x.get_at_path("0/value")
p y.get_at_path("name")
And got this:
[{"value"=>"test1"}, {"value"=>"test2"}]
{"name"=>"John", "address"=>{"street"=>"street 1", "country"=>"country1"}, "phone_numbers"=>[{"type"=>"mobile", "number"=>"234234"}, {"type"=>"fixed", "number"=>"2342323423"}]}
hash_new.rb:91:in `<main>': undefined method `get_at_path' for [{"value"=>"test1"}, {"value"=>"test2"}]:Array (NoMethodError)
For y.get_at_path("name") I got nil.
You can make use of Hash#dig to get the sub-values; it'll keep calling dig on the result of each step until it reaches the end, and Array has dig as well, so when you reach that array things will keep working:
# you said the separator wasn't important, so it can be changed up here
SEPERATOR = '/'.freeze

class Hash
  def get_at_path(path)
    dig(*steps_from(path))
  end

  def replace_at_path(path, new_value)
    *steps, leaf = steps_from path
    # steps is empty in the "name" example; in that case, we are operating on
    # the root (self) hash, not a subhash
    hash = steps.empty? ? self : dig(*steps)
    # note that `hash` here doesn't _have_ to be a Hash, but it needs to
    # respond to `[]=`
    hash[leaf] = new_value
  end

  private

  # the example hash uses symbols as the keys, so we'll convert each step in
  # the path to symbols. If a step doesn't contain a non-digit character,
  # we'll convert it to an integer to be treated as the index into an array
  def steps_from path
    path.split(SEPERATOR).map do |step|
      if step.match?(/\D/)
        step.to_sym
      else
        step.to_i
      end
    end
  end
end
and then it can be used as such (hash contains your sample input):
p hash.get_at_path("phone_numbers/1/number") # => "2342323423"
p hash.get_at_path("phone_numbers/0/type") # => "mobile"
p hash.get_at_path("name") # => "John"
p hash.get_at_path("address/street") # => "street 1"
hash.replace_at_path("phone_numbers/1/number", "123-123-1234")
hash.replace_at_path("phone_numbers/0/type", "cell phone")
hash.replace_at_path("name", "John Doe")
hash.replace_at_path("address/street", "123 Street 1")
p hash.get_at_path("phone_numbers/1/number") # => "123-123-1234"
p hash.get_at_path("phone_numbers/0/type") # => "cell phone"
p hash.get_at_path("name") # => "John Doe"
p hash.get_at_path("address/street") # => "123 Street 1"
p hash
# => {:name=>"John Doe",
# :address=>{:street=>"123 Street 1", :country=>"country1"},
# :phone_numbers=>[{:type=>"cell phone", :number=>"234234"},
# {:type=>"fixed", :number=>"123-123-1234"}]}
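One gotcha: the asker's EDIT loads the data with JSON.parse, which produces string keys, so steps_from's to_sym conversion makes every lookup return nil (as seen above with y.get_at_path("name")). A hypothetical variant of steps_from that keeps string keys instead:

  def steps_from path
    path.split(SEPERATOR).map do |step|
      # keep non-numeric steps as strings for JSON.parse'd hashes;
      # purely numeric steps still become array indexes
      step.match?(/\D/) ? step : step.to_i
    end
  end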

transform JSON to hashmap in Ruby

I am trying to convert a JSON file which contains objects and arrays into a hashmap.
Below is the JSON file
{
  "localbusiness": {
    "name": "toto",
    "phone": "+11234567890"
  },
  "date": "05/02/2016",
  "time": "5:00pm",
  "count": "4",
  "userInfo": {
    "name": "John Doe",
    "phone": "+10987654321",
    "email": "john.doe@unknown.com",
    "userId": "user1234333"
  }
}
My goal is to save this in a database such as MongoId. I would like to use map to get something like:
localbusiness_name => "toto",
localbusiness_phone => "+11234567890",
date => "05/02/2016",
...
userInfo_name => "John Doe"
...
I have tried map but it's not splitting the array of local business or userInfo:
def format_entry
  ps = @params.map do |h|
    ps.merge!(h)
    ##logger.info("entry #{h}")
  end
  ##logger.info("formatting the data #{ps}")
  ps
end
I do not really know how to parse each entry and rebuild the name.
It looks to me like you are trying to "flatten" the inner hashes into one big hash. "Flatten" isn't quite the right word, because you want to prepend the hash's key to each sub-hash's key. This will require looping through the hash, and then looping again through each sub-hash. The code example below only works if you are one layer deep; if you have multiple layers, then I would suggest making two methods, or a recursive method (a sketch of the recursive version follows the example).
@business = { # This is a hash, not a json blob, but you can take json and call JSON.parse(blob) to turn it into a hash.
  "localbusiness": {
    "name": "toto",
    "phone": "+11234567890"
  },
  "date": "05/02/2016",
  "time": "5:00pm",
  "count": "4",
  "userInfo": {
    "name": "John Doe",
    "phone": "+10987654321",
    "email": "john.doe@unknown.com",
    "userId": "user1234333"
  }
}

@squashed_business = Hash.new

@business.each do |k, v|
  if v.is_a? Hash
    v.each do |key, value|
      @squashed_business.merge!((k.to_s + "_" + key.to_s) => value)
    end
  else
    @squashed_business.merge!(k => v)
  end
end
I noticed that you are getting "unexpected" outcomes when enumerating over a hash @params.each { |h| ... } because it gives you both a key and a value. Instead you want to do @params.each { |key, value| ... } as I did in the above code example.
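If the input can nest more than one layer deep, here is a sketch of the recursive variant suggested above; flatten_hash is a hypothetical helper name, and it assumes leaf values are anything that isn't a Hash:

def flatten_hash(hash, prefix = nil)
  hash.each_with_object({}) do |(k, v), result|
    # prepend the parent key, joined with "_", just like the loop above
    key = prefix ? "#{prefix}_#{k}" : k.to_s
    if v.is_a? Hash
      result.merge!(flatten_hash(v, key))
    else
      result[key] = v
    end
  end
end

flatten_hash(@business)
# => {"localbusiness_name"=>"toto", "localbusiness_phone"=>"+11234567890",
#     "date"=>"05/02/2016", "time"=>"5:00pm", "count"=>"4",
#     "userInfo_name"=>"John Doe", "userInfo_phone"=>"+10987654321",
#     "userInfo_email"=>"john.doe@unknown.com", "userInfo_userId"=>"user1234333"}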

how to parse and iterate a json using ruby

I'm a beginner with Ruby and learning to iterate over and parse JSON files. These are the contents inside input.json:
[
  {
    "scheme": "http",
    "domain_name": "www.example.com",
    "path": "path/to/file",
    "fragment": "header2"
  },
  {
    "scheme": "http",
    "domain_name": "www.example2.org",
    "disabled": true
  },
  {
    "scheme": "https",
    "domain_name": "www.stack.org",
    "path": "some/path",
    "query": {
      "key1": "val1",
      "key2": "val2"
    }
  }
]
How do I parse it and print the output as:
http://www.example.com/path/to/file#header2
https://www.stack.org/some/path?key1=val1&key2=val2
Any learning references would be very helpful.
Hopefully this code is self-explanatory:
require 'uri'
require 'json'

entries = JSON.parse(File.read('input.json'))

entries.reject { |entry| entry["disabled"] }.each do |entry|
  puts URI::Generic.build({
    :scheme => entry["scheme"],
    :host => entry["domain_name"],
    :fragment => entry["fragment"],
    :query => entry["query"] && URI.encode_www_form(entry["query"]),
    :path => entry["path"] && ("/" + entry["path"])
  }).to_s
end

# Output:
# http://www.example.com/path/to/file#header2
# https://www.stack.org/some/path?key1=val1&key2=val2
The first step is to turn this JSON into Ruby data:
require 'json'
data = JSON.load(DATA)
Then you need to iterate over this and reject all those that are flagged as disabled:
data.reject do |entry|
  entry['disabled']
end
Which you can chain together with an operation that leverages the URI library to build your output:
require 'uri'

uris = data.reject do |entry|
  entry['disabled']
end.map do |entry|
  case (entry['scheme'])
  when 'https'
    URI::HTTPS
  else
    URI::HTTP
  end.build(
    host: entry['domain_name'],
    path: normalized_path(entry['path']),
    query: entry['query'] && URI.encode_www_form(entry['query'])
  ).to_s
end
#=> ["http://www.example.com/path/to/file", "https://www.stack.org/some/path?key1=val1&key2=val2"]
This requires a function called normalized_path to deal with nil or invalid paths and fix them:
def normalized_path(path)
  case (path and path[0,1])
  when nil
    '/'
  when '/'
    path
  else
    "/#{path}"
  end
end

Logstash indexing JSON arrays

Logstash is awesome. I can send it JSON like this (multi-lined for readability):
{
  "a": "one",
  "b": {
    "alpha": "awesome"
  }
}
And then query for that line in kibana using the search term b.alpha:awesome. Nice.
However I now have a JSON log line like this:
{
  "different": [
    {
      "this": "one",
      "that": "uno"
    },
    {
      "this": "two"
    }
  ]
}
And I'd like to be able to find this line with a search like different.this:two (or different.this:one, or different.that:uno)
If I was using Lucene directly I'd iterate through the different array, and generate a new search index for each hash within it, but Logstash currently seems to ingest that line like this:
different: {this: one, that: uno}, {this: two}
Which isn't going to help me searching for log lines using different.this or different.that.
Anyone got any thoughts as to a codec, filter, or code change I can make to enable this?
You can write your own filter (copy & paste, rename the class name and the config_name, and rewrite the filter(event) method) or modify the current JSON filter (source on GitHub).
You can find the JSON filter (Ruby class) source code in the path logstash-1.x.x\lib\logstash\filters, in a file named json.rb. The JSON filter parses the content as JSON as follows:
begin
  # TODO(sissel): Note, this will not successfully handle json lists
  # like your text is '[ 1,2,3 ]' JSON.parse gives you an array (correctly)
  # which won't merge into a hash. If someone needs this, we can fix it
  # later.
  dest.merge!(JSON.parse(source))

  # If no target, we target the root of the event object. This can allow
  # you to overwrite @timestamp. If so, let's parse it as a timestamp!
  if !@target && event[TIMESTAMP].is_a?(String)
    # This is a hack to help folks who are mucking with @timestamp during
    # their json filter. You aren't supposed to do anything with
    # "@timestamp" outside of the date filter, but nobody listens... ;)
    event[TIMESTAMP] = Time.parse(event[TIMESTAMP]).utc
  end

  filter_matched(event)
rescue => e
  event.tag("_jsonparsefailure")
  @logger.warn("Trouble parsing json", :source => @source,
               :raw => event[@source], :exception => e)
  return
end
You can modify the parsing procedure to modify the original JSON:
json = JSON.parse(source)
if json.is_a?(Hash)
  json.each do |key, value|
    if value.is_a?(Array)
      value.each_with_index do |object, index|
        # modify as you need
        object["index"] = index
      end
    end
  end
end
# save modified json
# ......
dest.merge!(json)
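For instance, with the sample log line from the question, this hypothetical modification would tag each element of the "different" array with its position before the merge, producing roughly:
{
  "different": [
    { "this": "one", "that": "uno", "index": 0 },
    { "this": "two", "index": 1 }
  ]
}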
Then you can modify your config file to use your new/modified JSON filter, placed in \logstash-1.x.x\lib\logstash\config.
This is my elastic_with_json.conf with a modified json.rb filter:
input {
  stdin {
  }
}
filter {
  json {
    source => "message"
  }
}
output {
  elasticsearch {
    host => localhost
  }
  stdout {
  }
}
if you want to use your new filter you can configure it with the config_name
class LogStash::Filters::Json_index < LogStash::Filters::Base
  config_name "json_index"
  milestone 2
  ....
end
and configure it
input {
  stdin {
  }
}
filter {
  json_index {
    source => "message"
  }
}
output {
  elasticsearch {
    host => localhost
  }
  stdout {
  }
}
Hope this helps.
For a quick and dirty hack, I used the Ruby filter and the code below; no need to use the out-of-the-box 'json' filter anymore:
input {
  stdin {}
}
filter {
  grok {
    match => ["message", "(?<json_raw>.*)"]
  }
  ruby {
    init => "
      def parse_json obj, pname=nil, event
        obj = JSON.parse(obj) unless obj.is_a? Hash
        obj = obj.to_hash unless obj.is_a? Hash
        obj.each {|k,v|
          p = pname.nil? ? k : pname
          if v.is_a? Array
            v.each_with_index {|oo,ii|
              parse_json_array(oo,ii,p,event)
            }
          elsif v.is_a? Hash
            parse_json(v,p,event)
          else
            p = pname.nil? ? k : [pname,k].join('.')
            event[p] = v
          end
        }
      end

      def parse_json_array obj, i, pname, event
        obj = JSON.parse(obj) unless obj.is_a? Hash
        pname_ = pname
        if obj.is_a? Hash
          obj.each {|k,v|
            p = [pname_,i,k].join('.')
            if v.is_a? Array
              v.each_with_index {|oo,ii|
                parse_json_array(oo,ii,p,event)
              }
            elsif v.is_a? Hash
              parse_json(v,p,event)
            else
              event[p] = v
            end
          }
        else
          n = [pname_,i].join('.')
          event[n] = obj
        end
      end
    "
    code => "parse_json(event['json_raw'].to_s,nil,event) if event['json_raw'].to_s.include? ':'"
  }
}
output {
  stdout { codec => rubydebug }
}
Test JSON structure:
{"id":123, "members":[{"i":1, "arr":[{"ii":11},{"ii":22}]},{"i":2}], "im_json":{"id":234, "members":[{"i":3},{"i":4}]}}
and this is what's output:
{
  "message" => "{\"id\":123, \"members\":[{\"i\":1, \"arr\":[{\"ii\":11},{\"ii\":22}]},{\"i\":2}], \"im_json\":{\"id\":234, \"members\":[{\"i\":3},{\"i\":4}]}}",
  "@version" => "1",
  "@timestamp" => "2014-07-25T00:06:00.814Z",
  "host" => "Leis-MacBook-Pro.local",
  "json_raw" => "{\"id\":123, \"members\":[{\"i\":1, \"arr\":[{\"ii\":11},{\"ii\":22}]},{\"i\":2}], \"im_json\":{\"id\":234, \"members\":[{\"i\":3},{\"i\":4}]}}",
  "id" => 123,
  "members.0.i" => 1,
  "members.0.arr.0.ii" => 11,
  "members.0.arr.1.ii" => 22,
  "members.1.i" => 2,
  "im_json" => 234,
  "im_json.0.i" => 3,
  "im_json.1.i" => 4
}
The solution I liked is the Ruby filter, because it means we don't have to write another filter. However, that solution creates fields at the "root" of the JSON, and it's hard to keep track of how the original document looked.
I came up with something similar that's easier to follow and is a recursive solution, so it's cleaner.
ruby {
  init => "
    def arrays_to_hash(h)
      h.each do |k, v|
        # If v is nil, an array is being iterated and the value is k.
        # If v is not nil, a hash is being iterated and the value is v.
        value = v || k
        if value.is_a?(Array)
          # 'value' is replaced with 'value_hash' later.
          value_hash = {}
          value.each_with_index do |v, i|
            value_hash[i.to_s] = v
          end
          h[k] = value_hash
        end
        if value.is_a?(Hash) || value.is_a?(Array)
          arrays_to_hash(value)
        end
      end
    end
  "
  code => "arrays_to_hash(event.to_hash)"
}
It converts arrays to hashes, with each key set to the index number. More details: http://blog.abhijeetr.com/2016/11/logstashelasticsearch-best-way-to.html
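To see what that does to a document, here is a hypothetical before/after, calling arrays_to_hash on a plain Ruby hash outside Logstash:

doc = { "members" => [ { "i" => 1 }, { "i" => 2 } ] }
arrays_to_hash(doc)
# doc is now:
# { "members" => { "0" => { "i" => 1 }, "1" => { "i" => 2 } } }
# so fields like members.0.i become searchable instead of an opaque array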