Ruby: Extract from deeply nested JSON structure based on multiple criteria - json

I want to select any marketId of marketName == 'Moneyline' but only those with countryCode == 'US' || 'GB' OR eventName.include?(' # '). (space before and after the #). I tried different combos of map and select but some nodes don't have countryCode which complicates things for me. This is the source, but a sample of what it might look like:
{"currencyCode"=>"GBP",
"eventTypes"=>[
{"eventTypeId"=>7522,
"eventNodes"=>[
{"eventId"=>28024331,
"event"=>
{"eventName"=>"EWE Baskets Oldenburg v PAOK Thessaloniki BC"
},
"marketNodes"=>[
{"marketId"=>"1.128376755",
"description"=>
{"marketName"=>"Moneyline"}
},
{"marketId"=>"1.128377853",
"description"=>
{"marketName"=>"Start Lublin +7.5"}
}}}]},
{"eventId"=>28023434,
"event"=>
{"eventName"=>"Asseco Gdynia v Start Lublin",
"countryCode"=>"PL",
},
"marketNodes"=>
[{"marketId"=>"1.128377853", ETC...

Based on this previous answer, you just need to add a select on eventNodes :
require 'json'
json = File.read('data.json')
hash = JSON.parse(json)
moneyline_market_ids = hash["eventTypes"].map{|type|
type["eventNodes"].select{|event_node|
['US', 'GB'].include?(event_node["event"]["countryCode"]) || event_node["event"]["eventName"].include?(' # ')
}.map{|event|
event["marketNodes"].select{|market|
market["description"]["marketName"] == 'Moneyline'
}.map{|market|
market["marketId"]
}
}
}.flatten
puts moneyline_market_ids.join(', ')
#=> 1.128255531, 1.128272164, 1.128255516, 1.128272159, 1.128278718, 1.128272176, 1.128272174, 1.128272169, 1.128272148, 1.128272146, 1.128255464, 1.128255448, 1.128272157, 1.128272155, 1.128255499, 1.128272153, 1.128255484, 1.128272150, 1.128255748, 1.128272185, 1.128278720, 1.128272183, 1.128272178, 1.128255729, 1.128360712, 1.128255371, 1.128255433, 1.128255418, 1.128255403, 1.128255387
If you want to keep the country code and name information with the id:
moneyline_market_ids = hash["eventTypes"].map{|type|
type["eventNodes"].map{|event_node|
[event_node, event_node["event"]["countryCode"], event_node["event"]["eventName"]]
}.select{|_, country, event_name|
['US', 'GB'].include?(country) || event_name.include?(' # ')
}.map{|event, country, event_name|
event["marketNodes"].select{|market|
market["description"]["marketName"] == 'Moneyline'
}.map{|market|
[market["marketId"],country,event_name]
}
}
}.flatten(2)
require 'pp'
pp moneyline_market_ids
#=> [["1.128255531", "US", "Philadelphia # Seattle"],
# ["1.128272164", "US", "Arkansas # Mississippi State"],
# ["1.128255516", "US", "New England # San Francisco"],
# ["1.128272159", "US", "Indiana # Michigan"],
# ["1.128278718", "CA", "Edmonton # Ottawa"],
# ["1.128272176", "US", "Arizona State # Washington"],
# ["1.128272174", "US", "Alabama A&M # Auburn"],
# ...

Related

How do I iterate variables to pass into a http header params for further iteration

I have 2 json api's that I am requesting; search and extended profile.
The first one gives me some search results for profiles. The search results have a "memberid" number ps['id'] for each profile found.
I want to pass and iterate those memberid's to the next json api for the extended profile information for each member. The memberid's has to be passed into the profile_params. As it is now, only 1 memberid is being passed and stored and therefore I only get the first extended profile and not all from the search.
My code is like this:
# Search for profiles
search_response = requests.post('https://api_search_for_profiles', headers=search_headers, data=search_params)
search_json = json.dumps(search_response.json(), indent=2)
search_data = json.loads(search_json)
memberid = []
for ps in (search_data['data']['content']):
memberid = str(ps['id']) # These memberid's I want to pass all found to the profile_params.
print('UserID: ' + str(ps['roomNo']))
print('MemberID: ' + str(ps['id']))
print('Username: ' + ps['nickName'])
# Extended profile info
profile_headers = {
'x-auth-token': f'{token}',
'Content-Type': 'application/x-www-form-urlencoded',
'User-Agent': 'okhttp/3.11.0',
}
profile_params = {
'id': '',
'token': f'{token}',
'memberId': f'{memberid}', # where I want each memberid from the search to go
'roomNo': ''
}
profile_response = requests.post('https://api_extended_profile_information', headers=profile_headers, data=profile_params)
profile_json = json.dumps(profile_response.json(), indent=2)
profile_data = json.loads(profile_json)
pfd = profile_data['data'] # main data
userid = str(pfd['roomNo'])
username = pfd['nickName']
gender = str(pfd['gender'])
level = str(pfd['memberLevel'])
# Here I will iterate through each profiles with the corresponding memberid and print.
The json output for search is like this, snippet:
{
"code": 0,
"data": {
"content": [
{
"id": 1359924,
"memberLevel": 1,
"nickName": "akuntesting dgt",
"roomNo": 1820031
},
{
"id": 2607179,
"memberLevel": 1,
"nickName": "testingsyth",
"roomNo": 3299390
},
# ... and so on
Assuming the post request takes only one memberid, the following is a simplified version of your code designed to handle only the issue of multiple memberids. Starting here:
memberids = []
for ps in (search_data['data']['content']):
memberid = str(ps['id'])
memberids.append(memberid)
for memberid in memberids:
profile_params = {'memberId': memberid}
profile_response = requests.post('https://api_extended_profile_information', headers=profile_headers, data=profile_params)
#the rest of your code goes here inside the loop
Try it and let me know if it works.

Replace and access values in nested hash/json by path in Ruby

Asking for a advice what would be in your opinion best and simple solution to replace and access values in nested hash or json by path ir variable using ruby?
For example imagine I have json or hash with this kind of structure:
{
"name":"John",
"address":{
"street":"street 1",
"country":"country1"
},
"phone_numbers":[
{
"type":"mobile",
"number":"234234"
},
{
"type":"fixed",
"number":"2342323423"
}
]
}
And I would like to access or change fixed mobile number by path which could be specified in variable like this: "phone_numbers/1/number" (separator does not matter in this case)
This solution is necessary to retrieve values from json/hash and sometimes replace variables by specifying path to it. Found some solutions which can find value by key, but this solution wouldn't work as there is some hashes/json where key name is same in multiple places.
I saw this one: https://github.com/chengguangnan/vine , but it does not work when payload is like this as it is not kinda hash in this case:
[
{
"value":"test1"
},
{
"value":"test2"
}
]
Hope you have some great ideas how to solve this problem.
Thank you!
EDIT:
So I tried code below with this data:
x = JSON.parse('[
{
"value":"test1"
},
{
"value":"test2"
}
]')
y = JSON.parse('{
"name":"John",
"address":{
"street":"street 1",
"country":"country1"
},
"phone_numbers":[
{
"type":"mobile",
"number":"234234"
},
{
"type":"fixed",
"number":"2342323423"
}
]
}')
p x
p y.to_h
p x.get_at_path("0/value")
p y.get_at_path("name")
And got this:
[{"value"=>"test1"}, {"value"=>"test2"}]
{"name"=>"John", "address"=>{"street"=>"street 1", "country"=>"country1"}, "phone_numbers"=>[{"type"=>"mobile", "number"=>"234234"}, {"type"=>"fixed", "number"=>"2342323423"}]}
hash_new.rb:91:in `<main>': undefined method `get_at_path' for [{"value"=>"test1"}, {"value"=>"test2"}]:Array (NoMethodError)
For y.get_at_path("name") got nil
You can make use of Hash.dig to get the sub-values, it'll keep calling dig on the result of each step until it reaches the end, and Array has dig as well, so when you reach that array things will keep working:
# you said the separator wasn't important, so it can be changed up here
SEPERATOR = '/'.freeze
class Hash
def get_at_path(path)
dig(*steps_from(path))
end
def replace_at_path(path, new_value)
*steps, leaf = steps_from path
# steps is empty in the "name" example, in that case, we are operating on
# the root (self) hash, not a subhash
hash = steps.empty? ? self : dig(*steps)
# note that `hash` here doesn't _have_ to be a Hash, but it needs to
# respond to `[]=`
hash[leaf] = new_value
end
private
# the example hash uses symbols as the keys, so we'll convert each step in
# the path to symbols. If a step doesn't contain a non-digit character,
# we'll convert it to an integer to be treated as the index into an array
def steps_from path
path.split(SEPERATOR).map do |step|
if step.match?(/\D/)
step.to_sym
else
step.to_i
end
end
end
end
and then it can be used as such (hash contains your sample input):
p hash.get_at_path("phone_numbers/1/number") # => "2342323423"
p hash.get_at_path("phone_numbers/0/type") # => "mobile"
p hash.get_at_path("name") # => "John"
p hash.get_at_path("address/street") # => "street 1"
hash.replace_at_path("phone_numbers/1/number", "123-123-1234")
hash.replace_at_path("phone_numbers/0/type", "cell phone")
hash.replace_at_path("name", "John Doe")
hash.replace_at_path("address/street", "123 Street 1")
p hash.get_at_path("phone_numbers/1/number") # => "123-123-1234"
p hash.get_at_path("phone_numbers/0/type") # => "cell phone"
p hash.get_at_path("name") # => "John Doe"
p hash.get_at_path("address/street") # => "123 Street 1"
p hash
# => {:name=>"John Doe",
# :address=>{:street=>"123 Street 1", :country=>"country1"},
# :phone_numbers=>[{:type=>"cell phone", :number=>"234234"},
# {:type=>"fixed", :number=>"123-123-1234"}]}

compare input to fields in a json file in ruby

I am trying to create a function that takes an input. Which in this case is a tracking code. Look that tracking code up in a JSON file then return the tracking code as output. The json file is as follows:
[
{
"tracking_number": "IN175417577",
"status": "IN_TRANSIT",
"address": "237 Pentonville Road, N1 9NG"
},
{
"tracking_number": "IN175417578",
"status": "NOT_DISPATCHED",
"address": "Holly House, Dale Road, Coalbrookdale, TF8 7DT"
},
{
"tracking_number": "IN175417579",
"status": "DELIVERED",
"address": "Number 10 Downing Street, London, SW1A 2AA"
}
]
I have started using this function:
def compare_content(tracking_number)
File.open("pages/tracking_number.json", "r") do |file|
file.print()
end
Not sure how I would compare the input to the json file. Any help would be much appreciated.
You can use the built-in JSON module.
require 'json'
def compare_content(tracking_number)
# Loads ENTIRE file into string. Will not be effective on very large files
json_string = File.read("pages/tracking_number.json")
# Uses the JSON module to create an array from the JSON string
array_from_json = JSON.parse(json_string)
# Iterates through the array of hashes
array_from_json.each do |tracking_hash|
if tracking_number == tracking_hash["tracking_number"]
# If this code runs, tracking_hash has the data for the number you are looking up
end
end
end
This will parse the JSON supplied into an array of hashes which you can then compare to the number you are looking up.
If you are the one generating the JSON file and this method will be called a lot, consider mapping the tracking numbers directly to their data for this method to potentially run much faster. For example,
{
"IN175417577": {
"status": "IN_TRANSIT",
"address": "237 Pentonville Road, N1 9NG"
},
"IN175417578": {
"status": "NOT_DISPATCHED",
"address": "Holly House, Dale Road, Coalbrookdale, TF8 7DT"
},
"IN175417579": {
"status": "DELIVERED",
"address": "Number 10 Downing Street, London, SW1A 2AA"
}
}
That would parse into a hash, where you could much more easily grab the data:
require 'json'
def compare_content(tracking_number)
json_string = File.read("pages/tracking_number.json")
hash_from_json = JSON.parse(json_string)
if hash_from_json.key?(tracking_number)
tracking_hash = hash_from_json[tracking_number]
else
# Tracking number does not exist
end
end

Nokogiri parsing table with no html element

I have this code that attempts to go to a URL and parse 'li' elements into an array. However I have run into a problem when trying to parse anything that is not in a 'b' tag.
Code:
url = '(some URL)'
page = Nokogiri::HTML(open(url))
csv = CSV.open("/tmp/output.csv", 'w')
page.search('//li[not(#id) and not(#class)]').each do |row|
arr = []
row.search('b').each do |cell|
arr << cell.text
end
csv << arr
pp arr
end
HTML:
<li><b>The Company Name</b><br>
The Street<br>
The City,
The State
The Zipcode<br><br>
</li>
I would like to parse all of the elements so that the output would be something like this:
["The Company Name", "The Street", "The City", "The State", "The Zip Code"],
["The Company Name", "The Street", "The City", "The State", "The Zip Code"],
["The Company Name", "The Street", "The City", "The State", "The Zip Code"]
require 'nokogiri'
def main
output = []
page = File.open("parse.html") {|f| Nokogiri::HTML(f)}
page.search("//li[not(#id) and not (#class)]").each do |row|
arr = []
result = row.text
result.each_line { |l|
if l.strip.length > 0
arr << l.strip
end
}
output << arr
end
print output
end
if __FILE__ == $PROGRAM_NAME
main()
end
I ended up finding the solution to my own question so if anyone is interested I simply changed
row.search('b').each do |cell|
into:
row.search('text()'.each do |cell|
I also changed
arr << cell.text
into:
arr << cell.text.gsub("\n", '').gsub("\r", '')
in order to remove all the \n and the \r that were present in the output.
Based on your HTML I'd do it like:
require 'nokogiri'
doc = Nokogiri::HTML(<<EOT)
<ol>
<li><b>The Company Name</b><br>
The Street<br>
The City,
The State
The Zipcode<br><br>
</li>
<li><b>The Company Name</b><br>
The Street<br>
The City,
The State
The Zipcode<br><br>
</li>
</ol>
EOT
doc.search('li').map{ |li|
text = li.text.split("\n").map(&:strip)
}
# => [["The Company Name",
# "The Street",
# "The City,",
# "The State",
# "The Zipcode"],
# ["The Company Name",
# "The Street",
# "The City,",
# "The State",
# "The Zipcode"]]

Postgres JSON data type Rails query

I am using Postgres' json data type but want to do a query/ordering with data that is nested within the json.
I want to order or query with .where on the json data type. For example, I want to query for users that have a follower count > 500 or I want to order by follower or following count.
Thanks!
Example:
model User
data: {
"photos"=>[
{"type"=>"facebook", "type_id"=>"facebook", "type_name"=>"Facebook", "url"=>"facebook.com"}
],
"social_profiles"=>[
{"type"=>"vimeo", "type_id"=>"vimeo", "type_name"=>"Vimeo", "url"=>"http://vimeo.com/", "username"=>"v", "id"=>"1"},
{"bio"=>"I am not a person, but a series of plants", "followers"=>1500, "following"=>240, "type"=>"twitter", "type_id"=>"twitter", "type_name"=>"Twitter", "url"=>"http://www.twitter.com/", "username"=>"123", "id"=>"123"}
]
}
For any who stumbles upon this. I have come up with a list of queries using ActiveRecord and Postgres' JSON data type. Feel free to edit this to make it more clear.
Documentation to the JSON operators used below: https://www.postgresql.org/docs/current/functions-json.html.
# Sort based on the Hstore data:
Post.order("data->'hello' DESC")
=> #<ActiveRecord::Relation [
#<Post id: 4, data: {"hi"=>"23", "hello"=>"22"}>,
#<Post id: 3, data: {"hi"=>"13", "hello"=>"21"}>,
#<Post id: 2, data: {"hi"=>"3", "hello"=>"2"}>,
#<Post id: 1, data: {"hi"=>"2", "hello"=>"1"}>]>
# Where inside a JSON object:
Record.where("data ->> 'likelihood' = '0.89'")
# Example json object:
r.column_data
=> {"data1"=>[1, 2, 3],
"data2"=>"data2-3",
"array"=>[{"hello"=>1}, {"hi"=>2}],
"nest"=>{"nest1"=>"yes"}}
# Nested search:
Record.where("column_data -> 'nest' ->> 'nest1' = 'yes' ")
# Search within array:
Record.where("column_data #>> '{data1,1}' = '2' ")
# Search within a value that's an array:
Record.where("column_data #> '{array,0}' ->> 'hello' = '1' ")
# this only find for one element of the array.
# All elements:
Record.where("column_data ->> 'array' LIKE '%hello%' ") # bad
Record.where("column_data ->> 'array' LIKE ?", "%hello%") # good
According to this http://edgeguides.rubyonrails.org/active_record_postgresql.html#json
there's a difference in using -> and ->>:
# db/migrate/20131220144913_create_events.rb
create_table :events do |t|
t.json 'payload'
end
# app/models/event.rb
class Event < ActiveRecord::Base
end
# Usage
Event.create(payload: { kind: "user_renamed", change: ["jack", "john"]})
event = Event.first
event.payload # => {"kind"=>"user_renamed", "change"=>["jack", "john"]}
## Query based on JSON document
# The -> operator returns the original JSON type (which might be an object), whereas ->> returns text
Event.where("payload->>'kind' = ?", "user_renamed")
So you should try Record.where("data ->> 'status' = 200 ") or the operator that suits your query (http://www.postgresql.org/docs/current/static/functions-json.html).
Your question doesn't seem to correspond to the data you've shown, but if your table is named users and data is a field in that table with JSON like {count:123}, then the query
SELECT * WHERE data->'count' > 500 FROM users
will work. Take a look at your database schema to make sure you understand the layout and check that the query works before complicating it with Rails conventions.
JSON filtering in Rails
Event.create( payload: [{ "name": 'Jack', "age": 12 },
{ "name": 'John', "age": 13 },
{ "name": 'Dohn', "age": 24 }]
Event.where('payload #> ?', '[{"age": 12}]')
#You can also filter by name key
Event.where('payload #> ?', '[{"name": "John"}]')
#You can also filter by {"name":"Jack", "age":12}
Event.where('payload #> ?', {"name":"Jack", "age":12}.to_json)
You can find more about this here