Related
This is a followup to this question:
Ruby create JSON from SQL Server
I was able to create nested arrays in JSON. But I'm struggling with looping through records and appending a file with each record. Also how would I add a root element just at the top of the json and not on each record. "aaSequences" needs to be at the top just once... I also need a comma between each record.
here is my code so far
require 'pp'
require 'tiny_tds'
require 'awesome_print'
require 'json'
class Document
def initialize strategy
#document = strategy
#load helper functions
load "helpers_ruby.rb"
#set environment 'dev', 'qa', or 'production'
load "envconfig_ruby.rb"
end
def StartUP
#document.StartUP
end
def getseqrecord
#document.getseqrecord
end
end
class GetSqlaaSequence
def StartUP
##system "clear" ##linux
system "cls" ##Windows
# create connection to db
$connReportingDB = createReportingxxSqlConn($ms_sql_host, $ms_sql_user, $ms_sql_password, $ms_sql_dbname)
##$currentDateTime = DateTime.now
##pp 'def StartUP ran at: '+$currentDateTime.to_s
end
def getseqrecord
# get the aaaaSequences data
#result = $connReportingDB.execute("SELECT
[jsonFile]
,[id]
,[title]
,[authorIds]
,[name]
,[aminoAcids]
,[schemaId]
,[registryId]
,[namingStrategy]
FROM tablename
")
$aaSequences = Array.new
#i = 0
#result.each do |aaSequence|
jsonFile = aaSequence['jsonFile']
id = aaSequence['id']
title = aaSequence['title']
authorIds = aaSequence['authorIds']
name = aaSequence['name']
aminoAcids = aaSequence['aminoAcids']
schemaId = aaSequence['schemaId']
registryId = aaSequence['registryId']
namingStrategy = aaSequence['namingStrategy']
##end
#hash = Hash[
"jsonFile", jsonFile,
"id", id,
"title", title,
"authorIds", authorIds,
"name", name,
"aminoAcids", aminoAcids,
"schemaId", schemaId,
"registryId", registryId,
"namingStrategy", namingStrategy
]
#filename = jsonFile
jsonFileOutput0 = {:"#{title}" => [{:authorIds => ["#{authorIds}"],:aminoAcids => "#{aminoAcids}",:name => "#{name}",:schemaId => "#{schemaId}",:registryId => "#{registryId}",:namingStrategy => "#{namingStrategy}"}]}
jsonFileOutput = JSON.pretty_generate(jsonFileOutput0)
File.open(jsonFile,"a") do |f|
f.write(jsonFileOutput)
####ad the comma between records...Not sure if this is the best way to do it...
# File.open(jsonFile,"a") do |f|
# f.write(',')
# end
end
$aaSequences[#i] = #hash
#i = #i + 1
###createReportingSqlConn.close
end
end
end
Document.new(GetSqlaaSequence.new).StartUP
#get aaSequences and create json files
Document.new(GetSqlaaSequence.new).getseqrecord
here is a sample of the json it creates so far...
{
"aaSequences": [
{
"authorIds": [
"fff_fdfdfdfd"
],
"aminoAcids": "aminoAcids_data",
"name": "fdfdfddf-555_1",
"schemaId": "5555fdfd5",
"registryId": "5fdfdfdf",
"namingStrategy": "NEW_IDS"
}
]
}{
"aaSequences": [
{
"authorIds": [
"fff_fdfdfdfd"
],
"aminoAcids": "aminoAcids_data",
"name": "fdfdfddf-555_2",
"schemaId": "5555fdfd5",
"registryId": "5fdfdfdf",
"namingStrategy": "NEW_IDS"
}
]
}
and here is an example of what I need it to look like
{
"aaSequences": [
{
"authorIds": [
"authorIds_data"
],
"aminoAcids": "aminoAcids_data",
"name": "name_data",
"schemaId": "schemaId_data",
"registryId": "registryId_data",
"namingStrategy": "namingStrategy_data"
},
{
"authorIds": [
"authorIds_data"
],
"aminoAcids": "aminoAcids_data",
"name": "name_data",
"schemaId": "schemaId_data",
"registryId": "registryId_data",
"namingStrategy": "namingStrategy_data"
}
]
}
You can just do the whole thing in SQL using FOR JSON.
Unfortunately, arrays are not possible using this method. There are anumber of hacks, but the easiest one in your situation is to just append to [] using JSON_MODIFY
SELECT
authorIds = JSON_MODIFY('[]', 'append $', a.authorIds),
[aminoAcids],
[name],
[schemaId],
[registryId],
[namingStrategy]
FROM aaSequences a
FOR JSON PATH, ROOT('aaSequences');
db<>fiddle
I have the following json file annotations
and here is a screenshot form it.tree structure of the json file
I want to parse it and extract the following info
here is a link which I take this screenshot form it Standard Dataset Dicts
I tried to use this code which is not working as expected.
def get_buildings_dicts(img_dir):
json_file = os.path.join(img_dir, "annotations.json")
with open(json_file) as f:
imgs_anns = json.load(f)
dataset_dicts = []
for idx, v in enumerate(imgs_anns):
record = {}
filename = os.path.join(img_dir, v["imagePath"])
height, width = cv2.imread(filename).shape[:2]
record["file_name"] = filename
record["image_id"] = idx
record["height"] = height
record["width"] = width
annos = v["shapes"][idx]
objs = []
for anno in annos:
# assert not anno["region_attributes"]
anno = anno["shape_type"]
px = anno["points"][0]
py = anno["points"][1]
poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
poly = [p for x in poly for p in x]
obj = {
"bbox": [np.min(px), np.min(py), np.max(px), np.max(py)],
"bbox_mode": BoxMode.XYXY_ABS,
"segmentation": [poly],
"category_id": 0,
}
objs.append(obj)
record["annotations"] = objs
dataset_dicts.append(record)
return dataset_dicts
here is an expected output of the final dict items:
{
"file_name": "balloon/train/34020010494_e5cb88e1c4_k.jpg",
"image_id": 0,
"height": 1536,
"width": 2048,
"annotations": [
{
"bbox": [994, 619, 1445, 1166],
"bbox_mode": <BoxMode.XYXY_ABS: 0>,
"segmentation": [[1020.5, 963.5, 1000.5, 899.5, 994.5, 841.5, 1003.5, 787.5, 1023.5, 738.5, 1050.5, 700.5, 1089.5, 663.5, 1134.5, 638.5, 1190.5, 621.5, 1265.5, 619.5, 1321.5, 643.5, 1361.5, 672.5, 1403.5, 720.5, 1428.5, 765.5, 1442.5, 800.5, 1445.5, 860.5, 1441.5, 896.5, 1427.5, 942.5, 1400.5, 990.5, 1361.5, 1035.5, 1316.5, 1079.5, 1269.5, 1112.5, 1228.5, 1129.5, 1198.5, 1134.5, 1207.5, 1144.5, 1210.5, 1153.5, 1190.5, 1166.5, 1177.5, 1166.5, 1172.5, 1150.5, 1174.5, 1136.5, 1170.5, 1129.5, 1153.5, 1122.5, 1127.5, 1112.5, 1104.5, 1084.5, 1061.5, 1037.5, 1032.5, 989.5, 1020.5, 963.5]],
"category_id": 0
}
]
}
I think the only tricky part is dealing with the nested lists but a handful of coprehensions can probably make life easier for us.
Try:
import json
new_images = []
with open("merged_file.json", "r") as file_in:
for index, image in enumerate( json.load(file_in)):
#height, width = cv2.imread(filename).shape[:2]
height, width = 100, 100
new_images.append({
"image_id": index,
"filename": image["imagePath"],
"height": height,
"width": width,
"annotations": [
{
"category_id": 0,
#"bbox_mode": BoxMode.XYXY_ABS,
"bbox_mode": 0,
"bbox": [
min(x for x,y in shape["points"]),
min(y for x,y in shape["points"]),
max(x for x,y in shape["points"]),
max(y for x,y in shape["points"])
],
"segmentation": [coord for point in shape["points"] for coord in point]
}
for shape in image["shapes"]
],
})
print(json.dumps(new_images, indent=2))
print(self.global_model.state_dict())
print("total_loss",total_loss)
total_loss.backward()
self.opt.step()
print(self.global_model.state_dict())
it's output is
('dense1.weight', tensor([[ 0.3997, -0.1907, 0.1120, 0.3016],
[ 0.1156, 0.0646, 0.1802, 0.3558],
[ 0.0321, 0.2537, 0.0879, 0.2441],
[-0.2952, -0.0886, -0.3235, 0.3006]])), ('dense1.bias', tensor([ 0.1927, 0.3048, -0.3551, -0.0302])), ('dense2.weig
total_loss.backward() tensor(2.5806, dtype=torch.float64, grad_fn=<MeanBackward0>)
('dense1.weight', tensor([[ 0.3997, -0.1907, 0.1120, 0.3016],
[ 0.1156, 0.0646, 0.1802, 0.3558],
[ 0.0321, 0.2537, 0.0879, 0.2441],
[-0.2952, -0.0886, -0.3235, 0.3006]])), ('dense1.bias', tensor([ 0.192
We can see total_loss have some value but it is not updating the weights
self.opt = torch.optim.SGD(self.global_model.parameters(),lr = 0.01)
Update
If i do
print(self.local_model.state_dict())
print("total_loss.backward()",total_loss)
total_loss.backward()
opt_2 = torch.optim.SGD(self.local_model.parameters(),lr = 0.01)
opt_2.step()
self.opt.step()
print(self.local_model.state_dict())
It updates the weights of local model.
But i need to apply that gradient to other model. So what i will need to do ?
Let's say I have the following document in a MongoDB database:
{
"assist_leaders" : {
"Steve Nash" : {
"team" : "Phoenix Suns",
"position" : "PG",
"draft_data" : {
"class" : 1996,
"pick" : 15,
"selected_by" : "Phoenix Suns",
"college" : "Santa Clara"
}
},
"LeBron James" : {
"team" : "Cleveland Cavaliers",
"position" : "SF",
"draft_data" : {
"class" : 2003,
"pick" : 1,
"selected_by" : "Cleveland Cavaliers",
"college" : "None"
}
},
}
}
I'm trying to collect a few values under "draft_data" for each player in an ORDERED list. The list needs to look like the following for this particular document:
[ [1996, 15, "Phoenix Suns"], [2003, 1, "Cleveland Cavaliers"] ]
That is, each nested list must contain the values corresponding to the "pick", "selected_by", and "class" keys, in that order. I also need the "Steve Nash" data to come before the "LeBron James" data.
How can I achieve this using pymongo? Note that the structure of the data is not set in stone so I can change this if that makes the code simpler.
I'd extract the data and turn it into a list in Python, once you've retrieved the document from MongoDB:
for doc in db.collection.find():
for name, info in doc['assist_leaders'].items():
draft_data = info['draft_data']
lst = [draft_data['class'], draft_data['pick'], draft_data['selected_by']]
print name, lst
List comprehension is the way to go here (Note: don't forget .iteritems() in Python2 or .items() in Python3 or you'll get a ValueError: too many values to unpack).
import pymongo
import numpy as np
client = pymongo.MongoClient()
db = client[database_name]
dataList = [v for i in ["Steve Nash", "LeBron James"]
for key in ["class", "pick", "selected_by"]
for document in db.collection_name.find({"assist_leaders": {"$exists": 1}})
for k, v in document["assist_leaders"][i]["draft_data"].iteritems()
if k == key]
print dataList
# [1996, 15, "Phoenix Suns", 2003, 1, "Cleveland Cavaliers"]
matrix = np.reshape(dataList, [2,3])
print matrix
# [ [1996, 15, "Phoenix Suns"],
# [2003, 1, "Cleveland Cavaliers"] ]
I am trying to create a ragged list in R that corresponds to the D3 tree structure of flare.json. My data is in a data.frame:
path <- data.frame(P1=c("direct","direct","organic","direct"),
P2=c("direct","direct","end","end"),
P3=c("direct","organic","",""),
P4=c("end","end","",""), size=c(5,12,23,45))
path
P1 P2 P3 P4 size
1 direct direct direct end 5
2 direct direct organic end 12
3 organic end 23
4 direct end 45
but it could also be a list or reshaped if necessary:
path <- list()
path[[1]] <- list(name=c("direct","direct","direct","end"),size=5)
path[[2]] <- list(name=c("direct","direct","organic","end"), size=12)
path[[3]] <- list(name=c("organic", "end"), size=23)
path[[4]] <- list(name=c("direct", "end"), size=45)
The desired output is:
rl <- list()
rl <- list(name="root", children=list())
rl$children[1] <- list(list(name="direct", children=list()))
rl$children[[1]]$children[1] <- list(list(name="direct", children=list()))
rl$children[[1]]$children[[1]]$children[1] <- list(list(name="direct", children=list()))
rl$children[[1]]$children[[1]]$children[[1]]$children[1] <- list(list(name="end", size=5))
rl$children[[1]]$children[[1]]$children[2] <- list(list(name="organic", children=list()))
rl$children[[1]]$children[[1]]$children[[2]]$children[1] <- list(list(name="end", size=12))
rl$children[[1]]$children[2] <- list(list(name="end", size=23))
rl$children[2] = list(list(name="organic", children=list()))
rl$children[[2]]$children[1] <- list(list(name="end", size=45))
So when I print to json it's:
require(RJSONIO)
cat(toJSON(rl, pretty=T))
{
"name" : "root",
"children" : [
{
"name" : "direct",
"children" : [
{
"name" : "direct",
"children" : [
{
"name" : "direct",
"children" : [
{
"name" : "end",
"size" : 5
}
]
},
{
"name" : "organic",
"children" : [
{
"name" : "end",
"size" : 12
}
]
}
]
},
{
"name" : "end",
"size" : 23
}
]
},
{
"name" : "organic",
"children" : [
{
"name" : "end",
"size" : 45
}
]
}
]
}
I am having a hard time wrapping my head around the recursive steps that are necessary to create this list structure in R. In JS I can pretty easily move around the nodes and at each node determine whether to add a new node or keep moving down the tree by using push as needed, eg: new = {"name": node, "children": []}; or new = {"name": node, "size": size}; as in this example. I tried to split the data.frame as in this example:
makeList<-function(x){
if(ncol(x)>2){
listSplit<-split(x,x[1],drop=T)
lapply(names(listSplit),function(y){list(name=y,children=makeList(listSplit[[y]]))})
} else {
lapply(seq(nrow(x[1])),function(y){list(name=x[,1][y],size=x[,2][y])})
}
}
jsonOut<-toJSON(list(name="root",children=makeList(path)))
but it gives me an error
Error: evaluation nested too deeply: infinite recursion / options(expressions=)?
Error during wrapup: evaluation nested too deeply: infinite recursion / options(expressions=)?
The function given in the linked Q&A is essentially what you need, however it was failing on your data set because of the null values for some rows in the later columns. Instead of just blindly repeating the recursion until you run out of columns, you need to check for your "end" value, and use that to switch to making leaves:
makeList<-function(x){
listSplit<-split(x[-1],x[1], drop=TRUE);
lapply(names(listSplit),function(y){
if (y == "end") {
l <- list();
rows = listSplit[[y]];
for(i in 1:nrow(rows) ) {
l <- c(l, list(name=y, size=rows[i,"size"] ) );
}
l;
}
else {
list(name=y,children=makeList(listSplit[[y]]))
}
});
}
I believe this does what you want, though it has some limitations. In particular, it is assumed that every branch in your network is unique (i.e. there can't be two rows in your data frame that are equal for every column other than size):
df.split <- function(p.df) {
p.lst.tmp <- unname(split(p.df, p.df[, 1]))
p.lst <- lapply(
p.lst.tmp,
function(x) {
if(ncol(x) == 2L && nrow(x) == 1L) {
return(list(name=x[1, 1], size=unname(x[, 2])))
} else if (isTRUE(is.na(unname(x[ ,2])))) {
return(list(name=x[1, 1], size=unname(x[, ncol(x)])))
}
list(name=x[1, 1], children=df.split(x[, -1, drop=F]))
}
)
p.lst
}
all.equal(rl, df.split(path)[[1]])
# [1] TRUE
Though note you had the organic size switched, so I had to fix your rl to get this result (rl has it as 45, but your path as 23). Also, I modified your path data.frame slightly:
path <- data.frame(
root=rep("root", 4),
P1=c("direct","direct","organic","direct"),
P2=c("direct","direct","end","end"),
P3=c("direct","organic",NA,NA),
P4=c("end","end",NA,NA),
size=c(5,12,23,45),
stringsAsFactors=F
)
WARNING: I haven't tested this with other structures, so it's possible it will hit corner cases that you'll need to debug.