I am reframing my question so that it is clearer.
My data looks like this:
{
  "Research": {
    "#xmlns": "http://www.xml.org/2013/2/XML",
    "#language": "eng",
    "#createDateTime": "2022-03-25T10:12:39Z",
    "#researchID": "abcd",
    "Product": {
      "#productID": "abcd",
      "StatusInfo": {
        "#currentStatusIndicator": "Yes",
        "#statusDateTime": "2022-03-25T12:18:41Z",
        "#statusType": "Published"
      },
      "Source": {
        "Organization": {
          "#primaryIndicator": "Yes",
          "#type": "SellSideFirm",
          "OrganizationID": [
            {
              "#idType": "L1",
              "#text": "D827C98E315F"
            },
            {
              "#idType": "TR",
              "#text": "3202"
            },
            {
              "#idType": "TR",
              "#text": "SZA"
            }
          ],
          "OrganizationName": {
            "#nameType": "Legal",
            "#text": "Citi"
          },
          "PersonGroup": {
            "PersonGroupMember": {
              "#primaryIndicator": "Yes",
              "#sequence": "1",
              "Person": {
                "#personID": "tr56",
                "FamilyName": "Wang",
                "GivenName": "Bond",
                "DisplayName": "Bond Wang",
                "Biography": "Bond Wang is a",
                "BiographyFormatted": "Bond Wang",
                "PhotoResourceIdRef": "AS44556"
              }
            }
          }
        }
      },
      "Content": {
        "Title": "Premier",
        "Abstract": "None",
        "Synopsis": "Premier’s solid 1H22 result .",
        "Resource": [
          {
            "#language": "eng",
            "#primaryIndicator": "Yes",
            "#resourceID": "9553",
            "Length": {
              "#lengthUnit": "Pages",
              "#text": "17"
            },
            "MIMEType": "text/html",
            "URL": "https://www.DFKJG.com/rendition/eppublic"
          },
          {
            "#language": "eng",
            "#primaryIndicator": "No",
            "#resourceID": "4809",
            "Length": {
              "#lengthUnit": "Pages",
              "#text": "17"
            },
            "MIMEType": "ABS/pdf",
            "Name": "asdf.pdf",
            "Comments": "fr5.pdf"
          },
          {
            "#language": "eng",
            "#primaryIndicator": "No",
            "#resourceID": "6d13a965723e",
            "Length": {
              "#lengthUnit": "Pages",
              "#text": "17"
            },
            "MIMEType": "text/html",
            "URL": "https://www.dfgdfg.com/"
          },
          {
            "#primaryIndicator": "No",
            "#resourceID": "709c7bdb1c99",
            "MIMEType": "tyy/image",
            "URL": "https://ir.ght.com"
          },
          {
            "#primaryIndicator": "No",
            "#resourceID": "gfjhgj",
            "MIMEType": "gtty/image",
            "URL": "https://ir.gtty.com"
          }
        ]
      },
      "Context": {
        "#external": "Yes",
        "IssuerDetails": {
          "Issuer": {
            "#issuerType": "Corporate",
            "#primaryIndicator": "Yes",
            "SecurityDetails": {
              "Security": {
                "#estimateAction": "Revision",
                "#primaryIndicator": "Yes",
                "#targetPriceAction": "Increase",
                "SecurityID": [
                  {
                    "#idType": "RIC",
                    "#idValue": "PMV.AX",
                    "#publisherDefinedValue": "RIC"
                  },
                  {
                    "#idType": "Bloomberg",
                    "#idValue": "PMV#AU"
                  },
                  {
                    "#idType": "SEDOL",
                    "#idValue": "6699781"
                  }
                ],
                "SecurityName": "Premier Investments Ltd",
                "AssetClass": {
                  "#assetClass": "Equity"
                },
                "AssetType": {
                  "#assetType": "Stock"
                },
                "SecurityType": {
                  "#securityType": "Common"
                },
                "Rating": {
                  "#rating": "NeutralSentiment",
                  "#ratingType": "Rating",
                  "#aspect": "Investment",
                  "#ratingDateTime": "2020-07-31T08:24:37Z",
                  "RatingEntity": {
                    "#ratingEntity": "PublisherDefined",
                    "PublisherDefinedValue": "Citi"
                  }
                }
              }
            },
            "IssuerID": {
              "#idType": "PublisherDefined",
              "#idValue": "PMV.AX",
              "#publisherDefinedValue": "TICKER"
            },
            "IssuerName": {
              "#nameType": "Legal",
              "NameValue": "Premier Investments Ltd"
            }
          }
        },
        "ProductDetails": {
          "#periodicalIndicator": "No",
          "#publicationDateTime": "2022-03-25T12:18:41Z",
          "ProductCategory": {
            "#productCategory": "Report"
          },
          "ProductFocus": {
            "#focus": "Issuer",
            "#primaryIndicator": "Yes"
          },
          "EntitlementGroup": {
            "Entitlement": [
              {
                "#includeExcludeIndicator": "Include",
                "#primaryIndicator": "No",
                "AudienceTypeEntitlement": {
                  "#audienceType": "PublisherDefined",
                  "#entitlementContext": "TR",
                  "#text": "20012"
                }
              },
              {
                "#includeExcludeIndicator": "Include",
                "#primaryIndicator": "No",
                "AudienceTypeEntitlement": {
                  "#audienceType": "PublisherDefined",
                  "#entitlementContext": "TR",
                  "#text": "2001"
                }
              }
            ]
          }
        },
        "ProductClassifications": {
          "Discipline": {
            "#disciplineType": "Investment",
            "#researchApproach": "Fundamental"
          },
          "Subject": {
            "#publisherDefinedValue": "TREPS",
            "#subjectValue": "PublisherDefined"
          },
          "Country": {
            "#code": "AU",
            "#primaryIndicator": "Yes"
          },
          "Region": {
            "#primaryIndicator": "Yes",
            "#emergingIndicator": "No",
            "#regionType": "Australasia"
          },
          "AssetClass": {
            "#assetClass": "Equity"
          },
          "AssetType": {
            "#assetType": "Stock"
          },
          "SectorIndustry": [
            {
              "#classificationType": "GICS",
              "#code": "25201040",
              "#focusLevel": "Yes",
              "#level": "4",
              "#primaryIndicator": "Yes",
              "Name": "Household Appliances"
            },
            {
              "#classificationType": "GICS",
              "#code": "25504020",
              "#focusLevel": "Yes",
              "#level": "4",
              "#primaryIndicator": "Yes",
              "Name": "Computer & Electronics Retail"
            },
            {
              "#classificationType": "GICS",
              "#code": "25504040",
              "#focusLevel": "Yes",
              "#level": "4",
              "#primaryIndicator": "Yes",
              "Name": "Specialty Stores"
            },
            {
              "#classificationType": "GICS",
              "#code": "25504030",
              "#focusLevel": "Yes",
              "#level": "4",
              "#primaryIndicator": "Yes",
              "Name": "Home Improvement Retail"
            },
            {
              "#classificationType": "GICS",
              "#code": "25201050",
              "#focusLevel": "Yes",
              "#level": "4",
              "#primaryIndicator": "Yes",
              "Name": "Housewares & Specialties"
            }
          ]
        }
      }
    }
  }
}
I want to explode all of its elements into a data frame.
The number of columns that have a list-like structure can also change.
Basically, we will not know in advance whether the next input will have fewer or more columns to be exploded.
This is what I have tried so far, but it does not give me the correct answer.
Also, I have hardcoded the column names, but the code should identify them and then explode them.
import json

import pandas as pd
import xmltodict
from tabulate import tabulate

# Parse the XML, then round-trip through JSON to get plain dicts.
data = xmltodict.parse("""xml data""")
resp = json.loads(json.dumps(data))
print(resp)

df = pd.json_normalize(resp)

# Hardcoded for now, but these should be detected automatically.
cols = [
    'Research.Product.Source.Organization.OrganizationID',
    'Research.Product.Content.Resource',
    'Research.Product.Context.IssuerDetails.Issuer.SecurityDetails.Security.SecurityID',
    'Research.Product.Context.ProductDetails.EntitlementGroup.Entitlement',
    'Research.Product.Context.ProductClassifications.SectorIndustry',
]

def explode_columns(df, cols):
    df_e = df.copy()
    for c in cols:
        df_e = df_e.explode(c, ignore_index=True)
    return df_e

df2 = explode_columns(df, cols)
print(tabulate(df2, headers="keys", tablefmt="psql"))
# df2.to_csv('dataframe.csv', header=True, index=False)
As suggested in the comments, you can define a helper function in pure Python to recursively flatten the nested values of your data.
So, with the JSON file you provided, here is one way to do it:
def flatten(data, new_data):
    """Recursive helper function.

    Args:
        data: nested dictionary.
        new_data: empty dictionary.

    Returns:
        Flattened dictionary.
    """
    for key, value in data.items():
        if isinstance(value, dict):
            flatten(value, new_data)
        if isinstance(value, (str, int, list)):
            new_data[key] = value
    return new_data
And then:
import json

import pandas as pd

with open("file.json") as f:
    content = json.load(f)

df = pd.DataFrame.from_dict(flatten(content, {}), orient="index").T
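As a quick sanity check (a sketch, assuming file.json contains the JSON from the question), the result is a single-row frame whose list-valued cells survive intact for the next step:
# Hypothetical sanity check on the flattened output.
flat = flatten(content, {})
print("OrganizationID" in flat)  # True: list values are kept whole for exploding later
print(df.shape)                  # (1, N): one row, one column per flattened leaf key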
From here, you can deal with columns that contain lists of dictionaries with identical keys but different values, by exploding them and repeating the other values, like this:
cols_with_lists = [col for col in df.columns if isinstance(df.loc[0, col], list)]
for col in cols_with_lists:
    temp_df = pd.concat(
        [pd.DataFrame(item, index=[i]) for i, item in enumerate(df.loc[0, col])],
        axis=0,
    )
    df = pd.concat([df.drop(columns=[col]), temp_df], axis=1).ffill()
So that, finally, the JSON file is entirely flattened:
print(df)
# Output
#xmlns #language ... #primaryIndicator Name
0 http://www.xml.org/2013/2/XML eng ... Yes Household Appliances
1 http://www.xml.org/2013/2/XML eng ... Yes Computer & Electronics Retail
2 http://www.xml.org/2013/2/XML eng ... Yes Specialty Stores
3 http://www.xml.org/2013/2/XML eng ... Yes Home Improvement Retail
4 http://www.xml.org/2013/2/XML eng ... Yes Housewares & Specialties
[5 rows x 73 columns]
A little hacky, but you can extract the columns that have a list type in them, then use reduce to recursively explode and normalize all columns until there are no more lists or nested objects.
I haven't tested this thoroughly, but it should be something like this:
from functools import reduce

import pandas as pd

def full_explode_normalize(df):
    # Extract list columns
    explode_cols = [x for x in df.columns if isinstance(df.iloc[0][x], list)]
    if len(explode_cols) < 1:
        return df
    # Explode and normalize the lists
    df = reduce(_explode, explode_cols, df)
    return df

def _explode(df, col):
    df = df.explode(col)
    if isinstance(df.iloc[0][col], list):
        df = _explode(df, col)
    elif isinstance(df.iloc[0][col], dict):  # note: `object` would match everything, so test for dict
        df_child = pd.json_normalize(df[col])
        # To prevent column name collisions, add the parent column name as a prefix.
        df_child.columns = [f'{col}.{x}' for x in df_child.columns]
        df = pd.concat([df.loc[:, ~df.columns.isin([col])].reset_index(drop=True), df_child], axis=1)
    return df
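For completeness, a minimal usage sketch, assuming resp is the dictionary parsed from the XML earlier in the question:
# Minimal usage sketch; `resp` is assumed to be the parsed dict from above.
df = pd.json_normalize(resp)
df_full = full_explode_normalize(df)
print(df_full.shape)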
I need help with writing a proper queryset in a Django view. I have a Post model with created_at (a datetime field). I want to group posts by date and return them in a specific format.
models.py
class Post(TrackableDate):
    title = models.CharField(max_length=255)
    body = models.TextField()
    created_at = models.DateTimeField(auto_now_add=True)
views.py
class PostViewSet(mixins.ListModelMixin,
                  viewsets.GenericViewSet):
    queryset = Post.objects.all()
    serializer_class = PostSerializer
serializers.py
class PostSerializer(serializers.ModelSerializer):
    class Meta:
        model = Post
        fields = '__all__'
The usual response looks like:
[
  {
    "id": 1,
    "text": "ok",
    "created_at": "2012-12-12T12:30:00"
  },
  {
    "id": 2,
    "text": "ok",
    "created_at": "2012-12-12T12:30:00"
  },
  {
    "id": 3,
    "text": "ok",
    "created_at": "2012-12-13T12:30:00"
  }
]
How can I group and return them like this?
{
  "2012-12-12": [
    {
      "id": 1,
      "text": "ok",
      "created_at": "2012-12-12T12:30:00"
    },
    {
      "id": 2,
      "text": "ok",
      "created_at": "2012-12-12T12:30:00"
    }
  ],
  "2012-12-13": [
    {
      "id": 3,
      "text": "ok",
      "created_at": "2012-12-13T12:30:00"
    }
  ]
}
I tried:
Post.objects.extra(select={'created_at': 'date(created_at)'}).values('created_at').annotate(available=Count('created_at'))
But it returns
<QuerySet [{'created_at': '2020-07-04', 'available': 7}, {'created_at': '2020-07-09', 'available': 2}]>
Here is a helper function you can use to accomplish this; it may be a simpler solution. This is the one that I came up with:
def group_by_date(mydata):
    new_data = {}
    for element in mydata:
        v = element['created_at'].split('T')[0]
        if v not in new_data:
            new_data[v] = []
        new_data[v].append(element)
    return new_data
You can override the list method of your ViewSet to rework the output, returning Response(group_by_date(serializer.data), status=HTTP_200_OK).
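A minimal sketch of that override, reusing the PostViewSet and group_by_date from above (the list body mirrors what DRF's ListModelMixin does, minus pagination):
from rest_framework import mixins, viewsets
from rest_framework.response import Response

class PostViewSet(mixins.ListModelMixin,
                  viewsets.GenericViewSet):
    queryset = Post.objects.all()
    serializer_class = PostSerializer

    def list(self, request, *args, **kwargs):
        # Serialize the posts as usual, then regroup the flat list by date.
        queryset = self.filter_queryset(self.get_queryset())
        serializer = self.get_serializer(queryset, many=True)
        return Response(group_by_date(serializer.data))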
This is my views.py file:
class NewsChannelListView(ObjectMultipleModelAPIView):
    def get_querylist(self, *args, **kwargs):
        userId = self.request.user.id
        querylist = [
            {'queryset': News_Channel.objects.all(),
             'serializer_class': NewsChannelSerializers},
            {'queryset': Count.objects.filter(userId=userId),
             'serializer_class': CountSerializers},
        ]
        return querylist
I am getting the following response from this view:
{
  "News_Channel": [
    {
      "id": 2,
      "name": "Republic",
      "info": "Arnab Goswami News channel",
      "image": "https://fourthpillar-static.s3.amazonaws.com/media/repiblic_1bRFWNZ.png",
      "website": "https://www.repu",
      "total_star": 10,
      "total_user": 2
    },
    {
      "id": 1,
      "name": "NDTV",
      "info": "India News Media",
      "image": "https://fourthpillar-static.s3.amazonaws.com/media/ndtv_WH67OhA.png",
      "website": "https://ndtv.com",
      "total_star": 18,
      "total_user": 2
    }
  ],
  "Count": [
    {
      "userId": 1,
      "channelId": 2,
      "rate": 6
    },
    {
      "userId": 1,
      "channelId": 1,
      "rate": 8
    }
  ]
}
Is there any way I can get a single object, where channel id 2 in Count is merged with id 2 in News_Channel, and channel id 1 in Count is merged with id 1 in News_Channel? The final response should look like this:
{
  "News_Channel": [
    {
      "id": 2,
      "name": "Republic",
      "info": "Arnab Goswami News channel",
      "image": "https://fourthpillar-static.s3.amazonaws.com/media/repiblic_1bRFWNZ.png",
      "website": "https://www.repu",
      "total_star": 10,
      "total_user": 2,
      "userId": 1,
      "rate": 6
    },
    {
      "id": 1,
      "name": "NDTV",
      "info": "India News Media",
      "image": "https://fourthpillar-static.s3.amazonaws.com/media/ndtv_WH67OhA.png",
      "website": "https://ndtv.com",
      "total_star": 18,
      "total_user": 2,
      "userId": 1,
      "rate": 8
    }
  ]
}
Code of the models:
class News_Channel(models.Model):
    name = models.TextField(blank=False)
    info = models.TextField(blank=False)
    image = models.FileField()
    website = models.TextField()
    total_star = models.PositiveIntegerField(default=0)
    total_user = models.IntegerField()

    class Meta:
        ordering = ["-id"]

    def __str__(self):
        return self.name

class Count(models.Model):
    userId = models.ForeignKey(User, on_delete=models.CASCADE)
    channelId = models.ForeignKey(News_Channel, on_delete=models.CASCADE)
    rate = models.PositiveIntegerField(default=0)

    class Meta:
        ordering = ["-id"]
I am using Django 2.1.7 and djangorestframework 3.9.2.
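One way to get that merged shape is to serve only the News_Channel list and pull the requesting user's Count row into each channel with a SerializerMethodField. A minimal sketch, assuming the models above (the serializer name is hypothetical):
from rest_framework import serializers

class NewsChannelWithRateSerializer(serializers.ModelSerializer):
    # Hypothetical serializer: merges the current user's Count row into each channel.
    userId = serializers.SerializerMethodField()
    rate = serializers.SerializerMethodField()

    class Meta:
        model = News_Channel
        fields = ['id', 'name', 'info', 'image', 'website',
                  'total_star', 'total_user', 'userId', 'rate']

    def _user_count(self, obj):
        user = self.context['request'].user
        return Count.objects.filter(userId=user, channelId=obj).first()

    def get_userId(self, obj):
        count = self._user_count(obj)
        return count.userId_id if count else None

    def get_rate(self, obj):
        count = self._user_count(obj)
        return count.rate if count else None
You would then point the view (or a plain ListAPIView) at this serializer instead of the two separate querysets.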
I need to build a JSON structure like the following in Scala. How can I implement this?
{
  "name": "protocols",
  "children": [
    {
      "name": "tcp",
      "children": [
        {
          "name": "source 1",
          "children": [
            {
              "name": "destination 1",
              "children": [
                {
                  "name": "packet 1"
                },
                {
                  "name": "packet 4"
                }
              ]
            },
            {
              "name": "destination 2",
              "children": [
                {
                  "name": "packet 1"
                },
                {
                  "name": "packet 4"
                }
              ]
            },
I need a tree structure like this to be written to a file.
If you are using Play, your JSON structure can be represented with a single case class.
Here is a sample, where this case class is called Node:
import play.api.libs.json.Json
case class Node(name: String, children: List[Node] = Nil)
implicit val format = Json.format[Node]
val childSource1 = Node("destination 1", List(Node("packet 1"), Node("packet 4")))
val childSource2 = Node("destination 2", List(Node("packet 1"), Node("packet 4")))
val source1 = Node("source 1", List(childSource1, childSource2))
val example = Node("protocols", List(Node("tcp", List(source1))))
Json.prettyPrint(Json.toJson(example))
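Since you need the tree written to a file, you can write the pretty-printed string out with standard Java NIO, for example java.nio.file.Files.write(Paths.get("tree.json"), Json.prettyPrint(Json.toJson(example)).getBytes).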
I'd like to be able to search the following JSON object for objects containing the key 'location', then get back an array or JSON object with the 'name' of the person plus the value of 'location' for that person.
Sample return:
var matchesFound = [{ name: "Tom Brady", location: "New York" }, { name: "Donald Steven", location: "Los Angeles" }];
var fbData0 = {
  "data": [
    {
      "id": "X999_Y999",
      "location": "New York",
      "from": {
        "name": "Tom Brady", "id": "X12"
      },
      "message": "Looking forward to 2010!",
      "actions": [
        {
          "name": "Comment",
          "link": "http://www.facebook.com/X999/posts/Y999"
        },
        {
          "name": "Like",
          "link": "http://www.facebook.com/X999/posts/Y999"
        }
      ],
      "type": "status",
      "created_time": "2010-08-02T21:27:44+0000",
      "updated_time": "2010-08-02T21:27:44+0000"
    },
    {
      "id": "X998_Y998",
      "location": "Los Angeles",
      "from": {
        "name": "Donald Steven", "id": "X18"
      },
      "message": "Where's my contract?",
      "actions": [
        {
          "name": "Comment",
          "link": "http://www.facebook.com/X998/posts/Y998"
        },
        {
          "name": "Like",
          "link": "http://www.facebook.com/X998/posts/Y998"
        }
      ],
      "type": "status",
      "created_time": "2010-08-02T21:27:44+0000",
      "updated_time": "2010-08-02T21:27:44+0000"
    }
  ]
};
@vsiege: you can use this JavaScript lib (http://www.defiantjs.com/) to search your JSON structure.
var fbData0 = {
        ...
    },
    res = JSON.search( fbData0, '//*[./location and ./from/name]' ),
    str = '';

for (var i = 0; i < res.length; i++) {
    str += res[i].location + ': ' + res[i].from.name + '<br/>';
}

document.getElementById('output').innerHTML = str;
Here is a working fiddle:
http://jsfiddle.net/hbi99/XhRLP/
DefiantJS extends the global JSON object with the method "search" and makes it possible to query JSON with XPath expressions (XPath is a standardised query language). The method returns an array with the matches (an empty array if none were found).
You can test XPath expressions by pasting your JSON here:
http://www.defiantjs.com/#xpath_evaluator