how to show manytomany field data in json format - django - json

I'm trying to show many-to-many data in JSON format (without using a serializer). Here is my models.py:
class CustomerInvoice(models.Model):
    """An invoice issued to a customer.

    Items are attached through the ``InvoiceItem`` intermediate model.
    """

    customer = models.CharField(max_length=50)
    items_model = models.ManyToManyField(Item, through='InvoiceItem')
    # Filled in automatically when the row is first created.
    created_at = models.DateTimeField(auto_now_add=True)
class InvoiceItem(models.Model):
    """Through model: one ``Item`` line on one ``CustomerInvoice``."""

    item = models.ForeignKey(Item, on_delete=models.CASCADE)
    # The reverse accessor is named 'invoice', so lookups from
    # CustomerInvoice read as invoice__quantity / invoice__price.
    invoice = models.ForeignKey(
        CustomerInvoice,
        on_delete=models.CASCADE,
        related_name='invoice',
    )
    quantity = models.IntegerField()
    price = models.DecimalField(max_digits=20, decimal_places=2)
Is it possible to do a lookup based on many-to-many data, something like Q(items_model__icontains=query_search)?
Also, how can I return the M2M data in JSON format using values() and json.dumps? values('items_model') returns only the IDs, and values('items_model__all') doesn't work.
and here is my views.py
def _non_negative_int(raw, default):
    """Parse *raw* as an int >= 0; return *default* if it is missing or invalid."""
    try:
        return max(int(raw), 0)
    except (TypeError, ValueError):
        return default


def invoices_all_lists(request):
    """Return one page of invoices as JSON for a server-side DataTable.

    Reads the optional GET parameters ``filter`` (search text), ``start``
    (row offset, default 0) and ``limit`` (page size, default 10).
    Non-AJAX requests are redirected to the invoice list page.
    """
    # NOTE(review): request.is_ajax() is deprecated in newer Django
    # releases -- confirm against the project's Django version.
    if not request.is_ajax():
        return redirect('invoiceapp:list-all-invoice')

    query_search = request.GET.get('filter')

    # Item names of every invoice. Not used in the response yet; this is
    # the part the post asks to fold into the query below. The original
    # "filter" branch called .items_model on the QuerySet itself, which
    # raises AttributeError; iterate the invoices first instead.
    all_item_qs = CustomerInvoice.objects.all()
    a = []
    for data in all_item_qs:
        for i in data.items_model.all():
            a.append(i.item.name)

    # Build the annotated queryset once; only the filter is conditional.
    invoices = CustomerInvoice.objects.annotate(
        total=Sum(
            F('invoice__quantity') * F('invoice__price'),
            output_field=DecimalField(decimal_places=2, max_digits=20),
        )
    )
    if query_search:
        invoices = invoices.filter(
            Q(id__icontains=query_search)
            | Q(seller__username__icontains=query_search)
            | Q(customer__icontains=query_search)
        )
    invoices = invoices.values(
        'id', 'seller__username', 'customer', 'total', 'created_at', 'items_model'
    ).order_by('-id')

    # Bad or negative paging values fall back to the defaults instead of
    # turning into a server error.
    start_from = _non_negative_int(request.GET.get('start'), 0)
    limit = _non_negative_int(request.GET.get('limit'), 10)

    data_lists = []
    for index, value in enumerate(invoices[start_from:start_from + limit], start_from):
        value['counter'] = index + 1  # 1-based row number across pages
        data_lists.append(value)

    payload = {
        'objects': data_lists,
        'length': invoices.count(),
    }
    # default=str stringifies Decimal / datetime values for JSON.
    return HttpResponse(
        json.dumps(payload, indent=4, sort_keys=True, default=str),
        'application/json',
    )
Can I add this part of the code into the query, please?
a = []
for data in all_item_qs:
for i in data.items_model.all():
a.append(i.item.name)
Note: I'm using DataTables with server-side processing on the client side.

Related

Save class items as json file in python scrapy

I want to save all data of both these classes (Product_Items and Variant_Product) as JSON output files.
getProductDetails(): In this function I want to extract the data for just the first element of the product_variants list and add it to the dict (item_list). For the rest of the elements I create a request that hits the same function recursively, until I have all the keys in my dict (item_list).
At the end of the function I want to write the extracted data to a JSON file, but I can't return two values from a function.
Similarly, in the getListingDetails() function I need to save the item as a JSON file. Please help!
Following is the snippet:
import scrapy
from scrapy.http import Request
from scrapy.selector import Selector
from scrapy.item import Item, Field
import re,json
class Product_Items(Item):
    """Item holding the fields collected for one product on a listing page."""

    Image_URL = Field()
    Product_Title = Field()
    Price = Field()
    PPU_Price = Field()
    Product_URL = Field()
    Product_SKU = Field()
    Product_UPC = Field()
class Variant_Product(Item):
    """Item holding the fields collected for one product variant.

    Has the same fields as ``Product_Items`` plus ``Product_Size`` and
    ``Meta``.
    """

    Image_URL = Field()
    Product_Title = Field()
    Price = Field()
    PPU_Price = Field()
    Product_URL = Field()
    Product_SKU = Field()
    Product_UPC = Field()
    Product_Size = Field()
    Meta = Field()
class walmartSpider(scrapy.Spider):
name = "walmart"
start_urls = ['https://www.walmart.com/all-departments']
item_list = {}
def parse(self, response):
    """Return one listing request per ``/browse`` category URL.

    The category URLs come from the JSON embedded in the page's
    ``<script id="home">`` tag, via ``getCategoryUrls``.
    """
    # '@id' selects the attribute ('#id' is not valid XPath), and
    # response.xpath() queries the decoded page directly instead of
    # wrapping the raw bytes of response.body in a Selector.
    json_response = response.xpath('//script[@id="home"]//text()').get()
    if json_response is None:
        self.logger.warning('No <script id="home"> found on %s', response.url)
        return []
    data = json.loads(json_response)
    # NOTE(review): assumes only '/browse' URLs should be requested, as
    # the original condition suggests -- confirm.
    # urljoin() avoids the double slash 'base_url + url' produced.
    return [
        Request(url=response.urljoin(url), callback=self.getListingDetails)
        for url in self.getCategoryUrls(data)
        if url.startswith('/browse')
    ]
def getCategoryUrls(self,data):
.....
return final_cat_url
def getListingDetails(self, response):
    """Parse a category listing page.

    Fills a ``Product_Items`` for each product found in the JSON embedded
    in the page's ``<script id="searchContent">`` tag. Returns the
    follow-up requests: a product-page request for every product without
    an offer price, plus the next listing page when there is one.
    """
    reqs = []
    # '@id' selects the attribute ('#id' is not valid XPath); the response
    # is queried directly instead of being passed to Selector(text=...).
    raw = response.xpath('//script[@id="searchContent"]//text()').get()
    data = json.loads(raw)
    preso = data['searchContent']['preso']
    for product in preso['items']:
        # A missing or empty primaryOffer is treated as "no offer".
        offer = product.get('primaryOffer') or {}
        # A fresh item per product, so fields never leak between products.
        item = Product_Items()
        item['Image_URL'] = product['imageUrl']
        item['Product_Title'] = product['title']
        # base_url was never defined in this method; resolve the relative
        # product URL against the current page instead.
        item['Product_URL'] = response.urljoin(product['productPageUrl'])
        item['Product_SKU'] = product['productId']
        item['Product_UPC'] = product['standardUpc'][0]
        item['PPU_Price'] = offer.get('unitPriceDisplayCondition', '')
        regular_price = offer.get('offerPrice', '')
        if regular_price:
            item['Price'] = regular_price
        else:
            # No price on the listing: fetch it from the product page.
            reqs.append(Request(url=item['Product_URL'], callback=self.getProductDetails))
        # TODO: want to save this item as a JSON file.

    # Pagination
    try:
        next_page = preso['pagination']['next']['url']
    except (KeyError, TypeError):
        next_page = ''
    if next_page:
        # Keep everything before the query string and append the next
        # page's query; this also works when the URL has no '?' yet.
        next_page_url = response.url.split('?', 1)[0] + '?' + str(next_page)
        reqs.append(Request(url=next_page_url, callback=self.getListingDetails))
    return reqs
def getProductDetails(self, response):
    """Parse a product page.

    Fills a ``Variant_Product`` from the first entry of the buy-box
    product list and returns one request per remaining variant that has
    not been recorded in ``self.item_list`` yet.
    """
    reqs = []
    base_url = 'https://www.walmart.com/ip/'
    # '@id' selects the attribute ('#id' is not valid XPath); the response
    # is queried directly instead of being passed to Selector(text=...).
    prod_data = json.loads(response.xpath('//script[@id="item"]//text()').get())
    product_variants = prod_data['item']['product']['buyBox']['products']
    for product_variant in product_variants[1:]:
        item_id = product_variant['usItemId']
        if item_id not in self.item_list:
            # Record the id first so the same variant is queued only once.
            self.item_list[item_id] = ''
            reqs.append(Request(url=base_url + str(item_id), callback=self.getProductDetails))

    product_0 = product_variants[0]
    variant = Variant_Product()
    variant['Product_Title'] = product_0['productName']
    variant['Product_SKU'] = product_0['walmartItemNumber']
    variant['Product_UPC'] = product_0['upc']
    variant['Product_Size'] = product_0['variants'][0]['value']
    # NOTE(review): the key was written 'canonicalUrl ' with a trailing
    # space, which looks like a typo -- confirm against the page JSON.
    variant['Product_URL'] = product_0['canonicalUrl']
    variant['Price'] = product_0['priceMap']['price']
    variant['PPU_Price'] = product_0['priceMap']['unitPriceDisplayValue']
    variant['Meta'] = product_0['categoryPath'].replace('Home Page/', '')
    # TODO: want to save this item as a JSON file.
    return reqs
According to the scrapy docs, there are several built in "Exporters" that can serialize your data into several different formats (including JSON).
You should be able to do something like:
# ...
from scrapy.exporters import JsonItemExporter
# ...
def getListingDetails(self, response):
    # ...
    for product in products:
        item = Product_Items(
            Image_URL=product['imageUrl'],
            Product_Title=product['title'],
            Product_URL=base_url + product['productPageUrl'],
            Product_SKU=product['productId'],
            Product_UPC=product['standardUpc'][0],
            PPU_Price=product.get('primaryOffer', {}).get('unitPriceDisplayCondition', ''),
            Price=product.get('primaryOffer', {}).get('offerPrice', ''),
        )
        if not item['Price']:
            product_req = Request(url=item['Product_URL'], callback=self.getProductDetails)
            reqs.append(product_req)
        # One file per product, named after its SKU. The with-block closes
        # (and flushes) the file as soon as the item has been written.
        with open(f"{item['Product_SKU']}.json", "wb") as json_file:
            JsonItemExporter(json_file).export_item(item)
Some notes:
The JsonItemExporter.__init__ method expects a file-like object whose write method accepts bytes, which is why the file is opened in "wb" mode.
dict.get() in Python allows you to specify a default value as the second argument, in case a key doesn't exist (not strictly necessary here, but reduces the try/except logic)
When handling exceptions, it's recommended by PEP8 standards to catch more specific exception types (in the above cases, except KeyError: might be appropriate) than just a bare except clause
Please let me know if the above works for you!

Trouble with joining tables together

I'm trying to join tables by using Django queryset, but for some reason it keeps throwing an error.
Tables are structured as below.
class Platform(models.Model):
    """A platform record; ``P_key`` is its auto-incrementing primary key."""

    P_key = models.AutoField(primary_key=True)
    P_url = models.CharField(max_length=100)
    P_userkey = models.IntegerField(default=0)
    P_name = models.CharField(max_length=10)

    objects = models.Manager()
class User_info(models.Model):
    """A user record that belongs to one ``Platform``."""

    U_key = models.AutoField(primary_key=True)
    U_name = models.CharField(max_length=20)
    U_img = models.CharField(max_length=100)
    U_info = models.CharField(max_length=100)
    U_sudate = models.CharField(max_length=20)
    # The relation to Platform is named P_key; that field name (not the
    # model name) is what select_related / prefetch_related expect.
    P_key = models.ForeignKey(Platform, on_delete=models.CASCADE)

    objects = models.Manager()
This is the code written to join two tables together.
queryset = User_info.objects.all().prefetch_related("Platform")
queryset = User_info.objects.all().select_related("Platform")
queryset = Platform.objects.all().select_related("User_info")
And the following is the error:
django.core.exceptions.FieldError: Invalid field name(s) given in
select_related: 'Platform'. Choices are: P_key.
I've tried a number of query sets but I wasn't able to get far.
you have to use field names
queryset = User_info.objects.all().prefetch_related("P_key")
queryset = User_info.objects.all().select_related("P_key")

Generate n-gram for a specific column present in mysql db

I'm writing a code to generate n-grams for every record in the table by reading a specific column.
def extract_from_db(inp_cust_id):
    """Print the bigrams of each ``notes`` value stored for one customer.

    One list is printed per row returned by the query.
    """
    db = TatDBHelper()
    # NOTE(review): the customer id is interpolated straight into the SQL
    # text -- confirm it is validated upstream.
    rows = db.execute_read(
        "select notes from raw_data where customer_id = {0}".format(inp_cust_id)
    )
    for row in rows:
        note = row.values()[0]
        print(generate_ngrams(note.encode("utf-8"), 2))
def generate_ngrams(sentence, n):
    """Return the word n-grams of *sentence* as space-joined strings.

    The text is lower-cased, every character other than ASCII letters,
    digits and whitespace is replaced by a space, and the result is split
    into tokens on any run of whitespace.

    Args:
        sentence: Text to tokenize.
        n: Number of consecutive tokens per n-gram.

    Returns:
        The n-grams in order of appearance; an empty list when the text
        has fewer than ``n`` tokens.
    """
    sentence = re.sub(r'[^a-zA-Z0-9\s]', ' ', sentence.lower())
    # split() with no argument also breaks on the tabs and newlines the
    # regex keeps, and never yields empty tokens; split(" ") left
    # "word\nword" glued together as a single token.
    tokens = sentence.split()
    # Zipping the token list against itself shifted by 0..n-1 positions
    # yields every window of n consecutive tokens.
    ngrams = zip(*[tokens[i:] for i in range(n)])
    return [" ".join(ngram) for ngram in ngrams]
I'm getting the output like:
['i highly', 'highly recommend', 'recommend it']
['the penguin', 'penguin encounter', 'encounter was', 'was awesome']
I want the output to look like below, can anybody help me to get this.
['i highly',
'highly recommend',
'recommend it',
...
]
Create another list, all_ngrams, and keep adding each row's n-grams to it using .extend(); at the end you will have all the n-grams in one list.
Try this :
def extract_from_db(inp_cust_id):
    """Print one combined list with the bigrams of all of a customer's notes."""
    db = TatDBHelper()
    rows = db.execute_read(
        "select notes from raw_data where customer_id = {0}".format(inp_cust_id)
    )
    all_ngrams = []
    for row in rows:
        note = row.values()[0]
        # Grow the single result list instead of printing per row.
        all_ngrams += generate_ngrams(note.encode("utf-8"), 2)
    print(all_ngrams)

How to serialize relationship in django rest?

Try to serialize this Models
Model:
class Order(models.Model):
    """An order, linked to one ``Delivery`` and one ``Payments`` row."""

    id = models.AutoField(primary_key=True)
    date_create = models.DateField(auto_now_add=True)
    date_change = models.DateField(auto_now=True)
    # Kept as text rather than a numeric column.
    summ = models.CharField(max_length=15, default='0')
    delivery = models.ForeignKey('Delivery')
    success = models.BooleanField(default=False)
    paymentMethod = models.ForeignKey('Payments')

    def __unicode__(self):
        return unicode(self.id)
class OrderProduct(models.Model):
    """One product line of an ``Order``."""

    order = models.ForeignKey('Order')
    id = models.AutoField(primary_key=True)
    date_create = models.DateField(auto_now_add=True)
    date_change = models.DateField(auto_now=True)
    price = models.IntegerField()
    product = models.ForeignKey('product.Product')
    additionals = models.IntegerField(null=True, default=0)
    count = models.IntegerField()

    def __unicode__(self):
        # __unicode__ must return text, not the integer id itself; this
        # matches what Order.__unicode__ does.
        return unicode(self.id)
class Delivery(models.Model):
    """Delivery details: when, where, and a contact phone."""

    id = models.AutoField(primary_key=True)
    date_create = models.DateField(auto_now_add=True)
    date_change = models.DateField(auto_now=True)
    delivery_time = models.DateTimeField()
    delivery_adress = models.TextField()
    phone = models.TextField()

    def __unicode__(self):
        return self.phone
class Payments(models.Model):
    """A payment method, shown by its title."""

    id = models.AutoField(primary_key=True)
    date_create = models.DateField(auto_now_add=True)
    date_change = models.DateField(auto_now=True)
    title = models.TextField()

    def __unicode__(self):
        return self.title
Serializers:
class DeliverySerializer(serializers.ModelSerializer):
    """Serializer for ``Delivery`` rows."""

    class Meta:
        model = Delivery
        fields = ('id', 'delivery_time', 'delivery_adress', 'phone')

    def create(self, validated_data):
        """Insert and return a ``Delivery`` built from the validated fields."""
        return Delivery.objects.create(**validated_data)
class PaymentsSerializer(serializers.ModelSerializer):
    """Serializer for ``Payments`` rows."""

    class Meta:
        model = Payments
        fields = ('id', 'title')

    def create(self, validated_data):
        """Insert and return a ``Payments`` built from the validated fields."""
        return Payments.objects.create(**validated_data)
class OrderSerializer(serializers.ModelSerializer):
    """Serializer for ``Order`` with delivery and payment method nested."""

    # NOTE(review): both nested fields are read_only, so they are ignored
    # on input and never reach validated_data; the pop() calls in create()
    # then raise KeyError.
    delivery = DeliverySerializer(read_only=True)
    paymentMethod = PaymentsSerializer(read_only=True)

    class Meta:
        model = Order
        fields = ('id', 'delivery', 'paymentMethod', 'summ', 'success')

    def create(self, validated_data):
        """Create the order, then rows from the nested delivery/payment data."""
        delivery_payload = validated_data.pop('delivery')
        payment_payload = validated_data.pop('paymentMethod')
        order = Order.objects.create(**validated_data)
        for delivery_fields in delivery_payload:
            Delivery.objects.create(order=order, **delivery_fields)
        for payment_fields in payment_payload:
            Payments.objects.create(order=order, **payment_fields)
        return order
View:
@api_view(['POST'])
def order_post(request, format=None):
    """Create an order from the posted data.

    Responds with the serialized order and 201 on success, or with the
    validation errors and 400.
    """
    # The decorator restricts the view to POST, so no explicit method
    # check is needed here.
    serializer = OrderSerializer(data=request.data)
    if serializer.is_valid():
        serializer.save()
        return Response(serializer.data, status=status.HTTP_201_CREATED)
    return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
I need to receive the data in a single packet and then write it to the DB.
Every time, I get an error:
deliverys_data = validated_data.pop('delivery')
KeyError: 'delivery'
Example of JSON packet
[{"delivery":{"delivery_time":"2016-05-31T12:18:47Z","delivery_adress":"123","phone":"123"},"paymentMethod":{"id":1,"title":"123124123"},"summ":"23","success":false}]
You are getting KeyError: 'delivery' because you have set delivery field as read_only. If DRF finds this field in the input, it will ignore that field.
From docs on read_only argument:
Read-only fields are included in the API output, but should not be
included in the input during create or update operations. Any
'read_only' fields that are incorrectly included in the serializer
input will be ignored.
Also, since you are using paymentMethod field in the create() method, you need to tell DRF to consider that field also in the input.
So, you need to remove the read_only argument from your serializer for both delivery and paymentMethod fields so that these fields are considered when deserializing.
class OrderSerializer(serializers.ModelSerializer):
delivery = DeliverySerializer() # remove read_only argument
paymentMethod = PaymentsSerializer() # remove read_only argument
Secondly, you are sending the data incorrectly. You need to send a single order input instead of list of orders you are sending.
# send without the list
{"delivery":{"delivery_time":"2016-05-31T12:18:47Z","delivery_adress":"123","phone":"123"},"paymentMethod":{"id":1,"title":"123124123"},"summ":"23","success":false}
EveryTime I get a error: deliverys_data = validated_data.pop('delivery') KeyError: 'delivery'
This is to be expected, since the OrderSerializer has DeliverySerializer flagged as read_only.
If you want to get the data writable, you'll need to remove that flag first.

Why 2 queries are executed instead of one?

I have following piece of code:
def detail(request, popular_id):
    """Render the detail page of one ``Popular`` entry with its chart data.

    Raises Http404 when no ``Popular`` row has the given primary key.
    """
    try:
        popular = Popular.objects.get(pk=popular_id)
        share = Share.objects.get(isin=popular.isin)  #LINE 1
        chart_data_json = share.get_chart_data_json()
    except Popular.DoesNotExist:
        raise Http404
    context = {'popular': popular, 'chart_data': chart_data_json}
    return render(request, 'popular/detail.html', context)
In LINE 1 I noticed using debug-toolbar that there are two queries get executed:
SELECT `share_share`.`id`, `share_share`.`symbol`, `share_share`.`isin`, `share_share`.`name`, `share_share`.`market`, `share_share`.`updated` FROM `share_share` WHERE `share_share`.`id` = 1
and
SELECT `share_share`.`id`, `share_share`.`symbol`, `share_share`.`isin`, `share_share`.`name`, `share_share`.`market`, `share_share`.`updated` FROM `share_share` WHERE `share_share`.`isin` = 'US5949181045'
I cannot understand why we need the first query and how to avoid it?
EDIT:
Model definition of share:
class Share(models.Model):
    """A share, displayed by its ISIN."""

    symbol = models.CharField(max_length=32)
    isin = models.CharField(max_length=12)
    name = models.CharField(max_length=256)
    market = models.CharField(max_length=64)
    updated = models.BooleanField(default=False)

    def get_chart_data_json(self):
        """Return this share's quotes as a JSON array string.

        Each element has the keys ``date`` (ISO format), ``value`` (the
        adjusted close) and ``volume``.
        """
        quote_model = create_quote_model(str(self.isin))
        rows = quote_model.objects.values('date', 'adj_close', 'volume').iterator()
        chart_data = [
            {
                'date': row['date'].isoformat(),
                'value': row['adj_close'],
                'volume': row['volume'],
            }
            for row in rows
        ]
        return json.dumps(chart_data)

    def __unicode__(self):
        return self.isin
Model definition of popular:
class Popular(models.Model):
    """A featured entry (title and text) about one ``Share``."""

    title = models.CharField(max_length=120)
    text = models.CharField(max_length=1024)
    # Despite its name this is a relation to Share, not an ISIN string.
    isin = models.ForeignKey(Share)

    def __unicode__(self):
        return self.title
First query is evaluated when you access foreign key isin from popular object:
share = Share.objects.get(isin = popular.isin)
Second query gets Share object:
share = Share.objects.get(isin = popular.isin)
If you want just one query at #LINE 1 you should replace it with:
share = popular.isin #LINE 1