When loading a JSON from mongo into a Python Dataframe, how should you handle NaN? - json

I am getting an error when I try to "flatten" JSON into a DataFrame; I believe it is because some of the cells contain NaN. What is the best way to handle this?
The Error I get is "AttributeError: 'float' object has no attribute 'keys'"
import pandas as pd
from pymongo import MongoClient
client = MongoClient()
client = MongoClient('mongodb://localhost:27017/')
#Import Counterparties
counterpartydb = client.counterparties
cptylist = counterpartydb.counterparties
cptylists = pd.DataFrame(list(cptylist.find()))
details = pd.DataFrame(list(cptylists['details']))
CurRating = pd.DataFrame(list(cptylists['currentRating']))

Since MongoDB is schemaless, sometimes there will be Null values in a response. You can iterate over these and check to see if the value is None.
cptylists = pd.DataFrame(list(cptylist.find()))
# Collect one rating per document, substituting a placeholder wherever the
# sub-document or its 'creditRating' field is absent.
creditRating = []
for rating in cptylists['creditRating']:
    # A document that lacks the sub-document shows up in the column as
    # None/NaN (a float), not a dict, so check the type before indexing.
    if isinstance(rating, dict) and rating.get('creditRating') is not None:
        creditRating.append(rating['creditRating'])
    else:
        creditRating.append('No value in database')
creditRating = pd.DataFrame(creditRating)
The list comprehension version of this would be something like:
if 'creditRating' in cptylists:
    # Replace non-dict cells (None/NaN) with an empty dict: DataFrame() then
    # yields an all-NaN row instead of failing on a float, and row order
    # still lines up with cptylists. (A Series cannot be used with `or`.)
    creditRating = pd.DataFrame(
        [k if isinstance(k, dict) else {} for k in cptylists['creditRating']]
    )

Related

How do I store a contentfile into ImageField in Django

I am trying to convert an image uploaded by user into a PDF , and then store it into an ImageField in a mysql database ,using a form, but am facing an error when trying to store the PDF into the database
My views.py is:
from django.core.files.storage import FileSystemStorage
from PIL import Image
import io
from io import BytesIO
from django.core.files.uploadedfile import InMemoryUploadedFile
from django.core.files.base import ContentFile
def formsubmit(request): #submits the form
docs = request.FILES.getlist('photos')
print(docs)
section = request.POST['section']
for x in docs:
fs = FileSystemStorage()
print(type(x.size))
img = Image.open(io.BytesIO(x.read()))
imgc = img.convert('RGB')
pdfdata = io.BytesIO()
imgc.save(pdfdata,format='PDF')
thumb_file = ContentFile(pdfdata.getvalue())
filename = fs.save('photo.pdf', thumb_file)
linkobj = Link(link = filename.file, person = Section.objects.get(section_name = section), date = str(datetime.date.today()), time = datetime.datetime.now().strftime('%H:%M:%S'))
linkobj.save()
count += 1
size += x.size
return redirect('index')
My models.py:
class Link(models.Model):
id = models.BigAutoField(primary_key=True)
person = models.ForeignKey(Section, on_delete=models.CASCADE)
link = models.ImageField(upload_to= 'images', default = None)
date = models.CharField(max_length=80, default = None)
time = models.CharField(max_length=80,default = None)
Error I am getting is:
AttributeError: 'str' object has no attribute 'file'
Other methods I have tried:
1) linkobj = Link(link = thumb_file, person = Section.objects.get(section_name = section), date = str(datetime.date.today()), time = datetime.datetime.now().strftime('%H:%M:%S'))
RESULT OF ABOVE METHOD:
1)The thumb_file doesnt throw an error, rather it stores nothing in the database
Points I have noticed:
1)The file is being stored properly into the media folder, ie: I can see the pdf getting stored in the media folder
How do I solve this? Thank you
You don't (basically ever) need to initialize a Storage by yourself. This holds especially true since the storage for the field might not be a FileSystemStorage at all, but could e.g. be backed by S3.
Something like
import datetime
import io
from PIL import Image
from django.core.files.base import ContentFile
def convert_image_to_pdf_data(image):
    """Return the bytes of a PDF rendered from the file-like object *image*.

    The whole upload is read into memory, decoded with Pillow, converted to
    RGB and written out in PDF format to an in-memory buffer.
    """
    source = io.BytesIO(image.read())
    rgb_image = Image.open(source).convert("RGB")
    buffer = io.BytesIO()
    rgb_image.save(buffer, format="PDF")
    return buffer.getvalue()
def formsubmit(request):  # submits the form
    """Convert every uploaded photo to a PDF and store each one as a Link.

    Reads the uploaded files from ``request.FILES`` under ``"photos"`` and the
    section name from ``request.POST["section"]``, creates one ``Link`` per
    photo, then redirects to ``"index"``.
    """
    photos = request.FILES.getlist("photos")  # list of UploadedFiles
    section = request.POST["section"]
    person = Section.objects.get(section_name=section)
    # Take a single timestamp so the stored date and time always agree.
    now = datetime.datetime.now()
    date = str(now.date())
    # strftime() does the formatting; datetime.time() accepts no arguments.
    time = now.strftime("%H:%M:%S")
    # Running totals carried over from the question's code.
    count = 0
    size = 0
    for image in photos:
        pdfdata = convert_image_to_pdf_data(image)
        # Giving the ContentFile a name lets the field save it to its storage.
        thumb_file = ContentFile(pdfdata, name="photo.pdf")
        Link.objects.create(
            link=thumb_file,
            person=person,
            date=date,
            time=time,
        )
        count += 1
        size += image.size
    return redirect("index")
should be enough here, i.e. using a ContentFile for the converted PDF content; the field should deal with saving it into the storage.
(As an aside, why are date and time stored separately as strings? Your database surely has a datetime type...)
OK, so I found an answer. To be fair, I won't accept my own answer, as it doesn't provide an exact answer to the question I asked; rather, it's a different method. So if anyone does know, please do share so that the community can benefit:
My Solution:
Instead of using ContentFile, I used InMemoryUploadedFile, to store the converted pdf and then moved it into the database( in an ImageField)
I am going to be honest, I am not completely sure about why ContentFile was not working, but when going through the documentation I found out that :
The ContentFile class inherits from File, but unlike File it operates on string content (bytes also supported), rather than an actual file.
Any detailed explanation is welcome
My new views.py
from django.core.files.storage import FileSystemStorage
from PIL import Image
import io
from io import BytesIO
from django.core.files.uploadedfile import InMemoryUploadedFile
from django.core.files.base import ContentFile
import sys
def formsubmit(request):  # submits the form
    """Convert every uploaded photo to a PDF and store each one as a Link.

    Each PDF is wrapped in an ``InMemoryUploadedFile`` and assigned to the
    ``link`` field of a new ``Link``; the view then redirects to ``'index'``.
    """
    # Local import: datetime is not among the imports shown for this views.py.
    import datetime

    docs = request.FILES.getlist('photos')
    section = request.POST['section']
    # Look the section up once instead of once per uploaded file.
    person = Section.objects.get(section_name=section)
    # The counters must exist before the loop increments them.
    count = 0
    size = 0
    for x in docs:
        img = Image.open(io.BytesIO(x.read()))
        imgc = img.convert('RGB')
        pdfdata = io.BytesIO()
        imgc.save(pdfdata, format='PDF')
        # Report the real payload length: sys.getsizeof() measures the
        # BytesIO object itself, not the number of PDF bytes it holds.
        pdf_size = len(pdfdata.getvalue())
        # Rewind so the buffer is read from the beginning when it is stored.
        pdfdata.seek(0)
        thumb_file = InMemoryUploadedFile(
            pdfdata, None, 'photo.pdf', 'application/pdf', pdf_size, None
        )
        linkobj = Link(
            link=thumb_file,
            person=person,
            date=str(datetime.date.today()),
            time=datetime.datetime.now().strftime('%H:%M:%S'),
        )
        linkobj.save()
        count += 1
        size += x.size
    return redirect('index')
If you have a question, you can leave it in the comments and I'll try to answer it. Good luck!

How to let options of panel.widgets.MultiSelect depend on other widget inputs

I have a multiple select widget, pn.widgets.MultiSelect, and I am trying to get that when its value is selected it updates the options of another pn.widgets.MultiSelect. This is very similar to How do i automatically update a dropdown selection widget when another selection widget is changed? (Python panel pyviz), however, I have not had much success implementing it.
Using the data from the above example, but as a pandas dataframe, when I try to update the options nothing happens:
outerType = pn.widgets.MultiSelect(name="Outer",
value=["Africa"],
options=np.unique(df.cont).tolist())
innerType = pn.widgets.MultiSelect(name="Inner",
options=np.unique(df.loc[df['cont'].isin(outerType.value)].country).tolist())
#pn.depends(outerType, watch=True)
def _update(outerType):
_values = np.unique(df.loc[df['cont'].isin(outerType.value)].country).tolist()
innerType.options = _values
innerType.value = _values
pn.Row(outerType, innerType)
The same happens if I use
#pn.depends(outerType.param.value, watch=True)
def _update(outerType):
However, it does execute as I expect if I use the code below; this uses AnyOldDummy as the argument in def. How can I do this properly please?
outerType = pn.widgets.MultiSelect(name="Outer",
value=["Africa"],
options=np.unique(df.cont).tolist())
innerType = pn.widgets.MultiSelect(name="Inner",
options=np.unique(df.loc[df['cont'].isin(outerType.value)].country).tolist() )
#pn.depends(outerType, watch=True)
def _update(AnyOldDummy):
_values = np.unique(df.loc[df['cont'].isin(outerType.value)].country).tolist()
innerType.options = _values
innerType.value = _values
pn.Row(outerType, innerType)
Data:
import numpy as np
import pandas as pd
import panel as pn
pn.extension()
_countries = {
'Africa': ['Ghana', 'Togo', 'South Africa'],
'Asia' : ['China', 'Thailand', 'Japan'],
'Europe': ['Austria', 'Bulgaria', 'Greece']
}
df = []
for cont in _countries.keys():
co = _countries.get(cont)
df.append(np.c_[np.repeat(cont, len(co)), co])
df = pd.DataFrame(np.vstack(df), columns=['cont', 'country'])

Odoo: Export res.company object to JSON

company = self.env['res.company'].search([('id', '=', 1)])
print company.name # prints 'my company' which is correct
print json.dumps(company) # error TypeError: res.company(1,) is not JSON serializable
Question is how do simply export company object in json?
I am looking for a generic way which would work for any model.
Use Model.read(). You can also specify the fields to be read in the read method (see doc). Also, datetime objects are not json serializable. Fortunately, Odoo already provides a utility method.
import json
from odoo.tools import date_utils
raw_data = company.read()
json_data = json.dumps(raw_data, default=date_utils.json_default)
print(json_data)
Please try this code:
company = self.env['res.company'].search([('id', '=', 1)])
print company.name
print json.dumps(company.name)
Search will return object, so we have to manually add fields to dictionary to build the json. I have added few fields, you can add more fields.
company = self.env['res.company'].search([('id', '=', 1)])
params = {}
data = dict()
data['partner_id'] = company.partner_id
data['name'] = company.name
data['email'] = company.email
data['phone'] = company.phone
data['company_registry'] = company.company_registry
params['params'] = data
print json.dumps(params)
The last answer (Jerther) is the right answer.
You can also use in odoo 16
from odoo.tools import json_default
instead of
from odoo.tools import date_utils

Python Web Scraping BeautifulSoup - substring

Second day on web scraping using Python. I am trying to pull a substring within a string. I wrote the following python code using BeautifulSoup:
containers = page_soup.findAll("li",{"class":"grid-tile "})
container_test = containers[7]
product_container = container_test.findAll("div",{"class":"product-
swatches"})
product = product_container[0].findAll("li")
product[0].a.img.get("data-price")
This outputs the following:
'{"saleprice":"$39.90","price":""}'
How do I print out saleprice and price separately? Result should look like:
saleprice = $39.90
price = ""
Use the json module - specifically, the loads method, which loads JSON-formatted strings common on websites.
import json

# The attribute value scraped from the page is a JSON object held in a string.
string = '{"saleprice":"$39.90","price":""}'
json_data = json.loads(string)
sale_price = json_data['saleprice']
price = json_data['price']
print(sale_price, price)
# Python 2 output: (u'$39.90', u'')
The u preceding the string indicates that the string is unicode, which is well explained here.
Additionally, you could use ast.literal_eval, as the string is formatted like a normal Python dictionary. That process would be:
import ast

string = '{"saleprice":"$39.90","price":""}'
# literal_eval only evaluates Python literals, so unlike eval() it cannot run
# arbitrary code embedded in the string.
dict_representation_of_string = ast.literal_eval(string)
# Call keys() on the parsed dict; the original str has no such method.
print(dict_representation_of_string.keys())
# Python 2 output: ['price', 'saleprice']
this link should be able to help
Convert a String representation of a Dictionary to a dictionary?
import ast

BSoutput = '{"saleprice":"$39.90","price":""}'
# Parse the scraped attribute value into a real dict.
testing = ast.literal_eval(BSoutput)
saleprice = testing['saleprice']
price = testing['price']
# A parenthesised single argument prints the same on Python 2 and Python 3.
print("saleprice = " + saleprice)
print("price = " + price)

Use HttpResponse with JSON data in this code

This code seems to work fine when I used Django console and just print it.
reference = FloodHazard.objects.filter(hazard='High')
ids = reference.values_list('id', flat=True)
for myid in ids:
getgeom = FloodHazard.objects.get(id=myid).geom
response = BuildingStructure.objects.filter(geom__intersects=getgeom).values(
'brgy_locat').annotate(counthigh=Count('brgy_locat'))
print response
I was able to show all the values, but when using HttpResponse, it returns an empty set. What is the proper way of returning JSON data from a queryset? So far, tried this:
reference = FloodHazard.objects.filter(hazard='High')
ids = reference.values_list('id', flat=True)
response = {}
for myid in ids:
getgeom = FloodHazard.objects.get(id=myid).geom
response['high'] = BuildingStructure.objects.filter(geom__intersects=getgeom).values(
'brgy_locat').annotate(counthigh=Count('brgy_locat'))
json_post = ujson.dumps(list(response))
return HttpResponse(json_post, content_type='application/json')
Your code doesn't make much sense: you assign every queryset to the same key in the response dict, so each loop iteration overwrites the previous result. You should use a list for this purpose:
As far as I understand the code should be something like this:
response = []
for myid in ids:
getgeom = FloodHazard.objects.get(id=myid).geom
response.extend(BuildingStructure.objects.filter(geom__intersects=getgeom)
.values('brgy_locat')
.annotate(counthigh=Count('brgy_locat')))
json_post = ujson.dumps(response)
If you want to return a hazard level as well as the list of buildings then you can return a dict:
json_post = ujson.dumps({'hazard': 'high', 'buildings': response})