I'm writing a code to generate n-grams for every record in the table by reading a specific column.
def extract_from_db(inp_cust_id):
sql_db = TatDBHelper()
t_sql = "select notes from raw_data where customer_id = {0}"
db_data = sql_db.execute_read(t_sql.format(inp_cust_id))
for row in db_data:
text = row.values()
bi_grams = generate_ngrams(text[0].encode("utf-8"), 2)
print bi_grams
def generate_ngrams(sentence, n):
sentence = sentence.lower()
sentence = re.sub(r'[^a-zA-Z0-9\s]', ' ', sentence)
tokens = [token for token in sentence.split(" ") if token != ""]
ngrams = zip(*[tokens[i:] for i in range(n)])
return [" ".join(ngram) for ngram in ngrams]
I'm getting the output like:
['i highly', 'highly recommend', 'recommend it']
['the penguin', 'penguin encounter', 'encounter was', 'was awesome']
I want the output to look like below, can anybody help me to get this.
['i highly',
'highly recommend',
'recommend it',
...
]
creat another list all_ngrams, and keep appending the values to it , using .extend(), and finally you will have all the ngrams in one list.
Try this :
def extract_from_db(inp_cust_id):
sql_db = TatDBHelper()
t_sql = "select notes from raw_data where customer_id = {0}"
db_data = sql_db.execute_read(t_sql.format(inp_cust_id))
all_ngrams = []
for row in db_data:
text = row.values()
bi_grams = generate_ngrams(text[0].encode("utf-8"), 2)
all_ngrams.extend(bi_grams)
print all_ngrams
Related
conn = mysql.connector.connect(
host="localhost",
user="root",
passwd="12123123412"
database='newdb')
cur = conn.cursor()
xx_zz = self.screen.get_screen('end').ids["rgr"].text
ee_zz = self.screen.get_screen('end').ids["gfd"].text
qur = f"SELECT * FROM (%s) WHERE bedrooms = '(%s)' "
val = (xx_zz, ee_zz)
cur.execute(qur, val)
records = cur.fetchall()
I suggest that we use a function to create the query string using match-case. This will avoid any risk of SQL injection as we are not using the string provided by the front end.
You will need to modify and complete the option values and table names and decide whether there should be a default table name or no result if the option provided is not found.
Obviously this code has not been tested.
def makeQuery( option ):
match option:
case 'option1':
return f"SELECT * FROM table_name_1 WHERE bedrooms = '(%s)' "
case 'option2':
return f"SELECT * FROM table_name_2 WHERE bedrooms = '(%s)' "
case _:
return f"SELECT * FROM default_table_name WHERE bedrooms = '(%s)' "
conn = mysql.connector.connect(
host="localhost",
user="root",
passwd="12123123412"
database='newdb')
cur = conn.cursor()
xx_zz = self.screen.get_screen('end').ids["rgr"].text
ee_zz = self.screen.get_screen('end').ids["gfd"].text
qur = makeQuery(xx_zz )
val = ( ee_zz )
cur.execute(qur, val)
records = cur.fetchall()
In textInput (field) you use hint_text to show a placeholder in a text field(input).
I'm trying to show manytomany data in json format(without using serializer), here is my models.py
class CustomerInvoice(models.Model):
customer = models.CharField(max_length=50)
items_model = models.ManyToManyField(Item,through='InvoiceItem')
created_at = models.DateTimeField(auto_now_add=True)
class InvoiceItem(models.Model):
item = models.ForeignKey(Item,on_delete=models.CASCADE)
invoice = models.ForeignKey(CustomerInvoice,on_delete=models.CASCADE,related_name='invoice')
quantity = models.IntegerField()
price = models.DecimalField(max_digits=20,decimal_places=2)
is it possible to make a look up base on many to many data?
something like this : Q(items_model__icontains=query_search) ,and also how to return the M2M data into a json format using values() and json.dumps please? this returns the ID Values('items_model') and this dont work Values('items_model__all')
and here is my views.py
def invoices_all_lists(request):
if request.is_ajax():
query_search = request.GET.get('filter')
if query_search:
all_item_qs = CustomerInvoice.objects.all()
a = []
for i in all_item_qs.items_model.all():
a.append(i.item.name)
invoices = CustomerInvoice.objects.annotate(
total=Sum((F('invoice__quantity') * F('invoice__price')),output_field=DecimalField(decimal_places=2,max_digits=20))
).filter(
Q(id__icontains=query_search) | Q(seller__username__icontains=query_search) |
Q(customer__icontains=query_search)).values(
'id','seller__username','customer','total','created_at','items_model').order_by('-id')
else:
all_item_qs = CustomerInvoice.objects.all()
a = []
for data in all_item_qs:
for i in data.items_model.all():
a.append(i.item.name)
invoices = CustomerInvoice.objects.annotate(
total=Sum((F('invoice__quantity') * F('invoice__price')) ,output_field=DecimalField(decimal_places=2,max_digits=20))
).values(
'id','seller__username','customer','total','created_at','items_model').order_by('-id')
start_from = 0
if request.GET.get('start'):
start_from = int(request.GET.get('start'))
limit = 10
if request.GET.get('limit'):
limit = int(request.GET.get('limit'))
data_lists = []
for index,value in enumerate(invoices[start_from:start_from+limit],start_from):
value['counter'] = index+1
data_lists.append(value)
data = {
'objects':data_lists,
'length':invoices.count(),
}
return HttpResponse(json.dumps(data, indent=4, sort_keys=True, default=str),'application/json')
else:
return redirect('invoiceapp:list-all-invoice')
can i add this part of the code into the query please?
a = []
for data in all_item_qs:
for i in data.items_model.all():
a.append(i.item.name)
note : i've used datatable server side in the client side
I want to save all data of both these classes (Product_Items and Variant_Product) as JSON output files.
getProductDetails() : In this function I want to extract the data for just 1st element in product_variants list and ading it to the dict(item_list) and for rest of the elements I am creating a req to hit the same function recursively untill I have all the keys in my dict(item_list).
At the end of the function I want to write the extracted data as JSON file, but I can't return two values from a function.
Similarly, in getListingDetails() function I need to save the item as JSON file. PLEASE HELP!!!
Following is the snippet:
import scrapy
from scrapy.http import Request
from scrapy.selector import Selector
from scrapy.item import Item, Field
import re,json
class Product_Items(Item):
Image_URL = Field()
Product_Title = Field()
Price = Field()
PPU_Price = Field()
Product_URL = Field()
Product_SKU = Field()
Product_UPC = Field()
class Variant_Product(Item):
Image_URL = Field()
Product_Title = Field()
Price = Field()
PPU_Price = Field()
Product_URL = Field()
Product_SKU = Field()
Product_UPC = Field()
Product_Size = Field()
Meta = Field()
class walmartSpider(scrapy.Spider):
name = "walmart"
start_urls = ['https://www.walmart.com/all-departments']
item_list = {}
def parse(self,response):
reqs = []
base_url='https://www.walmart.com/'
hxs = Selector(text=response.body)
json_response = hxs.xpath('//script[#id="home"]//text()').get()
data = json.loads(json_response)
cat_urls = self.getCategoryUrls(data)
for url in cat_urls:
if url[:7] == '/browse':
url = base_url + url
link=Request(url=url,callback=self.getListingDetails)
reqs.append(link)
return reqs
def getCategoryUrls(self,data):
.....
return final_cat_url
def getListingDetails(self,response):
reqs = []
hxs = Selector(text=response)
data = json.loads(hxs.xpath('//script[#id="searchContent"]//text()').get())
products = data['searchContent']['preso']['items']
item = Product_Items()
for product in products:
item['Image_URL'] = product['imageUrl']
item['Product_Title'] = product['title']
item['Product_URL'] = base_url + product['productPageUrl']
item['Product_SKU'] = product['productId']
item['Product_UPC'] = product['standardUpc'][0]
try:
item['PPU_Price'] = product['primaryOffer']['unitPriceDisplayCondition']
except:
item['PPU_Price'] = ''
try:
regular_price = product['primaryOffer']['offerPrice']
except:
regular_price = ''
if regular_price:
item['Price'] = product['primaryOffer']['offerPrice']
else:
product_req = Request(url=item['Product_URL'],callback=self.getProductDetails)
reqs.append(product_req)
**Want to save this item as JSON file**
**#Pagination**
try:
next_page = data['searchContent']['preso']['pagination']['next']['url']
except:
next_page = ''
if next_page:
next_page_url = str(re.findall(r'^[\S]+\?',response.url)[0])+str(next_page)
req = Request(url=next_page_url,callback=self.getListingDetails)
reqs.append(req)
return reqs
def getProductDetails(self,response):
reqs = []
base_url = 'https://www.walmart.com/ip/'
hxs = Selector(text=response)
variant = Variant_Product()
prod_data = json.loads(hxs.xpath('//script[#id="item"]//text()').get())
product_variants = prod_data['item']['product']['buyBox']['products']
for product_variant in product_variants[1:]:
item_id = product_variant['usItemId']
if item_id not in self.item_list.keys():
self.item_list[item_id] = ''
req = Request(url=base_url+str(item_id),callback=self.getProductDetails)
reqs.append(req)
product_0 = prod_data['item']['product']['buyBox']['products'][0]
variant['Product_Title'] = product_0['productName']
variant['Product_SKU'] = product_0['walmartItemNumber']
variant['Product_UPC'] = product_0['upc']
variant['Product_Size'] = product_0['variants'][0]['value']
variant['Product_URL'] = product_0['canonicalUrl ']
variant['Price'] = product_0['priceMap']['price']
variant['PPU_Price'] = product_0['priceMap']['unitPriceDisplayValue']
variant['Meta'] = (product_0['categoryPath']).replace('Home Page/','')
**Want to save this item as JSON file**
return reqs
According to the scrapy docs, there are several built in "Exporters" that can serialize your data into several different formats (including JSON).
You should be able to do something like:
# ...
from scrapy.exporters import JsonItemExporter
# ...
def getListingDetails(self, response):
# ...
for product in products:
item = Product_Items(
Image_URL = product['imageUrl'],
Product_Title = product['title'],
Product_URL = base_url + product['productPageUrl'],
Product_SKU = product['productId'],
Product_UPC = product['standardUpc'][0],
PPU_Price = product.get('primaryOffer', {}).get('unitPriceDisplayCondition', ''),
Price = product.get('primaryOffer', {}).get('offerPrice', '')
)
if not item['Price']:
product_req = Request(url=item['Product_URL'],callback=self.getProductDetails)
reqs.append(product_req)
JsonItemExporter(open(f"{item['Product_SKU']}.json", "wb")).export_item(item)
Some notes:
The JsonItemExporter.__init__ method expects a file-like object whose write method accepts bytes, which is why the "wb"
dict.get() in Python allows you to specify a default value as the second argument, in case a key doesn't exist (not strictly necessary here, but reduces the try/except logic)
When handling exceptions, it's recommended by PEP8 standards to catch more specific exception types (in the above cases, except KeyError: might be appropriate) than just a bare except clause
Please let me know if the above works for you!
Is there a pattern for dealing with query params in a flask rest server? I know I can create a sql query word for word using string manipulation in python, but I find that to be ugly and error prone, I was wondering if there is a better way. Here's what I have:
param1 = request.args.get('param1', type = int)
param2 = request.args.get('param2', type = int)
if param1 is not None:
if param2 is not None:
cursor.execute("SELECT * FROM table WHERE p1 = %s AND p2 = %s", (str(param1), str(param2)))
else:
cursor.execute("SELECT * FROM table WHERE p1 = %s", (str(param1),))
else:
if param2 is not None:
cursor.execute("SELECT * FROM table WHERE p2 = %s", (str(param2),))
else:
cursor.execute("SELECT * FROM table")
It's easy to see the number of possible SQL statements is 2 to the number of parameters, which grows out of control... so, again, without using string manipulation to custom build the sql query, is there an idiom or pattern that is used to accomplish this in a more elegant way? Thanks.
Loop through your parameters.
params = []
for i in range(1, HoweverManyParamsYouNeed):
params.append(request.args.get('param' + str(i), type = int))
s = ""
for i in range(1, len(params)):
if params[ i ] is not None:
if not s:
s = "p" + str(i) + " = " + str(params[ i ])
else:
s = s + " AND p" + str(i) + " = " + str(params[ i ])
full = "SELECT * FROM table"
if s:
full = full + " WHERE " + s
cursor.execute(full)
You might need to correct this code, since I do not have a way to run it.
I suggest using ORM(https://en.wikipedia.org/wiki/Object-relational_mapping) instead of raw sql queries.
First you need install flask-sqlalchemy (https://flask-sqlalchemy.palletsprojects.com/)
Then define your model
class MyModel(db.Model):
id = db.Column(db.Integer, primary_key=True)
column1 = db.Column(db.Integer)
column2 = db.Column(db.Integer)
Let's say you have your filter lookup somewhere
allowed_filters = {"column1", "column2"}
Finally instead of cursor you can use SQLAlchemy's ORM to retrieve your filtered objects.
query = MyModel.query
for field, value in request.args.items():
if field in allowed_filters:
query = query.filter(getattr(MyModel, field) == value)
my_object_list = list(query.all())
If you really want to create your queries manually you can always iterate over args:
where_clause = ""
params = []
for field, value in request.args.items():
if field in allowed_filters:
if len(where_clause) > 0:
where_clause += " AND "
where_clause += "{} = %s".format(field)
params.append(value)
if len(where_clause) > 0:
cursor.execute("SELECT * FROM table WHERE {}".format(where_clause), tuple(params))
else:
cursor.execute("SELECT * FROM table")
I'm struggling with the formatting on a mysql query and I was hoping you could point me in the right direction. Here are the queries
sql = "SELECT price FROM inventory WHERE card_name = %s AND card_set = %s"
sql_rare = "SELECT rarity FROM inventory WHERE card_name = %s AND card_set = %s"
sql_count = "SELECT count(*) FROM inventory WHERE card_name = %s AND card_set = %s
When I run the following code, utilizing the sql_count query, i get an error saying:
File "C:\Users\Spencer\Desktop\Python Programs\PythonMTG\Revision3AutoAndManual\51515\magicassistantcsv.py", line 264, in output_card
for row in getmtgprice.query(sql_count, ([card_name, set_name])):
TypeError: query() takes exactly 4 arguments (3 given)
Here is the code producing this error:
getmtgprice = PriceCheck()
for row in getmtgprice.query(sql_count, ([card_name, set_name])):
if row[0] ==0:
priced_card = '0.00'
And here is the PriceCheck function:
class PriceCheck(object):
def __init__(self):
self.conn = MySQLdb.connect(host='localhost', user='root', passwd='', db='mscan')
self.c = self.conn.cursor()
def query(self, arg, cardname, setname):
self.c.execute(arg, cardname, setname)
return self.c
def __del__(self):
self.conn.close()
Do you see where I went wrong?
Your query method takes separate arguments for cardname and setname, not a list containing both. So, instead of:
for row in getmtgprice.query(sql_count, ([card_name, set_name])):
You should have:
for row in getmtgprice.query(sql_count, card_name, set_name):