I have been trying to scrape some data but keep getting a blank value or None. I've tried doing next sibling and failed (I probably did it wrong). Any and all help is greatly appreciated. Thank you in advance.
Website to scrape (final): https://www.unegui.mn/azhild-avna/ulan-bator/
Website to test (current, has less listings): https://www.unegui.mn/azhild-avna/mt-hariltsaa-holboo/slzhee-tehnik-hangamzh/ulan-bator/
Code Snippet:
def parse(self, response, **kwargs):
    cards = response.xpath("//li[contains(@class,'announcement-container')]")
    # parse details
    for card in cards:
        company = card.xpath(".//*[@class='announcement-block__company-name']/text()").extract_first()
        date_block = card.xpath("normalize-space(.//div[contains(@class,'announcement-block__date')]/text())").extract_first().split(',')
        date = date_block[0]
        city = date_block[1]
        item = {'date': date,
                'city': city,
                'company': company
                }
HTML Snippet:
<div class="announcement-block__date">
<span class="announcement-block__company-name">Электро экспресс ХХК</span>
, Өчигдөр 13:05, Улаанбаатар</div>
Expected Output:
date = Өчигдөр 13:05
city = Улаанбаатар
UPDATE: I figured out how to get my date and city data. I ended up using following-sibling to get the date text, splitting it by comma, and taking the 2nd and 3rd values.
date_block = card.xpath("normalize-space(.//div[contains(@class,'announcement-block__date')]/span/following-sibling::text())").extract_first().split(',')
date = date_block[1]
city = date_block[2]
Extra:
If anyone can tell me, or refer me to, how I can set up my pipeline file, it would be greatly appreciated. Is it correct to use a pipeline, or should you use items.py? Currently I have 3 spiders in the same project folder: apartments, jobs, cars. I need to clean my data and transform it. For example, for the jobs spider I am currently working on, as shown above, I want to create the following manipulations (a sketch of these as a pipeline follows my pipelines.py below):
if salary is < 1000, then replace it with the string 'Negotiable'
if date contains the text "Өчигдөр", then replace that text with 'Yesterday' without deleting the time
if employer contains the value 'Хувь хүн', then change the company value to 'Хувь хүн'
my pipelines.py file:
from itemadapter import ItemAdapter

class ScrapebooksPipeline:
    def process_item(self, item, spider):
        return item
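For reference, a minimal sketch of how that empty process_item could apply the three rules above. This is a hedged example: the item keys 'salary', 'date', 'employer' and 'company', and the pipeline class name, are assumptions rather than names taken from the spiders.
from itemadapter import ItemAdapter

class JobsCleaningPipeline:  # hypothetical name
    def process_item(self, item, spider):
        adapter = ItemAdapter(item)
        # Rule 1: salaries below 1000 become 'Negotiable'.
        try:
            if float(adapter.get('salary')) < 1000:
                adapter['salary'] = 'Negotiable'
        except (TypeError, ValueError):
            pass  # missing or non-numeric salary: leave it as-is
        # Rule 2: translate "Өчигдөр" to 'Yesterday', keeping the time part.
        date = adapter.get('date') or ''
        if 'Өчигдөр' in date:
            adapter['date'] = date.replace('Өчигдөр', 'Yesterday')
        # Rule 3: private individuals override the company value
        # (assumes the employer ends up in an 'employer' field).
        if 'Хувь хүн' in (adapter.get('employer') or ''):
            adapter['company'] = 'Хувь хүн'
        return item
The pipeline would then be enabled in settings.py via ITEM_PIPELINES = {'scrapebooks.pipelines.JobsCleaningPipeline': 300}. Broadly, items.py declares the fields, while cleaning and transforming belong in pipelines.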
my items.py file:
import scrapy

class ScrapebooksItem(scrapy.Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    pass
I changed your xpath to a smaller scope.
extract_first() will get the first instance, so use getall() instead.
In order to get the date I had to use regex (most of the results have a time but not a date, so if you get a blank for the date it's perfectly fine).
I can't read the language so I had to guess (kind of) for the city, but even if it's wrong you can get the point.
import scrapy
import re

class TempSpider(scrapy.Spider):
    name = 'temp_spider'
    allowed_domains = ['unegui.mn']
    start_urls = ['https://www.unegui.mn/azhild-avna/ulan-bator/']

    def parse(self, response, **kwargs):
        cards = response.xpath('//div[@class="announcement-block__date"]')
        # parse details
        for card in cards:
            company = card.xpath('.//span/text()').get()
            date_block = card.xpath('./text()').getall()
            date = date_block[1].strip()
            date = re.findall(r'(\d+-\d+-\d+)', date)
            if date:
                date = date[0]
            else:
                date = ''
            city = date_block[1].split(',')[2].strip()
            item = {'date': date,
                    'city': city,
                    'company': company
                    }
            yield item
Output:
[scrapy.core.scraper] DEBUG: Scraped from <200 https://www.unegui.mn/azhild-avna/ulan-bator/>
{'date': '2021-11-07', 'city': 'Улаанбаатар', 'company': 'Arirang'}
[scrapy.core.scraper] DEBUG: Scraped from <200 https://www.unegui.mn/azhild-avna/ulan-bator/>
{'date': '2021-11-11', 'city': 'Улаанбаатар', 'company': 'Altangadas'}
[scrapy.core.scraper] DEBUG: Scraped from <200 https://www.unegui.mn/azhild-avna/ulan-bator/>
...
...
...
Looks like you are missing indentation.
Instead of:
def parse(self, response, **kwargs):
    cards = response.xpath("//li[contains(@class,'announcement-container')]")
    # parse details
    for card in cards: date_block = card.xpath("normalize-space(.//div[contains(@class,'announcement-block__date')]/text())").extract_first().split(',')
    date = date_block[0]
    city = date_block[1]
Try this:
def parse(self, response, **kwargs):
    cards = response.xpath("//li[contains(@class,'announcement-container')]")
    # parse details
    for card in cards:
        date_block = card.xpath("normalize-space(.//div[contains(@class,'announcement-block__date')]/text())").extract_first().split(',')
        date = date_block[0]
        city = date_block[1]
Working with SAP data, we want to enrich sales data with the last customer. Depending on the sales type, there are different partner function codes that correspond to the last company to which the sale is performed (e.g. we may have indirect or direct sales). For now, we have been considering tables VBAP/VBAK/VBPA. We extract data from each table to separate files using sap4j, then join VBAP and VBPA on VBELN, and consider partner codes WE (goods recipient) or custom consignation codes indicating the last buyer for consignations.
Is there some accurate way to know who is the last buyer in the chain for a given sale?
It can be done in the following way:
import pandas as pd

def sales_tabkey(row):
    return "001{}{}".format(row['VBELN'], row['POSNR'])

def expected_partner_function_for_sales_type(row):
    consignation_codes = set(['ORK', 'XKB', 'ZSOK', 'ZLZK', 'ZTSK', 'KE', 'ZED', 'ZZN'])
    if row['AUART'] in consignation_codes:
        return 'ZK'
    return 'WE'

def get_kunnrf_frame(vbap, vbak, vbpa, kna):
    df = pd.merge(vbap, vbak, on=['VBELN'], how='left')
    df = pd.merge(df, vbpa, on='VBELN', how='left')
    df["EXPPARVW"] = df.apply(expected_partner_function_for_sales_type, axis=1)
    # KUNNR in kna is considered end_client_id
    df = pd.merge(df, kna, on='ADRNR', how='left')[['VBELN', 'POSNR', 'KUNNR', 'end_client_id', 'ADRNR', 'PARVW', 'EXPPARVW', 'AUART']].drop_duplicates()
    df['TABKEY'] = df.apply(sales_tabkey, axis=1)
    endclient_tabkeys = set(df.TABKEY.unique())
    dfa = df[df.PARVW == df['EXPPARVW']]
    dfb = df[df.TABKEY.isin(endclient_tabkeys.difference(set(dfa.TABKEY.unique())))]
    return pd.concat([dfa, dfb])
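A minimal usage sketch, assuming the four extracts produced by sap4j have already been loaded into pandas DataFrames (the file names here are hypothetical):
import pandas as pd

# Hypothetical file names: one extract per SAP table, produced with sap4j.
vbap = pd.read_csv('VBAP.csv')  # sales document items
vbak = pd.read_csv('VBAK.csv')  # sales document headers (carries AUART, the sales type)
vbpa = pd.read_csv('VBPA.csv')  # partner functions per sales document
kna = pd.read_csv('KNA1.csv')   # customer master, assumed to carry ADRNR and end_client_id

last_buyers = get_kunnrf_frame(vbap, vbak, vbpa, kna)
print(last_buyers[['VBELN', 'POSNR', 'PARVW', 'end_client_id']].head())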
I am trying to scrape review data on booking.com that is inside a <ul> tag with class="review_list". There are 10 reviews, each of them inside an <li> with class="review_list_new_item_block".
But I noticed that I can't scrape most of the data inside these <ul> and <li> tags, although I always use the same logic for the XPaths. I tried, for example, the following XPaths to scrape the title, text, language, review date and stay date:
title = response.xpath('//h3[@class="c-review-block__title"]/text()').extract()
#title = response.xpath('//div[@class="c-review-block__row"]//h3/text()')
text = response.xpath('//span[@class="c-review__prefix c-review__prefix--color-green"]/span[2]/text()').extract()
lang = response.xpath('//span[@class="c-review__prefix c-review__prefix--color-green"]/span[2]/@lang').extract()
reviewdate = response.xpath('//span[@class="c-review-block__date"]/text()').extract()
staydate = response.xpath('//div[@class="c-review-block__room-info__name"]/div/span/text()').extract()
Only the XPaths for these two items worked:
author = response.xpath('//span[@class="bui-avatar-block__title"]/text()').extract()
authorcountry = response.xpath('//span[@class="bui-avatar-block__subtitle"]/text()').extract()
Do you have any suggestions? Is the issue in the way I am using the XPaths, or does booking.com have any restrictions in this part of the HTML code? Thank you in advance!
My script:
import scrapy

class BookingSpider(scrapy.Spider):
    name = 'booking-spider'
    allowed_domains = ['booking.com']
    # start with the page of all countries
    start_urls = [
        'https://www.booking.com/country.de.html?aid=356980;label=gog235jc-1DCAIoLDgcSAdYA2gsiAEBmAEHuAEHyAEP2AED6AEB-AECiAIBqAIDuAK7q7DyBcACAQ;sid=8de61678ac61d10a89c13a3941fd3dcd'
    ]

    # get country page
    def parse(self, response):
        for countryurl in response.xpath('normalize-space(//a[contains(text(),"Schweiz")]/@href)'):
            url = response.urljoin(countryurl.extract())
            yield scrapy.Request(url, callback=self.parse_country)

    # get page of all hotels in a country
    def parse_country(self, response):
        for hotelsurl in response.xpath('normalize-space(//a[@class="bui-button bui-button--secondary"]/@href)'):
            url = response.urljoin(hotelsurl.extract())
            yield scrapy.Request(url, callback=self.parse_allhotels)

    # get page of one hotel
    def parse_allhotels(self, response):
        for hotelurl in response.xpath('normalize-space(//a[@class="hotel_name_link url"]/@href)'):
            url = response.urljoin(hotelurl.extract())
            yield scrapy.Request(url, callback=self.parse_hotelpage)
        next_page = response.xpath('//a[contains(@class,"paging-next") and contains(@title,"Nächste Seite")]/@href')
        if next_page:
            url = response.urljoin(next_page[0].extract())
            yield scrapy.Request(url, self.parse_allhotels)

    # get review page of this hotel
    def parse_hotelpage(self, response):
        reviewsurl = response.xpath('//a[@class="hp_nav_reviews_link toggle_review track_review_link_zh"]/@href')
        url = response.urljoin(reviewsurl[0].extract())
        new_url = url.replace('blockdisplay4', 'tab-reviews')
        yield scrapy.Request(new_url, callback=self.parse_reviews, dont_filter=True)

    # parse its reviews
    def parse_reviews(self, response):
        author = response.xpath('//span[@class="bui-avatar-block__title"]/text()').extract()
        authorcountry = response.xpath('//span[@class="bui-avatar-block__subtitle"]/text()').extract()
        title = response.xpath('//div[@class="c-review-block"]//div[@class="c-review-block__row"]//h3/text()').extract()
        print(title)
You can try the below XPaths.
title:
//div[@class='c-review-block']//div[@class="c-review-block__row"]//h3/text()
text (includes both the "great" and the "poor" text):
//div[@class='c-review-block']//div[@class='c-review-block__row'][3]//text()
review date:
//div[@class='c-review-block']//div[@class='c-review-block__row']//span[@class="c-review-block__date"]/text()
stay date:
//div[@class='c-review-block']//div[@class='c-review-block__room-info']//span[@class="c-review-block__date"]/text()
subtitle:
//div[@class='c-review-block']//span[@class="bui-avatar-block__subtitle"]/text()
You have to get the review nodes by using //div[@class='c-review-block'] and then iterate through all the nodes to get the details. If you are iterating through each review, you just have to replace //div[@class='c-review-block'] with . in the XPath, so that the XPaths are evaluated in the review context.
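A minimal sketch of that iteration inside parse_reviews (a hedged example; the field names are illustrative, not a definitive mapping):
def parse_reviews(self, response):
    # Get the review nodes, then use relative ('.') XPaths inside each one.
    for review in response.xpath('//div[@class="c-review-block"]'):
        yield {
            'author': review.xpath('.//span[@class="bui-avatar-block__title"]/text()').get(),
            'authorcountry': review.xpath('.//span[@class="bui-avatar-block__subtitle"]/text()').get(),
            'title': review.xpath('.//div[@class="c-review-block__row"]//h3/text()').get(),
            'reviewdate': review.xpath('.//span[@class="c-review-block__date"]/text()').get(),
        }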
I want to display cryptocurrency prices on my site, therefore I parse the latest BTC/USD price from coinmarketcap.com.
Now I want to display them in a list, but first I don't know how to save the symbol from the JSON to my database, and second, how can I display my view properly? Currently I only save the key:value of price_usd, where key is the name of the currency.
views.py
def crypto_ticker(request):
    list_prices = CryptoPrices.objects.get_queryset().order_by('-pk')
    paginator = Paginator(list_prices, 100)  # Show 100 prices per page
    page = request.GET.get('page')
    price = paginator.get_page(page)
    return render(request, 'MyProject/crypto_ticker.html', {'price': price})
urls.py
url(r'^crypto_ticker/$', MyProject_views.crypto_ticker, name='crypto_ticker'),
models.py
class CryptoPrices(models.Model):
    symbol = models.CharField(max_length=10)
    key = models.CharField(max_length=30)
    value = models.CharField(max_length=200)
celery update task:
@periodic_task(run_every=(crontab(minute='*/1')), name="Update Crypto rate(s)", ignore_result=True)
def get_exchange_rate():
    api_url = "https://api.coinmarketcap.com/v1/ticker/?limit=100"
    try:
        exchange_rates = requests.get(api_url).json()
        for exchange_rate in exchange_rates:
            CryptoPrices.objects.update_or_create(key=exchange_rate['id'],
                                                  defaults={'value': round(float(exchange_rate['price_usd']), 3)}
                                                  )
        logger.info("Exchange rate(s) updated successfully.")
    except Exception as e:
        print(e)
Surely just adding
symbol=exchange_rate['symbol']
to your update_or_create will work?
The JSON from coinmarketcap sets that as a key in the dictionary, unless you want the image that they use?
In that case you would have to save copies of that image yourself, create a mapping from the text of the symbol to the image itself, and format that in your HTML output.
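Concretely, a sketch of the adjusted call inside the task (a hedged example; it assumes you want symbol refreshed on every update rather than used as a lookup key):
CryptoPrices.objects.update_or_create(
    key=exchange_rate['id'],
    defaults={
        'value': round(float(exchange_rate['price_usd']), 3),
        'symbol': exchange_rate['symbol'],  # e.g. 'BTC', straight from the API JSON
    },
)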
So I'm aiming to scrape 2 tables (in different formats) from a website, https://info.fsc.org/details.php?id=a0240000005sQjGAAU&type=certificate, after using the search bar to iterate over a list of license codes. I haven't included the loop fully yet, but I added it at the top for completeness.
My issue is that the two tables I want, Product Data and Certificate Data, are in 2 different formats, so I have to scrape them separately. As the Product Data is in the normal "tr" format on the webpage, this bit is easy and I've managed to extract a CSV file of it. The harder bit is extracting the Certificate Data, as it is in "div" form.
I've managed to print the Certificate Data as a list of text using the class function, however I need to have it in tabular form saved in a CSV file. As you can see, I've tried several unsuccessful ways of converting it to CSV, but if you have any suggestions it would be much appreciated, thank you!! Also any other general tips to improve my code would be great too, as I am new to web scraping.
import requests
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

#namelist = open('example.csv', newline='', delimiter = 'example')
#for name in namelist:
#include all of the below

driver = webdriver.Chrome(executable_path="/Users/jamesozden/Downloads/chromedriver")
url = "https://info.fsc.org/certificate.php"
driver.get(url)
search_bar = driver.find_element_by_xpath('//*[@id="code"]')
search_bar.send_keys("FSC-C001777")
search_bar.send_keys(Keys.RETURN)
new_url = driver.current_url
r = requests.get(new_url)
soup = BeautifulSoup(r.content, 'lxml')

table = soup.find_all('table')[0]
df, = pd.read_html(str(table))
certificate = soup.find(class_='certificatecl').text
##certificate1 = pd.read_html(str(certificate))
driver.quit()
df.to_csv("Product_Data.csv", index=False)
##certificate1.to_csv("Certificate_Data.csv", index=False)
#print(df[0].to_json(orient='records'))
print(certificate)
Output:
Status
Valid
First Issue Date
2009-04-01
Last Issue Date
2018-02-16
Expiry Date
2019-04-01
Standard
FSC-STD-40-004 V3-0
What I want, but over hundreds/thousands of license codes (I just manually created this one sample in Excel):
Desired output (screenshot)
EDIT
So whilst this now works for the Certificate Data, I also want to scrape the Product Data and output it into another .csv file. However, currently it only prints 5 copies of the product data for the final license code, which is not what I want.
New Code:
df = pd.read_csv("MS_License_Codes.csv")
codes = df["License Code"]
def get_data_by_code(code):
data = [
('code', code),
('submit', 'Search'),
]
response = requests.post('https://info.fsc.org/certificate.php', data=data)
soup = BeautifulSoup(response.content, 'lxml')
status = soup.find_all("label", string="Status")[0].find_next_sibling('div').text
first_issue_date = soup.find_all("label", string="First Issue Date")[0].find_next_sibling('div').text
last_issue_date = soup.find_all("label", string="Last Issue Date")[0].find_next_sibling('div').text
expiry_date = soup.find_all("label", string="Expiry Date")[0].find_next_sibling('div').text
standard = soup.find_all("label", string="Standard")[0].find_next_sibling('div').text
return [code, status, first_issue_date, last_issue_date, expiry_date, standard]
# Just insert here output filename and codes to parse...
OUTPUT_FILE_NAME = 'Certificate_Data.csv'
#codes = ['C001777', 'C001777', 'C001777', 'C001777']
df3=pd.DataFrame()
with open(OUTPUT_FILE_NAME, 'w') as f:
writer = csv.writer(f)
for code in codes:
print('Getting code# {}'.format(code))
writer.writerow((get_data_by_code(code)))
table = soup.find_all('table')[0]
df1, = pd.read_html(str(table))
df3 = df3.append(df1)
df3.to_csv('Product_Data.csv', index = False, encoding='utf-8')
Here's all you need.
No chromedriver. No pandas. Forget about them in the context of scraping.
import requests
import csv
from bs4 import BeautifulSoup

# This is all what you need for your task. Really.
# No chromedriver. Don't use it for scraping. EVER.
# No pandas. Don't use it for writing csv. It's not what pandas was made for.

# Function to parse a single data page based on a single input code.
def get_data_by_code(code):
    # Parameters to build the POST request.
    # "type" and "submit" params are static. "code" is your desired code to scrape.
    data = [
        ('type', 'certificate'),
        ('code', code),
        ('submit', 'Search'),
    ]
    # POST request to gain page data.
    response = requests.post('https://info.fsc.org/certificate.php', data=data)
    # "soup" object to parse html data.
    soup = BeautifulSoup(response.content, 'lxml')
    # "status" variable. Contains the first found [LABEL tag with text="Status"]'s
    # following sibling DIV text. Which is the status.
    status = soup.find_all("label", string="Status")[0].find_next_sibling('div').text
    # Same for issue dates... etc.
    first_issue_date = soup.find_all("label", string="First Issue Date")[0].find_next_sibling('div').text
    last_issue_date = soup.find_all("label", string="Last Issue Date")[0].find_next_sibling('div').text
    expiry_date = soup.find_all("label", string="Expiry Date")[0].find_next_sibling('div').text
    standard = soup.find_all("label", string="Standard")[0].find_next_sibling('div').text
    # Returning found data as a list of values.
    return [response.url, status, first_issue_date, last_issue_date, expiry_date, standard]

# Just insert here output filename and codes to parse...
OUTPUT_FILE_NAME = 'output.csv'
codes = ['C001777', 'C001777', 'C001777', 'C001777']

with open(OUTPUT_FILE_NAME, 'w') as f:
    writer = csv.writer(f)
    for code in codes:
        print('Getting code# {}'.format(code))
        # Writing the list of values to the file as a single row.
        writer.writerow((get_data_by_code(code)))
Everything is really straightforward here. I'd suggest you spend some time in the Chrome dev tools "Network" tab to get a better understanding of request forging, which is a must for scraping tasks.
In general, you don't need to run Chrome to click the "Search" button; you need to forge the request generated by that click. The same goes for any form and AJAX.
well... you should sharpen your skills (:
df3 = pd.DataFrame()
with open(OUTPUT_FILE_NAME, 'w') as f:
    writer = csv.writer(f)
    for code in codes:
        print('Getting code# {}'.format(code))
        writer.writerow((get_data_by_code(code)))
        ### HERE'S THE PROBLEM:
        # "soup" variable is declared inside of the "get_data_by_code" function.
        # So you can't use it in the outer context.
        table = soup.find_all('table')[0]  # <--- you should move this line to the
        # definition of the "get_data_by_code" function and return its value somehow...
        df1, = pd.read_html(str(table))
        df3 = df3.append(df1)
df3.to_csv('Product_Data.csv', index=False, encoding='utf-8')
As per the example, you can return a dictionary of values from the get_data_by_code function:
def get_data_by_code(code):
    ...
    table = soup.find_all('table')[0]
    return dict(row=row, table=table)  # 'row' being the list of values built above
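A hedged sketch of how the calling loop could then consume that dictionary (reusing the names from the snippet above, and assuming the 'row' key holds the list of values the function already builds):
df3 = pd.DataFrame()
with open(OUTPUT_FILE_NAME, 'w') as f:
    writer = csv.writer(f)
    for code in codes:
        result = get_data_by_code(code)
        # Certificate data goes to the csv writer...
        writer.writerow(result['row'])
        # ...and each product table accumulates into a single DataFrame.
        df1, = pd.read_html(str(result['table']))
        df3 = df3.append(df1)
df3.to_csv('Product_Data.csv', index=False, encoding='utf-8')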
I am using Odoo 10 and I have two models, OrderLine and Products.
OrderLine
class OrderLine(models.Model):
    _name = 'order_line'
    _description = 'Order Lines'

    name = fields.Char()
    products = fields.Many2one('products', string='Products')
Products
class Products(models.Model):
    _name = 'products'
    _description = 'Products'
    _sql_constraints = [
        ('uniq_poduct_code', 'unique(product_code)', 'Product Code already exists!')
    ]

    name = fields.Char()
    product_code = fields.Char()
Now I am trying to create order_line records from a CSV file, and in the CSV file the customer is providing me with a 'Product Code' instead of an ID. How do I handle this, so that we use the product code and the system automatically fills in the product associated with that product code?
Note:
Product Code in the products table is also unique, so there is no chance of duplication.
CSV template:
customer/account_number,customer/first_name,customer/last_name,customer/account_type,order/transaction_id,order/products/product_code,order/quantity,order/customer_id/id
Case 1: there are no products stored in the database with any of the product codes the customer is giving to you
If the product codes haven't been created yet in the database, you should have two CSV files (Products.csv and OrderLine.csv). The first one must have three columns (id, name and product_code). The second one must have three columns too (id, name and products/id). So you would only have to make up an XML ID under the id column in Products.csv and call this XML ID from the respective row of the products/id column of OrderLine.csv.
Case 2: the product codes the customer has given to you belong to existing products in the database
Now, the customer has given you product codes of products which already exist in the database. In this case you don't have to create a Products.csv file. You need to know the XML IDs of the products which have the product codes the customer gave to you. For that, you can go through the Odoo interface to the tree view of the products model (if this view doesn't exist, you must create it). Then select all records (click on the number 80 in the top right corner to show more records per page if you need to). Once all of them are selected, click on the More button and afterwards on Export. Select the columns product_code and name and proceed. Save the generated CSV file as, for example, Products.csv. Open it, and you'll see the XML IDs of all the exported products (if they didn't have an XML ID before, they will now: an export generates an XML ID for each exported record that doesn't have one). Now, I guess the customer has given you something like a file with the columns Name of the order line, Product code, so replace the Product code column values with the respective XML IDs of the products you have just exported. In the end you should have one file to import, OrderLine.csv, with id, name and products/id columns.
Case 3: there are some product codes belonging to existing products stored in the database and there are some ones which still don't exist
In this case you will have to combine cases 1 and 2: first, export the products as described in case 2, and then create a new CSV with the products whose codes don't exist yet, as described in case 1. Then replace the product codes the customer gave to you with the respective XML IDs, as described in case 2.
Note: this process will take you a lot of time if you have thousands of records to import and you do the replacements manually. In that case it is mandatory to create a macro in your CSV editor which does the replacements (with search and replace); for example, with LibreOffice you can write macros in Python. A plain-Python sketch of the replacement step follows.
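For illustration, a standalone Python sketch of that replacement step (hedged: the file names and column headers are hypothetical, so adjust them to your actual files):
import csv

# Build a map product_code -> XML ID from the file exported from Odoo.
code_to_xmlid = {}
with open('Products.csv', newline='', encoding='utf-8') as f:
    for row in csv.DictReader(f):
        code_to_xmlid[row['product_code']] = row['id']

# Rewrite the customer's file, replacing product codes with XML IDs.
with open('customer_order_lines.csv', newline='', encoding='utf-8') as src, \
        open('OrderLine.csv', 'w', newline='', encoding='utf-8') as dst:
    reader = csv.DictReader(src)
    writer = csv.DictWriter(dst, fieldnames=['id', 'name', 'products/id'])
    writer.writeheader()
    for n, row in enumerate(reader, start=1):
        xmlid = code_to_xmlid.get(row['Product Code'])
        if xmlid is None:
            # Case 3: this code is not in the database yet; keep it visible
            # so you can add the product to a new Products.csv first.
            xmlid = row['Product Code']
        writer.writerow({
            'id': '__import__.order_line_{}'.format(n),
            'name': row['Name'],
            'products/id': xmlid,
        })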
Example (Case 3)
The customer has given you a file of order lines, with two lines:
Name: OL A, Product Code: AAA
Name: OL B, Product Code: BBB
You export products from the Odoo interface and you get a file with one line:
id,name,product_code
__export__.products_a,"Product A","AAA"
You look for the coincidences of the product codes in both files, and do the replacements in a copy of the customer file, so now you have this:
Name: OL A, Product Code: __export__.products_a
Name: OL B, Product Code: BBB
Then you create a new CSV, Products.csv, and put in there the products whose product code doesn't exist yet:
id,name,product_code
__import__.products_b,"Product B","BBB"
Now apply the replacements again, comparing this new file with the one we had, and you will get this:
Name: OL A, Product Code: __export__.products_a
Name: OL B, Product Code: __import__.products_b
Convert this file to a CSV format suitable for Odoo, and save it as OrderLine.csv:
id,name,products/id
__import__.order_line_1,"OL A",__export__.products_a
__import__.order_line_2,"OL B",__import__.products_b
And finally, import the files, taking into account: import Products.csv before OrderLine.csv.
EDIT
I think it would be better to spend a bit of time programming a macro for your CSV editor (Excel, LibreOffice, OpenOffice or whatever), but if you're desperate and you need to do this only through Odoo, I came up with an awful workaround, but at least it should work too.
1. Create a new Char field named product_code in the order_line model (it would be there temporarily).
2. Modify the ORM create method of this model:
@api.model
def create(self, vals):
    product_id = False
    product_code = vals.get('product_code', False)
    if product_code:
        product = self.env['products'].search([
            ('product_code', '=', product_code)
        ])
        if product:
            product_id = product[0].id
    vals.update({
        'products': product_id,
    })
    return super(OrderLine, self).create(vals)
3. Copy the file which the customer sent you, rename the headers properly, and rename the column order/products/product_code as product_code. Import the CSV file. Each importation of records will call the ORM create method of the order_line model.
After the importation you'll have the order lines in the database, rightly related to the products.
When you've finished, remember to remove the code you've added (and also remove the column product_code from the order_line model in the database, in order to remove junk).
Solution 1
You can create a transient model with the fields that you are using in the CSV, applying the idea of @forvas:
class ImportOrderLines(models.TransientModel):
    _name = 'import.order.lines'

    product_code = fields.Char()

    @api.model
    def create(self, vals):
        product_id = False
        product_code = vals.get('product_code', False)
        if product_code:
            product = self.env['products'].search([
                ('product_code', '=', product_code)
            ])
            if product:
                product_id = product[0].id
        self.env['order_line'].create({
            'products': product_id,
        })
        return False  # you don't need to create the record in the transient model
You can go to the list view of this transient model and import like in any other model, with the base_import view.
Solution 2
You could create a wizard in order to import the CSV and create the Order Lines.
Check the following source code. You must assign the method import_order_lines to a button in the wizard.
import base64
import magic
import csv
from cStringIO import StringIO
import codecs

from openerp import models, fields, api, _
from openerp.exceptions import Warning


class ImportDefaultCodeWizard(models.TransientModel):
    _name = 'import.default_code.wizard'

    name = fields.Char(
        string='File name',
    )
    file = fields.Binary(
        string='ZIP file to import to Odoo',
        required=True,
    )

    @api.multi
    def import_order_lines(self):
        self.ensure_one()
        content = base64.decodestring(self.file)
        if codecs.BOM_UTF8 == content[:3]:  # remove "byte order mark" (windows)
            content = content[3:]
        file_type = magic.from_buffer(content, mime=True)
        if file_type == 'text/plain':
            self._generate_order_line_from_csv(content)
            return self._show_result_wizard()
        raise Warning(
            _('WRONG FILETYPE'),
            _('You should send a CSV file')
        )

    def _show_result_wizard(self):
        return {
            'type': 'ir.actions.act_window',
            'res_model': self._name,
            'view_type': 'form',
            'view_mode': 'form',
            'target': 'new',
            'context': self.env.context,
        }

    def _generate_order_line_from_csv(self, data):
        try:
            reader = csv.DictReader(StringIO(data))
        except Exception:
            raise Warning(
                _('ERROR getting data from csv file'
                  '\nThere was some error trying to get the data from the csv file.'
                  '\nMake sure you are using the right format.'))
        n = 1
        for row in reader:
            n += 1
            self._validate_data(n, row)
            default_code = row.get('default_code', False)
            order_line = {
                'default_code': self._get_product_id(n, default_code),
                # here you should add all the order line fields
            }
            try:
                self.env['order_line'].create(order_line)
            except Exception:
                raise Warning(
                    _('The order line could not be created.'
                      '\nROW: %s') % n
                )

    def _validate_data(self, n, row):
        csv_fields = [
            'default_code',
        ]
        """ here is where you should add the CSV fields in order to validate them
        customer/account_number, customer/first_name, customer/last_name,
        customer/account_type, order/transaction_id, order/products/product_code, order/quantity, order/customer_id/id
        """
        for key in row:
            if key not in csv_fields:
                raise Warning(_('ERROR\nThe file format is not right.'
                                '\nCheck the column names and the CSV format'
                                '\nKEY: %s' % key))
        if row.get('default_code', False) == '':
            raise Warning(
                _('ERROR Validating data'),
                _('The product code should be filled.'
                  '\nROW: %s') % n
            )

    def _get_product_id(self, n, default_code):
        if default_code:
            product_obj = self.env['product.product'].search([
                ('default_code', '=', default_code),
            ])
            if len(product_obj) == 1:
                return product_obj.default_code
            else:
                raise Warning(
                    _('ERROR Validating data'),
                    _('The product code should be filled.'
                      '\nROW: %s') % n
                )
        return False
You can search by product_code like so:
@api.model
def search_by_code(self, code):
    result = self.env['products'].search([('product_code', '=', code)])
    return result
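And a brief hedged usage sketch, resolving an incoming code inside a create() override, as in the earlier answers:
@api.model
def create(self, vals):
    code = vals.pop('product_code', False)
    if code:
        product = self.search_by_code(code)
        if product:
            vals['products'] = product[0].id
    return super(OrderLine, self).create(vals)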