I've been trying to upload a file using the Box v2 API with requests.
So far I've had little luck, though. Maybe someone here can help me see what I'm actually doing wrong.
file_name = "%s%s" % (slugify(sync_file.description), file_suffix)
file_handle = open(settings.MEDIA_ROOT + str(sync_file.document), 'rb')
folder_id = str(sync_file.patient.box_patient_folder_id)
r = requests.post(
    files_url,
    headers=headers,
    files={
        file_name: file_handle,
        "folder_id": folder_id,
    },
)
My authentication works, because I'm creating a folder just before that, using the same data.
A response looks something like this:
{
    u'status': 404,
    u'code': u'not_found',
    u'help_url': u'http://developers.box.com/docs/#errors',
    u'request_id': u'77019510950608f791a0c1',
    u'message': u'Not Found',
    u'type': u'error'
}
Maybe someone on here ran into a similar issue.
You need to pass two Python dictionaries, files and data: files is {uniqFileName: openFileObj} and data is {uniqFileName: filename}. Below is the upload method from my Box class. And remember to add a final entry in data, 'folder_id': destination_id.
def uploadFiles(self, ufiles, folid):
    '''uploads 1 or more files in the ufiles list of tuples containing
    (src fullpath, dest name). folid is the id of the folder to
    upload to.'''
    furl = URL2 + 'files/data'
    data, files = {}, {}
    for i, v in enumerate(ufiles):
        ff = v[0]
        fn = v[1]
        # copy to new, renamed file in tmp folder if necessary
        # can't find a way to do this with the api
        if os.path.basename(ff) != fn:
            dest = os.path.join(TMP, fn)
            shutil.copy2(ff, dest)
            ff = dest
        f = open(ff, 'rb')
        k = 'filename' + str(i)
        data[k] = fn
        files[k] = f
    data['folder_id'] = folid
    res = self.session.post(furl, files=files, data=data)
    for k in files:
        files[k].close()
    return res.status_code
Here is a sample call:
destFol = '406600304'
ret = box.uploadFiles((('c:/1temp/hc.zip', 'hz.zip'),), destFol)
Like I said, the above function is a method of a class, with an instance attribute that holds a requests session. But you can use requests.post instead of self.session.post and it will work just the same. Just remember to add the headers with your API key and token if you do it outside a session; a rough sketch of that is below.
According to the documentation, you are supposed to be able to rename the file by giving it a new name in the data dict, but I can't make this work except by copying the source file to a temp dir with the desired name and uploading that. It's a bit of a hack, but it works.
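Here is roughly what the plain-requests variant could look like. This is only a sketch: URL2, the API key, the token, the folder id and the file paths are placeholders, not values from this thread, so substitute your own.

import requests

# Placeholder values -- substitute your own endpoint base, key, token and ids.
URL2 = 'https://api.box.com/2.0/'
API_KEY = 'MY_API_KEY'
AUTH_TOKEN = 'MY_AUTH_TOKEN'

headers = {'Authorization': 'BoxAuth api_key=%s&auth_token=%s' % (API_KEY, AUTH_TOKEN)}

# One entry per file, keyed identically in both dicts,
# plus the destination folder id in data.
files = {'filename0': open('c:/1temp/hc.zip', 'rb')}
data = {'filename0': 'hz.zip', 'folder_id': '406600304'}

res = requests.post(URL2 + 'files/data', headers=headers, files=files, data=data)
print(res.status_code)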
good luck,
Mike
As someone requested my implementation, I figured I would put it out here for anyone trying to achieve something similar.
files_url = "%s/files/content" % (settings.BOX_API_HOST)
headers = {"Authorization": "BoxAuth api_key=%s&auth_token=%s" %
(settings.BOX_API_KEY, self.doctor.box_auth_token)
}
file_root, file_suffix = os.path.splitext(str(self.document))
filename = "%s%s" % (slugify(self.description), file_suffix)
files = {
'filename1': open(settings.MEDIA_ROOT + str(self.document), 'rb'),
}
data = {
'filename1': filename,
'folder_id': str(self.patient.get_box_folder()),
}
r = requests.post(files_url,
headers=headers,
files=files,
data=data)
file_response = simplejson.loads(r.text)
try:
if int(file_response['entries'][0]['id']) > 0:
box_file_id = int(file_response['entries'][0]['id'])
#Update the name of file
file_update_url = "%s/files/%s" % (settings.BOX_API_HOST, box_file_id)
data_update = {"name": filename}
file_update = requests.put(file_update_url,
data=simplejson.dumps(data_update),
headers=headers)
LocalDocument.objects.filter(id=self.id).update(box_file_id=box_file_id)
except:
pass
So in essence, I needed to send the file, retrieve the ID of the newly uploaded file, and then send another request to Box to rename it. Personally, I don't like it either, but it works for me, and I haven't been able to find any other implementation that does the correct naming from the get-go.
Hope someone can benefit from this snippet.
My solution, using requests:
def upload_to_box(folder_id, auth_token, file_out):
    # BOX_AUTH (an Authorization header format string) and new_file_name
    # (the desired name on Box) are defined elsewhere in my module.
    headers = {'Authorization': BOX_AUTH.format(auth_token)}
    url = 'https://api.box.com/2.0/files/content'
    files = {'filename': (new_file_name, open(file_out, 'rb'))}
    data = {'folder_id': folder_id}
    response = requests.post(url, params=data, files=files, headers=headers)
It would be nice if you could specify the new_copy parameter but there's nothing documented for it and it doesn't seem to work.
Related
I'm trying to make a command that stores the name, description, and image of a character, and another command to retrieve that data in an embed, but I'm having trouble working with JSON files.
This is my code to add them:
@client.command()
async def addskillset(ctx):
    await ctx.send("Let's add this skillset!")
    questions = ["What is the monster name?", "What is the monster description?", "what is the monster image link?"]
    answers = []
    # code checking the questions results
    embedkra = nextcord.Embed(title=f"{answers[0]}", description=f"{answers[1]}", color=ctx.author.color)
    embedkra.set_image(url=f"{answers[2]}")
    mess = await ctx.reply(embed=embedkra, mention_author=False)
    await mess.add_reaction('✅')
    await mess.add_reaction('❌')

    def check(reaction, user):
        return user == ctx.author and (str(reaction.emoji) == "✅" or "❌")

    try:
        reaction, user = await client.wait_for('reaction_add', timeout=1000.0, check=check)
    except asyncio.TimeoutError:
        pass  # giving a message that the time is over
    else:
        if reaction.emoji == "✅":
            monsters = await get_skillsets_data()  # this data is added at the end
            if str(monster_name) in monsters:
                await ctx.reply("the monster is already added")
            else:
                monsters[str(monster_name)]["monster_name"] = {}
                monsters[str(monster_name)]["monster_name"] = answers[0]
                monsters[str(monster_name)]["monster_description"] = answers[1]
                monsters[str(monster_name)]["monster_image"] = answers[2]
                with open('skillsets.json', 'w') as f:
                    json.dump(monsters, f)
                await mess.delete()
                await ctx.reply(f"{answers[0]} successfully added to the list")
Code to get the embed with the requested info:
@client.command()
async def skilltest(ctx, *, monster_name):
    data = open('skillsets.json').read()
    data = json.loads(data)
    if str(monster_name) in data:
        name = data["monster_name"]
        description = data["monster_description"]
        link = data["monster_image"]
        embedkra = nextcord.Embed(title=f"{name}", description=f"{description}", color=ctx.author.color)
        embedkra.set_image(url=f"{link}")
        await ctx.reply(embed=embedkra, mention_author=False)
    else:
        # otherwise, it is still None meaning we didn't find it
        await ctx.reply("monster not found", mention_author=False)
and my JSON should look like this:
{
    "katufo": {"monster_name": "Katufo", "Monster_description": "Katufo is the best", "Monster_image": "#image_link"},
    "armor claw": {"monster_name": "Armor Claw", "Monster_description": "Armor claw is the best", "Monster_image": "#image_link"}
}
The get_skillsets_data function used in the first command:
async def get_skillsets_data():
    with open('skillsets.json', 'r') as f:
        monsters = json.load(f)
    return monsters
When you are trying to retrieve data from your JSON file, use name = data["katufo"]["monster_name"]; this retrieves only the monster_name of the key katufo. If you want to retrieve data for armor claw, the code must be name = data["armor claw"]["monster_name"]. So try this code:
@client.command()
async def skilltest(ctx, *, monster):
    data = open('skillsets.json').read()
    data = json.loads(data)
    if str(monster) in data:
        name = data[str(monster)]["monster_name"]
        description = data[str(monster)]["Monster_description"]
        link = data[str(monster)]["Monster_image"]
        embedkra = nextcord.Embed(title=f"{name}", description=f"{description}", color=ctx.author.color)
        embedkra.set_image(url=f"{link}")
        await ctx.reply(embed=embedkra, mention_author=False)
    else:
        # otherwise, it is still None meaning we didn't find it
        await ctx.reply("monster not found", mention_author=False)
Hope this works for you :)
If your json looks like what you showed above,
{
"katufo":{
"monster_name":"Katufo",
"Monster_description":"Katufo is the best",
"Monster_image":"#image_link"
},
"armor claw":{
"monster_name":"Armor Claw",
"Monster_description":"Armor claw is the best",
"Monster_image":"#image_link"
}
}
then there is no data["monster_name"]; the two objects inside your JSON are named katufo and armor claw. To get one of them you can simply write data['katufo']['monster_name'] (Python dicts don't support JavaScript-style attribute access like data.katufo.monster_name, so stick to the bracket syntax).
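As a quick, self-contained check of that lookup (re-using the sample JSON from the question):

import json

raw = '''{
  "katufo": {"monster_name": "Katufo", "Monster_description": "Katufo is the best", "Monster_image": "#image_link"},
  "armor claw": {"monster_name": "Armor Claw", "Monster_description": "Armor claw is the best", "Monster_image": "#image_link"}
}'''

data = json.loads(raw)
print(data["katufo"]["monster_name"])      # -> Katufo
print(data["armor claw"]["monster_name"])  # -> Armor Claw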
Your problem stems from looking up the monster name like this:
if str(monster_name) in data:
    name = data["monster_name"]
    description = data["monster_description"]
    link = data["monster_image"]
What you could do instead is loop through data, as it contains several monsters, and then do the check on each object:
for key, monster in data.items():  # data.items() gives (name, monster-dict) pairs
    if str(monster_name) in monster.values():
        name = monster["monster_name"]
        description = monster["Monster_description"]
        link = monster["Monster_image"]
One thing to think about: the way the variables are named is not something I personally recommend. Don't be afraid of using longer, descriptive names so things make more sense to you in the code. Also, in the JSON you provided, some attributes start with a capital letter and some don't; it's worth making the casing consistent.
Edit:
Dicts in Python are roughly the equivalent of objects in JavaScript and are initialized using similar syntax, as we can see below:
monster_data = {}
But since you want a specific structure for these monsters, we can go further and create a function called add_monster_object():
def add_monster_object(original_dict, monster_name):
    # Create a new, empty monster entry with the correct keys under the given name.
    original_dict[monster_name] = {
        "monster_name": '',
        "monster_description": '',
        "monster_image": ''
    }
    return original_dict
Now every time you run this function with a given name, the dict will contain an object under that name. For example, if the user enters armor sword as the monster name, we can call the function above as add_monster_object(original_dict, monster_name).
This will, if we take your initial dict as an example, return this:
{
"katufo":{
"monster_name":"Katufo",
"Monster_description":"Katufo is the best",
"Monster_image":"#image_link"
},
"armor claw":{
"monster_name":"Armor Claw",
"Monster_description":"Armor claw is the best",
"Monster_image":"#image_link"
},
"armor sword":{
"monster_name":"",
"monster_description":"",
"monster_image":""
}
}
Then you can populate them as you want, or update the function to take more parameters. The important part here is that you take a minute and figure out what you want to keep saved. Then make sure that you can read and write from a file, and you should have a somewhat simple structure going. Warning: this isn't a cut-and-dried method; you will also have to think about special cases, such as adding an object that already exists and so forth.
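For illustration, a small usage sketch of that function (the file name and the monster name are just examples):

import json

monsters = {}
add_monster_object(monsters, "armor sword")              # add the empty skeleton
monsters["armor sword"]["monster_name"] = "Armor Sword"  # then populate it

with open("skillsets.json", "w") as f:                   # and persist it to file
    json.dump(monsters, f, indent=2)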
If you decide to go with Replit you could use their database to create similar functionality but you wouldn't have to worry about reading and writing to a file.
As it is right now, I still think you should proceed with your bot and add some of the changes that I mentioned before the next actual problem arrives, as there are many things that aren't quite right. I also suggest you break everything into manageable parts: 1) read from a file, 2) write to a file, 3) write a dict to a file, 4) update a dict, and so forth (see the sketch below). Good luck!
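A rough sketch of that split into parts (the helper names and the skillsets.json path are examples, not code from the question):

import json
import os

def load_monsters(path="skillsets.json"):
    # Part 1: read the whole dict from file; start empty if the file doesn't exist yet.
    if not os.path.exists(path):
        return {}
    with open(path, "r") as f:
        return json.load(f)

def save_monsters(monsters, path="skillsets.json"):
    # Parts 2/3: write the whole dict back to the file.
    with open(path, "w") as f:
        json.dump(monsters, f, indent=2)

def update_monster(monsters, name, description, image):
    # Part 4: update (or create) a single monster entry in the dict.
    monsters[name] = {
        "monster_name": name,
        "monster_description": description,
        "monster_image": image,
    }
    return monsters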
Is there a way to get just the ID, without the [], out of the JSON file, without removing or adding anything? The only things I know are "append" and "remove", and things like read, get, load or check didn't work. The question marks are where I need the right word. If I just type f'Name: <#{json_dict["blockedUser"]}>' I get the ID inside the [], and the <# and > are just printed as-is. I would love to see a solution. If possible, I would like to display user.display_name instead of the mention after Name:. Thanks for the help!
@commands.command(name='checkblock')  # checks if a user is on the list of blocked users
async def checkblock(self, ctx):
    user = User
    with open('./bot_config/blocked_users.json', 'r') as json_file:
        json_dict = json.load(json_file)
        await ctx.send('Here are the blocked users:\n'
                       f'Name: <#{json_dict["blockedUser"]???(user.id)}>'
                       f'ID: {json_dict["blockedUser"].???(user.id)}')
    return
Here is my .json file with an ID inside.
{
"blockedUser": [
360881524410810380
]
}
json_dict["blockedUser"] is a list, so you can loop through it's content:
@commands.command(name='checkblock')  # checks if a user is on the list of blocked users
async def checkblock(self, ctx):
    with open('bot_config/blocked_users.json') as file:
        json_dict = json.load(file)
    template = 'Name: <#{}> (ID: {})'
    member_list = '\n'.join([template.format(user_id, user_id) for user_id in json_dict["blockedUser"]])
    await ctx.send(f'Here are the blocked users:\n{member_list}')
If you want the member's display_name, you'll have to get the discord.Member object, either with:
discord.Guild.fetch_member
discord.Guild.get_member
discord.utils.get
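For example, a minimal sketch of resolving each stored ID to a member so display_name can be shown (assuming this runs inside the same cog command as above):

# Sketch: resolve each stored ID to a discord.Member and show display_name.
lines = []
for user_id in json_dict["blockedUser"]:
    member = ctx.guild.get_member(user_id)              # cache lookup, may return None
    if member is None:
        member = await ctx.guild.fetch_member(user_id)  # API call fallback; raises if the user left
    lines.append(f'Name: {member.display_name} (ID: {user_id})')
await ctx.send('Here are the blocked users:\n' + '\n'.join(lines))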
This is my second try to explain a bit more precisely what I'm looking for ;-)
I set up a webhook in Mailchimp that fires every time a new subscriber is added to an audience. Mailchimp sends an HTTP POST request to a Jira ScriptRunner REST endpoint.
The content type of this request is application/x-www-form-urlencoded.
Within the Jira endpoint I would like to read the request data. How can I do that?
The payload (raw body) I receive looks like this:
type=unsubscribe&fired_at=2020-05-26+07%3A04%3A42&data%5Baction%5D=unsub&data%5Breason%5D=manual&data%5Bid%5D=34f28a4516&data%5Bemail%5D=examlple%40bla.com&data%5Bemail_type%5D=html&data%5Bip_opt%5D=xx.xxx.xxx.198&data%5Bweb_id%5D=118321378&data%5Bmerges%5D%5BEMAIL%5D=example%40bla.com&data%5Bmerges%5D%5BFNAME%5D=Horst&data%5Bmerges%5D%5BLNAME%5D=Schlemmer&data%5Bmerges%5D%5BCOMPANY%5D=First&data%5Bmerges%5D%5BADDRESS%5D%5Baddr1%5D=XXX
Now I would like to parse the data from the raw body into JSON or something similar.
The result might look like this:
{
"web_id": 123,
"email": "example#bla.com",
"company": "First",
...
}
Meanwhile I searched around a little and found something like the Node.js "querystring" module. It would be great if there were something similar in Groovy, or any other way to parse application/x-www-form-urlencoded data into JSON format.
Best regards and thanks in advance,
Bernhard
def body = "type=unsubscribe&fired_at=2020-05-26+07%3A04%3A42&data%5Baction%5D=unsub&data%5Breason%5D=manual&data%5Bid%5D=34f28a4516&data%5Bemail%5D=examlple%40bla.com&data%5Bemail_type%5D=html&data%5Bip_opt%5D=xx.xxx.xxx.198&data%5Bweb_id%5D=118321378&data%5Bmerges%5D%5BEMAIL%5D=example%40bla.com&data%5Bmerges%5D%5BFNAME%5D=Horst&data%5Bmerges%5D%5BLNAME%5D=Schlemmer&data%5Bmerges%5D%5BCOMPANY%5D=First&data%5Bmerges%5D%5BADDRESS%5D%5Baddr1%5D=XXX"
def map = body.split('&').collectEntries{ e ->
    e.split('=').collect{ URLDecoder.decode(it, "UTF-8") }
}
assert map.'data[merges][EMAIL]'=='example@bla.com'
map.each{println it}
prints:
type=unsubscribe
fired_at=2020-05-26 07:04:42
data[action]=unsub
data[reason]=manual
data[id]=34f28a4516
data[email]=examlple@bla.com
data[email_type]=html
data[ip_opt]=xx.xxx.xxx.198
data[web_id]=118321378
data[merges][EMAIL]=example@bla.com
data[merges][FNAME]=Horst
data[merges][LNAME]=Schlemmer
data[merges][COMPANY]=First
data[merges][ADDRESS][addr1]=XXX
A simple no-brainer Groovy version:
def a = '''
data[email_type]: html
data[web_id]: 123
fired_at: 2020-05-26 07:28:25
data[email]: example@bla.com
data[merges][COMPANY]: First
data[merges][FNAME]: Horst
data[ip_opt]: xx.xxx.xxx.xxx
data[merges][PHONE]: xxxxx
data[merges][ADDRESS][zip]: 33615
type: subscribe
data[list_id]: xxXXyyXX
data[merges][ADDRESS][addr1]: xxx.xxx'''
def res = [:]
a.eachLine{
    def parts = it.split( /\s*:\s*/, 2 )
    if( 2 != parts.size() ) return
    def ( k, v ) = parts
    def complexKey = ( k =~ /\[(\w+)\]/ ).findAll()
    if( complexKey ) complexKey = complexKey.last().last()
    res[ ( complexKey ?: k ).toLowerCase() ] = v
}
res
gives:
[email_type:html, web_id:123, fired_at:2020-05-26 07:28:25,
email:example@bla.com, company:First, fname:Horst, ip_opt:xx.xxx.xxx.xxx,
phone:xxxxx, zip:33615, type:subscribe, list_id:xxXXyyXX, addr1:xxx.xxx]
I found a solution finally. I hope you understand and maybe it helps others too ;-)
Starting from daggett's answer I did the following:
// Split body and remove unnecessary characters
def map = body.split('&').collectEntries{ e ->
    e.split('=').collect{ URLDecoder.decode(it, "UTF-8") }
}
// Process the map into readable stuff
def prettyMap = new JsonBuilder(map).toPrettyString()
// Convert the pretty map into a json object
def slurper = new JsonSlurper()
def jsonObject = slurper.parseText(prettyMap)
(The map looks pretty much like the one in daggett's answer.)
Then I extract the keys:
// Finally extracting customer data
def type = jsonObject['type']
And I get the data I need. For example
Type : subscribe
...
First Name : Heinz
...
Thanks to daggett!
I am trying to consume ContextualWeb News API. The endpoint is described here:
https://rapidapi.com/contextualwebsearch/api/web-search
Here is the request snippet in Python as described in RapidAPI:
response = unirest.get("https://contextualwebsearch-websearch-v1.p.rapidapi.com/api/Search/NewsSearchAPI?autoCorrect=true&pageNumber=1&pageSize=10&q=Taylor+Swift&safeSearch=false",
    headers={
        "X-RapidAPI-Host": "contextualwebsearch-websearch-v1.p.rapidapi.com",
        "X-RapidAPI-Key": "XXXXXX"
    }
)
How do I send the request and parse the response? Can you provide a complete code example for the News API?
Use Python 3.x for the code below. Here is a complete example where I pass the string "Taylor Swift" and parse the response. Let me know if you get stuck anywhere.
import requests  # install from: http://docs.python-requests.org/en/master/

# Replace the following string value with your valid X-RapidAPI-Key.
Your_X_RapidAPI_Key = "XXXXXXXXXXXXXXXXXXX"

# The query parameters (update according to your search query):
q = "Taylor%20Swift"  # the search query
pageNumber = 1        # the number of the requested page
pageSize = 10         # the size of a page
autoCorrect = True    # automatically correct spelling
safeSearch = False    # filter results for adult content

response = requests.get(
    "https://contextualwebsearch-websearch-v1.p.rapidapi.com/api/Search/NewsSearchAPI?q={}&pageNumber={}&pageSize={}&autocorrect={}&safeSearch={}".format(
        q, pageNumber, pageSize, autoCorrect, safeSearch),
    headers={
        "X-RapidAPI-Key": Your_X_RapidAPI_Key
    }
).json()

# Get the number of items returned
totalCount = response["totalCount"]

# Get the list of most frequent searches related to the input search query
relatedSearch = response["relatedSearch"]

# Go over each resulting item
for webPage in response["value"]:
    # Get the web page metadata
    url = webPage["url"]
    title = webPage["title"]
    description = webPage["description"]
    keywords = webPage["keywords"]
    provider = webPage["provider"]["name"]
    datePublished = webPage["datePublished"]

    # Get the web page image (if it exists)
    imageUrl = webPage["image"]["url"]
    imageHeight = webPage["image"]["height"]
    imageWidth = webPage["image"]["width"]
    thumbnail = webPage["image"]["thumbnail"]
    thumbnailHeight = webPage["image"]["thumbnailHeight"]

    # An example: output the web page url, title and published date:
    print("Url: %s. Title: %s. Published Date: %s." % (url, title, datePublished))
So I'm aiming to scrape two tables (in different formats) from a website - https://info.fsc.org/details.php?id=a0240000005sQjGAAU&type=certificate - after using the search bar to iterate this over a list of license codes. I haven't included the loop fully yet, but I added it at the top for completeness.
My issue is that the two tables I want, Product Data and Certificate Data, are in two different formats, so I have to scrape them separately. As the Product Data is in the normal "tr" format on the webpage, this bit is easy and I've managed to extract a CSV file of it. The harder bit is extracting the Certificate Data, as it is in "div" form.
I've managed to print the Certificate Data as a list of text, using the class function; however, I need to have it in tabular form saved in a CSV file. As you can see, I've tried several unsuccessful ways of converting it to a CSV, but if you have any suggestions it would be much appreciated, thank you! Also, any other general tips to improve my code would be great too, as I am new to web scraping.
# Imports assumed by the snippet below
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import requests
from bs4 import BeautifulSoup
import pandas as pd

#namelist = open('example.csv', newline='', delimiter = 'example')
#for name in namelist:
#include all of the below

driver = webdriver.Chrome(executable_path="/Users/jamesozden/Downloads/chromedriver")

url = "https://info.fsc.org/certificate.php"
driver.get(url)
search_bar = driver.find_element_by_xpath('//*[@id="code"]')
search_bar.send_keys("FSC-C001777")
search_bar.send_keys(Keys.RETURN)

new_url = driver.current_url
r = requests.get(new_url)
soup = BeautifulSoup(r.content, 'lxml')

table = soup.find_all('table')[0]
df, = pd.read_html(str(table))

certificate = soup.find(class_='certificatecl').text
##certificate1 = pd.read_html(str(certificate))

driver.quit()

df.to_csv("Product_Data.csv", index=False)
##certificate1.to_csv("Certificate_Data.csv", index=False)
#print(df[0].to_json(orient='records'))
print(certificate)
Output:
Status
Valid
First Issue Date
2009-04-01
Last Issue Date
2018-02-16
Expiry Date
2019-04-01
Standard
FSC-STD-40-004 V3-0
What I want but over hundreds/thousands of license codes (I just manually created this one sample in Excel):
Desired output
EDIT
So whilst this is now working for Certificate Data, I also want to scrape the Product Data and output that into another .csv file. However, currently it is only printing five copies of the Product Data for the final license code, which is not what I want.
New Code:
df = pd.read_csv("MS_License_Codes.csv")
codes = df["License Code"]

def get_data_by_code(code):
    data = [
        ('code', code),
        ('submit', 'Search'),
    ]

    response = requests.post('https://info.fsc.org/certificate.php', data=data)
    soup = BeautifulSoup(response.content, 'lxml')

    status = soup.find_all("label", string="Status")[0].find_next_sibling('div').text
    first_issue_date = soup.find_all("label", string="First Issue Date")[0].find_next_sibling('div').text
    last_issue_date = soup.find_all("label", string="Last Issue Date")[0].find_next_sibling('div').text
    expiry_date = soup.find_all("label", string="Expiry Date")[0].find_next_sibling('div').text
    standard = soup.find_all("label", string="Standard")[0].find_next_sibling('div').text

    return [code, status, first_issue_date, last_issue_date, expiry_date, standard]

# Just insert here output filename and codes to parse...
OUTPUT_FILE_NAME = 'Certificate_Data.csv'
#codes = ['C001777', 'C001777', 'C001777', 'C001777']

df3 = pd.DataFrame()
with open(OUTPUT_FILE_NAME, 'w') as f:
    writer = csv.writer(f)
    for code in codes:
        print('Getting code# {}'.format(code))
        writer.writerow((get_data_by_code(code)))
        table = soup.find_all('table')[0]
        df1, = pd.read_html(str(table))
        df3 = df3.append(df1)
df3.to_csv('Product_Data.csv', index=False, encoding='utf-8')
Here's all you need.
No chromedriver. No pandas. Forget about it in context of scraping.
import requests
import csv
from bs4 import BeautifulSoup

# This is all you need for your task. Really.
# No chromedriver. Don't use it for scraping. EVER.
# No pandas. Don't use it for writing csv. It's not what pandas was made for.

# Function to parse a single data page based on a single input code.
def get_data_by_code(code):
    # Parameters to build the POST-request.
    # "type" and "submit" params are static. "code" is your desired code to scrape.
    data = [
        ('type', 'certificate'),
        ('code', code),
        ('submit', 'Search'),
    ]

    # POST-request to gain page data.
    response = requests.post('https://info.fsc.org/certificate.php', data=data)
    # "soup" object to parse html data.
    soup = BeautifulSoup(response.content, 'lxml')

    # "status" variable. Contains the first found [LABEL tag, with text="Status"]
    # following-sibling DIV's text. Which is the status.
    status = soup.find_all("label", string="Status")[0].find_next_sibling('div').text
    # Same for issue dates... etc.
    first_issue_date = soup.find_all("label", string="First Issue Date")[0].find_next_sibling('div').text
    last_issue_date = soup.find_all("label", string="Last Issue Date")[0].find_next_sibling('div').text
    expiry_date = soup.find_all("label", string="Expiry Date")[0].find_next_sibling('div').text
    standard = soup.find_all("label", string="Standard")[0].find_next_sibling('div').text

    # Returning found data as a list of values.
    return [response.url, status, first_issue_date, last_issue_date, expiry_date, standard]

# Just insert here output filename and codes to parse...
OUTPUT_FILE_NAME = 'output.csv'
codes = ['C001777', 'C001777', 'C001777', 'C001777']

with open(OUTPUT_FILE_NAME, 'w') as f:
    writer = csv.writer(f)
    for code in codes:
        print('Getting code# {}'.format(code))
        # Writing list of values to file as a single row.
        writer.writerow((get_data_by_code(code)))
Everything is really straightforward here. I'd suggest you spend some time in the Chrome dev tools "Network" tab to get a better understanding of request forging, which is a must for scraping tasks.
In general, you don't need to run Chrome to click the "search" button; you need to forge the request generated by that click. The same goes for any form and AJAX.
well... you should sharpen your skills (:
df3 = pd.DataFrame()
with open(OUTPUT_FILE_NAME, 'w') as f:
    writer = csv.writer(f)
    for code in codes:
        print('Getting code# {}'.format(code))
        writer.writerow((get_data_by_code(code)))

        ### HERE'S THE PROBLEM:
        # "soup" variable is declared inside of the "get_data_by_code" function,
        # so you can't use it in the outer context.
        table = soup.find_all('table')[0]  # <--- you should move this line to the
        # definition of "get_data_by_code" function and return its value somehow...
        df1, = pd.read_html(str(table))
        df3 = df3.append(df1)
df3.to_csv('Product_Data.csv', index=False, encoding='utf-8')
For example, you can return a dictionary of values from the "get_data_by_code" function:
def get_data_by_code(code):
    ...
    # "row" is the list of values the function previously returned directly.
    table = soup.find_all('table')[0]
    return dict(row=row, table=table)
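The calling loop can then use both pieces, roughly like this (a sketch assuming the imports, the codes list, and the modified get_data_by_code() from above):

df3 = pd.DataFrame()
with open('Certificate_Data.csv', 'w') as f:
    writer = csv.writer(f)
    for code in codes:
        print('Getting code# {}'.format(code))
        result = get_data_by_code(code)
        writer.writerow(result['row'])             # certificate fields -> one csv row
        df1, = pd.read_html(str(result['table']))  # product data table for this code
        df3 = df3.append(df1)

df3.to_csv('Product_Data.csv', index=False, encoding='utf-8')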