I am trying to write a spider which crawls through the following JSON response:
http://gdata.youtube.com/feeds/api/standardfeeds/UK/most_popular?v=2&alt=json
What should the spider look like if I want to crawl the titles of all the videos? None of my spiders work.
from scrapy.spider import BaseSpider
import json
from youtube.items import YoutubeItem
class MySpider(BaseSpider):
    """Spider for the YouTube "most popular" JSON feed that collects video titles."""

    name = "youtubecrawler"
    allowed_domains = ["gdata.youtube.com"]
    # NOTE(review): the answer below reports that this "www." host is not reachable.
    start_urls = ['http://www.gdata.youtube.com/feeds/api/standardfeeds/DE/most_popular?v=2&alt=json']

    def parse(self, response):
        """Decode the feed and return one YoutubeItem per entry."""
        items = []
        # NOTE(review): `response` is passed to json.loads as-is; this is the
        # call that raises the TypeError shown in the traceback below.
        jsonresponse = json.loads(response)
        for video in jsonresponse["feed"]["entry"]:
            item = YoutubeItem()
            print(jsonresponse)
            print(video["media$group"]["yt$videoid"]["$t"])
            print(video["media$group"]["media$description"]["$t"])
            item["title"] = video["title"]["$t"]
            print(video["author"][0]["name"]["$t"])
            print(video["category"][1]["term"])
            items.append(item)
        return items
I always get the following error:
2014-01-05 16:55:21+0100 [youtubecrawler] ERROR: Spider error processing <GET http://gdata.youtube.com/feeds/api/standardfeeds/DE/most_popular?v=2&alt=json>
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/twisted/internet/base.py", line 1201, in mainLoop
self.runUntilCurrent()
File "/usr/local/lib/python2.7/dist-packages/twisted/internet/base.py", line 824, in runUntilCurrent
call.func(*call.args, **call.kw)
File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 382, in callback
self._startRunCallbacks(result)
File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 490, in _startRunCallbacks
self._runCallbacks()
--- <exception caught here> ---
File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 577, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "/home/bxxxx/svn/ba_txxxxx/scrapy/youtube/spiders/test.py", line 15, in parse
jsonresponse = json.loads(response)
File "/usr/lib/python2.7/json/__init__.py", line 326, in loads
return _default_decoder.decode(s)
File "/usr/lib/python2.7/json/decoder.py", line 365, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
exceptions.TypeError: expected string or buffer
I found two issues in your code:
The start URL is not accessible, so I removed the www from it.
I changed json.loads(response) to json.loads(response.body_as_unicode()), because json.loads expects a string rather than a Response object.
This works well for me:
class MySpider(BaseSpider):
    """Spider for the YouTube "most popular" JSON feed that collects video titles."""

    name = "youtubecrawler"
    allowed_domains = ["gdata.youtube.com"]
    start_urls = ['http://gdata.youtube.com/feeds/api/standardfeeds/DE/most_popular?v=2&alt=json']

    def parse(self, response):
        """Decode the JSON body and return a list with one YoutubeItem per feed entry.

        Each item's "title" field is set from the entry's title text; the
        other values are only printed.
        """
        items = []
        # json.loads needs the body text, not the Response object itself.
        jsonresponse = json.loads(response.body_as_unicode())
        for video in jsonresponse["feed"]["entry"]:
            item = YoutubeItem()
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(video["media$group"]["yt$videoid"]["$t"])
            print(video["media$group"]["media$description"]["$t"])
            item["title"] = video["title"]["$t"]
            print(video["author"][0]["name"]["$t"])
            print(video["category"][1]["term"])
            items.append(item)
        return items
Related
I'm trying to use marshmallow-sqlalchemy with aiohttp and I have followed their docs with the basic example and I'm getting an error.
I have this schema:
from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
from db.customer import Customer
class CustomerSchema(SQLAlchemyAutoSchema):
    """Marshmallow schema generated automatically from the Customer model."""

    class Meta:
        model = Customer
        # Relationship attributes are serialized too, so they are read on dump.
        include_relationships = True
        load_instance = True
And then the following code for the query:
from sqlalchemy import select
from db import db_conn
from db.customer import Customer
from queries.schema import CustomerSchema
customer_schema = CustomerSchema()
async def get_all_users():
    """Select every Customer, then print the rows and their serialized form."""
    async with db_conn.get_async_sa_session() as session:
        statement = select(Customer)
        results = await session.execute(statement)
        _ = (results.scalars().all())
        print(_)
        # NOTE(review): this is the call that fails with MissingGreenlet in the
        # traceback below.
        response = customer_schema.dump(_, many=True)
        print(response)
For the first print statement I'm getting
[<db.customer.Customer object at 0x10a183340>, <db.customer.Customer object at 0x10a183940>, <db.customer.Customer object at 0x10b0cd9d0>]
But then it fails with
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 60, in await_only
raise exc.MissingGreenlet(
sqlalchemy.exc.MissingGreenlet: greenlet_spawn has not been called; can't call await_() here. Was IO attempted in an unexpected place? (Background on this error at: http://sqlalche.me/e/14/xd2s)
So how can I use marshmallow-sqlalchemy to serialize the SQLAlchemy response?
Other options (packages, etc.) or a generic custom solution are OK too.
For the time being I'm using this:
statement = select(Customer)
results = await session.execute(statement)
_ = (results.scalars().all())
response = {}
for result in _:
value = {k: (v if not isinstance(v, sqlalchemy.orm.state.InstanceState) else '_') for k, v in result.__dict__.items()}
response[f'customer {value["id"]}'] = value
return response
Full traceback:
Traceback (most recent call last):
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/aiohttp/web_protocol.py", line 422, in _handle_request
resp = await self._request_handler(request)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/aiohttp/web_app.py", line 499, in _handle
resp = await handler(request)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/aiohttp/web_urldispatcher.py", line 948, in _iter
resp = await method()
File "/Users/ruslan/OneDrive/Home/Dev/projects/code/education/other/cft/views/user.py", line 24, in get
await get_all_users()
File "/Users/ruslan/OneDrive/Home/Dev/projects/code/education/other/cft/queries/user.py", line 18, in get_all_users
response = customer_schema.dump(_, many=True)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/schema.py", line 547, in dump
result = self._serialize(processed_obj, many=many)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/schema.py", line 509, in _serialize
return [
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/schema.py", line 510, in <listcomp>
self._serialize(d, many=False)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/schema.py", line 515, in _serialize
value = field_obj.serialize(attr_name, obj, accessor=self.get_attribute)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/fields.py", line 310, in serialize
value = self.get_value(obj, attr, accessor=accessor)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow_sqlalchemy/fields.py", line 27, in get_value
return super(fields.List, self).get_value(obj, attr, accessor=accessor)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/fields.py", line 239, in get_value
return accessor_func(obj, check_key, default)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/schema.py", line 472, in get_attribute
return get_value(obj, attr, default)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/utils.py", line 239, in get_value
return _get_value_for_key(obj, key, default)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/utils.py", line 253, in _get_value_for_key
return getattr(obj, key, default)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py", line 480, in __get__
return self.impl.get(state, dict_)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py", line 931, in get
value = self.callable_(state, passive)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/orm/strategies.py", line 879, in _load_for_state
return self._emit_lazyload(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/orm/strategies.py", line 1036, in _emit_lazyload
result = session.execute(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/orm/session.py", line 1689, in execute
result = conn._execute_20(statement, params or {}, execution_options)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1582, in _execute_20
return meth(self, args_10style, kwargs_10style, execution_options)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/sql/lambdas.py", line 481, in _execute_on_connection
return connection._execute_clauseelement(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1451, in _execute_clauseelement
ret = self._execute_context(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1813, in _execute_context
self._handle_dbapi_exception(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1998, in _handle_dbapi_exception
util.raise_(exc_info[1], with_traceback=exc_info[2])
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/util/compat.py", line 207, in raise_
raise exception
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1770, in _execute_context
self.dialect.do_execute(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/engine/default.py", line 717, in do_execute
cursor.execute(statement, parameters)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 449, in execute
self._adapt_connection.await_(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 60, in await_only
raise exc.MissingGreenlet(
sqlalchemy.exc.MissingGreenlet: greenlet_spawn has not been called; can't call await_() here. Was IO attempted in an unexpected place? (Background on this error at: http://sqlalche.me/e/14/xd2s)
The problem in this case is that the Marshmallow schema is configured to load related models (include_relationships=True). Since the initial query doesn't load them automatically, the schema triggers a query to fetch them, and this causes the error.
The simplest solution, demonstrated in the docs, is to eagerly load the related objects with their "parent":
async def get_all_users():
    """Select every Customer with its orders eagerly loaded, then print the serialized rows.

    Loading the relationship in the same awaited query means the schema can
    read it during dump() without triggering a lazy load.
    """
    # Imported here so the snippet is self-contained; it calls orm.selectinload.
    from sqlalchemy import orm

    async with db_conn.get_async_sa_session() as session:
        # Let's assume a Customer has a 1 to many relationship with an Order model
        statement = select(Customer).options(orm.selectinload(Customer.orders))
        results = await session.execute(statement)
        _ = (results.scalars().all())
        print(_)
        response = customer_schema.dump(_, many=True)
        print(response)
There is more discussion in the Preventing Implicit IO when Using AsyncSession section of the docs.
This is my code:-
from flask import Flask, render_template, request
from flask_sqlalchemy import SQLAlchemy
import json
import datetime
# Load the "parameters" section of config.json once, at import time.
with open("config.json", "r", errors="ignore") as c:
    parameters = json.load(c)["parameters"]

local_server = True
app = Flask(__name__)
# Choose the database URI according to the local_server flag above.
if local_server:
    app.config['SQLALCHEMY_DATABASE_URI'] = parameters["local_uri"]
else:
    app.config['SQLALCHEMY_DATABASE_URI'] = parameters["prod_uri"]
db = SQLAlchemy(app)
class Contacts(db.Model):
    '''
    Contact record with the columns: sno, name, email, message, date
    '''
    sno = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(80), nullable=False)
    message = db.Column(db.String(120), nullable=False)
    date = db.Column(db.String(12), nullable=True)
    email = db.Column(db.String(20), nullable=False)
@app.route("/")
def index():
    """Render index.html with the loaded configuration parameters."""
    return render_template('index.html', parameters=parameters)
@app.route("/about/")
def about():
    """Render about.html with the loaded configuration parameters."""
    return render_template('about.html', parameters=parameters)
@app.route("/contact/", methods=['GET', 'POST'])
def contact():
    """Render contact.html; on POST, first store the submitted form as a Contacts row."""
    if request.method == 'POST':
        # Add entry to the database
        name = request.form.get('name')
        email = request.form.get('email')
        message = request.form.get('message')
        entry = Contacts(name=name, message=message, date=datetime.date.today(), email=email)
        db.session.add(entry)
        db.session.commit()
    return render_template('contact.html', parameters=parameters)
# Start the Flask server with debug mode switched on.
app.run(debug=True)
and this is my json file saved as config.json:-
{
    "parameters":
    {
        "local_server": "True",
        "local_uri": "mysql://root:@localhost/coderoad",
        "prod_uri": "mysql://root:@localhost/coderoad",
        "git_url": "github.com/road2code"
    }
}
This is my error:-
PS C:\Users\shomi\OneDrive\Desktop\Flask> & C:/Users/shomi/AppData/Local/Programs/Python/Python38/python.exe c:/Users/shomi/OneDrive/Desktop/Flask/flask3.py
Traceback (most recent call last):
File "c:/Users/shomi/OneDrive/Desktop/Flask/flask3.py", line 7, in <module>
parameters = json.load(c)["parameters"]
File "C:\Users\shomi\AppData\Local\Programs\Python\Python38\lib\json\__init__.py", line 293, in load
return loads(fp.read(),
File "C:\Users\shomi\AppData\Local\Programs\Python\Python38\lib\json\__init__.py", line 357, in loads
return _default_decoder.decode(s)
File "C:\Users\shomi\AppData\Local\Programs\Python\Python38\lib\json\decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Users\shomi\AppData\Local\Programs\Python\Python38\lib\json\decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
I'm working on a bootstrap template, even after banging my head around google I am unable to find a solution, a little help would be much appreciated.
You may want to specify the path to config.json, perhaps './config.json'; also, try loading it in a REPL (e.g. IPython).
I don't know where your config.json resides relative to your Flask app. If a relative path doesn't work, try using the complete path.
# Same load as before, but with an explicit path relative to the working directory.
with open("./config.json", "r", errors="ignore") as c:
    parameters = json.load(c)["parameters"]
update:
try this
# Write a marker file using a bare relative name, to reveal the working directory.
with open('foo.bar', 'w') as dummy:
    dummy.write('dummy')
Put this before the code that reads config.json. Here, I'm not specifying the path to 'foo.bar', just as you didn't when you were trying to read config.json. After you run your app again, the location of foo.bar will be the location where Flask is trying to read config.json from.
I have some application which uses aiohttp.
I send a POST request to the appropriate endpoint, e.g.:
POST mysite.com/someendpoind/
with data similar to:
{"param1": "value1", "param2": "value2", ..., "paramn": None}
Then, on the backend side, I want to add an additional condition to this request's data:
data = await request.json()
data["additional_conditional"] = True
But request.json() fails with an error:
[ERROR] Error handling request
Traceback (most recent call last):
File "/usr/local/lib/python3.5/dist-packages/aiohttp/web_protocol.py", line 422, in start
resp = yield from self._request_handler(request)
File "/usr/local/lib/python3.5/dist-packages/aiohttp/web.py", line 306, in _handle
resp = yield from handler(request)
File "/usr/local/lib/python3.5/dist-packages/aiohttp_session/__init__.py", line 129, in middleware
response = yield from handler(request)
File "/opt/bikeamp/auth/__init__.py", line 57, in wrapped
return (yield from f(request, user))
File "<my_module>.py", line 185, in <my_func>
data_json = await request.json()
File "/usr/local/lib/python3.5/dist-packages/aiohttp/web_request.py", line 469, in json
return loads(body)
File "/usr/lib/python3.5/json/__init__.py", line 319, in loads
return _default_decoder.decode(s)
File "/usr/lib/python3.5/json/decoder.py", line 339, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/lib/python3.5/json/decoder.py", line 357, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
Then I decided to check somehow what is the content of my request:
await request.read()
b'field1=value1&field2=value2&field3=value3&field4=&field5=&field6='
So, I'm not sure, but the problem may be with empty parameters.
Also, I was trying to get this data via:
data = await request.post()
data["additional_condition"] = True
But this returns a MultiDictProxy, and Python can't pickle these objects.
Are there any known solutions?
I had the same issue. If the POST data was something like {"email": "some@email.com"}, check it with:
@router('/', methods=['POST', ])
async def post_request(request):
    """Log a POST request's form fields and its raw body text for inspection."""
    post = await request.post()
    email = post.get('email')  # because it's MultiDict
    logging.warning(post)  # see post details
    logging.warning(email)  # shows value "some@email.com"
    # Named body_text rather than json so the json module name is not shadowed.
    body_text = await request.text()
    logging.warning(body_text)  # shows json if it was ajax post request
I am attempting a custom encode, but get an error. The following code sample generates an error:
#!/usr/bin/python3
import json
class Contact:
    """A person identified by a first and a last name."""

    def __init__(self, first, last):
        self.first = first
        self.last = last

    @property
    def full_name(self):
        """Return the first and last name joined by a single space."""
        return ("{} {}".format(self.first, self.last))
class ContactEncoder(json.JSONEncoder):
    """JSON encoder intended to serialize Contact objects."""

    # NOTE(review): the method name is spelled "defualt". JSONEncoder only calls
    # a hook named "default", so this one is never invoked; that is the error
    # this question is about.
    def defualt(self, obj):
        if isinstance(obj, Contact):
            return {"is_contact": 'T'
                    ,"first": obj.first
                    ,"last": obj.last
                    ,"full_name": obj.full_name}
        return super().defualt(obj)
if __name__ == "__main__":
    c = Contact("Jay", "Loophole")
    # First dump the plain attribute dict, then the object through the custom encoder.
    print(json.dumps(c.__dict__))
    print(json.dumps(c, cls=ContactEncoder))
The error generated is:
{"first": "Jay", "last": "Loophole"}
Traceback (most recent call last):
File "json_dump.py", line 26, in <module>
print(json.dumps(c, cls=ContactEncoder))
File "/usr/lib/python3.5/json/__init__.py", line 237, in dumps
**kw).encode(obj)
File "/usr/lib/python3.5/json/encoder.py", line 198, in encode
chunks = self.iterencode(o, _one_shot=True)
File "/usr/lib/python3.5/json/encoder.py", line 256, in iterencode
return _iterencode(o, 0)
File "/usr/lib/python3.5/json/encoder.py", line 179, in default
raise TypeError(repr(o) + " is not JSON serializable")
TypeError: <__main__.Contact object at 0x7ffb3445a400> is not JSON serializable
The default dictionary is successfully displayed, but when a custom encode is passed as a cls parameter, an error occurs.
Any suggestions for the reason for the error?
Here is your updated code after the defUAlt --> defAUlt correction:
import json
class Contact:
    """A person identified by a first and a last name."""

    def __init__(self, first, last):
        self.first = first
        self.last = last

    @property
    def full_name(self):
        """Return the first and last name joined by a single space."""
        return "{} {}".format(self.first, self.last)
class ContactEncoder(json.JSONEncoder):
    """JSON encoder that can serialize Contact objects."""

    def default(self, obj):
        """Return a JSON-ready dict for a Contact; defer any other type to the base class."""
        if isinstance(obj, Contact):
            return {
                "is_contact": 'T',
                "first": obj.first,
                "last": obj.last,
                "full_name": obj.full_name,
            }
        # The base implementation raises TypeError for types it cannot encode.
        return super().default(obj)
if __name__ == "__main__":
    c = Contact("Jay", "Loophole")
    # First dump the plain attribute dict, then the object through the custom encoder.
    print(json.dumps(c.__dict__))
    print(json.dumps(c, cls=ContactEncoder))
You can check it out live on this page.
I am a newbie to Django error handling and I spent a week trying to figure out what exactly goes wrong when trying to update a MySQL table using Django forms. I end up with a ValueError: invalid literal for int() with base 10: '\x01' error. I tried surrounding the erroneous code with a try/except block that catches ValueError and prints the raw SQL query, and here's what I get.
The code:
def updateTask(request, task_id):
    """Update a task and its responsible-people record from the POSTed forms.

    NOTE(review): the original indentation was lost; the redirect is assumed to
    belong to the valid-form branch, and the response for non-POST or invalid
    submissions is not visible in this snippet.
    """
    #cur_usr_sale_point = PersonUnique.objects.filter(employees__employeeuser__auth_user = request.user.id).values_list('agreementemployees__agreement_unique__sale_point_id',flat=True)
    selected_task = Tasks.objects.get(id=task_id)
    responsible_people = TaskResponsiblePeople.objects.get(task_id=task_id)
    task_table = Tasks.objects.all()
    if request.method == 'POST':
        task_form = TaskForm(request.POST, instance=selected_task)
        responsible_people_form = TaskResponsiblePeopleForm(request.POST, instance=responsible_people)
        if task_form.is_valid() and responsible_people_form.is_valid():
            responsible_people_instance = responsible_people_form.save(commit=False)
            try:
                responsible_people_instance.task = task_form.save()
                responsible_people_form.save()
            except ValueError:
                # Debugging aid: print the most recent query Django recorded.
                from django.db import connection
                print(connection.queries[-1])
            return HttpResponseRedirect(reverse('task_list'))
The print gives me an absolutely valid MySQL SELECT statement (to my surprise; I expected an UPDATE statement).
The traceback without try-catch block:
Internal Server Error: /task_list/update_task/200/
Traceback (most recent call last):
File "C:\Python27\lib\site-packages\django\core\handlers\base.py", line 149, in get_response
response = self.process_exception_by_middleware(e, request)
File "C:\Python27\lib\site-packages\django\core\handlers\base.py", line 147, in get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "\\10.8.0.1\share\djprj\djprj\djprj\task\views.py", line 101, in updateTask
task_form.save();
File "C:\Python27\lib\site-packages\django\forms\models.py", line 451, in save
self.instance.save()
File "C:\Python27\lib\site-packages\django\db\models\base.py", line 700, in save
force_update=force_update, update_fields=update_fields)
File "C:\Python27\lib\site-packages\django\db\models\base.py", line 728, in save_base
updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
File "C:\Python27\lib\site-packages\django\db\models\base.py", line 793, in _save_table
forced_update)
File "C:\Python27\lib\site-packages\django\db\models\base.py", line 843, in _do_update
return filtered._update(values) > 0
File "C:\Python27\lib\site-packages\django\db\models\query.py", line 645, in _update
return query.get_compiler(self.db).execute_sql(CURSOR)
File "C:\Python27\lib\site-packages\django\db\models\sql\compiler.py", line 1149, in execute_sql
cursor = super(SQLUpdateCompiler, self).execute_sql(result_type)
File "C:\Python27\lib\site-packages\django\db\models\sql\compiler.py", line 837, in execute_sql
sql, params = self.as_sql()
File "C:\Python27\lib\site-packages\django\db\models\sql\compiler.py", line 1117, in as_sql
val = field.get_db_prep_save(val, connection=self.connection)
File "C:\Python27\lib\site-packages\django\db\models\fields\__init__.py", line 728, in get_db_prep_save
prepared=False)
File "C:\Python27\lib\site-packages\django\db\models\fields\__init__.py", line 720, in get_db_prep_value
value = self.get_prep_value(value)
File "C:\Python27\lib\site-packages\django\db\models\fields\__init__.py", line 1853, in get_prep_value
return int(value)
ValueError: invalid literal for int() with base 10: '\x01'
[10/Apr/2016 11:15:46] "POST /task_list/update_task/200/ HTTP/1.1" 500 126245
Help me out please !!!
EDIT: added is_valid method
You need to first use the form's is_valid method:
A Form instance has an is_valid() method, which runs validation routines for all its fields. When this method is called, if all fields contain valid data, it returns True and places the form's data in its cleaned_data attribute.
from django.shortcuts import render
from django.http import HttpResponseRedirect
from .forms import NameForm
def get_name(request):
    """Process a submitted NameForm on POST; otherwise show a blank form.

    A valid POST redirects to /thanks/. An invalid POST falls through and
    re-renders name.html with the bound form.
    """
    # if this is a POST request we need to process the form data
    if request.method == 'POST':
        # create a form instance and populate it with data from the request:
        form = NameForm(request.POST)
        # check whether it's valid:
        if form.is_valid():
            # process the data in form.cleaned_data as required
            # ...
            # redirect to a new URL:
            return HttpResponseRedirect('/thanks/')
    # if a GET (or any other method) we'll create a blank form
    else:
        form = NameForm()
    return render(request, 'name.html', {'form': form})
Source: Django Docs: Working with forms
I would recommend that you read the above documentation.