I am a newbie to Django error handling and I spent a week trying to figure out what exactly goes wrong when trying to update a MySQL table using Django forms. I end up with ValueError: invalid literal for int() with base 10: '\x01' error. I tried to surround the erroneous code with try catch block trapping valueError and printing the row sql query, and here's what I get.
The code:
def updateTask(request, task_id):
#cur_usr_sale_point = PersonUnique.objects.filter(employees__employeeuser__auth_user = request.user.id).values_list('agreementemployees__agreement_unique__sale_point_id',flat=True)
selected_task = Tasks.objects.get(id=task_id)
responsible_people = TaskResponsiblePeople.objects.get(task_id = task_id)
task_table = Tasks. objects.all()
if request.method == 'POST':
task_form = TaskForm(request.POST,instance=selected_task)
responsible_people_form = TaskResponsiblePeopleForm(request.POST, instance = responsible_people)
if task_form.is_valid() and responsible_people_form.is_valid():
responsible_people_instance = responsible_people_form.save(commit=False)
try:
responsible_people_instance.task = task_form.save()
responsible_people_form.save()
except ValueError:
from django.db import connection
print connection.queries[-1]
return HttpResponseRedirect(reverse('task_list'))
Print gives me an absolutely valid MySQL Select-statement (to my surpise, I expected an update-statement).
The traceback without try-catch block:
Internal Server Error: /task_list/update_task/200/
Traceback (most recent call last):
File "C:\Python27\lib\site-packages\django\core\handlers\base.py", line 149, in get_response
response = self.process_exception_by_middleware(e, request)
File "C:\Python27\lib\site-packages\django\core\handlers\base.py", line 147, in get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "\\10.8.0.1\share\djprj\djprj\djprj\task\views.py", line 101, in updateTask
task_form.save();
File "C:\Python27\lib\site-packages\django\forms\models.py", line 451, in save
self.instance.save()
File "C:\Python27\lib\site-packages\django\db\models\base.py", line 700, in save
force_update=force_update, update_fields=update_fields)
File "C:\Python27\lib\site-packages\django\db\models\base.py", line 728, in save_base
updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
File "C:\Python27\lib\site-packages\django\db\models\base.py", line 793, in _save_table
forced_update)
File "C:\Python27\lib\site-packages\django\db\models\base.py", line 843, in _do_update
return filtered._update(values) > 0
File "C:\Python27\lib\site-packages\django\db\models\query.py", line 645, in _update
return query.get_compiler(self.db).execute_sql(CURSOR)
File "C:\Python27\lib\site-packages\django\db\models\sql\compiler.py", line 1149, in execute_sql
cursor = super(SQLUpdateCompiler, self).execute_sql(result_type)
File "C:\Python27\lib\site-packages\django\db\models\sql\compiler.py", line 837, in execute_sql
sql, params = self.as_sql()
File "C:\Python27\lib\site-packages\django\db\models\sql\compiler.py", line 1117, in as_sql
val = field.get_db_prep_save(val, connection=self.connection)
File "C:\Python27\lib\site-packages\django\db\models\fields\__init__.py", line 728, in get_db_prep_save
prepared=False)
File "C:\Python27\lib\site-packages\django\db\models\fields\__init__.py", line 720, in get_db_prep_value
value = self.get_prep_value(value)
File "C:\Python27\lib\site-packages\django\db\models\fields\__init__.py", line 1853, in get_prep_value
return int(value)
ValueError: invalid literal for int() with base 10: '\x01'
[10/Apr/2016 11:15:46] "POST /task_list/update_task/200/ HTTP/1.1" 500 126245
Help me out please !!!
EDIT: added is_valid method
You need to first use the form's is_valid method:
A Form instance has an is_valid() method, which runs validation routines for all its fields. When this method is called, if all fields contain valid data.
from django.shortcuts import render
from django.http import HttpResponseRedirect
from .forms import NameForm
def get_name(request):
# if this is a POST request we need to process the form data
if request.method == 'POST':
# create a form instance and populate it with data from the request:
form = NameForm(request.POST)
# check whether it's valid:
if form.is_valid():
# process the data in form.cleaned_data as required
# ...
# redirect to a new URL:
return HttpResponseRedirect('/thanks/')
# if a GET (or any other method) we'll create a blank form
else:
form = NameForm()
return render(request, 'name.html', {'form': form})
Source: Django Docs: Working with forms
I would recommend that you read the above documentation.
Related
In given below if Medicine table consists medicine_name then query execute fine, but when medicine name doesn't exist on the Corresponding table then error is occured.
Matching Query Doesn't Exists
views.py Code
#api_view(['POST'])
def addPeople(request):
m = People()
m.bp_no = request.POST['bp_no']
m.name = request.POST['name']
m.corporation_name = request.POST['corporation_name']
m.medicine_name = request.POST['medicine_name']
m.no_of_medicine = request.POST['no_of_medicine']
existing = Medicine.objects.get(medicine_name=m.medicine_name).no_of_medicine - int(m.no_of_medicine)
p_key = Medicine.objects.get(medicine_name=m.medicine_name).id
if Medicine.objects.filter(medicine_name=m.medicine_name).exists():
if existing > 0:
m.save()
Medicine.objects.filter(id=p_key).update(no_of_medicine=existing)
return Response({"message": "Successfully Recorded"})
else:
return Response({"message": "Not much Medicine Stored"})
else:
return Response({"message": "Medicine is not Stored"})
models.py
class People(models.Model):
bp_no = models.IntegerField(blank=False,null=False)
name = models.CharField(blank=False,null=False,max_length=200)
corporation_name = models.CharField(blank=False,null=False,max_length=200)
medicine_name = models.CharField(blank=False,null=False,max_length=200)
no_of_medicine = models.IntegerField()
class Medicine(models.Model):
medicine_name = models.CharField(null=False,blank=False,max_length=200)
no_of_medicine = models.IntegerField(null=False,blank=False)
def __str__(self):
return self.medicine_name
Error Traceback: When Medicine table doesn't contains the corresponding filter name then This error will be shown
Internal Server Error: /api/add-people/
Traceback (most recent call last):
File "C:\Users\MonirHossain\AppData\Roaming\Python\Python39\site-packages\django\core\handlers\exception.py", line 47, in inner
response = get_response(request)
File "C:\Users\MonirHossain\AppData\Roaming\Python\Python39\site-packages\django\core\handlers\base.py", line 181, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "C:\Users\MonirHossain\AppData\Roaming\Python\Python39\site-packages\django\views\decorators\csrf.py", line 54, in wrapped_view
return view_func(*args, **kwargs)
File "C:\Users\MonirHossain\AppData\Roaming\Python\Python39\site-packages\django\views\generic\base.py", line 70, in view
return self.dispatch(request, *args, **kwargs)
File "C:\Users\MonirHossain\AppData\Roaming\Python\Python39\site-packages\rest_framework\views.py", line 509, in dispatch
response = self.handle_exception(exc)
File "C:\Users\MonirHossain\AppData\Roaming\Python\Python39\site-packages\rest_framework\views.py", line 469, in handle_exception
self.raise_uncaught_exception(exc)
File "C:\Users\MonirHossain\AppData\Roaming\Python\Python39\site-packages\rest_framework\views.py", line 480, in raise_uncaught_exception
raise exc
File "C:\Users\MonirHossain\AppData\Roaming\Python\Python39\site-packages\rest_framework\views.py", line 506, in dispatch
response = handler(request, *args, **kwargs)
File "C:\Users\MonirHossain\AppData\Roaming\Python\Python39\site-packages\rest_framework\decorators.py", line 50, in handler
return func(*args, **kwargs)
File "E:\django\backend\medirecords\api\views.py", line 37, in addPeople
existing = Medicine.objects.get(medicine_name=m.medicine_name).no_of_medicine - int(m.no_of_medicine)
File "C:\Users\MonirHossain\AppData\Roaming\Python\Python39\site-packages\django\db\models\manager.py", line 85, in manager_method
return getattr(self.get_queryset(), name)(*args, **kwargs)
File "C:\Users\MonirHossain\AppData\Roaming\Python\Python39\site-packages\django\db\models\query.py", line 435, in get
raise self.model.DoesNotExist(
api.models.Medicine.DoesNotExist: Medicine matching query does not exist.
I haven't tested this, but purely in terms of handling objects not found, I have a suggestion you can try to simplify the conditions / flow.
.get(): Wrap in try / except
get(*args, **kwargs) raises Model.DoesNotExist when not found.
When there's a not found, you need to determine how you want to handle it. For example, you can have a default fallback value, or raise an error.
In this case the code would fail if the existing medicine wasn't found, so I will have it raise.
#api_view(["POST"])
def addPeople(request):
medicine_name = request.POST["medicine_name"]
try:
medicine = Medicine.objects.get(medicine_name=medicine_name)
except Medicine.DoesNotExist:
return Response({"message": "Not much Medicine Stored"})
m = People()
m.bp_no = request.POST["bp_no"]
m.name = request.POST["name"]
m.corporation_name = request.POST["corporation_name"]
m.medicine_name = medicine_name
m.no_of_medicine = request.POST["no_of_medicine"]
existing = medicine.no_of_medicine - int(m.no_of_medicine)
p_key = medicine.id
if medicine.exists():
if existing > 0:
m.save()
medicine.num_of_medicine = existing
medicine.save(update_fields=["num_of_medicine"])
return Response({"message": "Successfully Recorded"})
else:
return Response({"message": "Not much Medicine Stored"})
else:
return Response({"message": "Medicine is not Stored"})
Move object check earlier
Since the request relies on Medicine, this is should be caught early as possible.
Uses try/catch as .get() will always raise when object is not found
The alternative is to do .exists() or filter(*args, **kwargs) + .first()
Update dependent model via Model.save() and limit field updated to num_of_medicine via update_fields.
get_object_or_404()
get_object_or_404(klass, *args, **kwargs) can also be used in lieu of the earlier try/catch block:
from django.shortcuts import get_object_or_404
medicine = get_object_or_404(Medicine.objects.all(), medicine_name=m.medicine_name)
existing = medicine.no_of_medicine - int(m.no_of_medicine)
Final note
I'm assuming this is an example, but it also may be good to validate request.POST["no_of_medicine"] in the POST data against a backend source of truth to make sure it's not faked.
I'm trying to use marshmallow-sqlalchemy with aiohttp and I have followed their docs with the basic example and I'm getting an error.
I have this schema:
from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
from db.customer import Customer
class CustomerSchema(SQLAlchemyAutoSchema):
class Meta:
model = Customer
include_relationships = True
load_instance = True
And then the following code for the query:
from sqlalchemy import select
from db import db_conn
from db.customer import Customer
from queries.schema import CustomerSchema
customer_schema = CustomerSchema()
async def get_all_users():
async with db_conn.get_async_sa_session() as session:
statement = select(Customer)
results = await session.execute(statement)
_ = (results.scalars().all())
print(_)
response = customer_schema.dump(_, many=True)
print(response)
For the first print statement I'm getting
[<db.customer.Customer object at 0x10a183340>, <db.customer.Customer object at 0x10a183940>, <db.customer.Customer object at 0x10b0cd9d0>]
But then it fails with
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 60, in await_only
raise exc.MissingGreenlet(
sqlalchemy.exc.MissingGreenlet: greenlet_spawn has not been called; can't call await_() here. Was IO attempted in an unexpected place? (Background on this error at: http://sqlalche.me/e/14/xd2s)
So how can I use marshmallow-sqlalchemy to serialize the SqlAlchemy reponse?
Another options (packages, etc) or a generic custom solutions are OK too.
For the time being I'm using this:
statement = select(Customer)
results = await session.execute(statement)
_ = (results.scalars().all())
response = {}
for result in _:
value = {k: (v if not isinstance(v, sqlalchemy.orm.state.InstanceState) else '_') for k, v in result.__dict__.items()}
response[f'customer {value["id"]}'] = value
return response
Full traceback:
Traceback (most recent call last):
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/aiohttp/web_protocol.py", line 422, in _handle_request
resp = await self._request_handler(request)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/aiohttp/web_app.py", line 499, in _handle
resp = await handler(request)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/aiohttp/web_urldispatcher.py", line 948, in _iter
resp = await method()
File "/Users/ruslan/OneDrive/Home/Dev/projects/code/education/other/cft/views/user.py", line 24, in get
await get_all_users()
File "/Users/ruslan/OneDrive/Home/Dev/projects/code/education/other/cft/queries/user.py", line 18, in get_all_users
response = customer_schema.dump(_, many=True)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/schema.py", line 547, in dump
result = self._serialize(processed_obj, many=many)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/schema.py", line 509, in _serialize
return [
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/schema.py", line 510, in <listcomp>
self._serialize(d, many=False)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/schema.py", line 515, in _serialize
value = field_obj.serialize(attr_name, obj, accessor=self.get_attribute)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/fields.py", line 310, in serialize
value = self.get_value(obj, attr, accessor=accessor)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow_sqlalchemy/fields.py", line 27, in get_value
return super(fields.List, self).get_value(obj, attr, accessor=accessor)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/fields.py", line 239, in get_value
return accessor_func(obj, check_key, default)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/schema.py", line 472, in get_attribute
return get_value(obj, attr, default)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/utils.py", line 239, in get_value
return _get_value_for_key(obj, key, default)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/utils.py", line 253, in _get_value_for_key
return getattr(obj, key, default)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py", line 480, in __get__
return self.impl.get(state, dict_)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py", line 931, in get
value = self.callable_(state, passive)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/orm/strategies.py", line 879, in _load_for_state
return self._emit_lazyload(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/orm/strategies.py", line 1036, in _emit_lazyload
result = session.execute(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/orm/session.py", line 1689, in execute
result = conn._execute_20(statement, params or {}, execution_options)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1582, in _execute_20
return meth(self, args_10style, kwargs_10style, execution_options)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/sql/lambdas.py", line 481, in _execute_on_connection
return connection._execute_clauseelement(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1451, in _execute_clauseelement
ret = self._execute_context(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1813, in _execute_context
self._handle_dbapi_exception(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1998, in _handle_dbapi_exception
util.raise_(exc_info[1], with_traceback=exc_info[2])
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/util/compat.py", line 207, in raise_
raise exception
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1770, in _execute_context
self.dialect.do_execute(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/engine/default.py", line 717, in do_execute
cursor.execute(statement, parameters)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 449, in execute
self._adapt_connection.await_(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 60, in await_only
raise exc.MissingGreenlet(
sqlalchemy.exc.MissingGreenlet: greenlet_spawn has not been called; can't call await_() here. Was IO attempted in an unexpected place? (Background on this error at: http://sqlalche.me/e/14/xd2s)
The problem in this case is that the Marshmallow schema is configured to load related models (include_relationships=True). Since the initial query doesn't load them automatically, the schema triggers a query to fetch them, and this causes the error.
The simplest solution, demonstrated in the docs, is to eagerly load the related objects with their "parent":
async def get_all_users():
async with db_conn.get_async_sa_session() as session:
# Let's assume a Customer has a 1 to many relationship with an Order model
statement = select(Customer).options(orm.selectinload(Customer.orders))
results = await session.execute(statement)
_ = (results.scalars().all())
print(_)
response = customer_schema.dump(_, many=True)
print(response)
There is more discussion in the Preventing Implicit IO when Using AsyncSession section of the docs.
I am having problems in displaying the page due to models. I honestly don't know what the problem is and every solution I see here is beyond me, since I am only beginning to learn Django. Here's the summary:
I have created two forms from two models(which are interrelated) and rendering the data from those forms in a single page. The data displays fine on the dashboard panel and the submission does save it to the database. I have tried overriding the save function in form but with no result. However, whenever I try and load the pages containing same data (display pages like home for customers)I am hit with:
Traceback (most recent call last):
File "C:\Users\shahi\AppData\Local\Programs\Python\Python38-32\lib\site-packages\django\core\handlers\exception.py", line 34, in inner
response = get_response(request)
File "C:\Users\shahi\AppData\Local\Programs\Python\Python38-32\lib\site-packages\django\core\handlers\base.py", line 115, in _get_response
response = self.process_exception_by_middleware(e, request)
File "C:\Users\shahi\AppData\Local\Programs\Python\Python38-32\lib\site-packages\django\core\handlers\base.py", line 113, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "D:\Softwarica\3rd semester\Modern Web\Assignment\Project - Farmer's Market\farmersmarket\home\views.py", line 9, in index
order, created = Order.objects.get_or_create(customer=customer, complete=False)
File "C:\Users\shahi\AppData\Local\Programs\Python\Python38-32\lib\site-packages\django\db\models\manager.py", line 82, in manager_method
return getattr(self.get_queryset(), name)(*args, **kwargs)
File "C:\Users\shahi\AppData\Local\Programs\Python\Python38-32\lib\site-packages\django\db\models\query.py", line 559, in get_or_create
return self.get(**kwargs), False
File "C:\Users\shahi\AppData\Local\Programs\Python\Python38-32\lib\site-packages\django\db\models\query.py", line 419, in get
raise self.model.MultipleObjectsReturned(
shop.models.Order.MultipleObjectsReturned: get() returned more than one Order -- it returned 2!
The code in forms.py:
from django.forms import ModelForm
from shop.models import *
class OrderForm(ModelForm):
class Meta:
model = Order
fields = ('customer',)
class OrderItemForm(ModelForm):
class Meta:
model = OrderItem
fields = ('item', 'status',)
views.py
def createOrder(request):
form_order = OrderForm()
form_orderitem = OrderItemForm()
if request.method == 'POST':
# print('Printing POST', request.POST)
form_order = OrderForm(request.POST)
form_orderitem = OrderItemForm(request.POST)
if form_order.is_valid() and form_orderitem.is_valid():
form_order.save()
form_orderitem.save()
return redirect('/dashboard')
context = {'form_order': form_order, 'form_orderitem': form_orderitem}
return render(request, 'dashboard/order_form.html', context)
I have tried overriding the save function in form but with no result.
get() cannot be used for when more than 1 objects are returned. try with filter().
Do check first() or last(), if it helps you.
I have a CSV file, which contains a list of Google extension IDs.
I'm writing a code that will read the extension IDs, add the webstore url, then perform a basic get request:
import csv
import requests
with open('small.csv', 'rb') as f:
reader = csv.reader(f)
for row in reader:
urls = "https://chrome.google.com/webstore/detail/" + row[0]
print urls
r = requests.get([urls])
Running this code results in the following Traceback:
Traceback (most recent call last):
File "C:\Users\tom\Dropbox\Python\panya\test.py", line 9, in <module>
r = requests.get([urls])
File "C:\Python27\lib\site-packages\requests\api.py", line 69, in get
return request('get', url, params=params, **kwargs)
File "C:\Python27\lib\site-packages\requests\api.py", line 50, in request
response = session.request(method=method, url=url, **kwargs)
File "C:\Python27\lib\site-packages\requests\sessions.py", line 465, in request
resp = self.send(prep, **send_kwargs)
File "C:\Python27\lib\site-packages\requests\sessions.py", line 567, in send
adapter = self.get_adapter(url=request.url)
File "C:\Python27\lib\site-packages\requests\sessions.py", line 641, in get_adapter
raise InvalidSchema("No connection adapters were found for '%s'" % url)
InvalidSchema: No connection adapters were found for '['https://chrome.google.com/webstore/detail/blpcfgokakmgnkcojhhkbfbldkacnbeo']'
How can revise the code, so that it would accept the urls in the list, and make the GET request?
requests.get expects a string, but you're creating and passing a list [urls]
r = requests.get([urls])
Change it to just
r = requests.get(urls)
and it should work.
I am trying to write a spider which crawls through the following JSON response:
http://gdata.youtube.com/feeds/api/standardfeeds/UK/most_popular?v=2&alt=json
How would the spider look if I would want to crawl all the titles of the videos? All my Spiders dont work.
from scrapy.spider import BaseSpider
import json
from youtube.items import YoutubeItem
class MySpider(BaseSpider):
name = "youtubecrawler"
allowed_domains = ["gdata.youtube.com"]
start_urls = ['http://www.gdata.youtube.com/feeds/api/standardfeeds/DE/most_popular?v=2&alt=json']
def parse(self, response):
items []
jsonresponse = json.loads(response)
for video in jsonresponse["feed"]["entry"]:
item = YoutubeItem()
print jsonresponse
print video["media$group"]["yt$videoid"]["$t"]
print video["media$group"]["media$description"]["$t"]
item ["title"] = video["title"]["$t"]
print video["author"][0]["name"]["$t"]
print video["category"][1]["term"]
items.append(item)
return items
I always get following error:
2014-01-05 16:55:21+0100 [youtubecrawler] ERROR: Spider error processing <GET http://gdata.youtube.com/feeds/api/standardfeeds/DE/most_popular?v=2&alt=json>
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/twisted/internet/base.py", line 1201, in mainLoop
self.runUntilCurrent()
File "/usr/local/lib/python2.7/dist-packages/twisted/internet/base.py", line 824, in runUntilCurrent
call.func(*call.args, **call.kw)
File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 382, in callback
self._startRunCallbacks(result)
File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 490, in _startRunCallbacks
self._runCallbacks()
--- <exception caught here> ---
File "/usr/local/lib/python2.7/dist-packages/twisted/internet/defer.py", line 577, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "/home/bxxxx/svn/ba_txxxxx/scrapy/youtube/spiders/test.py", line 15, in parse
jsonresponse = json.loads(response)
File "/usr/lib/python2.7/json/__init__.py", line 326, in loads
return _default_decoder.decode(s)
File "/usr/lib/python2.7/json/decoder.py", line 365, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
exceptions.TypeError: expected string or buffer
found two issues in your code:
start url is not accessible, I took out the www from it
changed json.loads(response) to json.loads(response.body_as_unicode())
this works well for me:
class MySpider(BaseSpider):
name = "youtubecrawler"
allowed_domains = ["gdata.youtube.com"]
start_urls = ['http://gdata.youtube.com/feeds/api/standardfeeds/DE/most_popular?v=2&alt=json']
def parse(self, response):
items = []
jsonresponse = json.loads(response.body_as_unicode())
for video in jsonresponse["feed"]["entry"]:
item = YoutubeItem()
print video["media$group"]["yt$videoid"]["$t"]
print video["media$group"]["media$description"]["$t"]
item ["title"] = video["title"]["$t"]
print video["author"][0]["name"]["$t"]
print video["category"][1]["term"]
items.append(item)
return items