How to define a unique json column in sqlalchemy? (postgres) - json

I'd like to define a unique json column via sqlalchemy on postgres. the naive approach did not work:
this:
values = db.Column(db.JSON(), nullable=False, unique=True)
led to this:
sqlalchemy.exc.ProgrammingError: (psycopg2.ProgrammingError) data type json has no default operator class for access method "btree"
any ideas?

Create a new column that will receive the json md5 hash:
hash_values = db.Column(db.String(32), default="")
Declare the combination of the json field and the hash as unique:
# The trailing comma makes this a one-element tuple; without it the parentheses
# are only grouping, and declarative requires __table_args__ to be a tuple or dict.
__table_args__ = (db.UniqueConstraint('values', 'hash_values'),)
The complete model then looks like this:
import json
import hashlib
class Register(db.Model):
    """Model that stores a JSON document next to an MD5 digest of its content.

    The digest is computed from a canonical serialisation (keys sorted), so
    two documents that differ only in key order get the same ``hash_values``.
    """

    __tablename__ = 'register'
    # NOTE(review): PostgreSQL's plain ``json`` type has no btree operator
    # class, so keeping ``values`` in this constraint may raise the same error
    # as ``unique=True`` on the column; constraining ``hash_values`` alone
    # should be enough -- confirm against the target database.
    __table_args__ = (
        db.UniqueConstraint('values', 'hash_values'),
    )

    # ``default=dict`` yields an empty JSON object; the string "{}" would be
    # serialised as a JSON string rather than as an object.
    values = db.Column(db.JSON, default=dict)
    hash_values = db.Column(db.String(32), default="")

    def __init__(self, values):
        """Store ``values`` and the hex MD5 digest of its sorted-key JSON form."""
        self.values = values
        canonical = json.dumps(values, sort_keys=True)
        self.hash_values = hashlib.md5(canonical.encode("utf-8")).hexdigest()

I don't know if you import JSON from sqlalchemy as follows:
from sqlalchemy.types import JSON
I think calling sqlalchemy JSON type should work. You could try something like this:
values = db.Column(JSON, nullable=False, unique=True)
Remember the base types.JSON provides keyed index operations, integer index operations and path index operations.
For more information see this
Hope it works for you.

Related

How to use Enum with schema in SQLAlchemy?

I am trying to create a table inside a schema using SQLAlchemy. It has a column of type Enum. Following is the code
import enum
import sqlalchemy
from sqlalchemy import Column, Text, Enum
from sqlalchemy.schema import CreateSchema
import sqlalchemy_utils
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class T(enum.Enum):
    """Two-member enumeration (``X`` = 1, ``Y`` = 2)."""

    X = 1
    Y = 2
ET = Enum(T, inherit_schema=True)
#ET = Enum(T, schema="schema1") # This works
class A(Base):
    """Table ``a``: text primary key ``c1``, required text ``c2``, enum ``c3``."""

    __tablename__ = 'a'

    c1 = Column(Text, primary_key=True, nullable=False)
    c2 = Column(Text, nullable=False)
    c3 = Column(ET)  # uses the module-level ET enum type
# In a database URL the credentials are separated from the host by "@".
engine = sqlalchemy.create_engine("postgresql://postgres:mypass@172.17.0.2/mydb")
engine.execute(CreateSchema('schema1'))
# Tables declared without a schema (key None) are translated to "schema1".
schema_engine = engine.execution_options(schema_translate_map={None: "schema1"})
Base.metadata.create_all(schema_engine)
This fails at the "create_all" line with the following error
sqlalchemy.exc.ProgrammingError: (psycopg2.errors.DuplicateObject)
type "t" already exists [SQL: "CREATE TYPE schema1.t AS ENUM ('X',
'Y')"] (Background on this error at: http://sqlalche.me/e/f405)
I am using this pattern because I will have multiple schemas inside which the same table has to be created.
The reason you get the error is because there is a bug in the version that you are using.
I would suggest to use a virtual environment and use the latest stable release of SQLAlchemy.

How to save Scrapy items from pipeline in MySQL table by order from another table (multiple tables)?

This is my first question on Stack Overflow ever. :P Everything works just fine except the crawl order: I added a priority method, but it didn't work correctly. I need to write all the author data first, then all the album and song data, and store them in the DB in that order. I want to query items in one MySQL table ordered by an item from another table.
Database structure: https://i.postimg.cc/GhF4w32x/db.jpg
Example: first write all author items in Author table, and then order album items in Album table by authorId from Author table.
Github repository: https://github.com/markostalma/discogs/tree/master/discogs
P.S. I have a three item class for author, album and song parser.
I also tried building a different spider flow that puts everything in one item class, but with no success. The order was the same. :(
Sorry for my bad English.
You need to set up an item pipeline for this. I would suggest using SQLAlchemy to build the SQL item and connect to the DB. Your SQLAlchemy class will reflect all the table relationships you have in your DB schema. Let me show you. This is a working example of a similar pipeline that I have, except that you would set up your SQLAlchemy class to contain the m2m or foreign-key relationships you need. You'll have to refer to their documentation [1].
An even more pythonic way of doing this would be to keep your SQL Alchemy class and item names the same and do something like for k,v in item.items():
This way you can just loop the item and set what is there. Code is long and violates DRY for a purpose though.
# -*- coding: utf-8 -*-
from scrapy.exceptions import DropItem
from sqlalchemy import create_engine, Column, Integer, String, DateTime, ForeignKey, Boolean, Sequence, Date, Text
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
import datetime
DeclarativeBase = declarative_base()
def db_connect():
    """Create and return the SQLAlchemy engine used by this pipeline.

    The engine points at the local SQLite file ``reviews.sqlite`` and echoes
    every SQL statement it emits (``echo=True``). Tables are not created here;
    see ``__tablename__`` on the declarative model for the table name.

    MySQL example: engine = create_engine('mysql://scott:tiger@localhost/foo')
    """
    return create_engine('sqlite:///reviews.sqlite', echo=True)
class GoogleReviewItem(DeclarativeBase):
    """Declarative model for one row of the ``google_review_item`` table."""

    __tablename__ = 'google_review_item'

    pk = Column('pk', String, primary_key=True)
    query = Column('query', String(500))
    entity_name = Column('entity_name', String(500))
    user = Column('user', String(500))
    review_score = Column('review_score', Integer)
    description = Column('description', String(5000))
    top_words = Column('top_words', String(10000), nullable=True)
    bigrams = Column('bigrams', String(10000), nullable=True)
    trigrams = Column('trigrams', String(10000), nullable=True)
    google_average = Column('google_average', Integer)
    total_reviews = Column('total_reviews', Integer)
    review_date = Column('review_date', DateTime)
    # The callable is passed uncalled so it is evaluated per insert.
    created_on = Column('created_on', DateTime, default=datetime.datetime.now)
engine = db_connect()
Session = sessionmaker(bind=engine)
def create_individual_table(engine):
    """Create every table registered on ``DeclarativeBase`` that is missing.

    ``create_all`` checks for each table's existence and only creates the
    ones that do not already exist.
    """
    DeclarativeBase.metadata.create_all(engine)
create_individual_table(engine)
session = Session()
def get_row_by_pk(pk, model):
    """Return the ``model`` row whose primary key is ``pk``.

    Uses the module-level ``session``; the result of ``Query.get`` is returned
    as is (``None`` when no such row exists).
    """
    return session.query(model).get(pk)
class GooglePipeline(object):
    """Scrapy item pipeline that stores each new item once, keyed by ``pk``."""

    def process_item(self, item, spider):
        """Persist ``item`` unless a row with the same ``pk`` already exists.

        Returns:
            The unchanged ``item`` after it has been committed.

        Raises:
            DropItem: a row with ``item['pk']`` is already stored.
        """
        if get_row_by_pk(item['pk'], GoogleReviewItem) is not None:
            raise DropItem()

        # NOTE(review): several keywords below (google_title, review_count,
        # website, website_type, text, date) are not columns declared on
        # GoogleReviewItem in this file, and ``pk`` is never passed; reconcile
        # the item fields with the model before relying on this.
        googlesite = GoogleReviewItem(
            query=item['query'],
            google_title=item['google_title'],
            review_score=item['review_score'],
            review_count=item['review_count'],
            website=item['website'],
            website_type=item['website_type'],
            top_words=item['top_words'],
            bigrams=item['bigrams'],
            trigrams=item['trigrams'],
            text=item['text'],
            date=item['date']
        )
        session.add(googlesite)
        try:
            session.commit()
        except Exception:
            # Leave the shared session usable for the next item, then re-raise.
            session.rollback()
            raise
        return item
[1]: https://docs.sqlalchemy.org/en/13/core/constraints.html

Coercion in SQLAlchemy from Column annotations

Good day everyone,
I have a file of strings corresponding to the fields of my SQLAlchemy object. Some fields are floats, some are ints, and some are strings.
I'd like to be able to coerce my string into the proper type by interrogating the column definition. Is this possible?
For instance:
class MyClass(Base):
...
my_field = Column(Float)
It feels like one should be able to say something like MyClass.my_field.column.type and either ask the type to coerce the string directly or write some conditions and int(x), float(x) as needed.
I wondered whether this would happen automatically if all the values were strings, but I received Oracle errors because the type was incorrect.
Currently I naively coerce -- if it's float()able, that's my value, else it's a string, and I trust that integral floats will become integers upon inserting because they are represented exactly. But the runtime value is wrong (e.g. 1.0 vs 1) and it just seems sloppy.
Thanks for your input!
SQLAlchemy 0.7.4
You can iterate over columns of the mapped Table:
for col in MyClass.__table__.columns:
    # A single pre-formatted argument prints the same on Python 2 and 3.
    print("%s %r" % (col, col.type))
... so you can check the type of each field by its name like this:
def get_col_type(cls_, fld_):
    """Return the type object of the column named ``fld_``.

    Args:
        cls_: mapped class whose ``__table__.columns`` is searched.
        fld_: column name to look for.

    Returns:
        The ``type`` attribute of the first column whose ``name`` equals
        ``fld_``, or ``None`` when no column matches.
    """
    for col in cls_.__table__.columns:
        if col.name == fld_:
            return col.type  # this contains the instance of SA type
    return None
assert Float == type(get_col_type(MyClass, 'my_field'))
I would cache the results though if your file is large in order to save the for-loop on every row imported from the file.
Type coercion for sqlalchemy prior to committing to some database.
How can I verify Column data types in the SQLAlchemy ORM?
from sqlalchemy import (
Column,
Integer,
String,
DateTime,
)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import event
import datetime
Base = declarative_base()
type_coercion = {
Integer: int,
String: str,
DateTime: datetime.datetime,
}
# this event is called whenever an attribute
# on a class is instrumented
@event.listens_for(Base, 'attribute_instrument')
def configure_listener(class_, key, inst):
    """Attach a coercing "set" listener to every column-backed attribute."""
    if not hasattr(inst.property, 'columns'):
        # attributes whose property has no ``columns`` are left alone
        return

    # this event is called whenever a "set"
    # occurs on that instrumented attribute
    @event.listens_for(inst, "set", retval=True)
    def set_(instance, value, oldvalue, initiator):
        """Return ``value`` converted to the Python type mapped for the column."""
        desired_type = type_coercion.get(inst.property.columns[0].type.__class__)
        # Column types missing from type_coercion, and None values, pass
        # through unchanged instead of failing on ``None(value)`` / ``int(None)``.
        if desired_type is None or value is None:
            return value
        return desired_type(value)
class MyObject(Base):
    """Example model with one string, one integer and one datetime column."""

    __tablename__ = 'mytable'

    id = Column(Integer, primary_key=True)
    svalue = Column(String)
    ivalue = Column(Integer)
    dvalue = Column(DateTime)
x = MyObject(svalue=50)
assert isinstance(x.svalue, str)
I'm not sure if I'm reading this question correctly, but I would do something like:
class MyClass(Base):
    """Sketch of a model that coerces its constructor arguments by hand."""

    some_float = Column(Float)
    some_string = Column(String)
    some_int = Column(Integer)
    # ... remaining columns ...

    def __init__(self, some_float, some_string, some_int):
        # ... plus one parameter per remaining column ...
        if isinstance(some_float, float):
            self.some_float = some_float
        else:
            try:
                self.some_float = float(some_float)
            except (TypeError, ValueError):
                # do something intelligent (log, substitute a default, ...);
                # until that is decided, let the error surface
                raise
        if isinstance(some_string, str):
            self.some_string = some_string
        # ... repeat the same check-and-convert step for every other column ...
And I would repeat the checking process for each column. I wouldn't trust anything to do it "automatically". I also expect your file of strings to be well structured; otherwise something more complicated would have to be done.
Assuming your file is a CSV (I'm not good with file reads in python, so read this as pseudocode):
while not EOF:
thisline = readline('thisfile.csv', separator=',') # this line is an ordered list of strings
thisthing = MyClass(some_float=thisline[0], some_string=thisline[1]...)
DBSession.add(thisthing)

Example using BLOB in SQLAlchemy

Does anybody have example on how to use BLOB in SQLAlchemy?
from sqlalchemy import *
from sqlalchemy.orm import mapper, sessionmaker
import os
engine = create_engine('sqlite://', echo=True)
metadata = MetaData(engine)
sample = Table(
'sample', metadata,
Column('id', Integer, primary_key=True),
Column('lob', Binary),
)
class Sample(object):
    """Plain object holding a single binary payload in ``lob``."""

    def __init__(self, lob):
        """Store ``lob`` unchanged on the instance."""
        self.lob = lob
mapper(Sample, sample)
metadata.create_all()
session = sessionmaker(engine)()
# Creating new object
blob = os.urandom(100000)
obj = Sample(lob=blob)
session.add(obj)
session.commit()
obj_id = obj.id
session.expunge_all()
# Retrieving existing object
obj = session.query(Sample).get(obj_id)
assert obj.lob==blob
from sqlalchemy import *
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from struct import *
_DeclarativeBase = declarative_base()
class MyTable(_DeclarativeBase):
    """Declarative model for ``mytable``: a sequence-backed id plus a BLOB."""

    __tablename__ = 'mytable'

    id = Column(Integer, Sequence('my_table_id_seq'), primary_key=True)
    my_blob = Column(BLOB)
DB_NAME = 'sqlite:///C:/BlobbingTest.db'
db = create_engine(DB_NAME)
#self.__db.echo = True
_DeclarativeBase.metadata.create_all(db)
Session = sessionmaker(bind=db)
session = Session()
# One unsigned short packed into a blob.
session.add(MyTable(my_blob=pack('H', 365)))
# Ten unsigned shorts (1..10) packed back to back.
l = [n + 1 for n in range(10)]
session.add(MyTable(my_blob=pack('H' * len(l), *l)))
session.commit()

query = session.query(MyTable)
for mt in query.all():
    # Two bytes per 'H' item; floor division keeps the repeat count an int.
    print(unpack('H' * (len(mt.my_blob) // 2), mt.my_blob))
Why don't you use LargeBinary?
Extract from: https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.LargeBinary
class sqlalchemy.types.LargeBinary(length=None)
A type for large binary byte data.
The LargeBinary type corresponds to a large and/or unlengthed binary type for the target platform, such as BLOB on MySQL and BYTEA for PostgreSQL. It also handles the necessary conversions for the DBAPI.
I believe this might assist you.
From the documentation BINARY seems the way to go: http://docs.sqlalchemy.org/en/latest/dialects/mysql.html
class sqlalchemy.dialects.mysql.BLOB(length=None) Bases:
sqlalchemy.types.LargeBinary
The SQL BLOB type.
__init__(length=None) Construct a LargeBinary type.
Parameters: length – optional, a length for the column for use in DDL
statements, for those BLOB types that accept a length (i.e. MySQL). It
does not produce a lengthed BINARY/VARBINARY type - use the
BINARY/VARBINARY types specifically for those. May be safely omitted
if no CREATE TABLE will be issued. Certain databases may require a
length for use in DDL, and will raise an exception when the CREATE
TABLE DDL is issued.

Random ids in sqlalchemy (pylons)

I'm using pylons and sqlalchemy and I was wondering how I could have some randoms ids as primary_key.
the best way is to use randomly generated UUIDs:
import uuid
id = uuid.uuid4()
UUID datatypes are available natively in some databases such as PostgreSQL (SQLAlchemy has a native PG uuid datatype for this purpose - in 0.5 it's called sqlalchemy.databases.postgres.PGUuid). You should also be able to store a uuid in any 16-byte CHAR field (though I haven't tried this specifically on MySQL or others).
i use this pattern and it works pretty good. source
from sqlalchemy import types
from sqlalchemy.databases.mysql import MSBinary
from sqlalchemy.schema import Column
import uuid
class UUID(types.TypeDecorator):
    """Column type that stores ``uuid.UUID`` values as 16 raw bytes."""

    impl = MSBinary

    def __init__(self):
        self.impl.length = 16
        types.TypeDecorator.__init__(self, length=self.impl.length)

    def process_bind_param(self, value, dialect=None):
        """Convert a ``uuid.UUID`` to its bytes; falsy values become ``None``.

        Raises:
            ValueError: ``value`` is truthy but not a ``uuid.UUID``.
        """
        if not value:
            return None
        if not isinstance(value, uuid.UUID):
            # Call-style raise is valid on both Python 2 and 3.
            raise ValueError('value %s is not a valid uuid.UUID' % (value,))
        return value.bytes

    def process_result_value(self, value, dialect=None):
        """Rebuild a ``uuid.UUID`` from stored bytes; falsy values become ``None``."""
        if value:
            return uuid.UUID(bytes=value)
        return None

    def is_mutable(self):
        return False
id_column_name = "id"
def id_column():
    """Return a primary-key column named ``id_column_name`` of type ``UUID``.

    The column default is the callable ``uuid.uuid4``.
    """
    import uuid
    return Column(id_column_name, UUID(), primary_key=True, default=uuid.uuid4)
#usage
my_table = Table('test',metadata,id_column(),Column('parent_id',UUID(),ForeignKey(table_parent.c.id)))
Though zzzeek I believe is the author of sqlalchemy, so if this is wrong he would know, and I would listen to him.
Or with ORM mapping:
import uuid
from sqlalchemy import Column, Integer, String, Boolean
def uuid_gen():
    """Return a freshly generated random (version 4) UUID as a string."""
    return str(uuid.uuid4())
Base = declarative_base()
class Device(Base):
    """Model whose string primary key is filled by ``uuid_gen`` on insert."""

    # Declarative needs a table name (or __table__) to map the class.
    __tablename__ = 'device'

    id = Column(String, primary_key=True, default=uuid_gen)
This stores it as a string providing better database compatibility. However, you lose the database's ability to more optimally store and use the uuid.