Getting an error while writing data to Cloud Bigtable through Dataflow - google-cloud-functions

I am using a 2nd gen Cloud Function to trigger a Dataflow job. The Dataflow template basically reads parquet files from Cloud Storage and loads the data into Bigtable.
Here are the code and package details:
import os
import datetime
import logging
from configparser import ConfigParser
import apache_beam as beam
from google.cloud.bigtable import Client
from google.cloud.bigtable.row import DirectRow
from apache_beam.options.pipeline_options import PipelineOptions
from google.cloud import bigtable
from google.cloud.bigtable import column_family
from google.cloud.bigtable import row_filters
from apache_beam.io.gcp.bigtableio import WriteToBigTable
logger = logging.getLogger()
logger.setLevel(logging.INFO)
config_object = ConfigParser()
config_object.read("config.ini")
project_id = config_object["uprn"]["project_id"]
instance_id = config_object["uprn"]["instance_id"]
table_id = config_object["uprn"]["table_id"]
column_family_id = config_object["uprn"]["column_family_id"]
#input_columns = config_object["uprn"]["input_columns"]
timestamp = datetime.datetime(1970, 1, 1)
logging.info("--Starting..")
#client = bigtable.Client(project=project_id, admin=True)
#instance = client.instance(instance_id)
#table = instance.table(table_id)
def big_table_load(ele):
    try:
        rows = []
        column_names = list(ele.keys())
        row_key = str(ele['uprn']).encode()
        logging.info("--row_key " + str(row_key))
        row = DirectRow(row_key)
        for key in column_names:
            row.set_cell(
                column_family_id, key, str(ele[key]).encode('utf-8'), timestamp=timestamp
            )
        rows.append(row)
        return rows
    except Exception as e:
        logging.info("Error encountered for row_key " + str(row_key) + " with error message " + str(e))
def find_err_file():
    filename_err = user_options.efilename.get()
    return filename_err

class UserOptions(PipelineOptions):
    @classmethod
    def _add_argparse_args(cls, parser):
        parser.add_value_provider_argument('--input_location',
                                           default='gs://my-proj-dev-local-landing-zone/mock_data/*'
                                           )
pipeline_options = PipelineOptions()
user_options = pipeline_options.view_as(UserOptions)
def run():
    try:
        with beam.Pipeline(options=pipeline_options) as p:
            records = (p
                       | 'Read' >> beam.io.ReadFromParquet(user_options.input_location)
                       | 'Format Rows' >> beam.ParDo(big_table_load)
                       | 'Write' >> WriteToBigTable(
                           project_id=project_id,
                           instance_id=instance_id,
                           table_id=table_id
                       )
                       )
    except Exception as e:
        logging.info(e)
        raise e

if __name__ == '__main__':
    run()
requirements.txt:
google-cloud-bigtable==1.7.0
apache-beam[gcp]==2.39.0
Error processing instruction process_bundle-4225915941562411087-3. Original traceback is Traceback (most recent call last):
  File "apache_beam/runners/common.py", line 1232, in apache_beam.runners.common.DoFnRunner._invoke_bundle_method
  File "apache_beam/runners/common.py", line 475, in apache_beam.runners.common.DoFnInvoker.invoke_finish_bundle
  File "apache_beam/runners/common.py", line 481, in apache_beam.runners.common.DoFnInvoker.invoke_finish_bundle
  File "/usr/local/lib/python3.7/site-packages/apache_beam/io/gcp/bigtableio.py", line 187, in finish_bundle
    self.batcher.flush()
  File "/usr/local/lib/python3.7/site-packages/apache_beam/io/gcp/bigtableio.py", line 88, in flush
    status.code)))
Exception: Failed to write a batch of 12 records due to 'not_found'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.7/site-packages/apache_beam/runners/worker/sdk_worker.py", line 267, in _execute
    response = task()
  File "/usr/local/lib/python3.7/site-packages/apache_beam/runners/worker/sdk_worker.py", line 340, in
    lambda: self.create_worker().do_instruction(request), request)
  File "/usr/local/lib/python3.7/site-packages/apache_beam/runners/worker/sdk_worker.py", line 581, in do_instruction
    getattr(request, request_type), request.instruction_id)
  File "/usr/local/lib/python3.7/site-packages/apache_beam/runners/worker/sdk_worker.py", line 618, in process_bundle
    bundle_processor.process_bundle(instruction_id))
  File "/usr/local/lib/python3.7/site-packages/apache_beam/runners/worker/bundle_processor.py", line 1001, in process_bundle
    op.finish()
  File "apache_beam/runners/worker/operations.py", line 736, in apache_beam.runners.worker.operations.DoOperation.finish
  File "apache_beam/runners/worker/operations.py", line 738, in apache_beam.runners.worker.operations.DoOperation.finish
  File "apache_beam/runners/worker/operations.py", line 739, in apache_beam.runners.worker.operations.DoOperation.finish
  File "apache_beam/runners/common.py", line 1253, in apache_beam.runners.common.DoFnRunner.finish
  File "apache_beam/runners/common.py", line 1234, in apache_beam.runners.common.DoFnRunner._invoke_bundle_method
  File "apache_beam/runners/common.py", line 1281, in apache_beam.runners.common.DoFnRunner._reraise_augmented
  File "apache_beam/runners/common.py", line 1232, in apache_beam.runners.common.DoFnRunner._invoke_bundle_method
  File "apache_beam/runners/common.py", line 475, in apache_beam.runners.common.DoFnInvoker.invoke_finish_bundle
  File "apache_beam/runners/common.py", line 481, in apache_beam.runners.common.DoFnInvoker.invoke_finish_bundle
  File "/usr/local/lib/python3.7/site-packages/apache_beam/io/gcp/bigtableio.py", line 187, in finish_bundle
    self.batcher.flush()
  File "/usr/local/lib/python3.7/site-packages/apache_beam/io/gcp/bigtableio.py", line 88, in flush
    status.code)))
Exception: Failed to write a batch of 12 records due to 'not_found' [while running 'WriteToBigTable/ParDo(_BigTableWriteFn)-ptransform-43']

There is a “not found” error - do the table and column family you are writing to exist?
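If they don't, the batch write fails exactly like this. A quick way to verify is with the Bigtable admin client before launching the pipeline - a minimal sketch, assuming the same config.ini values as in the question and a service account with Bigtable admin rights:

from google.cloud import bigtable
from google.cloud.bigtable import column_family

# The admin client can inspect and create tables and column families
client = bigtable.Client(project=project_id, admin=True)
instance = client.instance(instance_id)
table = instance.table(table_id)

if not table.exists():
    # Create the table together with the expected column family
    table.create(column_families={column_family_id: column_family.MaxVersionsGCRule(1)})
elif column_family_id not in table.list_column_families():
    print("Column family %s is missing" % column_family_id)

Note that 'not_found' can also mean the Dataflow workers are writing to the wrong project or instance, so double-check the values read from config.ini.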

Related

How To Handle CCXT Binance Intermittent Network Error

I stumbled across an issue that causes the script below to throw an error every so often, on average every other day.
The script runs 24/7, and dozens of similar instances run simultaneously. That seems relevant because, as can be seen from the error, it appears to be thrown from another instance (a different asset than the one being retrieved).
OS: Windows 10
Python version: 3.9
CCXT version: 1.54.87
import ccxt
import pandas_ta as ta
import config
import schedule
import pandas as pd
from datetime import datetime
import time
import socket

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 250)

exchange = ccxt.binance({
    'apiKey': config.BINANCE_API_KEY,
    'secret': config.BINANCE_API_SECRET,
    'enableRateLimit': True,
    'options': {
        'defaultType': 'future'
    },
})

in_position = False
free_balance = exchange.fetch_free_balance()
used_balance = exchange.fetch_used_balance()
free_usd = free_balance['USDT']
used_usd = used_balance['USDT']
amount = free_usd + used_usd
quantity = 0
new_quantity = 0

def trigger(df):
    ...  # strategy

def algo():
    print(f"Loading data as of {datetime.now().isoformat()}")
    bars = exchange.fetch_ohlcv('BNB/USDT', timeframe='30m', limit=50)
    df = pd.DataFrame(bars, columns=['time', 'open', 'high', 'low', 'close', 'volume'])
    df['time'] = pd.to_datetime(df['time'], unit='ms')
    df.set_index(pd.DatetimeIndex(df['time']), inplace=True)
    trigger(df)

try:
    schedule.every(2).seconds.do(algo)
    while True:
        schedule.run_pending()
        time.sleep(1)
except ConnectionResetError:
    schedule.every(3).seconds.do(algo)
    while True:
        schedule.run_pending()
        time.sleep(1)
except socket.timeout:
    schedule.every(3).seconds.do(algo)
    while True:
        schedule.run_pending()
        time.sleep(1)
Traceback (most recent call last):
File "C:\Users\", line 699, in urlopen
httplib_response = self._make_request(
File "C:\Users\", line 445, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "C:\Users\", line 440, in _make_request
httplib_response = conn.getresponse()
File "C:\Users\", line 1349, in getresponse
response.begin()
File "C:\Users\", line 316, in begin
version, status, reason = self._read_status()
File "C:\Users\", line 277, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "C:\Users\", line 704, in readinto
return self._sock.recv_into(b)
File "C:\Users\", line 1241, in recv_into
return self.read(nbytes, buffer)
File "C:\Users\", line 1099, in read
return self._sslobj.read(len, buffer)
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\", line 439, in send
resp = conn.urlopen(
File "C:\Users\", line 755, in urlopen
retries = retries.increment(
File "C:\Users\", line 532, in increment
raise six.reraise(type(error), error, _stacktrace)
File "C:\Users\", line 769, in reraise
raise value.with_traceback(tb)
File "C:\Users\", line 699, in urlopen
httplib_response = self._make_request(
File "C:\Users\", line 445, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "C:\Users\", line 440, in _make_request
httplib_response = conn.getresponse()
File "C:\Users\", line 1349, in getresponse
response.begin()
File "C:\Users\", line 316, in begin
version, status, reason = self._read_status()
File "C:\Users\", line 277, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "C:\Users\", line 704, in readinto
return self._sock.recv_into(b)
File "C:\Users\", line 1241, in recv_into
return self.read(nbytes, buffer)
File "C:\Users\", line 1099, in read
return self._sslobj.read(len, buffer)
urllib3.exceptions.ProtocolError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\", line 571, in fetch
response = self.session.request(
File "C:\Users\", line 542, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\", line 655, in send
r = adapter.send(request, **kwargs)
File "C:\Users\", line 498, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\", line 79, in <module>
schedule.run_pending()
File "C:\Users\", line 780, in run_pending
default_scheduler.run_pending()
File "C:\Users\", line 100, in run_pending
self._run_job(job)
File "C:\Users\", line 172, in _run_job
ret = job.run()
File "C:\Users\", line 661, in run
ret = self.job_func()
File "C:\Users\", line 67, in algo
bars = exchange.fetch_ohlcv('ADA/USDT', timeframe='15m', limit=300)
File "C:\Users\", line 1724, in fetch_ohlcv
response = getattr(self, method)(self.extend(request, params))
File "C:\Users\", line 463, in inner
return entry(_self, **inner_kwargs)
File "C:\Users\", line 4119, in request
response = self.fetch2(path, api, method, params, headers, body)
File "C:\Users\", line 486, in fetch2
return self.fetch(request['url'], request['method'], request['headers'], request['body'])
File "C:\Users\", line 623, in fetch
raise NetworkError(details) from e
ccxt.base.errors.NetworkError: binance GET https://fapi.binance.com/fapi/v1/klines?symbol=ADAUSDT&interval=15m&limit=300
I had the same problem: my browser could access the URL fine, but PyCharm ran into a network error. I am using a proxy to access binance.com; my PyCharm proxy setting is manual and the connection test passes.
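Beyond proxy settings, the usual way to handle intermittent drops like this is to catch ccxt.NetworkError around the individual request and retry, rather than re-registering the job in the scheduler's except branches. A minimal sketch (the retry count and backoff values are arbitrary assumptions):

import time
import ccxt

def fetch_ohlcv_with_retry(exchange, symbol, timeframe='30m', limit=50, retries=3):
    # Retry transient network failures; re-raise anything else unchanged
    for attempt in range(retries):
        try:
            return exchange.fetch_ohlcv(symbol, timeframe=timeframe, limit=limit)
        except ccxt.NetworkError:
            if attempt == retries - 1:
                raise
            time.sleep(2 ** attempt)  # simple exponential backoff

Inside algo(), bars = fetch_ohlcv_with_retry(exchange, 'BNB/USDT') would then replace the direct fetch_ohlcv call.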

How to use marshmallow-sqlalchemy with async code?

I'm trying to use marshmallow-sqlalchemy with aiohttp. I followed the basic example from their docs, but I'm getting an error.
I have this schema:
from marshmallow_sqlalchemy import SQLAlchemyAutoSchema
from db.customer import Customer

class CustomerSchema(SQLAlchemyAutoSchema):
    class Meta:
        model = Customer
        include_relationships = True
        load_instance = True
And then the following code for the query:
from sqlalchemy import select
from db import db_conn
from db.customer import Customer
from queries.schema import CustomerSchema

customer_schema = CustomerSchema()

async def get_all_users():
    async with db_conn.get_async_sa_session() as session:
        statement = select(Customer)
        results = await session.execute(statement)
        _ = results.scalars().all()
        print(_)
        response = customer_schema.dump(_, many=True)
        print(response)
For the first print statement I'm getting
[<db.customer.Customer object at 0x10a183340>, <db.customer.Customer object at 0x10a183940>, <db.customer.Customer object at 0x10b0cd9d0>]
But then it fails with
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 60, in await_only
raise exc.MissingGreenlet(
sqlalchemy.exc.MissingGreenlet: greenlet_spawn has not been called; can't call await_() here. Was IO attempted in an unexpected place? (Background on this error at: http://sqlalche.me/e/14/xd2s)
So how can I use marshmallow-sqlalchemy to serialize the SQLAlchemy response?
Other options (packages, etc.) or a generic custom solution are OK too.
For the time being I'm using this:
statement = select(Customer)
results = await session.execute(statement)
_ = results.scalars().all()
response = {}
for result in _:
    value = {k: (v if not isinstance(v, sqlalchemy.orm.state.InstanceState) else '_')
             for k, v in result.__dict__.items()}
    response[f'customer {value["id"]}'] = value
return response
Full traceback:
Traceback (most recent call last):
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/aiohttp/web_protocol.py", line 422, in _handle_request
resp = await self._request_handler(request)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/aiohttp/web_app.py", line 499, in _handle
resp = await handler(request)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/aiohttp/web_urldispatcher.py", line 948, in _iter
resp = await method()
File "/Users/ruslan/OneDrive/Home/Dev/projects/code/education/other/cft/views/user.py", line 24, in get
await get_all_users()
File "/Users/ruslan/OneDrive/Home/Dev/projects/code/education/other/cft/queries/user.py", line 18, in get_all_users
response = customer_schema.dump(_, many=True)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/schema.py", line 547, in dump
result = self._serialize(processed_obj, many=many)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/schema.py", line 509, in _serialize
return [
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/schema.py", line 510, in <listcomp>
self._serialize(d, many=False)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/schema.py", line 515, in _serialize
value = field_obj.serialize(attr_name, obj, accessor=self.get_attribute)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/fields.py", line 310, in serialize
value = self.get_value(obj, attr, accessor=accessor)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow_sqlalchemy/fields.py", line 27, in get_value
return super(fields.List, self).get_value(obj, attr, accessor=accessor)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/fields.py", line 239, in get_value
return accessor_func(obj, check_key, default)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/schema.py", line 472, in get_attribute
return get_value(obj, attr, default)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/utils.py", line 239, in get_value
return _get_value_for_key(obj, key, default)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/marshmallow/utils.py", line 253, in _get_value_for_key
return getattr(obj, key, default)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py", line 480, in __get__
return self.impl.get(state, dict_)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/orm/attributes.py", line 931, in get
value = self.callable_(state, passive)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/orm/strategies.py", line 879, in _load_for_state
return self._emit_lazyload(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/orm/strategies.py", line 1036, in _emit_lazyload
result = session.execute(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/orm/session.py", line 1689, in execute
result = conn._execute_20(statement, params or {}, execution_options)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1582, in _execute_20
return meth(self, args_10style, kwargs_10style, execution_options)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/sql/lambdas.py", line 481, in _execute_on_connection
return connection._execute_clauseelement(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1451, in _execute_clauseelement
ret = self._execute_context(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1813, in _execute_context
self._handle_dbapi_exception(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1998, in _handle_dbapi_exception
util.raise_(exc_info[1], with_traceback=exc_info[2])
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/util/compat.py", line 207, in raise_
raise exception
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1770, in _execute_context
self.dialect.do_execute(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/engine/default.py", line 717, in do_execute
cursor.execute(statement, parameters)
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/dialects/postgresql/asyncpg.py", line 449, in execute
self._adapt_connection.await_(
File "/Users/ruslan/.local/share/virtualenvs/cft-RKlbQ9iX/lib/python3.9/site-packages/sqlalchemy/util/_concurrency_py3k.py", line 60, in await_only
raise exc.MissingGreenlet(
sqlalchemy.exc.MissingGreenlet: greenlet_spawn has not been called; can't call await_() here. Was IO attempted in an unexpected place? (Background on this error at: http://sqlalche.me/e/14/xd2s)
The problem in this case is that the Marshmallow schema is configured to load related models (include_relationships=True). Since the initial query doesn't load them automatically, the schema triggers a lazy-load query to fetch them, and that implicit IO causes the error.
The simplest solution, demonstrated in the docs, is to eagerly load the related objects along with their "parent":
from sqlalchemy import orm

async def get_all_users():
    async with db_conn.get_async_sa_session() as session:
        # Let's assume a Customer has a 1-to-many relationship with an Order model
        statement = select(Customer).options(orm.selectinload(Customer.orders))
        results = await session.execute(statement)
        _ = results.scalars().all()
        print(_)
        response = customer_schema.dump(_, many=True)
        print(response)
There is more discussion in the Preventing Implicit IO when Using AsyncSession section of the docs.

sqlalchemy.exc.NoInspectionAvailable: No inspection system is available for object of type <class 'sqlalchemy.ext.asyncio.engine.AsyncEngine'>

I followed this example, but with aiosqlite: https://docs.sqlalchemy.org/en/14/tutorial/metadata.html#table-reflection
Like this:
import asyncio
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
from sqlalchemy import MetaData
from sqlalchemy import Table, Column, Integer, String

metadata = MetaData()
engine = create_async_engine(
    "sqlite+aiosqlite:////Users/rui/database.db",
    echo=True,
    future=True,
)

# Table reflection (loading an existing table)
users = Table("users", metadata, autoload_with=engine)
Then I get this error:
File "/usr/local/Caskroom/miniconda/base/envs/playground/lib/python3.9/site-packages/sqlalchemy/util/deprecations.py", line 298, in warned
return fn(*args, **kwargs)
File "/usr/local/Caskroom/miniconda/base/envs/playground/lib/python3.9/site-packages/sqlalchemy/sql/schema.py", line 597, in __new__
metadata._remove_table(name, schema)
File "/usr/local/Caskroom/miniconda/base/envs/playground/lib/python3.9/site-packages/sqlalchemy/util/langhelpers.py", line 70, in __exit__
compat.raise_(
File "/usr/local/Caskroom/miniconda/base/envs/playground/lib/python3.9/site-packages/sqlalchemy/util/compat.py", line 211, in raise_
raise exception
File "/usr/local/Caskroom/miniconda/base/envs/playground/lib/python3.9/site-packages/sqlalchemy/sql/schema.py", line 592, in __new__
table._init(name, metadata, *args, **kw)
File "/usr/local/Caskroom/miniconda/base/envs/playground/lib/python3.9/site-packages/sqlalchemy/sql/schema.py", line 667, in _init
self._autoload(
File "/usr/local/Caskroom/miniconda/base/envs/playground/lib/python3.9/site-packages/sqlalchemy/sql/schema.py", line 700, in _autoload
insp = inspection.inspect(autoload_with)
File "/usr/local/Caskroom/miniconda/base/envs/playground/lib/python3.9/site-packages/sqlalchemy/inspection.py", line 71, in inspect
raise exc.NoInspectionAvailable(
sqlalchemy.exc.NoInspectionAvailable: No inspection system is available for object of type <class 'sqlalchemy.ext.asyncio.engine.AsyncEngine'>
python-BaseException
Process finished with exit code 1
I'm using Python 3.9.2, sqlalchemy==1.4.5 and aiosqlite==0.17.0.
Should I replace aiosqlite with another driver? Or should I wait until asyncio support in SQLAlchemy improves?
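Reflection performs synchronous IO, so it cannot take an AsyncEngine directly. In SQLAlchemy 1.4 the documented workaround is to run the reflection inside AsyncConnection.run_sync(). A sketch of that pattern, reusing the engine URL from the question:

import asyncio

from sqlalchemy import MetaData, Table
from sqlalchemy.ext.asyncio import create_async_engine

metadata = MetaData()
engine = create_async_engine("sqlite+aiosqlite:////Users/rui/database.db", future=True)

async def reflect_users() -> Table:
    async with engine.connect() as conn:
        # run_sync bridges into a sync context where inspection works
        await conn.run_sync(metadata.reflect)
    return metadata.tables["users"]

users = asyncio.run(reflect_users())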

FileNotFoundError: [Errno 2] File b'Downloads/BetterLifeIndex2015.csv' does not exist: b'Downloads/BetterLifeIndex2015.csv'

Resolved
Answer: changed the path; it was in fact an incorrect path after all. I used the absolute path (Alt+D, then copy from File Explorer) and put r before the path so it is treated as a raw string.
# load the data
BetterLifeIndex = pd.read_csv(r"C:\Users\brede\OneDrive\Dokumenter\Downloads\BetterLifeIndex2015.csv", thousands = ',')
gdp_per_capita = pd.read_csv(r"C:\Users\brede\OneDrive\Dokumenter\Downloads\gdpcapita.csv", thousands= ',', delimiter ='\t',
encoding = 'latin1' , na_values="n/a")
I'm new to Python and I'm running an example from a machine learning book. I can't get Python to read my csv file.
Code:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.linear_model

def prepare_country_stats(oecd_bli, gdp_per_capita):
    oecd_bli = oecd_bli[oecd_bli["INEQUALITY"] == "TOT"]
    oecd_bli = oecd_bli.pivot(index="Country", columns="Indicator", values="Value")
    gdp_per_capita.rename(columns={"2015": "GDP per capita"}, inplace=True)
    gdp_per_capita.set_index("Country", inplace=True)
    full_country_stats = pd.merge(left=oecd_bli, right=gdp_per_capita,
                                  left_index=True, right_index=True)
    full_country_stats.sort_values(by="GDP per capita", inplace=True)
    remove_indices = [0, 1, 6, 8, 33, 34, 35]
    keep_indices = list(set(range(36)) - set(remove_indices))
    return full_country_stats[["GDP per capita", 'Life satisfaction']].iloc[keep_indices]

# load the data
oecd_bli = pd.read_csv("Downloads/BetterLifeIndex2015.csv", thousands=',')
gdp_per_capita = pd.read_csv("C:/Users/brede/Downloads/gdpcapita.csv", thousands=',', delimiter='\t',
                             encoding='latin1', na_values="n/a")

# prepare the data
country_stats = prepare_country_stats(oecd_bli, gdp_per_capita)
x = np.c_[country_stats["GDP per capita"]]    # column names are case sensitive
y = np.c_[country_stats["Life satisfaction"]]

# visualize the data
country_stats.plot(kind='scatter', x="GDP per capita", y='Life satisfaction')

# select a linear model
model = sklearn.linear_model.LinearRegression()

# train the model
model.fit(x, y)

# make a prediction for Cyprus
X_new = [[22587]]  # Cyprus GDP per capita
print(model.predict(X_new))  # outputs [[5.96242338]]
The output is:
runfile('C:/Users/brede/Downloads/practice_gdp.py', wdir='C:/Users/brede/Downloads')
Traceback (most recent call last):
File "<ipython-input-59-2f130edd277c>", line 1, in <module>
runfile('C:/Users/brede/Downloads/practice_gdp.py', wdir='C:/Users/brede/Downloads')
File "C:\Users\brede\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
execfile(filename, namespace)
File "C:\Users\brede\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/brede/Downloads/practice_gdp.py", line 31, in <module>
oecd_bli = pd.read_csv("Downloads/BetterLifeIndex2015.csv", thousands = ',')
File "C:\Users\brede\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 685, in parser_f
return _read(filepath_or_buffer, kwds)
File "C:\Users\brede\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 457, in _read
parser = TextFileReader(fp_or_buf, **kwds)
File "C:\Users\brede\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 895, in __init__
self._make_engine(self.engine)
File "C:\Users\brede\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 1135, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "C:\Users\brede\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 1917, in __init__
self._reader = parsers.TextReader(src, **kwds)
File "pandas\_libs\parsers.pyx", line 382, in pandas._libs.parsers.TextReader.__cinit__
File "pandas\_libs\parsers.pyx", line 689, in pandas._libs.parsers.TextReader._setup_parser_source
FileNotFoundError: [Errno 2] File b'Downloads/BetterLifeIndex2015.csv' does not exist: b'Downloads/BetterLifeIndex2015.csv'
I have triple-checked the path to the file and I can't seem to figure this out! All help is appreciated.
This is done in Spyder; I also tried Jupyter with the same result. I've even copied the path, etc.
I think you have to include '/' in the file path. Try 'C:/Users/brede/OneDrive/...'.
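More generally, a relative path like "Downloads/BetterLifeIndex2015.csv" is resolved against the current working directory, which in Spyder or Jupyter is often not the folder you expect. A sketch of a more robust pattern, assuming the CSV sits next to the script (the filename is taken from the question):

from pathlib import Path
import pandas as pd

# Resolve the data file relative to this script, not the working directory
DATA_DIR = Path(__file__).resolve().parent
oecd_bli = pd.read_csv(DATA_DIR / "BetterLifeIndex2015.csv", thousands=',')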

Sqlalchemy class _MatchType(sqltypes.Float, sqltypes.MatchType): AttributeError

Sometimes executing this query raises an error:
ct = db.session.query(CIType).filter(
    CIType.type_name == key).first() or \
    db.session.query(CIType).filter(CIType.type_id == key).first()
Full error info:
2016-08-11 14:27:26,177 ERROR /usr/lib/python2.6/site-packages/flask/app.py 1306 - Exception on /api/v0.1/projects/search-indexer-rafael/product [GET]
Traceback (most recent call last):
File "/usr/lib/python2.6/site-packages/flask/app.py", line 1687, in wsgi_app
response = self.full_dispatch_request()
File "/usr/lib/python2.6/site-packages/flask/app.py", line 1360, in full_dispatch_request
rv = self.handle_user_exception(e)
File "/usr/lib/python2.6/site-packages/flask/app.py", line 1358, in full_dispatch_request
rv = self.dispatch_request()
File "/usr/lib/python2.6/site-packages/flask/app.py", line 1344, in dispatch_request
return self.view_functions[rule.endpoint](**req.view_args)
File "/data/webapps/cmdb-api/core/special.py", line 175, in get_project_product
product = ProjectManager().get_for_product(project_name)
File "/data/webapps/cmdb-api/lib/special/project.py", line 18, in __init__
self.ci_type = CITypeCache.get("project")
File "/data/webapps/cmdb-api/models/cmdb.py", line 458, in get
ct = db.session.query(CIType).filter(
File "/usr/lib64/python2.6/site-packages/sqlalchemy/orm/scoping.py", line 149, in do
def do(self, *args, **kwargs):
File "/usr/lib64/python2.6/site-packages/sqlalchemy/util/_collections.py", line 903, in __call__
item = dict.get(self, key)
File "/usr/lib/python2.6/site-packages/flask_sqlalchemy.py", line 201, in __init__
bind=db.engine,
File "/usr/lib/python2.6/site-packages/flask_sqlalchemy.py", line 754, in engine
return self.get_engine(self.get_app())
File "/usr/lib/python2.6/site-packages/flask_sqlalchemy.py", line 771, in get_engine
return connector.get_engine()
File "/usr/lib/python2.6/site-packages/flask_sqlalchemy.py", line 451, in get_engine
self._engine = rv = sqlalchemy.create_engine(info, **options)
File "/usr/lib64/python2.6/site-packages/sqlalchemy/engine/__init__.py", line 344, in create_engine
of 0 indicates no limit; to disable pooling, set ``poolclass`` to
File "/usr/lib64/python2.6/site-packages/sqlalchemy/engine/strategies.py", line 50, in create
File "/usr/lib64/python2.6/site-packages/sqlalchemy/engine/url.py", line 116, in get_dialect
return self.get_dialect().driver
File "/usr/lib64/python2.6/site-packages/sqlalchemy/util/langhelpers.py", line 170, in load
fn.__func__.__doc__ = doc
File "/usr/lib64/python2.6/site-packages/sqlalchemy/dialects/__init__.py", line 33, in _auto_fn
try:
File "/usr/lib64/python2.6/site-packages/sqlalchemy/dialects/mysql/__init__.py", line 8, in <module>
from . import base, mysqldb, oursql, \
File "/usr/lib64/python2.6/site-packages/sqlalchemy/dialects/mysql/base.py", line 681, in <module>
class _MatchType(sqltypes.Float, sqltypes.MatchType):
AttributeError: 'module' object has no attribute 'MatchType'
Code:
@special.route("/api/v0.1/projects/<string:project_name>/product",
               methods=["GET"])
def get_project_product(project_name):
    product = ProjectManager().get_for_product(project_name)
    return jsonify(product=product)
...
which goes to:
class ProjectManager(object):
    def __init__(self):
        self.ci_type = CITypeCache.get("project")
...
and then:
class CITypeCache(object):
    @classmethod
    def get(cls, key):
        if key is None:
            return
        ct = cache.get("CIType::ID::%s" % key) or \
            cache.get("CIType::Name::%s" % key)
        if ct is None:
            ct = db.session.query(CIType).filter(
                CIType.type_name == key).first() or \
                db.session.query(CIType).filter(CIType.type_id == key).first()
            if ct is not None:
                CITypeCache.set(ct)
        return ct
The SQLAlchemy version is SQLAlchemy-1.0.8 (py2.6 egg). After many occurrences of the same error, I can't catch this error any more. What is the reason for this error?
I assume CIType.type_name and CIType.type_id have different data types (perhaps a string and a numeric type). That can lead to a situation where:
db.session.query(CIType).filter(CIType.type_name == key).first()
is a valid expression, but:
db.session.query(CIType).filter(CIType.type_id == key).first()
produces an error because of the type mismatch. You need to convert key to the type of the type_id column in that expression.
The second expression is evaluated only when the first returns no results. As the Python documentation says:
The expression x or y first evaluates x; if x is true, its value is returned; otherwise, y is evaluated and the resulting value is returned.
For example:
>>> a = 1 or 2 + '2'
>>> print a
1
>>> a = 0 or 2 + '2'
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: unsupported operand type(s) for +: 'int' and 'str'
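Applied to the cache lookup above, one way to make the fallback type-safe is to attempt the type_id comparison only when the key converts cleanly. A sketch (it assumes type_id is an integer column; get_ci_type is a hypothetical helper reusing db and CIType from the question):

def get_ci_type(key):
    # The name comparison is always type-safe
    ct = db.session.query(CIType).filter(CIType.type_name == key).first()
    if ct is None and str(key).isdigit():
        # Only compare against the numeric type_id when key looks numeric
        ct = db.session.query(CIType).filter(CIType.type_id == int(key)).first()
    return ct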