FileNotFoundError: [Errno 2] File b'Downloads/BetterLifeIndex2015.csv' does not exist: b'Downloads/BetterLifeIndex2015.csv'

Resolved
Answer: Changed the path; it was in fact an incorrect path after all. I used the absolute path (Alt+D and copy from File Explorer), and I also put "r" before the path so it is treated as a raw string.
# load the data
BetterLifeIndex = pd.read_csv(r"C:\Users\brede\OneDrive\Dokumenter\Downloads\BetterLifeIndex2015.csv", thousands = ',')
gdp_per_capita = pd.read_csv(r"C:\Users\brede\OneDrive\Dokumenter\Downloads\gdpcapita.csv", thousands= ',', delimiter ='\t',
                             encoding = 'latin1', na_values="n/a")
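If you are unsure whether a path is correct, a quick sanity check before calling pd.read_csv can help. A minimal sketch, using the path from the fix above (adjust it to your own machine):

from pathlib import Path

csv_path = Path(r"C:\Users\brede\OneDrive\Dokumenter\Downloads\BetterLifeIndex2015.csv")
print(csv_path.exists())  # True if the file is really there, False otherwise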
I'm new to Python and I'm running an example from a machine learning book. I can't get Python to read my CSV file.
Code:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.linear_model
def prepare_country_stats(oecd_bli, gdp_per_capita):
    oecd_bli = oecd_bli[oecd_bli["INEQUALITY"]=="TOT"]
    oecd_bli = oecd_bli.pivot(index="Country", columns="Indicator", values="Value")
    gdp_per_capita.rename(columns={"2015": "GDP per capita"}, inplace=True)
    gdp_per_capita.set_index("Country", inplace=True)
    full_country_stats = pd.merge(left=oecd_bli, right=gdp_per_capita,
                                  left_index=True, right_index=True)
    full_country_stats.sort_values(by="GDP per capita", inplace=True)
    remove_indices = [0, 1, 6, 8, 33, 34, 35]
    keep_indices = list(set(range(36)) - set(remove_indices))
    return full_country_stats[["GDP per capita", 'Life satisfaction']].iloc[keep_indices]
# load the data
oecd_bli = pd.read_csv("Downloads/BetterLifeIndex2015.csv", thousands = ',')
gdp_per_capita = pd.read_csv("C:/Users/brede/Downloads/gdpcapita.csv", thousands= ',', delimiter ='\t',
                             encoding = 'latin1', na_values="n/a")
#prepare the data
country_stats = prepare_country_stats (oecd_bli, gdp_per_capita)
x = np.c_[country_stats["gdp per capita"]]
y = np.c_[country_stats["life satisfaction"]]
#visualize the data
country_stats.plot(kind= 'scatter' , x = "GDP per capita", y ='Life satisfaction')
#select a linear model
model = sklearn.linear_model.LinearRegression()
#train the model
model.fit (x, y)
#make a prediction for Cyprus
X_new = [[22587]] #Cyprus GDP per capita
print(model.predict(X_new)) #outputs[[5.96242338]]
The output is:
runfile('C:/Users/brede/Downloads/practice_gdp.py', wdir='C:/Users/brede/Downloads')
Traceback (most recent call last):
File "<ipython-input-59-2f130edd277c>", line 1, in <module>
runfile('C:/Users/brede/Downloads/practice_gdp.py', wdir='C:/Users/brede/Downloads')
File "C:\Users\brede\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
execfile(filename, namespace)
File "C:\Users\brede\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/brede/Downloads/practice_gdp.py", line 31, in <module>
oecd_bli = pd.read_csv("Downloads/BetterLifeIndex2015.csv", thousands = ',')
File "C:\Users\brede\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 685, in parser_f
return _read(filepath_or_buffer, kwds)
File "C:\Users\brede\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 457, in _read
parser = TextFileReader(fp_or_buf, **kwds)
File "C:\Users\brede\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 895, in __init__
self._make_engine(self.engine)
File "C:\Users\brede\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 1135, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "C:\Users\brede\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 1917, in __init__
self._reader = parsers.TextReader(src, **kwds)
File "pandas\_libs\parsers.pyx", line 382, in pandas._libs.parsers.TextReader.__cinit__
File "pandas\_libs\parsers.pyx", line 689, in pandas._libs.parsers.TextReader._setup_parser_source
FileNotFoundError: [Errno 2] File b'Downloads/BetterLifeIndex2015.csv' does not exist: b'Downloads/BetterLifeIndex2015.csv'
I have triple-checked the path to the file, and I can't seem to figure this out! All help is appreciated.
This is done in Spyder; I also tried it in Jupyter with the same result. I've even copied the path, etc.
help...

I think you have to include '/' in the file path. Try that: 'C:/Users/brede/OneDrive....'
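To illustrate the suggestion, here is a minimal sketch that uses forward slashes with the full absolute path (the path itself is taken from the accepted fix above; adjust it to your own machine):

import pandas as pd

# Forward slashes work on Windows too, and avoid the need for a raw string.
oecd_bli = pd.read_csv("C:/Users/brede/OneDrive/Dokumenter/Downloads/BetterLifeIndex2015.csv",
                       thousands=',')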

Related

getting error while writing data onto cloud bigTable through dataflow

I am using a 2nd gen Cloud Function to trigger a Dataflow job. The Dataflow template basically reads Parquet files from Cloud Storage and loads the data into Bigtable.
Here are the code and package details:
import os
import datetime
import logging
from configparser import ConfigParser
import apache_beam as beam
from google.cloud.bigtable import Client
from google.cloud.bigtable.row import DirectRow
from apache_beam.options.pipeline_options import PipelineOptions
from google.cloud import bigtable
from google.cloud.bigtable import column_family
from google.cloud.bigtable import row_filters
from apache_beam.io.gcp.bigtableio import WriteToBigTable
logger = logging.getLogger()
logger.setLevel(logging.INFO)
config_object = ConfigParser()
config_object.read("config.ini")
project_id = config_object["uprn"]["project_id"]
instance_id = config_object["uprn"]["instance_id"]
table_id = config_object["uprn"]["table_id"]
column_family_id = config_object["uprn"]["column_family_id"]
#input_columns = config_object["uprn"]["input_columns"]
timestamp = datetime.datetime(1970, 1, 1)
logging.info("--Starting..")
#client = bigtable.Client(project=project_id, admin=True)
#instance = client.instance(instance_id)
#table = instance.table(table_id)
def big_table_load(ele):
    try:
        rows = []
        column_names = list(ele.keys())
        row_key = str(str(ele['uprn'])).encode()
        logging.info("--row_key " + str(row_key))
        row = DirectRow(row_key)
        for key in column_names:
            row.set_cell(
                column_family_id, key, str(ele[key]).encode('utf-8'), timestamp=timestamp
            )
        rows.append(row)
        return rows
    except Exception as e:
        logging.info("Error encountered for row_key " + str(row_key) + " with error message " + str(e))

def find_err_file():
    filename_err = user_options.efilename.get()
    return filename_err

class UserOptions(PipelineOptions):
    @classmethod
    def _add_argparse_args(cls, parser):
        parser.add_value_provider_argument('--input_location',
                                           default='gs://my-proj-dev-local-landing-zone/mock_data/*'
                                           )

pipeline_options = PipelineOptions()
user_options = pipeline_options.view_as(UserOptions)

def run():
    try:
        with beam.Pipeline(options=pipeline_options) as p:
            records = (p | 'Read' >> beam.io.ReadFromParquet(user_options.input_location)
                       | 'Format Rows' >> beam.ParDo(big_table_load)
                       | WriteToBigTable(
                           project_id=project_id,
                           instance_id=instance_id,
                           table_id=table_id
                       )
                       )
    except Exception as e:
        logging.info(e)
        raise e

if __name__ == '__main__':
    run()
requirements.txt:
google-cloud-bigtable==1.7.0
apache-beam[gcp]==2.39.0
Error processing instruction process_bundle-4225915941562411087-3. Original traceback is
Traceback (most recent call last):
  File "apache_beam/runners/common.py", line 1232, in apache_beam.runners.common.DoFnRunner._invoke_bundle_method
  File "apache_beam/runners/common.py", line 475, in apache_beam.runners.common.DoFnInvoker.invoke_finish_bundle
  File "apache_beam/runners/common.py", line 481, in apache_beam.runners.common.DoFnInvoker.invoke_finish_bundle
  File "/usr/local/lib/python3.7/site-packages/apache_beam/io/gcp/bigtableio.py", line 187, in finish_bundle
    self.batcher.flush()
  File "/usr/local/lib/python3.7/site-packages/apache_beam/io/gcp/bigtableio.py", line 88, in flush
    status.code)))
Exception: Failed to write a batch of 12 records due to 'not_found'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.7/site-packages/apache_beam/runners/worker/sdk_worker.py", line 267, in _execute
    response = task()
  File "/usr/local/lib/python3.7/site-packages/apache_beam/runners/worker/sdk_worker.py", line 340, in lambda: self.create_worker().do_instruction(request), request)
  File "/usr/local/lib/python3.7/site-packages/apache_beam/runners/worker/sdk_worker.py", line 581, in do_instruction
    getattr(request, request_type), request.instruction_id)
  File "/usr/local/lib/python3.7/site-packages/apache_beam/runners/worker/sdk_worker.py", line 618, in process_bundle
    bundle_processor.process_bundle(instruction_id))
  File "/usr/local/lib/python3.7/site-packages/apache_beam/runners/worker/bundle_processor.py", line 1001, in process_bundle
    op.finish()
  File "apache_beam/runners/worker/operations.py", line 736, in apache_beam.runners.worker.operations.DoOperation.finish
  File "apache_beam/runners/worker/operations.py", line 738, in apache_beam.runners.worker.operations.DoOperation.finish
  File "apache_beam/runners/worker/operations.py", line 739, in apache_beam.runners.worker.operations.DoOperation.finish
  File "apache_beam/runners/common.py", line 1253, in apache_beam.runners.common.DoFnRunner.finish
  File "apache_beam/runners/common.py", line 1234, in apache_beam.runners.common.DoFnRunner._invoke_bundle_method
  File "apache_beam/runners/common.py", line 1281, in apache_beam.runners.common.DoFnRunner._reraise_augmented
  File "apache_beam/runners/common.py", line 1232, in apache_beam.runners.common.DoFnRunner._invoke_bundle_method
  File "apache_beam/runners/common.py", line 475, in apache_beam.runners.common.DoFnInvoker.invoke_finish_bundle
  File "apache_beam/runners/common.py", line 481, in apache_beam.runners.common.DoFnInvoker.invoke_finish_bundle
  File "/usr/local/lib/python3.7/site-packages/apache_beam/io/gcp/bigtableio.py", line 187, in finish_bundle
    self.batcher.flush()
  File "/usr/local/lib/python3.7/site-packages/apache_beam/io/gcp/bigtableio.py", line 88, in flush
    status.code)))
Exception: Failed to write a batch of 12 records due to 'not_found' [while running 'WriteToBigTable/ParDo(_BigTableWriteFn)-ptransform-43']
There is a 'not_found' error: do the table and the column family you are writing to actually exist?
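As a quick way to verify this, you could run something like the following before launching the pipeline. This is only a sketch; it reuses the project_id, instance_id, table_id and column_family_id values from your config and assumes your credentials have Bigtable admin permissions:

from google.cloud import bigtable
from google.cloud.bigtable import column_family

client = bigtable.Client(project=project_id, admin=True)
instance = client.instance(instance_id)
table = instance.table(table_id)

if not table.exists():
    # Create the table with the expected column family (keep at most one version per cell).
    table.create(column_families={column_family_id: column_family.MaxVersionsGCRule(1)})
elif column_family_id not in table.list_column_families():
    # Table exists but the column family is missing; add it.
    table.column_family(column_family_id, column_family.MaxVersionsGCRule(1)).create()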

Dash Plotly error TypeError: Object of type DataFrame is not JSON serializable

Hello, I am working with Dash to make a dashboard.
Below is my code.
I tried to fix the error but was not able to; can anyone look into this?
In Chrome I am getting "Error loading layout", and the server raises a TypeError.
import dash  # needed for dash.Dash and dash.dependencies below
import dash_bootstrap_components as dbc
from dash import dcc
import dash_html_components as html
from dash import dash_table
import pandas as pd
import numpy as np

def getData():
    return preprocess()

def back_to_df(dictio):
    return pd.DataFrame.from_dict(dictio)

tblcols = [{"name": i, "id": i} for i in back_to_df(getData()).columns]

app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

body = html.Div([
    html.H1("Live rates"),
    dbc.Row([
        dbc.Col(html.Div([dcc.Interval('graph-update', interval=80, n_intervals=0),
                          dash_table.DataTable(
                              id='table',
                              data=getData(),
                              columns=tblcols,
                              page_size=10,
                              style_table={'overflowX': 'auto'},
                          )]), width=3)
    ])
])

app.layout = html.Div([body])

@app.callback(
    dash.dependencies.Output('table', 'data'),
    [dash.dependencies.Input('graph-update', 'n_intervals')])
def updateTable(n):
    return getData()

if __name__ == "__main__":
    app.run_server(debug=False, port=8010)
I am getting the following error:
Traceback (most recent call last):
File "C:\Users\Admin\anaconda3\lib\site-packages\flask\app.py", line 2447, in wsgi_app
response = self.full_dispatch_request()
File "C:\Users\Admin\anaconda3\lib\site-packages\flask\app.py", line 1952, in full_dispatch_request
rv = self.handle_user_exception(e)
File "C:\Users\Admin\anaconda3\lib\site-packages\flask\app.py", line 1821, in handle_user_exception
reraise(exc_type, exc_value, tb)
File "C:\Users\Admin\anaconda3\lib\site-packages\flask\_compat.py", line 39, in reraise
raise value
File "C:\Users\Admin\anaconda3\lib\site-packages\flask\app.py", line 1950, in full_dispatch_request
rv = self.dispatch_request()
File "C:\Users\Admin\anaconda3\lib\site-packages\flask\app.py", line 1936, in dispatch_request
return self.view_functions[rule.endpoint](**req.view_args)
File "C:\Users\Admin\anaconda3\lib\site-packages\dash\dash.py", line 569, in serve_layout
to_json(layout),
File "C:\Users\Admin\anaconda3\lib\site-packages\dash\_utils.py", line 20, in to_json
return to_json_plotly(value)
File "C:\Users\Admin\anaconda3\lib\site-packages\plotly\io\_json.py", line 124, in to_json_plotly
return json.dumps(plotly_object, cls=PlotlyJSONEncoder, **opts)
File "C:\Users\Admin\anaconda3\lib\json\__init__.py", line 234, in dumps
return cls(
File "C:\Users\Admin\anaconda3\lib\site-packages\_plotly_utils\utils.py", line 59, in encode
encoded_o = super(PlotlyJSONEncoder, self).encode(o)
File "C:\Users\Admin\anaconda3\lib\json\encoder.py", line 199, in encode
chunks = self.iterencode(o, _one_shot=True)
File "C:\Users\Admin\anaconda3\lib\json\encoder.py", line 257, in iterencode
return _iterencode(o, 0)
File "C:\Users\Admin\anaconda3\lib\site-packages\_plotly_utils\utils.py", line 136, in default
return _json.JSONEncoder.default(self, obj)
File "C:\Users\Admin\anaconda3\lib\json\encoder.py", line 179, in default
raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type DataFrame is not JSON serializable
It sounds like the getData function is returning a pandas DataFrame directly. That won't work, because Dash has to serialize the layout to JSON. You'll need to return something JSON-serializable instead:
return df.to_dict(orient='records')
That should work.
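Concretely, a minimal sketch of the change (preprocess() is your own function and is assumed to return a DataFrame):

def getData():
    df = preprocess()  # assumed to return a pandas DataFrame
    # DataTable's data property expects a list of dicts, one per row, which is JSON serializable.
    return df.to_dict(orient='records')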

ValueError: arrays must all be same length - Parse the JSON into Pandas DataFrame

import requests
import json
import pandas as pd
data = requests.get("https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson")
json_data = data.json()
with open(r"C:\path\file.json",'w') as outfile:
json.dump(json_data, outfile)
df = pd.read_json(r"C:\path\file.json")
When I try to parse the JSON data into a pandas DataFrame, I get the error below:
ValueError: arrays must all be same length
Can anyone help me out with this?
Traceback (most recent call last):
File "c:/path/file.py", line 29, in <module>
df = pd.read_json(r"C:\path\file.json")
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\util\_decorators.py", line 199, in wrapper
return func(*args, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\util\_decorators.py", line 296, in wrapper
return func(*args, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\json\_json.py", line 618, in read_json
result = json_reader.read()
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\json\_json.py", line 755, in read
obj = self._get_object_parser(self.data)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\json\_json.py", line 777, in _get_object_parser
obj = FrameParser(json, **kwargs).parse()
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\json\_json.py", line 886, in parse
self._parse_no_numpy()
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\json\_json.py", line 1118, in _parse_no_numpy
self.obj = DataFrame(
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py", line 468, in __init__
mgr = init_dict(data, index, columns, dtype=dtype)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\construction.py", line 283, in init_dict
return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\construction.py", line 78, in arrays_to_mgr
index = extract_index(arrays)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\construction.py", line 397, in extract_index
raise ValueError("arrays must all be same length")
ValueError: arrays must all be same length
A simpler approach is to skip saving to a file and use json_normalize():
import requests
import json
import pandas as pd
data = requests.get("https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson")
json_data = data.json()
pd.json_normalize(json_data["features"])
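The original error happens because the GeoJSON response is a nested object whose top-level values have different lengths, so read_json cannot line them up into equal-length columns. json_normalize flattens each feature into one row instead. A small follow-up sketch (the column names are assumed from the USGS GeoJSON feed and may differ):

df = pd.json_normalize(json_data["features"])
# Nested fields become dotted column names, e.g. "properties.mag" or "geometry.coordinates".
print(df[["id", "properties.place", "properties.mag"]].head())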

Skyfield year is out of range

I'm trying to use Skyfield to plot an orbit, but it doesn't work.
Here's the code:
import numpy as np
import matplotlib.pyplot as plt
from skyfield.api import Loader, Topos, EarthSatellite
text = """
GOCE
1 34602U 09013A 13314.96046236 .14220718 20669-5 50412-4 0 930
2 34602 096.5717 344.5256 0009826 296.2811 064.0942 16.58673376272979
"""
lines = text.strip().splitlines()
sat = EarthSatellite(lines[1], lines[2], lines[0])
print(sat.epoch.utc_jpl())
Here's the error I get:
File "orbit_preditor.py", line 21, in <module>
ISS = EarthSatellite(L1, L2)
File "C:\Python\Python36\lib\site-packages\skyfield\sgp4lib.py", line 86, in __init__
EarthSatellite.timescale = load.timescale()
File "C:\Python\Python36\lib\site-packages\skyfield\iokit.py", line 232, in timescale
preds = self('deltat.preds')
File "C:\Python\Python36\lib\site-packages\skyfield\iokit.py", line 142, in __call__
expiration_date, data = parser(f)
File "C:\Python\Python36\lib\site-packages\skyfield\iokit.py", line 309, in parse_deltat_preds
expiration_date = date(year[0] + 2, month[0], 1)
ValueError: year 58668 is out of range
Any ideas?
Try upgrading to the new version of Skyfield with pip install -U skyfield. A third party data file changed formats and so we made a new Skyfield release to fix it.
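If you want to confirm which version ended up installed after the upgrade, a quick check (pkg_resources ships with setuptools; on Python 3.8+ importlib.metadata.version("skyfield") is the modern equivalent):

import pkg_resources

# Should print the latest release after running: pip install -U skyfield
print(pkg_resources.get_distribution("skyfield").version)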

Can you modify this Theano code to make fft-convolution work?

I'm searching for a way to use fft-convolution in Theano.
I wrote simple convolution code with Theano, but it doesn't work when I set fft_flag = 1, although the plain convolution works with fft_flag = 0.
Please tell me what is wrong with this code.
import numpy as np
import theano.sandbox.cuda.fftconv
from theano.tensor.nnet import conv
import theano.tensor as T
xdata_test = np.random.uniform(low=-1, high=1, size=(100,76,76),)
xdata_test = np.asarray(xdata_test,dtype='float32')
CONVfilter = np.random.uniform(low=-1,high=1,size=(10,1,6,6))
CONVfilter = np.asarray(CONVfilter,dtype='float32')
x = T.tensor3('x') # the data is presented as rasterized images
layer0_input = x.reshape((100, 1, 76, 76))
fft_flag = 1
if fft_flag == 1:
    ##### FFT-CONVOLUTION VERSION
    conv_out = theano.sandbox.cuda.fftconv.conv2d_fft(
        input=layer0_input,
        filters=CONVfilter,
        filter_shape=(10, 1, 6, 6),
        image_shape=(100, 1, 76, 76),
        border_mode='valid',
        pad_last_dim=False
    )
elif fft_flag == 0:
    ###### CONVENTIONAL CONVOLUTION VERSION
    conv_out = conv.conv2d(
        input=layer0_input,
        filters=CONVfilter,
        filter_shape=(10, 1, 6, 6),
        image_shape=(100, 1, 76, 76),
    )
test_conv = theano.function([x],conv_out)
result = test_conv(xdata_test)
The error message is as follows:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 580, in runfile
execfile(filename, namespace)
File "C:/Users/user/Documents/Python Scripts/ffttest.py", line 38, in <module>
result = test_conv(xdata_test)
File "C:\Anaconda\lib\site-packages\theano\compile\function_module.py", line 606, in __call__
storage_map=self.fn.storage_map)
File "C:\Anaconda\lib\site-packages\theano\gof\link.py", line 205, in raise_with_op
'\n' + '\n'.join(hints))
TypeError: __init__() takes at least 3 arguments (2 given)