ValueError: arrays must all be same length - Parse the JSON into Pandas DataFrame - json

import requests
import json
import pandas as pd
data = requests.get("https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson")
json_data = data.json()
with open(r"C:\path\file.json",'w') as outfile:
json.dump(json_data, outfile)
df = pd.read_json(r"C:\path\file.json")
when I tried to parse the json data into Pandas Dataframe, I get the below error:
ValueError: arrays must all be same length
Can anyone help me out in this?
Traceback (most recent call last):
File "c:/path/file.py", line 29, in <module>
df = pd.read_json(r"C:\path\file.json")
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\util\_decorators.py", line 199, in wrapper
return func(*args, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\util\_decorators.py", line 296, in wrapper
return func(*args, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\json\_json.py", line 618, in read_json
result = json_reader.read()
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\json\_json.py", line 755, in read
obj = self._get_object_parser(self.data)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\json\_json.py", line 777, in _get_object_parser
obj = FrameParser(json, **kwargs).parse()
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\json\_json.py", line 886, in parse
self._parse_no_numpy()
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\json\_json.py", line 1118, in _parse_no_numpy
self.obj = DataFrame(
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py", line 468, in __init__
mgr = init_dict(data, index, columns, dtype=dtype)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\construction.py", line 283, in init_dict
return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\construction.py", line 78, in arrays_to_mgr
index = extract_index(arrays)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\construction.py", line 397, in extract_index
raise ValueError("arrays must all be same length")
ValueError: arrays must all be same length

Simpler approach is not to save to a file and use json_normalize()
import requests
import json
import pandas as pd
data = requests.get("https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson")
json_data = data.json()
pd.json_normalize(json_data["features"])

Related

Dash Plotly error TypeError: Object of type DataFrame is not JSON serializable

Hello I am working with Dash for making dashboard.
Below is my code.
I tried to fix the error but not able to fix, Can anyone look into this?
on chrome i am getting. Error loading layout
I am getting TypeError
import dash_bootstrap_components as dbc
from dash import dcc
import dash_html_components as html
from dash import dash_table
import pandas as pd
import numpy as np
def getData():
return preprocess()
def back_to_df(dictio):
return pd.DataFrame.from_dict(dictio)
tblcols =[{"name": i, "id": i} for i in back_to_df(getData()).columns]
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
body = html.Div([
html.H1("Live rates")
, dbc.Row([
dbc.Col(html.Div([dcc.Interval('graph-update', interval = 80, n_intervals = 0),
dash_table.DataTable(
id = 'table',
data = getData(),
columns=tblcols,
page_size= 10,
style_table={'overflowX': 'auto'},
)]),width=3)
])
])
app.layout = html.Div([body])
#app.callback(
dash.dependencies.Output('table','data'),
[dash.dependencies.Input('graph-update', 'n_intervals')])
def updateTable(n):
return getData()
if __name__ == "__main__":
app.run_server(debug = False, port = 8010)
I tried to fix the error but not able to fix, Can anyone look into this?
I am getting error as follows.
Looking for help for below error. dash pandas plotly dataframe
Traceback (most recent call last):
File "C:\Users\Admin\anaconda3\lib\site-packages\flask\app.py", line 2447, in wsgi_app
response = self.full_dispatch_request()
File "C:\Users\Admin\anaconda3\lib\site-packages\flask\app.py", line 1952, in full_dispatch_request
rv = self.handle_user_exception(e)
File "C:\Users\Admin\anaconda3\lib\site-packages\flask\app.py", line 1821, in handle_user_exception
reraise(exc_type, exc_value, tb)
File "C:\Users\Admin\anaconda3\lib\site-packages\flask\_compat.py", line 39, in reraise
raise value
File "C:\Users\Admin\anaconda3\lib\site-packages\flask\app.py", line 1950, in full_dispatch_request
rv = self.dispatch_request()
File "C:\Users\Admin\anaconda3\lib\site-packages\flask\app.py", line 1936, in dispatch_request
return self.view_functions[rule.endpoint](**req.view_args)
File "C:\Users\Admin\anaconda3\lib\site-packages\dash\dash.py", line 569, in serve_layout
to_json(layout),
File "C:\Users\Admin\anaconda3\lib\site-packages\dash\_utils.py", line 20, in to_json
return to_json_plotly(value)
File "C:\Users\Admin\anaconda3\lib\site-packages\plotly\io\_json.py", line 124, in to_json_plotly
return json.dumps(plotly_object, cls=PlotlyJSONEncoder, **opts)
File "C:\Users\Admin\anaconda3\lib\json\__init__.py", line 234, in dumps
return cls(
File "C:\Users\Admin\anaconda3\lib\site-packages\_plotly_utils\utils.py", line 59, in encode
encoded_o = super(PlotlyJSONEncoder, self).encode(o)
File "C:\Users\Admin\anaconda3\lib\json\encoder.py", line 199, in encode
chunks = self.iterencode(o, _one_shot=True)
File "C:\Users\Admin\anaconda3\lib\json\encoder.py", line 257, in iterencode
return _iterencode(o, 0)
File "C:\Users\Admin\anaconda3\lib\site-packages\_plotly_utils\utils.py", line 136, in default
return _json.JSONEncoder.default(self, obj)
File "C:\Users\Admin\anaconda3\lib\json\encoder.py", line 179, in default
raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type DataFrame is not JSON serializable```
Sounds like the getData function is returning a pandas DataFrame directly. That won't work. You'll need to do this:
return df.to_dict(orient='records')
That should work.

How can I use ujson as a Flask encoder/decoder?

I have seen that once can use simplejson as a JSON encoder / decoder within a Flask application like this:
from simplejson import JSONEncoder, JSONDecoder
app.json_encoder = JSONEncoder
app.json_decoder = JSONDecoder
But ujson does not have such objects:
>>> from ujson import JSONEncoder
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ImportError: cannot import name 'JSONEncoder' from 'ujson' (/.../site-packages/ujson.cpython-38-x86_64-linux-gnu.so
What I tried
I thought of something like this:
from uuid import UUID, uuid4
import ujson as json
from flask import Flask, jsonify
from flask.json import JSONEncoder
class CustomJSONEncoder(JSONEncoder):
def default(self, obj):
if isinstance(obj, UUID):
return str(obj)
return JSONEncoder.default(self, obj)
def encode(self, o):
return json.dumps(o)
app = Flask(__name__)
app.json_encoder = CustomJSONEncoder
#app.route("/")
def index():
return jsonify({"foo": uuid4()})
app.run()
But I'm uncertain because the help to the decoder shows this:
| decode(self, s, _w=<built-in method match of re.Pattern object at 0x7f6a608404b0>, _PY3=True)
| Return the Python representation of ``s`` (a ``str`` or ``unicode``
| instance containing a JSON document)
|
| raw_decode(self, s, idx=0, _w=<built-in method match of re.Pattern object at 0x7f6a608404b0>, _PY3=True)
| Decode a JSON document from ``s`` (a ``str`` or ``unicode``
| beginning with a JSON document) and return a 2-tuple of the Python
| representation and the index in ``s`` where the document ended.
| Optionally, ``idx`` can be used to specify an offset in ``s`` where
| the JSON document begins.
|
| This can be used to decode a JSON document from a string that may
| have extraneous data at the end.
Is my implementation ok? How would I support those other parameters? When is decode and when raw_decode used?
When I run this, I get:
[2020-10-09 10:54:52,063] ERROR in app: Exception on / [GET]
Traceback (most recent call last):
File "/home/moose/.pyenv/versions/3.8.3/lib/python3.8/site-packages/flask/app.py", line 2447, in wsgi_app
response = self.full_dispatch_request()
File "/home/moose/.pyenv/versions/3.8.3/lib/python3.8/site-packages/flask/app.py", line 1952, in full_dispatch_request
rv = self.handle_user_exception(e)
File "/home/moose/.pyenv/versions/3.8.3/lib/python3.8/site-packages/flask/app.py", line 1821, in handle_user_exception
reraise(exc_type, exc_value, tb)
File "/home/moose/.pyenv/versions/3.8.3/lib/python3.8/site-packages/flask/_compat.py", line 39, in reraise
raise value
File "/home/moose/.pyenv/versions/3.8.3/lib/python3.8/site-packages/flask/app.py", line 1950, in full_dispatch_request
rv = self.dispatch_request()
File "/home/moose/.pyenv/versions/3.8.3/lib/python3.8/site-packages/flask/app.py", line 1936, in dispatch_request
return self.view_functions[rule.endpoint](**req.view_args)
File "main.py", line 28, in index
return jsonify({"foo": uuid4()})
File "/home/moose/.pyenv/versions/3.8.3/lib/python3.8/site-packages/flask/json/__init__.py", line 370, in jsonify
dumps(data, indent=indent, separators=separators) + "\n",
File "/home/moose/.pyenv/versions/3.8.3/lib/python3.8/site-packages/flask/json/__init__.py", line 211, in dumps
rv = _json.dumps(obj, **kwargs)
File "/home/moose/.pyenv/versions/3.8.3/lib/python3.8/site-packages/simplejson/__init__.py", line 398, in dumps
return cls(
File "main.py", line 14, in encode
return json.dumps(o)
TypeError: UUID('1f45a2bc-c964-48f0-b2f5-9ef7a2557966') is not JSON serializable
You can use a try block like that:
import ujson as json
from flask.json import JSONEncoder
class CustomJSONEncoder(JSONEncoder):
def default(self, obj):
try:
return json.dumps(obj)
except TypeError:
return JSONEncoder.default(self, obj)
from flask import Flask
app = Flask(__name__)
app.json_encoder = CustomJSONEncoder

FileNotFoundError: [Errno 2] File b'Downloads/BetterLifeIndex2015.csv' does not exist: b'Downloads/BetterLifeIndex2015.csv'

Resolved
Answer: Changed the path, it was in fact inncorect path after all. Used absolute path (alt+d+copy from file explorer". Also used "r" before the path so the path is treated like a raw string.
# load the data
BetterLifeIndex = pd.read_csv(r"C:\Users\brede\OneDrive\Dokumenter\Downloads\BetterLifeIndex2015.csv", thousands = ',')
gdp_per_capita = pd.read_csv(r"C:\Users\brede\OneDrive\Dokumenter\Downloads\gdpcapita.csv", thousands= ',', delimiter ='\t',
encoding = 'latin1' , na_values="n/a")
Im new to Python and I'm running a Example from a machine learning book. I cant get python to read my csv file.
Code:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.linear_model
def prepare_country_stats(oecd_bli, gdp_per_capita):
oecd_bli = oecd_bli[oecd_bli["INEQUALITY"]=="TOT"]
oecd_bli = oecd_bli.pivot(index="Country", columns="Indicator", values="Value")
gdp_per_capita.rename(columns={"2015": "GDP per capita"}, inplace=True)
gdp_per_capita.set_index("Country", inplace=True)
full_country_stats = pd.merge(left=oecd_bli, right=gdp_per_capita,
left_index=True, right_index=True)
full_country_stats.sort_values(by="GDP per capita", inplace=True)
remove_indices = [0, 1, 6, 8, 33, 34, 35]
keep_indices = list(set(range(36)) - set(remove_indices))
return full_country_stats[["GDP per capita", 'Life satisfaction']].iloc[keep_indices]
# load the data
oecd_bli = pd.read_csv("Downloads/BetterLifeIndex2015.csv", thousands = ',')
gdp_per_capita = pd.read_csv("C:/Users/brede/Downloads/gdpcapita.csv", thousands= ',', delimiter ='\t',
encoding = 'latin1' , na_values="n/a")
#prepare the data
country_stats = prepare_country_stats (oecd_bli, gdp_per_capita)
x = np.c_[country_stats["gdp per capita"]]
y = np.c_[country_stats["life satisfaction"]]
#visualize the data
country_stats.plot(kind= 'scatter' , x = "GDP per capita", y ='Life satisfaction')
#select a linear model
model = sklearn.linear_model.LinearRegression()
#train the model
model.fit (x, y)
#make a prediction for Cyprus
X_new = [[22587]] #Cyprus GDP per capita
print(model.predict(X_new)) #outputs[[5.96242338]]
The output is:
runfile('C:/Users/brede/Downloads/practice_gdp.py', wdir='C:/Users/brede/Downloads')
Traceback (most recent call last):
File "<ipython-input-59-2f130edd277c>", line 1, in <module>
runfile('C:/Users/brede/Downloads/practice_gdp.py', wdir='C:/Users/brede/Downloads')
File "C:\Users\brede\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
execfile(filename, namespace)
File "C:\Users\brede\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/brede/Downloads/practice_gdp.py", line 31, in <module>
oecd_bli = pd.read_csv("Downloads/BetterLifeIndex2015.csv", thousands = ',')
File "C:\Users\brede\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 685, in parser_f
return _read(filepath_or_buffer, kwds)
File "C:\Users\brede\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 457, in _read
parser = TextFileReader(fp_or_buf, **kwds)
File "C:\Users\brede\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 895, in __init__
self._make_engine(self.engine)
File "C:\Users\brede\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 1135, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "C:\Users\brede\Anaconda3\lib\site-packages\pandas\io\parsers.py", line 1917, in __init__
self._reader = parsers.TextReader(src, **kwds)
File "pandas\_libs\parsers.pyx", line 382, in pandas._libs.parsers.TextReader.__cinit__
File "pandas\_libs\parsers.pyx", line 689, in pandas._libs.parsers.TextReader._setup_parser_source
FileNotFoundError: [Errno 2] File b'Downloads/BetterLifeIndex2015.csv' does not exist: b'Downloads/BetterLifeIndex2015.csv'
I have triplechecked the path to the file, and I can't seem to figure this out! All help is appreciated.
This is done in Spyder, also tried in Jupyter with same result. I've even copied the path etc.
help...
I think you have to include'/' in the file path.Try that 'C:/Users/brede/OneDrive....'

Upload Pandas dataframe as a JSON object in Cloud Storage

I have been trying to upload a Pandas dataframe to a JSON object in Cloud Storage using Cloud Function. Follwing is my code -
def upload_blob(bucket_name, source_file_name, destination_blob_name):
"""Uploads a file to the bucket."""
storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name)
blob = bucket.blob(destination_blob_name)
blob.upload_from_file(source_file_name)
print('File {} uploaded to {}.'.format(
source_file_name,
destination_blob_name))
final_file = pd.concat([df, df_second], axis=0)
final_file.to_json('/tmp/abc.json')
with open('/tmp/abc.json', 'r') as file_obj:
upload_blob('test-bucket',file_obj,'abc.json')
I am getting the following error in line - blob.upload_from_file(source_file_name)
Deployment failure:
Function failed on loading user code. Error message: Code in file main.py
can't be loaded.
Detailed stack trace: Traceback (most recent call last):
File "/env/local/lib/python3.7/site-
packages/google/cloud/functions/worker.py", line 305, in
check_or_load_user_function
_function_handler.load_user_function()
File "/env/local/lib/python3.7/site-
packages/google/cloud/functions/worker.py", line 184, in load_user_function
spec.loader.exec_module(main)
File "<frozen importlib._bootstrap_external>", line 728, in exec_module
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "/user_code/main.py", line 6, in <module>
import datalab.storage as gcs
File "/env/local/lib/python3.7/site-packages/datalab/storage/__init__.py",
line 16, in <module>
from ._bucket import Bucket, Buckets
File "/env/local/lib/python3.7/site-packages/datalab/storage/_bucket.py",
line 21, in <module>
import datalab.context
File "/env/local/lib/python3.7/site-packages/datalab/context/__init__.py",
line 15, in <module>
from ._context import Context
File "/env/local/lib/python3.7/site-packages/datalab/context/_context.py",
line 20, in <module>
from . import _project
File "/env/local/lib/python3.7/site-packages/datalab/context/_project.py",
line 18, in <module>
import datalab.utils
File "/env/local/lib/python3.7/site-packages/datalab/utils/__init__.py",
line 15
from ._async import async, async_function, async_method
^
SyntaxError: invalid syntax
What possibly is the error?
You are passing a string to blob.upload_from_file(), but this method requires a file object. You probably want to use blob.upload_from_filename() instead. Check the sample in the GCP docs.
Alternatively, you could get the file object, and keep using blob.upload_from_file(), but it's unnecessary extra lines.
with open('/tmp/abc.json', 'r') as file_obj:
upload_blob('test-bucket', file_obj, 'abc.json')
Use a bucket object instead of string
something like upload_blob(conn.get_bucket(mybucket),'/tmp/abc.json','abc.json')}

How to Print list [] based JSON?

I am trying to print below json output using below script,
Json Sample:
[{"id":"56cd7e4d2d0edcace915e674","protocol":"https","hostName":"127.0.0.1","port":443,"serverName":"Site1","status":"connected","connected":true}]
Code i have used:
import Requests
response = requests.get("https://Site1/rest/settings/all-server-status",params={'serverId': '56cd7e4d2d0edcace915e674'}, verify=False)
json_data = json.loads(response.text)
When i am trying to print json_data i got below error,
Traceback (most recent call last):
File "<pyshell#116>", line 1, in <module>
json_data = json.loads(response.text)
File "C:\Python27\lib\json\__init__.py", line 338, in loads
return _default_decoder.decode(s)
File "C:\Python27\lib\json\decoder.py", line 366, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Python27\lib\json\decoder.py", line 384, in raw_decode
raise ValueError("No JSON object could be decoded")
ValueError: No JSON object could be decoded
Please help me on this, Thanks in advance!!