I get a JSON error while trying to run a script for yFinance - json

import pandas as pd
import numpy as np
import datetime
import yfinance as yf
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
To use interactive plotting we can also use cufflinks
import cufflinks as cf
To enable offline mode
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
cf.go_offline()
The function downloads daily market data to a pandas DataFrame
def download_daily_data(ticker, start, end):
data = yf.download(ticker, start, end)
return data
The function downloads daily market data to a pandas DataFrame
def download_daily_data(ticker, start, end):
data = yf.download(ticker, start, end)
return data

Related

How to fix "FileNotFoundError" when using proper code and file extension CSV?

I am trying to open a file with the extension .csv in python, however it keeps saying that the file is not found. I am copying the path from the side bar, so I don't believe that's the problem
I have tried to insert / and ./ before the path of the file
And r in front of the file name
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
bkgrnd = pd.read_csv('/Desktop/Sro/Natrium22.csv')
No matter what I've tried, it keeps saying FileNotFoundError
you can import csv if file will be always .csv,
import csv
with open('C:\Users\user\Desktop\Sro\Natrium22.csv') as csv_file:
csv_reader = csv.reader(csv_file, delimiter=',')
line_count = 0
for row in csv_reader:
specifix on windows, it needs normalization of your pathname, maybe thats the issue,
try doing, will surely work,
import os
import pandas as pd
cwd = os.getcwd()
filePath = 'C:/Users/user/Desktop/Sro/Natrium22.csv'
data = pd.read_csv(os.path.normcase(os.path.join(cwd, filePath)))
print(data)
you can try even,
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
bkgrnd = pd.read_csv(r'C:\Users\user\Desktop\Sro\Natrium22.csv')
print(bkgrnd)

Expected an indented block Exception

After running this code, i get this exception and i didn't found any place to fix it properly
import networkx as nx
from networkx.algorithms import bipartite
import numpy as np
from pandas import DataFrame, concat
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import ast
import csv
import sys
def plot_degree_dist(G):
in_degrees = G.in_degree()
in_degrees=dict(in_degrees)
in_values = sorted(set(in_degrees.values()))
in_hist = [in_degrees.values().count(x) for x in in_values]
plt.figure()
plt.grid(True)
plt.loglog(in_values, in_hist, 'ro-')
plt.plot(out_values, out_hist, 'bv-')
plt.legend(['In-degree', 'Out-degree'])
plt.xlabel('Degree')
plt.ylabel('Number of nodes')
plt.title('network of places in Cambridge')
#plt.xlim([0, 2*10**2])
I expect to receive a proper graph but all i get is this warning
File "<ipython-input-32-f89b896484d7>", line 2
in_degrees = G.in_degree()
^
IndentationError: expected an indented block
Python relies on proper indentation to identify function blocks. This code should work:
import networkx as nx
from networkx.algorithms import bipartite
import numpy as np
from pandas import DataFrame, concat
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import ast
import csv
import sys
def plot_degree_dist(G):
in_degrees = G.in_degree()
in_degrees=dict(in_degrees)
in_values = sorted(set(in_degrees.values()))
in_hist = [in_degrees.values().count(x) for x in in_values]
plt.figure()
plt.grid(True)
plt.loglog(in_values, in_hist, 'ro-')
plt.plot(out_values, out_hist, 'bv-')
plt.legend(['In-degree', 'Out-degree'])
plt.xlabel('Degree')
plt.ylabel('Number of nodes')
plt.title('network of places in Cambridge')
#plt.xlim([0, 2*10**2])
Basically just indent it by 2 or 4 spaces as per your style requirements.

raise_FirstSetError in SpaCy topic modeling

I want to create a LDA topic model and am using SpaCy to do so, following a tutorial. The error I receive when I try to use spacy is one I cannot find on google, so I'm hoping someone here knows what it's about.
I'm running this code on Anaconda:
import numpy as np
import pandas as pd
import re, nltk, spacy, gensim
# Sklearn
from sklearn.decomposition import LatentDirichletAllocation, TruncatedSVD
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import GridSearchCV
from pprint import pprint
# Plotting tools
import pyLDAvis
import pyLDAvis.sklearn
import matplotlib.pyplot as plt
df = pd.DataFrame(data)
def sent_to_words(sentences):
for sentence in sentences:
yield(gensim.utils.simple_preprocess(str(sentence), deacc=True))
# deacc=True removes punctuations
data_words = list(sent_to_words(data))
print(data_words[:1])
def lemmatization(texts, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV']):
"""https://spacy.io/api/annotation"""
texts_out = []
for sent in texts:
doc = nlp(" ".join(sent))
texts_out.append(" ".join([token.lemma_ if token.lemma_ not in ['-PRON-'] else '' for token in doc if token.pos_ in allowed_postags]))
return texts_out
nlp = spacy.load('en', disable=['parser', 'ner'])
# Do lemmatization keeping only Noun, Adj, Verb, Adverb
data_lemmatized = lemmatization(data_words, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV'])
print(data_lemmatized[:1])
And I receive the following error:
File "C:\Users\maart\AppData\Local\Continuum\anaconda3\lib\site-packages\_regex_core.py", line 1880, in get_firstset
raise _FirstSetError()
_FirstSetError
The error must occur somewhere after the lemmatization, because the other parts work fine.
Thanks a bunch!
I had this same issue and I was able to resolve it by uninstalling regex (I had the wrong version installed) and then running python -m spacy download en again. This will reinstall the correct version of regex.

How to convert Fantasy Premier League Data from JSON to CSV?

I am new to python and as per my thesis work I am trying to convert JSON to csv.I am able to download data in JSON but when I am writing it back using dictionaries it is not converting JSON to CSV with every column.
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm
import matplotlib.pyplot as plt
import numpy as np
import requests
from pprint import pprint
import csv
from time import sleep
s1='https://fantasy.premierleague.com/drf/element-summary/'
print s1
players = []
for player_link in range(1,450,1):
link = s1+""+str(player_link)
print link
r = requests.get(link)
print r
player =r.json()
players.append(player)
sleep(1)
with open('C:\Users\dell\Downloads\players_new2.csv', 'w') as f: # Just use 'w' mode in 3.x
w = csv.DictWriter(f,player.keys())
w.writeheader()
for player in players:
w.writerow(player)
I have uploaded the expected output(dec_15_expected.csv) and the program out with file name "player_new_wrong_output.csv"
https://drive.google.com/drive/folders/0BwKYmRU_0K6tZUljd3Q0aG1LT0U?usp=sharing
It will be a great help if some can tell what I am doing wrong.
Converting JSON to CSV is simple with pandas. Try this:
import pandas as pd
df=pd.read_json("input.json")
df.to_csv('output.csv')

Plotting candlestick data from a dataframe in Python

I would like create a daily candlestick plot from data i downloaded from yahoo using pandas. I'm having trouble figuring out how to use the candlestick matplotlib function in this context.
Here is the code:
#The following example, downloads stock data from Yahoo and plots it.
from pandas.io.data import get_data_yahoo
import matplotlib.pyplot as plt
from matplotlib.pyplot import subplots, draw
from matplotlib.finance import candlestick
symbol = "GOOG"
data = get_data_yahoo(symbol, start = '2013-9-01', end = '2013-10-23')[['Open','Close','High','Low','Volume']]
ax = subplots()
candlestick(ax,data['Open'],data['High'],data['Low'],data['Close'])
Thanks
Andrew.
Using bokeh:
import io
from math import pi
import pandas as pd
from bokeh.plotting import figure, show, output_file
df = pd.read_csv(
io.BytesIO(
b'''Date,Open,High,Low,Close
2016-06-01,69.6,70.2,69.44,69.76
2016-06-02,70.0,70.15,69.45,69.54
2016-06-03,69.51,70.48,68.62,68.91
2016-06-04,69.51,70.48,68.62,68.91
2016-06-05,69.51,70.48,68.62,68.91
2016-06-06,70.49,71.44,69.84,70.11
2016-06-07,70.11,70.11,68.0,68.35'''
)
)
df["Date"] = pd.to_datetime(df["Date"])
inc = df.Close > df.Open
dec = df.Open > df.Close
w = 12*60*60*1000
TOOLS = "pan,wheel_zoom,box_zoom,reset,save"
p = figure(x_axis_type="datetime", tools=TOOLS, plot_width=1000, title
= "Candlestick")
p.xaxis.major_label_orientation = pi/4
p.grid.grid_line_alpha=0.3
p.segment(df.Date, df.High, df.Date, df.Low, color="black")
p.vbar(df.Date[inc], w, df.Open[inc], df.Close[inc], fill_color="#D5E1DD", line_color="black")
p.vbar(df.Date[dec], w, df.Open[dec], df.Close[dec], fill_color="#F2583E", line_color="black")
output_file("candlestick.html", title="candlestick.py example")
show(p)
Code above forked from here:
http://docs.bokeh.org/en/latest/docs/gallery/candlestick.html
I have no reputation to comment #randall-goodwin answer, but for pandas 0.16.2 line:
# convert the datetime64 column in the dataframe to 'float days'
data.Date = mdates.date2num(data.Date)
must be:
data.Date = mdates.date2num(data.Date.dt.to_pydatetime())
because matplotlib does not support the numpy datetime64 dtype
I stumbled across a great pastebin entry: http://pastebin.com/ne7Fjdiq that does this well. I too was having trouble getting the calling syntax right. It usually revolves around transforming your data in simple ways to get the function to work right. My issue was with the datetime. There must be something in my format data. Once I replaced the Date series with range(maxdata) then it worked.
data = pandas.read_csv('data.csv', parse_dates={'Timestamp': ['Date', 'Time']}, index_col='Timestamp')
ticks = data.ix[:, ['Price', 'Volume']]
bars = ticks.Price.resample('1min', how='ohlc')
barsa = bars.fillna(method='ffill')
fig = plt.figure()
fig.subplots_adjust(bottom=0.1)
ax = fig.add_subplot(111)
plt.title("Candlestick chart")
volume = ticks.Volume.resample('1min', how='sum')
value = ticks.prod(axis=1).resample('1min', how='sum')
vwap = value / volume
Date = range(len(barsa))
#Date = matplotlib.dates.date2num(barsa.index)#
DOCHLV = zip(Date , barsa.open, barsa.close, barsa.high, barsa.low, volume)
matplotlib.finance.candlestick(ax, DOCHLV, width=0.6, colorup='g', colordown='r', alpha=1.0)
plt.show()
Here is the solution:
from pandas.io.data import get_data_yahoo
import matplotlib.pyplot as plt
from matplotlib import dates as mdates
from matplotlib import ticker as mticker
from matplotlib.finance import candlestick_ohlc
import datetime as dt
symbol = "GOOG"
data = get_data_yahoo(symbol, start = '2014-9-01', end = '2015-10-23')
data.reset_index(inplace=True)
data['Date']=mdates.date2num(data['Date'].astype(dt.date))
fig = plt.figure()
ax1 = plt.subplot2grid((1,1),(0,0))
plt.ylabel('Price')
ax1.xaxis.set_major_locator(mticker.MaxNLocator(6))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
candlestick_ohlc(ax1,data.values,width=0.2)
Found this question when I too was looking how to use candlestick with a pandas dataframe returned from one of the DataReader services like get_data_yahoo. I eventually figured it out. One of the keys was this other question, answered by Wes McKinney and RJRyV. Here is that link:
Pandas convert dataframe to array of tuples
The key was to read the candlestick.py function definition to determine how it expected to receive the data. The date needed to be converted first, then the entire dataframe needed to be converted to an array of tuples.
Here is the final code that worked for me. Maybe there is some other Candlestick chart out there somewhere that works directly on a pandas dataframe returned from one of the stock quote services. That would be very nice.
# Imports
from pandas.io.data import get_data_yahoo
from datetime import datetime, timedelta
import matplotlib.dates as mdates
from matplotlib.pyplot import subplots, draw
from matplotlib.finance import candlestick
import matplotlib.pyplot as plt
# get the data on a symbol (gets last 1 year)
symbol = "TSLA"
data = get_data_yahoo(symbol, datetime.now() - timedelta(days=365))
# drop the date index from the dateframe
data.reset_index(inplace = True)
# convert the datetime64 column in the dataframe to 'float days'
data.Date = mdates.date2num(data.Date)
# make an array of tuples in the specific order needed
dataAr = [tuple(x) for x in data[['Date', 'Open', 'Close', 'High', 'Low']].to_records(index=False)]
# construct and show the plot
fig = plt.figure()
ax1 = plt.subplot(1,1,1)
candlestick(ax1, dataAr)
plt.show()