plotting a Date from an csv. file in pylab - csv

I'm trying to plot dates from a csv. file column against three other columns of numbers. I'm new to python and have so far managed to import the columns into python and have tried to read them has an array but i can't seem to append them with the datetime module and plot the dates along the x axis along with my data.
Please can anyone help?
At the minute I keep getting the error message:
Traceback (most recent call last):
File "H:\AppliedGIS\Python\woops.py", line 24, in <module>
date = datetime.datetime.strptime['x', '%d/%m/%Y']
AttributeError: type object 'datetime.datetime' has no attribute 'datetime'
But i'm sure i'm going wrong in more than one place...
The data itself is formatted in four columns and when printed looks like this: ('04/03/2013', 7.0, 12.0, 17.0) ('11/03/2013', 23.0, 15.0, 23.0).
Here is the complete code
import csv
import numpy as np
import pylab as pl
import datetime
from datetime import datetime
data = np.genfromtxt('H:/AppliedGIS/Python/AssignmentData/GrowthDistribution/full.csv', names=True, usecols=(0, 1, 2, 3), delimiter= ',', dtype =[('Date', 'S10'),('HIGH', '<f8'), ('Medium', '<f8'), ('Low', '<f8')])
print data
x = [foo['Date'] for foo in data]
y = [foo['HIGH'] for foo in data]
y2 = [foo['Medium'] for foo in data]
y3 = [foo['Low'] for foo in data]
print x, y, y2, y3
dates = []
for x in data:
date = datetime.datetime.strptime['x', '%d/%m/%Y']
dates.append(date)
pl.plot(data[:, x], data[:, y], '-r', label= 'High Stocking Rate')
pl.plot(data[:, x], data[:, y2], '-g', label= 'Medium Stocking Rate')
pl.plot(data[:, x], data[:, y3], '-b', label= 'Low Stocking Rate')
pl.title('Amount of Livestock Grazing per hectare', fontsize=18)
pl.ylabel('Livestock per ha')
pl.xlabel('Date')
pl.grid(True)
pl.ylim(0,100)
pl.show()

The problem is in the way you have imported datetime.
The datetime module contains a class, also called datetime. At the moment, you are just importing the class as datetime, from which you can use the strptime method, like so:
from datetime import datetime
...
x = [foo['Date'] for foo in data]
...
dates=[]
for i in x:
date = datetime.strptime(i,'%d/%m/%Y')
dates.append(date)
Alternatively, you can import the complete datetime module, and then access the datetime class using datetime.datetime:
import datetime
...
x = [foo['Date'] for foo in data]
...
dates=[]
for i in x:
date = datetime.datetime.strptime(i,'%d/%m/%Y')
dates.append(date)

Related

TypeError: document must be an instance of dict, bson.son.SON, bson.raw_bson.RawBSONDocument, a type that inherits from collections.MutableMapping

I am trying to write data into pymongo and this the TypeError that I am getting. The Type for mydict1 is List. Do I have to convert my data into json or bson before I write it to pymongo? Kindly help.
Thanks.
from numpy.polynomial import Polynomial as poly
import numpy as np
import matplotlib.pyplot as plt
import pymongo
import json
import pandas as pd
df = pd.read_csv(r'D:\polynomial\points.csv')
print(df)
x= np.array(df['Wavelength(A)'].tolist())
x= np.divide([299792.458], x)
y= np.array(df['Level(A)'].tolist())
x_trimmed = np.delete(x, np.where(y < 1e-4))
y_trimmed = np.delete(y, np.where(y < 1e-4))
test= poly.fit(x_trimmed, y_trimmed, 10)
print (test)
list1= test.convert().coef
print (list1)
print (len(list1))
#print (type(list1))
to_list= list1.tolist()
#print(to_list)
#data_format= json.dumps(to_list)
l = len(to_list)
#print (l)
mydict1= []
for i in range(l):
mydict = { "a"+str(i) : to_list[i] }
mydict1.append(mydict)
print (mydict1)
myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["mydatabase"]
mycol = mydb["coefficients"]
x = mycol.insert_one(mydict1)
This is mydict1=
[{'a0': -2.3373800910827825e+34}, {'a1': 1.2084654060419298e+33}, {'a2': -2.811587585787653e+31}, {'a3': 3.876370042231405e+29}, {'a4': -3.507261557232249e+27}, {'a5': 2.1759768836934694e+25}, {'a6': -9.37514311649608e+22}, {'a7': 2.7697765301392782e+20}, {'a8': -5.370081422614614e+17}, {'a9': 616983041924503.2}, {'a10': -318990754999.1472}]
The problem is that MongoDB's insert_one method inserts a single document that is represented by a dictionary, not a list.
The possible solutions are:
use insert_many instead. In this case, you will have every list item as a separate mongodb document
make a dict with your list values. You can use something like {"items": mydict1}, or reduce(lambda x, y: x | y, mydict1) depending on the document structure that will be better for your needs

How to reduce Geojson size for repeated geometries (like timestamped data) in Pandas

I have a geopandas data frame which contain respective geometries as follows:
Date , value, Region Name, Geometry
2022-01-01 10 , ABC , Point((194 34),(121,23))
2022-02-01, 12 , ABC , Point((194 34),(121,23))
2022-02-01, 13 , DEF , Point((195 35),(123,24))
Almost equivalent Py code
import pandas as pd
import geopandas
import matplotlib.pyplot as plt
from shapely.geometry import Point
import geopandas
d = pd.DataFrame({'RegionName': ['ABC', 'ABC','DEF'],'Date': ['2021-01-01', '2021-02-01','2021-01-01'], 'Values': [10,11,12], 'Latitude': [-34.58, -34.58, -33.45], 'Longitude': [-58.66, -58.66, -70.66]})
gdf = geopandas.GeoDataFrame(d, geometry=geopandas.points_from_xy(d.Longitude, d.Latitude))
gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326")
How can I save this data into a json/geojson file by reducing the size of the file and appending non-repetitive data (e.g. date and value) to the repetitive value (e.g. geometry)
Sth like this:
[
---Region name:
-----ABC
-----Date:
--------2022-01-01
--------2022-02-01
-----Value:
--------10
--------12
-----Geometry
--------Polygon((194 34),(121,23))
---Region name:
-----DEF
-----Date:
--------2022-02-01
-----Value:
--------13
-----Geometry
--------Polygon((194 34),(121,23))
]
Requirement:
This file needs to be consumed by mapbox/leaflet/or any other similar tool
Was able to solve this, first we need the distinct of repetitive columns (lets call it A,e.g. geometry), then form a list of non repetetive ones (lets call this B, e.g. date and value) and then merge B and A and then do the Json conversion.
Py code:
import pandas as pd
import geopandas
import matplotlib.pyplot as plt
from shapely.geometry import Point
import geopandas
d = pd.DataFrame({'RegionName': ['ABC', 'ABC','DEF'],'Date': ['2021-01-01', '2021-02-01','2021-01-01'], 'Values': [10,11,12], 'Latitude': [-34.58, -34.58, -33.45], 'Longitude': [-58.66, -58.66, -70.66]})
gdf = geopandas.GeoDataFrame(d, geometry=geopandas.points_from_xy(d.Longitude, d.Latitude))
gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326")
#create a unique list of static data
df_dis_test= pd.DataFrame({'RegionName': ['ABC', 'DEF'],'Latitude': [-34.58, -33.45], 'Longitude': [-58.66, -70.66]})
gdfdf_dis_test = geopandas.GeoDataFrame(df_dis_test, geometry=geopandas.points_from_xy(df_dis_test['Longitude'], df_dis_test['Latitude']))
gdfdf_dis_test = geopandas.GeoDataFrame(df_dis_test, crs="EPSG:4326")
dgrp=d.groupby(['RegionName']). agg({ 'Date': lambda x: ','.join(x) } )
result = dgrp.merge( gdfdf_dis_test, how="inner", on="RegionName")
dgrpval=d.groupby(['RegionName']). agg({ 'Values': lambda x: list(x) } )
result2 = result.merge( dgrpval, how="inner", on="RegionName")
result2=result2.rename(columns={'geometry_x':'geometry'})
result2Gpd = geopandas.GeoDataFrame(result2, crs="EPSG:4326")#.drop(['geometry_y'],axis=1)
with open('Result2.geojson', 'w') as f:
f.write(result2Gpd.to_json (sort_keys=True, default=str))
and the output

Convert numbers from mathematica csv export to numpy complex array

I have exported data from mathematica to a csv file. The file structure looke as follows:
"x","y","Ex","Ey"
0.,0.,0.+0.*I,-3.0434726787506006*^-12+3.4234894344189825*^-12*I
0.,0.,0.+0.*I,-5.0434726787506006*^-12+10.4234894344189825*^-13*I
...
I'm reading in the data with pandas, but I get an error
import csv
import pandas as pd
import numpy as np
df=pd.read_csv('filename.csv')
df.columns=['x', 'y', 'Ex','Ey']
df['Ey'] = df['Ey'].str.replace('*^','E')
df['Ey'] = df['Ey'].str.replace('I','1j').apply(lambda x: np.complex(x))
Edit: I'm getting the following error in the second last line of my code:
Traceback (most recent call last):
File "plot.py", line 6, in <module>
df['Ey'] = df['Ey'].str.replace('*^','E')
File "/home/.../.local/lib/python2.7/site-packages/pandas/core/strings.py", line 1579, in replace
flags=flags)
File "/home/.../.local/lib/python2.7/site-packages/pandas/core/strings.py", line 424, in str_replace
regex = re.compile(pat, flags=flags)
File "/usr/lib/python2.7/re.py", line 194, in compile
return _compile(pattern, flags)
File "/usr/lib/python2.7/re.py", line 251, in _compile
raise error, v # invalid expression
sre_constants.error: nothing to repeat
When I write instead
df['Ey'] = df['Ey'].str.replace('*','E')
or
df['Ey'] = df['Ey'].str.replace('^','E')
I'm not getting an error. It seems like one can only give one charcter which is replaced?
Why beat yourself up messing with ascii encoded floats?
here is how to exchange complex arrays between python and mathematica using raw binary files.
in mathematica:
cdat = RandomComplex[{0, 1 + I}, 5]
{0.0142816 + 0.0835513 I, 0.434109 + 0.977644 I,
0.579678 + 0.337286 I, 0.426271 + 0.166166 I, 0.363249 + 0.0867334 I}
f = OpenWrite["test", BinaryFormat -> True]
BinaryWrite[f, cdat, "Complex64"]
Close[f]
or:
Export["test", cdat, "Binary", "DataFormat" -> "Complex64"]
in python:
import numpy as np
x=np.fromfile('test',np.complex64)
print x
[ 0.01428160+0.0835513j 0.43410850+0.97764391j 0.57967812+0.3372865j
0.42627081+0.16616575j 0.36324903+0.08673338j]
going the other way:
y=np.array([[1+2j],[3+4j]],np.complex64)
y.tofile('test')
f = OpenRead["test", BinaryFormat -> True]
BinaryReadList[f, "Complex64"]
Close[f]
note this will be several orders of magnitude faster than exchanging data by csv.

Cannot process a csv file into a pandas with tickers, and fetch stock-info from the DataFrame

I am trying to import a list of stock-tickers (the line that is #symbols_list...read_csv..), and fetch stock-info on that date into a pandas.
import datetime
import pandas as pd
from pandas import DataFrame
from pandas.io.data import DataReader
#symbols_list = [pd.read_csv('Stock List.csv', index_col=0)]
symbols_list = ['AAPL', 'TSLA', 'YHOO','GOOG', 'MSFT','ALTR','WDC','KLAC']
symbols=[]
start = datetime.datetime(2014, 2, 9)
#end = datetime.datetime(2014, 12, 30)
for ticker in symbols_list:
r = DataReader(ticker, "yahoo",
start = start)
#start=start, end)
# add a symbol column
r['Symbol'] = ticker
symbols.append(r)
# concatenate all the dfs
df = pd.concat(symbols)
#define cell with the columns that i need
cell= df[['Symbol','Open','High','Low','Adj Close','Volume']]
#changing sort of Symbol (ascending) and Date(descending) setting Symbol as first column and changing date format
cell.reset_index().sort(['Symbol', 'Date'], ascending=[1,0]).set_index('Symbol').to_csv('stock.csv', date_format='%d/%m/%Y')
The input file Stock list.csv
has the following content with these entries on each their separate row:
Index
MMM
ABT
ABBV
ACE
ACN
ACT
ADBE
ADT
AES
AET
AFL
AMG
and many more tickers of interest.
When run with the manually coded list
symbols_list = ['AAPL', 'TSLA', 'YHOO','GOOG', 'MSFT','ALTR','WDC','KLAC']
It all works fine and processes the input and stores it to a file,
But whenever I run the code with the read_csv from file, I get the following error:
runfile('Z:/python/CrystallBall/SpyderProject/getstocks3.py', wdir='Z:/python/CrystallBall/SpyderProject') Reloaded modules: pandas.io.data, pandas.tseries.common Traceback (most recent call last):
File "<ipython-input-32-67cbdd367f48>", line 1, in <module>
runfile('Z:/python/CrystallBall/SpyderProject/getstocks3.py', wdir='Z:/python/CrystallBall/SpyderProject')
File "C:\Program Files (x86)\WinPython-32bit-3.4.2.4\python-3.4.2\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 601, in runfile
execfile(filename, namespace)
File "C:\Program Files (x86)\WinPython-32bit-3.4.2.4\python-3.4.2\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 80, in execfile
exec(compile(open(filename, 'rb').read(), filename, 'exec'), namespace)
File "Z:/python/CrystallBall/SpyderProject/getstocks3.py", line 35, in <module>
cell.reset_index().sort(['Symbol', 'Date'], ascending=[1,0]).set_index('Symbol').to_csv('stock.csv', date_format='%d/%m/%Y')
File "C:\Users\Morten\AppData\Roaming\Python\Python34\site-packages\pandas\core\generic.py", line 1947, in __getattr__
(type(self).__name__, name))
AttributeError: 'Panel' object has no attribute 'reset_index'
Why can I only process the symbol_list manually laid out, and not the imported tickers from file?
Any takers? Any help greatly appreciated!
Your code has numerous issues which the following code has fixed and works:
In [4]:
import datetime
import pandas as pd
from pandas import DataFrame
from pandas.io.data import DataReader
temp='''Index
MMM
ABT
ABBV
ACE
ACN
ACT
ADBE
ADT
AES
AET
AFL
AMG'''
df = pd.read_csv(io.StringIO(temp), index_col=[0])
symbols=[]
start = datetime.datetime(2014, 2, 9)
for ticker in df.index:
r = DataReader(ticker, "yahoo",
start = start)
#start=start, end)
# add a symbol column
r['Symbol'] = ticker
symbols.append(r)
# concatenate all the dfs
df = pd.concat(symbols)
#define cell with the columns that i need
cell= df[['Symbol','Open','High','Low','Adj Close','Volume']]
#changing sort of Symbol (ascending) and Date(descending) setting Symbol as first column and changing date format
cell.reset_index().sort(['Symbol', 'Date'], ascending=[1,0]).set_index('Symbol').to_csv('stock.csv', date_format='%d/%m/%Y')
cell
Out[4]:
Symbol Open High Low Adj Close Volume
Date
2014-02-10 MMM 129.65 130.41 129.02 126.63 3317400
2014-02-11 MMM 129.70 131.49 129.65 127.88 2604000
... ... ... ... ... ... ...
2015-02-06 AMG 214.35 215.82 212.64 214.45 424400
[3012 rows x 6 columns]
So firstly this: symbols_list = [pd.read_csv('Stock List.csv', index_col=0)]
This will create a list with a single entry which will be a df with no columns and just an index of your ticker values.
This: for ticker in symbols_list:
won't work because the iterable object that is returned from the df is the column and not each entry, in your case you need to iterate over the index which is what my code does.
I'm not sure what you wanted to achieve, it isn't necessary to specify that index_col=0 if there is only one column, you can either create a df with just a single column, or if you pass squeeze=True this will create a Series which just has a single column.

what is the proper way to convert between mysql datetime and python timestamp?

according to http://dev.mysql.com/doc/refman/5.0/en/datetime.html. i got to find a way to convert the string value 'YYYY-MM-DD HH:MM:SS' to a timestamp int.
i looked up in python's doc.
i tried:
print(time.strptime('2013-01-12 15:27:43', '%Y-%m-%d %H:%M:%S'))
python give me a result like this.
time.struct_time(tm_year=2013, tm_mon=1, tm_mday=12, tm_hour=15, tm_min=27, tm_sec=43, tm_wday=5, tm_yday=12, tm_isdst=-1)
i tried this to convert timestamp to YYYY-MM-DD HH:MM:SS format
print(time.strftime('%Y-%m-%d %H:%M:%S',time.time()))
python give me a type error.
i only use timestamp to calculate time and date, i hope there's already a way in python, simple and efficient , and don't have to create temp data.
according to the answer i write two methods. hope it would be helpful
import time
def convertTimestampToSQLDateTime(value):
return time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(value))
def convertSQLDateTimeToTimestamp(value):
return time.mktime(time.strptime(value, '%Y-%m-%d %H:%M:%S'))
Happy to update this if I'm not properly understanding, but here are a few examples which may help. Note that this uses the datetime module instead of time.
>>> import datetime
Here we set up an example timestamp ts and a format f:
>>> ts = '2013-01-12 15:27:43'
>>> f = '%Y-%m-%d %H:%M:%S'
Similar to what you did above, we use the strptime function (from datetime.datetime) to convert our string into a datetime object based on the formatting parameter:
>>> datetime.datetime.strptime(ts, f)
datetime.datetime(2013, 1, 12, 15, 27, 43)
Now in reverse - here we use datetime.datetime.now() to get the current time as a datetime object:
>>> now = datetime.datetime.now()
>>> now
datetime.datetime(2013, 1, 12, 0, 46, 54, 490219)
In the datetime case, the strftime method is actually called on the datetime object itself, with the formatting parameter as an argument:
>>> now.strftime(f)
'2013-01-12 00:46:54'
In your situation, the reason you were getting an error is because time.time() returns a float:
>>> time.time()
1357980846.290231
But time.strftime needs a time tuple, similar to what you had above. Without getting into the maddening spiral that is time, a function such as time.localtime() will return the aforementioned time tuple and will return as you expect:
>>> now = time.localtime()
>>> now
time.struct_time(tm_year=2013, tm_mon=1, tm_mday=12, tm_hour=0, tm_min=55, tm_sec=55, tm_wday=5, tm_yday=12, tm_isdst=0)
>>> f = '%Y-%m-%d %H:%M:%S'
>>> time.strftime(f, now)
'2013-01-12 00:55:55'
I'm only adding this class to potentially save the next guy a little time. If anyone finds this useful, upvote RocketDonkey's answer.
## dev on v3.7.6
from datetime import datetime
from time import mktime, time
class Time:
'''\
*Convenience class for easy format conversion*\n
Accepts time() float, datetime object, or SQL datetime str.\n
If no time arg is provided, object is initialized with time().\n
id kwarg can be used to keep track of objects.\n
Access formats as instance.t, instance.dt, or instance.sql.\
'''
f = '%Y-%m-%d %H:%M:%S'
def __init__(self, *arg, id=None) -> None:
self.id = id
if len(arg) == 0:
self.t = time()
self.dt = self._dt
self.sql = self._sql
else:
arg = arg[0]
if isinstance(arg, float) or arg == None:
if isinstance(arg, float):
self.t = arg
else:
self.t = time()
self.dt = self._dt
self.sql = self._sql
elif isinstance(arg, datetime):
self.t = arg.timestamp()
self.dt = arg
self.sql = self._sql
elif isinstance(arg, str):
self.sql = arg
if '.' not in arg:
self.dt = datetime.strptime(self.sql, Time.f)
else:
normal, fract = arg.split('.')
py_t = datetime.strptime(normal, Time.f)
self.dt = py_t.replace(
microsecond=int(fract.ljust(6, '0')[:6]))
self.t = self.dt.timestamp()
#property
def _dt(self) -> datetime:
return datetime.fromtimestamp(self.t)
#property
def _sql(self) -> str:
t = self.dt
std = t.strftime(Time.f)
fract = f'.{str(round(t.microsecond, -3))[:3]}'
return std + fract
def __str__(self) -> str:
if self.id == None:
return self.sql
else:
return f'Time obj "{self.id}": {self.sql}'
def test():
def test_one(*arg):
t = Time(*arg, id=type(*arg))
print(t)
print(t.t)
print(t.dt)
sql = '2020-01-22 15:30:33.433'
time_float = 1579927395.3708763
dt_obj = datetime.now()
for datum in [sql, time_float, dt_obj, None]:
test_one(datum)
if __name__ == '__main__':
test()