I am using CTGAN library on colab notebook. I have passed on a tabular dataset, with one categorical feature. I have mentioned the categorical feature as given in dcumentation. The model training is also complete without error. I am getting ValueError, while generating a simulated data.
How to resolve this error
Adding a reproducible code below
import pandas as pd
import numpy as np
import seaborn as sns
from ctgan import CTGAN
iris = sns.load_dataset('iris')
iris.head()
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
le.fit(iris['species'].unique())
iris['species'] = pd.DataFrame(le.transform(iris['species']))
data = iris.copy()
ctgan_model = CTGAN(epochs=2,batch_size=50,verbose = True)
ctgan_model.fit(data)
n_ctgan_generated_data = 2000
synthetic_data = ctgan.sample(n_ctgan_generated_data)
Complete error message
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-17-199b6dc04389> in <module>
1 n_ctgan_generated_data = 2000
----> 2 synthetic_data = ctgan.sample(n_ctgan_generated_data)
6 frames
/usr/local/lib/python3.8/dist-packages/ctgan/synthesizers/base.py in wrapper(self, *args, **kwargs)
48 def wrapper(self, *args, **kwargs):
49 if self.random_states is None:
---> 50 return function(self, *args, **kwargs)
51
52 else:
/usr/local/lib/python3.8/dist-packages/ctgan/synthesizers/ctgan.py in sample(self, n, condition_column, condition_value)
475 data = data[:n]
476
--> 477 return self._transformer.inverse_transform(data)
478
479 def set_device(self, device):
/usr/local/lib/python3.8/dist-packages/ctgan/data_transformer.py in inverse_transform(self, data, sigmas)
211 column_data = data[:, st:st + dim]
212 if column_transform_info.column_type == 'continuous':
--> 213 recovered_column_data = self._inverse_transform_continuous(
214 column_transform_info, column_data, sigmas, st)
215 else:
/usr/local/lib/python3.8/dist-packages/ctgan/data_transformer.py in _inverse_transform_continuous(self, column_transform_info, column_data, sigmas, st)
185 def _inverse_transform_continuous(self, column_transform_info, column_data, sigmas, st):
186 gm = column_transform_info.transform
--> 187 data = pd.DataFrame(column_data[:, :2], columns=list(gm.get_output_sdtypes()))
188 data.iloc[:, 1] = np.argmax(column_data[:, 1:], axis=1)
189 if sigmas is not None:
/usr/local/lib/python3.8/dist-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
670 )
671 else:
--> 672 mgr = ndarray_to_mgr(
673 data,
674 index,
/usr/local/lib/python3.8/dist-packages/pandas/core/internals/construction.py in ndarray_to_mgr(values, index, columns, dtype, copy, typ)
322 )
323
--> 324 _check_values_indices_shape_match(values, index, columns)
325
326 if typ == "array":
/usr/local/lib/python3.8/dist-packages/pandas/core/internals/construction.py in _check_values_indices_shape_match(values, index, columns)
391 passed = values.shape
392 implied = (len(index), len(columns)-1)
--> 393 raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}")
394
395
ValueError: Shape of passed values is (2000, 2), indices imply (2000, 3)
Is this issue from the library, where I have to change any source code?
I am trying to write the Barycenter positions of planets to a csv file. I am using skyfield api, csv and python 3.7. The position output is given as x y z coordinates. I want to have columns for the date/time which I have, columns for each of the x, y & z coordinates for each planet on the same row. I have tried 2 ways to achieve this, 1 gives the data in the columns how I want it but on separate rows and the other gives the header as I want but the coordinates for a planet are in are in a single column rather than 3 columns. I have looked at other formatting examples but none have resolved the issue I have.
#This is first attempt;#
`
from skyfield.api import utc
from skyfield.api import load
import csv
import datetime
from datetime import datetime
from datetime import timedelta, date
planets = load('de421.bsp')
sun = planets['sun']
earth = planets['earth']
moon = planets['moon']
mercury = planets['mercury']
venus = planets['venus']
mars = planets['mars']
JUPITER_BARYCENTER = planets['JUPITER_BARYCENTER']
SATURN_BARYCENTER = planets['SATURN_BARYCENTER']
URANAS_BARYCENTER = planets['URANUS_BARYCENTER']
NEPTUNE_BARYCENTER = planets['NEPTUNE_BARYCENTER']
PLUTO_BARYCENTER = planets['PLUTO_BARYCENTER']
ts = load.timescale()
start_date = date(1986, 11, 8)
end_date = date(1986, 12, 31)
with open('BCRS positions-1.2.csv', 'w') as csvfile:
for single_date in daterange(start_date, end_date):
single_date.strftime("%Y/%m/%d")
#date = datetime.strptime(single_date, "%Y/%m/%d")
writer = csv.writer(csvfile)
t = ts.utc(single_date, 10, 30, 0)
BCRS = ('Date', single_date,
'Sun-x','Sun-y','Sun- z',sun.at(t).position.au,
'Mercury-x','Mercury-y','Mercury-z', mercury.at(t).position.au,
'Venus-x','Venus-y','Venus-z', venus.at(t).position.au,
'Moon-x','Moon-y','Moon-z',moon.at(t).position.au,
'Earth-x', 'Earth-y', 'Earth-z', earth.at(t).position.au,
'Mars-x', 'Mars-y', 'Mars-z', mars.at(t).position.au,
'Jupiter-x','Jupiter-y','Jupiter-z', JUPITER_BARYCENTER.at(t).position.au,
'Saturn-x','Saturn-y','Saturn-z', SATURN_BARYCENTER.at(t).position.au,
'Uranas-x','Uranas-y','Uranas-z', URANAS_BARYCENTER.at(t).position.au,
'Neptune-x','Neptune-y','Neptune-z', NEPTUNE_BARYCENTER.at(t).position.au,
'Pluto-x','Pluto-y','Pluto-z', PLUTO_BARYCENTER.at(t).position.au)
writer.writerow(BCRS)
csvfile.close()`
Output
Date,1986-11-08,Sun-x,Sun-y,Sun-z,[-0.0038418 0.0051725 0.00223502],Mercury-x,Mercury-y,Mercury-z,[0.30680392 0.12163008 0.03220969],Venus-x,Venus-y,Venus-z,[0.48875971 0.4985835 0.19301198],Moon-x,Moon-y,Moon-z,[0.6923354 0.65149371 0.28223558],Earth-x,Earth-y,Earth-z,[0.69095218 0.65328502 0.28325265],Mars-x,Mars-y,Mars-z,[1.38798446 0.094756 0.00565359],Jupiter-x,Jupiter-y,Jupiter-z,[ 4.93083814 -0.48155161 -0.32663981],Saturn-x,Saturn-y,Saturn-z,[-3.16415776 -8.82081358 -3.50690074],Uranas-x,Uranas-y,Uranas-z,[ -2.56470042 -17.41419317 -7.59061325],Neptune-x,Neptune-y,Neptune-z,[ 2.85009934 -27.8321119 -11.46284476],Pluto-x,Pluto-y,Pluto-z,[-22.59654067 -19.26368339 0.79656139]
##This is the second attempt;##
from skyfield.api import utc
from skyfield.api import load
import csv
import datetime
from datetime import datetime
from datetime import timedelta, date
# Sun, Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto
planets = load('de421.bsp')
sun = planets['sun']
earth = planets['earth']
moon = planets['moon']
mercury = planets['mercury']
mars = planets['mars']
venus = planets['venus']
JUPITER_BARYCENTER = planets['JUPITER_BARYCENTER']
SATURN_BARYCENTER = planets['SATURN_BARYCENTER']
URANAS_BARYCENTER = planets['URANUS_BARYCENTER']
NEPTUNE_BARYCENTER = planets['NEPTUNE_BARYCENTER']
PLUTO_BARYCENTER = planets['PLUTO_BARYCENTER']
# Specfiy the date and time (UTC) for planets positions
# date/time format - t = ts.utc(yyyy, mm ,dd, hh, mm, ss)
def daterange(start_date, end_date):
for n in range(int((end_date - start_date).days)):
yield start_date + timedelta(n)
ts = load.timescale()
start_date = datetime(1986, 11, 8, 10, 30, 0, tzinfo=utc)
end_date = datetime(1986, 12, 31, 10, 30, 0, tzinfo=utc)
with open('BCRS positions-test.csv', 'w', newline='') as csvFile:
writer = csv.writer(csvFile, delimiter=',')
writer.writerow(['Date', 'Sun-x', 'Sun-y', 'Sun-z', 'Mercury-x','Mercury-y','Mercury-z', 'Venus-x','Venus-y','Venus-z', 'Moon-x','Moon-y','Moon-z',
'Earth-x', 'Earth-y', 'Earth-z', 'Mars-x', 'Mars-y', 'Mars-z', 'Jupiter-x','Jupiter-y','Jupiter-z',
'Saturn-x','Saturn-y','Saturn-z', 'Uranas-x','Uranas-y','Uranas-z', 'Neptune-x','Neptune-y','UNeptune-z',
'Pluto-x','Pluto-y','Pluto-z'])
for single_date in daterange(start_date, end_date):
single_date.strftime("%Y/%m/%d")
#date = datetime.strptime(single_date, "%Y/%m/%d")
t = ts.utc(single_date, 10, 30, 0)
writer.writerow([single_date, sun.at(t).position.au, mercury.at(t).position.au, venus.at(t).position.au, moon.at(t).position.au,
earth.at(t).position.au, mars.at(t).position.au, JUPITER_BARYCENTER.at(t).position.au, SATURN_BARYCENTER.at(t).position.au,
URANAS_BARYCENTER.at(t).position.au, NEPTUNE_BARYCENTER.at(t).position.au, PLUTO_BARYCENTER.at(t).position.au])
csvFile.close()
Output
Sun-x,Sun-y,Sun-z - I get this as a header in 3 columns
[-0.0038418 0.0051725 0.00223502] I get this in a single column below the header but needs to be in 3 columns one for each x, y, z position
###What I am trying to achieve is;###
Sun-x,Sun-y,Sun-z,Mercury-x,Mercury-y,Mercury-z,Venus-x,Venus-y,Venus-z
-0.003953380897142202,0.004828488778607356,0.0020912483521329586,-0.11600122254182059,-0.39948956059670143,-0.20224588140237967,-0.4899043811693688,0.47522226197221284,0.2444554690221239
Any help with this would be greatly appreciated
I have found the solution I need to achieve what I wanted.
from skyfield.api import wgs84
from skyfield.api import load
from skyfield.data import iers
from skyfield.api import utc
from pytz import timezone
import csv
from datetime import datetime, timedelta
from itertools import chain
import pandas as pd
from decimal import *
def weekly_it(start, finish):
while finish > start:
start = start + timedelta(weeks=1)
yield start
start = datetime(1985, 6, 29, 9, 30, 15, tzinfo=utc)
finish = datetime(2022, 1, 1, 9, 30, 15, tzinfo=utc)
fieldnames = ['Date', 'Sun-x', 'Sun-y', 'Sun-z', 'Mercury-x','Mercury-y','Mercury-z', 'Venus-x','Venus-y','Venus-z', 'Moon-x','Moon-y','Moon-z',
'Earth-x', 'Earth-y', 'Earth-z', 'Mars-x', 'Mars-y', 'Mars-z', 'Jupiter-x','Jupiter-y','Jupiter-z',
'Saturn-x','Saturn-y','Saturn-z', 'Uranas-x','Uranas-y','Uranas-z', 'Neptune-x','Neptune-y','UNeptune-z',
'Pluto-x','Pluto-y','Pluto-z']
with open('FileName.csv', 'w', newline='') as csvFile:
writer = csv.writer(csvFile)
writer.writerow(fieldnames)
for week in weekly_it(start, finish):
w = [week.strftime("%Y-%m-%d")]
astrometric = aveley.at(ts.utc(week)).observe(sun)
r = w, astrometric.position.au
flatten_list = list(chain.from_iterable(r))
p = flatten_list
writer = csv.writer(csvFile, delimiter=",")
writer.writerows([p])
I Hope this helps someone with same issue
I'm trying to train a convolutional autoencoder to encode and decode a piano roll representation of monophonic midi clips. I reduced the note range to 3 octaves, divide songs into 100 time step pieces (where 1 time step = 1/100th of a second), and train the net in batches of 3 pieces.
I'm using Adagrad as my optimizer, and MSE as my loss function. The loss is huge, and I see no decrease in average loss even after hundreds of training examples are fed in.
Here's my code:
"""
Most absolutely simple assumptions:
- not changing the key of any of the files
- not changing the tempo of any of the files
- take blocks of 36 by 100
- divide up all songs by this amount, cutting off any excess from the
end, train
"""
from __future__ import print_function
import cPickle as pickle
import numpy as np
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from reverse_pianoroll import piano_roll_to_pretty_midi as pr2pm
N = 1000
# load a NxMxC dataset
# N: Number of clips
# M: Piano roll size, the number of midi notes that could possibly be 'on'
# C: Clip length, in 100ths of a second
dataset = pickle.load(open('mh-midi-data.pickle', 'rb'))
######## take a subset of the data for training ######
# based on the mean and standard deviation of non zero entries in the data, I've
# found that the most populous, and thus best range of notes to take is from
# 48 to 84 (C2 - C5); this is 3 octaves, which is much less than the original
# 10 and a half. Additionally, we're going to take a subsample of 1000 because
# i'm training on my macbook and the network is pretty simple
######################################################
dataset = dataset[:, :, 48:84, :]
dataset = dataset[:N]
######################################################
midi_dim, clip_len = dataset.shape[2:]
class Autoencoder(nn.Module):
def __init__(self, **kwargs):
super(Autoencoder, self).__init__(**kwargs)
# input is 3 x 1 x 36 x 100
self.conv1 = nn.Conv2d(in_channels=1, out_channels=14, kernel_size=(midi_dim, 2))
# now transformed to 3 x 14 x 1 x 99
self.conv2 = nn.Conv2d(in_channels=14, out_channels=77, kernel_size=(1, 4))
# now transformed to 3 x 77 x 1 x 96
input_size = 3*77*1*96
self.fc1 = nn.Linear(input_size, input_size/2)
self.fc2 = nn.Linear(input_size/2, input_size/4)
self.fc3 = nn.Linear(input_size/4, input_size/2)
self.fc4 = nn.Linear(input_size/2, input_size)
self.tconv2 = nn.ConvTranspose2d(in_channels=77, out_channels=14, kernel_size=(1, 4))
self.tconv1 = nn.ConvTranspose2d(in_channels=14, out_channels=1, kernel_size=(midi_dim, 2))
self.sigmoid = nn.Sigmoid()
return
def forward(self, x):
# print("1: {}".format(x.size()))
x = F.relu(self.conv1(x))
# print("2: {}".format(x.size()))
x = F.relu(self.conv2(x))
# print("3: {}".format(x.size()))
x = x.view(-1, np.prod(x.size()[:]))
# print("4: {}".format(x.size()))
x = F.relu(self.fc1(x))
# print("5: {}".format(x.size()))
h = F.relu(self.fc2(x))
# print("6: {}".format(h.size()))
d = F.relu(self.fc3(h))
# print("7: {}".format(d.size()))
d = F.relu(self.fc4(d))
# print("8: {}".format(d.size()))
d = d.view(3, 77, 1, 96)
# print("9: {}".format(d.size()))
d = F.relu(self.tconv2(d))
# print("10: {}".format(d.size()))
d = self.tconv1(d)
d = self.sigmoid(d)
# print("11: {}".format(d.size()))
return d
net = Autoencoder()
loss_fn = nn.MSELoss()
# optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum=0.9)
optimizer = optim.Adagrad(net.parameters(), lr=1e-3)
batch_count = 0
avg_loss = 0.0
print_every = 3
print("Beginning Training")
for epoch in xrange(2):
# for i, clip in enumerate(dataset):
for i in xrange(len(dataset)/3):
batch = dataset[(3*i):(3*i + 3), :, :]
# get the input, wrap it in a Variable
inpt = Variable(torch.from_numpy(batch).type(torch.FloatTensor))
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outpt = net(inpt)
loss = loss_fn(outpt, inpt)
loss.backward()
optimizer.step()
# print stats out
avg_loss += loss.data[0]
if batch_count % print_every == print_every - 1:
print('epoch: %d, batch_count: %d, loss: %.3f'%(
epoch + 1, batch_count + 1, avg_loss / print_every))
avg_loss = 0.0
batch_count += 1
print('Finished Training')
I'm really a beginner with this stuff, so any advice would be greatly appreciated.
Double check that you normalize your inpt to be in the range of 0 to 1. For instance, if you are working with images you could just divide inpt variable by 255.