Keras Input layer (None, 200, 3): why is there a None? "expected input to have 3 dimensions, but got array with shape (200, 3)"

The acc and gyro arrays passed to model.fit have shape (200, 3), and the shape in the Input layer is also (200, 3). Why do I get this error: "Error when checking input: expected acc_input to have 3 dimensions, but got array with shape (200, 3)"? This is a visualization of my model.
Here's my code:
WIDE = 20
FEATURE_DIM = 30
CHANNEL = 1
CONV_NUM = 64
CONV_LEN = 3
CONV_LEN_INTE = 3#4
CONV_LEN_LAST = 3#5
CONV_NUM2 = 64
CONV_MERGE_LEN = 8
CONV_MERGE_LEN2 = 6
CONV_MERGE_LEN3 = 4
rnn_size=128
acc_input_tensor = Input(shape=(200,3),name = 'acc_input')
gyro_input_tensor = Input(shape=(200,3),name= 'gyro_input')
Acc_input_tensor = Reshape(target_shape=(20,30,1))(acc_input_tensor)
Gyro_input_tensor = Reshape(target_shape=(20,30,1))(gyro_input_tensor)
acc_conv1 = Conv2D(CONV_NUM,(1, 1*3*CONV_LEN),strides= (1,1*3),padding='valid',activation=None)(Acc_input_tensor)
acc_conv1 = BatchNormalization(axis=1)(acc_conv1)
acc_conv1 = Activation('relu')(acc_conv1)
acc_conv1 = Dropout(0.2)(acc_conv1)
acc_conv2 = Conv2D(CONV_NUM,(1,CONV_LEN_INTE),strides= (1,1),padding='valid',activation=None)(acc_conv1)
acc_conv2 = BatchNormalization(axis=1)(acc_conv2)
acc_conv2 = Activation('relu')(acc_conv2)
acc_conv2 = Dropout(0.2)(acc_conv2)
acc_conv3 = Conv2D(CONV_NUM,(1,CONV_LEN_LAST),strides=(1,1),padding='valid',activation=None)(acc_conv2)
acc_conv3 = BatchNormalization(axis=1)(acc_conv3)
acc_conv3 = Activation('relu')(acc_conv3)
acc_conv3 = Dropout(0.2)(acc_conv3)
gyro_conv1 = Conv2D(CONV_NUM,(1, 1*3*CONV_LEN),strides=(1,1*3),padding='valid',activation=None)(Gyro_input_tensor)
gyro_conv1 = BatchNormalization(axis=1)(gyro_conv1)
gyro_conv1 = Activation('relu')(gyro_conv1)
gyro_conv1 = Dropout(0.2)(gyro_conv1)
gyro_conv2 = Conv2D(CONV_NUM,(1, CONV_LEN_INTE),strides=(1,1),padding='valid',activation=None)(gyro_conv1)
gyro_conv2 = BatchNormalization(axis=1)(gyro_conv2)
gyro_conv2 = Activation('relu')(gyro_conv2)
gyro_conv2 = Dropout(0.2)(gyro_conv2)
gyro_conv3 = Conv2D(CONV_NUM,(1, CONV_LEN_LAST),strides=(1,1),padding='valid',activation=None)(gyro_conv2)
gyro_conv3 = BatchNormalization(axis=1)(gyro_conv3)
gyro_conv3 = Activation('relu')(gyro_conv3)
gyro_conv3 = Dropout(0.2)(gyro_conv3)
sensor_conv_in = concatenate([acc_conv3, gyro_conv3], 2)
sensor_conv_in = Dropout(0.2)(sensor_conv_in)
sensor_conv1 = Conv2D(CONV_NUM2,kernel_size=(2, CONV_MERGE_LEN),padding='SAME')(sensor_conv_in)
sensor_conv1 = BatchNormalization(axis=1)(sensor_conv1)
sensor_conv1 = Activation('relu')(sensor_conv1)
sensor_conv1 = Dropout(0.2)(sensor_conv1)
sensor_conv2 = Conv2D(CONV_NUM2,kernel_size=(2, CONV_MERGE_LEN2),padding='SAME')(sensor_conv1)
sensor_conv2 = BatchNormalization(axis=1)(sensor_conv2)
sensor_conv2 = Activation('relu')(sensor_conv2)
sensor_conv2 = Dropout(0.2)(sensor_conv2)
sensor_conv3 = Conv2D(CONV_NUM2,kernel_size=(2, CONV_MERGE_LEN3),padding='SAME')(sensor_conv2)
sensor_conv3 = BatchNormalization(axis=1)(sensor_conv3)
sensor_conv3 = Activation('relu')(sensor_conv3)
conv_shape = sensor_conv3.get_shape()
print conv_shape
x1 = Reshape(target_shape=(int(conv_shape[1]), int(conv_shape[2]*conv_shape[3])))(sensor_conv3)
x1 = Dense(64, activation='relu')(x1)
gru_1 = GRU(rnn_size, return_sequences=True, init='he_normal', name='gru1')(x1)
gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, init='he_normal', name='gru1_b')(x1)
gru1_merged = merge([gru_1, gru_1b], mode='sum')
gru_2 = GRU(rnn_size, return_sequences=True, init='he_normal', name='gru2')(gru1_merged)
gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, init='he_normal', name='gru2_b')(gru1_merged)
x = merge([gru_2, gru_2b], mode='concat')
x = Dropout(0.25)(x)
n_class=2
x = Dense(n_class)(x)
model = Model(input=[acc_input_tensor,gyro_input_tensor], output=x)
model.compile(loss='mean_squared_error',optimizer='adam')
model.fit(inputs=[acc,gyro],outputs=labels,batch_size=1, validation_split=0.2, epochs=2,verbose=1 ,
shuffle=False)

Shape (None, 200, 3) is used in Keras for batches: None stands for the batch size, which may still be unknown at the time the input arrays are created or reshaped. If you use batch_size = 128, your batch input matrix will have shape (128, 200, 3). So the arrays you pass to model.fit need a leading batch dimension: an array of shape (200, 3) is a single sample and must be stacked into shape (num_samples, 200, 3).
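For example, a minimal sketch, assuming acc and gyro are NumPy arrays each holding a single (200, 3) recording (the placeholder data and the positional model.fit call below are illustrative, not from the question):

import numpy as np

acc = np.random.rand(200, 3).astype(np.float32)   # placeholder single sample
gyro = np.random.rand(200, 3).astype(np.float32)  # placeholder single sample

# Keras expects (batch_size, 200, 3), so add a leading batch axis.
acc_batch = np.expand_dims(acc, axis=0)    # shape (1, 200, 3)
gyro_batch = np.expand_dims(gyro, axis=0)  # shape (1, 200, 3)

# model.fit takes x and y positionally (it has no inputs=/outputs= keywords):
# model.fit([acc_batch, gyro_batch], labels, batch_size=1, epochs=2)

With several recordings, np.stack(list_of_samples) produces the (num_samples, 200, 3) array directly.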

Related

Questions on interface condition convergence in PINNs (Physics-Informed Neural Networks)

I am solving a magnetostatic problem using a PINN.
I have succeeded in solving a simple Poisson equation. However, in the analysis that takes the geometry into account, the interface-condition loss does not converge.
I've tried numerous things, including adding mini-batches and changing the model.
I'd appreciate it if you could let me know what's wrong with my code.
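For reference, the standard magnetostatic interface conditions that the physics_net_for_ic loss below appears to enforce are continuity of the normal flux density and of the tangential field intensity across the iron/coil boundary:

$$B_{1n} = B_{2n}, \qquad H_{1t} = H_{2t}, \qquad \text{with } H = B/\mu,$$

which is why the code divides the B components by the respective permeabilities before forming the loss.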
class ironmaxwell(Model):
    def __init__(self):
        super(ironmaxwell, self).__init__()
        initializer = tf.keras.initializers.GlorotUniform
        self.id1 = tf.keras.layers.Dropout(rate=0.2)
        self.ih1 = Dense(40, activation='elu', kernel_regularizer=tf.keras.regularizers.L2(0.001))
        self.id2 = tf.keras.layers.Dropout(rate=0.2)
        self.ih2 = Dense(40, activation='elu', kernel_regularizer=tf.keras.regularizers.L2(0.001))
        self.id3 = tf.keras.layers.Dropout(rate=0.2)
        self.ih3 = Dense(40, activation='elu', kernel_regularizer=tf.keras.regularizers.L2(0.001))
        self.id4 = tf.keras.layers.Dropout(rate=0.2)
        self.ih4 = Dense(40, activation='elu', kernel_regularizer=tf.keras.regularizers.L2(0.001))
        self.id5 = tf.keras.layers.Dropout(rate=0.2)
        self.ih5 = Dense(40, activation='elu', kernel_regularizer=tf.keras.regularizers.L2(0.001))
        self.id6 = tf.keras.layers.Dropout(rate=0.2)
        self.ih6 = Dense(40, activation='elu', kernel_regularizer=tf.keras.regularizers.L2(0.001))
        self.iu1 = Dense(40, activation='linear', kernel_regularizer=tf.keras.regularizers.L2(0.001))
        self.iw1 = Dense(40, activation='linear', kernel_regularizer=tf.keras.regularizers.L2(0.001))
        self.iu = Dense(1, activation='linear')

    def call(self, state):
        ix = self.id1(state)
        iy = self.iu1(state)
        iz = self.iw1(state)
        ix = (1-self.ih1(ix))*iy + self.ih1(ix)*iz
        ix = self.id2(ix)
        ix = (1-self.ih2(ix))*iy + self.ih2(ix)*iz
        ix = self.id3(ix)
        ix = (1-self.ih3(ix))*iy + self.ih4(ix)*iz
        ix = self.id4(ix)
        ix = (1-self.ih4(ix))*iy + self.ih4(ix)*iz
        ix = self.id5(ix)
        ix = (1-self.ih5(ix))*iy + self.ih5(ix)*iz
        ix = self.id6(ix)
        ix = (1-self.ih6(ix))*iy + self.ih6(ix)*iz
        iout = self.iu(ix)
        return iout
class coilmaxwell(Model):
    def __init__(self):
        super(coilmaxwell, self).__init__()
        initializer = tf.keras.initializers.GlorotUniform
        self.d1 = tf.keras.layers.Dropout(rate=0.2)
        self.h1 = Dense(40, activation='elu', kernel_regularizer=tf.keras.regularizers.L2(0.001))
        self.d2 = tf.keras.layers.Dropout(rate=0.2)
        self.h2 = Dense(40, activation='elu', kernel_regularizer=tf.keras.regularizers.L2(0.001))
        self.d3 = tf.keras.layers.Dropout(rate=0.2)
        self.h3 = Dense(40, activation='elu', kernel_regularizer=tf.keras.regularizers.L2(0.001))
        self.d4 = tf.keras.layers.Dropout(rate=0.2)
        self.h4 = Dense(40, activation='elu', kernel_regularizer=tf.keras.regularizers.L2(0.001))
        self.d5 = tf.keras.layers.Dropout(rate=0.2)
        self.h5 = Dense(40, activation='elu', kernel_regularizer=tf.keras.regularizers.L2(0.001))
        self.d6 = tf.keras.layers.Dropout(rate=0.2)
        self.h6 = Dense(40, activation='elu', kernel_regularizer=tf.keras.regularizers.L2(0.001))
        self.u1 = Dense(40, activation='linear', kernel_regularizer=tf.keras.regularizers.L2(0.001))
        self.w1 = Dense(40, activation='linear', kernel_regularizer=tf.keras.regularizers.L2(0.001))
        self.u = Dense(1, activation='linear')

    def call(self, state):
        x = self.d1(state)
        y = self.u1(state)
        z = self.w1(state)
        x = (1-self.h1(x))*y + self.h1(x)*z
        x = self.d2(x)
        x = (1-self.h2(x))*y + self.h2(x)*z
        x = self.d3(x)
        x = (1-self.h3(x))*y + self.h4(x)*z
        x = self.d4(x)
        x = (1-self.h4(x))*y + self.h4(x)*z
        x = self.d5(x)
        x = (1-self.h5(x))*y + self.h5(x)*z
        x = self.d6(x)
        x = (1-self.h6(x))*y + self.h6(x)*z
        out = self.u(x)
        return out
##############################################################################################################################
class MaxwellPinn(object):
    def __init__(self):
        self.lr = 0.001
        # self.lr = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=.001, decay_steps=10, decay_rate=.01)
        self.opt_iron = Adam(self.lr)
        self.opt_coil = Adam(self.lr)
        self.ironmaxwell = ironmaxwell()
        self.ironmaxwell.build(input_shape=(None, 2))
        self.coilmaxwell = coilmaxwell()
        self.coilmaxwell.build(input_shape=(None, 2))
        self.train_loss_history = []
        self.iter_count = 0
        self.instant_loss = 0
        self.bnd_loss = 0
        self.ic_loss = 0
        self.lamda = 0.1
        self.pde_loss = 0
        self.max_value = 0.012315021035034
        self.iron_loss = 0
        self.coil_loss = 0
################################################################################################################################
    #tf.function
    def physics_net_iron(self, xt, jmu):
        x_i = xt[:, 0:1]
        t_i = xt[:, 1:2]
        with tf.GradientTape(persistent=True) as tape:
            tape.watch(t_i)
            tape.watch(x_i)
            xt_t_i = tf.concat([x_i, t_i], axis=1)
            u_i = self.ironmaxwell(xt_t_i)
            u_x_i = tape.gradient(u_i, x_i)
            u_t_i = tape.gradient(u_i, t_i)
            u_xx_i = tape.gradient(u_x_i, x_i)
            u_tt_i = tape.gradient(u_t_i, t_i)
        del tape
        return (u_xx_i + u_tt_i + jmu)

    #tf.function
    def physics_net_coil(self, xt, jmu):
        x_c = xt[:, 0:1]
        t_c = xt[:, 1:2]
        with tf.GradientTape(persistent=True) as tape:
            tape.watch(t_c)
            tape.watch(x_c)
            xt_t_c = tf.concat([x_c, t_c], axis=1)
            u_c = self.coilmaxwell(xt_t_c)
            u_x_c = tape.gradient(u_c, x_c)
            u_t_c = tape.gradient(u_c, t_c)
            u_xx_c = tape.gradient(u_x_c, x_c)
            u_tt_c = tape.gradient(u_t_c, t_c)
        del tape
        return (u_xx_c + u_tt_c + jmu)
    ####################################################################################################################
    #tf.function
    def physics_net_for_ic(self, xt, in_mu, nom, out_mu):  # physics information for the interface (boundary) condition
        x = xt[:, 0:1]
        t = xt[:, 1:2]
        with tf.GradientTape(persistent=True) as tape:
            tape.watch(t)
            tape.watch(x)
            xt_t = tf.concat([x, t], axis=1)
            out_u = self.ironmaxwell(xt_t)
            out_u_x = tape.gradient(out_u, x)
            out_u_y = tape.gradient(out_u, t)
            in_u = self.coilmaxwell(xt_t)
            in_u_x = tape.gradient(in_u, x)
            in_u_y = tape.gradient(in_u, t)
        del tape
        out_b_x = out_u_y
        out_b_y = out_u_x
        out_h_x = out_b_x/out_mu
        out_h_y = out_b_y/out_mu
        in_b_x = in_u_y
        in_b_y = in_u_x
        in_h_x = in_b_x/in_mu
        in_h_y = in_b_y/in_mu
        loss_b = tf.add(tf.multiply((in_b_x-out_b_x), nom), tf.multiply((in_b_y-out_b_y), (1-nom)))
        loss_h = tf.add(tf.multiply((in_h_x-out_h_x), (1-nom)), tf.multiply((in_h_y-out_h_y), nom))
        return loss_b, loss_h
#############################################################################################################
    def save_weights(self, path):
        self.ironmaxwell.save_weights(path + 'ironmaxwell.h5')
        self.coilmaxwell.save_weights(path + 'coilmaxwell.h5')
    #############################################################################################################
    def load_weights(self, path):
        self.ironmaxwell.load_weights(path + 'ironmaxwell.h5')
        self.coilmaxwell.load_weights(path + 'coilmaxwell.h5')
    #############################################################################################################
    def compute_loss_iron(self, f, u_bnd_hat, u_bnd_sol, penalty, loss_b, loss_h):
        loss_col = tf.reduce_mean(tf.square(f))
        loss_bnd = tf.reduce_mean(tf.square(u_bnd_hat - u_bnd_sol))
        loss_mag = tf.reduce_mean(tf.square(loss_b))
        loss_field = tf.reduce_mean(tf.square(loss_h))
        loss = loss_col + loss_bnd + loss_mag + loss_field
        self.iron_loss = loss.numpy()
        return loss

    def compute_loss_coil(self, f, penalty, loss_b, loss_h):
        loss_col = tf.reduce_mean(tf.square(f))
        loss_mag = tf.reduce_mean(tf.square(loss_b))
        loss_field = tf.reduce_mean(tf.square(loss_h))
        loss = loss_col + loss_mag + loss_field
        self.coil_loss = loss.numpy()
        return loss
#############################################################################################################
    def compute_grad(self, xt_col_iron, xt_col_coil, xt_bnd, u_bnd_sol, ic, nom):
        with tf.GradientTape(persistent=True) as tape:
            J_coil = 9800
            J_iron = 0
            mu_coil = 1.2566e-06
            mu_iron = mu_coil*2000
            f_iron = self.physics_net_iron(xt_col_iron, J_iron*mu_iron)  # PDE loss of the iron
            f_coil = self.physics_net_coil(xt_col_coil, J_coil*mu_coil)  # PDE loss of the coil
            u_bnd_hat = self.ironmaxwell(xt_bnd)  # iron is the outer region
            loss_b, loss_h = self.physics_net_for_ic(ic, mu_coil, nom, mu_iron)
            loss_iron = self.compute_loss_iron(f_iron, u_bnd_hat, u_bnd_sol, 1, loss_b, loss_h)
            loss_coil = self.compute_loss_coil(f_coil, 1, loss_b, loss_h)
        iron_grads = tape.gradient(loss_iron, self.ironmaxwell.trainable_variables)
        coil_grads = tape.gradient(loss_coil, self.coilmaxwell.trainable_variables)
        loss = loss_iron + loss_coil
        return loss, iron_grads, coil_grads
    #############################################################################################################
    def callback(self, arg=None):
        if self.iter_count % 10 == 0:
            print('iter=', self.iter_count, ', loss=', self.instant_loss, 'iron_loss=', self.iron_loss, 'coil_loss=', self.coil_loss)
        self.train_loss_history.append([self.iter_count, self.instant_loss])
        self.iter_count += 1
    #############################################################################################################
    def train_with_adam(self, xt_col_iron, xt_col_coil, xt_bnd, u_bnd_sol, adam_num, ic, nom):
        def learn():
            loss, iron_grads, coil_grads = self.compute_grad(xt_col_iron, xt_col_coil, xt_bnd, u_bnd_sol, ic, nom)
            self.opt_iron.apply_gradients(zip(iron_grads, self.ironmaxwell.trainable_variables))
            self.opt_coil.apply_gradients(zip(coil_grads, self.coilmaxwell.trainable_variables))
            return loss
        for iter in range(int(adam_num)):
            loss = learn()
            self.instant_loss = loss.numpy()
            self.opt_iron = Adam(self.lr/(1+0.001*iter))
            self.opt_coil = Adam(self.lr/(1+0.001*iter))
            self.callback()
#############################################################################################################
    def train_with_lbfgs(self, xt_col, xt_bnd, u_bnd_sol, lbfgs_num, J, mu, penalty, ii, ic, ii_mu, ic_mu, nom):
        def vec_weight():
            # vectorize weights
            weight_vec = []
            # Loop over all weights
            for v in self.burgers.trainable_variables:
                weight_vec.extend(v.numpy().flatten())
            weight_vec = tf.convert_to_tensor(weight_vec)
            return weight_vec
        w0 = vec_weight().numpy()

        def restore_weight(weight_vec):
            # restore weight vector to model weights
            idx = 0
            for v in self.burgers.trainable_variables:
                vs = v.shape
                # weight matrices
                if len(vs) == 2:
                    sw = vs[0] * vs[1]
                    updated_val = tf.reshape(weight_vec[idx:idx + sw], (vs[0], vs[1]))
                    idx += sw
                # bias vectors
                elif len(vs) == 1:
                    updated_val = weight_vec[idx:idx + vs[0]]
                    idx += vs[0]
                # assign variables (casting necessary since scipy requires float64 type)
                v.assign(tf.cast(updated_val, dtype=tf.float32))

        def loss_grad(w):
            # update weights in model
            restore_weight(w)
            loss, grads, loss_bnd = self.compute_grad(xt_col, xt_bnd, u_bnd_sol, J, mu, penalty, ii, ic, ii_mu, ic_mu, nom)
            # vectorize gradients
            grad_vec = []
            for g in grads:
                grad_vec.extend(g.numpy().flatten())
            # gradient list to array; scipy routines require 64-bit floats
            loss = loss.numpy().astype(np.float64)
            self.instant_loss = loss
            grad_vec = np.array(grad_vec, dtype=np.float64)
            return loss, grad_vec

        return scipy.optimize.minimize(fun=loss_grad,
                                       x0=w0,
                                       jac=True,
                                       method='L-BFGS-B',
                                       callback=self.callback,
                                       options={'maxiter': lbfgs_num,
                                                'maxfun': 5000,
                                                'maxcor': 500,
                                                'maxls': 500,
                                                'ftol': 1.0 * np.finfo(float).eps})
########################################################################################################################
    def predict_iron(self, xt):
        u_pred = self.ironmaxwell(xt)
        return u_pred

    def predict_coil(self, xt):
        u_pred = self.coilmaxwell(xt)
        return u_pred
#############################################################################################################
    def train(self, adam_num, lbfgs_num):
        iron_x = scipy.io.loadmat('iron_x.mat')  # iron coordinates
        iron_y = scipy.io.loadmat('iron_y.mat')
        coil_x = scipy.io.loadmat('coil_x.mat')  # coil coordinates
        coil_y = scipy.io.loadmat('coil_y.mat')
        iron_J = scipy.io.loadmat('iron_J.mat')
        iron_mu = scipy.io.loadmat('iron_mu.mat')
        coil_J = scipy.io.loadmat('iron_J.mat')
        coil_mu = scipy.io.loadmat('iron_mu.mat')
        ini = scipy.io.loadmat('bnd.mat')
        inter_coil_x = scipy.io.loadmat('inter_coil_x.mat')
        inter_coil_y = scipy.io.loadmat('inter_coil_y.mat')
        icx = inter_coil_x['coil_inter_x']
        icy = inter_coil_y['coil_inter_y']
        ic = np.concatenate([icx, icy], axis=1)  # interface coil data
        inter_coil_mu = scipy.io.loadmat('inter_coil_mu.mat')
        ic_mu = np.transpose(inter_coil_mu['mu_inter_coil'])
        nomvec = scipy.io.loadmat('nom_vec.mat')
        nom = nomvec['nom_vec']
        nom = tf.convert_to_tensor(nom, dtype=tf.float32)
        x_ini = np.transpose(ini['iron_bnd_x'])
        y_ini = np.transpose(ini['iron_bnd_y'])
        xt_bnd_data = np.concatenate([x_ini, y_ini], axis=1)
        tu_bnd_data = []
        for xt in xt_bnd_data:
            tu_bnd_data.append(0)
        tu_bnd_data = np.transpose(tu_bnd_data)
        # collocation points: iron
        x_col_data = (iron_x['x'])
        y_col_data = (iron_y['y'])
        xy_col_data_iron = np.concatenate([x_col_data, y_col_data], axis=1)
        # collocation points: coil
        x_col_data = (coil_x['coil_x'])
        y_col_data = (coil_y['coil_y'])
        xy_col_data_coil = np.concatenate([x_col_data, y_col_data], axis=1)
        xt_col_iron = tf.convert_to_tensor(xy_col_data_iron, dtype=tf.float32)
        xt_col_coil = tf.convert_to_tensor(xy_col_data_coil, dtype=tf.float32)
        xt_bnd = tf.convert_to_tensor(xt_bnd_data, dtype=tf.float32)
        u_bnd_sol = tf.convert_to_tensor(tu_bnd_data, dtype=tf.float32)
        ic = tf.convert_to_tensor(ic, dtype=tf.float32)
        ic_mu = tf.convert_to_tensor(ic_mu, dtype=tf.float32)
        # Start timer
        self.load_weights("C:/Users/user/Desktop/1-Cars (2 cases)/save_weights/maxwell/new_test/")
        t0 = time()
        self.train_with_adam(xt_col_iron, xt_col_coil, xt_bnd, u_bnd_sol, adam_num, ic, nom)
        print('\nComputation time of adam: {} seconds'.format(time() - t0))
        self.save_weights("C:/Users/user/Desktop/1-Cars (2 cases)/save_weights/maxwell/new_test/")
        np.savetxt('C:/Users/user/Desktop/1-Cars (2 cases)/save_weights/maxwell/new_test/loss.txt', self.train_loss_history)
        train_loss_history = np.array(self.train_loss_history)
        plt.plot(train_loss_history[:, 0], train_loss_history[:, 1])
        plt.yscale("log")
        plt.show()
#############################################################################################################
# main
def main():
    adam_num = 100000
    lbfgs_num = 1000
    agent = MaxwellPinn()
    agent.train(adam_num, lbfgs_num)

if __name__ == "__main__":
    main()
That's my code, including the mini-batch additions and model changes I mentioned.

What causes the model to be unable to predict on other data?

I am using ResNet50 here to create a regression model. I ran into a problem when I wanted to test the model on other data. The length of the dataset is 2050. I separate it into training and testing data: 1500 samples as training data and 500 as test data. During training I get good results and can predict quite accurately, but when I test with the testing data, the prediction results are bad.
Below is the model loss result.
The code:
Insole = pd.read_csv('1119_Rwalk40s1_list.txt', header=None, low_memory=False)
SIData = np.asarray(Insole)
df = pd.read_csv('1119_Rwalk40s1.csv', low_memory=False)
columns = ['Fx','Fy','Fz','Mx','My','Mz']
selected_df = df[columns]
FCDatas = selected_df[:2050]
SmartInsole = np.array(SIData)
FCData = np.array(FCDatas)
xX = SmartInsole
yY = FCData
scaler_x = MinMaxScaler(feature_range=(0, 1))
scaler_x.fit(xX)
xscale = scaler_x.transform(xX)
scaler_y = MinMaxScaler(feature_range=(0, 1))
scaler_y.fit(yY)
yscale = scaler_y.transform(yY)
SIDataPCA = xscale
pca = PCA(n_components=12)
pca.fit(SIDataPCA)
SIdata_pca = pca.transform(SIDataPCA)
#For Training
trainX = SIdata_pca[:1500]
trainY = yscale[:1500]
#For Testing
testX = SIdata_pca[1500:]
testY = yscale[1500:]
X_train, X_test, y_train, y_test = train_test_split(trainX, trainY, test_size=0.20, random_state=2)
Below is my ResNet model structure. First the identity block:
def identity_block(input_tensor, units):
    x = layers.Dense(units)(input_tensor)
    x = layers.Activation('relu')(x)
    x = layers.Dense(units)(x)
    x = layers.Activation('relu')(x)
    x = layers.Dense(units)(x)
    x = layers.add([x, input_tensor])
    x = layers.Activation('relu')(x)
    return x
Below is dens_block:
def dens_block(input_tensor, units):
    x = layers.Dense(units)(input_tensor)
    x = layers.Activation('relu')(x)
    x = layers.Dense(units)(x)
    x = layers.Activation('relu')(x)
    x = layers.Dense(units)(x)
    shortcut = layers.Dense(units)(input_tensor)
    x = layers.add([x, shortcut])
    x = layers.Activation('relu')(x)
    return x
Resnet50 model:
def ResNet50Regression():
    Res_input = layers.Input(shape=(12,))
    width = 32
    x = dens_block(Res_input, width)
    x = identity_block(x, width)
    x = identity_block(x, width)
    x = dens_block(x, width)
    x = identity_block(x, width)
    x = identity_block(x, width)
    x = dens_block(x, width)
    x = identity_block(x, width)
    x = identity_block(x, width)
    x = layers.Dense(6, activation="sigmoid")(x)
    model = models.Model(inputs=Res_input, outputs=x)
    return model

model = ResNet50Regression()
model.compile(loss='mse',
              optimizer=Adam(),
              metrics=['mse'])
history = model.fit(X_train, y_train,
                    batch_size=32,
                    epochs=50,
                    validation_data=(X_test, y_test),
                    verbose=2)
model.save('Resnet50-1203.h5')
ypred = model.predict(trainX)
x=[]
colors=['red','green','brown','teal','gray','black','maroon','orange','purple']
colors2=['green','red','orange','black','maroon','teal','blue','gray','brown']
x = np.arange(0,1500)*40/1500
for i in range(0,6):
    plt.figure(figsize=(15,6))
    plt.plot(x, trainY[0:1500,i], color=colors[i])
    plt.plot(x, ypred[0:1500,i], markerfacecolor='none', color=colors2[i])
    plt.title('Result for ResNet Regression (Training Data)')
    plt.ylabel(columns[i])
    plt.xlabel('Time(s)')
    plt.legend(['FP Data', 'SI Prediction'], loc='best')
    # plt.savefig('Regression Result.png'[i])
    plt.show()
Code for testing the model on other data:
new_model = load_model('Resnet50-1203.h5')
model.evaluate(testX, testY)
Test_xX_model = new_model.predict(testX)
x=[]
colors=['red','green','brown','teal','gray','black','maroon','orange','purple']
colors2=['green','red','orange','black','maroon','teal','blue','gray','brown']
x = np.arange(0,550)*40/550
for i in range(0,6):
    plt.figure(figsize=(15,6))
    plt.plot(x, testY[0:550,i], color=colors[i])
    plt.plot(x, Test_xX_model[0:550,i], markerfacecolor='none', color=colors2[i])
    plt.title('Result for ResNet Regression (Testing Data)')
    plt.ylabel(columns[i])
    plt.xlabel('Time(s)')
    plt.legend(['FP Data', 'SI Prediction'], loc='best')
    # plt.savefig('Regression Result.png'[i])
    plt.show()
One of the training data prediction results:
One of the testing data prediction results:
What should I do in this case?

Actor-Critic reinforcement learning: the network can't learn

I have some Actor-Critic reinforcement learning questions to ask. In my code I try to use two networks (actor and critic) to play the Pong game, but I don't know whether my learn() function has a problem. Loss1 is the actor network loss, -logP(a|s) * TDerror, and Loss2 is the critic network loss, (pred - TDtarget) ** 2 / 2.
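For reference, the standard one-step actor-critic quantities behind these losses are (note that the code below defines TDerror as V(s,a) minus the target, i.e. the negative of the usual delta):

$$\delta = r + \gamma V(s') - V(s), \qquad L_{\text{actor}} = -\delta \,\log \pi(a \mid s), \qquad L_{\text{critic}} = \tfrac{1}{2}\,\delta^2$$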
class Actor_Critic():
    def __init__(self, input_dim=6400, n_actions=2):
        self.input_dim = input_dim
        self.n_action = n_actions  # Pong game action space
        self.lr = 0.0005
        self.gamma = 0.99  # discount factor
        self.critic_network = self.__build_critic_network()
        self.actor_network = self.__build_actor_network()

    def __build_critic_network(self):
        model_input = layers.Input(shape=(self.input_dim + self.n_action,))
        layer1 = layers.Dense(128, activation='relu')(model_input)
        layer2 = layers.Dense(32, activation='relu')(layer1)
        model_output = layers.Dense(1, activation=None)(layer2)
        model = Model(model_input, model_output)
        model.compile(optimizer=Adam(learning_rate=self.lr))
        return model

    def __build_actor_network(self):
        model_input = layers.Input(shape=(self.input_dim,))
        layer1 = layers.Dense(128, activation='relu')(model_input)
        layer2 = layers.Dense(32, activation='relu')(layer1)
        model_output = layers.Dense(self.n_action, activation='softmax')(layer2)
        model = Model(model_input, model_output)
        model.compile(optimizer=Adam(learning_rate=self.lr))
        return model

    def choose_action(self, state):
        state = tf.convert_to_tensor([state], dtype=tf.float16)
        probability = self.actor_network(state)
        action_probs = tfp.distributions.Categorical(probs=probability)
        action = action_probs.sample()
        return action_probs, action.numpy()[0]  # return action distribution and action

    def action_hot_code(self, action):
        action_hot_code = np.zeros(self.n_action, dtype=np.float16)
        action_hot_code[action] = 1.0
        action_hot_code = tf.convert_to_tensor([action_hot_code], dtype=tf.float16)
        return action_hot_code

    def learn(self, state, reward, next_state, done):
        with tf.GradientTape(persistent=True) as tape:
            action_probs, action = self.choose_action(state)
            next_action_prob, next_action = self.choose_action(next_state)
            action_hot_code = self.action_hot_code(action)
            next_action_hot_code = self.action_hot_code(next_action)
            state = tf.convert_to_tensor([state], dtype=tf.float16)
            next_state = tf.convert_to_tensor([next_state], dtype=tf.float16)
            state_action = tf.concat([state, action_hot_code], axis=1)
            next_state_action = tf.concat([next_state, next_action_hot_code], axis=1)
            state_value = self.critic_network(state_action)
            next_state_value = self.critic_network(next_state_action)
            TDtarget = reward + self.gamma * next_state_value * (1-done)
            TDerror = state_value - TDtarget
            Loss1 = -TDerror * action_probs.log_prob(action)
            Loss2 = (TDerror ** 2) / 2
            Loss1 = tf.squeeze(Loss1)
            Loss2 = tf.squeeze(Loss2)
        gradient1 = tape.gradient(Loss1, self.actor_network.trainable_variables)
        gradient2 = tape.gradient(Loss2, self.critic_network.trainable_variables)
        self.actor_network.optimizer.apply_gradients(zip(gradient1, self.actor_network.trainable_variables))
        self.critic_network.optimizer.apply_gradients(zip(gradient2, self.critic_network.trainable_variables))

Handwriting synthesis (Alex Graves)

I have been trying to replicate the Alex Graves handwriting synthesis model with TensorFlow and Python on a 1080 Ti GPU with CUDA.
I replicated exactly all of the features explained in the paper and even clipped the respective gradient values in place, but I have real difficulty training it.
I also preprocessed the data in the way explained in the paper, including normalizing the x and y offsets, but the problem is that the training usually can't lower the negative log likelihood below about 1000, whereas in the paper it reaches -1000, and after that I see NaN weights.
The only extra thing I did was to add 0.0000001 to the conditional probability of every stroke to prevent NaN values in the log likelihood.
Any tips or suggestions or experience with such a task?
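(On that epsilon trick: a minimal hedged sketch of an alternative, assuming prob holds the per-timestep mixture density as in the loss code further down, and using the TF1-style tf.log op used throughout this post:

# Clamp the density away from zero before taking the log, so a collapsed
# mixture component cannot produce log(0) = -inf and then NaN gradients.
log_prob = tf.log(tf.maximum(prob, 1e-7))

Clamping with tf.maximum only touches values below the threshold, whereas adding a constant slightly biases every likelihood.)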
This is the cell code I use:
class Custom_Cell(RNNCell):
    def __init__(self, forget_bias, bias, one_hot_vector, hidden_layer_nums=[700,700,700], mixture_num=10, attention_num=4):
        self.bias = bias
        self.lstms = []
        for i in hidden_layer_nums:
            self.lstms.append(LSTMCell(num_units=i, initializer=tf.truncated_normal_initializer(0.075), dtype=tf.float32, forget_bias=forget_bias))
        self.attention_num = attention_num
        self.mixture_num = mixture_num
        self.state_size = 2*sum(hidden_layer_nums) + 3*self.attention_num
        self.attention_var_num = 3*self.attention_num
        self.output_size = 6*self.mixture_num + 1 + 1
        self.one_hot_vector = one_hot_vector
        self.lstm_num = len(hidden_layer_nums)
        self.hidden_layer_nums = hidden_layer_nums
        temp_shape = self.one_hot_vector.shape
        self.char_num = temp_shape[2]
        self.i_to_h = []
        self.w_to_h = []
        self.h_to_h = []
        self.prev_h_to_h = []
        self.lstm_bias = []
        self.lstm_to_attention_weights = tf.get_variable("lstms/first_to_attention_mtrx", shape=[hidden_layer_nums[0], self.attention_var_num], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True)
        self.lstm_to_attention_bias = tf.get_variable("lstms/first_to_attention_bias", shape=[self.attention_var_num], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True)
        self.all_to_output_mtrx = []
        for i in range(self.lstm_num):
            self.all_to_output_mtrx.append(tf.get_variable("lstms/to_output_mtrx_" + str(i), shape=[hidden_layer_nums[i], self.output_size-1], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True))
        self.all_to_output_bias = tf.get_variable("lstms/output_bias", shape=[self.output_size-1], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True)
        for i in range(self.lstm_num):
            self.i_to_h.append(tf.get_variable("lstms/i_to_h_"+str(i), shape=[3, hidden_layer_nums[i]], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True))
            self.w_to_h.append(tf.get_variable("lstms/w_to_h_"+str(i), shape=[self.char_num, hidden_layer_nums[i]], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True))
            self.h_to_h.append(tf.get_variable("lstms/h_to_h_"+str(i), shape=[hidden_layer_nums[i], hidden_layer_nums[i]], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True))
            self.lstm_bias.append(tf.get_variable("lstms/bias_" + str(i), shape=[hidden_layer_nums[i]], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True))
            if not i == 0:
                self.prev_h_to_h.append(
                    tf.get_variable("lstms/prev_h_to_h_" + str(i), shape=[hidden_layer_nums[i-1], hidden_layer_nums[i]],
                                    dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075),
                                    trainable=True))

    def __call__(self, inputs, state, scope=None):
        # Extracting previous configuration and vectors
        splitarray = []
        for i in self.hidden_layer_nums:
            splitarray.append(i)
            splitarray.append(i)
        splitarray.append(3*self.attention_num)
        splitted = tf.split(state, splitarray, axis=1)
        prev_tuples = []
        for i in range(self.lstm_num):
            newtuple = LSTMStateTuple(splitted[2*i], splitted[2*i + 1])
            prev_tuples.append(newtuple)
        prev_attention_vec = splitted[2*self.lstm_num]
        new_attention_vec = 0
        next_states = []
        most_attended = 0
        last_output = 0
        for i in range(self.lstm_num):
            prev_c, prev_h = prev_tuples[i]
            cell = self.lstms[i]
            if i == 0:
                with tf.name_scope("layer_1"):
                    w, most_attended = self.gaussian_attention(self.one_hot_vector, prev_attention_vec)
                    input_vec = tf.matmul(inputs, self.i_to_h[0]) + tf.matmul(prev_h, self.h_to_h[0]) + tf.matmul(w, self.w_to_h[0]) + self.lstm_bias[0]
                    _, new_state = cell(input_vec, prev_tuples[0])
                    new_c, new_h = new_state
                    next_states.append(new_c)
                    next_states.append(new_h)
                    last_output = tf.matmul(new_h, self.all_to_output_mtrx[0])
                with tf.name_scope("attention_layer"):
                    temp_attention = tf.matmul(new_h, self.lstm_to_attention_weights) + self.lstm_to_attention_bias
                    new_alpha, new_beta, new_kappa = tf.split(temp_attention, [self.attention_num, self.attention_num, self.attention_num], axis=1)
                    old_alpha, old_beta, old_kappa = tf.split(prev_attention_vec, [self.attention_num, self.attention_num, self.attention_num], axis=1)
                    new_alpha = tf.exp(new_alpha)
                    new_beta = tf.exp(new_beta)
                    new_kappa = tf.exp(new_kappa) + old_kappa
                    new_attention_vec = tf.concat([new_alpha, new_beta, new_kappa], axis=1)
            else:
                with tf.name_scope("layer_" + str(i)):
                    w, most_attended = self.gaussian_attention(self.one_hot_vector, new_attention_vec)
                    input_vec = tf.matmul(inputs, self.i_to_h[i]) + tf.matmul(next_states[-1], self.prev_h_to_h[i-1]) + tf.matmul(prev_h, self.h_to_h[i]) + tf.matmul(w, self.w_to_h[i]) + self.lstm_bias[i]
                    _, new_state = cell(input_vec, prev_tuples[i])
                    new_c, new_h = new_state
                    next_states.append(new_c)
                    next_states.append(new_h)
                    last_output = last_output + tf.matmul(new_h, self.all_to_output_mtrx[i])
        with tf.name_scope("output"):
            last_output = last_output + self.all_to_output_bias
        next_states.append(new_attention_vec)
        state_to_return = tf.concat(next_states, axis=1)
        output_split_param = [1, self.mixture_num, 2*self.mixture_num, 2*self.mixture_num, self.mixture_num]
        binomial_param, pi, mu, sigma, rho = tf.split(last_output, output_split_param, axis=1)
        binomial_param = tf.divide(1., 1.+tf.exp(binomial_param))
        pi = tf.nn.softmax(tf.multiply(pi, 1.+self.bias), axis=1)
        mu = mu
        sigma = tf.exp(sigma-self.bias)
        rho = tf.tanh(rho)
        output_to_return = tf.concat([most_attended, binomial_param, pi, mu, sigma, rho], axis=1)
        return output_to_return, state_to_return

    def state_size(self):
        return self.state_size

    def output_size(self):
        return self.output_size

    def gaussian_attention(self, sequence, params):
        with tf.name_scope("attention_calculation"):
            alpha, beta, kappa = tf.split(params, [self.attention_num, self.attention_num, self.attention_num], axis=1)
            seq_shape = sequence.shape
            seq_length = seq_shape[1]
            temp_vec = 20*np.asarray(range(seq_length), dtype=float)
            final_result = 0
            alpha = tf.split(alpha, self.attention_num, 1)
            beta = tf.split(beta, self.attention_num, 1)
            kappa = tf.split(kappa, self.attention_num, 1)
            for i in range(self.attention_num):
                alpha_now = alpha[i]
                beta_now = beta[i]
                kappa_now = kappa[i]
                result = kappa_now - temp_vec
                result = tf.multiply(tf.square(result), tf.negative(beta_now))
                result = tf.multiply(tf.exp(result), alpha_now)
                final_result = final_result+result
            most_attended = tf.argmax(final_result, axis=1)
            most_attended = tf.reshape(tf.cast(most_attended, dtype=tf.float32), shape=[-1, 1])
            final_result = tf.tile(tf.reshape(final_result, [-1, seq_shape[1], 1]), [1, 1, seq_shape[2]])
            to_return = tf.reduce_sum(tf.multiply(final_result, sequence), axis=1)
        return to_return, most_attended
and this is the RNN with the loss network:
to_write_one_hot = tf.placeholder(dtype=tf.float32, shape=(None, line_length, dict_length))
sequence = tf.placeholder(dtype=tf.float32, shape=(None, None, 3))
sequence_shift = tf.placeholder(dtype=tf.float32, shape=(None, None, 3))
bias = tf.placeholder(shape=[1], dtype=tf.float32)
sequence_length = tf.placeholder(shape=(None), dtype=tf.int32)
forget_bias_placeholder = tf.placeholder(shape=(None), dtype=tf.float32)
graves_cell = Custom_Cell(forget_bias=1, one_hot_vector=to_write_one_hot, hidden_layer_nums=hidden_layer_nums, mixture_num=mixture_num, bias=bias, attention_num=attention_num)
output, state = tf.nn.dynamic_rnn(graves_cell, sequence, dtype=tf.float32, sequence_length=sequence_length)
with tf.name_scope("loss_layer"):
    mask = tf.sign(tf.reduce_max(tf.abs(output), 2))
    most_attended, binomial_param, pi, mu, sigma, rho = tf.split(output, [1, 1, mixture_num, 2*mixture_num, 2*mixture_num, mixture_num], axis=2)
    pi = tf.split(pi, mixture_num, axis=2)
    mu = tf.split(mu, mixture_num, axis=2)
    sigma = tf.split(sigma, mixture_num, axis=2)
    rho = tf.split(rho, mixture_num, axis=2)
    negative_log_likelihood = 0
    probability = 0
    x1, x2, e = tf.split(sequence_shift, 3, axis=2)
    for i in range(mixture_num):
        pi_now = pi[i]
        mu_now = tf.split(mu[i], 2, axis=2)
        mu_1 = mu_now[0]
        mu_2 = mu_now[1]
        sigma_now = tf.split(sigma[i], 2, axis=2)
        sigma_1 = sigma_now[0] + (1-tf.reshape(mask, [-1, max_len, 1]))
        sigma_2 = sigma_now[1] + (1-tf.reshape(mask, [-1, max_len, 1]))
        rho_now = rho[i]
        Z = tf.divide(tf.square(x1-mu_1), tf.square(sigma_1)) + tf.divide(tf.square(x2-mu_2), tf.square(sigma_2)) - tf.divide(tf.multiply(tf.multiply(x1-mu_1, x2-mu_2), 2*rho_now), tf.multiply(sigma_1, sigma_2))
        prob = tf.exp(tf.div(tf.negative(Z), 2*(1-tf.square(rho_now))))
        Normalizing_factor = 2*np.pi*tf.multiply(sigma_1, sigma_2)
        Normalizing_factor = tf.multiply(Normalizing_factor, tf.sqrt(1-tf.square(rho_now)))
        prob = tf.divide(prob, Normalizing_factor)
        prob = tf.multiply(pi_now, prob)
        probability = probability + prob
    binomial_likelihood = tf.multiply(binomial_param, e) + tf.multiply(1-binomial_param, 1-e)
    probability = tf.multiply(probability, binomial_likelihood)
    probability = probability + (1-tf.reshape(mask, [-1, max_len, 1]))
    temp_tensor = tf.multiply(mask, tf.log(tf.reshape(probability, [-1, max_len]) + mask*0.00001))
    negative_log_likelihood_0 = tf.negative(tf.reduce_sum(temp_tensor, axis=1))
    negative_log_likelihood_1 = tf.divide(negative_log_likelihood_0, tf.reshape(tf.cast(sequence_length, dtype=tf.float32), shape=[-1, 1]))
    negative_log_likelihood_1 = tf.reduce_mean(negative_log_likelihood_1)
    tf.summary.scalar("average_per_timestamp_log_likelihood", negative_log_likelihood_1)
    negative_log_likelihood = tf.reduce_mean(negative_log_likelihood_0)
with tf.name_scope("train_op"):
    optimizer = tf.train.RMSPropOptimizer(learning_rate=0.0001, momentum=0.9, decay=0.95, epsilon=0.0001)
    gvs = optimizer.compute_gradients(negative_log_likelihood)
    capped_gvs = []
    for grad, var in gvs:
        if var.name.__contains__("rnn"):
            capped_gvs.append((tf.clip_by_value(grad, -10, 10), var))
        else:
            capped_gvs.append((tf.clip_by_value(grad, -100, 100), var))
    train_op = optimizer.apply_gradients(capped_gvs)
Edit 1. I discovered that I was clipping gradients in the wrong way; the correct way is to introduce a new op, as explained in https://github.com/tensorflow/tensorflow/issues/2793, to clip only the output gradients of the whole network and of the LSTM cells.
@tf.custom_gradient
def clip_gradient(x, clip):
    def grad(dresult):
        return [tf.clip_by_norm(dresult, clip)]
    return x, grad
Add the lines above to your code and use the function on any variable whose gradient you want to clip during backpropagation.
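For example (a hypothetical usage sketch; h stands for any tensor in your graph and the clip value 10 is arbitrary):

# The forward value of h is unchanged; only the gradient flowing back
# through it is clipped to norm 10 during backpropagation.
h = clip_gradient(h, 10)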
I still have to wait and see my results.
Edit 2.
The changed Model code is:
from tensorflow.contrib.rnn import RNNCell
from tensorflow.contrib.rnn import LSTMCell
from tensorflow.contrib.rnn import LSTMStateTuple
import tensorflow as tf
import numpy as np
@tf.custom_gradient
def clip_gradient_lstm(x):
    def grad(dresult):
        return [tf.clip_by_value(dresult, -10, 10)]
    return x, grad

@tf.custom_gradient
def clip_gradient_output(x):
    def grad(dresult):
        return [tf.clip_by_value(dresult, -100, 100)]
    return x, grad

def length_of(seq):
    used = tf.sign(tf.reduce_max(tf.abs(seq), axis=2))
    length = tf.reduce_sum(used, 1)
    length = tf.cast(length, tf.int32)
    return length
class Custom_Cell(RNNCell):
    def __init__(self, forget_bias, bias, one_hot_vector, hidden_layer_nums=[700,700,700], mixture_num=10, attention_num=4):
        self.bias = bias
        self.lstms = []
        for i in hidden_layer_nums:
            self.lstms.append(LSTMCell(num_units=i, initializer=tf.truncated_normal_initializer(0.075), dtype=tf.float32, forget_bias=forget_bias))
        self.attention_num = attention_num
        self.mixture_num = mixture_num
        self.state_size = 2*sum(hidden_layer_nums) + 3*self.attention_num
        self.attention_var_num = 3*self.attention_num
        self.output_size = 6*self.mixture_num + 1 + 1
        self.one_hot_vector = one_hot_vector
        self.lstm_num = len(hidden_layer_nums)
        self.hidden_layer_nums = hidden_layer_nums
        temp_shape = self.one_hot_vector.shape
        self.char_num = temp_shape[2]
        self.i_to_h = []
        self.w_to_h = []
        self.h_to_h = []
        self.prev_h_to_h = []
        self.lstm_bias = []
        self.lstm_to_attention_weights = tf.get_variable("lstms/first_to_attention_mtrx", shape=[hidden_layer_nums[0], self.attention_var_num], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True)
        self.lstm_to_attention_bias = tf.get_variable("lstms/first_to_attention_bias", shape=[self.attention_var_num], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True)
        self.all_to_output_mtrx = []
        for i in range(self.lstm_num):
            self.all_to_output_mtrx.append(tf.get_variable("lstms/to_output_mtrx_" + str(i), shape=[hidden_layer_nums[i], self.output_size-1], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True))
        self.all_to_output_bias = tf.get_variable("lstms/output_bias", shape=[self.output_size-1], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True)
        for i in range(self.lstm_num):
            self.i_to_h.append(tf.get_variable("lstms/i_to_h_"+str(i), shape=[3, hidden_layer_nums[i]], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True))
            self.w_to_h.append(tf.get_variable("lstms/w_to_h_"+str(i), shape=[self.char_num, hidden_layer_nums[i]], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True))
            self.h_to_h.append(tf.get_variable("lstms/h_to_h_"+str(i), shape=[hidden_layer_nums[i], hidden_layer_nums[i]], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True))
            self.lstm_bias.append(tf.get_variable("lstms/bias_" + str(i), shape=[hidden_layer_nums[i]], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True))
            if not i == 0:
                self.prev_h_to_h.append(
                    tf.get_variable("lstms/prev_h_to_h_" + str(i), shape=[hidden_layer_nums[i-1], hidden_layer_nums[i]],
                                    dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075),
                                    trainable=True))

    def __call__(self, inputs, state, scope=None):
        # Extracting previous configuration and vectors
        splitarray = []
        for i in self.hidden_layer_nums:
            splitarray.append(i)
            splitarray.append(i)
        splitarray.append(3*self.attention_num)
        splitted = tf.split(state, splitarray, axis=1)
        prev_tuples = []
        for i in range(self.lstm_num):
            newtuple = LSTMStateTuple(splitted[2*i], splitted[2*i + 1])
            prev_tuples.append(newtuple)
        prev_attention_vec = splitted[2*self.lstm_num]
        new_attention_vec = 0
        next_states = []
        most_attended = 0
        last_output = 0
        for i in range(self.lstm_num):
            prev_c, prev_h = prev_tuples[i]
            cell = self.lstms[i]
            if i == 0:
                with tf.name_scope("layer_1"):
                    w, most_attended = self.gaussian_attention(self.one_hot_vector, prev_attention_vec)
                    input_vec = tf.matmul(inputs, self.i_to_h[0]) + tf.matmul(prev_h, self.h_to_h[0]) + tf.matmul(w, self.w_to_h[0]) + self.lstm_bias[0]
                    _, new_state = cell(input_vec, prev_tuples[0])
                    new_c, new_h = new_state
                    new_h = clip_gradient_lstm(new_h)
                    next_states.append(new_c)
                    next_states.append(new_h)
                    last_output = tf.matmul(new_h, self.all_to_output_mtrx[0])
                with tf.name_scope("attention_layer"):
                    temp_attention = tf.matmul(new_h, self.lstm_to_attention_weights) + self.lstm_to_attention_bias
                    new_alpha, new_beta, new_kappa = tf.split(temp_attention, [self.attention_num, self.attention_num, self.attention_num], axis=1)
                    old_alpha, old_beta, old_kappa = tf.split(prev_attention_vec, [self.attention_num, self.attention_num, self.attention_num], axis=1)
                    new_alpha = tf.exp(new_alpha)
                    new_beta = tf.exp(new_beta)
                    new_kappa = tf.exp(new_kappa) + old_kappa
                    new_attention_vec = tf.concat([new_alpha, new_beta, new_kappa], axis=1)
            else:
                with tf.name_scope("layer_" + str(i)):
                    w, most_attended = self.gaussian_attention(self.one_hot_vector, new_attention_vec)
                    input_vec = tf.matmul(inputs, self.i_to_h[i]) + tf.matmul(next_states[-1], self.prev_h_to_h[i-1]) + tf.matmul(prev_h, self.h_to_h[i]) + tf.matmul(w, self.w_to_h[i]) + self.lstm_bias[i]
                    _, new_state = cell(input_vec, prev_tuples[i])
                    new_c, new_h = new_state
                    new_h = clip_gradient_lstm(new_h)
                    next_states.append(new_c)
                    next_states.append(new_h)
                    last_output = last_output + tf.matmul(new_h, self.all_to_output_mtrx[i])
        with tf.name_scope("output"):
            last_output = last_output + self.all_to_output_bias
            last_output = clip_gradient_output(last_output)
        next_states.append(new_attention_vec)
        state_to_return = tf.concat(next_states, axis=1)
        output_split_param = [1, self.mixture_num, 2*self.mixture_num, 2*self.mixture_num, self.mixture_num]
        binomial_param, pi, mu, sigma, rho = tf.split(last_output, output_split_param, axis=1)
        binomial_param = tf.divide(1., 1.+tf.exp(binomial_param))
        pi = tf.nn.softmax(tf.multiply(pi, 1.+self.bias), axis=1)
        mu = mu
        sigma = tf.exp(sigma-self.bias)
        rho = tf.tanh(rho)
        output_to_return = tf.concat([most_attended, binomial_param, pi, mu, sigma, rho], axis=1)
        return output_to_return, state_to_return

    def state_size(self):
        return self.state_size

    def output_size(self):
        return self.output_size

    def gaussian_attention(self, sequence, params):
        with tf.name_scope("attention_calculation"):
            alpha, beta, kappa = tf.split(params, [self.attention_num, self.attention_num, self.attention_num], axis=1)
            seq_shape = sequence.shape
            seq_length = seq_shape[1]
            temp_vec = np.asarray(range(seq_length), dtype=float)
            final_result = 0
            alpha = tf.split(alpha, self.attention_num, 1)
            beta = tf.split(beta, self.attention_num, 1)
            kappa = tf.split(kappa, self.attention_num, 1)
            for i in range(self.attention_num):
                alpha_now = alpha[i]
                beta_now = beta[i]
                kappa_now = kappa[i]
                result = kappa_now - temp_vec
                result = tf.multiply(tf.square(result), tf.negative(beta_now))
                result = tf.multiply(tf.exp(result), alpha_now)
                final_result = final_result+result
            most_attended = tf.argmax(final_result, axis=1)
            most_attended = tf.reshape(tf.cast(most_attended, dtype=tf.float32), shape=[-1, 1])
            final_result = tf.tile(tf.reshape(final_result, [-1, seq_shape[1], 1]), [1, 1, seq_shape[2]])
            to_return = tf.reduce_sum(tf.multiply(final_result, sequence), axis=1)
        return to_return, most_attended
and the training is done by

with tf.name_scope("train_op"):
    optimizer = tf.train.RMSPropOptimizer(learning_rate=0.0001, momentum=0.9, decay=0.95, epsilon=0.0001, centered=True)
    train_op = optimizer.minimize(negative_log_likelihood)

and right now it is still training, but the loss is now as low as -10.

How to read data from a CSV file in TensorFlow?

I want to read data from a CSV file in TensorFlow, so I've been trying out different ways of reading a CSV file with 2000 lines, each line with 93 features, and I want to get one-hot values.
My dataset is like this:
the first column is the data of 93 features, and the second column is the 16-element one-hot label.
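(For concreteness, a minimal hedged sketch of what parsing such a line could look like, assuming each line really holds 109 comma-separated numbers, 93 features followed by the 16 one-hot label bits; note that tf.decode_csv should be applied to value, the line contents, not to key:

record_defaults = [[0.0]] * 109  # 93 features + 16 one-hot label values
row = tf.decode_csv(value, record_defaults=record_defaults)
features = tf.stack(row[:93])    # first 93 columns
label = tf.stack(row[93:])       # last 16 columns, the one-hot label

The 93/16 column split is an assumption based on the description above.)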
This is my code:
import tensorflow as tf
# data_input = pd.read_csv('ans_string.csv')
# data_train = pd.read_csv('ans_result.csv')
x = tf.placeholder(tf.float32, [None, 93])
W = tf.Variable(tf.zeros([93, 16]))
b = tf.Variable(tf.zeros([16]))
sess = tf.InteractiveSession()
filename_queue = tf.train.string_input_producer(["dataset.csv"])
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)
# _, csv_row = reader.read(filename_queue)
# data = tf.decode_csv(csv_row, record_defaults=record_defaults)
record_defaults_key = [[1]] * 93    # 93 copies of [1], as in the original
record_defaults_value = [[1]] * 16  # 16 copies of [1], as in the original
list_result_key = tf.decode_csv(key, record_defaults=record_defaults_key)
list_result_value = tf.decode_csv(value, record_defaults=record_defaults_value)
features = tf.stack(list_result_key)
labels = tf.stack(list_result_value)
y = tf.nn.softmax(tf.matmul(x, W) + b)
y_ = tf.placeholder(tf.float32, [None, 16])
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
with tf.Session() as sess:
    # something happened
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    tf.global_variables_initializer().run()
    for _ in range(1000):
        example, label = sess.run([features, labels])
        print(sess.run(example, label))
        sess.run(train_step, feed_dict={x: example, y_: label})
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print(sess.run(accuracy.eval({x: example, y_: label})))
    coord.request_stop()
    coord.join(threads)
I want to train my model, but I got an error like this.
How can I fix it?