Handwriting synthesis (Alex Graves) - deep-learning
I have been trying to replicate the Alex Graves handwriting synthesis model, using TensorFlow and Python on a 1080 Ti GPU with CUDA.
I replicated all of the features explained in the paper exactly, and even clipped the respective gradient values in place, but I have real difficulty training it.
I also preprocessed the data in the way explained in the paper, including normalizing the x and y offsets. The problem is that training usually cannot push the negative log likelihood below 1000, whereas in the paper it reaches about -1000, and after that point I see NaN weights.
The only extra thing I did was to add 0.0000001 to the conditional probability of every stroke to prevent NaN values in the log likelihood.
Any tips, suggestions, or experience with such a task?
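(For reference: computing the mixture likelihood directly in log space with log-sum-exp would make that epsilon unnecessary. The snippet below is only an illustrative sketch with hypothetical names — pi_logits, Z, rho, log_sigma_1, log_sigma_2, one value per mixture component — not the code I actually ran.)

    # Illustrative sketch only (hypothetical tensors): the bivariate-mixture
    # likelihood evaluated in log space, so no epsilon is needed inside tf.log.
    log_pi = tf.nn.log_softmax(pi_logits, axis=-1)
    log_component = (-Z / (2. * (1. - tf.square(rho)))
                     - np.log(2. * np.pi) - log_sigma_1 - log_sigma_2
                     - 0.5 * tf.log(1. - tf.square(rho)))
    log_likelihood = tf.reduce_logsumexp(log_pi + log_component, axis=-1)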
This is the cell code I use:
class Custom_Cell(RNNCell):

    def __init__(self, forget_bias, bias, one_hot_vector, hidden_layer_nums=[700, 700, 700], mixture_num=10, attention_num=4):
        self.bias = bias
        self.lstms = []
        for i in hidden_layer_nums:
            self.lstms.append(LSTMCell(num_units=i, initializer=tf.truncated_normal_initializer(0.075), dtype=tf.float32, forget_bias=forget_bias))
        self.attention_num = attention_num
        self.mixture_num = mixture_num
        self.state_size = 2*sum(hidden_layer_nums) + 3*self.attention_num
        self.attention_var_num = 3*self.attention_num
        self.output_size = 6*self.mixture_num + 1 + 1
        self.one_hot_vector = one_hot_vector
        self.lstm_num = len(hidden_layer_nums)
        self.hidden_layer_nums = hidden_layer_nums
        temp_shape = self.one_hot_vector.shape
        self.char_num = temp_shape[2]
        self.i_to_h = []
        self.w_to_h = []
        self.h_to_h = []
        self.prev_h_to_h = []
        self.lstm_bias = []
        self.lstm_to_attention_weights = tf.get_variable("lstms/first_to_attention_mtrx", shape=[hidden_layer_nums[0], self.attention_var_num], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True)
        self.lstm_to_attention_bias = tf.get_variable("lstms/first_to_attention_bias", shape=[self.attention_var_num], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True)
        self.all_to_output_mtrx = []
        for i in range(self.lstm_num):
            self.all_to_output_mtrx.append(tf.get_variable("lstms/to_output_mtrx_" + str(i), shape=[hidden_layer_nums[i], self.output_size-1], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True))
        self.all_to_output_bias = tf.get_variable("lstms/output_bias", shape=[self.output_size-1], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True)
        for i in range(self.lstm_num):
            self.i_to_h.append(tf.get_variable("lstms/i_to_h_" + str(i), shape=[3, hidden_layer_nums[i]], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True))
            self.w_to_h.append(tf.get_variable("lstms/w_to_h_" + str(i), shape=[self.char_num, hidden_layer_nums[i]], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True))
            self.h_to_h.append(tf.get_variable("lstms/h_to_h_" + str(i), shape=[hidden_layer_nums[i], hidden_layer_nums[i]], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True))
            self.lstm_bias.append(tf.get_variable("lstms/bias_" + str(i), shape=[hidden_layer_nums[i]], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True))
            if not i == 0:
                self.prev_h_to_h.append(
                    tf.get_variable("lstms/prev_h_to_h_" + str(i), shape=[hidden_layer_nums[i-1], hidden_layer_nums[i]],
                                    dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075),
                                    trainable=True))

    def __call__(self, inputs, state, scope=None):
        # Extract the per-layer LSTM states and the attention parameters from the flat state vector
        splitarray = []
        for i in self.hidden_layer_nums:
            splitarray.append(i)
            splitarray.append(i)
        splitarray.append(3*self.attention_num)
        splitted = tf.split(state, splitarray, axis=1)
        prev_tuples = []
        for i in range(self.lstm_num):
            newtuple = LSTMStateTuple(splitted[2*i], splitted[2*i + 1])
            prev_tuples.append(newtuple)
        prev_attention_vec = splitted[2*self.lstm_num]
        new_attention_vec = 0
        next_states = []
        most_attended = 0
        last_output = 0
        for i in range(self.lstm_num):
            prev_c, prev_h = prev_tuples[i]
            cell = self.lstms[i]
            if i == 0:
                # First layer: attend with the previous window parameters, then update them
                with tf.name_scope("layer_1"):
                    w, most_attended = self.gaussian_attention(self.one_hot_vector, prev_attention_vec)
                    input_vec = tf.matmul(inputs, self.i_to_h[0]) + tf.matmul(prev_h, self.h_to_h[0]) + tf.matmul(w, self.w_to_h[0]) + self.lstm_bias[0]
                    _, new_state = cell(input_vec, prev_tuples[0])
                    new_c, new_h = new_state
                    next_states.append(new_c)
                    next_states.append(new_h)
                    last_output = tf.matmul(new_h, self.all_to_output_mtrx[0])
                with tf.name_scope("attention_layer"):
                    temp_attention = tf.matmul(new_h, self.lstm_to_attention_weights) + self.lstm_to_attention_bias
                    new_alpha, new_beta, new_kappa = tf.split(temp_attention, [self.attention_num, self.attention_num, self.attention_num], axis=1)
                    old_alpha, old_beta, old_kappa = tf.split(prev_attention_vec, [self.attention_num, self.attention_num, self.attention_num], axis=1)
                    new_alpha = tf.exp(new_alpha)
                    new_beta = tf.exp(new_beta)
                    new_kappa = tf.exp(new_kappa) + old_kappa  # kappa advances monotonically
                    new_attention_vec = tf.concat([new_alpha, new_beta, new_kappa], axis=1)
            else:
                # Deeper layers: attend with the freshly updated window parameters
                with tf.name_scope("layer_" + str(i)):
                    w, most_attended = self.gaussian_attention(self.one_hot_vector, new_attention_vec)
                    input_vec = tf.matmul(inputs, self.i_to_h[i]) + tf.matmul(next_states[-1], self.prev_h_to_h[i-1]) + tf.matmul(prev_h, self.h_to_h[i]) + tf.matmul(w, self.w_to_h[i]) + self.lstm_bias[i]
                    _, new_state = cell(input_vec, prev_tuples[i])
                    new_c, new_h = new_state
                    next_states.append(new_c)
                    next_states.append(new_h)
                    last_output = last_output + tf.matmul(new_h, self.all_to_output_mtrx[i])
        with tf.name_scope("output"):
            last_output = last_output + self.all_to_output_bias
            next_states.append(new_attention_vec)
            state_to_return = tf.concat(next_states, axis=1)
            # Split the flat output into the mixture-density parameters
            output_split_param = [1, self.mixture_num, 2*self.mixture_num, 2*self.mixture_num, self.mixture_num]
            binomial_param, pi, mu, sigma, rho = tf.split(last_output, output_split_param, axis=1)
            binomial_param = tf.divide(1., 1. + tf.exp(binomial_param))
            pi = tf.nn.softmax(tf.multiply(pi, 1. + self.bias), axis=1)
            sigma = tf.exp(sigma - self.bias)
            rho = tf.tanh(rho)
            output_to_return = tf.concat([most_attended, binomial_param, pi, mu, sigma, rho], axis=1)
        return output_to_return, state_to_return

    # Note: these two methods are shadowed by the attributes of the same name set in __init__
    def state_size(self):
        return self.state_size

    def output_size(self):
        return self.output_size

    def gaussian_attention(self, sequence, params):
        with tf.name_scope("attention_calculation"):
            alpha, beta, kappa = tf.split(params, [self.attention_num, self.attention_num, self.attention_num], axis=1)
            seq_shape = sequence.shape
            seq_length = seq_shape[1]
            temp_vec = 20*np.asarray(range(seq_length), dtype=float)
            final_result = 0
            alpha = tf.split(alpha, self.attention_num, 1)
            beta = tf.split(beta, self.attention_num, 1)
            kappa = tf.split(kappa, self.attention_num, 1)
            for i in range(self.attention_num):
                alpha_now = alpha[i]
                beta_now = beta[i]
                kappa_now = kappa[i]
                result = kappa_now - temp_vec
                result = tf.multiply(tf.square(result), tf.negative(beta_now))
                result = tf.multiply(tf.exp(result), alpha_now)
                final_result = final_result + result
            most_attended = tf.argmax(final_result, axis=1)
            most_attended = tf.reshape(tf.cast(most_attended, dtype=tf.float32), shape=[-1, 1])
            final_result = tf.tile(tf.reshape(final_result, [-1, seq_shape[1], 1]), [1, 1, seq_shape[2]])
            to_return = tf.reduce_sum(tf.multiply(final_result, sequence), axis=1)
        return to_return, most_attended
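(For reference, gaussian_attention is my implementation of the soft window from the paper: phi(t, u) = sum_k alpha_k * exp(-beta_k * (kappa_k - u)^2), with the character position u scaled by 20 in this version, and the window vector w_t = sum_u phi(t, u) * c_u over the one-hot character vectors c_u. most_attended just tracks argmax_u phi(t, u) for monitoring.)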
And this is the RNN with the loss network:
to_write_one_hot = tf.placeholder(dtype=tf.float32, shape=(None, line_length, dict_length))
sequence = tf.placeholder(dtype=tf.float32, shape=(None, None, 3))
sequence_shift = tf.placeholder(dtype=tf.float32, shape=(None, None, 3))
bias = tf.placeholder(shape=[1], dtype=tf.float32)
sequence_length = tf.placeholder(shape=(None), dtype=tf.int32)
forget_bias_placeholder = tf.placeholder(shape=(None), dtype=tf.float32)

graves_cell = Custom_Cell(forget_bias=1, one_hot_vector=to_write_one_hot, hidden_layer_nums=hidden_layer_nums, mixture_num=mixture_num, bias=bias, attention_num=attention_num)
output, state = tf.nn.dynamic_rnn(graves_cell, sequence, dtype=tf.float32, sequence_length=sequence_length)

with tf.name_scope("loss_layer"):
    mask = tf.sign(tf.reduce_max(tf.abs(output), 2))  # 1 on real timesteps, 0 on padding
    most_attended, binomial_param, pi, mu, sigma, rho = tf.split(output, [1, 1, mixture_num, 2*mixture_num, 2*mixture_num, mixture_num], axis=2)
    pi = tf.split(pi, mixture_num, axis=2)
    mu = tf.split(mu, mixture_num, axis=2)
    sigma = tf.split(sigma, mixture_num, axis=2)
    rho = tf.split(rho, mixture_num, axis=2)
    negative_log_likelihood = 0
    probability = 0
    x1, x2, e = tf.split(sequence_shift, 3, axis=2)
    for i in range(mixture_num):
        pi_now = pi[i]
        mu_now = tf.split(mu[i], 2, axis=2)
        mu_1 = mu_now[0]
        mu_2 = mu_now[1]
        sigma_now = tf.split(sigma[i], 2, axis=2)
        # shift sigma to 1 on padded steps so the divisions below stay finite
        sigma_1 = sigma_now[0] + (1 - tf.reshape(mask, [-1, max_len, 1]))
        sigma_2 = sigma_now[1] + (1 - tf.reshape(mask, [-1, max_len, 1]))
        rho_now = rho[i]
        Z = tf.divide(tf.square(x1-mu_1), tf.square(sigma_1)) + tf.divide(tf.square(x2-mu_2), tf.square(sigma_2)) - tf.divide(tf.multiply(tf.multiply(x1-mu_1, x2-mu_2), 2*rho_now), tf.multiply(sigma_1, sigma_2))
        prob = tf.exp(tf.div(tf.negative(Z), 2*(1-tf.square(rho_now))))
        Normalizing_factor = 2*np.pi*tf.multiply(sigma_1, sigma_2)
        Normalizing_factor = tf.multiply(Normalizing_factor, tf.sqrt(1-tf.square(rho_now)))
        prob = tf.divide(prob, Normalizing_factor)
        prob = tf.multiply(pi_now, prob)
        probability = probability + prob
    binomial_likelihood = tf.multiply(binomial_param, e) + tf.multiply(1-binomial_param, 1-e)
    probability = tf.multiply(probability, binomial_likelihood)
    probability = probability + (1 - tf.reshape(mask, [-1, max_len, 1]))  # padded steps get probability 1, i.e. log 0
    temp_tensor = tf.multiply(mask, tf.log(tf.reshape(probability, [-1, max_len]) + mask*0.00001))
    negative_log_likelihood_0 = tf.negative(tf.reduce_sum(temp_tensor, axis=1))
    negative_log_likelihood_1 = tf.divide(negative_log_likelihood_0, tf.reshape(tf.cast(sequence_length, dtype=tf.float32), shape=[-1, 1]))
    negative_log_likelihood_1 = tf.reduce_mean(negative_log_likelihood_1)
    tf.summary.scalar("average_per_timestamp_log_likelihood", negative_log_likelihood_1)
    negative_log_likelihood = tf.reduce_mean(negative_log_likelihood_0)

with tf.name_scope("train_op"):
    optimizer = tf.train.RMSPropOptimizer(learning_rate=0.0001, momentum=0.9, decay=0.95, epsilon=0.0001)
    gvs = optimizer.compute_gradients(negative_log_likelihood)
    capped_gvs = []
    for grad, var in gvs:
        if var.name.__contains__("rnn"):
            capped_gvs.append((tf.clip_by_value(grad, -10, 10), var))
        else:
            capped_gvs.append((tf.clip_by_value(grad, -100, 100), var))
    train_op = optimizer.apply_gradients(capped_gvs)
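(While hunting the NaNs, a numerics check can at least make the failure loud. This is a hypothetical debugging aid, not part of my graph:)

# Hypothetical debugging aid: make session.run raise as soon as the loss
# becomes NaN/Inf, instead of letting the optimizer write NaNs into the weights.
negative_log_likelihood = tf.check_numerics(negative_log_likelihood, "NLL is NaN or Inf")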
Edit 1. I discovered that I was clipping gradients in the wrong way; the correct way was to introduce a new op, as explained in https://github.com/tensorflow/tensorflow/issues/2793, to clip only the output gradients of the whole network and of the LSTM cells.
@tf.custom_gradient
def clip_gradient(x, clip):
    def grad(dresult):
        return [tf.clip_by_norm(dresult, clip)]
    return x, grad
Add the lines above to your code and use the function on any variable whose gradient you want to clip in backpropagation!
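For example (hypothetical usage, h being any tensor on the backpropagation path):

h = clip_gradient(h, 10.0)  # identity in the forward pass; gradient clipped by norm to 10 on the way back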
I still have to wait and see the results.
Edit 2.
The changed model code is:
from tensorflow.contrib.rnn import RNNCell
from tensorflow.contrib.rnn import LSTMCell
from tensorflow.contrib.rnn import LSTMStateTuple
import tensorflow as tf
import numpy as np
@tf.custom_gradient
def clip_gradient_lstm(x):
    def grad(dresult):
        return [tf.clip_by_value(dresult, -10, 10)]
    return x, grad

@tf.custom_gradient
def clip_gradient_output(x):
    def grad(dresult):
        return [tf.clip_by_value(dresult, -100, 100)]
    return x, grad
def length_of(seq):
    used = tf.sign(tf.reduce_max(tf.abs(seq), axis=2))
    length = tf.reduce_sum(used, 1)
    length = tf.cast(length, tf.int32)
    return length
class Custom_Cell(RNNCell):

    def __init__(self, forget_bias, bias, one_hot_vector, hidden_layer_nums=[700, 700, 700], mixture_num=10, attention_num=4):
        self.bias = bias
        self.lstms = []
        for i in hidden_layer_nums:
            self.lstms.append(LSTMCell(num_units=i, initializer=tf.truncated_normal_initializer(0.075), dtype=tf.float32, forget_bias=forget_bias))
        self.attention_num = attention_num
        self.mixture_num = mixture_num
        self.state_size = 2*sum(hidden_layer_nums) + 3*self.attention_num
        self.attention_var_num = 3*self.attention_num
        self.output_size = 6*self.mixture_num + 1 + 1
        self.one_hot_vector = one_hot_vector
        self.lstm_num = len(hidden_layer_nums)
        self.hidden_layer_nums = hidden_layer_nums
        temp_shape = self.one_hot_vector.shape
        self.char_num = temp_shape[2]
        self.i_to_h = []
        self.w_to_h = []
        self.h_to_h = []
        self.prev_h_to_h = []
        self.lstm_bias = []
        self.lstm_to_attention_weights = tf.get_variable("lstms/first_to_attention_mtrx", shape=[hidden_layer_nums[0], self.attention_var_num], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True)
        self.lstm_to_attention_bias = tf.get_variable("lstms/first_to_attention_bias", shape=[self.attention_var_num], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True)
        self.all_to_output_mtrx = []
        for i in range(self.lstm_num):
            self.all_to_output_mtrx.append(tf.get_variable("lstms/to_output_mtrx_" + str(i), shape=[hidden_layer_nums[i], self.output_size-1], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True))
        self.all_to_output_bias = tf.get_variable("lstms/output_bias", shape=[self.output_size-1], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True)
        for i in range(self.lstm_num):
            self.i_to_h.append(tf.get_variable("lstms/i_to_h_" + str(i), shape=[3, hidden_layer_nums[i]], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True))
            self.w_to_h.append(tf.get_variable("lstms/w_to_h_" + str(i), shape=[self.char_num, hidden_layer_nums[i]], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True))
            self.h_to_h.append(tf.get_variable("lstms/h_to_h_" + str(i), shape=[hidden_layer_nums[i], hidden_layer_nums[i]], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True))
            self.lstm_bias.append(tf.get_variable("lstms/bias_" + str(i), shape=[hidden_layer_nums[i]], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075), trainable=True))
            if not i == 0:
                self.prev_h_to_h.append(
                    tf.get_variable("lstms/prev_h_to_h_" + str(i), shape=[hidden_layer_nums[i-1], hidden_layer_nums[i]],
                                    dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075),
                                    trainable=True))

    def __call__(self, inputs, state, scope=None):
        # Extracting previous configuration and vectors
        splitarray = []
        for i in self.hidden_layer_nums:
            splitarray.append(i)
            splitarray.append(i)
        splitarray.append(3*self.attention_num)
        splitted = tf.split(state, splitarray, axis=1)
        prev_tuples = []
        for i in range(self.lstm_num):
            newtuple = LSTMStateTuple(splitted[2*i], splitted[2*i + 1])
            prev_tuples.append(newtuple)
        prev_attention_vec = splitted[2*self.lstm_num]
        new_attention_vec = 0
        next_states = []
        most_attended = 0
        last_output = 0
        for i in range(self.lstm_num):
            prev_c, prev_h = prev_tuples[i]
            cell = self.lstms[i]
            if i == 0:
                with tf.name_scope("layer_1"):
                    w, most_attended = self.gaussian_attention(self.one_hot_vector, prev_attention_vec)
                    input_vec = tf.matmul(inputs, self.i_to_h[0]) + tf.matmul(prev_h, self.h_to_h[0]) + tf.matmul(w, self.w_to_h[0]) + self.lstm_bias[0]
                    _, new_state = cell(input_vec, prev_tuples[0])
                    new_c, new_h = new_state
                    new_h = clip_gradient_lstm(new_h)
                    next_states.append(new_c)
                    next_states.append(new_h)
                    last_output = tf.matmul(new_h, self.all_to_output_mtrx[0])
                with tf.name_scope("attention_layer"):
                    temp_attention = tf.matmul(new_h, self.lstm_to_attention_weights) + self.lstm_to_attention_bias
                    new_alpha, new_beta, new_kappa = tf.split(temp_attention, [self.attention_num, self.attention_num, self.attention_num], axis=1)
                    old_alpha, old_beta, old_kappa = tf.split(prev_attention_vec, [self.attention_num, self.attention_num, self.attention_num], axis=1)
                    new_alpha = tf.exp(new_alpha)
                    new_beta = tf.exp(new_beta)
                    new_kappa = tf.exp(new_kappa) + old_kappa
                    new_attention_vec = tf.concat([new_alpha, new_beta, new_kappa], axis=1)
            else:
                with tf.name_scope("layer_" + str(i)):
                    w, most_attended = self.gaussian_attention(self.one_hot_vector, new_attention_vec)
                    input_vec = tf.matmul(inputs, self.i_to_h[i]) + tf.matmul(next_states[-1], self.prev_h_to_h[i-1]) + tf.matmul(prev_h, self.h_to_h[i]) + tf.matmul(w, self.w_to_h[i]) + self.lstm_bias[i]
                    _, new_state = cell(input_vec, prev_tuples[i])
                    new_c, new_h = new_state
                    new_h = clip_gradient_lstm(new_h)
                    next_states.append(new_c)
                    next_states.append(new_h)
                    last_output = last_output + tf.matmul(new_h, self.all_to_output_mtrx[i])
        with tf.name_scope("output"):
            last_output = last_output + self.all_to_output_bias
            last_output = clip_gradient_output(last_output)
            next_states.append(new_attention_vec)
            state_to_return = tf.concat(next_states, axis=1)
            output_split_param = [1, self.mixture_num, 2*self.mixture_num, 2*self.mixture_num, self.mixture_num]
            binomial_param, pi, mu, sigma, rho = tf.split(last_output, output_split_param, axis=1)
            binomial_param = tf.divide(1., 1. + tf.exp(binomial_param))
            pi = tf.nn.softmax(tf.multiply(pi, 1. + self.bias), axis=1)
            sigma = tf.exp(sigma - self.bias)
            rho = tf.tanh(rho)
            output_to_return = tf.concat([most_attended, binomial_param, pi, mu, sigma, rho], axis=1)
        return output_to_return, state_to_return

    def state_size(self):
        return self.state_size

    def output_size(self):
        return self.output_size

    def gaussian_attention(self, sequence, params):
        with tf.name_scope("attention_calculation"):
            alpha, beta, kappa = tf.split(params, [self.attention_num, self.attention_num, self.attention_num], axis=1)
            seq_shape = sequence.shape
            seq_length = seq_shape[1]
            temp_vec = np.asarray(range(seq_length), dtype=float)
            final_result = 0
            alpha = tf.split(alpha, self.attention_num, 1)
            beta = tf.split(beta, self.attention_num, 1)
            kappa = tf.split(kappa, self.attention_num, 1)
            for i in range(self.attention_num):
                alpha_now = alpha[i]
                beta_now = beta[i]
                kappa_now = kappa[i]
                result = kappa_now - temp_vec
                result = tf.multiply(tf.square(result), tf.negative(beta_now))
                result = tf.multiply(tf.exp(result), alpha_now)
                final_result = final_result + result
            most_attended = tf.argmax(final_result, axis=1)
            most_attended = tf.reshape(tf.cast(most_attended, dtype=tf.float32), shape=[-1, 1])
            final_result = tf.tile(tf.reshape(final_result, [-1, seq_shape[1], 1]), [1, 1, seq_shape[2]])
            to_return = tf.reduce_sum(tf.multiply(final_result, sequence), axis=1)
        return to_return, most_attended
and the training is done by:
with tf.name_scope("train_op"):
optimizer =
tf.train.RMSPropOptimizer(learning_rate=0.0001,momentum=0.9, decay=0.95,epsilon=0.0001,centered=True)
train_op = optimizer.minimize(negative_log_likelihood)
It is still training right now, but the loss is already down to about -10.
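(For completeness, a global-norm clip over all gradients at once would be the usual alternative to the per-tensor clip ops above; this is only a sketch, not what is currently running:)

# Hypothetical alternative: clip the global gradient norm instead of per-tensor values.
gvs = optimizer.compute_gradients(negative_log_likelihood)
grads, tvars = zip(*gvs)
grads, _ = tf.clip_by_global_norm(grads, 10.0)
train_op = optimizer.apply_gradients(zip(grads, tvars))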