Training Accuracy is Very Low in A Simple CNN using Theano

Training Accuracy is Very Low in A Simple CNN using Theano - deep-learning

I'm trying to implement a CNN using Theano and tried to test my code with a small sample-set of my bigger dataset. I'm trying to categorize a set of 8280 pictures(of 250*250 sizes) into 115 classes and my sample set is a set of 32 pictures of the first two classes(16 pictures from each). The problem I'm experiencing is that from the first epoch, the training loss in NaN and It will not change in the further epochs.
from __future__ import print_function
import sys
import os
import time
import numpy as np
import theano
import theano.tensor as T
import lasagne
import re
import cv2
from lasagne.layers import Conv2DLayer, MaxPool2DLayer , DropoutLayer
from lasagne.layers import InputLayer, DenseLayer, batch_norm
def split_list(a_list):
half = len(a_list)/2
return a_list[:half], a_list[half:]
def load_dataset(path=''):
cat_list = []
filelist = sorted(os.listdir(path))
trainlist = []
testlist = []
tmptrain = []
tmptest = []
max_id = 0
for f in filelist:
match = re.match(r'C(\d+)([F|G])(\d+)\.PNG', f)
id = int(match.group(1)) - 1
max_id = max(max_id,id)
fg_class = match.group(2)
fg_id = int(match.group(3))
if id not in [p[0] for p in cat_list]:
cat_list.append([id, [], []])
if fg_class == 'G':
cat_list[-1][1].append(f)
else:
cat_list[-1][2].append(f)
for f in cat_list:
id = f[0]
trainG, testG = split_list(f[1])
trainF, testF = split_list(f[2])
tmptrain = tmptrain + [(id, 1, F) for F in trainF] + [(id, 0, G) for G in trainG] # (Class_id,Forgery,Img)
tmptest = tmptest + [(id, 1, F) for F in testF] + [(id, 0, F) for F in testG]
X_train = np.array([cv2.imread(path+f[2],0) for f in tmptrain]).astype(np.int32)
y_train = np.array([f[0] for f in tmptrain]).astype(np.int32)
X_test = np.array([cv2.imread(path+f[2],0) for f in tmptest]).astype(np.int32)
y_test = np.array([f[0] for f in tmptest]).astype(np.int32)
fg_train = np.array([f[1] for f in tmptrain]).astype(np.int32)
fg_test = np.array([f[1] for f in tmptest]).astype(np.int32)
X_train = np.expand_dims(X_train,axis=1).astype(np.int32)
X_test = np.expand_dims(X_test, axis=1).astype(np.int32)
return X_train, y_train, X_test, y_test, fg_train , fg_test
def ExplicitNegativeCorrelation(net,layer='fc2',lr=0.00001):
for param in lasagne.layers.get_all_params(net[layer]):
if param.name.startswith('W'):
W = param
mean = T.mean(W,0) * lr
W = W - mean#T.mean(T.mean(W,0))
def ImplicitNegativeCorrelation(MSE,Cross,Hinge):
mean = T.mean((MSE+Cross+Hinge),axis=0)
return ((MSE-mean)**2+(Cross-mean)**2+(Hinge-mean)**2)/3
def build_cnn(inputvar,input_shape, trained_weights=None):
net = {}
net['input'] = InputLayer(input_shape,input_var=inputvar)
net['drop_input'] = DropoutLayer(net['input'],p=0.2)
net['conv1'] = batch_norm(Conv2DLayer(net['input'], num_filters=96, filter_size=11, stride=4, flip_filters=False))#,W=lasagne.init.HeNormal()))
net['pool1'] = MaxPool2DLayer(net['conv1'], pool_size=3, stride=2)
net['conv2'] = batch_norm(Conv2DLayer(net['pool1'], num_filters=256, filter_size=5, pad=2, flip_filters=False))#, W=lasagne.init.HeNormal()))
net['pool2'] = MaxPool2DLayer(net['conv2'], pool_size=3, stride=2)
net['conv3'] = batch_norm(Conv2DLayer(net['pool2'], num_filters=384, filter_size=3, pad=1, flip_filters=False))#, W=lasagne.init.HeNormal()))
net['conv4'] = batch_norm(Conv2DLayer(net['conv3'], num_filters=384, filter_size=3, pad=1, flip_filters=False))#, W=lasagne.init.HeNormal()))
net['conv5'] = batch_norm(Conv2DLayer(net['conv4'], num_filters=256, filter_size=3, pad=1, flip_filters=False))#, W=lasagne.init.HeNormal()))
net['pool5'] = MaxPool2DLayer(net['conv5'], pool_size=3, stride=2)
net['fc1'] = batch_norm(DenseLayer(net['pool5'], num_units=2048))
net['drop_fc1'] = DropoutLayer(net['fc1'])
net['fc2'] = batch_norm(DenseLayer(net['drop_fc1'], num_units=2048))
net['fc_class'] = batch_norm(DenseLayer(net['fc2'],num_units=115))
return net
def iterate_minibatches(inputs, targets_class,targets_verif, batchsize, shuffle=False):
assert len(inputs) == len(targets_class)
assert len(inputs) == len(targets_verif)
if shuffle:
indices = np.arange(len(inputs))
np.random.shuffle(indices)
for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
if shuffle:
excerpt = indices[start_idx:start_idx + batchsize]
else:
excerpt = slice(start_idx, start_idx + batchsize)
yield inputs[excerpt], targets_class[excerpt], targets_verif[excerpt]
def main(num_epochs=500):
print("Loading data...")
X_train, y_train, X_test, y_test, fg_train, fg_test = load_dataset('./signatures/tmp4/')
X_val, y_val, fg_val = X_train, y_train, fg_train
print(y_train.shape)
input_var = T.tensor4('inputs')
target_var_class = T.ivector('targets')
network = build_cnn(input_var, (None, 1, 250, 250))
class_prediction = lasagne.layers.get_output(network['fc_class']) # ,inputs={network['input']:input_var})
loss_class = lasagne.objectives.categorical_crossentropy(class_prediction, target_var_class)
loss = loss_class.mean()
params = lasagne.layers.get_all_params([network['fc_class']], trainable=True)
lr = 0.01
updates = lasagne.updates.nesterov_momentum(
loss, params, learning_rate=lr, momentum=0.9)
test_prediction_class = lasagne.layers.get_output(network['fc_class'], deterministic=True)
test_loss_class = lasagne.objectives.categorical_crossentropy(test_prediction_class,
target_var_class)
test_loss_class = test_loss_class.mean()
test_acc_class = T.mean(T.eq(T.argmax(test_prediction_class, axis=1), target_var_class),
dtype=theano.config.floatX)
predict_class = theano.function([input_var], T.argmax(test_prediction_class,axis=1))
train_fn = theano.function([input_var, target_var_class], loss, updates=updates)
val_fn_class = theano.function([input_var, target_var_class], [test_loss_class, test_acc_class])
print("Starting training...")
BatchSize = 2
for epoch in range(num_epochs):
train_err = 0
train_batches = 0
start_time = time.time()
for batch in iterate_minibatches(X_train, y_train,fg_train, BatchSize, shuffle=True):
inputs, targets_class, targets_verif = batch
train_err += train_fn(inputs, targets_class)
#ExplicitNegativeCorrelation(network, layer='fc2',lr=lr/10)
print(targets_class,predict_class(inputs))
train_batches += 1
val_err_class = 0
val_acc_class = 0
val_batches = 0
for batch in iterate_minibatches(X_val, y_val, fg_val, BatchSize, shuffle=False):
inputs, targets_class, targets_verif = batch
err_class, acc_class = val_fn_class(inputs, targets_class)
val_err_class += err_class
val_acc_class += acc_class
val_batches += 1
print("Epoch {} of {} took {:.3f}s".format(
epoch + 1, num_epochs, time.time() - start_time))
print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
print(" Classification loss:\t\t{:.6f}".format(val_err_class / val_batches))
print(" Classification accuracy:\t\t{:.2f} %".format(
val_acc_class / val_batches * 100))
test_err_class = 0
test_acc_class = 0
test_err_verif = 0
test_acc_verif = 0
test_batches = 0
for batch in iterate_minibatches(X_test, y_test, fg_test, BatchSize, shuffle=False):
inputs, targets_class, targets_verif = batch
err_class, acc_class = val_fn_class(inputs, targets_class)
test_err_class += err_class
test_acc_class += acc_class
test_batches += 1
print("Final results:")
print(" test loss (Classification):\t\t\t{:.6f}".format(test_err_class / test_batches))
print(" test accuracy (Classification):\t\t{:.2f} %".format(
test_acc_class / test_batches * 100))
if __name__ == '__main__':
main()
I've tried to put lasagne.nonlinearities.softmax in the DenseLayers but it does fix the NaN issue but the accuracy of the Training model will not be any good, it will be fluctuating between 0 to 25%.(after 50 epochs!).
I have implemented a load_dataset function which I think that works correctly (I've tested the function multiple times), and I'm giving the class id of each picture as the target in the loss function. So my inputs and Targets would be like this:
Input Shape: (BatchSize, 1, 250, 250)
Target Shape: (BatchSize, 1) : vector of class ids
I've uploaded my sample-set here in this link.

It looks like we have 4 classes, according to the data, so I changed loading code to reflect it:
y_train = np.array([f[0] * 2 + f[1] for f in tmptrain]).astype(np.int32)
y_test = np.array([f[0] * 2 + f[1] for f in tmptest]).astype(np.int32)
Number of units in output layer should be equal to the number of classes, so I added an output layer with SoftMax:
net['fo_class'] = DenseLayer(net['fc_class'],num_units=4,
nonlinearity=lasagne.nonlinearities.softmax)
I suggest removing dropout layer just after inputs – you can compare outcomes with it and without it to make sure of that
Batch size = 2 is too small and learning rate is too high
Here is an example of code with those changes:
from __future__ import print_function
import sys
import os
import time
import numpy as np
import theano
import theano.tensor as T
import lasagne
import re
import cv2
from lasagne.layers import Conv2DLayer, MaxPool2DLayer , DropoutLayer
from lasagne.layers import InputLayer, DenseLayer
def split_list(a_list):
half = len(a_list)/2
return a_list[:half], a_list[half:]
def load_dataset(path=''):
cat_list = []
filelist = sorted(os.listdir(path))
tmptrain = []
tmptest = []
max_id = 0
for f in filelist:
match = re.match(r'C(\d+)([F|G])(\d+)\.PNG', f)
id = int(match.group(1)) - 1
max_id = max(max_id,id)
fg_class = match.group(2)
if id not in [p[0] for p in cat_list]:
cat_list.append([id, [], []])
if fg_class == 'G':
cat_list[-1][1].append(f)
else:
cat_list[-1][2].append(f)
for f in cat_list:
id = f[0]
trainG, testG = split_list(f[1])
trainF, testF = split_list(f[2])
tmptrain = tmptrain + [(id, 1, F) for F in trainF] + [(id, 0, G) for G in trainG]
tmptest = tmptest + [(id, 1, F) for F in testF] + [(id, 0, F) for F in testG]
X_train = np.array([cv2.imread(path+f[2],0) for f in tmptrain]).astype(np.float32)
y_train = np.array([f[0] * 2 + f[1] for f in tmptrain]).astype(np.int32)
X_test = np.array([cv2.imread(path+f[2],0) for f in tmptest]).astype(np.float32)
y_test = np.array([f[0] * 2 + f[1] for f in tmptest]).astype(np.int32)
fg_train = np.array([f[1] for f in tmptrain]).astype(np.float32)
fg_test = np.array([f[1] for f in tmptest]).astype(np.float32)
X_train = np.expand_dims(X_train,axis=1).astype(np.float32)
X_test = np.expand_dims(X_test, axis=1).astype(np.float32)
return X_train, y_train, X_test, y_test, fg_train , fg_test
def ExplicitNegativeCorrelation(net,layer='fc2',lr=0.00001):
for param in lasagne.layers.get_all_params(net[layer]):
if param.name.startswith('W'):
W = param
mean = T.mean(W,0) * lr
W = W - mean
def ImplicitNegativeCorrelation(MSE,Cross,Hinge):
mean = T.mean((MSE+Cross+Hinge),axis=0)
return ((MSE-mean)**2+(Cross-mean)**2+(Hinge-mean)**2)/3
def build_cnn(inputvar,input_shape, trained_weights=None):
net = {}
net['input'] = InputLayer(input_shape,input_var=inputvar)
net['conv1'] = Conv2DLayer(net['input'], num_filters=96, filter_size=11, stride=4)
net['pool1'] = MaxPool2DLayer(net['conv1'], pool_size=3, stride=2)
net['conv2'] = Conv2DLayer(net['pool1'], num_filters=256, filter_size=5, pad=2)
net['pool2'] = MaxPool2DLayer(net['conv2'], pool_size=3, stride=2)
net['conv3'] = Conv2DLayer(net['pool2'], num_filters=384, filter_size=3, pad=1)
net['conv4'] = Conv2DLayer(net['conv3'], num_filters=384, filter_size=3, pad=1)
net['conv5'] = Conv2DLayer(net['conv4'], num_filters=256, filter_size=3, pad=1)
net['pool5'] = MaxPool2DLayer(net['conv5'], pool_size=3, stride=2)
net['fc1'] = DenseLayer(net['pool5'], num_units=2048)
net['drop_fc1'] = DropoutLayer(net['fc1'])
net['fc2'] = DenseLayer(net['drop_fc1'], num_units=2048)
net['fc_class'] = DenseLayer(net['fc2'],num_units=115)
net['fo_class'] = DenseLayer(net['fc_class'],num_units=4,
nonlinearity=lasagne.nonlinearities.softmax)
return net
def iterate_minibatches(inputs, targets_class,targets_verif, batchsize, shuffle=False):
assert len(inputs) == len(targets_class)
assert len(inputs) == len(targets_verif)
if shuffle:
indices = np.arange(len(inputs))
np.random.shuffle(indices)
for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
if shuffle:
excerpt = indices[start_idx:start_idx + batchsize]
else:
excerpt = slice(start_idx, start_idx + batchsize)
yield inputs[excerpt], targets_class[excerpt], targets_verif[excerpt]
def main(num_epochs=500):
print("Loading data...")
X_train, y_train, X_test, y_test, fg_train, fg_test = load_dataset('./signatures/tmp4/')
X_train /= 255
X_val, y_val, fg_val = X_train, y_train, fg_train
print(y_train.shape)
check = X_train[0][0]
print(check)
input_var = T.tensor4('inputs')
target_var_class = T.ivector('targets')
network = build_cnn(input_var, (None, 1, 250, 250))
class_prediction = lasagne.layers.get_output(network['fo_class'])
loss_class = lasagne.objectives.categorical_crossentropy(class_prediction, target_var_class)
loss = loss_class.mean()
params = lasagne.layers.get_all_params([network['fo_class']], trainable=True)
lr = 0.0007
updates = lasagne.updates.nesterov_momentum(
loss, params, learning_rate=lr, momentum=0.9)
test_prediction_class = lasagne.layers.get_output(network['fo_class'], deterministic=True)
test_loss_class = lasagne.objectives.categorical_crossentropy(test_prediction_class,
target_var_class)
test_loss_class = test_loss_class.mean()
test_acc_class = T.mean(T.eq(T.argmax(test_prediction_class, axis=1), target_var_class),
dtype=theano.config.floatX)
predict_class = theano.function([input_var], T.argmax(test_prediction_class,axis=1))
train_fn = theano.function([input_var, target_var_class], loss, updates=updates)
val_fn_class = theano.function([input_var, target_var_class], [test_loss_class, test_acc_class])
print("Starting training...")
BatchSize = 16
for epoch in range(num_epochs):
train_err = 0
train_batches = 0
start_time = time.time()
for batch in iterate_minibatches(X_train, y_train,fg_train, BatchSize, shuffle=True):
inputs, targets_class, targets_verif = batch
train_err += train_fn(inputs, targets_class)
print(targets_class,predict_class(inputs))
train_batches += 1
val_err_class = 0
val_acc_class = 0
val_batches = 0
for batch in iterate_minibatches(X_val, y_val, fg_val, BatchSize, shuffle=False):
inputs, targets_class, targets_verif = batch
err_class, acc_class = val_fn_class(inputs, targets_class)
val_err_class += err_class
val_acc_class += acc_class
val_batches += 1
print("Epoch {} of {} took {:.3f}s".format(
epoch + 1, num_epochs, time.time() - start_time))
print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
print(" Classification loss:\t\t{:.6f}".format(val_err_class / val_batches))
print(" Classification accuracy:\t\t{:.2f} %".format(
val_acc_class / val_batches * 100))
test_err_class = 0
test_acc_class = 0
test_batches = 0
for batch in iterate_minibatches(X_test, y_test, fg_test, BatchSize, shuffle=False):
inputs, targets_class, targets_verif = batch
err_class, acc_class = val_fn_class(inputs, targets_class)
test_err_class += err_class
test_acc_class += acc_class
test_batches += 1
print("Final results:")
print(" test loss (Classification):\t\t\t{:.6f}".format(test_err_class / test_batches))
print(" test accuracy (Classification):\t\t{:.2f} %".format(
test_acc_class / test_batches * 100))
if __name__ == '__main__':
main()

Related

How i can use dqn and ddpg to successfully train an agent excellent in customized environment?

I'm new in AI, and i want to get in the field, i have spent some time finishing a program to train an agent for a simple customized environment, but when i perform the training in colab for 10000 episodes, it still can not get well performance. I guess whether there is something wrong with the customized env or there is something wrong with the training process.
Env: a helicopter tries to get throw the continous flow of birds (max num: 10), the birds moves from the right to the left, and there is fuel randomly. If the helicopter is still alive, i.e., it has not collided with a bird and still has fuel (initialized by 1000, when it collides with the fuel icon (max num: 2), fuel_left will be reset to 1000), its rewards plus 1.
the environment is shown in the figure:
after 10000 episode in ddpg/dqn, the agent still can not play more than 15 seconds, could you point out where the problem is?
Action space(1 dim): 0, 1, 2, 3, 4 -> helicopter moves up, down, left, right and keep static.
State space(28 dim): (x,y) for 10 birds, 2 fuel, and 1 helicopter. Besides, there is fuel left and rewards obtained.
Rewards: If the helicopter is alive, rewards plus 1.
the env settings code is as follwos (custom.py):
import numpy as np
import cv2
import matplotlib.pyplot as plt
import random
import math
import time
from gym import Env, spaces
import time
font = cv2.FONT_HERSHEY_COMPLEX_SMALL
class ChopperScape(Env):
def __init__(self):
super(ChopperScape,self).__init__()
self.maxbirdnum = 10
self.maxfuelnum = 2
self.observation_shape = (28,)
self.canvas_shape = (600,800,3)
self.action_space = spaces.Discrete(5,)
self.last_action = 0
self.obs = np.zeros(self.observation_shape)
self.canvas = np.ones(self.canvas_shape) * 1
self.elements = []
self.maxfuel = 1000
self.y_min = int (self.canvas_shape[0] * 0.1)
self.x_min = 0
self.y_max = int (self.canvas_shape[0] * 0.9)
self.x_max = self.canvas_shape[1]
def draw_elements_on_canvas(self):
self.canvas = np.ones(self.canvas_shape) * 1
for elem in self.elements:
elem_shape = elem.icon.shape
x,y = elem.x, elem.y
self.canvas[y : y + elem_shape[1], x:x + elem_shape[0]] = elem.icon
text = 'Fuel Left: {} | Rewards: {}'.format(self.fuel_left, self.ep_return)
self.canvas = cv2.putText(self.canvas, text, (10,20), font, 0.8, (0,0,0), 1, cv2.LINE_AA)
def reset(self):
self.fuel_left = self.maxfuel
self.ep_return = 0
self.obs = np.zeros(self.observation_shape)
self.obs[26] = self.maxfuel
self.bird_count = 0
self.fuel_count = 0
x = random.randrange(int(self.canvas_shape[0] * 0.05), int(self.canvas_shape[0] * 0.90))
y = random.randrange(int(self.canvas_shape[1] * 0.05), int(self.canvas_shape[1] * 0.90))
self.chopper = Chopper("chopper", self.x_max, self.x_min, self.y_max, self.y_min)
self.chopper.set_position(x,y)
self.obs[24] = x
self.obs[25] = y
self.elements = [self.chopper]
self.canvas = np.ones(self.canvas_shape) * 1
self.draw_elements_on_canvas()
return self.obs
def get_action_meanings(self):
return {0: "Right", 1: "Left", 2: "Down", 3: "Up", 4: "Do Nothing"}
def has_collided(self, elem1, elem2):
x_col = False
y_col = False
elem1_x, elem1_y = elem1.get_position()
elem2_x, elem2_y = elem2.get_position()
if 2 * abs(elem1_x - elem2_x) <= (elem1.icon_w + elem2.icon_w):
x_col = True
if 2 * abs(elem1_y - elem2_y) <= (elem1.icon_h + elem2.icon_h):
y_col = True
if x_col and y_col:
return True
return False
def step(self, action):
done = False
reward = 1
assert self.action_space.contains(action), "invalid action"
if action == 4:
self.chopper.move(0,5)
elif action == 1:
self.chopper.move(0,-5)
elif action == 2:
self.chopper.move(5,0)
elif action == 0:
self.chopper.move(-5,0)
elif action == 3:
self.chopper.move(0,0)
if random.random() < 0.1 and self.bird_count<self.maxbirdnum:
spawned_bird = Bird("bird_{}".format(self.bird_count), self.x_max, self.x_min, self.y_max, self.y_min)
self.bird_count += 1
bird_y = random.randrange(self.y_min, self.y_max)
spawned_bird.set_position(self.x_max, bird_y)
self.elements.append(spawned_bird)
if random.random() < 0.05 and self.fuel_count<self.maxfuelnum:
spawned_fuel = Fuel("fuel_{}".format(self.bird_count), self.x_max, self.x_min, self.y_max, self.y_min)
self.fuel_count += 1
fuel_x = random.randrange(self.x_min, self.x_max)
fuel_y = self.y_max
spawned_fuel.set_position(fuel_x, fuel_y)
self.elements.append(spawned_fuel)
for elem in self.elements:
if isinstance(elem, Bird):
if elem.get_position()[0] <= self.x_min:
self.elements.remove(elem)
self.bird_count -= 1
else:
elem.move(-5,0)
if self.has_collided(self.chopper, elem):
done = True
reward = -100000.0*(1.0/self.ep_return+1)
if isinstance(elem, Fuel):
flag1 = False
flag2 = False
if self.has_collided(self.chopper, elem):
self.fuel_left = self.maxfuel
flag1 = True
reward += 2
# time.sleep(0.5)
if elem.get_position()[1] <= self.y_min:
flag2 = True
self.fuel_count -= 1
else:
elem.move(0, -5)
if flag1 == True or flag2 == True:
self.elements.remove(elem)
self.fuel_left -= 1
if self.fuel_left == 0:
done = True
self.draw_elements_on_canvas()
self.ep_return += 1
birdnum = 0
fuelnum = 0
x_, y_ = self.chopper.get_position()
dis = 0.0
for elem in self.elements:
x,y = elem.get_position()
if isinstance(elem,Bird):
self.obs[2*birdnum] = x
self.obs[2*birdnum+1] = y
birdnum += 1
dis += math.hypot(x_-x,y_-y)
if isinstance(elem,Fuel):
base = self.maxbirdnum*2
self.obs[base+2*fuelnum] = x
self.obs[base+2*fuelnum+1] = y
fuelnum += 1
self.obs[24] = x_
self.obs[25] = y_
self.obs[26] = self.fuel_left
self.obs[27] = self.ep_return
if x_ == self.x_min or x_ == self.x_max or y_ == self.y_max or y_ == self.y_min:
reward -= random.random()
for i in range(26):
if i%2 == 0:
self.obs[i]/=800.0
else:
self.obs[i]/=600.0
self.obs[26]/=1000.0
self.obs[27]/=100.0
# print('reward:',reward)
# if done == True:
# time.sleep(1)
return self.obs, reward, done, {}
def render(self, mode = "human"):
assert mode in ["human", "rgb_array"], "Invalid mode, must be either \"human\" or \"rgb_array\""
if mode == "human":
cv2.imshow("Game", self.canvas)
cv2.waitKey(10)
elif mode == "rgb_array":
return self.canvas
def close(self):
cv2.destroyAllWindows()
class Point(object):
def __init__(self, name, x_max, x_min, y_max, y_min):
self.x = 0
self.y = 0
self.x_min = x_min
self.x_max = x_max
self.y_min = y_min
self.y_max = y_max
self.name = name
def set_position(self, x, y):
self.x = self.clamp(x, self.x_min, self.x_max - self.icon_w)
self.y = self.clamp(y, self.y_min, self.y_max - self.icon_h)
def get_position(self):
return (self.x, self.y)
def move(self, del_x, del_y):
self.x += del_x
self.y += del_y
self.x = self.clamp(self.x, self.x_min, self.x_max - self.icon_w)
self.y = self.clamp(self.y, self.y_min, self.y_max - self.icon_h)
def clamp(self, n, minn, maxn):
return max(min(maxn, n), minn)
class Chopper(Point):
def __init__(self, name, x_max, x_min, y_max, y_min):
super(Chopper, self).__init__(name, x_max, x_min, y_max, y_min)
self.icon = cv2.imread("chopper1.jpg") / 255.0
self.icon_w = 64
self.icon_h = 64
self.icon = cv2.resize(self.icon, (self.icon_h, self.icon_w))
class Bird(Point):
def __init__(self, name, x_max, x_min, y_max, y_min):
super(Bird, self).__init__(name, x_max, x_min, y_max, y_min)
self.icon = cv2.imread("bird1.jpg") / 255.0
self.icon_w = 32
self.icon_h = 32
self.icon = cv2.resize(self.icon, (self.icon_h, self.icon_w))
class Fuel(Point):
def __init__(self, name, x_max, x_min, y_max, y_min):
super(Fuel, self).__init__(name, x_max, x_min, y_max, y_min)
self.icon = cv2.imread("fuel1.jpg") / 255.0
self.icon_w = 32
self.icon_h = 32
self.icon = cv2.resize(self.icon, (self.icon_h, self.icon_w))
if __name__ == '__main__':
from IPython import display
env = ChopperScape()
obs = env.reset()
while True:
# random agent
action = random.randrange(-1,1)
obs, reward, done, info = env.step(action)
# Render the game
env.render()
if done == True:
break
env.close()
the ddpg algorithm to train the agent is as follows (ddpg.py):
from custom import ChopperScape
import random
import collections
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
#超参数
lr_mu = 0.005
lr_q = 0.01
gamma = 0.99
batch_size = 32
buffer_limit = 50000
tau = 0.005 # for target network soft update
class ReplayBuffer():
def __init__(self):
self.buffer = collections.deque(maxlen=buffer_limit)
def put(self, transition):
self.buffer.append(transition)
def sample(self, n):
mini_batch = random.sample(self.buffer, n)
s_lst, a_lst, r_lst, s_prime_lst, done_mask_lst = [], [], [], [], []
for transition in mini_batch:
s, a, r, s_prime, done = transition
s_lst.append(s)
a_lst.append([a])
r_lst.append(r)
s_prime_lst.append(s_prime)
done_mask = 0.0 if done else 1.0
done_mask_lst.append(done_mask)
return torch.tensor(s_lst, dtype=torch.float), torch.tensor(a_lst, dtype=torch.float), \
torch.tensor(r_lst, dtype=torch.float), torch.tensor(s_prime_lst, dtype=torch.float), \
torch.tensor(done_mask_lst, dtype=torch.float)
def size(self):
return len(self.buffer)
class MuNet(nn.Module):
def __init__(self):
super(MuNet, self).__init__()
self.fc1 = nn.Linear(28, 128)
self.fc2 = nn.Linear(128, 64)
self.fc_mu = nn.Linear(64, 1)
def forward(self, x):
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
mu = torch.tanh(self.fc_mu(x))
return mu
class QNet(nn.Module):
def __init__(self):
super(QNet, self).__init__()
self.fc_s = nn.Linear(28, 64)
self.fc_a = nn.Linear(1,64)
self.fc_q = nn.Linear(128, 32)
self.fc_out = nn.Linear(32,1)
def forward(self, x, a):
h1 = F.relu(self.fc_s(x))
h2 = F.relu(self.fc_a(a))
cat = torch.cat([h1,h2], dim=1)
q = F.relu(self.fc_q(cat))
q = self.fc_out(q)
return q
class OrnsteinUhlenbeckNoise:
def __init__(self, mu):
self.theta, self.dt, self.sigma = 0.1, 0.01, 0.1
self.mu = mu
self.x_prev = np.zeros_like(self.mu)
def __call__(self):
x = self.x_prev + self.theta * (self.mu - self.x_prev) * self.dt + \
self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.mu.shape)
self.x_prev = x
return x
def train(mu, mu_target, q, q_target, memory, q_optimizer, mu_optimizer):
s,a,r,s_prime,done_mask = memory.sample(batch_size)
core = q_target(s_prime, mu_target(s_prime)) * done_mask
target = r + gamma * core
q_loss = F.smooth_l1_loss(q(s,a), target.detach())
q_optimizer.zero_grad()
q_loss.backward()
q_optimizer.step()
mu_loss = -q(s,mu(s)).mean() # That's all for the policy loss.
mu_optimizer.zero_grad()
mu_loss.backward()
mu_optimizer.step()
def soft_update(net, net_target):
for param_target, param in zip(net_target.parameters(), net.parameters()):
param_target.data.copy_(param_target.data * (1.0 - tau) + param.data * tau)
def main():
env = ChopperScape()
memory = ReplayBuffer()
q, q_target = QNet(), QNet()
q_target.load_state_dict(q.state_dict())
mu, mu_target = MuNet(), MuNet()
mu_target.load_state_dict(mu.state_dict())
score = 0.0
print_interval = 20
mu_optimizer = optim.Adam(mu.parameters(), lr=lr_mu)
q_optimizer = optim.Adam(q.parameters(), lr=lr_q)
ou_noise = OrnsteinUhlenbeckNoise(mu=np.zeros(1))
for n_epi in range(10000):
s = env.reset()
done = False
while not done:
a = mu(torch.from_numpy(s).float())
a = a.item() + ou_noise()[0]
print('action:',a)
s_prime, r, done, info = env.step(a)
env.render()
memory.put((s,a,r/100.0,s_prime,done))
score += r
s = s_prime
if memory.size()>20000:
for _ in range(10):
train(mu, mu_target, q, q_target, memory, q_optimizer, mu_optimizer)
soft_update(mu, mu_target)
soft_update(q, q_target)
if n_epi%print_interval==0 and n_epi!=0:
print("# of episode :{}, avg score : {:.1f}".format(n_epi, score/print_interval))
score = 0.0
env.close()
if __name__ == '__main__':
main()
and the dqn algorithm is as follows(dqn.py):
import gym
import collections
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from custom import ChopperScape
#Hyperparameters
learning_rate = 0.0005
gamma = 0.98
buffer_limit = 50000
batch_size = 32
class ReplayBuffer():
def __init__(self):
self.buffer = collections.deque(maxlen=buffer_limit)
def put(self, transition):
self.buffer.append(transition)
def sample(self, n):
mini_batch = random.sample(self.buffer, n)
s_lst, a_lst, r_lst, s_prime_lst, done_mask_lst = [], [], [], [], []
for transition in mini_batch:
s, a, r, s_prime, done_mask = transition
s_lst.append(s)
a_lst.append([a])
r_lst.append([r])
s_prime_lst.append(s_prime)
done_mask_lst.append([done_mask])
return torch.tensor(s_lst, dtype=torch.float), torch.tensor(a_lst), \
torch.tensor(r_lst), torch.tensor(s_prime_lst, dtype=torch.float), \
torch.tensor(done_mask_lst)
def size(self):
return len(self.buffer)
class Qnet(nn.Module):
def __init__(self):
super(Qnet, self).__init__()
self.fc1 = nn.Linear(28, 128)
self.fc2 = nn.Linear(128, 128)
self.fc3 = nn.Linear(128, 5)
def forward(self, x):
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
def sample_action(self, obs, epsilon):
out = self.forward(obs)
# coin = random.random()
# if coin < epsilon:
# return random.randint(0,1)
# else :
# return out.argmax().item()
return out.argmax().item()
def train(q, q_target, memory, optimizer):
for _ in range(10):
s,a,r,s_prime,done_mask = memory.sample(batch_size)
q_out = q(s)
q_a = q_out.gather(1,a)
max_q_prime = q_target(s_prime).max(1)[0].unsqueeze(1)
target = r + gamma * max_q_prime * done_mask
loss = F.smooth_l1_loss(q_a, target)
optimizer.zero_grad()
loss.backward()
optimizer.step()
def main():
env = ChopperScape()
q = torch.load('10000_dqn_3.pt')
q_target = torch.load('10000_dqn_3_qtarget.pt')
# q_target.load_state_dict(q.state_dict())
memory = ReplayBuffer()
print_interval = 20
score = 0.0
optimizer = optim.Adam(q.parameters(), lr=learning_rate)
for n_epi in range(10000):
epsilon = max(0.01, 0.08 - 0.01*(n_epi/200)) #Linear annealing from 8% to 1%
s = env.reset()
done = False
while not done:
a = q.sample_action(torch.from_numpy(s).float(), epsilon)
s_prime, r, done, info = env.step(a)
env.render()
done_mask = 0.0 if done else 1.0
memory.put((s,a,r,s_prime, done_mask))
s = s_prime
if done:
break
score += r
if memory.size()>20000:
train(q, q_target, memory, optimizer)
if n_epi%print_interval==0 and n_epi!=0:
q_target.load_state_dict(q.state_dict())
print("n_episode :{}, score : {:.1f}, n_buffer : {}, eps : {:.1f}%".format(n_epi, score/print_interval, memory.size(), epsilon*100))
score = 0.0
env.close()
def test():
env = ChopperScape()
q = torch.load('10000_dqn_q.pt')
done = False
s = env.reset()
while not done:
a = q.sample_action(torch.from_numpy(s).float(), 1)
s_prime, r, done, info = env.step(a)
env.render()
s = s_prime
if done:
break
if __name__ == '__main__':
main()
when perform dqn, please annotate the action convert part in custom.py/class ChoperScape/step
after 10000 episode in ddpg/dqn, the agent still can not play more than 15 seconds, could you point out where the problem is?

Questions on interface condition convergence in PINNs(Physics-Informed Neural Network)

I am solving a magnetostatic problem using PINN.
I have succeeded in solving a simple Poisson equation. However, in the analysis considering the geometry, a problem was found in which the interface condition loss did not converge.
I've tried numerous things including changing the mini-batch addition model.
I'd appreciate it if you could let me know what's wrong with my code.
class ironmaxwell(Model):
def __init__(self):
super(ironmaxwell, self).__init__()
initializer = tf.keras.initializers.GlorotUniform
self.id1 = tf.keras.layers.Dropout(rate=0.2)
self.ih1 = Dense(40, activation='elu', kernel_regularizer=tf.keras.regularizers.L2(0.001))
self.id2 = tf.keras.layers.Dropout(rate=0.2)
self.ih2 = Dense(40, activation='elu',kernel_regularizer=tf.keras.regularizers.L2(0.001))
self.id3 = tf.keras.layers.Dropout(rate=0.2)
self.ih3 = Dense(40, activation='elu',kernel_regularizer=tf.keras.regularizers.L2(0.001))
self.id4 = tf.keras.layers.Dropout(rate=0.2)
self.ih4 = Dense(40, activation='elu',kernel_regularizer=tf.keras.regularizers.L2(0.001))
self.id5 = tf.keras.layers.Dropout(rate=0.2)
self.ih5 = Dense(40, activation='elu',kernel_regularizer=tf.keras.regularizers.L2(0.001))
self.id6 = tf.keras.layers.Dropout(rate=0.2)
self.ih6 = Dense(40, activation='elu',kernel_regularizer=tf.keras.regularizers.L2(0.001))
self.iu1 = Dense(40, activation='linear',kernel_regularizer=tf.keras.regularizers.L2(0.001))
self.iw1 = Dense(40, activation='linear',kernel_regularizer=tf.keras.regularizers.L2(0.001))
self.iu = Dense(1, activation='linear')
def call(self, state):
ix = self.id1(state)
iy = self.iu1(state)
iz = self.iw1(state)
ix = (1-self.ih1(ix))*iy + self.ih1(ix)*iz
ix = self.id2(ix)
ix = (1-self.ih2(ix))*iy + self.ih2(ix)*iz
ix = self.id3(ix)
ix = (1-self.ih3(ix))*iy + self.ih4(ix)*iz
ix = self.id4(ix)
ix = (1-self.ih4(ix))*iy + self.ih4(ix)*iz
ix = self.id5(ix)
ix = (1-self.ih5(ix))*iy + self.ih5(ix)*iz
ix = self.id6(ix)
ix = (1-self.ih6(ix))*iy + self.ih6(ix)*iz
iout = self.iu(ix)
return iout
class coilmaxwell(Model):
def __init__(self):
super(coilmaxwell, self).__init__()
initializer = tf.keras.initializers.GlorotUniform
self.d1 = tf.keras.layers.Dropout(rate=0.2)
self.h1 = Dense(40, activation='elu', kernel_regularizer=tf.keras.regularizers.L2(0.001))
self.d2 = tf.keras.layers.Dropout(rate=0.2)
self.h2 = Dense(40, activation='elu',kernel_regularizer=tf.keras.regularizers.L2(0.001))
self.d3 = tf.keras.layers.Dropout(rate=0.2)
self.h3 = Dense(40, activation='elu',kernel_regularizer=tf.keras.regularizers.L2(0.001))
self.d4 = tf.keras.layers.Dropout(rate=0.2)
self.h4 = Dense(40, activation='elu',kernel_regularizer=tf.keras.regularizers.L2(0.001))
self.d5 = tf.keras.layers.Dropout(rate=0.2)
self.h5 = Dense(40, activation='elu',kernel_regularizer=tf.keras.regularizers.L2(0.001))
self.d6 = tf.keras.layers.Dropout(rate=0.2)
self.h6 = Dense(40, activation='elu',kernel_regularizer=tf.keras.regularizers.L2(0.001))
self.u1 = Dense(40, activation='linear',kernel_regularizer=tf.keras.regularizers.L2(0.001))
self.w1 = Dense(40, activation='linear',kernel_regularizer=tf.keras.regularizers.L2(0.001))
self.u = Dense(1, activation='linear')
def call(self, state):
x = self.d1(state)
y = self.u1(state)
z = self.w1(state)
x = (1-self.h1(x))*y + self.h1(x)*z
x = self.d2(x)
x = (1-self.h2(x))*y + self.h2(x)*z
x = self.d3(x)
x = (1-self.h3(x))*y + self.h4(x)*z
x = self.d4(x)
x = (1-self.h4(x))*y + self.h4(x)*z
x = self.d5(x)
x = (1-self.h5(x))*y + self.h5(x)*z
x = self.d6(x)
x = (1-self.h6(x))*y + self.h6(x)*z
out = self.u(x)
return out
##############################################################################################################################
class MaxwellPinn(object):
def __init__(self):
self.lr = 0.001
#self.lr = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=.001, decay_steps=10, decay_rate=.01)
self.opt_iron = Adam(self.lr)
self.opt_coil = Adam(self.lr)
self.ironmaxwell = ironmaxwell()
self.ironmaxwell.build(input_shape=(None, 2))
self.coilmaxwell = coilmaxwell()
self.coilmaxwell.build(input_shape=(None, 2))
self.train_loss_history = []
self.iter_count = 0
self.instant_loss = 0
self.bnd_loss = 0
self.ic_loss = 0
self.lamda = 0.1
self.pde_loss = 0
self.max_value = 0.012315021035034
self.iron_loss = 0
self.coil_loss = 0
################################################################################################################################
#tf.function
def physics_net_iron(self, xt,jmu):
x_i = xt[:, 0:1]
t_i = xt[:, 1:2]
with tf.GradientTape(persistent=True) as tape:
tape.watch(t_i)
tape.watch(x_i)
xt_t_i = tf.concat([x_i,t_i], axis=1)
u_i = self.ironmaxwell(xt_t_i)
u_x_i = tape.gradient(u_i, x_i)
u_t_i = tape.gradient(u_i, t_i)
u_xx_i = tape.gradient(u_x_i, x_i)
u_tt_i = tape.gradient(u_t_i, t_i)
del tape
return (u_xx_i+u_tt_i+jmu)
#tf.function
def physics_net_coil(self, xt,jmu):
x_c = xt[:, 0:1]
t_c = xt[:, 1:2]
with tf.GradientTape(persistent=True) as tape:
tape.watch(t_c)
tape.watch(x_c)
xt_t_c = tf.concat([x_c,t_c], axis=1)
u_c = self.coilmaxwell(xt_t_c)
u_x_c = tape.gradient(u_c, x_c)
u_t_c = tape.gradient(u_c, t_c)
u_xx_c = tape.gradient(u_x_c, x_c)
u_tt_c = tape.gradient(u_t_c, t_c)
del tape
return (u_xx_c+u_tt_c+jmu)
####################################################################################################################
#tf.function
def physics_net_for_ic(self, xt,in_mu,nom,out_mu): # 경계조건 물리정보
x = xt[:, 0:1]
t = xt[:, 1:2]
with tf.GradientTape(persistent=True) as tape:
tape.watch(t)
tape.watch(x)
xt_t = tf.concat([x,t], axis=1)
out_u = self.ironmaxwell(xt_t)
out_u_x = tape.gradient(out_u, x)
out_u_y = tape.gradient(out_u, t)
in_u = self.coilmaxwell(xt_t)
in_u_x = tape.gradient(in_u, x)
in_u_y = tape.gradient(in_u, t)
del tape
out_b_x = out_u_y
out_b_y = out_u_x
out_h_x = out_b_x/out_mu
out_h_y = out_b_y/out_mu
in_b_x = in_u_y
in_b_y = in_u_x
in_h_x = in_b_x/in_mu
in_h_y = in_b_y/in_mu
loss_b = tf.add(tf.multiply((in_b_x-out_b_x),nom),tf.multiply((in_b_y-out_b_y),(1-nom)))
loss_h = tf.add(tf.multiply((in_h_x-out_h_x),(1-nom)),tf.multiply((in_h_y-out_h_y),nom))
return loss_b, loss_h
#############################################################################################################
def save_weights(self, path):
self.ironmaxwell.save_weights(path + 'ironmaxwell.h5')
self.coilmaxwell.save_weights(path + 'coilmaxwell.h5')
#############################################################################################################
def load_weights(self, path):
self.ironmaxwell.load_weights(path + 'ironmaxwell.h5')
self.coilmaxwell.load_weights(path + 'coilmaxwell.h5')
#############################################################################################################
def compute_loss_iron(self, f, u_bnd_hat, u_bnd_sol,penalty,loss_b,loss_h):
loss_col = tf.reduce_mean(tf.square(f))
loss_bnd = tf.reduce_mean(tf.square(u_bnd_hat - u_bnd_sol))
loss_mag = tf.reduce_mean(tf.square(loss_b))
loss_field =tf.reduce_mean(tf.square(loss_h))
loss = loss_col + loss_bnd + loss_mag + loss_field
self.iron_loss = loss.numpy()
return loss
def compute_loss_coil(self, f,penalty,loss_b,loss_h):
loss_col = tf.reduce_mean(tf.square(f))
loss_mag = tf.reduce_mean(tf.square(loss_b))
loss_field =tf.reduce_mean(tf.square(loss_h))
loss = loss_col + loss_mag + loss_field
self.coil_loss = loss.numpy()
return loss
#############################################################################################################
def compute_grad(self, xt_col_iron,xt_col_coil, xt_bnd, u_bnd_sol,ic,nom):
with tf.GradientTape(persistent=True) as tape:
J_coil = 9800
J_iron = 0
mu_coil = 1.2566e-06
mu_iron = mu_coil*2000
f_iron = self.physics_net_iron(xt_col_iron,J_iron*mu_iron) # iron의 PDE 손실
f_coil = self.physics_net_coil(xt_col_coil,J_coil*mu_coil) # coil의 PDE 손실
u_bnd_hat = self.ironmaxwell(xt_bnd) # IRON이 OUT
loss_b, loss_h = self.physics_net_for_ic(ic,mu_coil,nom,mu_iron)
loss_iron = self.compute_loss_iron(f_iron, u_bnd_hat, u_bnd_sol,1,loss_b,loss_h)
loss_coil = self.compute_loss_coil(f_coil,1,loss_b,loss_h)
iron_grads = tape.gradient(loss_iron, self.ironmaxwell.trainable_variables)
coil_grads = tape.gradient(loss_coil, self.coilmaxwell.trainable_variables)
loss = loss_iron + loss_coil
return loss, iron_grads, coil_grads
#############################################################################################################
def callback(self, arg=None):
if self.iter_count % 10 == 0:
print('iter=', self.iter_count, ', loss=', self.instant_loss,'iron_loss=',self.iron_loss,'coil_loss=',self.coil_loss)
self.train_loss_history.append([self.iter_count, self.instant_loss])
self.iter_count += 1
#############################################################################################################
def train_with_adam(self,xt_col_iron,xt_col_coil, xt_bnd, u_bnd_sol, adam_num,ic,nom):
def learn():
loss, iron_grads, coil_grads = self.compute_grad(xt_col_iron,xt_col_coil, xt_bnd, u_bnd_sol,ic,nom)
self.opt_iron.apply_gradients(zip(iron_grads, self.ironmaxwell.trainable_variables))
self.opt_coil.apply_gradients(zip(coil_grads, self.coilmaxwell.trainable_variables))
return loss
for iter in range(int(adam_num)):
loss = learn()
self.instant_loss = loss.numpy()
self.opt_iron = Adam(self.lr/(1+0.001*iter))
self.opt_coil = Adam(self.lr/(1+0.001*iter))
self.callback()
#############################################################################################################
def train_with_lbfgs(self, xt_col, xt_bnd, u_bnd_sol, lbfgs_num,J,mu,penalty,ii,ic,ii_mu,ic_mu,nom):
def vec_weight():
# vectorize weights
weight_vec = []
# Loop over all weights
for v in self.burgers.trainable_variables:
weight_vec.extend(v.numpy().flatten())
weight_vec = tf.convert_to_tensor(weight_vec)
return weight_vec
w0 = vec_weight().numpy()
def restore_weight(weight_vec):
# restore weight vector to model weights
idx = 0
for v in self.burgers.trainable_variables:
vs = v.shape
# weight matrices
if len(vs) == 2:
sw = vs[0] * vs[1]
updated_val = tf.reshape(weight_vec[idx:idx + sw], (vs[0], vs[1]))
idx += sw
# bias vectors
elif len(vs) == 1:
updated_val = weight_vec[idx:idx + vs[0]]
idx += vs[0]
# assign variables (Casting necessary since scipy requires float64 type)
v.assign(tf.cast(updated_val, dtype=tf.float32))
def loss_grad(w):
# update weights in model
restore_weight(w)
loss, grads, loss_bnd = self.compute_grad(xt_col, xt_bnd, u_bnd_sol,J,mu,penalty,ii,ic,ii_mu,ic_mu,nom)
# vectorize gradients
grad_vec = []
for g in grads:
grad_vec.extend(g.numpy().flatten())
# gradient list to array
# scipy-routines requires 64-bit floats
loss = loss.numpy().astype(np.float64)
self.instant_loss = loss
grad_vec = np.array(grad_vec, dtype=np.float64)
return loss, grad_vec
return scipy.optimize.minimize(fun=loss_grad,
x0=w0,
jac=True,
method='L-BFGS-B',
callback=self.callback,
options={'maxiter': lbfgs_num,
'maxfun': 5000,
'maxcor': 500,
'maxls': 500,
'ftol': 1.0 * np.finfo(float).eps}) #1.0 * np.finfo(float).eps
########################################################################################################################
def predict_iron(self, xt):
u_pred = self.ironmaxwell(xt)
return u_pred
def predict_coil(self, xt):
u_pred = self.coilmaxwell(xt)
return u_pred
#############################################################################################################
def train(self, adam_num, lbfgs_num):
iron_x = scipy.io.loadmat('iron_x.mat') # iron 좌표
iron_y = scipy.io.loadmat('iron_y.mat')
coil_x = scipy.io.loadmat('coil_x.mat') # coil 좌표
coil_y = scipy.io.loadmat('coil_y.mat')
iron_J = scipy.io.loadmat('iron_J.mat')
iron_mu = scipy.io.loadmat('iron_mu.mat')
coil_J = scipy.io.loadmat('iron_J.mat')
coil_mu = scipy.io.loadmat('iron_mu.mat')
ini = scipy.io.loadmat('bnd.mat')
inter_coil_x = scipy.io.loadmat('inter_coil_x.mat')
inter_coil_y = scipy.io.loadmat('inter_coil_y.mat')
icx = inter_coil_x['coil_inter_x']
icy = inter_coil_y['coil_inter_y']
ic = np.concatenate([icx, icy], axis=1) # interface 코일 데이터
inter_coil_mu = scipy.io.loadmat('inter_coil_mu.mat')
ic_mu = np.transpose(inter_coil_mu['mu_inter_coil'])
nomvec = scipy.io.loadmat('nom_vec.mat')
nom = nomvec['nom_vec']
nom = tf.convert_to_tensor(nom, dtype=tf.float32)
x_ini = np.transpose(ini['iron_bnd_x'])
y_ini = np.transpose(ini['iron_bnd_y'])
xt_bnd_data = np.concatenate([x_ini, y_ini], axis=1)
tu_bnd_data = []
for xt in xt_bnd_data:
tu_bnd_data.append(0)
tu_bnd_data = np.transpose(tu_bnd_data)
# collocation point iron
x_col_data = (iron_x['x'])
y_col_data = (iron_y['y'])
xy_col_data_iron = np.concatenate([x_col_data, y_col_data], axis=1)
# coil
x_col_data = (coil_x['coil_x'])
y_col_data = (coil_y['coil_y'])
xy_col_data_coil = np.concatenate([x_col_data, y_col_data], axis=1)
xt_col_iron = tf.convert_to_tensor(xy_col_data_iron, dtype=tf.float32)
xt_col_coil = tf.convert_to_tensor(xy_col_data_coil, dtype=tf.float32)
xt_bnd = tf.convert_to_tensor(xt_bnd_data, dtype=tf.float32)
u_bnd_sol = tf.convert_to_tensor(tu_bnd_data, dtype=tf.float32)
ic = tf.convert_to_tensor(ic, dtype=tf.float32)
ic_mu = tf.convert_to_tensor(ic_mu, dtype=tf.float32)
# Start timer
self.load_weights("C:/Users/user/Desktop/1-Cars (2 cases)/save_weights/maxwell/new_test/")
t0 = time()
self.train_with_adam(xt_col_iron,xt_col_coil, xt_bnd, u_bnd_sol, adam_num,ic,nom)
print('\nComputation time of adam: {} seconds'.format(time() - t0))
self.save_weights("C:/Users/user/Desktop/1-Cars (2 cases)/save_weights/maxwell/new_test/")
np.savetxt('C:/Users/user/Desktop/1-Cars (2 cases)/save_weights/maxwell/new_test/loss.txt', self.train_loss_history)
train_loss_history = np.array(self.train_loss_history)
plt.plot(train_loss_history[:, 0], train_loss_history[:, 1])
plt.yscale("log")
plt.show()
#############################################################################################################
# main
def main():
adam_num = 100000
lbfgs_num = 1000
agent = MaxwellPinn()
agent.train(adam_num, lbfgs_num)
if __name__=="__main__":
main()
It's my code.
Including mini-batch. Changing the model

Why is my REINFORCE algorithm not learning?

I am training a REINFORCE algorithm on the CartPole environment. Due to the simple nature of the environment, I expect it to learn quickly. However, that doesn't happen.
Here is the main portion of the algorithm -
for i in range(episodes):
print("i = ", i)
state = env.reset()
done = False
transitions = []
tot_rewards = 0
while not done:
act_proba = model(torch.from_numpy(state))
action = np.random.choice(np.array([0,1]), p = act_proba.data.numpy())
next_state, reward, done, info = env.step(action)
tot_rewards += 1
transitions.append((state, action, tot_rewards))
state = next_state
if i%50==0:
print("i = ", i, ",reward = ", tot_rewards)
score.append(tot_rewards)
reward_batch = torch.Tensor([r for (s,a,r) in transitions])
disc_rewards = discount_rewards(reward_batch)
nrml_disc_rewards = normalize_rewards(disc_rewards)
state_batch = torch.Tensor([s for (s,a,r) in transitions])
action_batch = torch.Tensor([a for (s,a,r) in transitions])
pred_batch = model(state_batch)
prob_batch = pred_batch.gather(dim=1, index=action_batch.long().view(-1, 1)).squeeze()
loss = -(torch.sum(torch.log(prob_batch)*nrml_disc_rewards))
opt.zero_grad()
loss.backward()
opt.step()
Here is the entire algorithm -
#I referred to this when writing the code - https://github.com/DeepReinforcementLearning/DeepReinforcementLearningInAction/blob/master/Chapter%204/Ch4_book.ipynb
import numpy as np
import gym
import torch
from torch import nn
env = gym.make('CartPole-v0')
learning_rate = 0.0001
episodes = 10000
def discount_rewards(reward, gamma = 0.99):
return torch.pow(gamma, torch.arange(len(reward)))*reward
def normalize_rewards(disc_reward):
return disc_reward/(disc_reward.max())
class NeuralNetwork(nn.Module):
def __init__(self, state_size, action_size):
super(NeuralNetwork, self).__init__()
self.state_size = state_size
self.action_size = action_size
self.linear_relu_stack = nn.Sequential(
nn.Linear(state_size, 300),
nn.ReLU(),
nn.Linear(300, 128),
nn.ReLU(),
nn.Linear(128, 128),
nn.ReLU(),
nn.Linear(128, action_size),
nn.Softmax()
)
def forward(self,x):
x = self.linear_relu_stack(x)
return x
model = NeuralNetwork(env.observation_space.shape[0], env.action_space.n)
opt = torch.optim.Adam(params = model.parameters(), lr = learning_rate)
score = []
for i in range(episodes):
print("i = ", i)
state = env.reset()
done = False
transitions = []
tot_rewards = 0
while not done:
act_proba = model(torch.from_numpy(state))
action = np.random.choice(np.array([0,1]), p = act_proba.data.numpy())
next_state, reward, done, info = env.step(action)
tot_rewards += 1
transitions.append((state, action, tot_rewards))
state = next_state
if i%50==0:
print("i = ", i, ",reward = ", tot_rewards)
score.append(tot_rewards)
reward_batch = torch.Tensor([r for (s,a,r) in transitions])
disc_rewards = discount_rewards(reward_batch)
nrml_disc_rewards = normalize_rewards(disc_rewards)
state_batch = torch.Tensor([s for (s,a,r) in transitions])
action_batch = torch.Tensor([a for (s,a,r) in transitions])
pred_batch = model(state_batch)
prob_batch = pred_batch.gather(dim=1, index=action_batch.long().view(-1, 1)).squeeze()
loss = -(torch.sum(torch.log(prob_batch)*nrml_disc_rewards))
opt.zero_grad()
loss.backward()
opt.step()

Your computation for discounting the reward is where your mistake is.
In REINFORCE (and many other algorithms) you need to compute the sum of future discounted rewards for every step onward.
This means that the sum of discounted rewards for the first step should be:
G_1 = r_1 + gamma * r_2 + gamma ^ 2 * r_3 + ... + gamma ^ (T-1) * r_T
G_2 = r_2 + gamma * r_3 + gamma ^ 2 * r_4 + ... + gamma ^ (T-1) * r_T
And so on...
This gives you an array containing all the sum of future rewards for every step (i.e. [G_1, G_2, G_3, ... , G_T])
However, what you compute currently is only applying a discount on the current step's reward:
G_1 = r_1
G_2 = gamma * r_2
G_3 = gamma ^ 2 * r_3
And so on...
Here is the Python code fixing your problem. We compute from the back of the list of reward to the front to be more computationally efficient.
def discount_rewards(reward, gamma=0.99):
R = 0
returns = []
reward = reward.tolist()
for r in reward[::-1]:
R = r + gamma * R
returns.append(R)
returns = torch.tensor(returns[::-1])
return returns
Here is a figure showing the progression of the algorithm's score over the first 5000 steps.

Pytorch-Optimzer doesn't update parameters

I made my custom model, AlexNetQIL (Alexnet with QIL layer)
'QIL' means quantization intervals learning
I trained my model and loss value didn't decrease at all and I found out parameters in my model were not updated at all because of QIL layer I added
I attached my codes AlexNetQil and qil
please someone let me know what's the problem in my codes
AlexNetQIL
import torch
import torch.nn as nn
from qil import *
class AlexNetQIL(nn.Module):
#def __init__(self, num_classes=1000): for imagenet
def __init__(self, num_classes=10): # for cifar-10
super(AlexNetQIL, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1)
self.bn1 = nn.BatchNorm2d(64)
self.relu2 = nn.ReLU(inplace=True)
self.maxpool1 = nn.MaxPool2d(kernel_size=2)
self.qil2 = Qil()
self.conv2 = nn.Conv2d(64, 192, kernel_size=3, padding=1)
self.bn2 = nn.BatchNorm2d(192)
self.relu2 = nn.ReLU(inplace=True)
self.maxpool2 = nn.MaxPool2d(kernel_size=2)
self.qil3 = Qil()
self.conv3 = nn.Conv2d(192, 384, kernel_size=3, padding=1)
self.bn3 = nn.BatchNorm2d(384)
self.relu3 = nn.ReLU(inplace=True)
self.qil4 = Qil()
self.conv4 = nn.Conv2d(384, 256, kernel_size=3, padding=1)
self.bn4 = nn.BatchNorm2d(256)
self.relu4 = nn.ReLU(inplace=True)
self.qil5 = Qil()
self.conv5 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
self.bn5 = nn.BatchNorm2d(256)
self.relu5 = nn.ReLU(inplace=True)
self.maxpool5 = nn.MaxPool2d(kernel_size=2)
self.classifier = nn.Sequential(
nn.Linear(256 * 2 * 2, 4096),
nn.ReLU(inplace=True),
nn.Linear(4096, 4096),
nn.ReLU(inplace=True),
nn.Linear(4096, num_classes),
)
def forward(self,x,inference = False):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu2(x)
x = self.maxpool1(x)
x,self.conv2.weight = self.qil2(x,self.conv2.weight,inference ) # if I remove this line, No problem
x = self.conv2(x)
x = self.bn2(x)
x = self.relu2(x)
x = self.maxpool2(x)
x,self.conv3.weight = self.qil3(x,self.conv3.weight,inference ) # if I remove this line, No problem
x = self.conv3(x)
x = self.bn3(x)
x = self.relu3(x)
x,self.conv4.weight = self.qil4(x,self.conv4.weight,inference ) # if I remove this line, No problem
x = self.conv4(x)
x = self.bn4(x)
x = self.relu4(x)
x,self.conv5.weight = self.qil5(x,self.conv5.weight,inference ) # if I remove this line, No problem
x = self.conv5(x)
x = self.bn5(x)
x = self.relu5(x)
x = self.maxpool5(x)
x = x.view(x.size(0),256 * 2 * 2)
x = self.classifier(x)
return x
QIL
forward
quantize weights and input activation with 2 steps
transformer(params) -> discretizer(params)
import torch
import torch.nn as nn
import numpy as np
import copy
#Qil (Quantize intervals learning)
class Qil(nn.Module):
discretization_level = 32
def __init__(self):
super(Qil,self).__init__()
self.cw = nn.Parameter(torch.rand(1)) # I have to train this interval parameter
self.dw = nn.Parameter(torch.rand(1)) # I have to train this interval parameter
self.cx = nn.Parameter(torch.rand(1)) # I have to train this interval parameter
self.dx = nn.Parameter(torch.rand(1)) # I have to train this interval parameter
self.gamma = nn.Parameter(torch.tensor(1.0)) # I have to train this transformer parameter
self.a = Qil.discretization_level
def forward(self,x,weights,Inference = False):
if not Inference:
weights = self.transfomer_weights(weights)
weights = self.discretizer(weights)
x = self.transfomer_activation(x)
x = self.discretizer(x)
return torch.nn.Parameter(x), torch.nn.Parameter(weights)
def transfomer_weights(self,weights):
device = weights.device
aw,bw = (0.5 / self.dw) , (-0.5*self.cw / self.dw + 0.5)
weights = torch.where( abs(weights) < self.cw - self.dw,
torch.tensor(0.).to(device),weights)
weights = torch.where( abs(weights) > self.cw + self.dw,
weights.sign(), weights)
weights = torch.where( (abs(weights) >= self.cw - self.dw) & (abs(weights) <= self.cw + self.dw),
(aw*abs(weights) + bw)**self.gamma * weights.sign() , weights)
return weights
def transfomer_activation(self,x):
device = x.device
ax,bx = (0.5 / self.dx) , (-0.5*self.cx / self.dx + 0.5)
x = torch.where(x < self.cx - self.dx,
torch.tensor(0.).to(device),x)
x = torch.where(x > self.cx + self.dx,
torch.tensor(1.0).to(device),x)
x = torch.where( (abs(x) >= self.cx - self.dx) & (abs(x) <= self.cx + self.dx),
ax*abs(x) + bx, x)
return x
def discretizer(self,tensor):
q_D = pow(2, Qil.discretization_level)
tensor = torch.round(tensor * q_D) / q_D
return tensor

handwriting synthesis alex graves

I have been trying to replicate the alex graves handwriting synthesis model, and I did this with tensorflow, and python on a 1080Ti GPU with cuda,
I exactly replicated all of the features explained in the paper and even clipped the respective gradient values in place, but I have real difficulty training it.
I also preproccessed the data in the way explained in the paper, including normalizing the X and y offsets, but the problem is that the training usually can't lower the negative log likelihood more than 1000 which in the paper it reaches -1000, and after that i see NaN weights.
The only extra thing I did was to add 0.0000001 to the conditional probability of every stroke to prevent NaN values in log likelihood.
Any tips or suggestions or experience with such a task?
this is the cell code I use,
class Custom_Cell(RNNCell):
def __init__(self,forget_bias,bias,one_hot_vector, hidden_layer_nums=[700,700,700], mixture_num=10, attention_num=4):
self.bias = bias
self.lstms = []
for i in hidden_layer_nums:
self.lstms.append(LSTMCell(num_units=i, initializer=tf.truncated_normal_initializer(0.075), dtype=tf.float32, forget_bias=forget_bias))
self.attention_num = attention_num
self.mixture_num = mixture_num
self.state_size = 2*sum(hidden_layer_nums) + 3*self.attention_num
self.attention_var_num = 3*self.attention_num
self.output_size = 6*self.mixture_num + 1 + 1
self.one_hot_vector = one_hot_vector
self.lstm_num = len(hidden_layer_nums)
self.hidden_layer_nums = hidden_layer_nums
temp_shape = self.one_hot_vector.shape
self.char_num = temp_shape[2]
self.i_to_h = []
self.w_to_h = []
self.h_to_h = []
self.prev_h_to_h = []
self.lstm_bias = []
self.lstm_to_attention_weights = tf.get_variable("lstms/first_to_attention_mtrx",shape=[hidden_layer_nums[0],self.attention_var_num],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True)
self.lstm_to_attention_bias = tf.get_variable("lstms/first_to_attention_bias",shape=[self.attention_var_num],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True)
self.all_to_output_mtrx = []
for i in range(self.lstm_num):
self.all_to_output_mtrx.append( tf.get_variable("lstms/to_output_mtrx_" + str(i), shape=[hidden_layer_nums[i],self.output_size-1],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True))
self.all_to_output_bias = tf.get_variable("lstms/output_bias",shape=[self.output_size-1],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True)
for i in range(self.lstm_num):
self.i_to_h.append(tf.get_variable("lstms/i_to_h_"+str(i),shape=[3,hidden_layer_nums[i]],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True))
self.w_to_h.append(tf.get_variable("lstms/w_to_h_"+str(i),shape=[self.char_num,hidden_layer_nums[i]],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True))
self.h_to_h.append(tf.get_variable("lstms/h_to_h_"+str(i),shape=[hidden_layer_nums[i],hidden_layer_nums[i]],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True))
self.lstm_bias.append(tf.get_variable("lstms/bias_" + str(i),shape=[hidden_layer_nums[i]],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True))
if not i == 0:
self.prev_h_to_h.append(
tf.get_variable("lstms/prev_h_to_h_" + str(i), shape=[hidden_layer_nums[i-1], hidden_layer_nums[i]],
dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075),
trainable=True))
def __call__(self, inputs, state, scope=None):
# Extracting previous configuration and vectors
splitarray = []
for i in self.hidden_layer_nums:
splitarray.append(i)
splitarray.append(i)
splitarray.append(3*self.attention_num)
splitted = tf.split(state,splitarray,axis=1)
prev_tuples = []
for i in range(self.lstm_num):
newtuple = LSTMStateTuple(splitted[2*i],splitted[2*i + 1])
prev_tuples.append(newtuple)
prev_attention_vec = splitted[2*self.lstm_num]
new_attention_vec = 0
next_states = []
most_attended = 0
last_output = 0
for i in range(self.lstm_num):
prev_c, prev_h = prev_tuples[i]
cell = self.lstms[i]
if i == 0:
with tf.name_scope("layer_1"):
w, most_attended = self.gaussian_attention(self.one_hot_vector,prev_attention_vec)
input_vec = tf.matmul(inputs,self.i_to_h[0]) + tf.matmul(prev_h,self.h_to_h[0]) + tf.matmul(w,self.w_to_h[0]) + self.lstm_bias[0]
_, new_state = cell(input_vec, prev_tuples[0])
new_c, new_h = new_state
next_states.append(new_c)
next_states.append(new_h)
last_output = tf.matmul(new_h,self.all_to_output_mtrx[0])
with tf.name_scope("attention_layer"):
temp_attention = tf.matmul(new_h,self.lstm_to_attention_weights) + self.lstm_to_attention_bias
new_alpha, new_beta, new_kappa = tf.split(temp_attention,[self.attention_num,self.attention_num,self.attention_num],axis=1)
old_alpha, old_beta, old_kappa = tf.split(prev_attention_vec,[self.attention_num,self.attention_num,self.attention_num], axis=1)
new_alpha = tf.exp(new_alpha)
new_beta = tf.exp(new_beta)
new_kappa = tf.exp(new_kappa) + old_kappa
new_attention_vec = tf.concat([new_alpha,new_beta,new_kappa],axis=1)
else:
with tf.name_scope("layer_" + str(i)):
w, most_attended = self.gaussian_attention(self.one_hot_vector,new_attention_vec)
input_vec = tf.matmul(inputs,self.i_to_h[i]) + tf.matmul(next_states[-1],self.prev_h_to_h[i-1]) + tf.matmul(prev_h,self.h_to_h[i]) + tf.matmul(w,self.w_to_h[i]) + self.lstm_bias[i]
_,new_state = cell(input_vec,prev_tuples[i])
new_c, new_h = new_state
next_states.append(new_c)
next_states.append(new_h)
last_output = last_output + tf.matmul(new_h, self.all_to_output_mtrx[i])
with tf.name_scope("output"):
last_output = last_output + self.all_to_output_bias
next_states.append(new_attention_vec)
state_to_return = tf.concat(next_states,axis=1)
output_split_param = [1,self.mixture_num,2*self.mixture_num,2*self.mixture_num,self.mixture_num]
binomial_param, pi, mu, sigma, rho = tf.split(last_output,output_split_param,axis=1)
binomial_param = tf.divide(1.,1.+tf.exp(binomial_param))
pi = tf.nn.softmax(tf.multiply(pi,1.+self.bias),axis=1)
mu = mu
sigma = tf.exp(sigma-self.bias)
rho = tf.tanh(rho)
output_to_return = tf.concat([most_attended, binomial_param, pi, mu, sigma, rho],axis=1)
return output_to_return, state_to_return
def state_size(self):
return self.state_size
def output_size(self):
return self.output_size
def gaussian_attention(self,sequence,params):
with tf.name_scope("attention_calculation"):
alpha, beta, kappa = tf.split(params,[self.attention_num,self.attention_num,self.attention_num],axis=1)
seq_shape = sequence.shape
seq_length = seq_shape[1]
temp_vec = 20*np.asarray(range(seq_length),dtype=float)
final_result = 0
alpha = tf.split(alpha,self.attention_num,1)
beta = tf.split(beta,self.attention_num,1)
kappa = tf.split(kappa,self.attention_num,1)
for i in range(self.attention_num):
alpha_now = alpha[i]
beta_now = beta[i]
kappa_now = kappa[i]
result = kappa_now - temp_vec
result = tf.multiply(tf.square(result),tf.negative(beta_now))
result = tf.multiply(tf.exp(result),alpha_now)
final_result = final_result+result
most_attended = tf.argmax(final_result,axis=1)
most_attended = tf.reshape(tf.cast(most_attended,dtype=tf.float32),shape=[-1,1])
final_result = tf.tile(tf.reshape(final_result,[-1,seq_shape[1],1]),[1,1,seq_shape[2]])
to_return = tf.reduce_sum(tf.multiply(final_result,sequence),axis=1)
return to_return, most_attended
and this is the rnn with loss network:
`to_write_one_hot = tf.placeholder(dtype=tf.float32,shape=(None,line_length,dict_length))
sequence = tf.placeholder(dtype=tf.float32,shape=(None,None,3))
sequence_shift = tf.placeholder(dtype=tf.float32,shape=(None,None,3))
bias = tf.placeholder(shape=[1],dtype=tf.float32)
sequence_length = tf.placeholder(shape=(None),dtype=tf.int32)
forget_bias_placeholder = tf.placeholder(shape=(None),dtype=tf.float32)
graves_cell = Custom_Cell(forget_bias=1,one_hot_vector=to_write_one_hot,hidden_layer_nums=hidden_layer_nums,mixture_num=mixture_num,bias=bias,attention_num=attention_num)
output, state = tf.nn.dynamic_rnn(graves_cell,sequence,dtype=tf.float32,sequence_length=sequence_length)
with tf.name_scope("loss_layer"):
mask = tf.sign(tf.reduce_max(tf.abs(output), 2))
most_attended, binomial_param, pi, mu, sigma, rho = tf.split(output,[1,1,mixture_num,2*mixture_num,2*mixture_num,mixture_num], axis=2)
pi = tf.split(pi,mixture_num,axis=2)
mu = tf.split(mu,mixture_num,axis=2)
sigma = tf.split(sigma,mixture_num,axis=2)
rho = tf.split(rho,mixture_num,axis=2)
negative_log_likelihood = 0
probability = 0
x1, x2, e = tf.split(sequence_shift,3,axis=2)
for i in range(mixture_num):
pi_now = pi[i]
mu_now = tf.split(mu[i],2,axis=2)
mu_1 = mu_now[0]
mu_2 = mu_now[1]
sigma_now = tf.split(sigma[i],2,axis=2)
sigma_1 = sigma_now[0] + (1-tf.reshape(mask, [-1,max_len,1]))
sigma_2 = sigma_now[1] + (1-tf.reshape(mask, [-1,max_len,1]))
rho_now = rho[i]
Z = tf.divide(tf.square(x1-mu_1),tf.square(sigma_1)) + tf.divide(tf.square(x2-mu_2),tf.square(sigma_2)) - tf.divide(tf.multiply(tf.multiply(x1-mu_1,x2-mu_2),2*rho_now),tf.multiply(sigma_1,sigma_2))
prob = tf.exp(tf.div(tf.negative(Z),2*(1-tf.square(rho_now))))
Normalizing_factor = 2*np.pi*tf.multiply(sigma_1,sigma_2)
Normalizing_factor = tf.multiply(Normalizing_factor,tf.sqrt(1-tf.square(rho_now)))
prob = tf.divide(prob,Normalizing_factor)
prob = tf.multiply(pi_now,prob)
probability = probability + prob
binomial_likelihood = tf.multiply(binomial_param,e) + tf.multiply(1-binomial_param,1-e)
probability = tf.multiply(probability,binomial_likelihood)
probability = probability + (1-tf.reshape(mask,[-1,max_len,1]))
temp_tensor = tf.multiply(mask, tf.log(tf.reshape(probability,[-1,max_len]) + mask*0.00001))
negative_log_likelihood_0 = tf.negative(tf.reduce_sum(temp_tensor,axis=1))
negative_log_likelihood_1 = tf.divide(negative_log_likelihood_0,tf.reshape(tf.cast(sequence_length, dtype=tf.float32), shape=[-1,1]))
negative_log_likelihood_1 = tf.reduce_mean(negative_log_likelihood_1)
tf.summary.scalar("average_per_timestamp_log_likelihood", negative_log_likelihood_1)
negative_log_likelihood = tf.reduce_mean(negative_log_likelihood_0)
with tf.name_scope("train_op"):
optimizer = tf.train.RMSPropOptimizer(learning_rate=0.0001,momentum=0.9, decay=0.95,epsilon=0.0001)
gvs = optimizer.compute_gradients(negative_log_likelihood)
capped_gvs = []
for grad, var in gvs:
if var.name.__contains__("rnn"):
capped_gvs.append((tf.clip_by_value(grad,-10,10),var))
else:
capped_gvs.append((tf.clip_by_value(grad,-100,100),var))
train_op = optimizer.apply_gradients(capped_gvs)
`
Edit.1. I discovered that I was clipping gradients in a wrong way, the correct way was to introduce a new 'op' as explained by https://github.com/tensorflow/tensorflow/issues/2793 to clip only the output gradients of the whole network and lstm cells.
#tf.custom_gradient
def clip_gradient(x, clip):
def grad(dresult):
return [tf.clip_by_norm(dresult, clip)]
return x, grad
add the lines above to your code and use the function on any variable you want to clip the gradient in back propagation!
I should still see my results.
Edit 2.
The changed Model code is:
from tensorflow.contrib.rnn import RNNCell
from tensorflow.contrib.rnn import LSTMCell
from tensorflow.contrib.rnn import LSTMStateTuple
import tensorflow as tf
import numpy as np
#tf.custom_gradient
def clip_gradient_lstm(x):
def grad(dresult):
return [tf.clip_by_value(dresult,-10,10)]
return x, grad
#tf.custom_gradient
def clip_gradient_output(x):
def grad(dresult):
return [tf.clip_by_value(dresult,-100,100)]
return x, grad
def length_of(seq):
used = tf.sign(tf.reduce_max(tf.abs(seq),axis=2))
length = tf.reduce_sum(used,1)
length = tf.cast(length,tf.int32)
return length
class Custom_Cell(RNNCell):
def __init__(self,forget_bias,bias,one_hot_vector, hidden_layer_nums=[700,700,700], mixture_num=10, attention_num=4):
self.bias = bias
self.lstms = []
for i in hidden_layer_nums:
self.lstms.append(LSTMCell(num_units=i, initializer=tf.truncated_normal_initializer(0.075), dtype=tf.float32, forget_bias=forget_bias))
self.attention_num = attention_num
self.mixture_num = mixture_num
self.state_size = 2*sum(hidden_layer_nums) + 3*self.attention_num
self.attention_var_num = 3*self.attention_num
self.output_size = 6*self.mixture_num + 1 + 1
self.one_hot_vector = one_hot_vector
self.lstm_num = len(hidden_layer_nums)
self.hidden_layer_nums = hidden_layer_nums
temp_shape = self.one_hot_vector.shape
self.char_num = temp_shape[2]
self.i_to_h = []
self.w_to_h = []
self.h_to_h = []
self.prev_h_to_h = []
self.lstm_bias = []
self.lstm_to_attention_weights = tf.get_variable("lstms/first_to_attention_mtrx",shape=[hidden_layer_nums[0],self.attention_var_num],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True)
self.lstm_to_attention_bias = tf.get_variable("lstms/first_to_attention_bias",shape=[self.attention_var_num],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True)
self.all_to_output_mtrx = []
for i in range(self.lstm_num):
self.all_to_output_mtrx.append( tf.get_variable("lstms/to_output_mtrx_" + str(i), shape=[hidden_layer_nums[i],self.output_size-1],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True))
self.all_to_output_bias = tf.get_variable("lstms/output_bias",shape=[self.output_size-1],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True)
for i in range(self.lstm_num):
self.i_to_h.append(tf.get_variable("lstms/i_to_h_"+str(i),shape=[3,hidden_layer_nums[i]],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True))
self.w_to_h.append(tf.get_variable("lstms/w_to_h_"+str(i),shape=[self.char_num,hidden_layer_nums[i]],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True))
self.h_to_h.append(tf.get_variable("lstms/h_to_h_"+str(i),shape=[hidden_layer_nums[i],hidden_layer_nums[i]],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True))
self.lstm_bias.append(tf.get_variable("lstms/bias_" + str(i),shape=[hidden_layer_nums[i]],dtype=tf.float32,initializer=tf.truncated_normal_initializer(stddev=0.075),trainable=True))
if not i == 0:
self.prev_h_to_h.append(
tf.get_variable("lstms/prev_h_to_h_" + str(i), shape=[hidden_layer_nums[i-1], hidden_layer_nums[i]],
dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.075),
trainable=True))
def __call__(self, inputs, state, scope=None):
# Extracting previous configuration and vectors
splitarray = []
for i in self.hidden_layer_nums:
splitarray.append(i)
splitarray.append(i)
splitarray.append(3*self.attention_num)
splitted = tf.split(state,splitarray,axis=1)
prev_tuples = []
for i in range(self.lstm_num):
newtuple = LSTMStateTuple(splitted[2*i],splitted[2*i + 1])
prev_tuples.append(newtuple)
prev_attention_vec = splitted[2*self.lstm_num]
new_attention_vec = 0
next_states = []
most_attended = 0
last_output = 0
for i in range(self.lstm_num):
prev_c, prev_h = prev_tuples[i]
cell = self.lstms[i]
if i == 0:
with tf.name_scope("layer_1"):
w, most_attended = self.gaussian_attention(self.one_hot_vector,prev_attention_vec)
input_vec = tf.matmul(inputs,self.i_to_h[0]) + tf.matmul(prev_h,self.h_to_h[0]) + tf.matmul(w,self.w_to_h[0]) + self.lstm_bias[0]
_, new_state = cell(input_vec, prev_tuples[0])
new_c, new_h = new_state
new_h = clip_gradient_lstm(new_h)
next_states.append(new_c)
next_states.append(new_h)
last_output = tf.matmul(new_h,self.all_to_output_mtrx[0])
with tf.name_scope("attention_layer"):
temp_attention = tf.matmul(new_h,self.lstm_to_attention_weights) + self.lstm_to_attention_bias
new_alpha, new_beta, new_kappa = tf.split(temp_attention,[self.attention_num,self.attention_num,self.attention_num],axis=1)
old_alpha, old_beta, old_kappa = tf.split(prev_attention_vec,[self.attention_num,self.attention_num,self.attention_num], axis=1)
new_alpha = tf.exp(new_alpha)
new_beta = tf.exp(new_beta)
new_kappa = tf.exp(new_kappa) + old_kappa
new_attention_vec = tf.concat([new_alpha,new_beta,new_kappa],axis=1)
else:
with tf.name_scope("layer_" + str(i)):
w, most_attended = self.gaussian_attention(self.one_hot_vector,new_attention_vec)
input_vec = tf.matmul(inputs,self.i_to_h[i]) + tf.matmul(next_states[-1],self.prev_h_to_h[i-1]) + tf.matmul(prev_h,self.h_to_h[i]) + tf.matmul(w,self.w_to_h[i]) + self.lstm_bias[i]
_,new_state = cell(input_vec,prev_tuples[i])
new_c, new_h = new_state
new_h = clip_gradient_lstm(new_h)
next_states.append(new_c)
next_states.append(new_h)
last_output = last_output + tf.matmul(new_h, self.all_to_output_mtrx[i])
with tf.name_scope("output"):
last_output = last_output + self.all_to_output_bias
last_output = clip_gradient_output(last_output)
next_states.append(new_attention_vec)
state_to_return = tf.concat(next_states,axis=1)
output_split_param = [1,self.mixture_num,2*self.mixture_num,2*self.mixture_num,self.mixture_num]
binomial_param, pi, mu, sigma, rho = tf.split(last_output,output_split_param,axis=1)
binomial_param = tf.divide(1.,1.+tf.exp(binomial_param))
pi = tf.nn.softmax(tf.multiply(pi,1.+self.bias),axis=1)
mu = mu
sigma = tf.exp(sigma-self.bias)
rho = tf.tanh(rho)
output_to_return = tf.concat([most_attended, binomial_param, pi, mu, sigma, rho],axis=1)
return output_to_return, state_to_return
def state_size(self):
return self.state_size
def output_size(self):
return self.output_size
def gaussian_attention(self,sequence,params):
with tf.name_scope("attention_calculation"):
alpha, beta, kappa = tf.split(params,[self.attention_num,self.attention_num,self.attention_num],axis=1)
seq_shape = sequence.shape
seq_length = seq_shape[1]
temp_vec = np.asarray(range(seq_length),dtype=float)
final_result = 0
alpha = tf.split(alpha,self.attention_num,1)
beta = tf.split(beta,self.attention_num,1)
kappa = tf.split(kappa,self.attention_num,1)
for i in range(self.attention_num):
alpha_now = alpha[i]
beta_now = beta[i]
kappa_now = kappa[i]
result = kappa_now - temp_vec
result = tf.multiply(tf.square(result),tf.negative(beta_now))
result = tf.multiply(tf.exp(result),alpha_now)
final_result = final_result+result
most_attended = tf.argmax(final_result,axis=1)
most_attended = tf.reshape(tf.cast(most_attended,dtype=tf.float32),shape=[-1,1])
final_result = tf.tile(tf.reshape(final_result,[-1,seq_shape[1],1]),[1,1,seq_shape[2]])
to_return = tf.reduce_sum(tf.multiply(final_result,sequence),axis=1)
return to_return, most_attended
and the Training is done by
with tf.name_scope("train_op"):
optimizer =
tf.train.RMSPropOptimizer(learning_rate=0.0001,momentum=0.9, decay=0.95,epsilon=0.0001,centered=True)
train_op = optimizer.minimize(negative_log_likelihood)
and right now is still in training, but it is now as low as -10.

We Keep Coding

html mysql json google-apps-script actionscript-3 ms-access google-chrome google-maps reporting-services sql-server-2008

Training Accuracy is Very Low in A Simple CNN using Theano - deep-learning

Related

How i can use dqn and ddpg to successfully train an agent excellent in customized environment?

Questions on interface condition convergence in PINNs(Physics-Informed Neural Network)

Why is my REINFORCE algorithm not learning?

Pytorch-Optimzer doesn't update parameters

handwriting synthesis alex graves

Categories

Resources