ValueError: Error when checking input: expected dense_1_input to have 4 dimensions, but got array with shape (20593, 4, 1) - deep-learning

I am trying to follow sentdex's game AI bot tutorial (https://www.youtube.com/watch?v=G-KvpNGudLw), but instead of tflearn I am using Keras for the same implementation.
Model Function
# Builds and compiles the fully connected Keras classifier from the question:
# five Dense/Dropout stages followed by a 2-unit softmax output layer.
# `input_size` is passed by the caller as the per-sample length of X.
def neural_network_model(input_size):
network = Sequential()
# NOTE(review): Keras' `input_shape` does not include the batch axis, so
# [None, input_size, 1] declares a 4-D input. This is the source of the
# "expected dense_1_input to have 4 dimensions" error quoted in the title.
network.add(Dense(units = 128, activation='relu', kernel_initializer = 'uniform', input_shape = [None, input_size, 1]))
network.add(Dropout(0.2))
network.add(Dense(units = 256, activation='relu', kernel_initializer = 'uniform'))
network.add(Dropout(0.2))
network.add(Dense(units = 512, activation='relu', kernel_initializer = 'uniform'))
network.add(Dropout(0.2))
network.add(Dense(units = 256, activation='relu', kernel_initializer = 'uniform'))
network.add(Dropout(0.2))
network.add(Dense(units = 128, activation='relu', kernel_initializer = 'uniform'))
network.add(Dropout(0.2))
# Two output units, one per one-hot action label.
network.add(Dense(units = 2, activation = 'softmax', kernel_initializer = 'uniform'))
# `LR` is not defined in this snippet; presumably a module-level learning rate.
adam = optimizers.Adam(lr=LR, decay=0.0)
network.compile(optimizer=adam, loss='categorical_crossentropy', metrics = ['accuracy'])
return network
Model Training Function
# Turns the collected [observation, one-hot action] pairs into arrays and fits
# a model on them; a new network is built when the caller does not supply one.
def train_model(training_data, model=False):
# NOTE(review): the trailing ", 1" makes X three-dimensional
# (samples, features, 1), which is the (20593, 4, 1) shape in the error.
X = np.array([i[0] for i in training_data]).reshape(-1, len(training_data[0][0]), 1)
# NOTE(review): Y stays a plain Python list here; the accepted fix further
# down converts it with np.array.
Y = [i[1] for i in training_data]
if not model:
model = neural_network_model(len(X[0]))
model.fit(X,Y, epochs = 5)
return model
where the training data is :
def initial_population():
    """Play random games and keep the moves from the games that scored well.

    Runs ``initial_games`` episodes of at most ``goal_steps`` random actions
    each. For every episode whose total reward reaches ``score_requirement``,
    each (previous observation, action) pair is stored with the action encoded
    one-hot (``[1, 0]`` for action 0, ``[0, 1]`` for action 1).

    The collected pairs are also written to ``saved.npy``.

    Returns:
        list: ``[observation, one_hot_action]`` pairs from accepted games.
    """
    training_data = []  # observations and the move made, from accepted games only
    scores = []
    accepted_scores = []
    for _ in range(initial_games):
        score = 0
        game_memory = []
        prev_observation = []
        for _ in range(goal_steps):
            action = random.randrange(0, 2)  # 0's and 1's
            observation, reward, done, info = env.step(action)
            # The action is paired with the observation that preceded it, so
            # the very first step (no previous observation yet) is skipped.
            if len(prev_observation) > 0:
                game_memory.append([prev_observation, action])
            prev_observation = observation
            score += reward
            if done:
                break
        if score >= score_requirement:
            accepted_scores.append(score)
            for observed, move in game_memory:
                output = [0, 1] if move == 1 else [1, 0]
                training_data.append([observed, output])
        env.reset()
        scores.append(score)
    # Each row pairs a length-N observation with a length-2 label, i.e. the
    # nested data is ragged. NumPy >= 1.24 refuses to build such an array
    # unless dtype=object is given explicitly. Reading the file back requires
    # np.load(..., allow_pickle=True).
    training_data_save = np.array(training_data, dtype=object)
    np.save('saved.npy', training_data_save)
    if accepted_scores:
        print('Average accepted score : ', mean(accepted_scores))
        print('Median accepted scores : ', median(accepted_scores))
    else:
        # mean()/median() raise on an empty sequence.
        print('No game reached the score requirement.')
    print(Counter(accepted_scores))
    return training_data


training_data = initial_population()
The error I am getting is the one in the title. I am new to deep learning and I don't yet have a good grasp of how the input arrays need to be reshaped.

After a bit of tweaking I finally got the network to work. If anyone is interested, I fixed it as follows.
I changed the first Dense layer to:
network.add(Dense(units = 128, activation='relu', kernel_initializer = 'uniform', input_dim = input_size))
and in the model training function I reshaped the input to 2D (samples, features) instead of 3D, and converted the labels to a NumPy array:
def train_model(training_data, model=False):
    """Fit a model on ``[observation, one_hot_action]`` pairs.

    The observations are stacked into a 2-D array of shape
    ``(samples, features)`` and the labels into a NumPy array. When no model
    is supplied, a new one is built for the observed feature count. Training
    runs for 5 epochs.

    Returns:
        The fitted model (the one passed in, or the newly built one).
    """
    feature_count = len(training_data[0][0])
    observations = [sample[0] for sample in training_data]
    labels = [sample[1] for sample in training_data]

    X = np.array(observations).reshape(-1, feature_count)
    Y = np.array(labels)

    if not model:
        model = neural_network_model(len(X[0]))
    model.fit(X, Y, epochs=5)
    return model

Related

training and validation losses decreasing slowly

I have implemented a 2D CNN model followed by a GRU layer:
class CNN2D(nn.Module):
    """Per-frame 2D CNN encoder followed by a GRU sequence classifier.

    Every frame of the clip goes through four strided Conv-BatchNorm-ReLU
    stages and a linear projection to ``CNN_embed_dim``. The sequence of frame
    embeddings is fed to a single-layer GRU, and the output at the last time
    step is mapped to ``num_classes`` logits.

    Args:
        img_x, img_y: frame height and width used to size the projection layer.
        fc_hidden1, fc_hidden2: stored for compatibility; the two hidden
            fully connected layers are currently disabled.
        drop_p: dropout probability applied to the GRU output.
        CNN_embed_dim: size of the per-frame embedding fed to the GRU.
        num_classes: number of output logits.
    """

    def __init__(self, img_x=88, img_y=88, fc_hidden1=512, fc_hidden2=512, drop_p=0.3, CNN_embed_dim=512, num_classes=9):
        super(CNN2D, self).__init__()
        self.img_x = img_x
        self.img_y = img_y
        self.CNN_embed_dim = CNN_embed_dim

        # Channel counts, kernel sizes, strides and paddings of the four stages.
        self.ch1, self.ch2, self.ch3, self.ch4 = 8, 16, 32, 64
        self.k1, self.k2, self.k3, self.k4 = (5, 5), (3, 3), (3, 3), (3, 3)
        self.s1, self.s2, self.s3, self.s4 = (2, 2), (2, 2), (2, 2), (2, 2)
        self.pd1, self.pd2, self.pd3, self.pd4 = (0, 0), (0, 0), (0, 0), (0, 0)

        # Spatial size after each stage; the last one sizes the projection.
        self.conv1_outshape = conv2D_output_size((self.img_x, self.img_y), self.pd1, self.k1, self.s1)
        self.conv2_outshape = conv2D_output_size(self.conv1_outshape, self.pd2, self.k2, self.s2)
        self.conv3_outshape = conv2D_output_size(self.conv2_outshape, self.pd3, self.k3, self.s3)
        self.conv4_outshape = conv2D_output_size(self.conv3_outshape, self.pd4, self.k4, self.s4)

        # fully connected layer hidden nodes
        self.fc_hidden1, self.fc_hidden2 = fc_hidden1, fc_hidden2
        self.drop_p = drop_p

        self.conv1 = self._conv_stage(1, self.ch1, self.k1, self.s1, self.pd1)
        self.conv2 = self._conv_stage(self.ch1, self.ch2, self.k2, self.s2, self.pd2)
        self.conv3 = self._conv_stage(self.ch2, self.ch3, self.k3, self.s3, self.pd3)
        self.conv4 = self._conv_stage(self.ch3, self.ch4, self.k4, self.s4, self.pd4)

        self.drop = nn.Dropout2d(self.drop_p)
        self.pool = nn.MaxPool2d(2)

        # The hidden layers fc1/fc2 are disabled; fc3 projects the flattened
        # conv features straight to the embedding size.
        self.fc3 = nn.Linear(self.ch4 * self.conv4_outshape[0] * self.conv4_outshape[1], self.CNN_embed_dim)

        self.num_classes = num_classes
        self.gru = nn.GRU(
            input_size=self.CNN_embed_dim,
            hidden_size=256,
            num_layers=1,
            batch_first=True,  # input and output are (batch, time_step, input_size)
        )
        self.gfc2 = nn.Linear(256, self.num_classes)

    @staticmethod
    def _conv_stage(in_channels, out_channels, kernel_size, stride, padding):
        """Return one Conv2d -> BatchNorm2d -> ReLU stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=kernel_size, stride=stride, padding=padding),
            nn.BatchNorm2d(out_channels, momentum=0.01),
            nn.ReLU(inplace=True),
        )

    def forward(self, x_3d):
        """Classify a batch of clips.

        Args:
            x_3d: tensor indexed as (batch, channels, frames, height, width).

        Returns:
            Tensor of shape (batch, num_classes) with unnormalized logits.
        """
        cnn_embed_seq = []
        for t in range(x_3d.size(2)):
            # Encode frame t with the CNN.
            x = self.conv1(x_3d[:, :, t, :, :])
            x = self.conv2(x)
            x = self.conv3(x)
            x = self.conv4(x)
            x = x.reshape(x.size(0), -1)
            # fc1/fc2 do not exist on this module, and fc3 is sized for the
            # flattened conv features, so the projection is applied directly.
            x = self.fc3(x)
            cnn_embed_seq.append(x)

        # Stack along a new time axis -> (batch, time, embed), as batch_first expects.
        cnn_embed_seq = torch.stack(cnn_embed_seq, dim=1)
        RNN_out, _ = self.gru(cnn_embed_seq, None)

        # Use the GRU output at the last time step for classification.
        x = F.relu(RNN_out[:, -1, :])
        x = F.dropout(x, p=self.drop_p, training=self.training)
        return self.gfc2(x)
The inputs are videos of shape [batch, channels, frames, height, width].
I used the Adam optimizer with lr=1e-5, weight_decay=5e-5, amsgrad=True, and cross-entropy loss.
The training and validation losses decrease only slowly and the model does not converge.
What should I change?

why my multi-output regression using pytorch only optimize one output?

I want to predict three outputs; the model is shown below. The input has 9 features and the output has 3 values.
class DNN(nn.Module):
    """Fully connected regression network with ELU activations.

    The layer widths grow from ``n_features`` up to 256 and shrink back to 16
    before a final linear layer that produces 3 outputs.

    Args:
        n_features: number of input features per sample.
    """

    def __init__(self, n_features):
        # Initialise the Module machinery before setting any attribute, so
        # this keeps working if a tensor or sub-module is ever assigned first.
        super().__init__()
        self.n_features = n_features
        self.inlayer1 = nn.Linear(self.n_features, 16)
        self.layer2 = nn.Linear(16, 32)
        self.layer3 = nn.Linear(32, 64)
        self.layer4 = nn.Linear(64, 128)
        self.layer5 = nn.Linear(128, 256)
        self.layer6 = nn.Linear(256, 256)
        self.layer7 = nn.Linear(256, 128)
        self.layer8 = nn.Linear(128, 64)
        self.layer9 = nn.Linear(64, 32)
        self.layer10 = nn.Linear(32, 16)
        self.outlayer = nn.Linear(16, 3)

    def forward(self, x):
        """Map a (batch, n_features) input to a (batch, 3) prediction."""
        hidden_layers = (
            self.inlayer1, self.layer2, self.layer3, self.layer4, self.layer5,
            self.layer6, self.layer7, self.layer8, self.layer9, self.layer10,
        )
        for layer in hidden_layers:
            x = F.elu(layer(x))
        # No activation on the output: the targets are unbounded regressions.
        return self.outlayer(x)
The train code
def train(net, train_features, train_labels, test_features, test_labels,
          num_epochs, learning_rate, weight_decay, batch_size):
    """Train ``net`` with Adam and record the loss after every epoch.

    Args:
        net: the model to optimise.
        train_features, train_labels: full training set, also used for the
            per-epoch training loss.
        test_features, test_labels: evaluation set; pass ``test_labels=None``
            to skip test-loss tracking.
        num_epochs: number of passes over the training data.
        learning_rate, weight_decay: Adam hyperparameters.
        batch_size: mini-batch size for the training iterator.

    Returns:
        tuple: ``(train_ls, test_ls)``, the per-epoch losses as Python floats.
    """
    train_ls, test_ls = [], []
    train_iter = d2l.load_array((train_features, train_labels), batch_size)
    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=learning_rate,
                                 weight_decay=weight_decay)
    for _ in range(num_epochs):
        for X, y in train_iter:
            optimizer.zero_grad()
            out = net(X)  # out.shape is (batch samples, 3 labels)
            loss = MSEloss(out, y)
            loss.backward()
            optimizer.step()
        # The logged losses are only read as numbers, so skip building
        # autograd graphs for the full-dataset forward passes.
        with torch.no_grad():
            train_ls.append(MSEloss(net(train_features), train_labels).item())
            if test_labels is not None:
                test_ls.append(MSEloss(net(test_features), test_labels).item())
    return train_ls, test_ls
After running the model, the result below is incorrect, but I don't know where the bug is. It seems that only the first label column is predicted correctly. Should I change the way I calculate the loss?
the below is the result.
the R2 and MSE metrics for three outputs
I also tried computing the three outputs (out1, out2, out3) separately by changing the number of output neurons to 1 and then combining them in a weighted loss, but that didn't work either: none of the three outputs came close to the real labels.

Keras Tuner on autoencoder - Add condition : first hidden layer units greater than or equal next hidden layer units

I want to use Keras Tuner to tune the hyperparameters of an autoencoder.
It is a symmetric AE with two hidden layers. I want the number of units in the first layer to always be greater than or equal to the number of units in the second layer, but I don't know how to implement this with Keras Tuner. Any help would be greatly appreciated. Thank you in advance.
# Question version of the tunable denoising autoencoder. The constraint
# "units_1 <= units_0" is not implemented yet: see the placeholder below.
class DAE(tf.keras.Model):
'''
A DAE model
'''
def __init__(self, hp, **kwargs):
'''
DAE instantiation
args :
hp : Keras Tuner hyperparameter container used to sample layer sizes
return:
None
'''
super(DAE, self).__init__(**kwargs)
# The input dimension is hard-coded rather than taken from an argument.
input_dim = 15
latent_dim = hp.Choice("latent_space", [2,4,8])
# Both layer sizes are sampled independently from the same candidates, so
# nothing prevents units_1 from exceeding units_0 at this point.
units_0 = hp.Choice("units_0", [8, 16, 32, 64])
units_1 = hp.Choice("units_1", [8, 16, 32, 64])
# NOTE(review): unfinished attempt to make units_1 depend on units_0; the
# line marked "......?" is a placeholder from the question, not valid code.
for i in [8, 16, 32, 64]:
with hp.conditional_scope("units_0", [i]):
if units_0 == i:
......? # units_1 should be <= i
dropout = hp.Choice("dropout_rate", [0.1, 0.2, 0.3, 0.4, 0.5])
# Encoder: input -> units_0 -> units_1 -> latent_dim, with dropout after
# each hidden layer.
inputs = tf.keras.Input(shape = (input_dim,))
x = layers.Dense(units_0, activation="relu")(inputs)
x = layers.Dropout(dropout)(x)
x = layers.Dense(units_1, activation="relu")(x)
x = layers.Dropout(dropout)(x)
z = layers.Dense(latent_dim)(x)
self.encoder = tf.keras.Model(inputs, z, name="encoder")
# Decoder mirrors the encoder: latent_dim -> units_1 -> units_0 -> input_dim.
inputs = tf.keras.Input(shape=(latent_dim,))
x = layers.Dense(units_1, activation="relu")(inputs)
x = layers.Dropout(dropout)(x)
x = layers.Dense(units_0, activation="relu")(x)
x = layers.Dropout(dropout)(x)
outputs = layers.Dense(input_dim, activation="linear")(x)
self.decoder = tf.keras.Model(inputs, outputs, name="decoder")```
My code is shown above. It is a denoising autoencoder class.
I found the solution: we need to create a separate units_1 hyperparameter for each possible value of units_0.
class DAE(tf.keras.Model):
    '''
    A DAE model
    '''
    def __init__(self, hp, training=None, **kwargs):
        '''
        DAE instantiation
        args :
            hp : Keras Tuner hyperparameter container used to sample the
                 latent size, layer widths and dropout rates
            training : forwarded to the Dropout layers when the encoder and
                 decoder graphs are built
        return:
            None
        '''
        super(DAE, self).__init__(**kwargs)
        self.input_dim = 15
        l_units = [16, 32, 64, 128]
        latent_dim = hp.Choice("latent_space", [2, 4, 8])
        units_0 = hp.Choice("units_0", l_units)
        dropout_0 = hp.Choice("dropout_rate_0", [0.1, 0.2, 0.3, 0.4, 0.5])
        dropout_1 = hp.Choice("dropout_rate_1", [0.1, 0.2, 0.3, 0.4, 0.5])

        # One conditional hyperparameter per units_0 value, named
        # "units_1_<units_0>", whose upper bound is units_0. That is what
        # enforces units_1 <= units_0. Only the entry matching the sampled
        # units_0 is ever requested, so it is registered directly and kept in
        # an ordinary local variable (writing to locals() is not supported).
        with hp.conditional_scope("units_0", [units_0]):
            units_1 = hp.Int("units_1_%d" % units_0, min_value=8,
                             max_value=units_0, step=2, sampling="log")

        # Encoder: input -> units_0 -> units_1 -> latent_dim.
        inputs = tf.keras.Input(shape=(self.input_dim,))
        x = layers.Dense(units_0, activation="relu")(inputs)
        x = layers.Dropout(dropout_0)(x, training=training)
        x = layers.Dense(units_1, activation="relu")(x)
        x = layers.Dropout(dropout_1)(x, training=training)
        z = layers.Dense(latent_dim)(x)
        self.encoder = tf.keras.Model(inputs, z, name="encoder")

        # Decoder mirrors the encoder: latent_dim -> units_1 -> units_0 -> input.
        inputs = tf.keras.Input(shape=(latent_dim,))
        x = layers.Dense(units_1, activation="relu")(inputs)
        x = layers.Dropout(dropout_1)(x, training=training)
        x = layers.Dense(units_0, activation="relu")(x)
        x = layers.Dropout(dropout_0)(x, training=training)
        outputs = layers.Dense(self.input_dim, activation="linear")(x)
        self.decoder = tf.keras.Model(inputs, outputs, name="decoder")

Why network is not learning with this loss?

I've been playing around a bit with PyTorch and have created a small convolutional network. I wrote a loss function that takes the output of the convolutional layers and tries to minimize its norm.
The matrix view2 holds that layer output, one flattened feature map per column.
During training the loss did not change at all; it stayed equal to 1 the whole time.
I know that this code doesn't do anything useful, but I am very interested to hear why it is not working.
# Load the ORL face dataset: 'fea' holds the flattened 32x32 images and
# 'gnd' the class labels.
data = sio.loadmat('ORL_32x32.mat')
# Reshape the images to (N, 1, 32, 32) so they can be fed to Conv2d.
x, y = data['fea'].reshape((-1, 1, 32, 32)), data['gnd']
y = np.squeeze(y - 1)  # y in [0, 1, ..., K-1]
class ConvAutoencoder(nn.Module):
    """Stack of four unpadded 3x3 convolutions with ReLU activations.

    Despite the name there is no decoder; the module only encodes. Each
    convolution shrinks the spatial size by 2, so a 32x32 input becomes 28x28
    after two layers and 24x24 after four.
    """

    def __init__(self):
        super(ConvAutoencoder, self).__init__()
        ## encoder layers ##
        # conv layers (depth 1 --> 3 --> 3 --> 3 --> 3), 3x3 kernels
        self.conv1 = nn.Conv2d(1, 3, 3)
        self.conv2 = nn.Conv2d(3, 3, 3)
        self.conv3 = nn.Conv2d(3, 3, 3)
        self.conv4 = nn.Conv2d(3, 3, 3)

    def forward(self, x):
        """Return the activations after all four conv layers."""
        x = self.test1(x)
        x = F.relu(self.conv3(x))
        return F.relu(self.conv4(x))

    def test1(self, x):
        """Return the activations after the first two conv layers."""
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))

    def test2(self, x):
        """Return the activations after all four conv layers (same as forward)."""
        return self.forward(x)
def my_loss(novi2):
    """Return the 2-norm (Frobenius norm for matrices) of ``novi2`` on ``device``.

    The norm is computed with torch operations. When ``novi2`` is a tensor
    that is part of an autograd graph, the result stays connected to it and
    ``backward()`` can reach the model parameters. A NumPy array is still
    accepted and gives the same value, but it carries no gradient history.
    """
    values = torch.as_tensor(novi2)
    if not (values.is_floating_point() or values.is_complex()):
        # Integer input: norms are only defined for floating-point tensors.
        values = values.to(torch.float64)
    return torch.linalg.norm(values).to(device)
# Training script from the question. It runs the model, copies part of the
# output into a NumPy matrix and uses the norm of that matrix as the loss.
model = ConvAutoencoder().to(device)
epochs = 950;
lossList = []
view2 = np.zeros((576,400))
view3 = np.zeros((576,400))
losses = torch.tensor(0.).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Make sure the inputs are a float32 tensor on the training device.
if not isinstance(x, torch.Tensor):
x = torch.tensor(x, dtype=torch.float32, device=device)
x = x.to(device)
# NOTE(review): a tensor moved to 'cuda' cannot be converted with .numpy();
# presumably this was meant to be .cpu() — confirm.
if isinstance(y, torch.Tensor):
y = y.to('cuda').numpy()
K = len(np.unique(y))
for epoch in range(epochs):
view2 = np.zeros((576,400))
view3 = np.zeros((576,400))
# NOTE(review): .detach().numpy() removes the outputs from the autograd
# graph, so nothing computed from `output` below is linked to the model.
output = model.test2(x.to(device)).cpu().detach().numpy()
output1 = model.test1(x.to(device)).cpu().detach().numpy()
# `numclass` is not defined in this snippet; presumably the number of samples
# to visualise (at most 400, the column count of view2).
for i in range(numclass):
# Take channel 0 of sample i, flatten it, and store its first 576 values
# as column i of view2.
lovro = output[i]
lovro =lovro[[0]]
lovro = lovro.squeeze(axis = 0)
lovro = lovro.flatten()
for j in range(576):
view2[j][i] = lovro[j]
for i in range(numclass):
lovro = output[i]
# The loss is built from the NumPy array view2, not from a tensor produced by
# the model, so backward() has no path to the model parameters.
loss = my_loss(view2)
optimizer.zero_grad()
loss.backward()
optimizer.step()
print('Epoch %02d' %
(epoch))
The way you implemented your loss is not really "differentiable". I put that in quotation marks because what you are observing is the difference between mathematical differentiation and backpropagation: in the underlying computation graph there is no functional dependency between your model parameters and your loss. The reason is that you copied the values into a separate array. So while your loss depends on the values in "view2", it does not depend on the outputs of your model as far as autograd is concerned. You have to avoid copying values by assignment when defining your computation, and compute the loss directly from the network output instead:
x = np.array([0])
x[0] = output_of_network
loss = LA.norm(x) # wrong
loss = LA.norm(output_of_network) # correct

Pytorch-Optimzer doesn't update parameters

I made a custom model, AlexNetQIL (AlexNet with QIL layers).
'QIL' stands for quantization interval learning.
I trained my model and the loss did not decrease at all. I found out that the parameters of my model were not being updated, and that this is caused by the QIL layers I added.
My code for AlexNetQIL and Qil is attached below.
Could someone please tell me what the problem in my code is?
AlexNetQIL
import torch
import torch.nn as nn
from qil import *
class AlexNetQIL(nn.Module):
    """AlexNet-style classifier whose conv2..conv5 use QIL quantization.

    During training, each of conv2..conv5 is applied to a quantized copy of
    its input and of its weights. The full-precision weights stay registered
    as the layer's parameters, so the optimizer keeps updating the tensors it
    was given. With ``inference=True`` the convolutions run unquantized.
    """

    #def __init__(self, num_classes=1000): for imagenet
    def __init__(self, num_classes=10):  # for cifar-10
        super(AlexNetQIL, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=True)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)

        self.qil2 = Qil()
        self.conv2 = nn.Conv2d(64, 192, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(192)
        self.relu2 = nn.ReLU(inplace=True)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)

        self.qil3 = Qil()
        self.conv3 = nn.Conv2d(192, 384, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(384)
        self.relu3 = nn.ReLU(inplace=True)

        self.qil4 = Qil()
        self.conv4 = nn.Conv2d(384, 256, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.relu4 = nn.ReLU(inplace=True)

        self.qil5 = Qil()
        self.conv5 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(256)
        self.relu5 = nn.ReLU(inplace=True)
        self.maxpool5 = nn.MaxPool2d(kernel_size=2)

        self.classifier = nn.Sequential(
            nn.Linear(256 * 2 * 2, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    @staticmethod
    def _ste_quantize(qil, transformed):
        """Discretize ``transformed`` but let gradients pass as identity.

        Rounding has zero gradient almost everywhere, so the rounded value is
        used in the forward pass only (straight-through estimator).
        """
        return transformed + (qil.discretizer(transformed) - transformed).detach()

    def _quantized_conv(self, x, qil, conv, inference):
        """Apply ``conv`` to QIL-quantized activations and weights.

        The quantized weights are a function of ``conv.weight`` inside the
        autograd graph. Assigning a new ``nn.Parameter`` to ``conv.weight``
        instead would create a fresh leaf tensor on every call, which cuts the
        graph and detaches the layer from the tensors held by the optimizer.
        """
        if inference:
            return conv(x)
        w_q = self._ste_quantize(qil, qil.transfomer_weights(conv.weight))
        x_q = self._ste_quantize(qil, qil.transfomer_activation(x))
        return nn.functional.conv2d(x_q, w_q, conv.bias, conv.stride,
                                    conv.padding, conv.dilation, conv.groups)

    def forward(self, x, inference=False):
        """Return class logits for a batch of images.

        Args:
            x: input batch; the classifier head expects the feature map to be
               256 x 2 x 2 after the last pooling layer.
            inference: when True, skip quantization in conv2..conv5.
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)

        x = self._quantized_conv(x, self.qil2, self.conv2, inference)
        x = self.bn2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)

        x = self._quantized_conv(x, self.qil3, self.conv3, inference)
        x = self.bn3(x)
        x = self.relu3(x)

        x = self._quantized_conv(x, self.qil4, self.conv4, inference)
        x = self.bn4(x)
        x = self.relu4(x)

        x = self._quantized_conv(x, self.qil5, self.conv5, inference)
        x = self.bn5(x)
        x = self.relu5(x)
        x = self.maxpool5(x)

        x = x.view(x.size(0), 256 * 2 * 2)
        x = self.classifier(x)
        return x
QIL
forward
quantize weights and input activation with 2 steps
transformer(params) -> discretizer(params)
import torch
import torch.nn as nn
import numpy as np
import copy
#Qil (Quantize intervals learning)
class Qil(nn.Module):
    """Quantization interval learning (QIL) for one layer.

    Holds trainable interval parameters for the weights (centre ``cw``,
    half-width ``dw``, exponent ``gamma``) and for the input activations
    (centre ``cx``, half-width ``dx``). Quantization is done in two steps:
    a transformer maps values into [-1, 1] (weights) or [0, 1] (activations),
    then a discretizer rounds them onto a uniform grid.

    Assumes ``dw > 0``, ``dx > 0`` and ``gamma > 0``.
    """

    discretization_level = 32

    def __init__(self):
        super(Qil, self).__init__()
        self.cw = nn.Parameter(torch.rand(1))  # trainable weight-interval centre
        self.dw = nn.Parameter(torch.rand(1))  # trainable weight-interval half-width
        self.cx = nn.Parameter(torch.rand(1))  # trainable activation-interval centre
        self.dx = nn.Parameter(torch.rand(1))  # trainable activation-interval half-width
        self.gamma = nn.Parameter(torch.tensor(1.0))  # trainable transformer exponent
        self.a = Qil.discretization_level

    def forward(self, x, weights, Inference=False, as_parameter=True):
        """Quantize ``x`` and ``weights``.

        Args:
            x: input activations.
            weights: layer weights.
            Inference: when True, both tensors are returned unquantized.
            as_parameter: when True (default, the historical behaviour) the
                results are wrapped in ``nn.Parameter``. Wrapping creates new
                leaf tensors, so no gradient reaches ``x``, ``weights`` or the
                interval parameters through them. Pass False to get ordinary
                tensors that stay in the autograd graph.

        Returns:
            tuple: ``(x, weights)`` after quantization.
        """
        if not Inference:
            weights = self.discretizer(self.transfomer_weights(weights))
            x = self.discretizer(self.transfomer_activation(x))
        if as_parameter:
            return torch.nn.Parameter(x), torch.nn.Parameter(weights)
        return x, weights

    def transfomer_weights(self, weights):
        """Map weights into [-1, 1] using the learned interval.

        ``|w| < cw - dw`` maps to 0 (pruned), ``|w| > cw + dw`` maps to
        ``sign(w)`` (clipped), and values in between map to
        ``(aw * |w| + bw) ** gamma * sign(w)``. The linear term is exactly 0
        at ``cw - dw`` and 1 at ``cw + dw``, so clamping it to [0, 1] covers
        all three regions at once. Each weight is classified by its original
        magnitude only, never by an already-transformed value.
        """
        aw, bw = (0.5 / self.dw), (-0.5 * self.cw / self.dw + 0.5)
        scaled = torch.clamp(aw * weights.abs() + bw, 0.0, 1.0)
        return scaled ** self.gamma * weights.sign()

    def transfomer_activation(self, x):
        """Map activations into [0, 1] using the learned interval.

        ``x < cx - dx`` maps to 0, ``x > cx + dx`` maps to 1, and values in
        between map linearly via ``ax * x + bx``.
        """
        ax, bx = (0.5 / self.dx), (-0.5 * self.cx / self.dx + 0.5)
        return torch.clamp(ax * x + bx, 0.0, 1.0)

    def discretizer(self, tensor):
        """Round ``tensor`` onto a uniform grid with a straight-through gradient.

        The forward value is ``round(tensor * q_D) / q_D``. ``torch.round``
        has zero gradient almost everywhere, so the rounding offset is
        detached and the backward pass treats this step as the identity.
        """
        # NOTE(review): this yields 2**32 grid steps per unit; confirm whether
        # discretization_level is meant as a bit width or a level count.
        q_D = pow(2, Qil.discretization_level)
        rounded = torch.round(tensor * q_D) / q_D
        return tensor + (rounded - tensor).detach()