Why does my multi-output regression in PyTorch only optimize one output? - deep-learning

I want to predict three outputs; the model is as follows. The input has 9 features and the output has 3 values.
class DNN(nn.Module):
    """Fully connected regressor mapping ``n_features`` inputs to 3 outputs.

    Hidden widths grow 16 -> 256 and shrink back to 16. Every hidden layer
    is followed by an ELU; the output layer is purely linear.
    """

    def __init__(self, n_features):
        super().__init__()
        self.n_features = n_features
        self.inlayer1 = nn.Linear(n_features, 16)
        self.layer2 = nn.Linear(16, 32)
        self.layer3 = nn.Linear(32, 64)
        self.layer4 = nn.Linear(64, 128)
        self.layer5 = nn.Linear(128, 256)
        self.layer6 = nn.Linear(256, 256)
        self.layer7 = nn.Linear(256, 128)
        self.layer8 = nn.Linear(128, 64)
        self.layer9 = nn.Linear(64, 32)
        self.layer10 = nn.Linear(32, 16)
        self.outlayer = nn.Linear(16, 3)

    def forward(self, x):
        """Run ``x`` through the ten ELU-activated layers and the linear head."""
        hidden_layers = (
            self.inlayer1, self.layer2, self.layer3, self.layer4,
            self.layer5, self.layer6, self.layer7, self.layer8,
            self.layer9, self.layer10,
        )
        for layer in hidden_layers:
            x = F.elu(layer(x))
        return self.outlayer(x)
The train code
def train(net, train_features, train_labels, test_features, test_labels,
          num_epochs, learning_rate, weight_decay, batch_size):
    """Fit ``net`` with Adam and record the loss after every epoch.

    Args:
        net: model whose ``parameters()`` are optimised.
        train_features, train_labels: full training set; also used for the
            per-epoch training-loss measurement.
        test_features, test_labels: held-out set; the test loss is only
            recorded when ``test_labels`` is not ``None``.
        num_epochs: number of passes over the training iterator.
        learning_rate, weight_decay: forwarded to ``torch.optim.Adam``.
        batch_size: forwarded to ``d2l.load_array``.

    Returns:
        ``(train_ls, test_ls)``: lists of per-epoch losses as Python floats.
    """
    train_ls, test_ls = [], []
    train_iter = d2l.load_array((train_features, train_labels), batch_size)
    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=learning_rate,
                                 weight_decay=weight_decay)
    for epoch in range(num_epochs):
        for X, y in train_iter:
            optimizer.zero_grad()
            out = net(X)  # one row per sample, one column per label
            loss = MSEloss(out, y)
            loss.backward()
            optimizer.step()
        # Pure measurement: no gradients are needed, so do not let autograd
        # record a graph for these full-dataset forward passes.
        with torch.no_grad():
            train_ls.append(MSEloss(net(train_features), train_labels).item())
            if test_labels is not None:
                test_ls.append(MSEloss(net(test_features), test_labels).item())
    return train_ls, test_ls
After running the model, the result below is incorrect, but I don't know where the bug is. It seems that only the first label column is predicted correctly. Should I change how I calculate the loss?
the below is the result.
the R2 and MSE metrics for three outputs
I tried to calculate the three outputs (out1, out2, out3) separately by changing the number of output neurons to 1 and then computing a weighted loss, but it didn't work; none of the three outputs is even close to the real labels.

Related

Why network is not learning with this loss?

I've been playing around a bit with Pytorch and have created a convolutional network with a total of 3 layers. I created a loss function that takes the results from the first layer and tries to minimize the norm.
So that view2 displays the data after the first layer in a matrix.
During learning, the error did not change at all; it was equal to 1 the whole time.
I know that this code doesn't make much sense, but I am very interested to hear why it is not working.
# Load the .mat file and reshape the 'fea' matrix into single-channel
# 32x32 images (N, 1, 32, 32); 'gnd' holds the labels.
data = sio.loadmat('ORL_32x32.mat')
x, y = data['fea'].reshape((-1, 1, 32, 32)), data['gnd']
y = np.squeeze(y - 1)  # y in [0, 1, ..., K-1]
class ConvAutoencoder(nn.Module):
    """Four stacked 3x3 convolutions, each followed by a ReLU.

    ``conv1`` maps 1 -> 3 channels; ``conv2``..``conv4`` keep 3 channels.
    No padding is used, so every layer trims 2 pixels per spatial axis.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 3, 3)
        self.conv2 = nn.Conv2d(3, 3, 3)
        self.conv3 = nn.Conv2d(3, 3, 3)
        self.conv4 = nn.Conv2d(3, 3, 3)

    def _relu_chain(self, x, convs):
        """Apply each convolution in ``convs`` followed by ReLU."""
        for conv in convs:
            x = F.relu(conv(x))
        return x

    def forward(self, x):
        """Return the activations after all four conv+ReLU layers."""
        return self._relu_chain(
            x, (self.conv1, self.conv2, self.conv3, self.conv4))

    def test1(self, x):
        """Return the activations after the first two conv+ReLU layers."""
        return self._relu_chain(x, (self.conv1, self.conv2))

    def test2(self, x):
        """Return the activations after all four layers (same as forward)."""
        return self._relu_chain(
            x, (self.conv1, self.conv2, self.conv3, self.conv4))
def my_loss(novi2):
    """Return the norm of ``novi2`` as a scalar tensor.

    * Tensor input: computed with ``torch.linalg.norm`` so the result stays
      connected to the autograd graph and ``backward()`` can reach the
      parameters that produced ``novi2``.
    * Any other input (e.g. a NumPy array): computed with ``LA.norm`` and
      wrapped in a new tensor moved to ``device``, exactly as before. Such a
      value is a constant for autograd and carries no gradient.
    """
    if isinstance(novi2, torch.Tensor):
        return torch.linalg.norm(novi2)
    return torch.tensor(LA.norm(novi2)).to(device)
# Model, bookkeeping buffers and optimiser.
model = ConvAutoencoder().to(device)
epochs = 950
lossList = []
view2 = np.zeros((576, 400))
view3 = np.zeros((576, 400))
losses = torch.tensor(0.).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# The input must be a float tensor on the target device.
if not isinstance(x, torch.Tensor):
    x = torch.tensor(x, dtype=torch.float32, device=device)
x = x.to(device)

# NOTE(review): calling .numpy() on a CUDA tensor raises; presumably
# .cpu() was intended here -- confirm.
if isinstance(y, torch.Tensor):
    y = y.to('cuda').numpy()
K = len(np.unique(y))

for epoch in range(epochs):
    view2 = np.zeros((576, 400))
    view3 = np.zeros((576, 400))
    # Both activations are detached and converted to NumPy, so nothing
    # derived from them below is connected to the autograd graph.
    output = model.test2(x.to(device)).cpu().detach().numpy()
    output1 = model.test1(x.to(device)).cpu().detach().numpy()
    # Column i of view2 receives the flattened first channel of sample i.
    # NOTE(review): `numclass` is not defined in this snippet (K is).
    for i in range(numclass):
        lovro = output[i][[0]].squeeze(axis=0).flatten()
        for j in range(576):
            view2[j, i] = lovro[j]
    for i in range(numclass):
        lovro = output[i]
    loss = my_loss(view2)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print('Epoch %02d' % (epoch))
The way you implemented your loss does not really look "differentiable". I am putting it in quotation marks because what you are observing is a difference between mathematical differentiation and backpropagation. There is no functional dependency in the underlying computation graph between your variables and your loss. The reason is that you used an array into which you copied values. So while your loss depends on the values of "view2", it does not depend on the outputs of your model. You have to avoid any value assignments when defining your computation.
x = np.array([0])
x[0] = output_of_network
loss = LA.norm(x) # wrong
loss = LA.norm(output_of_network) # correct

Why does Softmax(dim=0) produce poor results?

I'm getting weird results from a PyTorch Softmax layer, trying to figure out what's going on, so I boiled it down to a minimal test case, a neural network that just learns to decode binary numbers into one-hot form.
Just Softmax() gets a warning:
UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
Okay, so what to supply for X? I had been guessing 0 would be a sensible argument. Just to make sure, I tried Softmax(dim=1):
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
Okay, so that seems clear about allowed values. -1 apparently means the last dimension, so in this case, where the output is just a one-dimensional vector, that should mean the same thing as 0. Trying it with Softmax(dim=-1) works fine; in a few thousand epochs, the network reliably learns to decode the numbers with 100% accuracy.
Just to make sure it gives the same results, I tried it again with Softmax(dim=0) (as shown below)...
And it does not give the same result at all. The accuracy oscillates, but levels off somewhere around 20-30%.
What's going on? Why is 0 not the same as -1 in this context, and what exactly is 0 doing?
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
bits = 5


class Dataset1(Dataset):
    """Every ``bits``-digit binary number paired with its one-hot index.

    Sample ``i`` is ``(x, y)`` where ``x`` holds the binary digits of ``i``
    (most significant first) as floats and ``y`` is a one-hot vector of
    length ``2**bits`` with a 1.0 at position ``i``.
    """

    def __init__(self):
        n_values = 1 << bits
        samples = []
        for i in range(n_values):
            digits = [float(c == "1") for c in format(i, "b").zfill(bits)]
            one_hot = [float(i == j) for j in range(n_values)]
            samples.append((torch.as_tensor(digits), torch.as_tensor(one_hot)))
        self.s = samples

    def __len__(self):
        return len(self.s)

    def __getitem__(self, i):
        return self.s[i]
# Build the dataset and a batched loader over it.
trainDs = Dataset1()
batchSize = 16
trainDl = DataLoader(trainDs, batch_size=batchSize)

# Show the shapes of the first batch only.
for x, y in trainDl:
    print(x.shape, y.shape, sep="\n")
    break

hiddenSize = 100
class Net(nn.Module):
    """MLP mapping ``bits`` inputs to ``2**bits`` softmax scores."""

    def __init__(self):
        super().__init__()
        stack = [
            nn.Linear(bits, hiddenSize),
            nn.ReLU(),
            nn.Linear(hiddenSize, hiddenSize),
            nn.Tanh(),
            nn.Linear(hiddenSize, hiddenSize),
            nn.ReLU(),
            nn.Linear(hiddenSize, 1 << bits),
            # NOTE(review): dim=0 normalises along the first axis. For a
            # batched (N, classes) input that is the batch axis, not the
            # class axis; for a single 1-D sample it is the class axis.
            nn.Softmax(dim=0),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Return the softmax output of the layer stack for ``x``."""
        return self.layers(x)
# Instantiate the network on the CPU.
device = torch.device("cpu")
model = Net().to(device)
def accuracy(model, ds):
    """Return the fraction of ``(x, y)`` pairs in ``ds`` predicted correctly.

    A pair counts as correct when the argmax of ``model(x)`` equals the
    argmax of ``y``. The model is evaluated without gradient tracking.
    """
    hits = 0
    with torch.no_grad():
        for x, y in ds:
            hits += int(torch.argmax(y) == torch.argmax(model(x)))
    return hits / len(ds)
# Loss, optimiser and schedule: report ten times over the whole run.
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
epochs = 10000
interval = epochs // 10

for epoch in range(epochs + 1):
    for bi, (x, y) in enumerate(trainDl):
        x, y = x.to(device), y.to(device)
        loss = criterion(model(x), y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Report once per reporting epoch, right after its first batch.
        if bi == 0 and epoch % interval == 0:
            print(f"{epoch}\t{loss}\t{accuracy(model, trainDs)}")
In the accuracy function, you forgot to create a new dimension for the batch (batchsize=1), which explains why it gives that error when you use dim=1. Regarding the dimension of the softmax, you can check this post.
Below is the modified code.
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
bits = 5


class Dataset1(Dataset):
    """Lookup table of (binary digits, one-hot index) tensor pairs.

    There is one sample per integer in ``range(2**bits)``: the input is the
    zero-padded binary representation as floats, the target a one-hot
    vector marking that integer.
    """

    def __init__(self):
        n_values = 1 << bits
        self.s = [
            (
                torch.as_tensor(
                    [float(c == "1") for c in format(i, "b").zfill(bits)]),
                torch.as_tensor([float(i == j) for j in range(n_values)]),
            )
            for i in range(n_values)
        ]

    def __len__(self):
        return len(self.s)

    def __getitem__(self, i):
        return self.s[i]
# Build the dataset and a loader that discards a trailing partial batch.
trainDs = Dataset1()
batchSize = 16
trainDl = DataLoader(trainDs, batch_size=batchSize, drop_last=True)

# Show the shapes of the first batch only.
for x, y in trainDl:
    print(x.shape, y.shape, sep="\n")
    break

hiddenSize = 100
class Net(nn.Module):
    """MLP mapping ``(N, bits)`` batches to ``(N, 2**bits)`` softmax scores.

    The final ``Softmax(dim=1)`` normalises across the class axis, so the
    input must be 2-D (batch first), including at evaluation time.
    """

    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList([
            nn.Linear(bits, hiddenSize),
            nn.ReLU(),
            nn.Linear(hiddenSize, hiddenSize),
            nn.Tanh(),
            nn.Linear(hiddenSize, hiddenSize),
            nn.ReLU(),
            nn.Linear(hiddenSize, 1 << bits),
            nn.Softmax(dim=1),
        ])

    def forward(self, x):
        """Apply every layer in order and return the class probabilities."""
        for layer in self.layers:
            x = layer(x)
        return x
# Instantiate the network on the CPU.
device = torch.device("cpu")
model = Net().to(device)
def accuracy(model, ds):
    """Return the fraction of ``(x, y)`` pairs in ``ds`` predicted correctly.

    Each sample is given a leading batch axis of size 1 before it is passed
    to ``model``, so a model that expects batched 2-D input can be evaluated
    one sample at a time. A pair counts as correct when the argmax of the
    model output equals the argmax of ``y``.
    """
    hits = 0
    with torch.no_grad():
        for x, y in ds:
            z = model(x.unsqueeze(0))  # batch of size 1
            # argmax without `dim` works on the flattened tensor, so the
            # extra batch axis on z does not shift the index.
            if torch.argmax(z) == torch.argmax(y):
                hits += 1
    return hits / len(ds)
# Loss, optimiser and schedule: report ten times over the whole run.
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
epochs = 10000
interval = epochs // 10

for epoch in range(epochs + 1):
    for bi, (x, y) in enumerate(trainDl):
        x, y = x.to(device), y.to(device)
        loss = criterion(model(x), y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Report once per reporting epoch, right after its first batch.
        if bi == 0 and epoch % interval == 0:
            print(f"{epoch}\t{loss}\t{accuracy(model, trainDs)}")

Pytorch running out of memory when initialising cnn model

Trying to implement a SAGEConv classifier using Pytorch. Here's the model definition:
embed_dim = 128
from torch_geometric.nn import GraphConv, TopKPooling, GatedGraphConv
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
import torch.nn.functional as F
class Net(torch.nn.Module):
    """Graph classifier: three SAGEConv + TopKPooling stages and an MLP head.

    After every stage the pooled graph is summarised by concatenating its
    global max and global mean readouts; the three summaries are added and
    passed through ``lin1`` -> ``lin2`` -> ``lin3`` with a final sigmoid.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = SAGEConv(embed_dim, 128)
        self.pool1 = TopKPooling(128, ratio=0.8)
        self.conv2 = SAGEConv(128, 128)
        self.pool2 = TopKPooling(128, ratio=0.8)
        self.conv3 = SAGEConv(128, 128)
        self.pool3 = TopKPooling(128, ratio=0.8)
        # One embedding row per possible item id: the table has
        # df.item_id.max() + 1 rows of embed_dim values, so its size is
        # driven by the largest id present in `df`.
        self.item_embedding = torch.nn.Embedding(
            num_embeddings=df.item_id.max() + 1, embedding_dim=embed_dim)
        self.lin1 = torch.nn.Linear(256, 128)
        self.lin2 = torch.nn.Linear(128, 64)
        self.lin3 = torch.nn.Linear(64, 1)
        self.bn1 = torch.nn.BatchNorm1d(128)
        self.bn2 = torch.nn.BatchNorm1d(64)
        self.act1 = torch.nn.ReLU()
        self.act2 = torch.nn.ReLU()

    def forward(self, data):
        """Return one sigmoid score per graph in the batch ``data``."""
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x = self.item_embedding(x).squeeze(1)

        stages = (
            (self.conv1, self.pool1),
            (self.conv2, self.pool2),
            (self.conv3, self.pool3),
        )
        readouts = []
        for conv, pool in stages:
            x = F.relu(conv(x, edge_index))
            x, edge_index, _, batch, _ = pool(x, edge_index, None, batch)
            # Max and mean readouts concatenated along the feature axis.
            readouts.append(torch.cat([gmp(x, batch), gap(x, batch)], dim=1))

        x = readouts[0] + readouts[1] + readouts[2]
        x = self.act1(self.lin1(x))
        x = self.act2(self.lin2(x))
        x = F.dropout(x, p=0.5, training=self.training)
        return torch.sigmoid(self.lin3(x)).squeeze(1)
I get the following error:
RuntimeError: [enforce fail at ..\c10\core\CPUAllocator.cpp:75] data. DefaultCPUAllocator: not enough memory: you tried to allocate 603564952576 bytes. Buy new RAM!
When initialising the model:
# Build the model (this allocates the embedding table) and move it to CUDA.
device = torch.device('cuda')
model = Net().to(device)
I am new to pytorch. Any help would be appreciated.
Thank you!
Turns out that df.item_id.max() was too large as I did not subset the data. Hence dimensionality was too high.

ValueError: Expected input batch_size (1) to match target batch_size (100)

I am trying to execute my dataset with batch size = 100. It works fine with batch size = 1 but with 100, it gives the error as: ValueError: Expected input batch_size (1) to match target batch_size (100).
class Net(nn.Module):
    """Conv1d + two linear layers feeding a ``Hybrid`` layer.

    The forward pass returns the hybrid output ``p`` concatenated with
    ``1 - p`` along the last axis.
    """

    def __init__(self):
        super().__init__()
        # NOTE(review): in_channels=100 means a 2-D (100, 32) input is read
        # as a single unbatched sample with 100 channels, not as 100
        # samples -- presumably why the output has a batch size of 1;
        # confirm against the intended data layout.
        self.conv1 = nn.Conv1d(100, 1, kernel_size=1)
        self.dropout = nn.Dropout2d()
        self.fc1 = nn.Linear(32, 16)
        self.fc2 = nn.Linear(16, 1)
        self.hybrid = Hybrid(qiskit.Aer.get_backend('qasm_simulator'), 100, np.pi / 2)

    def forward(self, x):
        features = self.dropout(F.relu(self.conv1(x)))
        # Keep the leading axis, flatten everything else.
        flat = features.view(features.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        score = self.hybrid(self.fc2(hidden))
        return torch.cat((score, 1 - score), -1)
```
The sizes of data, label and output where output = model(data) are
torch.Size([100, 32])
torch.Size([100])
torch.Size([1, 2])
```
How to make the size of output same as data size?

ValueError: Error when checking input: expected dense_1_input to have 4 dimensions, but got array with shape (20593, 4, 1)

I am trying to follow sentdex's game ai bot tutorial(https://www.youtube.com/watch?v=G-KvpNGudLw), but instead of tflearn, I am trying to use keras for the same implementation.
Model Function
def neural_network_model(input_size):
    """Build and compile the dense softmax classifier.

    Five ReLU hidden layers (128, 256, 512, 256, 128 units), each followed
    by 20% dropout, feed a 2-unit softmax output. The model is compiled with
    Adam (learning rate ``LR``) and categorical cross-entropy.
    """
    network = Sequential()
    # The first layer also declares the expected input shape.
    network.add(Dense(units=128, activation='relu', kernel_initializer='uniform',
                      input_shape=[None, input_size, 1]))
    network.add(Dropout(0.2))
    for width in (256, 512, 256, 128):
        network.add(Dense(units=width, activation='relu', kernel_initializer='uniform'))
        network.add(Dropout(0.2))
    network.add(Dense(units=2, activation='softmax', kernel_initializer='uniform'))

    adam = optimizers.Adam(lr=LR, decay=0.0)
    network.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
    return network
Model Training Function
def train_model(training_data, model=False):
    """Fit ``model`` on ``training_data`` for 5 epochs and return it.

    ``training_data`` is a sequence of ``(observation, target)`` pairs. The
    observations are stacked into an array of shape
    ``(n_samples, observation_length, 1)``; the targets are passed on as a
    plain list. When ``model`` is falsy a new network is built with
    ``neural_network_model``.
    """
    observation_length = len(training_data[0][0])
    observations = [sample[0] for sample in training_data]
    X = np.array(observations).reshape(-1, observation_length, 1)
    Y = [sample[1] for sample in training_data]
    if not model:
        model = neural_network_model(len(X[0]))
    model.fit(X, Y, epochs=5)
    return model
where the training data is :
def initial_population():
    """Play random games and keep the moves of the sufficiently good ones.

    Runs ``initial_games`` games of at most ``goal_steps`` random actions
    (0 or 1) each. For every game whose total reward reaches
    ``score_requirement``, each recorded ``(previous observation, action)``
    pair is stored as ``[observation, one_hot_action]``.

    Side effects: saves the collected samples to ``saved.npy``, prints
    statistics of the accepted scores, and resets ``env`` after every game.

    Returns:
        The list of ``[observation, one_hot_action]`` training samples.
    """
    training_data = []  # Observations and the move made, kept only for accepted games
    scores = []
    accepted_scores = []
    for _ in range(initial_games):
        score = 0
        game_memory = []
        prev_observation = []
        for _ in range(goal_steps):
            action = random.randrange(0, 2)  # 0's and 1's
            observation, reward, done, info = env.step(action)
            # The action is paired with the observation that preceded it,
            # so the very first step (no previous observation) is skipped.
            if len(prev_observation) > 0:
                game_memory.append([prev_observation, action])
            prev_observation = observation
            score += reward
            if done:
                break
        if score >= score_requirement:
            accepted_scores.append(score)
            for prev_obs, taken_action in game_memory:
                # One-hot encode the action: 1 -> [0, 1], 0 -> [1, 0].
                output = [0, 1] if taken_action == 1 else [1, 0]
                training_data.append([prev_obs, output])
        env.reset()
        scores.append(score)
    # Each row pairs an observation with a 2-element list, i.e. the data is
    # ragged; dtype=object stores the pairs as-is instead of asking NumPy to
    # infer a rectangular shape (which current NumPy versions reject).
    training_data_save = np.array(training_data, dtype=object)
    np.save('saved.npy', training_data_save)
    print('Average accepted score : ', mean(accepted_scores))
    print('Median accepted scores : ', median(accepted_scores))
    print(Counter(accepted_scores))
    return training_data
training_data = initial_population()
The error I am getting is in the title. I am new to deep learning and I don't have a good grasp yet on the reshaping part.
So after a bit tweaking I finally got the network to work. If anyone is interested, I fixed it by doing the following:
I changed the first Dense layer to :
network.add(Dense(units = 128, activation='relu', kernel_initializer = 'uniform', input_dim = input_size))
and in the model training function, I changed the shape of the input to 2D instead of 3D :
def train_model(training_data, model=False):
    """Fit ``model`` on ``training_data`` for 5 epochs and return it.

    ``training_data`` is a sequence of ``(observation, target)`` pairs. The
    observations become a 2-D array of shape
    ``(n_samples, observation_length)`` and the targets a NumPy array. When
    ``model`` is falsy a new network is built with ``neural_network_model``.
    """
    observation_length = len(training_data[0][0])
    observations = [sample[0] for sample in training_data]
    targets = [sample[1] for sample in training_data]
    X = np.array(observations).reshape(-1, observation_length)
    Y = np.array(targets)
    if not model:
        model = neural_network_model(len(X[0]))
    model.fit(X, Y, epochs=5)
    return model