I am trying to build a multi layer neural network. I have train data with shape:
train[0][0].shape
(4096,)
Below is my dense layer
from collections import OrderedDict
n_out = 8
# NOTE: the keys 'hidden_linear' and 'hidden_activation' are each used three
# times below. An OrderedDict keeps a single entry per key (position of the
# first occurrence, value of the last one), so nn.Sequential only receives
# three modules: Linear(456, 100) -> Tanh -> Linear(100, n_out).
net = nn.Sequential(OrderedDict([
('hidden_linear', nn.Linear(4096, 1366)),
('hidden_activation', nn.Tanh()),
('hidden_linear', nn.Linear(1366, 456)),
('hidden_activation', nn.Tanh()),
('hidden_linear', nn.Linear(456, 100)),
('hidden_activation', nn.Tanh()),
('output_linear', nn.Linear(100, n_out))
]))
I am using crossentropy as the loss function. The problem I have is when I train the model with the below code:
# Plain SGD training: one optimizer step per mini-batch. The loss that is
# printed is the one of the last mini-batch seen in the epoch.
n_epochs = 40
learning_rate = 0.001
optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate)

for epoch in range(n_epochs):
    for snds, labels in final_train_loader:
        # Collapse everything after the batch dimension into one feature axis.
        flat_snds = snds.view(snds.shape[0], -1)
        outputs = net(flat_snds)
        loss = loss_fn(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print("Epoch: %d, Loss: %f" % (epoch, float(loss)))
The error I receive is the matrix multiplication error.
RuntimeError: mat1 and mat2 shapes cannot be multiplied (100x4096 and 456x100)
I have the dimensions wrong but cannot figure out how to get it right.
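The OrderedDict contains three Linear layers associated with the same key, hidden_linear (the same happens with nn.Tanh and the key hidden_activation). A dictionary keeps only one value per key, so the later entries overwrite the earlier ones and the network ends up with a single hidden layer, Linear(456, 100), which cannot accept a 4096-dimensional input. To make it work you need to give each layer a different name: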
# Same stack as before, but every module is registered under its own name so
# that none of them replaces another one.
inp = torch.rand(100, 4096)

net = nn.Sequential()
net.add_module('hidden_linear0', nn.Linear(4096, 1366))
net.add_module('hidden_activation0', nn.Tanh())
net.add_module('hidden_linear1', nn.Linear(1366, 456))
net.add_module('hidden_activation1', nn.Tanh())
net.add_module('hidden_linear2', nn.Linear(456, 100))
net.add_module('hidden_activation2', nn.Tanh())
net.add_module('output_linear', nn.Linear(100, n_out))

net(inp) # now it works!
Related
I'm using a DNN to fit these data, with a softmax output to classify them into 2 classes; each sample has a dimensionality of 4040. Can someone with experience tell me what's wrong with my network?
It is strange that my initial loss is 7.6 and my initial error is 0.5524, and they basically do not change during training.
# K-fold cross-validation: for every (train, test) index split a fresh model is
# built, trained for 50 epochs and evaluated on the held-out fold.
# fold_no, acc_per_fold and loss_per_fold are read/updated here but are not
# initialised in this snippet; they must exist before the loop starts.
for train, test in kfold.split(data_pro, valence_labels):
model = keras.Sequential()
model.add(keras.layers.Dense(5000,activation='relu',input_shape=(4040,)))
model.add(keras.layers.Dropout(rate=0.25))
model.add(keras.layers.Dense(500, activation='relu'))
model.add(keras.layers.Dropout(rate=0.5))
model.add(keras.layers.Dense(1000, activation='relu'))
model.add(keras.layers.Dropout(rate=0.5))
model.add(keras.layers.Dense(2, activation='softmax'))
# NOTE(review): this Dropout comes after the softmax output layer, so during
# training it is applied to the predicted class probabilities themselves.
model.add(keras.layers.Dropout(rate=0.5))
# NOTE(review): 'binary_crossentropy' is combined with a 2-unit softmax output;
# the encoding of valence_labels is not visible here — confirm that the label
# shape matches the 2-unit output.
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001,rho=0.9),
loss='binary_crossentropy',
metrics=['accuracy'])
print('------------------------------------------------------------------------')
print(f'Training for fold {fold_no} ...')
# One timestamped TensorBoard log directory per fold.
log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
# Fit data to model
history = model.fit(data_pro[train], valence_labels[train],
batch_size=128,
epochs=50,
verbose=1,
callbacks=[tensorboard_callback]
)
# Generate generalization metrics
# scores[0] is recorded as the loss and scores[1] as the accuracy below.
scores = model.evaluate(data_pro[test], valence_labels[test], verbose=0)
print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
acc_per_fold.append(scores[1] * 100)
loss_per_fold.append(scores[0])
# Increase fold number
fold_no = fold_no + 1
# == Provide average scores ==
# Per-fold loss/accuracy first, then mean accuracy (with standard deviation)
# and mean loss over all folds.
print('------------------------------------------------------------------------')
print('Score per fold')
for i in range(0, len(acc_per_fold)):
print('------------------------------------------------------------------------')
print(f'> Fold {i+1} - Loss: {loss_per_fold[i]} - Accuracy: {acc_per_fold[i]}%')
print('------------------------------------------------------------------------')
print('Average scores for all folds:')
print(f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
print(f'> Loss: {np.mean(loss_per_fold)}')
print('------------------------------------------------------------------------')
You shouldn't add Dropout after the final Dense layer; delete the last model.add(keras.layers.Dropout(rate=0.5)).
Also, I think your code may raise an error because your labels have dimension 1 but your final Dense layer has 2 units. Change model.add(keras.layers.Dense(2, activation='softmax')) to model.add(keras.layers.Dense(1, activation='sigmoid')).
Read this to learn tensorflow
Update 1 :
Change
model.compile(optimizer= tf.keras.optimizers.SGD(learning_rate = 0.00001,momentum=0.9,nesterov=True),
loss=tf.keras.losses.CategoricalCrossentropy(),
metrics=['accuracy'])
to
model.compile(optimizer= tf.keras.optimizers.Adam(learning_rate=3e-4),
loss=tf.keras.losses.CategoricalCrossentropy(),
metrics=['accuracy'])
And change
accAll = []
for epoch in range(1, 50):
model.fit(train_data, train_labels,
batch_size=50,epochs=5,
validation_data = (val_data, val_labels))
val_loss, val_Accuracy = model.evaluate(val_data,val_labels,batch_size=1)
accAll.append(val_Accuracy)
to
accAll = model.fit(
train_data, train_labels,
batch_size=50,epochs=20,
validation_data = (val_data, val_labels)
)
I am trying to train an LSTM for energy demand forecasting, but training takes too long. I do not understand why, because the model looks “simple” and there is not much data. Might it be because I am not using the DataLoader? How could I use it with an RNN, given that my data is a sequence?
Complete code is in Colab: https://colab.research.google.com/drive/130rG8_j1Lf8RQoVRrfXCeo5h_CcC5NU6?usp=sharing
The interesting part to be improved may be this:
# Per-sequence training: train_data yields (sequence, target) pairs and the
# optimizer is stepped once for every single pair.
for seq, y_train in train_data:
optimizer.zero_grad()
# Replace the state stored on the model with two fresh zero tensors of shape
# (1, 1, hidden_size) before the forward pass (presumably the LSTM's hidden
# and cell state — confirm against the model definition).
model.hidden = (torch.zeros(1,1,model.hidden_size),
torch.zeros(1,1,model.hidden_size))
y_pred = model(seq)
loss = criterion(y_pred, y_train)
loss.backward()
optimizer.step()
Thanks in advance to anyone helping me.
If you want to speed up training, more data must be provided to the model per training step. In my case every step processed a single sequence (effectively a batch of size 1). The simplest way to solve this is to use the DataLoader.
Complete Colab with the solution can be found in this link: https://colab.research.google.com/drive/1QgtshCFETZ9oTvIYWy1Bdre-614kbwRX?usp=sharing
# This is to create the Dataset
from torch.utils.data import Dataset, DataLoader
class DemandDataset(Dataset):
    """Map-style dataset that pairs every input sequence with its target.

    Args:
        X_train: Sized, indexable collection of input sequences.
        y_train: Sized, indexable collection of targets; ``y_train[i]`` is
            the target that belongs to ``X_train[i]``.

    Raises:
        ValueError: If ``X_train`` and ``y_train`` differ in length.
    """

    def __init__(self, X_train, y_train):
        # __len__ is derived from y_train only, so a length mismatch would
        # otherwise go unnoticed until some index fails (or never).
        if len(X_train) != len(y_train):
            raise ValueError(
                "X_train and y_train must have the same length, got "
                f"{len(X_train)} and {len(y_train)}"
            )
        self.X_train = X_train
        self.y_train = y_train

    def __len__(self):
        """Return the number of (sequence, target) pairs."""
        return len(self.y_train)

    def __getitem__(self, idx):
        """Return the ``(sequence, target)`` pair stored at position ``idx``."""
        return self.X_train[idx], self.y_train[idx]
# Convert the (sequence, target) pairs used for one-at-a-time RNN training
# into two parallel lists that the Dataset can index.
sq_0 = []
y_0 = []
for seq, y_train in train_data:
    sq_0.append(seq)
    y_0.append(y_train)

dataset = DemandDataset(sq_0, y_0)
dataloader = DataLoader(dataset, batch_size=20)

epochs = 30
t = 50  # number of future steps forecast after every epoch

for i in range(epochs):
    print("New epoch")
    for data, label in dataloader:
        optimizer.zero_grad()
        # NOTE(review): the state is created with a batch dimension of 1 while
        # the loader yields batches of up to 20 sequences — confirm that the
        # model's forward pass handles this.
        model.hidden = (torch.zeros(1, 1, model.hidden_size),
                        torch.zeros(1, 1, model.hidden_size))
        # Feed the current mini-batch. Passing `seq` here would reuse a
        # leftover variable from another loop and ignore the loader's data.
        y_pred = model(data)
        loss = criterion(y_pred, label)
        loss.backward()
        optimizer.step()
    print(f'Epoch: {i+1:2} Loss: {loss.item():10.8f}')

    # Roll the model forward t steps, seeding it with the last window of the
    # training series and feeding every prediction back in as input.
    preds = train_set[-window_size:].tolist()
    for _ in range(t):
        seq = torch.FloatTensor(preds[-window_size:])
        with torch.no_grad():
            model.hidden = (torch.zeros(1, 1, model.hidden_size),
                            torch.zeros(1, 1, model.hidden_size))
            preds.append(model(seq).item())
    # Exactly t values were appended above, so the forecast is preds[-t:];
    # slicing by window_size only lines up with y[-t:] when window_size == t.
    loss = criterion(torch.tensor(preds[-t:]), y[-t:])
I am fairly new to deep learning and neural networks. I recently built a facial emotion recognition classifier using the FER-2013 dataset. I am using the pretrained ResNet-152 model for classification, but both the training and validation accuracies of my model are very low: I am getting around 36%, which is not good. I assumed that with transfer learning the accuracy should be high, so why am I getting such a low accuracy? Should I change the hyperparameters? Here is my code.
# Load the pretrained ResNet-152 and switch off gradients for all of its
# existing parameters, so only modules attached afterwards can be trained.
model = models.resnet152(pretrained=True)
for param in model.parameters():
    param.requires_grad = False
print(model)

from collections import OrderedDict

# Replacement head: 2048 -> 512 -> 7 outputs, emitted as log-probabilities.
head_layers = OrderedDict()
head_layers['fc1'] = nn.Linear(2048, 512)
head_layers['relu'] = nn.ReLU()
head_layers['dropout1'] = nn.Dropout(p=0.5)
head_layers['fc2'] = nn.Linear(512, 7)
head_layers['output'] = nn.LogSoftmax(dim=1)
classifier = nn.Sequential(head_layers)

model.fc = classifier
print(classifier)
def train_model(model, criterion, optimizer, scheduler, num_epochs=10):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a 'train' phase followed by a 'validation' phase. The
    weights of the epoch with the highest validation accuracy are kept and
    restored into the model before it is returned.

    Relies on the module-level names ``dataloaders`` and ``dataset_sizes``
    (both indexed by phase name) and ``device``.

    Args:
        model: Network to optimise.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training mini-batch.
        scheduler: Learning-rate scheduler stepped once per epoch.
        num_epochs: Number of epochs to run.

    Returns:
        ``model`` with the best validation weights loaded.
    """
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(1, num_epochs + 1):
        print('Epoch {}/{}'.format(epoch, num_epochs))
        print('-' * 10)

        for phase in ['train', 'validation']:
            is_training = phase == 'train'
            if is_training:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0
            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()

                # Only build the autograd graph while training.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # loss is a batch mean, so weight it by the batch size.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            if is_training:
                # Step the scheduler after this epoch's optimizer updates;
                # stepping it first skips the first value of the schedule
                # (PyTorch >= 1.1).
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'validation' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best valid accuracy: {:4f}'.format(best_acc))

    model.load_state_dict(best_model_wts)
    return model
# Training configuration: NLL loss, SGD with momentum restricted to the
# parameters of model.fc, and a learning rate multiplied by 0.1 every 5
# scheduler steps.
num_epochs = 10
use_gpu = torch.cuda.is_available()
if use_gpu:
    print('Using GPU: '+ str(use_gpu))
    model = model.cuda()

criterion = nn.NLLLoss()
optimizer = optim.SGD(model.fc.parameters(), lr=.0006, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

model_ft = train_model(model, criterion, optimizer, exp_lr_scheduler,
                       num_epochs=num_epochs)
Can someone please guide me. I am a beginner at it, and I could really make use of some help in it.
Preprocess the dataset.
Get more data, as low accuracy could be the result of a small dataset.
Try data augmentation if you have little data.
I am trying LSTM model on this dataset: https://www.kaggle.com/rtatman/speech-accent-archive
This is the model that I am working on:
def train_lstm_model(X_train, y_train, X_validation, y_validation, EPOCHS, batch_size=128):
    """Build, compile and fit a stacked-LSTM classifier.

    The network is three 64-unit LSTM layers, a 0.25 dropout and a softmax
    Dense layer with one unit per class. The class count is the length of the
    first label vector in ``y_train``.

    Args:
        X_train: Training samples; each sample is indexed as (rows, cols).
        y_train: Training label vectors.
        X_validation: Validation samples.
        y_validation: Validation label vectors.
        EPOCHS: Maximum number of epochs passed to ``fit``.
        batch_size: Mini-batch size passed to ``fit``.

    Returns:
        The fitted Sequential model.
    """
    # Per-sample dimensions are read from the first sample of each set.
    first_train, first_val = X_train[0], X_validation[0]
    rows = first_train.shape[0]
    cols = first_train.shape[1]
    val_rows = first_val.shape[0]
    val_cols = first_val.shape[1]
    num_classes = len(y_train[0])

    input_shape = (rows, cols)
    X_train = X_train.reshape(X_train.shape[0], rows, cols)
    X_validation = X_validation.reshape(X_validation.shape[0], val_rows, val_cols)

    lstm = Sequential([
        LSTM(64, return_sequences=True, stateful=False, input_shape=input_shape, activation='tanh'),
        LSTM(64, return_sequences=True, stateful=False, activation='tanh'),
        LSTM(64, stateful=False, activation='tanh'),
        # dropout to control for overfitting
        Dropout(.25),
        # squash output onto number of classes in probability space
        Dense(num_classes, activation='softmax'),
    ])

    # The optimizer in use is Adam with lr=0.002.
    adam_optimizer = optimizers.adam(lr=0.002)
    lstm.compile(loss='categorical_crossentropy', optimizer=adam_optimizer, metrics=["accuracy"])

    # NOTE(review): early stopping watches 'acc' (the training metric) even
    # though validation data is passed to fit() — confirm this is intended.
    es = EarlyStopping(monitor='acc', min_delta=.005, patience=10, verbose=1, mode='auto')

    # Log file for graphical interpretation using TensorBoard
    tb = TensorBoard(log_dir=LOG_DIR, histogram_freq=0, batch_size=32, write_graph=True, write_grads=True,
                     write_images=True, embeddings_freq=0, embeddings_layer_names=None,
                     embeddings_metadata=None)

    lstm.fit(X_train, y_train, batch_size=batch_size,
             epochs=EPOCHS, validation_data=(X_validation, y_validation),
             callbacks=[es, tb])
    return lstm
And when I run it for 15 epochs, I get this loss curve for the validation data. https://imgur.com/a/hB4uK
And this is the accuracy on validation data.
https://imgur.com/a/9knGD
This is the training accuracy: https://imgur.com/a/HBfgF
And this is the training loss: https://imgur.com/a/JRdQ9
I've only used three classes from the dataset.
Any suggestions on what can I improve in the model?
These are the steps I followed:
1. Read wav file [only reading 90 samples per class]
2. calculate melspectrogram
3. split mel-spec into segments. [this gives around 11k samples]
4. normalize the mel-spec.
5. feed into network.
I clearly don't understand something (this is my first Keras toy project).
My input is (X, y): X is a 1-D vector of real values and y is a scalar.
I want to predict whether y is positive or negative. One way is to encode y as one-hot and use categorical_crossentropy (which works); the other is a custom loss function that should do the same (which doesn't work).
I'm training on 8 examples and checking that I can overfit. With my custom function the loss gets stuck at 0.56.
Here's the code:
import keras.backend as K
def custom_cross_entrophy(y_true, y_pred):
    """Per-sample two-class cross-entropy driven by the sign of the target.

    Column 0 of ``y_pred`` is treated as the probability of ``y_true <= 0``
    and column 1 as the probability of ``y_true > 0``.

    Args:
        y_true: Scalar targets, either flat or with a trailing axis of size 1.
        y_pred: Class probabilities with two columns.

    Returns:
        A flat tensor holding one loss value per sample.
    """
    # Targets can reach the loss with a trailing axis of size 1 (Keras
    # typically expands 1-D labels to (batch, 1) — confirm for your version).
    # Multiplying such a column with the flat slices of y_pred broadcasts to
    # a (batch, batch) matrix, which pairs every prediction with every
    # label. Flattening keeps the products element-wise.
    y_true = K.flatten(y_true)
    is_negative = K.cast(y_true <= 0, dtype='float32')
    is_positive = K.cast(y_true > 0, dtype='float32')
    return -(K.log(y_pred[:, 0]) * is_negative
             + K.log(y_pred[:, 1]) * is_positive)
def build_model(x_dim, unites, loss_fuc):
    """Assemble and compile a one-hidden-layer, two-way softmax classifier.

    Args:
        x_dim: Number of input features.
        unites: Number of units in the hidden ReLU layer.
        loss_fuc: Loss passed to ``compile``.

    Returns:
        The compiled Sequential model (optimizer: "adam").
    """
    hidden_layer = Dense(
        units=unites,
        activation='relu',
        input_shape=(x_dim,),
    )
    output_layer = Dense(units=2)

    model = Sequential()
    for layer in (hidden_layer, output_layer, Activation("softmax")):
        model.add(layer)

    # Report how long compilation takes.
    compile_started = time.time()
    model.compile(loss=loss_fuc, optimizer="adam")
    print("Compilation Time : ", time.time() - compile_started)
    return model
Now build and run model with custom
model = build_model(X_train.shape[1], 20, custom_cross_entrophy)
model.fit(X_train,y_train,
batch_size=8,epochs=10000,
validation_split=0.,verbose=0)
print model.evaluate(X_train, y_train, verbose=1)
#assert my custom_cross_entrophy is like catergorical_cross_entropy
pred = model.predict(X)
y_onehot = np.zeros((len(K.eval(y_true)),2))
for i in range(len(K.eval(y_true))):
y_onehot[i,int(K.eval(y_true)[i]>0)]=1
print K.eval(custom_cross_entrophy(K.variable(y_train), K.variable(pred)))
print K.eval(categorical_crossentropy(K.variable(y_onehot), K.variable(pred)))
output:
('Compilation Time : ', 0.06212186813354492)
8/8 [==============================] - 0s 52ms/step
0.562335193157
[ 1.38629234 0.28766826 1.38613474 0.28766349 0.28740349 0.28795806
0.28766707 0.28768104]
[ 1.38629234 0.28766826 1.38613474 0.28766349 0.28740349 0.28795806
0.28766707 0.28768104]
now do the same with the Keras loss:
model = build_model(X_train.shape[1], 20, categorical_crossentropy)
model.fit(X_train,y_onehot,
batch_size=8,epochs=10000,
validation_split=0.,verbose=0)
print model.evaluate(X_train, y_onehot, verbose=1)
output:
('Compilation Time : ', 0.04332709312438965)
8/8 [==============================] - 0s 34ms/step
4.22694138251e-05
How is this possible? the losses should be the same mathematically
Thanks!
Off the top of my head, I'd say you're running two different evaluations:
print model.evaluate(X_train, y_train, verbose=1)
# ...
print model.evaluate(X_train, y, verbose=1)
but I don't know what's in y and y_train, so you might need to expand a bit more on what you're doing and how you're splitting the data.
Try and run:
print model.evaluate(X_train, y_onehot, verbose=1)
to see if it was just a typo.
Cheers