I'm new to GANs and I have trouble in training DCGAN on mnist, when I was working with GAN with linear layers everything was fine and the generator generates pretty good images. But when I started working with Convolutional GAN, the generator generators only noise. Does anybody know how to fix this issue?
Generated images
Discriminator loss and generator loss
Here are my Neural network and my training loop,
Neural network :
Discriminator
class Discriminator(nn.Module):
def __init__(self, ch, F):
super(Discriminator, self).__init__()
def block(in_ch,out_ch,k,s,p,final=False,bn=True):
block = []
block.append(nn.Conv2d(in_ch,out_ch,kernel_size=k,stride=s,padding=p))
if not final and bn:
block.append(nn.BatchNorm2d(out_ch))
block.append(nn.LeakyReLU(0.2))
elif not bn and not final:
block.append(nn.LeakyReLU(0.2))
elif final and not bn:
block.append(nn.Sigmoid())
return block
self.D = nn.Sequential(
*block(ch,F,k=4,s=2,p=1,bn=False),
*block(F,F*2,k=4,s=2,p=1),
*block(F*2,F*4,k=4,s=2,p=1),
*block(F*4,F*8,k=4,s=2,p=1),
*block(F*8,1,k=4,s=2,p=0,final=True,bn=False)
)
def forward(self,x): return self.D(x)
Generator
class Generator(nn.Module):
def __init__(self, ch_noise, ch_img, features_g):
super(Generator, self).__init__()
def block(in_ch,out_ch,k,s,p,final=False):
block = []
block.append(nn.ConvTranspose2d(in_ch,out_ch,kernel_size=k,stride=s,padding=p))
if not final:
block.append(nn.BatchNorm2d(out_ch))
block.append(nn.ReLU())
if final:
block.append(nn.Tanh())
return block
self.G = nn.Sequential(
*block(ch_noise, features_g*16, k=4,s=1,p=0),
*block(features_g*16, features_g*8, k=4, s=2, p=1),
*block(features_g*8, features_g*4, k=4, s=2, p=1),
*block(features_g*4, features_g*2, k=4, s=2, p=1),
*block(features_g*2, 1, k=4, s=2, p=1,final=True)
)
def forward(self,z): return self.G(z)
Training Loop:
loss_G, loss_D = [],[]
for i in range(epochs):
D.train()
G.train()
st = time.time()
for idx, (img, _ ) in enumerate(mnist):
img = img.to(device)
## Discriminator ##
D.zero_grad(set_to_none=True)
lable = torch.ones(bs,device=device)*0.9
pred = D(img).reshape(-1)
loss_d_real = criterion(pred,lable)
z = torch.randn(img.shape[0],ch_z, 1,1,device=device)
fake_img = G(z)
lable = torch.ones(bs,device=device)*0.1
pred = D(fake_img.detach()).reshape(-1)
loss_d_fake = criterion(pred,lable)
D_loss = loss_d_real + loss_d_fake
D_loss.backward()
optim_d.step()
## Generator ##
G.zero_grad(True)
lable = torch.randn(bs,device=device)
pred = D(fake_img).reshape(-1)
G_loss = criterion(pred,lable)
G_loss.backward()
optim_g.step()
## printing on terminal
if idx % 100 == 0:
print(f'\nBatches done : {idx}/{len(mnist)}')
print(f'Loss_D : {D_loss.item():.4f}\tLoss_G : {G_loss.item():.4f}')
et = time.time()
print(f'\nEpoch : {i+1}\n{time_cal(st,et)}')
G.eval()
with torch.no_grad():
fake_image = G(fixed_noise)
save_image(fake_image[:25],fp=f'{path_to_img}/{i+1}_fake.png',nrow=5,normalize=True)
loss_G.append(G_loss.item())
loss_D.append(D_loss.item())
Here's the link to my Colab notebook in which I have been working.
Related
I am making a class-incremental learning multi-label classifier. Here the model first trains with 7 labels. After training, another dataset emerges that contains the same labels except one more. I want to automatically add an extra node to the trained network and continue training on this new dataset. How can I do this?
class FeedForewardNN(nn.Module):
def __init__(self, input_size, h1_size = 264, h2_size = 128, num_services=8):
super().__init__()
self.input_size = input_size
self.lin1 = nn.Linear(input_size, h1_size)
self.lin2 = nn.Linear(h1_size, h2_size)
self.lin3 = nn.Linear(h2_size, num_services)
self.relu = nn.ReLU()
self.sigmoid = nn.Sigmoid()
def forward(self, x):
x = self.lin1(x)
x = self.relu(x)
x = self.lin2(x)
x = self.relu(x)
x = self.lin3(x)
x = self.sigmoid(x)
return x
This is the architecture of the feedforward Neural Network.
Then I first train on the data set with only 7 classes.
#Create NN
input_size = len(x_columns)
net1 = FeedForewardNN(input_size, num_services=7)
alpha= 0.001
#Define optimizer
optimizer = optim.Adam(net.parameters(), lr=alpha)
criterion = nn.BCELoss()
running_loss = 0
#Training Loop
loss_list = []
auc_list = []
for i in range(len(train_data_x)):
optimizer.zero_grad()
outputs = net1(train_data_x[i])
loss = criterion(outputs, train_data_y[i])
loss.backward()
optimizer.step()
However then, I want to add one additional output node, define the new weights but maintain the old trained weights, and train on this new data set.
I suggest to replace layer with new one, having desired shape, and than partially assign its parameter values with old ones as follows:
def increaseClassifier( m: torch.nn.Linear ):
w = m.weight
b = m.bias
old_shape = m.weight.shape
m2 = nn.Linear( old_shape[1], old_shape[0] +1 )
m2.weight = nn.parameter.Parameter( torch.cat( (m.weight, m2.weight[0:1]) ) )
m2.bias = nn.parameter.Parameter( torch.cat( (m.bias, m2.bias[0:1]) ) )
return m2
class FeedForewardNN(nn.Module):
...
def incrHere(self):
self.lin3 = increaseClassifier( self.lin3 )
UPD:
Can you explain, how these additional weights that come with this new output node are initialized?
The initial weights for new channel come from new layer creation, layer constructor make new parameters with some random initialization, then we are replace part of it with trained weight, and remained part is ready for new training.
m2.weight = nn.parameter.Parameter( torch.cat( (m.weight, m2.weight[0:1]) ) )
Hello guys I've joined a university-level image recognition competition.
In the test, they will give two images (people face) and my model need to detect pair of the image is the same person or not
My model is resnet18 with IR block and SE block. and it will use Arcface loss.
I can use only the MS1M dataset with a total of 86876 classes
The problem is that loss is getting better, but accuracy is 0 and not changing.
Here's part of code I'm working on.
Train
def train_model(model, net, criterion, optimizer, scheduler, num_epochs=25):
since = time.time()
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
for phase in ['train']:
if phase == 'train':
model.train() # Set model to training mode
running_loss = 0.0
running_corrects = 0
# Iterate over data.
for inputs, labels in notebook.tqdm(dataloader):
inputs = inputs.to(device)
labels = labels.to(device).long()
# zero the parameter gradients
optimizer.zero_grad()
# forward
# track history if only in train
with torch.set_grad_enabled(phase == 'train'):
features = model(inputs)
outputs = net(features, labels)
_, preds = torch.max(outputs, 1)
loss = criterion(outputs, labels)
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
optimizer.step()
# statistics
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
if phase == 'train':
scheduler.step()
epoch_loss = running_loss / len(dataloader)
epoch_acc = running_corrects.double() / len(dataloader)
print('{} Loss: {:.4f} Acc: {:.4f}'.format(
phase, epoch_loss, epoch_acc))
# deep copy the model
if phase == 'train' and epoch_acc > best_acc:
best_acc = epoch_acc
best_model_wts = copy.deepcopy(model.state_dict())
torch.save({'epoch': epoch,
'mode_state_dict': model.state_dict(),
'fc_state_dict': net.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'scheduler': scheduler.state_dict(), # HERE IS THE CHANGE
}, f'/content/drive/MyDrive/inha_data/training_saver/training_stat{epoch}.pth')
print(f'finished {epoch} and saved model_save_{epoch}.pt')
print()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
print('Best train Acc: {:4f}'.format(best_acc))
# load best model weights
model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), 'model_save.pt')
return model
Parameters
train_dataset = MS1MDataset('train')
dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True,num_workers=4)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # 디바이스 설정
num_classes = 86876
# normal classifier
# net = nn.Sequential(nn.Linear(512, num_classes))
# Feature extractor backbone, input is 112x112 image output is 512 feature vector
model_ft = resnet18(True)
#set metric
metric_fc = metrics.ArcMarginProduct(512, num_classes, s = 30.0, m = 0.50, easy_margin = False)
metric_fc.to(device)
# net = net.to(device)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
# Observe that all parameters are being optimized
optimizer_ft = torch.optim.Adam([{'params': model_ft.parameters()}, {'params': metric_fc.parameters()}],
lr=0.1)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=4, gamma=0.1)
Arcface
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter
import math
class ArcMarginProduct(nn.Module):
r"""Implement of large margin arc distance: :
Args:
in_features: size of each input sample
out_features: size of each output sample
s: norm of input feature
m: margin
cos(theta + m)
"""
def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False):
super(ArcMarginProduct, self).__init__()
self.in_features = in_features
self.out_features = out_features
self.s = s
self.m = m
self.weight = Parameter(torch.FloatTensor(out_features, in_features))
nn.init.xavier_uniform_(self.weight)
self.easy_margin = easy_margin
self.cos_m = math.cos(m)
self.sin_m = math.sin(m)
self.th = math.cos(math.pi - m)
self.mm = math.sin(math.pi - m) * m
def forward(self, input, label):
# --------------------------- cos(theta) & phi(theta) ---------------------------
cosine = F.linear(F.normalize(input), F.normalize(self.weight))
sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
phi = cosine * self.cos_m - sine * self.sin_m
if self.easy_margin:
phi = torch.where(cosine > 0, phi, cosine)
else:
phi = torch.where(cosine > self.th, phi, cosine - self.mm)
# --------------------------- convert label to one-hot ---------------------------
# one_hot = torch.zeros(cosine.size(), requires_grad=True, device='cuda')
one_hot = torch.zeros(cosine.size(), device='cuda')
one_hot.scatter_(1, label.view(-1, 1).long(), 1)
# -------------torch.where(out_i = {x_i if condition_i else y_i) -------------
output = (one_hot * phi) + ((1.0 - one_hot) * cosine) # you can use torch.where if your torch.__version__ is 0.4
output *= self.s
# print(output)
return output
dataset
data_transforms = {
'train': transforms.Compose([
transforms.RandomHorizontalFlip(),
transforms.ColorJitter(brightness=0.125, contrast=0.125, saturation=0.125),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
}
#train_ms1_data = torchvision.datasets.ImageFolder('/content/drive/MyDrive/inha_data/train', transform = data_transforms)
class MS1MDataset(Dataset):
def __init__(self,split):
self.file_list = '/content/drive/MyDrive/inha_data/ID_List.txt'
self.images = []
self.labels = []
self.transformer = data_transforms['train']
with open(self.file_list) as f:
files = f.read().splitlines()
for i, fi in enumerate(files):
fi = fi.split()
image = "/content/" + fi[1]
label = int(fi[0])
self.images.append(image)
self.labels.append(label)
def __getitem__(self, index):
img = Image.open(self.images[index])
img = self.transformer(img)
label = self.labels[index]
return img, label
def __len__(self):
return len(self.images)
You can try to use a smaller m in ArcFace, even a minus value.
I'm trying to build a semantic segmentation model with pytorch. However, I encounter this error and do not know how to fix it.
This is the model:
class SegmentationNN(pl.LightningModule):
def __init__(self, num_classes=23, hparams=None):
super().__init__()
self.hparams = hparams
self.model=models.alexnet(pretrained=True).features
self.conv=nn.Conv2d(256, 3, kernel_size=1)
self.upsample = nn.Upsample(size=(240,240))
def forward(self, x):
print('Input:', x.shape)
x = self.model(x)
print('After Alexnet convs:', x.shape)
x = self.conv(x)
print('After 1-conv:', x.shape)
x = self.upsample(x)
print('After upsampling:', x.shape)
return x
def training_step(self, batch, batch_idx):
images, targets = batch
# targets = targets.view(targets.size(0), -1)
out = self.forward(images)
loss_func = nn.CrossEntropyLoss(ignore_index=-1, reduction='mean')
loss = loss_func(out, targets.unsqueeze(0))
tensorboard_logs = {'loss': loss}
return {'loss': loss, 'log':tensorboard_logs}
def validation_step(self, batch, batch_idx):
images, targets = batch
# targets = targets.view(targets.size(0), -1)
out = self.forward(images)
loss_func = nn.CrossEntropyLoss(ignore_index=-1, reduction='mean')
loss = loss_func(out, targets.unsqueeze(0))
tensorboard_logs = {'loss': loss}
return {'loss': loss, 'log':tensorboard_logs}
def configure_optimizers(self):
optim = torch.optim.Adam(self.parameters(), lr=self.hparams['learning_rate'])
return optim
And this is the training and fit:
train_dataloader = DataLoader(train_data, batch_size=hparams['batch_size'])
val_dataloader = DataLoader(val_data, batch_size=hparams['batch_size'])
trainer = pl.Trainer(
max_epochs=50,
gpus=1 if torch.cuda.is_available() else None
)
pass
trainer.fit(model, train_dataloader, val_dataloader)
These are the sizes of the tensor after each layer:
Input: torch.Size([59, 3, 240, 240])
After Alexnet convs: torch.Size([59, 256, 6, 6])
After 1-conv: torch.Size([59, 3, 6, 6])
After upsampling: torch.Size([59, 3, 240, 240])
I am pretty a beginner with Pytorch and Pytorch Lightning so every advice would be apprreciated!
Can you delete the unsqueeze(0) part here : loss = loss_func(out, targets.unsqueeze(0))
Here is some part of my PyTorch code:
test_loader = DataLoader(dataset = test_loader_hibiscus, batch_size = 1, shuffle=False, num_workers=0)
test_losses = []
y_pred_list = []
feat_list = []
with torch.no_grad():
model.eval()
test_loss = 0.0
if expe_temoin == False :
for test_dwi,test_adc,test_tmax,test_cbf,test_cbv,test_label in test_loader :
test_dwi = test_dwi.to(device)
test_adc = test_adc.to(device)
test_tmax = test_tmax.to(device)
test_cbf = test_cbf.to(device)
test_cbv = test_cbv.to(device)
in_imgs = torch.cat((train_dwi,train_adc,train_tmax,train_cbf,train_cbv), dim=1)
out_recon, my_feat = model(in_imgs)
print("my_feat", my_feat[0].shape)
But it prints:
my_feat torch.Size([2, 512, 1, 24, 24])
Could someone please tell me why 2 (batch?) Thanks!
Hint: When I run with test data size: 26, its OK, when run with data size: 25, mess up the batch! Is there something about being odd and even?!
Here is the Unet 3d model for 3D reconstruction and segmentation:
class Abstract3DUNet(nn.Module):
def __init__(self, in_channels, out_channels, final_sigmoid, basic_module, f_maps=64, layer_order='gcr',
num_groups=8, num_levels=4, is_segmentation=True, testing=False,
conv_kernel_size=3, pool_kernel_size=2, conv_padding=1, **kwargs):
super(Abstract3DUNet, self).__init__()
self.testing = testing
if isinstance(f_maps, int):
f_maps = number_of_features_per_level(f_maps, num_levels=num_levels)
assert isinstance(f_maps, list) or isinstance(f_maps, tuple)
assert len(f_maps) > 1, "Required at least 2 levels in the U-Net"
# create encoder path
self.encoders = create_encoders(in_channels, f_maps, basic_module, conv_kernel_size, conv_padding, layer_order,
num_groups, pool_kernel_size)
# create decoder path
self.decoders = create_decoders(f_maps, basic_module, conv_kernel_size, conv_padding, layer_order, num_groups,
upsample=True)
# in the last layer a 1×1 convolution reduces the number of output
# channels to the number of labels
self.final_conv = nn.Conv3d(f_maps[0], out_channels, 1)
if is_segmentation:
# semantic segmentation problem
if final_sigmoid:
self.final_activation = nn.Sigmoid()
else:
self.final_activation = nn.Softmax(dim=1)
else:
# regression problem
self.final_activation = None
def forward(self, x):
# encoder part
encoders_features = []
my_feat =[]
for encoder in self.encoders:
x = encoder(x)
# reverse the encoder outputs to be aligned with the decoder
encoders_features.insert(0, x)
# remove the last encoder's output from the list
# !!remember: it's the 1st in the list
my_feat = encoders_features[0:]
encoders_features = encoders_features[1:]
# decoder part
for decoder, encoder_features in zip(self.decoders, encoders_features):
# pass the output from the corresponding encoder and the output
# of the previous decoder
x = decoder(encoder_features, x)
x = self.final_conv(x)
# apply final_activation (i.e. Sigmoid or Softmax) only during prediction. During training the network outputs
# logits and it's up to the user to normalize it before visualising with tensorboard or computing validation metric
if self.testing and self.final_activation is not None:
x = self.final_activation(x)
return x, my_feat
class UNet3D(Abstract3DUNet):
def __init__(self, in_channels, out_channels, final_sigmoid=True, f_maps=64, layer_order='gcr',
num_groups=8, num_levels=4, is_segmentation=True, conv_padding=1, **kwargs):
super(UNet3D, self).__init__(in_channels=in_channels,
out_channels=out_channels,
final_sigmoid=final_sigmoid,
basic_module=DoubleConv,
f_maps=f_maps,
layer_order=layer_order,
num_groups=num_groups,
num_levels=num_levels,
is_segmentation=is_segmentation,
conv_padding=conv_padding,
**kwargs)
My train batch size was 3! When I changed it into 2 or 4 the problem solved!
I'm finding a hardtime figuring out how to correctly define a mxnet net so that i can serialize/convert this model to a json file.
The pipeline is composed of a CNN + biLSTM + CTC.
I now i must use HybridBlock and hybridize() but i can't seem to make it work or if its even possible or if there is any other way around.
I'm sure its lack of knowledge on my part and wonder is anyone can help.
Here is the net definition in python:
NUM_HIDDEN = 200
NUM_CLASSES = 13550
NUM_LSTM_LAYER = 1
p_dropout = 0.5
SEQ_LEN = 32
def get_featurizer():
featurizer = gluon.nn.HybridSequential()
# conv layer
featurizer.add(gluon.nn.Conv2D(kernel_size=(3,3), padding=(1,1), channels=32, activation="relu"))
featurizer.add(gluon.nn.BatchNorm())
....
featurizer.hybridize()
return featurizer
class EncoderLayer(gluon.Block):
def __init__(self, **kwargs):
super(EncoderLayer, self).__init__(**kwargs)
with self.name_scope():
self.lstm = mx.gluon.rnn.LSTM(NUM_HIDDEN, NUM_LSTM_LAYER, bidirectional=True)
def forward(self, x):
x = x.transpose((0,3,1,2))
x = x.flatten()
x = x.split(num_outputs=SEQ_LEN, axis = 1) # (SEQ_LEN, N, CHANNELS)
x = nd.concat(*[elem.expand_dims(axis=0) for elem in x], dim=0)
x = self.lstm(x)
x = x.transpose((1, 0, 2)) # (N, SEQ_LEN, HIDDEN_UNITS)
return x
def get_encoder():
encoder = gluon.nn.Sequential()
encoder.add(EncoderLayer())
encoder.add(gluon.nn.Dropout(p_dropout))
return encoder
def get_decoder():
decoder = mx.gluon.nn.Dense(units=ALPHABET_SIZE, flatten=False)
decoder.hybridize()
return decoder
def get_net():
net = gluon.nn.Sequential()
with net.name_scope():
net.add(get_featurizer())
net.add(get_encoder())
net.add(get_decoder())
return net
Any help would be highly appreciated.
Thank you very much.
There are few requirements for a model in Gluon to be exportable to json:
It needs to be hybridizable, meaning that each children block should be hybridizable as well and the model works in both modes
All parameters should be initialized. Since Gluon uses deferred parameter initialization, that means that you should do forward pass at least once before you can save the model.
I did some fixes for your code also introducing new constants when I needed. The most significant changes are:
Don't use split if you can avoid it, because it returns list of NDArrays. Use reshape, which works seemlessly with Symbol as well.
Starting from 1.3.0 version of MXNet, LSTM is also hybridizable, so you can wrap it in a HybridBlock instead of just a Block.
Use HybridSequential.
Here is the adjusted code with an example at the bottom how to save the model and how to load it back. You can find more information in this tutorial.
import mxnet as mx
from mxnet import gluon
from mxnet import nd
BATCH_SIZE = 1
CHANNELS = 100
ALPHABET_SIZE = 1000
NUM_HIDDEN = 200
NUM_CLASSES = 13550
NUM_LSTM_LAYER = 1
p_dropout = 0.5
SEQ_LEN = 32
HEIGHT = 100
WIDTH = 100
def get_featurizer():
featurizer = gluon.nn.HybridSequential()
featurizer.add(
gluon.nn.Conv2D(kernel_size=(3, 3), padding=(1, 1), channels=32, activation="relu"))
featurizer.add(gluon.nn.BatchNorm())
return featurizer
class EncoderLayer(gluon.HybridBlock):
def __init__(self, **kwargs):
super(EncoderLayer, self).__init__(**kwargs)
with self.name_scope():
self.lstm = mx.gluon.rnn.LSTM(NUM_HIDDEN, NUM_LSTM_LAYER, bidirectional=True)
def hybrid_forward(self, F, x):
x = x.transpose((0, 3, 1, 2))
x = x.flatten()
x = x.reshape(shape=(SEQ_LEN, -1, CHANNELS)) #x.split(num_outputs=SEQ_LEN, axis=1) # (SEQ_LEN, N, CHANNELS)
x = self.lstm(x)
x = x.transpose((1, 0, 2)) # (N, SEQ_LEN, HIDDEN_UNITS)
return x
def get_encoder():
encoder = gluon.nn.HybridSequential()
encoder.add(EncoderLayer())
encoder.add(gluon.nn.Dropout(p_dropout))
return encoder
def get_decoder():
decoder = mx.gluon.nn.Dense(units=ALPHABET_SIZE, flatten=False)
return decoder
def get_net():
net = gluon.nn.HybridSequential()
with net.name_scope():
net.add(get_featurizer())
net.add(get_encoder())
net.add(get_decoder())
return net
if __name__ == '__main__':
net = get_net()
net.initialize()
net.hybridize()
fake_data = mx.random.uniform(shape=(BATCH_SIZE, HEIGHT, WIDTH, CHANNELS))
out = net(fake_data)
net.export("mymodel")
deserialized_net = gluon.nn.SymbolBlock.imports("mymodel-symbol.json", ['data'],
"mymodel-0000.params", ctx=mx.cpu())
out2 = deserialized_net(fake_data)
# just to check that we get the same results
assert (out - out2).sum().asscalar() == 0