Gan loss tiny, discriminator loss huge? - deep-learning

I can't find the problem in my code - I'm training a GAN and the gan loss and discriminator loss are very low, 0.04 and it seems like it's converging well but a - the pictures don't look very good but the actual problem is that, b - somehow when I do gan.predict(noise) it's very close to 1, but when I do discriminator.predict(gan(noise)), it's very close to 0 although it's supposed to be identical. Here's my code:
Generator code:
def create_generator():
generator=tf.keras.Sequential()
#generator.add(layers.Dense(units=50176,input_dim=25))
generator.add(layers.Dense(units=12544,input_dim=100))
#generator.add(layers.Dropout(0.2))
generator.add(layers.Reshape([112,112,1])) #112,112
generator.add(layers.Conv2D(32, kernel_size=3,padding='same',activation='relu'))
generator.add(layers.UpSampling2D()) #224,224
generator.add(layers.Conv2D(1, kernel_size=4,padding='same',activation='tanh'))
generator.compile(loss='binary_crossentropy', optimizer=adam_optimizer())
return generator
g=create_generator()
g.summary()
Discriminator code:
#IMAGE DISCRIMINATOR
def create_discriminator():
discriminator=tf.keras.Sequential()
discriminator.add(layers.Conv2D(64, kernel_size=2,padding='same',activation='relu',input_shape=[224,224,1]))
discriminator.add(layers.Dropout(0.5))
discriminator.add(layers.Conv2D(32,kernel_size=2,padding='same',activation='relu'))
discriminator.add(layers.Dropout(0.5))
discriminator.add(layers.Conv2D(16,kernel_size=2,padding='same',activation='relu'))
discriminator.add(layers.Dropout(0.5))
discriminator.add(layers.Conv2D(8,kernel_size=2,padding='same',activation='relu'))
discriminator.add(layers.Dropout(0.5))
discriminator.add(layers.Conv2D(1,kernel_size=2,padding='same',activation='relu'))
discriminator.add(layers.Dropout(0.5))
discriminator.add(layers.Flatten())
discriminator.add(layers.Dense(units=1, activation='sigmoid'))
discriminator.compile(loss='binary_crossentropy', optimizer=tf.optimizers.Adam(lr=0.0002))
return discriminator
d =create_discriminator()
d.summary()
Gan code:
def create_gan(discriminator, generator):
discriminator.trainable=False
gan_input = tf.keras.Input(shape=(100,))
x = generator(gan_input)
gan_output= discriminator(x)
gan= tf.keras.Model(inputs=gan_input, outputs=gan_output)
#gan.compile(loss='binary_crossentropy', optimizer='adam')
gan.compile(loss='binary_crossentropy', optimizer=adam_optimizer())
return gan
gan = create_gan(d,g)
gan.summary()
Training code (I purposely don't do train_on_batch with the gan cause I wanted to see if the gradients zero out.)
##tf.function
def training(epochs=1, batch_size=128, rounds=50):
batch_count = X_bad.shape[0] / batch_size
# Creating GAN
generator = create_generator()
discriminator = create_discriminator()
###########if you want to continue training an already trained gan
#discriminator.set_weights(weights)
gan = create_gan(discriminator, generator)
start = time.time()
for e in range(1,epochs+1 ):
#print("Epoch %d" %e)
#for _ in tqdm(range(batch_size)):
#generate random noise as an input to initialize the generator
noise= np.random.normal(0,1, [batch_size, 100])
# Generate fake MNIST images from noised input
generated_images = generator.predict(noise)
#print('gen im shape: ',np.shape(generated_images))
# Get a random set of real images
image_batch = X_bad[np.random.randint(low=0,high=X_bad.shape[0],size=batch_size)]
#print('im batch shape: ',image_batch.shape)
#Construct different batches of real and fake data
X= np.concatenate([image_batch, generated_images])
# Labels for generated and real data
y_dis=np.zeros(2*batch_size)
y_dis[:batch_size]=0.99
#Pre train discriminator on fake and real data before starting the gan.
discriminator.trainable=True
discriminator.train_on_batch(X, y_dis)
#Tricking the noised input of the Generator as real data
noise= np.random.normal(0,1, [batch_size, 100])
y_gen = np.ones(batch_size)
# During the training of gan,
# the weights of discriminator should be fixed.
#We can enforce that by setting the trainable flag
discriminator.trainable=False
#training the GAN by alternating the training of the Discriminator
#and training the chained GAN model with Discriminator’s weights freezed.
#gan.train_on_batch(noise, y_gen)
with tf.GradientTape() as tape:
pred=gan(noise)
loss_val=tf.keras.losses.mean_squared_error(y_gen,pred)
# loss_val=gan.test_on_batch(noise,y_gen)
# loss_val=tf.cast(loss_val,dtype=tf.float32)
grads=tape.gradient(loss_val,gan.trainable_variables)
optimizer.apply_gradients(zip(grads, gan.trainable_variables))
if e == 1 or e % rounds == 0:
end = time.time()
loss_value=discriminator.test_on_batch(X, y_dis)
print("Epoch {:03d}: Loss: {:.3f}".format(e,loss_value))
gen_loss=gan.test_on_batch(noise,y_gen)
print('gan loss: ',gen_loss)
#print('Epoch: ',e,' Loss: ',)
print('Time for ',rounds,' epochs: ',end-start,' seconds')
local_time = time.ctime(end)
print('Printing time: ',local_time)
plot_generated_images(e, generator,examples=5)
start = time.time()
return discriminator,generator,grads
Now, the final losses after around 2000 epochs are 0.039 for the generator and 0.034 for the discriminator. The thing is that when I do the following, look what I get
print('disc for bad train: ',np.mean(discriminator(X_bad[:50])))
#output
disc for bad train: 0.9995248
noise= np.random.normal(0,1, [500, 100])
generated_images = generator.predict(noise)
print('disc for gen: ',np.mean(discriminator(generated_images[:50])))
print('gan for gen: ',np.mean(gan(noise[:50])))
#output
disc for gen: 0.0018724388
gan for gen: 0.96554756
Can anyone find the problem?
Thanks!

If anyone stumbles upon this, then I figured it out (although I haven't fixed it yet). What's happening here is that my Gan object is training only the generator weights. The discriminator is a different object that is training itself but when I train the gan, I don't update the discriminator weights in the gan, I only updated the generator weights and therefore when I had the noise as the input to the gan, the output was as I wanted, close to one, however when I took the generated images as the input to the discriminator, since the discriminator was being trained separately from the generator (the discriminator weights were not being changed at all inside the gan), the result was close to zero. A better approach is to create a GAN as a class and be able to approach the discriminator within the gan and the generator within the gan in order to update both of their weights inside the gan.

Related

Why Is accuracy so different when I use evaluate() and predict()?

I have a Convolutional Neural Network, and it's trying to resolve a classification problem using images (2 classes, so binary classification), using sigmoid.
To evaluate the model I use:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
path_dir = '../../dataset/train'
parth_dir_test = '../../dataset/test'
datagen = ImageDataGenerator(
rescale=1./255,
validation_split = 0.2)
test_set = datagen.flow_from_directory(parth_dir_test,
target_size= (150,150),
batch_size = 64,
class_mode = 'binary')
score = classifier.evaluate(test_set, verbose=0)
print('Test Loss', score[0])
print('Test accuracy', score[1])
And it outputs:
When I try to print the classification report I use:
yhat_classes = classifier.predict_classes(test_set, verbose=0)
yhat_classes = yhat_classes[:, 0]
print(classification_report(test_set.classes,yhat_classes))
But now I get this accuracy:
If I print the test_set.classes, it shows the first 344 numbers of the array as 0, and the next 344 as 1. Is this test_set shuffled before feeding into the network?
I think your model is doing just fine both in "training" and "evaluating".Evaluation accuracy comes on the basis of prediction so maybe you are making some logical mistake while using model.predict_classes().Please check if you are using the trained model weights and not any randomly initialized model while evaluating it.
what "evaluate" does: The model sets apart this fraction of data while training, and will not train on it, and will evaluate loss and any other model's metrics on this data after each "epoch".so, model.evaluate() is for evaluating your trained model. Its output is accuracy or loss, not prediction to your input data!
predict: Generates output predictions for the input samples. model.predict() actually predicts, and its output is target value, predicted from your input data.
FYI: if your accurscy in Binary Classification problem is less than 50%, it's worse than the case that you randomly predict one of those classes (acc = 50%)!
I needed to add a shuffle=False. The code that work is:
test_set = datagen.flow_from_directory(parth_dir_test,
target_size=(150,150),
batch_size=64,
class_mode='binary',
shuffle=False)

what should I do if my regression model stuck at a high value loss?

I'm using neural nets for a regression problem where I have 3 features and I'm trying to predict one continuous value. I noticed that my neural net start learning good but after 10 epochs it get stuck on a high loss value and could not improve anymore.
I tried to use Adam and other adaptive optimizers instead of SGD but that didn't work. I tried a complex architectures like adding layers, neurons, batch normalization and other activations etc.. and that also didn't work.
I tried to debug and try to find out if something is wrong with the implementation but when I use only 10 examples of the data my model learn fast so there are no errors. I start to increase the examples of the data and monitoring my model results as I increase the data examples. when I reach 3000 data examples my model start to get stuck on a high value loss.
I tried to increase layers, neurons and also to try other activations, batch normalization. My data are also normalized between [-1, 1], my target value is not normalized since it is regression and I'm predicting a continuous value. I also tried using keras but I've got the same result.
My real dataset have 40000 data, I don't know what should I try, I almost try all things that I know for optimization but none of them worked. I would appreciate it if someone can guide me on this. I'll post my Code but maybe it is too messy to try to understand, I'm sure there is no problem with my implementation, I'm using skorch/pytorch and some SKlearn functions:
# take all features as an Independant variable except the bearing and distance
# here when I start small the model learn good but from 3000 data points as you can see the model stuck on a high value. I mean the start loss is 15 and it start to learn good but when it reach 9 it stucks there
# and if I try to use the whole dataset for training then the loss start at 47 and start decreasing until it reach 36 and then stucks there too
X = dataset.iloc[:3000, 0:-2].reset_index(drop=True).to_numpy().astype(np.float32)
# take distance and bearing as the output values:
y = dataset.iloc[:3000, -2:].reset_index(drop=True).to_numpy().astype(np.float32)
y_bearing = y[:, 0].reshape(-1, 1)
y_distance = y[:, 1].reshape(-1, 1)
# normalize the input values
scaler = StandardScaler()
X_norm = scaler.fit_transform(X, y)
X_br_train, X_br_test, y_br_train, y_br_test = train_test_split(X_norm,
y_bearing,
test_size=0.1,
random_state=42,
shuffle=True)
X_dis_train, X_dis_test, y_dis_train, y_dis_test = train_test_split(X_norm,
y_distance,
test_size=0.1,
random_state=42,
shuffle=True)
bearing_trainset = Dataset(X_br_train, y_br_train)
bearing_testset = Dataset(X_br_test, y_br_test)
distance_trainset = Dataset(X_dis_train, y_dis_train)
distance_testset = Dataset(X_dis_test, y_dis_test)
def root_mse(y_true, y_pred):
return np.sqrt(mean_squared_error(y_true, y_pred))
class RMSELoss(nn.Module):
def __init__(self):
super().__init__()
self.mse = nn.MSELoss()
def forward(self, yhat, y):
return torch.sqrt(self.mse(yhat, y))
class AED(nn.Module):
"""custom average euclidean distance loss"""
def __init__(self):
super().__init__()
def forward(self, yhat, y):
return torch.dist(yhat, y)
def train(on_target,
hidden_units,
batch_size,
epochs,
optimizer,
lr,
regularisation_factor,
train_shuffle):
network = None
trainset = distance_trainset if on_target.lower() == 'distance' else bearing_trainset
testset = distance_testset if on_target.lower() == 'distance' else bearing_testset
print(f"shape of trainset.X = {trainset.X.shape}, shape of trainset.y = {trainset.y.shape}")
print(f"shape of testset.X = {testset.X.shape}, shape of testset.y = {testset.y.shape}")
mse = EpochScoring(scoring=mean_squared_error, lower_is_better=True, name='MSE')
r2 = EpochScoring(scoring=r2_score, lower_is_better=False, name='R2')
rmse = EpochScoring(scoring=make_scorer(root_mse), lower_is_better=True, name='RMSE')
checkpoint = Checkpoint(dirname=f'results/{on_target}/checkpoints')
train_end_checkpoint = TrainEndCheckpoint(dirname=f'results/{on_target}/checkpoints')
if on_target.lower() == 'bearing':
network = BearingNetwork(n_features=X_norm.shape[1],
n_hidden=hidden_units,
n_out=y_distance.shape[1])
elif on_target.lower() == 'distance':
network = DistanceNetwork(n_features=X_norm.shape[1],
n_hidden=hidden_units,
n_out=1)
model = NeuralNetRegressor(
module=network,
criterion=RMSELoss,
device='cpu',
batch_size=batch_size,
lr=lr,
optimizer=optim.Adam if optimizer.lower() == 'adam' else optim.SGD,
optimizer__weight_decay=regularisation_factor,
max_epochs=epochs,
iterator_train__shuffle=train_shuffle,
train_split=predefined_split(testset),
callbacks=[mse, r2, rmse, checkpoint, train_end_checkpoint]
)
print(f"{'*' * 10} start training the {on_target} model {'*' * 10}")
history = model.fit(trainset, y=None)
print(f"{'*' * 10} End Training the {on_target} Model {'*' * 10}")
if __name__ == '__main__':
args = parser.parse_args()
train(on_target=args.on_target,
hidden_units=args.hidden_units,
batch_size=args.batch_size,
epochs=args.epochs,
optimizer=args.optimizer,
lr=args.learning_rate,
regularisation_factor=args.regularisation_lambda,
train_shuffle=args.shuffle)
and this is my network declaration:
class DistanceNetwork(nn.Module):
"""separate NN for predicting distance"""
def __init__(self, n_features=5, n_hidden=16, n_out=1):
super().__init__()
self.model = nn.Sequential(
nn.Linear(n_features, n_hidden),
nn.LeakyReLU(),
nn.Linear(n_hidden, 5),
nn.LeakyReLU(),
nn.Linear(5, n_out)
)
here is the log while training:

Variable bug(s) O'Reilly Programming PyTorch

I'm reading O'Reilly's Sept, 2019 publication 'Programming Pytorch ..' describing a simple linear neural network for image classification.
There is a bug in a variable name in the opening model (no worries) target vs. targets, however there is what appears to be a weird omission of a variable declaration, train_iterator (and also dev_iterator, not shown).
I wish to know is what was the train_iterator variable they (I presume) intended?
p27
def train(model, optimiser, loss_fn, train_loader, val_loader, epochs=20, device='cpu'):
for epoch in range(epochs):
training_loss = 0.0
valid_loss = 0.0
model.train()
for batch in train_loader:
optimizer.zero_grad()
inputs, target = batch # Bug here for 'target'
inputs = inputs.to(device)
target = targets.to(device)
output = model(inputs)
loss = loss_fin(output, target)
loss.backward()
optimizer.step()
training_loss += loss.data.item()
training_loss /= len(train_iterator) # What is train_iterator?
so,..
inputs, target = batch
must be
inputs, targets = batch
In the validation step below the training step (not shown) is
inputs, targets = batch
...
targets = targets.to(device)
Its no biggie and the code is simply assigning to Cudas (GPU) or CPU.
The variable train_iterator is defining the training loss (important diagnostic). I assume that there should be an iterator declared between the epoch and the batch iterator, or is this within the training loop?
Notes train_loader simply refers Pytorch Dataloader. The model refers to 3 linear layers with a ReLU activation function.

Keras' ImageDataGenerator.flow() results in very low training/validation accuracy as opposed to flow_from_directory()

I am trying to train a very simple model for image recognition, nothing spectacular. My first attempt worked just fine, when I used image rescaling:
# this is the augmentation configuration to enhance the training dataset
train_datagen = ImageDataGenerator(
rescale=1. / 255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
# validation generator, only rescaling
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
train_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode='categorical')
validation_generator = test_datagen.flow_from_directory(
validation_data_dir,
target_size=(img_width, img_height),
batch_size=batch_size,
class_mode='categorical')
Then I simply trained the model as such:
model.fit_generator(
train_generator,
steps_per_epoch=nb_train_samples // batch_size,
epochs=epochs,
validation_data=validation_generator,
validation_steps=nb_validation_samples // batch_size)
This works perfectly fine and leads to a reasonable accuracy. Then I thought it may be a good idea to try out mean subtraction, as VGG16 model uses. Instead of doing it manually, I chose to use ImageDataGenerator.fit(). For that, however, you need to supply it with training images as numpy arrays, so I first read the images, convert them, and then feed them into it:
train_datagen = ImageDataGenerator(
featurewise_center=True,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
test_datagen = ImageDataGenerator(featurewise_center=True)
def process_images_from_directory(data_dir):
x = []
y = []
for root, dirs, files in os.walk(data_dir, topdown=False):
class_names = sorted(dirs)
global class_indices
if len(class_indices) == 0:
class_indices = dict(zip(class_names, range(len(class_names))))
for dir in class_names:
filenames = os.listdir(os.path.join(root,dir))
for file in filenames:
img_array = img_to_array(load_img(os.path.join(root,dir,file), target_size=(224, 224)))[np.newaxis]
if len(x) == 0:
x = img_array
else:
x = np.concatenate((x,img_array))
y.append(class_indices[dir])
#this step converts an array of classes [0,1,2,3...] into sparse vectors [1,0,0,0], [0,1,0,0], etc.
y = np.eye(len(class_names))[y]
return x, y
x_train, y_train = process_images_from_directory(train_data_dir)
x_valid, y_valid = process_images_from_directory(validation_data_dir)
nb_train_samples = x_train.shape[0]
nb_validation_samples = x_valid.shape[0]
train_datagen.fit(x_train)
test_datagen.mean = train_datagen.mean
train_generator = train_datagen.flow(
x_train,
y_train,
batch_size=batch_size,
shuffle=False)
validation_generator = test_datagen.flow(
x_valid,
y_valid,
batch_size=batch_size,
shuffle=False)
Then, I train the model the same way, simply giving it both iterators. After the training completes, the accuracy is basically stuck at ~25% even after 50 epochs:
80/80 [==============================] - 77s 966ms/step - loss: 12.0886 - acc: 0.2500 - val_loss: 12.0886 - val_acc: 0.2500
When I run predictions on the above model, it classifies only 1 out 4 total classes correctly, all images from other 3 classes are classified as belonging to the first class - clearly the percentage of 25% has something to do with this fact, I just can't figure out what I am doing wrong.
I realize that I could calculate the mean manually and then simply set it for both generators, or that I could use ImageDataGenerator.fit() and then still go with flow_from_directory, but that would be a waste of already processed images, I would be doing the same processing twice.
Any opinions on how to make it work with flow() all the way?
Did you try setting shuffle=True in your generators?
You did not specify shuffling in the first case (it should be True by default) and set it to False in the second case.
Your input data might be sorted by classes. Without shuffling, your model first only sees class #1 and simply learns to predict class #1 always. It then sees class #2 and learns to always predict class #2 and so on. At the end of one epoch your model learns to always predict class #4 and thus gives a 25% accuracy on validation.

More than one prediction in multi-classification in Keras?

I am learning about designing Convolutional Neural Networks using Keras. I have developed a simple model using VGG16 as the base. I have about 6 classes of images in the dataset. Here are the code and description of my model.
model = models.Sequential()
conv_base = VGG16(weights='imagenet' ,include_top=False, input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
conv_base.trainable = False
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(6, activation='sigmoid'))
Here is the code for compiling and fitting the model:
model.compile(loss='categorical_crossentropy',
optimizer=optimizers.RMSprop(lr=1e-4),
metrics=['acc'])
model.summary()
callbacks = [
EarlyStopping(monitor='acc', patience=1, mode='auto'),
ModelCheckpoint(monitor='val_loss', save_best_only=True, filepath=model_file_path)
]
history = model.fit_generator(
train_generator,
steps_per_epoch=10,
epochs=EPOCHS,
validation_data=validation_generator,
callbacks = callbacks,
validation_steps=10)
Here is the code for prediction of a new image
img = image.load_img(img_path, target_size=(IMAGE_SIZE, IMAGE_SIZE))
plt.figure(index)
imgplot = plt.imshow(img)
x = image.img_to_array(img)
x = x.reshape((1,) + x.shape)
prediction = model.predict(x)[0]
# print(prediction)
Often model.predict() method predicts more than one class.
[0 1 1 0 0 0]
I have a couple of questions
Is it normal for a multiclass classification model to predict more than one output?
How is accuracy measured during training time if more than one class was predicted?
How can I modify the neural network so that only one class is predicted?
Any help is appreciated. Thank you so much!
You are not doing multi-class classification, but multi-label. This is caused by the use of a sigmoid activation at the output layer. To do multi-class classification properly, use a softmax activation at the output, which will produce a probability distribution over classes.
Taking the class with the biggest probability (argmax) will produce a single class prediction, as expected.