I'm new to deep learning. May I ask if the code below uses soft attention or hard attention?
import torch.nn as nn

class AttentionBlock(nn.Module):
    def __init__(self, f_g, f_l, f_int):
        super().__init__()
        # 1x1 convolutions project the gating signal and the skip features
        # to a common intermediate number of channels (f_int)
        self.w_g = nn.Sequential(
            nn.Conv2d(f_g, f_int, kernel_size=1, stride=1, padding=0, bias=True),
            nn.BatchNorm2d(f_int)
        )
        self.w_x = nn.Sequential(
            nn.Conv2d(f_l, f_int, kernel_size=1, stride=1, padding=0, bias=True),
            nn.BatchNorm2d(f_int)
        )
        # psi collapses to a single channel and squashes it to (0, 1)
        self.psi = nn.Sequential(
            nn.Conv2d(f_int, 1, kernel_size=1, stride=1, padding=0, bias=True),
            nn.BatchNorm2d(1),
            nn.Sigmoid(),
        )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, g, x):
        g1 = self.w_g(g)
        x1 = self.w_x(x)
        psi = self.relu(g1 + x1)
        psi = self.psi(psi)   # attention coefficients in (0, 1)
        return psi * x        # re-weight the skip-connection features
Source: https://www.kaggle.com/code/truthisneverlinear/attention-u-net-pytorch/notebook#Attention-U-Net
I don't know the difference between soft attention and hard attention.
Hard attention makes a "hard" decision about which input/region to focus on: the attention values are exactly 0 or 1. Soft attention makes a "soft" decision: all values lie in the range [0, 1], like a probability distribution. Soft attention is generally used and preferred because it is differentiable.
A good starting point is to look at the corresponding paper.
We propose one of the first use cases of soft-attention technique in a feed-forward CNN
model applied to a medical imaging task.
In the code you can see that they use a sigmoid activation in the last layer to normalize to the range [0, 1].
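To make the distinction concrete, here is a minimal sketch (not from the notebook) contrasting the soft gate used above with a thresholded, hard variant:

import torch

feat = torch.randn(1, 8, 4, 4)                  # some feature map
attn = torch.sigmoid(torch.randn(1, 1, 4, 4))   # soft attention: values in (0, 1)
soft_out = attn * feat                          # features are re-weighted smoothly

hard_attn = (attn > 0.5).float()                # hard attention: values are exactly 0 or 1
hard_out = hard_attn * feat                     # regions are either kept or discarded
# the thresholding step is not differentiable, which is why soft attention is usually preferred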
I'm writing a thesis and want to present a visualisation of the CNN architecture used for the analysis (written in PyTorch). I came across the repository PlotNeuralNet, which has examples of how to generate LaTeX code for drawing neural networks for reports and presentations. However, I'm having trouble figuring out how exactly to define my particular architecture.
Here is an example of how one would define an architecture.
import sys
sys.path.append('../')
from pycore.tikzeng import *

# define your arch
arch = [
    to_head('..'),
    to_cor(),
    to_begin(),
    to_Conv("conv1", 512, 64, offset="(0,0,0)", to="(0,0,0)", height=64, depth=64, width=2),
    to_Pool("pool1", offset="(0,0,0)", to="(conv1-east)"),
    to_Conv("conv2", 128, 64, offset="(1,0,0)", to="(pool1-east)", height=32, depth=32, width=2),
    to_connection("pool1", "conv2"),
    to_Pool("pool2", offset="(0,0,0)", to="(conv2-east)", height=28, depth=28, width=1),
    to_SoftMax("soft1", 10, "(3,0,0)", "(pool1-east)", caption="SOFT"),
    to_connection("pool2", "soft1"),
    to_Sum("sum1", offset="(1.5,0,0)", to="(soft1-east)", radius=2.5, opacity=0.6),
    to_connection("soft1", "sum1"),
    to_end()
]

def main():
    namefile = str(sys.argv[0]).split('.')[0]
    to_generate(arch, namefile + '.tex')

if __name__ == '__main__':
    main()
However, looking at the different blocks available in the pycore module, I'm still not able to use the tool. The usage documentation is not very elaborate, so I was hoping someone here would find it trivial to define the architecture below. Otherwise, are there any good alternative ways to visualise the architecture?
class Net20(nn.Module):
    """CNN for 20-day Image

    This particular model should have:
    - 3 blocks
    - 64 layers in first block, multiply by 2 each subsequent block
    - filter size (5,3)
    - vertical stride = 3 (but only in first layer)
    - vertical dilation = 2 (but only in first layer)
    - Leaky Relu activation function
    - max pooling (2,1) at the end of each block
    """
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            Conv2dSame(1, 64, kernel_size=(5, 3), stride=(3, 1), dilation=(2, 1)),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(negative_slope=0.01, inplace=True),
            nn.MaxPool2d((2, 1), ceil_mode=True)
        )
        self.layer2 = nn.Sequential(
            Conv2dSame(64, 128, kernel_size=(5, 3)),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(negative_slope=0.01, inplace=True),
            nn.MaxPool2d((2, 1), ceil_mode=True)
        )
        self.layer3 = nn.Sequential(
            Conv2dSame(128, 256, kernel_size=(5, 3)),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(negative_slope=0.01, inplace=True),
            nn.MaxPool2d((2, 1), ceil_mode=True)
        )
        self.fc1 = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(46080, 1),
        )

    def forward(self, x):
        x = x.reshape(-1, 1, 64, 60)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = x.reshape(-1, 46080)
        x = self.fc1(x)
        return x
You can try model.summary() or keras.utils.plot_model. You may want to check: https://machinelearningmastery.com/visualize-deep-learning-neural-network-model-keras/
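If you prefer to stay with PlotNeuralNet, a rough, untested starting point using the same pycore.tikzeng helpers as in the example above might look like the sketch below; the sizes, offsets and captions are guesses based on the Net20 shapes and will need tuning:

# Hypothetical sketch of Net20 with the same helpers as the example above.
arch = [
    to_head('..'),
    to_cor(),
    to_begin(),
    # block 1: 64 filters on a 64x60 input
    to_Conv("conv1", 64, 64, offset="(0,0,0)", to="(0,0,0)", height=40, depth=40, width=2),
    to_Pool("pool1", offset="(0,0,0)", to="(conv1-east)", height=32, depth=32, width=1),
    # block 2: 128 filters
    to_Conv("conv2", 32, 128, offset="(1.5,0,0)", to="(pool1-east)", height=28, depth=28, width=3),
    to_connection("pool1", "conv2"),
    to_Pool("pool2", offset="(0,0,0)", to="(conv2-east)", height=24, depth=24, width=1),
    # block 3: 256 filters
    to_Conv("conv3", 16, 256, offset="(1.5,0,0)", to="(pool2-east)", height=20, depth=20, width=4),
    to_connection("pool2", "conv3"),
    to_Pool("pool3", offset="(0,0,0)", to="(conv3-east)", height=16, depth=16, width=1),
    # fully connected head with a single output
    to_SoftMax("fc1", 1, "(3,0,0)", "(pool3-east)", caption="FC"),
    to_connection("pool3", "fc1"),
    to_end()
]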
I'm trying to create a PyTorch neural network and keep getting this error:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x3072 and 1024x512)
Here is my code where I create the model:
from torch import nn

# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(32*32, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork()
print(model)
An answer would be much appreciated
There can be two problems here:
Either the tensor is not being fully flattened, which can happen with, for example, a tensor of size 64x1x3072.
Or the problem might be in code outside the model, such as the training loop. Please add that part of the code as well.
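One hedged guess, since 3072 = 3 * 32 * 32: if the inputs are 3-channel 32x32 images (for example CIFAR-10, which is an assumption, not stated in the question), the flattened input has 3072 features, so the first Linear layer needs to match that rather than 32*32 = 1024:

import torch
from torch import nn

# Assuming 3-channel 32x32 inputs; flattening gives 3*32*32 = 3072 features.
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(32 * 32 * 3, 512),  # was nn.Linear(32*32, 512), which expects 1024 features
    nn.ReLU(),
    nn.Linear(512, 512),
    nn.ReLU(),
    nn.Linear(512, 10),
)

x = torch.randn(64, 3, 32, 32)   # batch of 64 RGB 32x32 images
print(model(x).shape)            # torch.Size([64, 10])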
I watched the following video on YouTube https://www.youtube.com/watch?v=jx9iyQZhSwI where it was shown that it is possible to use Gradio with a model trained on the MNIST dataset in TensorFlow. I have read that it is also possible to use PyTorch with Gradio, but I have problems with the implementation. Does anyone have an idea how to do this?
My PyTorch code for the CNN:
import torch.nn as nn

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # fully connected layer, output 10 classes
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        # flatten the output of conv2 to (batch_size, 32 * 7 * 7)
        x = x.view(x.size(0), -1)
        output = self.out(x)
        return output, x    # return x for visualization
From the video, I gather that I need to change the function that Gradio uses:
def predict_image(img):
    img_3d = img.reshape(-1, 28, 28)
    im_resize = img_3d / 255.0
    prediction = CNN(im_resize)
    pred = np.argmax(prediction)
    return pred
I'm sorry if I got your question wrong, but from what I understand you are getting an error when trying to predict the digit with your predict_image function.
Here are two possible hints. Maybe you have implemented them already, but I can't tell from the small code snippet.
First of all, have you set your model to evaluation mode using CNN.eval()? Do this after you have finished training the model and want to evaluate inputs without further training.
Second, you may need to add a fourth dimension to your input tensor im_resize: the model normally expects dimensions for the batch size, the number of channels, the height and the width of the input.
In addition, I cannot tell whether your input is of type torch.Tensor; if not, convert your array to a tensor first.
You can add a batch dimension to your input tensor by using
im_resize = im_resize.unsqueeze(0)
I hope that I understand your question correctly and was able to help you.
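Putting those hints together, a sketch of what the prediction function could look like (assuming the CNN class above has already been trained and is instantiated as model; the exact preprocessing depends on how Gradio hands you the image):

import numpy as np
import torch

model = CNN()   # assumed trained instance; load your weights in practice
model.eval()    # switch to evaluation mode

def predict_image(img):
    # img: NumPy array of shape (28, 28) with values in 0..255
    x = torch.tensor(img, dtype=torch.float32) / 255.0
    x = x.unsqueeze(0).unsqueeze(0)        # add batch and channel dims -> (1, 1, 28, 28)
    with torch.no_grad():
        output, _ = model(x)               # forward returns (logits, features)
    return int(output.argmax(dim=1).item())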
Is it possible to implement this concept with the GAN algorithm?
I want the GAN to generate a regression output (G-Value) of shape (4,) from a real image rather than from random noise, and to have the discriminator compare the G-Value with the real regression value (R-Value) of the same shape (4,). The R-Value comes from the y_train dataset.
In other words, if an image has, say, a circular pattern, it generally has 4 associated features: position x, y, z and alpha. I call these the Real Values (R-Values), and I want the GAN to generate fake values (G-Values) that fool the discriminator.
I have tried to implement it as below.
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, BatchNormalization, LeakyReLU, Dropout, Flatten, Dense
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam

class UTModel:
    def __init__(self):
        optimizer__ = Adam(2e-4)
        self.__dropout = .3
        self.optimizerGenerator = Adam(1e-4)
        self.optimizerDiscriminator = Adam(1e-4)
        self.generator, self.discriminator = self.build()

    def build(self):
        # build the generator
        g = Sequential()
        g.add(Conv2D(512, kernel_size=3, strides=2, input_shape=(128, 128, 1), padding='same'))
        g.add(BatchNormalization(momentum=0.8))
        g.add(LeakyReLU(alpha=0.2))
        g.add(Dropout(self.__dropout))
        g.add(Conv2D(256, kernel_size=3, strides=2, padding='same'))
        g.add(BatchNormalization(momentum=0.8))
        g.add(LeakyReLU(alpha=0.2))
        g.add(Dropout(self.__dropout))
        g.add(Conv2D(128, kernel_size=3, strides=2, padding='same'))
        g.add(BatchNormalization(momentum=0.8))
        g.add(LeakyReLU(alpha=0.2))
        g.add(Dropout(self.__dropout))
        g.add(Conv2D(64, kernel_size=3, strides=1, padding='same'))
        g.add(BatchNormalization(momentum=0.8))
        g.add(LeakyReLU(alpha=0.2))
        g.add(Dropout(self.__dropout))
        g.add(Flatten())
        g.add(Dense(4, activation='linear'))

        # build the discriminator
        d = Sequential()
        d.add(Dense(128, input_shape=(4,)))
        d.add(LeakyReLU(alpha=0.2))
        d.add(Dropout(self.__dropout))
        d.add(Dense(64))
        d.add(LeakyReLU(alpha=0.2))
        d.add(Dropout(self.__dropout))
        d.add(Dense(64))
        d.add(LeakyReLU(alpha=0.2))
        d.add(Dropout(self.__dropout))
        d.add(Dense(32))
        d.add(LeakyReLU(alpha=0.2))
        d.add(Dropout(self.__dropout))
        d.add(Dense(1, activation='sigmoid'))

        return g, d

    def computeLosses(self, rValid, fValid):
        bce = BinaryCrossentropy(from_logits=True)
        # Discriminator loss
        rLoss = bce(tf.ones_like(rValid), rValid)
        fLoss = bce(tf.zeros_like(fValid), fValid)
        dLoss = rLoss + fLoss
        # Generator loss
        gLoss = bce(tf.zeros_like(fValid), fValid)
        return dLoss, gLoss

    def train(self, images, rValues):
        with tf.GradientTape() as gTape, tf.GradientTape() as dTape:
            gValues = self.generator(images, training=True)
            rValid = self.discriminator(rValues, training=True)
            fValid = self.discriminator(gValues, training=True)
            dLoss, gLoss = self.computeLosses(rValid, fValid)
        dGradients = dTape.gradient(dLoss, self.discriminator.trainable_variables)
        gGradients = gTape.gradient(gLoss, self.generator.trainable_variables)
        self.optimizerDiscriminator.apply_gradients(zip(dGradients, self.discriminator.trainable_variables))
        self.optimizerGenerator.apply_gradients(zip(gGradients, self.generator.trainable_variables))
        print(dLoss, gLoss)

class UTTrainer:
    def __init__(self):
        self.env = 3DPatterns()
        self.model = UTModel()

    def start(self):
        if not self.env.available:
            return
        batch = 32
        for epoch in range(1):
            # set new episode
            while self.env.setEpisod():
                for i in range(0, self.env.episodelen, batch):
                    self.model.train(self.env.episode[i:i+batch], self.env.y[i:i+batch])
But the generated G-Values are not valid; they always converge to 1 or -1. Proper values should look like [-0.192798, 0.212887, -0.034519, -0.015000]. Please help me find the right way.
Thank you.
I am trying to train a CNN model with PyTorch so that the output behaves differently for different types of input (i.e. if the input images are human beings, it outputs pattern A, but if the input is some other animal, it outputs pattern B).
After some online searching, it seems a Siamese network is related to this. So I have the following 2 questions:
(1) Is a Siamese network really a good way to train such a model?
(2) From an implementation point of view, how should I implement the code in PyTorch?
class SiameseNetwork(nn.Module):
    def __init__(self):
        super(SiameseNetwork, self).__init__()
        self.cnn1 = nn.Sequential(
            nn.ReflectionPad2d(1),
            nn.Conv2d(1, 4, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(4),

            nn.ReflectionPad2d(1),
            nn.Conv2d(4, 8, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(8),

            nn.ReflectionPad2d(1),
            nn.Conv2d(8, 8, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(8),
        )
        self.fc1 = nn.Sequential(
            nn.Linear(8*100*100, 500),
            nn.ReLU(inplace=True),
            nn.Linear(500, 500),
            nn.ReLU(inplace=True),
            nn.Linear(500, 5))

    def forward_once(self, x):
        output = self.cnn1(x)
        output = output.view(output.size()[0], -1)
        output = self.fc1(output)
        return output

    def forward(self, input1, input2):
        output1 = self.forward_once(input1)
        output2 = self.forward_once(input2)
        return output1, output2
Currently I am trying an existing implementation I found online, like the class definition above. It works, but the model always takes two inputs and produces two outputs. That is convenient for training, but ideally there should be only one input and one (two is also fine) output during inference.
Could someone provide some guidance on how to modify the code to make it single input?
You can call forward_once during inference: this takes a single input and returns a single output. Note that explicitly calling forward_once will not invoke any hooks you might have on forward/backward calls of your module.
Alternatively, you can make forward_once your module's forward function and have your training loop do the double calling of the model (which makes more sense: the Siamese setup is a training method, not part of the network's architecture).
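A minimal, illustrative sketch of that second option; the reduced layer stack, the contrastive-style criterion, and the way pairs are formed are assumptions for brevity, not part of the original code:

import torch
from torch import nn

class SiameseNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.cnn1 = nn.Sequential(
            nn.ReflectionPad2d(1),
            nn.Conv2d(1, 4, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(4),
        )
        self.fc1 = nn.Sequential(
            nn.Linear(4 * 100 * 100, 500),
            nn.ReLU(inplace=True),
            nn.Linear(500, 5),
        )

    def forward(self, x):                    # single input, single output
        out = self.cnn1(x)
        out = out.view(out.size(0), -1)
        return self.fc1(out)

model = SiameseNetwork()
criterion = nn.CosineEmbeddingLoss()         # one possible pairwise criterion

# training step: the pairing happens outside the model
def training_step(img1, img2, label):
    emb1 = model(img1)                       # call the same model twice on the pair
    emb2 = model(img2)
    return criterion(emb1, emb2, label)      # label: 1 = similar, -1 = dissimilar

# inference: a single image gives a single embedding
with torch.no_grad():
    embedding = model(torch.randn(1, 1, 100, 100))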