It contains the implementation of Recognizing Challenging Handwritten Annotations with Fully Convolutional Networks.
While training, an error occurs, NaN loss during training. What could be the reasons?
I tried changing loss function to tanh and changing hyper-parameters, but the problem still persists
import torch
from PIL import Image
from matplotlib import pyplot as plt
import os
import numpy as np
from tqdm import trange, tqdm
import torch.optim as optim
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets
from torch.utils.data import Dataset, DataLoader
import random
import time
import math
import numbers
import torchfcn
import subprocess
def make_cv_folds(img_root, gt_root, num_folds, random_state):
samples = []
folds = [list() for i in range(num_folds)]
lengths = [0]*num_folds
img_root = os.path.expanduser(img_root)
gt_root = os.path.expanduser(gt_root)
for f in sorted(os.listdir(img_root)):
if not os.path.isfile(os.path.join(img_root, f)) or not os.path.isfile(os.path.join(gt_root, f.rsplit(".", 1)[0] + ".png")):
raise Exception("GT fehlt")
samples.append((os.path.join(img_root, f), os.path.join(gt_root, f.rsplit(".", 1)[0] + ".png")))
np.random.seed(random_state)
np.random.shuffle(samples)
np.random.seed()
for s in samples:
idx = np.argmin(lengths)
folds[idx].append(s)
lengths[idx] += 1
return folds
def load_sample(img_path, gt_path):
img = Image.open(img_path)
gt = Image.open(gt_path)
gt = np.array(gt)[:,:,2]
#binary format
gt[gt == 0] = 2
gt[gt == 255] = 1
#hisdb format
gt[gt == 1] = 1
gt[(gt%8) == 0] = 1
gt[(gt%4) == 0] = 1
gt[(gt%2) == 0] = 0
gt = Image.fromarray(gt)
return img, gt
class Annotations(Dataset):
class_names = np.array(['other', 'annotation'])
def __init__(self, img_root, gt_root, loader=load_sample, num_folds=5, preprocess=None, random_state=None):
self.folds = make_cv_folds(img_root, gt_root, num_folds=num_folds, random_state=random_state)
self.img_root = img_root
self.num_folds = num_folds
self.preprocess = preprocess
self.loader = loader
self.is_training = True
self.load_split(num=0)
def train(self, val):
if val:
self.is_training = True
self.samples = self.train_samples
else:
self.is_training = False
self.samples = self.test_samples
def load_split(self, num=0):
if len(self.folds) == 1:
self.train_samples = self.folds[0]
self.test_samples = self.folds[0]
else:
num = num%len(self.folds)
train_folds = list(range(0,num)) + list(range(num+1, len(self.folds)))
test_fold = num
self.train_samples = []
for i in train_folds:
self.train_samples.extend(self.folds[i])
self.test_samples = self.folds[num]
if self.is_training:
self.samples = self.train_samples
else:
self.samples = self.test_samples
def untransform(self, img, gt):
img = img.numpy()
img = img.transpose(1, 2, 0)
img = img.astype(np.uint8)
img = img[:, :, ::-1]
gt = gt.numpy()
return img, gt
def __getitem__(self, index):
img_path, gt_path = self.samples[index]
img, gt = self.loader(img_path, gt_path)
if self.preprocess is not None:
state = time.time()
img = self.preprocess(img, random_state=state)
gt = self.preprocess(gt, random_state=state)
img = np.array(img, dtype=np.uint8)
img = img[:, :, ::-1] # RGB -> BGR
img = img.astype(np.float64)
img = img.transpose(2, 0, 1)
img = torch.from_numpy(img).float()
gt = np.array(gt, dtype=np.int32)
gt = torch.from_numpy(gt).long()
return img, gt
def __len__(self):
return len(self.samples)
def __repr__(self):
fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
fmt_str += ' Number of datapoints: {}\n'.format(self.__len__())
fmt_str += ' Number of training samples: {}\n'.format(len(self.train_samples))
fmt_str += ' Number of testing samples: {}\n'.format(len(self.test_samples))
return fmt_str
class CenterCrop(object):
"""Crops the given PIL Image at the center.
Args:
size (sequence or int): Desired output size of the crop. If size is an
int instead of sequence like (h, w), a square crop (size, size) is
made.
"""
def __init__(self, size):
if isinstance(size, numbers.Number):
self.size = (int(size), int(size))
else:
self.size = size
def __call__(self, img, random_state=None):
"""
Args:
img (PIL Image): Image to be cropped.
Returns:
PIL Image: Cropped image.
"""
return transforms.functional.center_crop(img, self.size)
def __repr__(self):
return self.__class__.__name__ + '(size={0})'.format(self.size)
class RandomResizedCrop(object):
"""Crop the given PIL Image to random size and aspect ratio.
A crop of random size (default: of 0.08 to 1.0) of the original size and a random
aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop
is finally resized to given size.
This is popularly used to train the Inception networks.
Args:
size: expected output size of each edge
scale: range of size of the origin size cropped
ratio: range of aspect ratio of the origin aspect ratio cropped
interpolation: Default: PIL.Image.BILINEAR
"""
def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Image.NEAREST):
self.size = (size, size)
self.interpolation = interpolation
self.scale = scale
self.ratio = ratio
#staticmethod
def get_params(img, scale, ratio, random_state=None):
"""Get parameters for ``crop`` for a random sized crop.
Args:
img (PIL Image): Image to be cropped.
scale (tuple): range of size of the origin size cropped
ratio (tuple): range of aspect ratio of the origin aspect ratio cropped
Returns:
tuple: params (i, j, h, w) to be passed to ``crop`` for a random
sized crop.
"""
random.seed(random_state)
for attempt in range(10):
area = img.size[0] * img.size[1]
target_area = random.uniform(*scale) * area
aspect_ratio = random.uniform(*ratio)
w = int(round(math.sqrt(target_area * aspect_ratio)))
h = int(round(math.sqrt(target_area / aspect_ratio)))
if random.random() < 0.5:
w, h = h, w
if w <= img.size[0] and h <= img.size[1]:
i = random.randint(0, img.size[1] - h)
j = random.randint(0, img.size[0] - w)
return i, j, h, w
# Fallback
w = min(img.size[0], img.size[1])
i = (img.size[1] - w) // 2
j = (img.size[0] - w) // 2
return i, j, w, w
def __call__(self, img, random_state=None):
"""
Args:
img (PIL Image): Image to be cropped and resized.
Returns:
PIL Image: Randomly cropped and resized image.
"""
i, j, h, w = self.get_params(img, self.scale, self.ratio, random_state=random_state)
return transforms.functional.resized_crop(img, i, j, h, w, self.size, self.interpolation)
def __repr__(self):
interpolate_str = _pil_interpolation_to_str[self.interpolation]
format_string = self.__class__.__name__ + '(size={0}'.format(self.size)
format_string += ', scale={0}'.format(round(self.scale, 4))
format_string += ', ratio={0}'.format(round(self.ratio, 4))
format_string += ', interpolation={0})'.format(interpolate_str)
return format_string
class RandomCrop(object):
"""Crop the given PIL Image at a random location.
Args:
size (sequence or int): Desired output size of the crop. If size is an
int instead of sequence like (h, w), a square crop (size, size) is
made.
padding (int or sequence, optional): Optional padding on each border
of the image. Default is 0, i.e no padding. If a sequence of length
4 is provided, it is used to pad left, top, right, bottom borders
respectively.
"""
def __init__(self, size, padding=0):
if isinstance(size, numbers.Number):
self.size = (int(size), int(size))
else:
self.size = size
self.padding = padding
#staticmethod
def get_params(img, output_size, random_state=None):
"""Get parameters for ``crop`` for a random crop.
Args:
img (PIL Image): Image to be cropped.
output_size (tuple): Expected output size of the crop.
Returns:
tuple: params (i, j, h, w) to be passed to ``crop`` for random crop.
"""
random.seed(random_state)
w, h = img.size
th, tw = output_size
if w == tw and h == th:
return 0, 0, h, w
i = random.randint(0, h - th)
j = random.randint(0, w - tw)
return i, j, th, tw
def __call__(self, img, random_state=None):
"""
Args:
img (PIL Image): Image to be cropped.
Returns:
PIL Image: Cropped image.
"""
if self.padding > 0:
img = F.pad(img, self.padding)
i, j, h, w = self.get_params(img, self.size, random_state=random_state)
return transforms.functional.crop(img, i, j, h, w)
def __repr__(self):
return self.__class__.__name__ + '(size={0}, padding={1})'.format(self.size, self.padding)
preprocess_train = RandomResizedCrop(size=1024)
preprocess_test = RandomResizedCrop(size=1024)
trainset = Annotations(img_root='public_shared/input/',
gt_root='public_shared/new_bin',
preprocess=preprocess_train,
num_folds=1)
testset = Annotations(img_root='test/input/',
gt_root='test/new_bin/',
preprocess=preprocess_test,
num_folds=1)
testset.train(False)
train_loader = DataLoader(trainset, batch_size=1, shuffle=True, num_workers=8, drop_last=True)
test_loader = DataLoader(testset, batch_size=1, shuffle=False, num_workers=8, drop_last=True)
dat = trainset[0]
img = dat[0].numpy().transpose(1,2,0)
gt = dat[1].numpy()
print(img.shape)
print(gt.shape)
plt.imshow(img.squeeze())
plt.show()
plt.imshow(gt.squeeze())
plt.show()
log_dir = "log-mg3/"
def get_parameters(model, bias=False):
import torch.nn as nn
modules_skipped = (
nn.ReLU,
nn.MaxPool2d,
nn.Dropout2d,
nn.Sequential,
torchfcn.models.FCN8s,
)
for m in model.modules():
if isinstance(m, nn.Conv2d):
if bias:
yield m.bias
else:
yield m.weight
elif isinstance(m, nn.ConvTranspose2d):
# weight is frozen because it is just a bilinear upsampling
if bias:
assert m.bias is None
elif isinstance(m, modules_skipped):
continue
else:
raise ValueError('Unexpected module: %s' % str(m))
configurations = {
# same configuration as original work
# https://github.com/shelhamer/fcn.berkeleyvision.org
1: dict(
max_iteration=200000,
lr=1.0e-10,
momentum=0.99,
weight_decay=0.0005,
interval_validate=4000,
)
}
cfg = configurations[1]
out = log_dir
cuda = torch.cuda.is_available()
torch.manual_seed(1337)
if cuda:
torch.cuda.manual_seed(1337)
resume = ""
model = torchfcn.models.FCN8sAtOnce(n_class=2)
start_epoch = 0
start_iteration = 0
if resume:
checkpoint = torch.load(resume)
model.load_state_dict(checkpoint['model_state_dict'])
start_epoch = checkpoint['epoch']
start_iteration = checkpoint['iteration']
else:
vgg16 = torchfcn.models.VGG16(pretrained=True)
model.copy_params_from_vgg16(vgg16)
if cuda:
model = model.cuda()
optimizer = torch.optim.SGD(
[
{'params': get_parameters(model, bias=False)},
{'params': get_parameters(model, bias=True),
'lr': cfg['lr'] * 2, 'weight_decay': 0},
],
lr=cfg['lr'],
momentum=cfg['momentum'],
weight_decay=cfg['weight_decay'])
if resume:
optimizer.load_state_dict(checkpoint['optim_state_dict'])
trainer = torchfcn.Trainer(
cuda=cuda,
model=model,
optimizer=optimizer,
train_loader=train_loader,
val_loader=test_loader,
out=out,
max_iter=cfg['max_iteration'],
interval_validate=cfg.get('interval_validate', len(train_loader)),
)
trainer.epoch = start_epoch
trainer.iteration = start_iteration
# trainer.train()
def evaluate_model(model, data_loader):
model.eval()
processes = []
mius = []
for index in tqdm(range(len(data_loader.dataset))):
_, gt_path = data_loader.dataset.samples[index]
image, _ = data_loader.dataset[index]
image = image.numpy()
image.shape = (1, image.shape[0], image.shape[1], image.shape[2])
prediction = np.zeros((image.shape[2], image.shape[3], 3), dtype=np.uint8)
div_arr = np.zeros((image.shape[2], image.shape[3]), dtype=np.uint8)
offsets_vertical = list(range(0, image.shape[2], 256))
offsets_horizontal = list(range(0, image.shape[3], 256))
for v in offsets_vertical:
for h in offsets_horizontal:
data = image[:, :, v:v+1024, h:h+1024]
data = torch.from_numpy(data)
data = data.cuda()
data = Variable(data, volatile=True)
score = model(data)
lbl_pred = score.data.max(1)[1].cpu().numpy()[:, :, :]
lbl_pred[lbl_pred == 0] = 2
prediction[v:v+1024, h:h+1024, 2] += lbl_pred.astype(np.uint8).squeeze()
div_arr[v:v+1024, h:h+1024] += 1
prediction[:,:,2] = np.round(prediction[:,:,2]/div_arr)
im = Image.fromarray(prediction)
prediction_path = os.path.join(log_dir, "prediction-private")
if not os.path.isdir(prediction_path):
os.makedirs(prediction_path)
prediction_filename = os.path.join(prediction_path, os.path.basename(gt_path))
im.save(prediction_filename)
processes.append(subprocess.Popen(["java", "-jar", "DIVA_Layout_Analysis_Evaluator/out/artifacts/LayoutAnalysisEvaluator.jar", "-p", prediction_filename, "-gt", gt_path], stdout=subprocess.PIPE))
for p in processes:
miu = float(p.communicate()[0].splitlines()[0].split()[-1])
mius.append(miu)
print(mius)
print("average:", np.mean(mius))
return np.mean(mius)
testset = Annotations(img_root='test/input/',
gt_root='test/new_gt/',
preprocess=None,
num_folds=1)
testset.train(False)
test_loader = DataLoader(testset, batch_size=1, shuffle=False, num_workers=8, drop_last=True)
evaluate_model(model, test_loader)
Error:
0%| | 0/10 [00:00<?, ?it/s]/home/harsh/anaconda3/envs/vdlproject/lib/python3.6/site-packages/ipykernel_launcher.py:23: UserWarning: volatile was removed and now has no effect. Use `with torch.no_grad():` instead.
100%|██████████| 10/10 [00:13<00:00, 1.28s/it]
[]
average: nan
/home/harsh/anaconda3/envs/vdlproject/lib/python3.6/site-packages/numpy/core/fromnumeric.py:2920: RuntimeWarning: Mean of empty slice.
out=out, **kwargs)
/home/harsh/anaconda3/envs/vdlproject/lib/python3.6/site-packages/numpy/core/_methods.py:85: RuntimeWarning: invalid value encountered in double_scalars
ret = ret.dtype.type(ret / rcount)
Output: average: nan
Can anyone explain cause of error?
Related
I am trying to reproduce the neural network proposed in this paper using PyTorch: A mechanics-informed artificial neural network approach in data driven constitutive modeling. The goal of the neural network is to approximate a nonlinear function, which maps the input to the output, but using the derivative of the network as the actual output. That means, that the network learns the integral of the function.
My approach to coding this with PyTorch is at the end of this question (It would be great if you could review that as well, as the mistake might be somewhere else and I don't see it.).
The whole idea of the paper is as follows:
Input: x_0; Output: y
x_1 = activation(w_input * x_0 +b)
for l = 2,…,N_l do
x_l = activation(softplus(Q;alpha) * x_(l-1) + b_l + w_skip_l * x_o
end for
y = softplus(Q, alpha) * x_(N_l-1) + f(x_0)
with the activation function being essentially the softplus function squared :
class LearnedSoftPlusSquared(torch.nn.Module):
def __init__(self, init_beta=1.0, threshold=20):
super().__init__()
self.log_beta = torch.nn.Parameter(torch.tensor(float(init_beta)).log())
self.threshold = 20
def forward(self, x):
beta = self.log_beta.exp()
beta_squared = beta**2
beta_squared_x = beta_squared * x
return torch.where(beta_squared_x < 20, 0.5 * ((torch.log1p(beta_squared_x.exp()) / beta_squared)**2), x)
And the loss function (better representation in the linked paper):
Loss = sum_over_samples_and_dimensions(((derivative_wrt_to_input(evaluated at input) - derivative_wrt_to_input(evaluated at 0) - target value)/elementwise_standard_deviation)^2)
In code:
def final_loss_func(gradients_output_wrt_input, gradients_corrector, target):
sigma_k = torch.std(target, dim = 0)
loss = torch.mean(torch.sum(((gradients_output_wrt_input - gradients_corrector - target)/sigma_k)**2, dim = 1))
return loss
All in all this network has the parameters (alpha, A, Q_l, W_l, b_l, beta).
I can’t really figure out my mistake, although I believe the problem lies in my implementation of the loss function, which uses the derivative of the network and a correction term (which is also based on the derivative of the network.
Right now, the loss starts out very high and decreases to a certain point, from which it rises again.
It is also not possible to overfit, even with very small examples. In my opinion that is caused by the correction term which is applied in the loss function.
My questions regarding this network are:
How does one implement the double differentiation properly?
I tried to get the gradient of the output with respect to the input by using autograd, same procedure for the correction term, enabling the option create_graph. Those quantities were then used in the loss function, which is then used with the backward() method. My code for the training routine looks like this:
X_corr = torch.zeros(1,input_dim, requires_grad = True).type(torch.FloatTensor)
Y_corr = torch.zeros_like(X_corr)
def final_loss_func(gradients_output_wrt_input, gradients_corrector, target):
sigma_k = torch.std(target, dim = 0)
loss = torch.mean(torch.sum(((gradients_output_wrt_input - gradients_corrector - target)/sigma_k)**2, dim = 1))
return loss
model = MyModule()
learning_rate = 0.00001
optimizer = torch.optim.Adagrad(model.parameters(), lr=learning_rate)
epochs = 20000+1
for epoch in range(epochs):
optimizer.zero_grad()
pred = model(X)
pred_corr = model(X_corr)
X_gradients = torch.autograd.grad(pred, X, retain_graph=True, grad_outputs=torch.ones_like(pred), create_graph = True)[0]
X_corr_gradients = torch.autograd.grad(pred_corr, X_corr, retain_graph=True, grad_outputs=torch.ones_like(pred_corr), create_graph = True)[0]
X_corr_gradients_no_grad = X_corr_gradients.detach()
loss = final_loss_func(X_gradients, X_corr_gradients_no_grad, Y)
optimizer.step()
Also: how do I add a convex function like $f(x) = x^T A^T A x$, which is proposed in the paper, to the output, while keeping the Matrix A as a learnable parameter?
The problem here is the dimension of the input is $batchSizesampleDimension$ leading to $f(x)$ as a $batchSizebatchSize$ matrix, although it must be a $batchSize*1$ vector. Applying $f(x)$ to each sample separately delivers the right dimension, but also makes it impossible to learn A as a parameter, because it makes the differentiation impossible. My code for that looks like this:
class ConvexFunction(torch.nn.Module):
def __init__(self, input_size):
super().__init__()
A = torch.Tensor(input_size, input_size)
self.A = torch.nn.Parameter(A)
nn.init.orthogonal_(self.A)
def forward(self, x):
A_T = torch.transpose(self.A, dim0 = 0, dim1 = 1)
A_T_A = torch.matmul(A_T, self.A)
result = torch.zeros(x.size(dim = 0),1)
for i in range(x.size(dim = 0)):
result[i] = x[i] # A_T_A # x[i]
return result
The whole code for the network:
import torch
import torch.nn as nn
import pandas as pd
torch.manual_seed(0)
df = pd.read_csv("Strain_Stress_Pairs_XL_3columns.csv", sep = ',', decimal='.')
x = df.iloc[:,0:3].values
y = df.iloc[:,3:7].values
x = x *9.091
X = torch.from_numpy(x).type(torch.FloatTensor)
Y = torch.from_numpy(y).type(torch.FloatTensor)
X.requires_grad_(True)
means = Y.mean(dim=1, keepdim=True)
stds = Y.std(dim=1, keepdim=True)
for i in range(stds.size(dim = 0)):
if stds[i] < 1e-20:
stds[i] = 1e-20
Y = (Y - means) / stds
X_corr = torch.zeros(1,3, requires_grad = True).type(torch.FloatTensor)
Y_corr = torch.zeros_like(X_corr)
class ConvexFunction(torch.nn.Module):
def __init__(self, input_size):
super().__init__()
A = torch.Tensor(input_size, input_size)
self.A = torch.nn.Parameter(A)
nn.init.orthogonal_(self.A)
def forward(self, x):
A_T = torch.transpose(self.A, dim0 = 0, dim1 = 1)
A_T_A = torch.matmul(A_T, self.A)
result = torch.zeros(x.size(dim = 0),1)
for i in range(x.size(dim = 0)):
result[i] = x[i] # A_T_A # x[i]
return result
class LearnedSoftPlusSquared(torch.nn.Module):
def __init__(self, init_beta=1.0, threshold=20):
super().__init__()
self.log_beta = torch.nn.Parameter(torch.tensor(float(init_beta)).log())
self.threshold = 20
def forward(self, x):
beta = self.log_beta.exp()
beta_squared = beta**2
beta_squared_x = beta_squared * x
return torch.where(beta_squared_x < 20, 0.5 * ((torch.log1p(beta_squared_x.exp()) / beta_squared)**2), x)
class SoftPlusLinear(nn.Module):
def __init__(self, input_size, output_size, init_alpha=1.0, threshold=20):
super().__init__()
w = torch.Tensor(output_size, input_size)
self.w = nn.Parameter(w)
nn.init.orthogonal_(self.w)
b = torch.Tensor(output_size).fill_(0)
self.b = nn.Parameter(b)
self.log_alpha = torch.nn.Parameter(torch.tensor(float(init_alpha)).log())
self.threshold = 20
def forward(self, x):
alpha = self.log_alpha.exp()
alpha = alpha**2
alpha_weight = alpha * self.w
w_new = torch.log1p(alpha_weight.exp()) / alpha
return nn.functional.linear(x, w_new, bias=self.b)
class MyModule(nn.Module):
def __init__(self):
super().__init__()
input_size = 3
out_l1 = 9
out_l2 = 9
out_l3 = 6
out_l4 = 3
output_size = 1
w_skip_1 = torch.Tensor(input_size, out_l2)
self.w_skip_1 = nn.Parameter(w_skip_1)
nn.init.orthogonal_(self.w_skip_1)
w_skip_2 = torch.Tensor(input_size, out_l3)
self.w_skip_2 = nn.Parameter(w_skip_2)
nn.init.orthogonal_(self.w_skip_2)
w_skip_3 = torch.Tensor(input_size, out_l4)
self.w_skip_3 = nn.Parameter(w_skip_3)
nn.init.orthogonal_(self.w_skip_3)
matrix_conv = torch.Tensor(input_size,1)
self.matrix_conv = nn.Parameter(matrix_conv)
nn.init.orthogonal_(self.matrix_conv)
self.convex_layer = ConvexFunction(input_size)
self.l1 = nn.Linear(input_size,out_l1)
self.a1 = LearnedSoftPlusSquared()
self.l2 = SoftPlusLinear(out_l1,out_l2)
self.a2 = LearnedSoftPlusSquared()
self.l3 = SoftPlusLinear(out_l2,out_l3)
self.a3 = LearnedSoftPlusSquared()
self.l4 = SoftPlusLinear(out_l3,output_size)
def forward(self, x):
x_in = x
x = self.l1(x)
x = self.a1(x)
x = self.l2(x)
x = torch.add(x, torch.mm(x_in, self.w_skip_1))
x = self.a2(x)
x = self.l3(x)
x = torch.add(x, torch.mm(x_in, self.w_skip_2))
x = self.a3(x)
x = self.l4(x)#+ self.convex_layer(x_in) convex function not working!
return x
def final_loss_func(gradients_output_wrt_input, gradients_corrector, target):
sigma_k = torch.std(target, dim = 0)
loss = torch.mean(torch.sum(((gradients_output_wrt_input - gradients_corrector - target)/sigma_k)**2, dim = 1))
return loss
model = MyModule()
learning_rate = 0.00001
optimizer = torch.optim.Adagrad(model.parameters(), lr=learning_rate)
epochs = 20000+1
for epoch in range(epochs):
optimizer.zero_grad()
pred = model(X)
pred_corr = model(X_corr)
X_gradients = torch.autograd.grad(pred, X, retain_graph=True, grad_outputs=torch.ones_like(pred), create_graph = True)[0]
X_corr_gradients = torch.autograd.grad(pred_corr, X_corr, retain_graph=True, grad_outputs=torch.ones_like(pred_corr), create_graph = True)[0]
X_corr_gradients_no_grad = X_corr_gradients.detach()
loss = final_loss_func(X_gradients, X_corr_gradients_no_grad, Y)
optimizer.step()
loss.backward()
print(loss)
Thank you for your time.
I was getting error as :
Traceback (most recent call last):
File "/Users/dilipreddy/Downloads/Lane-Segmentation-master/lane_segmentation.py", line 132, in
X, y = train_generator.getitem(0)
File "/Users/dilipreddy/Downloads/Lane-Segmentation-master/lane_segmentation.py", line 107, in getitem
_img, _mask = self.load(id_name)
File "/Users/dilipreddy/Downloads/Lane-Segmentation-master/lane_segmentation.py", line 80, in load
id_name_actual, text, _ = id_name.split('.')
ValueError: not enough values to unpack (expected 3, got 2)
(base) dilipreddy#Dilips-MacBook-Pro Lane-Segmentation-master %
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os
import random
import sys
import tensorflow as tf
import keras
from keras.layers import *
from keras.models import *
# Part 1 - Data Preprocessing
def get_mask(img_path, label_path):
label_file = open(label_path, "r")
if label_file.mode == 'r':
contents = label_file.read()
lines_text = contents.split('\n')
x_coordinate, y_coordinate, lanes = [], [], []
for line_text in lines_text:
number_lines = line_text.split(" ")
number_lines.pop()
x = list([float(number_lines[i]) for i in range(len(number_lines)) if i % 2 == 0])
y = list([float(number_lines[i]) for i in range(len(number_lines)) if i % 2 != 0])
x_coordinate.append(x)
y_coordinate.append(y)
lanes.append(set(zip(x, y)))
lanes.pop()
img = cv2.imread(img_path)
mask = np.zeros_like(img)
# colors = [[255,0,0], [0,255,0], [0,0,255], [255,255,0]]
colors = [[255, 255, 255], [255, 255, 255], [255, 255, 255], [255, 255, 255]]
for i in range(len(lanes)):
cv2.polylines(img, np.int32([list(lanes[i])]), isClosed=False, color=colors[i], thickness=10)
label = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
return label
img = get_mask("data/CULane/driver_161_90frame/06030819_0755.MP4/00000.jpg",
"data/CULane/driver_161_90frame/06030819_0755.MP4/00000.lines.txt")
plt.imshow(img)
print(img.shape)
import os
from tensorflow.keras.utils import Sequence
import os
from tensorflow.keras.utils import Sequence
class DataGenerator2D(Sequence):
"""Generates data for Keras
Sequence based data generator. Suitable for building data generator for training and prediction.
"""
def __init__(self, base_path, img_size=256, batch_size=1, shuffle=True):
self.base_path = base_path
self.img_size = img_size
self.id = os.listdir(os.path.join(base_path, "CULane"))
self.batch_size = batch_size
self.shuffle = shuffle
self.on_epoch_end()
def __len__(self):
"""Denotes the number of batches per epoch
:return: number of batches per epoch
"""
return int(np.ceil(len(self.id) / float(self.batch_size)))
def __load__(self, id_name):
id_name_actual, text, _ = id_name.split('.')
image_path = os.path.join(self.base_path, "images", (id_name_actual + '.' + text + '.jpg'))
label_path = os.path.join(self.base_path, "labels", (id_name_actual + '.' + text + '.lines.txt'))
image = cv2.imread(image_path, 1) # Reading Image in RGB format
image = cv2.resize(image, (self.img_size, self.img_size))
# image = cv2.resize(image, (int(img.shape[1]/2), int(img.shape[0]/2)))
mask = get_mask(image_path, label_path)
mask = cv2.resize(mask, (self.img_size, self.img_size))
# mask = cv2.resize(mask, (int(img.shape[1]/2), int(img.shape[0]/2)))
# Normalizing the image
image = image / 255.0
mask = mask / 255.0
return image, mask
def __getitem__(self, index):
if (index + 1) * self.batch_size > len(self.id):
file_batch = self.id[index * self.batch_size:]
else:
file_batch = self.id[index * self.batch_size:(index + 1) * self.batch_size]
images, masks = [], []
for id_name in file_batch:
_img, _mask = self.__load__(id_name)
images.append(_img)
masks.append(_mask)
images = np.array(images)
masks = np.array(masks)
return images, masks
def on_epoch_end(self):
"""Updates indexes after each epoch
"""
self.indexes = np.arange(len(self.id))
if self.shuffle == True:
np.random.shuffle(self.indexes)
def on_epoch_end(self):
"""Updates indexes after each epoch
"""
self.indexes = np.arange(len(self.id))
if self.shuffle == True:
np.random.shuffle(self.indexes)
train_generator = DataGenerator2D(base_path='data', img_size=256, batch_size=64, shuffle=False)
X, y = train_generator.__getitem__(0)
print(X.shape, y.shape)
fig = plt.figure(figsize=(17, 8))
columns = 4
rows = 3
for i in range(1, columns*rows + 1):
img = X[i-1]
fig.add_subplot(rows, columns, i)
plt.imshow(img)
plt.show()
fig = plt.figure(figsize=(17, 8))
columns = 4
rows = 3
for i in range(1, columns*rows + 1):
img = y[i-1]
fig.add_subplot(rows, columns, i)
plt.imshow(img)
plt.show()
# Part 2 - Model
def dice_coef(y_true, y_pred, smooth=1):
"""
Dice = (2*|X & Y|)/ (|X|+ |Y|)
= 2*sum(|A*B|)/(sum(A^2)+sum(B^2))
ref: https://arxiv.org/pdf/1606.04797v1.pdf
"""
intersection = K.sum(K.abs(y_true * y_pred), axis=-1)
return (2. * intersection + smooth) / (K.sum(K.square(y_true),-1) + K.sum(K.square(y_pred),-1) + smooth)
def dice_coef_loss(y_true, y_pred):
return 1-dice_coef(y_true, y_pred)
reconstructed_model = tf.keras.models.load_model("pretrained models/UNET-BN-20-0.081170.hdf5",
custom_objects = {'dice_coef_loss': dice_coef_loss})
# Part 3 - Visualization
val_generator = DataGenerator2D('content/data/', img_size=256, batch_size=128, shuffle=True)
X, y = val_generator.__getitem__(10)
print(X.shape, y.shape)
plt.imshow(X[2])
predict = reconstructed_model.predict(X)
print(predict.shape)
img = cv2.cvtColor(predict[2], cv2.COLOR_GRAY2BGR)
plt.imshow(img)
When I use cross-entropy loss function for multiclass text classification, I get the error
Dimension out of range (expected to be in range of [-1, 0], but got 1)
This is my code:
def train(model, iterator):
...
for batch in iterator:
text, text_lengths = batch.Turn
optimizer.zero_grad()
predictions = model(text, text_lengths).squeeze(1)
loss = criterion(predictions, batch.label)
acc = categorical_accuracy(predictions, batch.label)
...
Dataset:
TEXT = data.Field(tokenize = 'spacy', include_lengths = True)
LABEL = data.LabelField(dtype = torch.long)
Forward:
def forward(self, text, text_lengths):
embedded = self.embedding(text)
packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths)
packed_output, (hidden, cell) = self.rnn(packed_embedded)
hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1))
output = self.fc1(hidden)
output = self.dropout(self.fc2(output))
return output
I have a horizontal (hLine) and Vertical Line (vLine) in Relativelayout. Horizontal line will be just touching Vertical Line. When I move Vertical Line, how to dynamically change Horizontal lines width (i.e horizontal line will stretch instead of moving ) so that it just touches Vertical Line. Any idea please. In given example below presume that Vertical line just scrolls left to right and vice versa (i.e it moves horizontally).
For sample I had provided only one horizontal line. In my actual project there will be atleast 10 Horizontal lines touching the vertical line.
from kivy.app import App
from kivy.graphics import Line, Color
from kivy.uix.scatter import Scatter
from kivy.uix.relativelayout import RelativeLayout
from kivy.uix.widget import Widget
from kivy.uix.label import Label
from kivy.core.window import Window
class MyPaintApp(App):
def build(self):
root = RelativeLayout()
(ix, iy) = (100,100)
(fx, fy) = (200,100)
clr = Color(0.2, 0.2, 1)
wdgt1 = Scatter(pos = (ix,iy), size = (fx-ix, 5))
(ix,iy) = wdgt1.to_local(ix,iy,relative=True)
(fx,fy) = wdgt1.to_local(fx, fy,relative=True)
hLine = Line(points=[ix,iy, fx, fy], width=2, cap='none')
lbl = Label(text='[color=3333ff]Horizontal[/color]', markup = True, pos=(ix,iy ))
wdgt1.canvas.add(clr)
wdgt1.canvas.add(hLine)
wdgt1.add_widget(lbl)
(fx, fy) = (200,150)
(dx, dy) = (200,50)
wdgt2 = Scatter(pos = (fx,fy), size = (5, fy - dy))
(fx,fy) = wdgt2.to_local(fx, fy,relative=True)
(dx,dy) = wdgt2.to_local(dx,dy,relative=True)
vLine = Line(points=[fx,fy, dx, dy], width=2, cap='none')
lbl = Label(text='[color=3333ff]Vertical[/color]', markup = True, pos=(fx,fy ))
wdgt2.canvas.add(clr)
wdgt2.canvas.add(vLine)
wdgt2.add_widget(lbl)
root.add_widget(wdgt1)
root.add_widget(wdgt2)
return root
if __name__ == '__main__':
Window.clearcolor = (1, 1, 1, 1)
MyPaintApp().run()
You can do that by redrawing the Canvas of wdgt1 whenever wdgt2 moves. Here is a modified version of your code that does that:
from kivy.app import App
from kivy.graphics import Line, Color
from kivy.uix.scatter import Scatter
from kivy.uix.relativelayout import RelativeLayout
from kivy.uix.label import Label
from kivy.core.window import Window
class MyPaintApp(App):
def __init__(self, **kwargs):
super(MyPaintApp, self).__init__(**kwargs)
self.hLine = None
def build(self):
root = RelativeLayout()
(ix, iy) = (100,100)
(fx, fy) = (200,100)
self.clr = Color(0.2, 0.2, 1)
self.wdgt1 = Scatter(pos = (ix,iy), size = (fx-ix, 5))
(ix,iy) = self.wdgt1.to_local(ix,iy,relative=True)
(fx,fy) = self.wdgt1.to_local(fx, fy,relative=True)
self.hLine = Line(points=[ix,iy, fx, fy], width=2, cap='none')
self.lbl = Label(text='[color=3333ff]Horizontal[/color]', markup = True, pos=(ix,iy ))
self.wdgt1.canvas.add(self.clr)
self.wdgt1.canvas.add(self.hLine)
self.wdgt1.add_widget(self.lbl)
(fx, fy) = (200,150)
(dx, dy) = (200,50)
wdgt2 = Scatter(pos = (fx,fy), size = (5, fy - dy))
(fx,fy) = wdgt2.to_local(fx, fy,relative=True)
(dx,dy) = wdgt2.to_local(dx,dy,relative=True)
vLine = Line(points=[fx,fy, dx, dy], width=2, cap='none')
lbl = Label(text='[color=3333ff]Vertical[/color]', markup = True, pos=(fx,fy ))
wdgt2.canvas.add(self.clr)
wdgt2.canvas.add(vLine)
wdgt2.add_widget(lbl)
wdgt2.bind(pos=self.move_wdgt2) # bind to movement of wdgt2
root.add_widget(self.wdgt1)
root.add_widget(wdgt2)
return root
def move_wdgt2(self, wdgt2, new_pos):
if self.hLine is None:
return
# calculate the new ending x coordinate of the hLine
x1, y1 = self.wdgt1.to_local(wdgt2.x, wdgt2.y, relative=True)
pts = self.hLine.points
pts[2] = x1
# recreate the hLine
self.hLine = Line(points=pts, width=2, cap='none')
# clear the canvas
self.wdgt1.canvas.clear()
self.wdgt1.remove_widget(self.lbl)
# redraw the canvas
self.wdgt1.canvas.add(self.clr)
self.wdgt1.canvas.add(self.hLine)
self.wdgt1.add_widget(self.lbl)
if __name__ == '__main__':
Window.clearcolor = (1, 1, 1, 1)
MyPaintApp().run()
Hello i am making a graphic adventure/rpg with pygame.
Is there a way to make NPCs with pygame, and be able to interact with them, like having a dialog ?
I've been searching on the internet but i didn't have useful results. It would be great if someone could help me.
Here is the main code.
import pygame as pg
import sys
from os import path
from settings import *
from sprites import *
from tiledmap import *
from pgu import gui
from pygame.draw import circle
import pygame_ai as pai
import time
class Game:
def __init__(self):
pg.init()
self.screen = pg.display.set_mode((WIDTH, HEIGHT))
pg.display.set_caption(TITLE)
self.clock = pg.time.Clock()
self.load_data()
def load_data(self):
game_folder = path.dirname(__file__)
img_folder = path.join(game_folder, 'img')
map_folder = path.join(game_folder, 'maps')
self.map = TiledMap(path.join(map_folder, 'mapa_inici.tmx'))
self.map_img = self.map.make_map()
self.map_rect = self.map_img.get_rect()
self.player_img = pg.image.load(path.join(img_folder, PLAYER_IMG)).convert_alpha()
def new(self):
# iniciar totes les variables i fer tota la preparació per a una nova partida
self.all_sprites = pg.sprite.Group()
self.walls = pg.sprite.Group()
#for row, tiles in enumerate(self.map.data):
#for col, tile in enumerate(tiles):
#if tile == '1':
#Wall(self, col, row)
#if tile == 's':
#self.player = Player(self, col, row)
for tile_object in self.map.tmxdata.objects:
if tile_object.name == 'Jugador':
self.player = Player(self, tile_object.x, tile_object.y)
if tile_object.name == 'Muro':
Obstacle(self, tile_object.x, tile_object.y, tile_object.width, tile_object.height)
self.camera = Camera(self.map.width, self.map.height)
def run(self):
# bucle del joc - s'iguala self.playing = False per finalitzar el joc
self.playing = True
while self.playing:
self.dt = self.clock.tick(FPS) / 1000
self.events()
self.update()
self.draw()
def quit(self):
pg.quit()
sys.exit()
def update(self):
# update portion of the game loop
self.all_sprites.update()
self.camera.update(self.player)
def draw_grid(self):
for x in range(0, WIDTH, TILESIZE):
pg.draw.line(self.screen, LIGHTGREY, (x, 0), (x, HEIGHT))
for y in range(0, HEIGHT, TILESIZE):
pg.draw.line(self.screen, LIGHTGREY, (0, y), (WIDTH, y))
def draw(self):
self.screen.blit(self.map_img, self.camera.apply_rect(self.map_rect))
for sprite in self.all_sprites:
self.screen.blit(sprite.image, self.camera.apply(sprite))
pg.display.flip()
def events(self):
# tots els events
for event in pg.event.get():
if event.type == pg.QUIT:
self.quit()
if event.type == pg.KEYDOWN:
if event.key == pg.K_ESCAPE:
self.quit()
g = Game()
while True:
g.new()
g.run()
The sprites code
import pygame as pg
from os import path
import sys
from settings import *
import pygame_ai as pai
from tiledmap import TiledMap
vec = pg.math.Vector2
class Player(pg.sprite.Sprite):
def __init__(self, game, x, y):
self.groups = game.all_sprites
pg.sprite.Sprite.__init__(self, self.groups)
self.game = game
self.image = game.player_img
self.rect = self.image.get_rect()
self.vel = vec(0,0)
self.pos = vec(x, y)
self.rot = 0
def get_keys(self):
self.vel = vec(0,0)
keys = pg.key.get_pressed()
if keys[pg.K_LEFT] or keys[pg.K_a]:
self.vel.x = -PLAYER_SPEED
if keys[pg.K_RIGHT] or keys[pg.K_d]:
self.vel.x = PLAYER_SPEED
if keys[pg.K_UP] or keys[pg.K_w]:
self.vel.y = -PLAYER_SPEED
if keys[pg.K_DOWN] or keys[pg.K_s]:
self.vel.y = PLAYER_SPEED
if self.vel.x != 0 and self.vel.y != 0:
self.vel *= 0.7071
def collide_walls(self,dir):
if dir == 'x':
hits = pg.sprite.spritecollide(self, self.game.walls, False)
if hits:
if self.vel.x > 0:
self.pos.x = hits[0].rect.left - self.rect.width
if self.vel.x < 0:
self.pos.x = hits[0].rect.right
self.vel.x = 0
self.rect.x = self.pos.x
if dir == 'y':
hits = pg.sprite.spritecollide(self, self.game.walls, False)
if hits:
if self.vel.y > 0:
self.pos.y = hits[0].rect.top - self.rect.height
if self.vel.y < 0:
self.pos.y = hits[0].rect.bottom
self.vel.y = 0
self.rect.y = self.pos.y
def update(self):
self.get_keys()
self.pos += self.vel * self.game.dt
self.rect.x = self.pos.x
self.collide_walls('x')
self.rect.y = self.pos.y
self.collide_walls('y')
class Obstacle(pg.sprite.Sprite):
def __init__(self, game, x, y, w, h):
self.groups = game.walls
pg.sprite.Sprite.__init__(self, self.groups)
self.game = game
self.rect = pg.Rect(x, y, w, h)
self.x = x
self.y = y
self.rect.x = x
self.rect.y = y
The tile map code
import pygame as pg
import pytmx
from settings import *
class Map:
def __init__(self, filename):
self.data = []
with open(filename, 'rt') as f:
for line in f:
self.data.append(line.strip())
self.tilewidth = len(self.data[0])
self.tileheight = len(self.data)
self.width = self.tilewidth * TILESIZE
self.height = self.tileheight * TILESIZE
class TiledMap:
def __init__(self, filename):
tm = pytmx.load_pygame(filename, pixelalpha=True)
self.width = tm.width * tm.tilewidth
self.height = tm.height * tm.tileheight
self.tmxdata = tm
def render(self, surface):
ti = self.tmxdata.get_tile_image_by_gid
for layer in self.tmxdata.visible_layers:
if isinstance(layer, pytmx.TiledTileLayer):
for x, y, gid, in layer:
tile = ti(gid)
if tile:
surface.blit(tile, (x * self.tmxdata.tilewidth, y * self.tmxdata.tileheight))
def make_map(self):
temp_surface = pg.Surface((self.width, self.height))
self.render(temp_surface)
return temp_surface
class Camera:
def __init__(self, width, height):
self.camera = pg.Rect(0,0,width,height)
self.width = width
self.height = height
def apply(self, entity):
return entity.rect.move(self.camera.topleft)
def apply_rect(self, rect):
return rect.move(self.camera.topleft)
def update(self, target):
x = -target.rect.centerx + int(WIDTH / 2)
y = -target.rect.centery + int(HEIGHT / 2)
# limit al seguiment del personatge
x = min(0,x) # esquerra
y = min(0,y) # part de dalt
x = max(-(self.width - WIDTH), x) # dreta
y = max(-(self.height - HEIGHT), y) # part de baix
self.camera = pg.Rect(x, y, self.width, self.height)
In the 1960's there was a simple dialogue-like handler named Eliza. Since then there has been many changes and variations, and they have become known as "chat bots". Perhaps your NPCs could have an Eliza-like conversation with the player? It might be worth doing some research on these sort of natural text processors.
However if you mean simply choosing conversation topics from, say an [(a), (b), (c)] type list, these dialogues would typically be mapped out in a graph of choices. Have you read stories where you make choices to change the narrative? These are a simple graph of choices. You could draw them out on a piece of paper then encode them into a data-structure, maybe a python dictionary. Each option moves to another node in the graph. Perhaps they also loop around back upon themselves.