Finding an interesting error about calling 'super()' - deep-learning

While modifying a geometric network to address the PPI problem, I found that `super()` cannot be called in some circumstances.
The following version raises this error:
TypeError: super(type, obj): obj must be an instance or subtype of type
def forward(self, batch, level='residue', **kwargs):
    # NOTE(review): zero-argument super() is called inside a list comprehension.
    # On Python versions before 3.12 a comprehension body runs in its own implicit
    # function scope, so super() does not see this method's `self`; that matches the
    # reported "super(type, obj): obj must be an instance or subtype of type".
    # Binding `super().forward` to a local name before the comprehension avoids it.
    out = torch.cat([super().forward(graph, scatter_mean=False, dense=True) for graph in batch], dim=-1)
    # For atom-level output, select the rows at ca_idx shifted by the batch pointers.
    if level == 'atom': out = out[batch.ca_idx + batch.ptr[:-1]]
    return torch.sigmoid(out)
Notably, the batch has two items, i.e., two torch_geometric graphs.
However, the following way works fine for me.
def forward(self, batch, level='residue', **kwargs):
    """Encode both graphs of ``batch`` with the parent network and squash to (0, 1).

    The parent ``forward`` is called once per graph (``batch[0]`` and
    ``batch[1]``); the two results are concatenated along the last dimension.
    When ``level`` is ``'atom'`` only the rows at ``batch.ca_idx + batch.ptr[:-1]``
    are kept before the sigmoid is applied.
    """
    first = super().forward(batch[0], scatter_mean=False, dense=True)
    second = super().forward(batch[1], scatter_mean=False, dense=True)
    merged = torch.cat((first, second), dim=-1)
    if level == 'atom':
        merged = merged[batch.ca_idx + batch.ptr[:-1]]
    return torch.sigmoid(merged)

Related

4d convolution, i want to add conv4d to my model but the custom function doesn't work

I have a U-Net module and I want to replace its conv3d layers with conv4d. Searching on Google I found some custom conv4d functions, but I am having a hard time executing them.
Can someone show me the right way to call them and get rid of the errors?
Here are the two functions I tried (I get errors just from executing them).
**The first one I tried is from:**
> How to create a Keras layer to do a 4D convolutions (Conv4D)?
class Conv(Layer):
    """Channels-last N-dimensional convolution layer built on ``tf.nn.convolution``.

    Args:
        filters: Number of output channels.
        kernel_size: Sequence with one kernel extent per spatial dimension.
        padding: ``'VALID'`` or ``'SAME'``; accepted case-insensitively.
        **kwargs: Forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, filters, kernel_size, padding='VALID', **kwargs):
        super(Conv, self).__init__(**kwargs)
        self.filters = filters
        # Must be a tuple: build() concatenates it with other tuples.
        self.kernel_size = tuple(kernel_size)
        # compute_output_shape() accepted both spellings, so normalise once here
        # and hand the upper-case form to tf.nn.convolution as well.
        self.padding = padding.upper()

    # using channels last!!!
    def build(self, input_shape):
        """Create the kernel and bias weights for the given input shape."""
        spatial_dims = len(self.kernel_size)
        all_dims = len(input_shape)
        # spatial dimensions + batch size + channels
        assert all_dims == spatial_dims + 2, (
            'expected input rank {}, got {}'.format(spatial_dims + 2, all_dims)
        )

        # (spatial1, spatial2, ..., spatialN, input_channels, output_channels)
        kernel_shape = self.kernel_size + (input_shape[-1], self.filters)
        # Broadcastable over every axis except the channel axis.
        bias_shape = (1,) * (all_dims - 1) + (self.filters,)

        self.kernel = self.add_weight(name='kernel',
                                      shape=kernel_shape,
                                      initializer='uniform',
                                      trainable=True)
        self.bias = self.add_weight(name='bias',
                                    shape=bias_shape,
                                    initializer='zeros',
                                    trainable=True)
        self.built = True

    def call(self, inputs):
        """Convolve ``inputs`` with the kernel and add the bias."""
        # NOTE(review): check which spatial ranks tf.nn.convolution supports in the
        # installed TensorFlow version before relying on this for 4-D convolution.
        results = tf.nn.convolution(inputs, self.kernel, padding=self.padding)
        return results + self.bias

    def compute_output_shape(self, input_shape):
        """Return the output shape as a tuple: (batch, *spatial, filters)."""
        sizes = tuple(input_shape[1:-1])
        if self.padding.upper() == 'VALID':
            # Unknown (None) extents stay unknown instead of raising on arithmetic.
            sizes = tuple(
                None if s is None else s - k_size + 1
                for s, k_size in zip(sizes, self.kernel_size)
            )
        # Every part is a tuple, so the concatenation cannot mix tuple and list.
        return tuple(input_shape[:1]) + sizes + (self.filters,)
**And the second one is here:**
https://github.com/Vincentx15/Conv4D/blob/main/Conv4DTF.py

Is there a good way to access batch id in pytorch datasets using dataloaders?

In PyTorch datasets, individual samples are accessed by implementing the __getitem__ method, but it seems to me that there is no natural way to get the batch id for the extracted sample. One may argue that batch ids should be handled outside the dataset (e.g. in training loops or similar), but I want to modify the processing of the sample when it is retrieved, based on the batch id.
I have a hacked solution given below, but I am wondering if there are better ways of doing this.
The "solution" below doesn't work when using num_workers > 1, so it is non-functional.
from typing import List, Tuple, Iterator
from torch.utils.data import RandomSampler, Dataset, DataLoader, BatchSampler
class intwithbtx(int):
    """An ``int`` that also carries the number of the batch it was drawn for.

    The value behaves like a plain integer (so it can index a list), and the
    extra ``btx_number`` attribute holds the batch number.
    """

    def __new__(cls, theint: int, btx: int):
        """Create the integer ``theint`` tagged with batch number ``btx``."""
        x = int.__new__(cls, theint)
        x.btx_number = btx
        return x

    def __getnewargs__(self):
        """Return the arguments ``__new__`` needs when the value is copied or unpickled.

        The version inherited from ``int`` supplies only the integer value, so
        reconstruction would call ``__new__`` without ``btx`` and raise
        ``TypeError``. That happens whenever an instance is pickled, e.g. when it
        is sent to another process.
        """
        return int(self), self.btx_number
class IdBatchSampler(BatchSampler):
    """Batch sampler whose indices are tagged with the number of their batch."""

    def __iter__(self) -> Iterator[List[int]]:
        """Yield lists of ``intwithbtx`` indices, ``batch_size`` at a time.

        A trailing, shorter batch is yielded only when ``drop_last`` is false.
        """
        batch_no = 0
        pending = []
        for sample_idx in self.sampler:
            pending.append(intwithbtx(sample_idx, batch_no))
            if len(pending) != self.batch_size:
                continue
            yield pending
            batch_no += 1
            pending = []
        if pending and not self.drop_last:
            yield pending
class RangeDataset(Dataset):
    """Toy dataset holding the integers ``0 .. lgt - 1``."""

    def __init__(self, lgt: int):
        """Store the ``lgt`` consecutive integers starting at zero."""
        self.data = [*range(lgt)]

    def __len__(self):
        """Return the number of stored values."""
        return len(self.data)

    def __getitem__(self, item: intwithbtx):
        """Return the stored value shifted by 1000 times the item's batch number."""
        value = self.data[item]
        return value + item.btx_number * 1000
if __name__ == '__main__':
    # Demo: 30 samples drawn in random order, three per batch, short last batch kept.
    dataset = RangeDataset(30)
    batch_sampler = IdBatchSampler(
        RandomSampler(range(len(dataset))),
        batch_size=3,
        drop_last=False,
    )

    # First pass: load in the main process.
    for batch in DataLoader(dataset, batch_sampler=batch_sampler):
        print(batch)

    # Second pass: same sampler with two worker processes (the author marks this
    # run as failing).
    for batch in DataLoader(dataset, batch_sampler=batch_sampler, num_workers=2):
        print(batch)

MNIST Shard Descriptor: IndexError: list index out of range

I am working on Federated Learning experiments using Intel OpenFL. I want to distribute my dataset (MNIST) using different non-iidness scenarios.
I am following their official documentation: https://openfl.readthedocs.io/en/latest/source/utilities/splitters_data.html
This is my original working code:
"""Mnist Shard Descriptor."""
import logging
import os
from typing import List
import numpy as np
import requests
from openfl.interface.interactive_api.shard_descriptor import ShardDataset
from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor
logger = logging.getLogger(__name__)
class MnistShardDataset(ShardDataset):
    """Strided slice of the MNIST arrays belonging to one participant."""

    def __init__(self, x, y, data_type, rank=1, worldsize=1):
        """Keep every ``worldsize``-th sample, starting at index ``rank - 1``."""
        self.data_type = data_type
        self.rank = rank
        self.worldsize = worldsize
        # One slice object so features and labels are guaranteed to stay aligned.
        own_part = slice(self.rank - 1, None, self.worldsize)
        self.x = x[own_part]
        self.y = y[own_part]

    def __len__(self):
        """Return the number of samples in this shard."""
        return len(self.x)

    def __getitem__(self, index: int):
        """Return the ``(sample, label)`` pair stored at ``index``."""
        return self.x[index], self.y[index]
class MnistShardDescriptor(ShardDescriptor):
    """Mnist Shard descriptor class."""

    def __init__(
            self,
            rank_worldsize: str = '1, 1',
            **kwargs
    ):
        """Initialize MnistShardDescriptor.

        Args:
            rank_worldsize: ``'<rank>, <worldsize>'`` as a comma-separated string.
            **kwargs: Accepted for compatibility; not used here.
        """
        self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(','))
        (x_train, y_train), (x_test, y_test) = self.download_data()
        self.data_by_type = {
            'train': (x_train, y_train),
            'val': (x_test, y_test)
        }

    def get_shard_dataset_types(self) -> List[str]:
        """Get available shard dataset types."""
        return list(self.data_by_type)

    def get_dataset(self, dataset_type='train'):
        """Return a shard dataset by type.

        Raises:
            Exception: If ``dataset_type`` is not one of the available types.
        """
        if dataset_type not in self.data_by_type:
            raise Exception(f'Wrong dataset type: {dataset_type}')
        return MnistShardDataset(
            *self.data_by_type[dataset_type],
            data_type=dataset_type,
            rank=self.rank,
            worldsize=self.worldsize
        )

    # The three accessors below are properties again: the decorator had been
    # mangled into a "#property" comment, which left them as plain methods.
    @property
    def sample_shape(self):
        """Return the sample shape info."""
        return ['28', '28', '1']

    @property
    def target_shape(self):
        """Return the target shape info."""
        # NOTE(review): identical to sample_shape; confirm this is the intended
        # shape for the labels.
        return ['28', '28', '1']

    @property
    def dataset_description(self) -> str:
        """Return the dataset description."""
        return (f'Mnist dataset, shard number {self.rank}'
                f' out of {self.worldsize}')

    def download_data(self):
        """Download prepared dataset.

        Returns:
            ``((x_train, y_train), (x_test, y_test))`` as loaded from the archive.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        local_file_path = 'mnist.npz'
        mnist_url = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz'
        response = requests.get(mnist_url)
        # Fail here on an HTTP error instead of handing an error page to np.load.
        response.raise_for_status()
        try:
            with open(local_file_path, 'wb') as f:
                f.write(response.content)
            with np.load(local_file_path) as f:
                # Images are kept in the layout stored in the archive (the
                # flattening to 784 columns is intentionally disabled).
                x_train, y_train = f['x_train'], f['y_train']
                x_test, y_test = f['x_test'], f['y_test']
        finally:
            # Remove mnist.npz even when writing or loading failed.
            if os.path.exists(local_file_path):
                os.remove(local_file_path)
        print('Mnist data was loaded!')
        return (x_train, y_train), (x_test, y_test)
Basically, I changed the MnistShardDescriptor class in both my 2 nodes of the federation in this way:
...
class MnistShardDescriptor(ShardDescriptor):
    """Mnist Shard descriptor class."""

    def __init__(
            self,
            rank_worldsize: str = '1, 1',
            **kwargs
    ):
        """Initialize MnistShardDescriptor."""
        self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(','))
        (x_train, y_train), (x_test, y_test) = self.download_data()
        train_splitter = RandomNumPyDataSplitter()
        test_splitter = RandomNumPyDataSplitter()
        # NOTE(review): rank looks 1-based (the default is '1, 1' and the original
        # dataset class slices from `rank - 1`), while the list returned by split()
        # is indexed from 0. Presumably it has `worldsize` entries (confirm against
        # the splitter docs), so `[self.rank]` is out of range for the highest rank.
        # That matches the reported IndexError; `[self.rank - 1]` is the workaround
        # given at the end of the post.
        train_idx = train_splitter.split(y_train, self.worldsize)[self.rank]
        test_idx = test_splitter.split(y_test, self.worldsize)[self.rank]
        x_train_shard = x_train[train_idx]
        x_test_shard = x_test[test_idx]
        # NOTE(review): the shards computed above are not used; the full arrays are
        # stored below, and the labels are never indexed with train_idx / test_idx.
        self.data_by_type = {
            'train': (x_train, y_train),
            'val': (x_test, y_test)
        }
...
I get this error at the train_idx line: `IndexError: list index out of range`, but only on one of the 2 nodes. I do not know why, because the code is exactly the same on both nodes of my federation.
EDIT: I changed the position of the code I have written above, and in particular I wrote in the class MnistShardDataset rather than MnistShardDescriptor:
class MnistShardDataset(ShardDataset):
    """Mnist Shard dataset class."""

    def __init__(self, x, y, data_type, rank=1, worldsize=1):
        """Initialize MNISTDataset."""
        self.data_type = data_type
        self.rank = rank
        self.worldsize = worldsize
        # First reduction: strided slice starting at the 0-based position rank - 1.
        self.x = x[self.rank - 1::self.worldsize]
        self.y = y[self.rank - 1::self.worldsize]
        # Second reduction: random split of the already-sliced labels.
        train_splitter = RandomNumPyDataSplitter()
        #test_splitter = RandomNumPyDataSplitter()
        # NOTE(review): `self.rank` is used as a 0-based list index here although
        # the slice above treats it as 1-based. Presumably split() returns
        # `worldsize` entries (confirm), so the highest rank indexes past the end.
        train_idx = train_splitter.split(self.y, self.worldsize)[self.rank]
        #test_idx = test_splitter.split(self.y, self.worldsize)[self.rank]
        x_train_shard = self.x[train_idx]
        #x_test_shard = self.x[test_idx]
        # NOTE(review): only self.x is replaced; self.y keeps its pre-split length,
        # so samples and labels no longer line up by index.
        self.x = x_train_shard
With this I am able to create the federation and, on the same node as the director, the clients start training; the split is truly random, because I ran the experiment twice and each time the envoy had a different number of samples. However, on the other node (I am using 2 nodes, one for each envoy; OpenFL calls the worker on a client an envoy) I get the same error: IndexError: list index out of range…
EDIT2: here is an example of data split using openFL: https://github.com/intel/openfl/blob/develop/openfl-tutorials/interactive_api/PyTorch_Kvasir_UNet/envoy/kvasir_shard_descriptor_with_data_splitter.py
However my dataset is different, and I am not succeeding in adapting this solution. Any other example can you suggest to me, about sharding a dataset like MNIST? A tutorial to follow?
Entire error:
File "/home/lmancuso/envoymnist/mnist_shard_descriptor_with_data_splitter.py", line 61, in __init__
train_idx = train_splitter.split(y_train, self.worldsize)[self.rank]
IndexError: list index out of range
EDIT: interesting point: If I change the dimension of my federation, increasing from 2 to 3 the rank_worldsize inside the envoy_config.yaml, training starts (and the dataset is divided in a random way, so it works, because each node has different number of samples). However it works only because I have 2 nodes, but I created a federation of 3 without the 3 node. Indeed the samples are 8064 for one node and 9856 for another node. However considering that I have 60000 training samples in MNIST, all the remaining samples got lost, because they are supposed to be in the last node (which does not exist).
The only solution I found until now is to reduce the rank of each envoy:
train_idx = train_splitter.split(self.y, self.worldsize)[self.rank-1]

PyTorch: Target 1 is out of bounds

I am new to Deep Learning and wondering how to modify my model to fix it.
It says "Target 1 is out of bounds", so which parameter should I change to make it work? When the output size is changed to 2, it runs. However, the goal of the model is a 2-class classification. Also, when the output size is 2, the training loss becomes NaN.
The data is a dataframe with shape (15958, 4) transformed into tensor format.
Sorry Split_NN is a class:
# SplitNN
# to protect privacy and split
class SplitNN:
    """Chain of model segments that may live on different locations.

    Each segment's output is detached and, if the next segment is somewhere
    else, moved there before being fed forward. ``backward`` then carries the
    gradients back across those cut points.

    Args:
        models: Ordered model segments; each exposes ``.location`` and is callable.
        optimizers: Objects exposing ``zero_grad()`` and ``step()``.
    """

    def __init__(self, models, optimizers):
        self.models = models
        self.optimizers = optimizers

        # Outputs of every segment from the most recent forward pass.
        self.data = []
        # Detached copies handed to the following segment (one fewer than data).
        self.remote_tensors = []

    @staticmethod
    def _hand_over(output, destination):
        """Detach ``output`` and make it a gradient leaf at ``destination``."""
        if output.location == destination:
            return output.detach().requires_grad_()
        return output.detach().move(destination).requires_grad_()

    def forward(self, x):
        """Run ``x`` through every segment in order and return the last output.

        Uses ``self.models`` throughout; the previous version read a module-level
        ``models`` name for the segment count. A single-segment chain is also
        supported.
        """
        data = []
        remote_tensors = []

        segment_input = x
        last = len(self.models) - 1
        for i, model in enumerate(self.models):
            data.append(model(segment_input))
            if i < last:
                # Cut the graph here and pass a fresh leaf to the next segment.
                segment_input = self._hand_over(data[-1], self.models[i + 1].location)
                remote_tensors.append(segment_input)

        self.data = data
        self.remote_tensors = remote_tensors
        return data[-1]

    def backward(self):
        """Propagate gradients from each cut point back into the previous segment.

        Expects ``forward`` to have run and the loss on the final output to have
        been back-propagated, so the handed-over tensors hold a ``.grad``.
        """
        for i in range(len(self.models) - 2, -1, -1):
            grads = self.remote_tensors[i].grad.copy()
            if self.remote_tensors[i].location != self.data[i].location:
                # Bring the gradient back to where the producing segment lives.
                grads = grads.move(self.data[i].location)
            self.data[i].backward(grads)

    def zero_grads(self):
        """Reset the gradients held by every optimizer."""
        for opt in self.optimizers:
            opt.zero_grad()

    def step(self):
        """Apply one update with every optimizer."""
        for opt in self.optimizers:
            opt.step()
Below are the codes:
Model set up: The Model is a sequential deep learning model, which I tried to use nn.linear to generated binary prediction.
# Fix the seed so weight initialisation is repeatable.
torch.manual_seed(0)
# Define our model segments
input_size = 3
hidden_sizes = [128, 640]
# NOTE(review): the last segment ends in LogSoftmax and is trained with NLLLoss,
# which indexes the output columns by class label. With a single output column
# only label 0 is valid, which matches the reported "Target 1 is out of bounds".
output_size = 1
# original models
# Segment 0: input -> 128 -> 640 with ReLU; segment 1: 640 -> output_size, log-probabilities.
models = [
    nn.Sequential(
        nn.Linear(input_size, hidden_sizes[0]),
        nn.ReLU(),
        nn.Linear(hidden_sizes[0], hidden_sizes[1]),
        nn.ReLU(),
    ),
    nn.Sequential(nn.Linear(hidden_sizes[1], output_size), nn.LogSoftmax(dim=1)),
]
# Create optimisers for each segment and link to them
optimizers = [
    optim.SGD(model.parameters(), lr=0.03,)
    for model in models
]
Train model is here:
def train(x, target, splitNN):
    """Run one optimisation step of ``splitNN`` on a single batch.

    Args:
        x: Input batch passed to ``splitNN.forward``.
        target: Labels compared against the prediction with ``nn.NLLLoss``.
        splitNN: Object exposing ``zero_grads``, ``forward``, ``backward``, ``step``.

    Returns:
        Tuple ``(loss, pred)`` for this batch.
    """
    # Start from clean gradients.
    splitNN.zero_grads()

    # Forward pass through all segments.
    pred = splitNN.forward(x)

    # Negative log-likelihood of the prediction against the targets.
    loss = nn.NLLLoss()(pred, target)

    # Back-propagate through the final segment first, then through the earlier ones.
    loss.backward()
    splitNN.backward()

    # Apply the weight update.
    splitNN.step()
    return loss, pred
Finally the training part, also the part where problem happen:
The send function assigns the data to the nodes, because this is set up to simulate federated learning.
# Train for `epochs` passes over `dataloader`, printing loss and accuracy per pass.
for i in range(epochs):
    # Per-epoch accumulators.
    running_loss = 0
    correct_preds = 0
    total_preds = 0
    # Each item pairs (features, ids) with (labels, ids); the ids are not used here.
    for (data, ids1), (labels, ids2) in dataloader:
        # Train a model
        # Send the features to the first segment's location and flatten each sample to one row.
        data = data.send(models[0].location)
        data = data.view(data.shape[0], -1)
        # Labels go to the last segment's location (presumably where the loss is evaluated).
        labels = labels.send(models[-1].location)
        # Call model
        loss, preds = train(data.float(), labels, splitNN)
        # Collect statistics
        running_loss += loss.get()
        # Index of the largest output along dim 1 is compared with the label.
        correct_preds += preds.max(1)[1].eq(labels).sum().get().item()
        total_preds += preds.get().size(0)
    print(f"Epoch {i} - Training loss: {running_loss/len(dataloader):.3f} - Accuracy: {100*correct_preds/total_preds:.3f}")
The error show the problem occurs at loss, preds = train(data.float(), labels, splitNN)
The actual error message:
During handling of the above exception, another exception occurred:
IndexError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
1836 .format(input.size(0), target.size(0)))
1837 if dim == 2:
-> 1838 ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
1839 elif dim == 4:
1840 ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
IndexError: Target 1 is out of bounds.
Please help me. Thank you

How do I compute bootstrapped cross entropy loss in PyTorch?

I have read some papers that use something called "Bootstrapped Cross Entropy Loss" to train their segmentation network. The idea is to focus only on the hardest k% (say 15%) of the pixels into account to improve learning performance, especially when easy pixels dominate.
Currently, I am using the standard cross entropy:
loss = F.binary_cross_entropy(mask, gt)
How do I convert this to the bootstrapped version efficiently in PyTorch?
Often we would also add a "warm-up" period to the loss such that the network can learn to adapt to the easy regions first and transit to the harder regions.
This implementation starts from k=100 and continues for 20000 iterations, then linearly decay it to k=15 for another 50000 iterations.
class BootstrappedCE(nn.Module):
    """Cross entropy averaged over only the hardest fraction of elements.

    Before ``start_warm`` iterations the ordinary mean cross entropy is used.
    Between ``start_warm`` and ``end_warm`` the kept fraction decays linearly
    from 1 to ``top_p``; from ``end_warm`` on it stays at ``top_p``.

    Args:
        start_warm: Iteration at which the kept fraction starts to shrink.
        end_warm: Iteration at which the kept fraction reaches ``top_p``.
        top_p: Final fraction of elements (largest losses) that are averaged.
    """

    def __init__(self, start_warm=20000, end_warm=70000, top_p=0.15):
        super().__init__()

        self.start_warm = start_warm
        self.end_warm = end_warm
        self.top_p = top_p

    def forward(self, input, target, it):
        """Return ``(loss, fraction_used)`` for iteration ``it``."""
        if it < self.start_warm:
            return F.cross_entropy(input, target), 1.0

        raw_loss = F.cross_entropy(input, target, reduction='none').view(-1)
        num_pixels = raw_loss.numel()

        # `>=` gives the same value as the interpolation at it == end_warm and
        # avoids a division by zero when start_warm == end_warm.
        if it >= self.end_warm:
            this_p = self.top_p
        else:
            remaining = (self.end_warm - it) / (self.end_warm - self.start_warm)
            this_p = self.top_p + (1 - self.top_p) * remaining

        # Keep at least one element: on small inputs the truncated count can be
        # zero, and the mean of an empty selection is NaN.
        k = min(num_pixels, max(1, int(num_pixels * this_p)))
        loss, _ = torch.topk(raw_loss, k, sorted=False)
        return loss.mean(), this_p
An addition to the self-answer by @hkchengrex (for my future self and for API parity with PyTorch):
one could implement a functional version first (with some of the additional arguments of the original torch.nn.functional.cross_entropy) like this (I also prefer reduction to be a callable instead of one of the predefined strings):
import typing
import torch
def bootstrapped_cross_entropy(
    inputs,
    targets,
    iteration,
    p: float,
    warmup: typing.Union[typing.Callable[[float, int], float], int] = -1,
    weight=None,
    ignore_index=-100,
    reduction: typing.Callable[[torch.Tensor], torch.Tensor] = torch.mean,
):
    """Cross entropy reduced over only the hardest fraction of elements.

    Args:
        inputs: Predictions passed to ``torch.nn.functional.cross_entropy``.
        targets: Targets passed to ``torch.nn.functional.cross_entropy``.
        iteration: Current iteration, used to resolve the warm-up schedule.
        p: Fraction of elements (largest losses) to keep; strictly between 0 and 1.
        warmup: Either an int (every element is used while ``iteration < warmup``)
            or a callable ``(p, iteration) -> fraction``.
        weight: Forwarded to ``cross_entropy``.
        ignore_index: Forwarded to ``cross_entropy``.
        reduction: Callable applied to the 1-D tensor of selected losses.

    Returns:
        ``reduction`` applied to the selected per-element losses.

    Raises:
        ValueError: If ``p`` is outside (0, 1) or ``warmup`` has an unsupported type.
    """
    if not 0 < p < 1:
        raise ValueError("p should be in (0, 1) range, got: {}".format(p))

    if isinstance(warmup, int):
        this_p = 1.0 if iteration < warmup else p
    elif callable(warmup):
        this_p = warmup(p, iteration)
    else:
        raise ValueError(
            "warmup should be int or callable, got {}".format(type(warmup))
        )

    raw_loss = torch.nn.functional.cross_entropy(
        inputs, targets, weight=weight, ignore_index=ignore_index, reduction="none"
    ).view(-1)

    # Shortcut: every element is used. The callable is applied here because
    # cross_entropy itself only accepts string reductions.
    if this_p >= 1.0:
        return reduction(raw_loss)

    num_pixels = raw_loss.numel()
    # Keep at least one element so a small input never yields an empty selection.
    k = min(num_pixels, max(1, int(num_pixels * this_p)))
    loss, _ = torch.topk(raw_loss, k, sorted=False)
    return reduction(loss)
Also warmup can be specified as callable (taking p and current iteration) or int which allows for flexible or easy scheduling.
And a class based on _WeightedLoss, with the iteration incremented automatically on each call (so only inputs and targets have to be passed):
class BoostrappedCrossEntropy(torch.nn.modules.loss._WeightedLoss):
    """Module wrapper around ``bootstrapped_cross_entropy`` that counts its own calls.

    Args:
        p: Fraction of elements to keep, forwarded to the functional version.
        warmup: Warm-up schedule (int or callable), forwarded unchanged.
        weight: Class weights handed to the ``_WeightedLoss`` base.
        ignore_index: Target value to ignore, forwarded unchanged.
        reduction: Callable applied to the selected losses.
    """

    def __init__(
        self,
        p: float,
        warmup: typing.Union[typing.Callable[[float, int], float], int] = -1,
        weight=None,
        ignore_index=-100,
        reduction: typing.Callable[[torch.Tensor], torch.Tensor] = torch.mean,
    ):
        super().__init__(weight, size_average=None, reduce=None, reduction=reduction)
        self.p = p
        self.warmup = warmup
        self.ignore_index = ignore_index
        # Starts at -1 so the first forward() call runs as iteration 0.
        self._current_iteration = -1

    def forward(self, inputs, targets):
        """Advance the iteration counter and return the bootstrapped loss."""
        self._current_iteration += 1
        return bootstrapped_cross_entropy(
            inputs,
            targets,
            iteration=self._current_iteration,
            p=self.p,
            warmup=self.warmup,
            weight=self.weight,
            ignore_index=self.ignore_index,
            reduction=self.reduction,
        )