Why is RandomCrop with size 84 and padding 8 returning an image size of 84 and not 100 in pytorch? - deep-learning

I was using the mini-imagenet data set and noticed this code:
elif data_augmentation == 'lee2019':
    normalize = Normalize(
        mean=[120.39586422 / 255.0, 115.59361427 / 255.0, 104.54012653 / 255.0],
        std=[70.68188272 / 255.0, 68.27635443 / 255.0, 72.54505529 / 255.0],
    )
    train_data_transforms = Compose([
        ToPILImage(),
        RandomCrop(84, padding=8),
        ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
        RandomHorizontalFlip(),
        ToTensor(),
        normalize,
    ])
    test_data_transforms = Compose([
        normalize,
    ])
but when I checked the image size it was 84 instead of 100 (after adding padding):
X.size()=torch.Size([50, 3, 84, 84])
what is going on with this? Shouldn't it be 100?
reproduction:
import random
from typing import Callable

import learn2learn as l2l
import numpy as np
import torch
from learn2learn.data import TaskDataset, MetaDataset, DataDescription
from learn2learn.data.transforms import TaskTransform
from torch.utils.data import Dataset


class IndexableDataSet(Dataset):
    def __init__(self, datasets):
        self.datasets = datasets

    def __len__(self) -> int:
        return len(self.datasets)

    def __getitem__(self, idx: int):
        return self.datasets[idx]


class SingleDatasetPerTaskTransform(Callable):
    """
    Transform that samples a data set first, then creates a task (e.g. n-way, k-shot) and finally
    applies the remaining task transforms.
    """

    def __init__(self, indexable_dataset: IndexableDataSet, cons_remaining_task_transforms: Callable):
        """
        :param cons_remaining_task_transforms: constructor that builds the remaining task transforms. Cannot be a list
        of transforms because we don't know a priori which data set we will use. So this function should be of
        type MetaDataset -> list[TaskTransform], i.e. given the dataset it returns the transforms for it.
        """
        self.indexable_dataset = MetaDataset(indexable_dataset)
        self.cons_remaining_task_transforms = cons_remaining_task_transforms

    def __call__(self, task_description: list):
        """
        Idea:
        - receive the index of the dataset to use
        - then use the normal NWays l2l transforms
        """
        # - this is what I wish could have gone in a separate callable transform, but I don't know how,
        # since the transforms take the data set to use a priori (not dynamically).
        i = random.randint(0, len(self.indexable_dataset) - 1)
        task_description = [DataDescription(index=i)]  # using this to follow the l2l convention
        # - get the sampled data set
        dataset_index = task_description[0].index
        dataset = self.indexable_dataset[dataset_index]
        dataset = MetaDataset(dataset)
        # - use the sampled data set to create the task
        remaining_task_transforms: list[TaskTransform] = self.cons_remaining_task_transforms(dataset)
        description = None
        for transform in remaining_task_transforms:
            description = transform(description)
        return description


def sample_dataset(dataset):
    def sample_random_dataset(x):
        print(f'{x=}')
        i = random.randint(0, len(dataset) - 1)
        return [DataDescription(index=i)]
        # return dataset[i]
    return sample_random_dataset


def get_task_transforms(dataset: IndexableDataSet) -> list[TaskTransform]:
    transforms = [
        sample_dataset(dataset),
        l2l.data.transforms.NWays(dataset, n=5),
        l2l.data.transforms.KShots(dataset, k=5),
        l2l.data.transforms.LoadData(dataset),
        l2l.data.transforms.RemapLabels(dataset),
        l2l.data.transforms.ConsecutiveLabels(dataset),
    ]
    return transforms


def print_datasets(dataset_lst: list):
    for dataset in dataset_lst:
        print(f'\n{dataset=}\n')


def get_indexable_list_of_datasets_mi_and_cifarfs(root: str = '~/data/l2l_data/') -> IndexableDataSet:
    from learn2learn.vision.benchmarks import mini_imagenet_tasksets
    datasets, transforms = mini_imagenet_tasksets(root=root)
    mi = datasets[0].dataset

    from learn2learn.vision.benchmarks import cifarfs_tasksets
    datasets, transforms = cifarfs_tasksets(root=root)
    cifarfs = datasets[0].dataset

    dataset_list = [mi, cifarfs]
    dataset_list = [l2l.data.MetaDataset(dataset) for dataset in dataset_list]
    dataset = IndexableDataSet(dataset_list)
    return dataset


# -- tests

def loop_through_l2l_indexable_datasets_test():
    # - for determinism
    random.seed(0)
    torch.manual_seed(0)
    np.random.seed(0)

    # - options for number of tasks/meta-batch size
    batch_size: int = 10

    # - create indexable data set
    indexable_dataset: IndexableDataSet = get_indexable_list_of_datasets_mi_and_cifarfs()

    # - get task transforms
    def get_remaining_transforms(dataset: MetaDataset) -> list[TaskTransform]:
        remaining_task_transforms = [
            l2l.data.transforms.NWays(dataset, n=5),
            l2l.data.transforms.KShots(dataset, k=5),
            l2l.data.transforms.LoadData(dataset),
            l2l.data.transforms.RemapLabels(dataset),
            l2l.data.transforms.ConsecutiveLabels(dataset),
        ]
        return remaining_task_transforms

    task_transforms: TaskTransform = SingleDatasetPerTaskTransform(indexable_dataset, get_remaining_transforms)

    taskset: TaskDataset = TaskDataset(dataset=indexable_dataset, task_transforms=task_transforms)

    # - loop through tasks
    for task_num in range(batch_size):
        print(f'{task_num=}')
        X, y = taskset.sample()
        print(f'{X.size()=}')
        print(f'{y.size()=}')
        print(f'{y=}')
        print()

    print('-- end of test --')


# -- Run experiment

if __name__ == "__main__":
    import time
    from uutils import report_times

    start = time.time()
    # - run experiment
    loop_through_l2l_indexable_datasets_test()
    # - done
    print(f"\nSuccess Done!: {report_times(start)}\a")
context: https://github.com/learnables/learn2learn/issues/333
crossposted:
https://discuss.pytorch.org/t/why-is-randomcrop-with-size-84-and-padding-8-returning-an-image-size-of-84-and-not-100-in-pytorch/151463
https://www.reddit.com/r/pytorch/comments/uno1ih/why_is_randomcrop_with_size_84_and_padding_8/

The padding is applied to the input image or tensor before the random crop is taken. The input is first padded from 84x84 to 100x100, and only then is an 84x84 crop sampled from the padded image. So the output always has exactly the spatial size passed to T.RandomCrop.
After all, it makes more sense to pad the input image rather than the cropped image, doesn't it?
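You can verify the order of operations directly. A minimal sketch, assuming a recent torchvision where the transforms accept tensor inputs:

import torch
from torchvision.transforms import RandomCrop
from torchvision.transforms import functional as F

img = torch.rand(3, 84, 84)
# what RandomCrop(84, padding=8) does internally, step by step:
padded = F.pad(img, [8, 8, 8, 8])  # 1) pad every side by 8 -> 3 x 100 x 100
print(padded.shape)                # torch.Size([3, 100, 100])
crop = RandomCrop(84)(padded)      # 2) sample an 84 x 84 crop from the padded image
print(crop.shape)                  # torch.Size([3, 84, 84])
# the fused version produces the same output size:
print(RandomCrop(84, padding=8)(img).shape)  # torch.Size([3, 84, 84])

The 100x100 image only exists as an intermediate result; the crop that follows always brings the output back to the requested 84x84.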

Related

System exit 1 error after obtaining NaN losses from finetuning Mask R-CNN in Pytorch

I am following this tutorial from PyTorch for finetuning a pre-trained model on my own dataset. I have my annotations in COCO format in a json file, so I first implemented the dataloader as follows:
import torch
import json
from torch.utils.data import Dataset
from pycocotools.coco import COCO
from PIL import Image
import os
import numpy as np
from torchvision import transforms
import Config
import transforms as T
from torchvision.transforms import functional as F


class CustomDataset(Dataset):
    def __init__(self, root, json_file, transform=None):
        self.root = root
        with open(json_file) as f:
            self.data = json.load(f)
        self.transform = transform
        self.image_ids = [img["id"] for img in self.data["images"]]
        self.imgs = list(sorted(os.listdir(os.path.join(root, "Images"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "Masks"))))

    def __getitem__(self, idx):
        # Get image ID
        img_id = self.image_ids[idx]
        img = next(image for image in self.data["images"] if image["id"] == img_id)
        img_path = os.path.join(self.root, "Images")
        mask_path = os.path.join(self.root, "Masks")
        # Load image
        image = Image.open(os.path.join(img_path, img['file_name'])).convert("RGB")
        # extract annotations from the json file
        annotations = [ann for ann in self.data["annotations"] if ann["image_id"] == img_id]
        # extract labels from annotations
        labels = [ann["label"] for ann in annotations]
        # convert labels to integers
        labels = [label for label in labels]
        labels = torch.as_tensor(labels, dtype=torch.int64)
        # extract boxes and convert them to format [x1, y1, x2, y2]
        boxes = [ann["bbox"] for ann in annotations]
        boxes = [[bbox[0], bbox[1], bbox[2], bbox[3]] for bbox in boxes]
        num_objects = len(boxes)
        # read the mask and include the number of objects in the first dimension
        mask = np.array(Image.open(os.path.join(mask_path, img['file_name'])).convert("L"))
        # Check if mask is empty
        if mask.size == 0:
            mask = np.zeros((num_objects, 1, 1), dtype=np.uint8)
        else:
            mask = np.expand_dims(mask, axis=0)
            mask = np.repeat(mask, num_objects, axis=0)
        # convert the binary mask array to a torch tensor
        mask = torch.as_tensor(mask, dtype=torch.uint8)
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objects,), dtype=torch.int64)
        # convert bboxes to tensors
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # calculate the area of the bounding box
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # convert id to tensor
        image_id = torch.tensor([idx])
        # create target dictionary
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = mask
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        # apply the transform if any
        if self.transform is not None:
            image, target = self.transform(image, target)
        return image, target

    def __len__(self):
        return len(self.imgs)
and I am using this code for training:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from engine import train_one_epoch
import utils
import transforms as T
from dataloader import CustomDataset
import Config
import torch
import utils
from tqdm import tqdm
from torch.optim.lr_scheduler import StepLR
from torchvision.transforms import functional as F


def get_instance_segmentation_model(num_classes):
    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)
    return model


def get_transform(train):
    transforms = []
    # converts the image, a PIL image, into a PyTorch Tensor
    transforms.append(T.PILToTensor())
    if train:
        # during training, randomly flip the training images
        # and ground-truth for data augmentation
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)


json_path = 'annotations.json'
# use our dataset and defined transformations
dataset = CustomDataset(root=Config.Dataset_dir, json_file=json_path, transform=get_transform(train=True))
# for image, target in dataset:
#     print(image.shape)

# split the dataset in train and test set
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-500])
dataset_test = torch.utils.data.Subset(dataset, indices[-500:])

# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=1, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)
data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)

device = Config.DEVICE

# our dataset has two classes only - background and person
num_classes = 2
# get the model using our helper function
model = get_instance_segmentation_model(num_classes)
# move model to the right device
model.to(device)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.1,
                            momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

# let's train it for 10 epochs
num_epochs = 10
for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)
This training code, as stated in the tutorial, uses some helper functions which can be accessed from here. I have run the training code and the training works for the first 10 samples in the data, but then it gives the following error:
Epoch: [0] [ 0/2759] eta: 13:29:50 lr: 0.000200 loss: -136.8811 (-136.8811) loss_classifier: 0.9397 (0.9397) loss_box_reg: 0.0017 (0.0017) loss_mask: -137.9142 (-137.9142) loss_objectness: 0.0859 (0.0859) loss_rpn_box_reg: 0.0057 (0.0057) time: 17.6117 data: 10.0775
Loss is nan, stopping training
{'loss_classifier': tensor(nan, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(nan, grad_fn=<DivBackward0>), 'loss_mask': tensor(nan, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_objectness': tensor(nan, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(nan, grad_fn=<DivBackward0>)}
An exception has occurred, use %tb to see the full traceback.
SystemExit: 1
This error is raised from the engine.py train_one_epoch function, especially from this part of the function:
with torch.cuda.amp.autocast(enabled=scaler is not None):
loss_dict = model(images, targets)
losses = sum(loss for loss in loss_dict.values())
# reduce losses over all GPUs for logging purposes
loss_dict_reduced = utils.reduce_dict(loss_dict)
losses_reduced = sum(loss for loss in loss_dict_reduced.values())
loss_value = losses_reduced.item()
if not math.isfinite(loss_value):
print(f"Loss is {loss_value}, stopping training")
print(loss_dict_reduced)
sys.exit(1)
which indicates that the losses returned after the first iteration are NaN. What could be wrong here? I am running out of ideas and don't know what's going wrong anymore.
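Not an answer from the original thread, but one observation that may help narrow it down: the very first log line already shows loss_mask: -137.9142, and a negative binary-cross-entropy-with-logits loss usually means the mask targets are not binary (e.g. grayscale 0-255 masks), while boxes left in COCO's [x, y, w, h] layout look degenerate to a model that expects [x1, y1, x2, y2]. A hedged sanity-check sketch, assuming dataset is the CustomDataset defined above:

# scan the dataset for target values that commonly produce NaN/negative losses
for i in range(len(dataset)):
    image, target = dataset[i]
    boxes, masks = target["boxes"], target["masks"]
    # [x1, y1, x2, y2] boxes must have positive width and height;
    # raw COCO [x, y, w, h] boxes typically fail this check
    if not ((boxes[:, 2] > boxes[:, 0]).all() and (boxes[:, 3] > boxes[:, 1]).all()):
        print(f"degenerate box in sample {i}: {boxes}")
    # mask targets should be binary; values above 1 can drive loss_mask negative
    if masks.max() > 1:
        print(f"non-binary mask in sample {i}: max value {masks.max()}")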

Is there a good way to access batch id in pytorch datasets using dataloaders?

In pytorch datasets, the way to access individual samples is given by implementing the __getitem__ method, but it seems to me that there is no natural way to get the batch id for the extracted sample. One may argue that batch ids should be handled outside the dataset (e.g. in training loops or similar), but I want to modify the processing of the sample when it is retrieved, based on the batch id.
I have a hacked solution given below, but I am wondering if there are better ways of doing this.
The "solution" below doesn't work when using num_workers > 1, so it is non-functional.
from typing import List, Tuple, Iterator
from torch.utils.data import RandomSampler, Dataset, DataLoader, BatchSampler


class intwithbtx(int):
    def __new__(cls, theint: int, btx: int):
        x = int.__new__(cls, theint)
        x.btx_number = btx
        return x


class IdBatchSampler(BatchSampler):
    def __iter__(self) -> Iterator[List[int]]:
        batch = []
        iii = 0
        for idx in self.sampler:
            batch.append(intwithbtx(idx, iii))
            if len(batch) == self.batch_size:
                yield batch
                iii += 1
                batch = []
        if len(batch) > 0 and not self.drop_last:
            yield batch


class RangeDataset(Dataset):
    def __init__(self, lgt: int):
        self.data = list(range(lgt))

    def __getitem__(self, item: intwithbtx):
        dt = self.data[item] + item.btx_number * 1000
        return dt

    def __len__(self):
        return len(self.data)


if __name__ == '__main__':
    ds = RangeDataset(30)
    smp = IdBatchSampler(RandomSampler(range(len(ds))), batch_size=3, drop_last=False)
    loader = DataLoader(ds, batch_sampler=smp)
    for btx in loader:
        print(btx)

    loader2 = DataLoader(ds, batch_sampler=smp, num_workers=2)  # Fails.
    for btx in loader2:
        print(btx)
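For what it's worth, here is a sketch of a workaround, under the assumption that the num_workers > 1 failure comes from pickling the int subclass (its two-argument __new__ does not survive the worker round-trip): yield plain (index, batch_id) tuples from the batch sampler instead, since tuples pickle cleanly. TupleBatchSampler and TupleRangeDataset are made-up names, not library classes.

from typing import Iterator, List, Tuple
from torch.utils.data import BatchSampler, DataLoader, Dataset, RandomSampler


class TupleBatchSampler(BatchSampler):
    def __iter__(self) -> Iterator[List[Tuple[int, int]]]:
        batch, batch_id = [], 0
        for idx in self.sampler:
            batch.append((idx, batch_id))  # plain tuple instead of an int subclass
            if len(batch) == self.batch_size:
                yield batch
                batch_id += 1
                batch = []
        if len(batch) > 0 and not self.drop_last:
            yield batch


class TupleRangeDataset(Dataset):
    def __init__(self, n: int):
        self.data = list(range(n))

    def __getitem__(self, item: Tuple[int, int]):
        idx, batch_id = item  # unpack the (sample index, batch id) pair
        return self.data[idx] + batch_id * 1000

    def __len__(self):
        return len(self.data)


if __name__ == '__main__':
    ds = TupleRangeDataset(30)
    smp = TupleBatchSampler(RandomSampler(range(len(ds))), batch_size=3, drop_last=False)
    for btx in DataLoader(ds, batch_sampler=smp, num_workers=2):  # works with workers
        print(btx)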

MNIST Shard Descriptor: IndexError: list index out of range

I am working on Federated Learning experiments using Intel OpenFL. I want to distribute my dataset (MNIST) using different non-iidness scenarios.
I am following their official documentation: https://openfl.readthedocs.io/en/latest/source/utilities/splitters_data.html
This is my original working code:
"""Mnist Shard Descriptor."""
import logging
import os
from typing import List
import numpy as np
import requests
from openfl.interface.interactive_api.shard_descriptor import ShardDataset
from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor
logger = logging.getLogger(__name__)
class MnistShardDataset(ShardDataset):
"""Mnist Shard dataset class."""
def __init__(self, x, y, data_type, rank=1, worldsize=1):
"""Initialize MNISTDataset."""
self.data_type = data_type
self.rank = rank
self.worldsize = worldsize
self.x = x[self.rank - 1::self.worldsize]
self.y = y[self.rank - 1::self.worldsize]
def __getitem__(self, index: int):
"""Return an item by the index."""
return self.x[index], self.y[index]
def __len__(self):
"""Return the len of the dataset."""
return len(self.x)
class MnistShardDescriptor(ShardDescriptor):
"""Mnist Shard descriptor class."""
def __init__(
self,
rank_worldsize: str = '1, 1',
**kwargs
):
"""Initialize MnistShardDescriptor."""
self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(','))
(x_train, y_train), (x_test, y_test) = self.download_data()
self.data_by_type = {
'train': (x_train, y_train),
'val': (x_test, y_test)
}
def get_shard_dataset_types(self) -> List[str]:
"""Get available shard dataset types."""
return list(self.data_by_type)
def get_dataset(self, dataset_type='train'):
"""Return a shard dataset by type."""
if dataset_type not in self.data_by_type:
raise Exception(f'Wrong dataset type: {dataset_type}')
return MnistShardDataset(
*self.data_by_type[dataset_type],
data_type=dataset_type,
rank=self.rank,
worldsize=self.worldsize
)
#property
def sample_shape(self):
"""Return the sample shape info."""
return ['28', '28', '1']
#property
def target_shape(self):
"""Return the target shape info."""
return ['28', '28', '1']
#property
def dataset_description(self) -> str:
"""Return the dataset description."""
return (f'Mnist dataset, shard number {self.rank}'
f' out of {self.worldsize}')
def download_data(self):
"""Download prepared dataset."""
local_file_path = 'mnist.npz'
mnist_url = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz'
response = requests.get(mnist_url)
with open(local_file_path, 'wb') as f:
f.write(response.content)
with np.load(local_file_path) as f:
x_train, y_train = f['x_train'], f['y_train']
x_test, y_test = f['x_test'], f['y_test']
#x_train = np.reshape(x_train, (-1, 784))
#x_test = np.reshape(x_test, (-1, 784))
os.remove(local_file_path) # remove mnist.npz
print('Mnist data was loaded!')
return (x_train, y_train), (x_test, y_test)
Basically, I changed the MnistShardDescriptor class on both nodes of my federation in this way:
...
class MnistShardDescriptor(ShardDescriptor):
    """Mnist Shard descriptor class."""

    def __init__(
            self,
            rank_worldsize: str = '1, 1',
            **kwargs
    ):
        """Initialize MnistShardDescriptor."""
        self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(','))
        (x_train, y_train), (x_test, y_test) = self.download_data()

        train_splitter = RandomNumPyDataSplitter()
        test_splitter = RandomNumPyDataSplitter()
        train_idx = train_splitter.split(y_train, self.worldsize)[self.rank]
        test_idx = test_splitter.split(y_test, self.worldsize)[self.rank]
        x_train_shard = x_train[train_idx]
        x_test_shard = x_test[test_idx]

        self.data_by_type = {
            'train': (x_train, y_train),
            'val': (x_test, y_test)
        }
...
I get this error at the line train_idx: IndexError: list index out of range, but only on one of the 2 nodes. I do not know why, because the code is exactly the same on both nodes of my federation.
EDIT: I moved the code written above, and in particular I put it in the class MnistShardDataset rather than MnistShardDescriptor:
class MnistShardDataset(ShardDataset):
    """Mnist Shard dataset class."""

    def __init__(self, x, y, data_type, rank=1, worldsize=1):
        """Initialize MNISTDataset."""
        self.data_type = data_type
        self.rank = rank
        self.worldsize = worldsize
        self.x = x[self.rank - 1::self.worldsize]
        self.y = y[self.rank - 1::self.worldsize]

        train_splitter = RandomNumPyDataSplitter()
        # test_splitter = RandomNumPyDataSplitter()
        train_idx = train_splitter.split(self.y, self.worldsize)[self.rank]
        # test_idx = test_splitter.split(self.y, self.worldsize)[self.rank]
        x_train_shard = self.x[train_idx]
        # x_test_shard = self.x[test_idx]
        self.x = x_train_shard
With this I am able to create the federation and, on the same node as the director, the clients start training; the split is truly random, because I ran the experiment 2 times and each time the envoy had a different number of samples. However, on the other node (I am using 2 nodes, one for each envoy), the envoy (openFL calls the worker on a client an envoy) hits the same Index out of range error...
EDIT2: here is an example of data split using openFL: https://github.com/intel/openfl/blob/develop/openfl-tutorials/interactive_api/PyTorch_Kvasir_UNet/envoy/kvasir_shard_descriptor_with_data_splitter.py
However, my dataset is different and I have not succeeded in adapting this solution. Can you suggest any other example about sharding a dataset like MNIST, or a tutorial to follow?
Entire error:
File "/home/lmancuso/envoymnist/mnist_shard_descriptor_with_data_splitter.py", line 61, in __init__
train_idx = train_splitter.split(y_train, self.worldsize)[self.rank]
IndexError: list index out of range
EDIT: an interesting point: if I increase the rank_worldsize inside envoy_config.yaml from 2 to 3, training starts (and the dataset is divided in a random way; it works, since each node gets a different number of samples). However, it only works because I have 2 nodes: I created a federation of 3 without the third node. Indeed, the samples are 8064 for one node and 9856 for the other. But considering that I have 60000 training samples in MNIST, all the remaining samples got lost; they are supposed to be on the last node, which does not exist.
The only solution I have found so far is to reduce the rank of each envoy:
train_idx = train_splitter.split(self.y, self.worldsize)[self.rank-1]
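If I read the splitter correctly (an assumption based on the linked OpenFL docs, not verified against the source), this works because split() returns one index array per collaborator, indexed 0 to worldsize - 1, while envoy ranks are 1-based. A tiny illustration:

import numpy as np
from openfl.utilities.data_splitters import RandomNumPyDataSplitter  # import path assumed from the docs

y = np.arange(100)               # stand-in labels
splitter = RandomNumPyDataSplitter()
shards = splitter.split(y, 2)    # worldsize = 2 -> a list of 2 index arrays
print(len(shards))               # 2, so the valid positions are 0 and 1
rank = 2                         # a 1-based envoy rank
print(len(shards[rank - 1]))     # shards[rank] would raise IndexError on the last envoy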

What is the proper way to create training, validation and test set in pytorch or change the transform of an already created data set?

I noticed that a standard thing like getting the validation set in PyTorch is not as common as one would expect, nor obviously available in the pytorch library.
I found two websites that do it their own way:
- https://gist.github.com/MattKleinsmith/5226a94bad5dd12ed0b871aed98cb123
- https://www.geeksforgeeks.org/training-neural-networks-with-validation-using-pytorch/
but they have their problems: the second one forces the train & validation sets to have the same transforms, and the first one splits with respect to the data loader, which is then impossible to pass easily to a distributed data loader afaik.
If that is not the way to do it, then what is the proper way to create train, val and test sets?
The solution I found is to create three data sets from the beginning, each with the correct transforms you want. Then you have 3 data set objects and you give them to torch.utils.data.Subset(train_dataset, train_indices). The crux is essentially this:
# load the dataset
path_to_data_set: str = str(Path(path_to_data_set).expanduser())
train_dataset = datasets.MNIST(root=path_to_data_set, train=True,
                               download=True, transform=train_transform)
val_dataset = datasets.MNIST(root=path_to_data_set, train=True,
                             download=True, transform=val_transform)
indices = list(range(len(train_dataset)))
train_indices, val_indices = split_inidices(indices, test_size=val_size, random_state=seed, shuffle=shuffle)
train_dataset = torch.utils.data.Subset(train_dataset, train_indices)
val_dataset = torch.utils.data.Subset(val_dataset, val_indices)
train_loader, val_loader = get_serial_or_distributed_dataloaders(train_dataset,
                                                                 val_dataset,
                                                                 batch_size,
                                                                 batch_size_eval,
                                                                 rank,
                                                                 world_size,
                                                                 merge,
                                                                 num_workers,
                                                                 pin_memory
                                                                 )
then you can create whatever dataloaders you want later. This way you don't have to change the transform in the first place.
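A quick way to convince yourself that the two Subset views keep their own transforms (a small sketch, reusing the names from the snippet above):

# each Subset wraps a different MNIST instance, so the transforms stay separate
assert train_dataset.dataset is not val_dataset.dataset
print(train_dataset.dataset.transform)  # train_transform
print(val_dataset.dataset.transform)    # val_transform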
full code:
"""
# - data augmentation
Current belief is that augmenting the validation set should be fine, especially if you want to actually encourage
generalization since it makes the val set harder and it allows you to make val split percentage slightly lower since
your validation set was increased size.
For reproducibility of other work, especially for scientific pursues rather than "lets beat state of the art" - to make
it easier to compare results use what they use. e.g. it seems only augmenting the train set is the common thing,
especially when I looked at the augmentation strategies in min-imagenet and mnist.
Test set augmentation helps mostly to make test set harder (so acc should go down) - but it also increases variance
since the data size was increased. If you are reporting results most likely augmenting the data set is a good idea
- especially if you are going to compute test set errors when comparing accuracy with previous work.
Also, the way CI intervals are computed with t_p * std_n / sqrt n, means that the avg test error will be smaller, so
you are fine in general.
Default code I see doesn't augment test set so I most likely won't either.
ref:
- https://stats.stackexchange.com/questions/320800/data-augmentation-on-training-set-only/320967#320967
- https://arxiv.org/abs/1809.01442, https://stats.stackexchange.com/a/390470/28986
# - pin_memory
For data loading, passing pin_memory=True to a DataLoader will automatically put the fetched data Tensors in pinned
memory, and thus enables faster data transfer to CUDA-enabled GPUs. Note on pinning:
This is an advanced tip. If you overuse pinned memory, it can cause serious problems when running low on RAM, and
you should be aware that pinning is often an expensive operation. Thus, will leave it's default as False.
ref:
- on pin_memory: https://pytorch.org/docs/stable/data.html
"""
from typing import Callable, Optional, Union

import numpy as np
import torch
from numpy.random import RandomState
from torch.utils.data import Dataset, SubsetRandomSampler, random_split, DataLoader, RandomSampler


def get_train_val_split_random_sampler(
        train_dataset: Dataset,
        val_dataset: Dataset,
        val_size: float = 0.2,
        batch_size: int = 128,
        batch_size_eval: int = 64,
        num_workers: int = 4,
        pin_memory: bool = False
        # random_seed: Optional[int] = None,
) -> tuple[DataLoader, DataLoader]:
    """
    Note:
        - this will use different transforms for val and train if the objects you pass have different transforms.
        - note train_dataset, val_dataset will often be the same data set object, but different instances with
          different transforms for each data set.
    Recommended use:
        - this one is recommended when you want the train & val to have different transforms, e.g. when doing
          scientific work (instead of beating benchmarks) and the train, val sets had different transforms.
    ref:
        - https://gist.github.com/MattKleinsmith/5226a94bad5dd12ed0b871aed98cb123
    """
    assert 0 <= val_size <= 1.0, f"Error: {val_size} valid_size should be in the range [0, 1]."
    num_train = len(train_dataset)
    indices = list(range(num_train))
    split_idx = int(np.floor(val_size * num_train))
    # I don't think this is needed since later the sampler randomly samples data from a given list
    # if shuffle == True:
    #     np.random.seed(random_seed)
    #     np.random.shuffle(indices)
    train_idx, valid_idx = indices[:split_idx], indices[split_idx:]
    assert len(train_idx) != 0 and len(valid_idx) != 0

    # Samples elements randomly from a given list of indices, without replacement.
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, sampler=train_sampler,
                                               num_workers=num_workers, pin_memory=pin_memory)
    valid_loader = torch.utils.data.DataLoader(val_dataset,
                                               batch_size=batch_size_eval, sampler=valid_sampler,
                                               num_workers=num_workers, pin_memory=pin_memory)
    return train_loader, valid_loader


def get_train_val_split_with_split(
        train_dataset: Dataset,
        train_val_split: list[int],  # e.g. [50_000, 10_000] for mnist
        batch_size: int = 128,
        batch_size_eval: int = 64,
        num_workers: int = 4,
        pin_memory: bool = False
) -> tuple[DataLoader, DataLoader]:
    """
    Note:
        - this will have the train and val sets share the same transform.
    ref:
        - https://gist.github.com/MattKleinsmith/5226a94bad5dd12ed0b871aed98cb123
        - change transform: https://discuss.pytorch.org/t/changing-transforms-after-creating-a-dataset/64929/4
    """
    train_dataset, valid_dataset = random_split(train_dataset, train_val_split)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, num_workers=num_workers,
                                               pin_memory=pin_memory)
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=batch_size_eval, num_workers=num_workers,
                                               pin_memory=pin_memory)
    return train_loader, valid_loader


def get_serial_or_distributed_dataloaders(train_dataset: Dataset,
                                          val_dataset: Dataset,
                                          batch_size: int = 128,
                                          batch_size_eval: int = 64,
                                          rank: int = -1,
                                          world_size: int = 1,
                                          merge: Optional[Callable] = None,
                                          num_workers: int = -1,  # -1 means it's running serially
                                          pin_memory: bool = False,
                                          ):
    from uutils.torch_uu.distributed import is_running_serially
    if is_running_serially(rank):
        train_sampler = RandomSampler(train_dataset)
        val_sampler = RandomSampler(val_dataset)
        num_workers = 4 if num_workers == -1 else num_workers
    else:
        assert (batch_size >= world_size), f'Each worker must get at least one data point, so ' \
                                           f'batch_size >= world_size, but got: {batch_size=}, {world_size=}'
        from torch.utils.data import DistributedSampler
        # note: shuffle = True by default
        train_sampler = DistributedSampler(train_dataset, num_replicas=world_size, rank=rank)
        val_sampler = DistributedSampler(val_dataset, num_replicas=world_size, rank=rank)
        # set the input num_workers, but for ddp 0 is recommended afaik, todo - check
        num_workers = 0 if num_workers == -1 else num_workers

    # get dist dataloaders
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              sampler=train_sampler,
                              collate_fn=merge,
                              num_workers=num_workers,
                              pin_memory=pin_memory)
    val_loader = DataLoader(val_dataset,
                            batch_size=batch_size_eval,
                            sampler=val_sampler,
                            collate_fn=merge,
                            num_workers=num_workers,
                            pin_memory=pin_memory)
    # iter(train_loader)  # if this fails it's likely you're running in pycharm and need to set num_workers to 0
    return train_loader, val_loader


def split_inidices(indices: list,
                   test_size: Optional = None,
                   random_state: Optional[Union[int, RandomState, None]] = None,
                   shuffle: bool = False,  # false for reproducibility, and any split is as good as any other.
                   ) -> tuple[list[int], list[int]]:
    import sklearn.model_selection
    # - api: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html
    train_indices, val_indices = sklearn.model_selection.train_test_split(indices, test_size=test_size,
                                                                          random_state=random_state,
                                                                          shuffle=shuffle)
    return train_indices, val_indices
# - visualization help
"""
Inspired from:
    - https://gist.github.com/MattKleinsmith/5226a94bad5dd12ed0b871aed98cb123
    - https://www.geeksforgeeks.org/training-neural-networks-with-validation-using-pytorch/
"""
from argparse import Namespace
from pathlib import Path
from typing import Optional, Callable

import numpy as np
import torch
from torch.utils.data import random_split, DataLoader
from torchvision import datasets
from torchvision.transforms import transforms

from uutils.torch_uu.dataloaders.common import split_inidices, \
    get_serial_or_distributed_dataloaders

NORMALIZE_MNIST = transforms.Normalize((0.1307,), (0.3081,))  # MNIST


def get_train_valid_test_data_loader_helper_for_mnist(args: Namespace) -> dict:
    train_kwargs = {'path_to_data_set': args.path_to_data_set,
                    'batch_size': args.batch_size,
                    'batch_size_eval': args.batch_size_eval,
                    'augment_train': args.augment_train,
                    'augment_val': args.augment_val,
                    'num_workers': args.num_workers,
                    'pin_memory': args.pin_memory,
                    'rank': args.rank,
                    'world_size': args.world_size,
                    'merge': None
                    }
    test_kwargs = {'path_to_data_set': args.path_to_data_set,
                   'batch_size_eval': args.batch_size_eval,
                   'augment_test': args.augment_train,
                   'num_workers': args.num_workers,
                   'pin_memory': args.pin_memory,
                   'rank': args.rank,
                   'world_size': args.world_size,
                   'merge': None
                   }
    train_loader, val_loader = get_train_valid_loader(**train_kwargs)
    test_loader: DataLoader = get_test_loader(**test_kwargs)
    dataloaders: dict = {'train': train_loader, 'val': val_loader, 'test': test_loader}
    return dataloaders


def get_transform(augment: bool):
    if augment:
        transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            NORMALIZE_MNIST
        ])
    else:
        transform = transforms.Compose([
            transforms.ToTensor(),
            NORMALIZE_MNIST
        ])
    return transform
def get_train_valid_loader(path_to_data_set: Path,
                           batch_size: int = 128,
                           batch_size_eval: int = 64,
                           seed: Optional[int] = None,
                           augment_train: bool = True,
                           augment_val: bool = False,
                           val_size: Optional[float] = 0.2,
                           shuffle: bool = False,  # false for reproducibility, any split is as good as any other.
                           num_workers: int = -1,
                           pin_memory: bool = False,
                           rank: int = -1,
                           world_size: int = 1,
                           merge: Optional[Callable] = None,
                           ) -> tuple[DataLoader, DataLoader]:
    """
    Utility function for loading and returning train and valid
    multi-process iterators over the MNIST dataset. A sample
    9x9 grid of the images can be optionally displayed.
    If using CUDA, num_workers should be set to 1 and pin_memory to True.
    """
    # define transforms
    train_transform = get_transform(augment_train)
    val_transform = get_transform(augment_val)

    # load the dataset
    path_to_data_set: str = str(Path(path_to_data_set).expanduser())
    train_dataset = datasets.MNIST(root=path_to_data_set, train=True,
                                   download=True, transform=train_transform)
    val_dataset = datasets.MNIST(root=path_to_data_set, train=True,
                                 download=True, transform=val_transform)
    indices = list(range(len(train_dataset)))
    train_indices, val_indices = split_inidices(indices, test_size=val_size, random_state=seed, shuffle=shuffle)
    train_dataset = torch.utils.data.Subset(train_dataset, train_indices)
    val_dataset = torch.utils.data.Subset(val_dataset, val_indices)
    train_loader, val_loader = get_serial_or_distributed_dataloaders(train_dataset,
                                                                     val_dataset,
                                                                     batch_size,
                                                                     batch_size_eval,
                                                                     rank,
                                                                     world_size,
                                                                     merge,
                                                                     num_workers,
                                                                     pin_memory
                                                                     )
    return train_loader, val_loader


def get_test_loader(path_to_data_set,
                    batch_size_eval: int = 64,
                    shuffle: bool = True,
                    augment_test: bool = False,
                    num_workers: int = -1,
                    pin_memory=False,
                    rank: int = -1,
                    world_size: int = 1,
                    merge: Optional[Callable] = None,
                    ) -> DataLoader:
    """
    Utility function for loading and returning a multi-process
    test iterator over the MNIST dataset.
    If using CUDA, num_workers should be set to 1 and pin_memory to True.
    Params
    ------
    - path_to_data_set: path directory to the dataset.
    - batch_size: how many samples per batch to load.
    - shuffle: whether to shuffle the dataset after every epoch.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
      True if using GPU.
    Returns
    -------
    - data_loader: test set iterator.
    Note:
        - it knows it's the test set since train=False when creating the data set.
    """
    # define transform
    test_transform = get_transform(augment_test)

    # load the dataset
    path_to_data_set: str = str(Path(path_to_data_set).expanduser())
    test_dataset = datasets.MNIST(root=path_to_data_set,
                                  train=False,  # ensures it's the test set
                                  download=True,
                                  transform=test_transform)
    _, test_loader = get_serial_or_distributed_dataloaders(test_dataset,
                                                           test_dataset,
                                                           batch_size_eval,
                                                           batch_size_eval,
                                                           rank,
                                                           world_size,
                                                           merge,
                                                           num_workers,
                                                           pin_memory,
                                                           )
    return test_loader
repo this came from, with a permanent github link: https://github.com/brando90/ultimate-utils/blob/ef2217c07b43aa5354f7b6f8f1761c5f16017874/ultimate-utils-proj-src/uutils/torch_uu/dataloaders/mnist.py#L22
related:
https://discuss.pytorch.org/t/changing-transformation-applied-to-data-during-training/15671/14
https://discuss.pytorch.org/t/changing-transforms-after-creating-a-dataset/64929/7
https://discuss.pytorch.org/t/apply-different-transform-data-augmentation-to-train-and-validation/63580/13

Keras Functional API and loss function with multiple inputs

I am trying to use a custom Keras loss function that apart from the usual signature (y_true, y_pred) takes another parameter sigma (which is also produced by the last layer of the network).
The training works fine, but then I am not sure how to perform forward propagation and return sigma (while mu is the output of the model.predict method).
This is the code I am using, which features a custom layer GaussianLayer that returns the list [mu, sigma].
import tensorflow as tf
from keras import backend as K
from keras.layers import Input, Dense, Layer, Dropout
from keras.models import Model
from keras.initializers import glorot_normal
import numpy as np


def custom_loss(sigma):
    def gaussian_loss(y_true, y_pred):
        return tf.reduce_mean(0.5 * tf.log(sigma) + 0.5 * tf.div(tf.square(y_true - y_pred), sigma)) + 10
    return gaussian_loss


class GaussianLayer(Layer):
    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(GaussianLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.kernel_1 = self.add_weight(name='kernel_1',
                                        shape=(30, self.output_dim),
                                        initializer=glorot_normal(),
                                        trainable=True)
        self.kernel_2 = self.add_weight(name='kernel_2',
                                        shape=(30, self.output_dim),
                                        initializer=glorot_normal(),
                                        trainable=True)
        self.bias_1 = self.add_weight(name='bias_1',
                                      shape=(self.output_dim,),
                                      initializer=glorot_normal(),
                                      trainable=True)
        self.bias_2 = self.add_weight(name='bias_2',
                                      shape=(self.output_dim,),
                                      initializer=glorot_normal(),
                                      trainable=True)
        super(GaussianLayer, self).build(input_shape)

    def call(self, x):
        output_mu = K.dot(x, self.kernel_1) + self.bias_1
        output_sig = K.dot(x, self.kernel_2) + self.bias_2
        output_sig_pos = K.log(1 + K.exp(output_sig)) + 1e-06
        return [output_mu, output_sig_pos]

    def compute_output_shape(self, input_shape):
        return [(input_shape[0], self.output_dim), (input_shape[0], self.output_dim)]


# This returns a tensor
inputs = Input(shape=(1,))
x = Dense(30, activation='relu')(inputs)
x = Dropout(0.3)(x)
x = Dense(30, activation='relu')(x)
x = Dense(40, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(30, activation='relu')(x)
mu, sigma = GaussianLayer(1)(x)
model = Model(inputs, mu)
model.compile(loss=custom_loss(sigma), optimizer='adam')
model.fit(train_x, train_y, epochs=150)
Since your model returns two tensors as output, you also need to pass a list of two arrays as the targets when calling the fit() method. That's essentially what the error is trying to convey:

    Error when checking model target: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 2 array(s), but instead got the following list of 1 arrays.

So the error is in the targets (i.e. labels): the model expects two target arrays but was given only one.
I may have found the answer among the Keras FAQs. It turns out it is possible to retrieve an intermediate layer's output using the code snippet below:

layer_name = 'main_output'
intermediate_layer_model = Model(inputs=model.input,
                                 outputs=model.get_layer(layer_name).output)
intermediate_output = intermediate_layer_model.predict(train_x[0])
intermediate_output

In this case intermediate_output is a list of two values [mu, sigma] (I just needed to name the output layer main_output and retrieve it later).
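Alternatively, since mu and sigma already exist as symbolic tensors in the snippet above, a second Model sharing the trained layers can expose both outputs directly; a minimal sketch, not from the original post:

# hypothetical two-output predictor that reuses the already-trained layers
pred_model = Model(inputs, [mu, sigma])
mu_hat, sigma_hat = pred_model.predict(train_x)  # same train_x as in the question

This avoids naming any layer: predict() then returns both arrays in one call.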