Python script that can auto-annotate the images - json

I am using the https://github.com/mdhmz1/Auto-Annotate repo. I have tried to custom train my own dataset which has it own COCO JSON format file.
When I try to run
python3 customTrain.py train --dataset=path/to/dir --weights=coco
I get the following error:
Traceback (most recent call last):
File "customTrain.py", line 279, in
train(model)
File "customTrain.py", line 179, in train
dataset_train.load_custom(args.dataset, "train")
File "customTrain.py", line 87, in load_custom
annotations = [a for a in annotations if a['regions']]
File "customTrain.py", line 87, in
annotations = [a for a in annotations if a['regions']]
TypeError: list indices must be integers or slices, not str
My customtrain.py looks like the following:
import os
import sys
import json
import datetime
import numpy as np
import skimage.draw
Root directory of the project
ROOT_DIR = "/home/hiwi/Auto-Annotate"
Import Mask RCNN
sys.path.append(ROOT_DIR) # To find local version of the library
from mrcnn.config import Config
from mrcnn import model as modellib, utils
Path to trained weights file
COCO_WEIGHTS_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
Directory to save logs and model checkpoints, if not provided
through the command line argument --logs
DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
############################################################
Configurations
############################################################
class CustomConfig(Config):
"""Configuration for training on the toy dataset.
Derives from the base Config class and overrides some values.
"""
# Give the configuration a recognizable name
NAME = "custom"
IMAGES_PER_GPU = 1
# Number of classes (including background)
NUM_CLASSES = 1 + 2 # Background + 2 classes
# Number of training steps per epoch
STEPS_PER_EPOCH = 100
# Skip detections with < 90% confidence
DETECTION_MIN_CONFIDENCE = 0.9
############################################################
Dataset
############################################################
class CustomDataset(utils.Dataset):
def load_custom(self, dataset_dir, subset):
"""Load a subset of the Custom dataset.
dataset_dir: Root directory of the dataset.
subset: Subset to load: train or val
"""
# Add classes. We have only one class to add.
self.add_class("custom", 0, "Primary_Track")
self.add_class("custom", 1, "Secondary_Track")
# Train or validation dataset?
assert subset in ["train", "val"]
dataset_dir = os.path.join(dataset_dir, subset)
# Load annotations
# VGG Image Annotator (up to version 1.6) saves each image in the form:
# { 'filename': '28503151_5b5b7ec140_b.jpg',
# 'regions': {
# '0': {
# 'region_attributes': {},
# 'shape_attributes': {
# 'all_points_x': [...],
# 'all_points_y': [...],
# 'name': 'polygon'}},
# ... more regions ...
# },
# 'size': 100202
# }
# We mostly care about the x and y coordinates of each region
# Note: In VIA 2.0, regions was changed from a dict to a list.
annotations1 = json.load(open(os.path.join(dataset_dir, "train.json")))
annotations = list(annotations1.values()) # don't need the dict keys
# The VIA tool saves images in the JSON even if they don't have any
# annotations. Skip unannotated images.
annotations = [a for a in annotations if a['regions']]
# Add images
for a in annotations:
# Get the x, y coordinaets of points of the polygons that make up
# the outline of each object instance. These are stores in the
# shape_attributes (see json format above)
# The if condition is needed to support VIA versions 1.x and 2.x.
if type(a['regions']) is dict:
polygons = [r['shape_attributes'] for r in a['regions'].values()]
else:
polygons = [r['shape_attributes'] for r in a['regions']]
#labelling each class in the given image to a number
custom = [s['region_attributes'] for s in a['regions']]
num_ids=[]
#Add the classes according to the requirement
for n in custom:
try:
if n['name']=="Primary_Track":
num_ids.append(0)
elif n['name']=='Secondary_Track':
num_ids.append(1)
except:
pass
# load_mask() needs the image size to convert polygons to masks.
# Unfortunately, VIA doesn't include it in JSON, so we must read
# the image. This is only managable since the dataset is tiny.
image_path = os.path.join(dataset_dir, a['filename'])
image = skimage.io.imread(image_path)
height, width = image.shape[:2]
self.add_image(
"custom",
image_id=a['filename'], # use file name as a unique image id
path=image_path,
width=width, height=height,
polygons=polygons,
num_ids=num_ids)
def load_mask(self, image_id):
"""Generate instance masks for an image.
Returns:
masks: A bool array of shape [height, width, instance count] with
one mask per instance.
class_ids: a 1D array of class IDs of the instance masks.
"""
# If not a custom dataset image, delegate to parent class.
image_info = self.image_info[image_id]
if image_info["source"] != "custom":
return super(self.__class__, self).load_mask(image_id)
num_ids = image_info['num_ids']
#print("Here is the numID",num_ids)
# Convert polygons to a bitmap mask of shape
# [height, width, instance_count]
info = self.image_info[image_id]
mask = np.zeros([info["height"], info["width"], len(info["polygons"])],
dtype=np.uint8)
for i, p in enumerate(info["polygons"]):
if p['name'] == 'polygon':
# Get indexes of pixels inside the polygon and set them to 1
rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x'])
else:
rr, cc = skimage.draw.rectangle((p['y'], p['x']), extent=(p['height'], p['width']))
rr[rr > mask.shape[0]-1] = mask.shape[0]-1
cc[cc > mask.shape[1]-1] = mask.shape[1]-1
mask[rr, cc, i] = 1
# Return mask, and array of class IDs of each instance. Since we have
# one class ID only, we return an array of 1s
num_ids = np.array(num_ids, dtype=np.int32)
return mask.astype(np.bool), num_ids.astype(np.bool), np.ones([mask.shape[-1]], dtype=np.int32)
#return mask.astype(np.bool), np.ones([mask.shape[-1]], dtype=np.int32)
def image_reference(self, image_id):
"""Return the path of the image."""
info = self.image_info[image_id]
if info["source"] == "Railtrack":
return info["path"]
else:
super(self.__class__, self).image_reference(image_id)
def train(model):
"""Train the model."""
# Training dataset.
dataset_train = CustomDataset()
dataset_train.load_custom(args.dataset, "train")
dataset_train.prepare()
# Validation dataset
dataset_val = CustomDataset()
dataset_val.load_custom(args.dataset, "val")
dataset_val.prepare()
# *** This training schedule is an example. Update to your needs ***
# Since we're using a very small dataset, and starting from
# COCO trained weights, we don't need to train too long. Also,
# no need to train all layers, just the heads should do it.
print("Training network heads")
model.train(dataset_train, dataset_val,
learning_rate=config.LEARNING_RATE,
epochs=30,
layers='heads')
############################################################
Training
############################################################
if name == 'main':
import argparse
# Parse command line arguments
parser = argparse.ArgumentParser(
description='Train Mask R-CNN to detect custom objects.')
parser.add_argument("command",
metavar="<command>",
help="'train' or 'splash'")
parser.add_argument('--dataset', required=False,
metavar="/path/to/custom/dataset/",
help='Directory of the Custom dataset')
parser.add_argument('--weights', required=True,
metavar="/path/to/weights.h5",
help="Path to weights .h5 file or 'coco'")
parser.add_argument('--logs', required=False,
default=DEFAULT_LOGS_DIR,
metavar="/path/to/logs/",
help='Logs and checkpoints directory (default=logs/)')
parser.add_argument('--image', required=False,
metavar="path or URL to image",
help='Image to apply the color splash effect on')
parser.add_argument('--video', required=False,
metavar="path or URL to video",
help='Video to apply the color splash effect on')
args = parser.parse_args()
# Validate arguments
if args.command == "train":
assert args.dataset, "Argument --dataset is required for training"
elif args.command == "splash":
assert args.image or args.video,\
"Provide --image or --video to apply color splash"
print("Weights: ", args.weights)
print("Dataset: ", args.dataset)
print("Logs: ", args.logs)
# Configurations
if args.command == "train":
config = CustomConfig()
# Create model
if args.command == "train":
model = modellib.MaskRCNN(mode="training", config=config,
model_dir=args.logs)
# Select weights file to load
if args.weights.lower() == "coco":
weights_path = COCO_WEIGHTS_PATH
# Download weights file
if not os.path.exists(weights_path):
utils.download_trained_weights(weights_path)
elif args.weights.lower() == "last":
# Find last trained weights
weights_path = model.find_last()
elif args.weights.lower() == "imagenet":
# Start from ImageNet trained weights
weights_path = model.get_imagenet_weights()
else:
weights_path = args.weights
# Load weights
print("Loading weights ", weights_path)
if args.weights.lower() == "coco":
# Exclude the last layers because they require a matching
# number of classes
model.load_weights(weights_path, by_name=True, exclude=[
"mrcnn_class_logits", "mrcnn_bbox_fc",
"mrcnn_bbox", "mrcnn_mask"])
else:
model.load_weights(weights_path, by_name=True)
# Train or evaluate
if args.command == "train":
train(model)
else:
print("'{}' is not recognized. "
"Use 'train' or 'splash'".format(args.command))

Related

results.pandas().xyxy[0] in only outputting data for one image rather than four

I am trying to get the output stored in a variable so that it can be used later on for more processing.
But to get to that stage I am facing a challenge with this code
######INFERENCE ON P6 MODELS*****************************************************************************
import torch
import glob
from natsort import natsorted
import cv2
import numpy as np
import matplotlib.pyplot as plt
#%matplotlib inline
import pandas as pd
import os
model = torch.hub.load('/Users/yolov5', 'custom', path='/User/yolov5/runs/train/exp11/weights/best.pt', source='local', force_reload=True) # custom trained model
model.conf = 0.25 # NMS confidence threshold
Path = 'User/yolov5/data/images/'
imgs = [cv2.imread(file) for file in natsorted(glob.glob(Path+"/*.jpg"))]
# Inference
results = model(imgs,size=640)
# Results:
#results.save() # or .print() .show(), .save(), .crop(), .pandas(), etc.
results.pandas()
#print(results.print())
#print(results.pandas().xyxy[:])
# results.show()
#results.pandas().xyxy[0]
#print(results)
#print(results.pandas().xyxy[0])
# dfm = pd.DataFrame(results.pandas().xyxy[0])#, columns = ['Loss','Accuracy']
# # #dfm['Classes'] = classes.tolist()
# predict_labs = 'pred_yolo_individual.csv'
# with open(predict_labs, mode='w') as fd:
# dfm.to_csv(fd)
#results.print() # or .show(), .save(), .crop(), .pandas(), etc.
#results.render()
results.xyxy[0] # im predictions (tensor)
results.pandas().xyxy[0]
results.print()
# pred = results.pandas().xyxy[0]
# for index, row in pred.iterrows():
# print(row['class'], row['confidence'], row['name'])
As you can see I am trying loads of stuff to get this going but some major details I am missing that is not getting the right output as desired.
I would like to get the output like this format below for the folder of images that I have.
# Results
results.print() # or .show(), .save(), .crop(), .pandas(), etc.
results.xyxy[0] # im predictions (tensor)
results.pandas().xyxy[0] # im predictions (pandas)
# xmin ymin xmax ymax confidence class name
# 0 749.50 43.50 1148.0 704.5 0.874023 0 person
# 2 114.75 195.75 1095.0 708.0 0.624512 0 person
# 3 986.00 304.00 1028.0 420.0 0.286865 27 tie
*** THE ISSUE IS***
When I use the same code I am only getting one output!!!!!?!?!?!
If I do a
print(results.pandas().xyxy[0:])
I am seeing the output as demonstrated below but not in the structured format as above:
YOLOv5 🚀 v7.0-72-g064365d Python-3.10.6 torch-1.13.1 CPU
Fusing layers...
Model summary: 212 layers, 20856975 parameters, 0 gradients, 47.9 GFLOPs
Adding AutoShape...
[ xmin ymin xmax ymax confidence class name
0 539.859314 119.92907 602.884216 245.533752 0.353711 1 Stabbing, Empty DataFrame
Columns: [xmin, ymin, xmax, ymax, confidence, class, name]
Index: [], Empty DataFrame
Columns: [xmin, ymin, xmax, ymax, confidence, class, name]
Index: [], xmin ymin xmax ymax confidence class name
0 709.833496 66.843300 1025.770752 800.782593 0.771696 1 Stabbing
1 84.628845 4.153772 461.863617 833.189636 0.632551 1 Stabbing]
Please assist, and thank you in advance for acknowledging my issues.
I would suggest using the following code to get the desired output:
# Inference
results = model(imgs, size=640)
# Results:
preds = results.pandas().xyxy[0] # im predictions (pandas)
# print(preds)
# Create dataframe and write to file
dfm = pd.DataFrame(preds)
dfm.columns = ['xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name']
predict_labs = 'pred_yolo_individual.csv'
dfm.to_csv(predict_labs, index=False)
This will create a CSV file containing all the predictions in the desired format.

NMT , 'KerasTensor' object is not callable'

Here I share a code snippet for training Encoder_Decoder Model for machine translation. While Using the Embedding layer (trained previously) during inference mode( on test_data) . It threw the following error --->
# Encoder
encoder_inputs = Input(shape=(None ,))
enc_emb = Embedding(eng_vocab_size, latent_dim, mask_zero = True)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(enc_emb)
# We discard `encoder_outputs` and only keep the states.
encoder_states = [state_h, state_c]
# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = Input(shape=(None,))
dec_emb = Embedding(deu_vocab_size, latent_dim, mask_zero = True)(decoder_inputs)
# decoder return full output sequences, and internal states as well.
# We don't use the return states in the training model,
# but we will use them in inference.
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(dec_emb,
initial_state=encoder_states)
decoder_dense = Dense(deu_vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
# Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
# Compile the model
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['acc'])
# Encode the input sequence to get the "thought vectors"
encoder_model = Model(encoder_inputs, encoder_states)
# Decoder setup
# Below tensors will hold the states of the previous time step
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
dec_emb2= dec_emb(decoder_inputs) # reusing embedding layer
decoder_outputs2, state_h2, state_c2 = decoder_lstm(dec_emb2, initial_state=decoder_states_inputs) # reusing lstm layer
decoder_outputs2 = decoder_dense(decoder_outputs2) # softmax_layer to generate prob_dist. over target vocab
# Final decoder model
decoder_model = Model(
[decoder_inputs] + decoder_states_inputs,
[decoder_outputs2] )
ERROR
8 decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
---> 10 dec_emb2= dec_emb(decoder_inputs) # reusing embedding layer
11
12 decoder_outputs2, state_h2, state_c2 = decoder_lstm(dec_emb2, initial_state=decoder_states_inputs) # reusing lstm layer
TypeError: 'KerasTensor' object is not callableenter image description here
I read through various solutions available for this issue , but couldn't understand what 2 modes of model they were talking about and what their soltion was effectively doing .
Pls explain in detail. Thanks in advance

System exit 1 error after obtaining NaN losses from finetuning Mask R-CNN in Pytorch

I am following this tutorial from Pytorch for Finetuning a pre-trained model on my own dataset. I have my annotation in the COCO format in a json file, so, I first implemented the dataloader as follows:
import torch
import json
from torch.utils.data import Dataset
from pycocotools.coco import COCO
from PIL import Image
import os
import numpy as np
from torchvision import transforms
import Config
import transforms as T
from torchvision.transforms import functional as F
class CustomDataset(Dataset):
def __init__(self, root, json_file, transform=None):
self.root = root
with open(json_file) as f:
self.data = json.load(f)
self.transform = transform
self.image_ids = [img["id"] for img in self.data["images"]]
self.imgs = list(sorted(os.listdir(os.path.join(root, "Images"))))
self.masks = list(sorted(os.listdir(os.path.join(root, "Masks"))))
def __getitem__(self, idx):
# Get image ID
img_id = self.image_ids[idx]
img = next(image for image in self.data["images"] if image["id"] == img_id)
img_path = os.path.join(self.root, "Images")
mask_path = os.path.join(self.root, "Masks")
# Load image
image = Image.open(os.path.join(img_path, img['file_name'])).convert("RGB")
# extract annotations from the json file
annotations = [ann for ann in self.data["annotations"] if ann["image_id"] == img_id]
# extract labels from annotations
labels = [ann["label"] for ann in annotations]
# convert labels to integers
labels = [label for label in labels]
labels = torch.as_tensor(labels, dtype=torch.int64)
# extract boxes and convert them to format [x1, y1, x2, y2]
boxes = [ann["bbox"] for ann in annotations]
boxes = [[bbox[0], bbox[1], bbox[2], bbox[3]] for bbox in boxes]
num_objects = len(boxes)
# read the mask and include the number of objects in the first dimension
mask = np.array(Image.open(os.path.join(mask_path, img['file_name'])).convert("L"))
# Check if mask is empty
if mask.size == 0:
mask = np.zeros((num_objects, 1, 1), dtype=np.uint8)
else:
mask = np.expand_dims(mask, axis=0)
mask = np.repeat(mask, num_objects, axis=0)
# convert the binary mask array to a torch tensor
mask = torch.as_tensor(mask, dtype=torch.uint8)
# suppose all instances are not crowd
iscrowd = torch.zeros((num_objects,), dtype=torch.int64)
# convert bboxes to tensors
boxes = torch.as_tensor(boxes, dtype=torch.float32)
# calculate the area of the bounding box
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
# convert id to tensor
image_id = torch.tensor([idx])
# create target dictionary
target = {}
target["boxes"] = boxes
target["labels"] = labels
target["masks"] = mask
target["image_id"] = image_id
target["area"] = area
target["iscrowd"] = iscrowd
# apply the transform if any
if self.transform is not None:
image, target = self.transform(image, target)
return image, target
def __len__(self):
return len(self.imgs)
and I am using this code for training:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from engine import train_one_epoch
import utils
import transforms as T
from dataloader import CustomDataset
import Config
import torch
import utils
from tqdm import tqdm
from torch.optim.lr_scheduler import StepLR
from torchvision.transforms import functional as F
def get_instance_segmentation_model(num_classes):
# load an instance segmentation model pre-trained on COCO
model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
# get the number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
# now get the number of input features for the mask classifier
in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
hidden_layer = 256
# and replace the mask predictor with a new one
model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
hidden_layer,
num_classes)
return model
def get_transform(train):
transforms = []
# converts the image, a PIL image, into a PyTorch Tensor
transforms.append(T.PILToTensor())
if train:
# during training, randomly flip the training images
# and ground-truth for data augmentation
transforms.append(T.RandomHorizontalFlip(0.5))
return T.Compose(transforms)
json_path = 'annotations.json'
# use our dataset and defined transformations
dataset = CustomDataset(root = Config.Dataset_dir, json_file=json_path, transform = get_transform(train=True))
# for image, target in dataset:
# print(image.shape)
# split the dataset in train and test set
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-500])
dataset_test = torch.utils.data.Subset(dataset, indices[-500:])
# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
dataset, batch_size=1, shuffle=True, num_workers=4,
collate_fn=utils.collate_fn)
data_loader_test = torch.utils.data.DataLoader(
dataset_test, batch_size=1, shuffle=False, num_workers=4,
collate_fn=utils.collate_fn)
device = Config.DEVICE
# # our dataset has two classes only - background and person
num_classes = 2
# get the model using our helper function
model = get_instance_segmentation_model(num_classes)
# move model to the right device
model.to(device)
# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.1,
momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
step_size=3,
gamma=0.1)
# let's train it for 10 epochs
num_epochs = 10
for epoch in range(num_epochs):
# train for one epoch, printing every 10 iterations
train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
# update the learning rate
lr_scheduler.step()
# evaluate on the test dataset
evaluate(model, data_loader_test, device=device)
This training code is as stated in the tutorial is using some helper functions which can be accessed from here. I have run the training code and the training is working for the first 10 samples in the data, but then it gives the following error:
Epoch: [0] [ 0/2759] eta: 13:29:50 lr: 0.000200 loss: -136.8811 (-136.8811) loss_classifier: 0.9397 (0.9397) loss_box_reg: 0.0017 (0.0017) loss_mask: -137.9142 (-137.9142) loss_objectness: 0.0859 (0.0859) loss_rpn_box_reg: 0.0057 (0.0057) time: 17.6117 data: 10.0775
Loss is nan, stopping training
{'loss_classifier': tensor(nan, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(nan, grad_fn=<DivBackward0>), 'loss_mask': tensor(nan, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_objectness': tensor(nan, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(nan, grad_fn=<DivBackward0>)}
An exception has occurred, use %tb to see the full traceback.
SystemExit: 1
This error is raised from the engine.py train_one_epoch function, especially from this part of the function:
with torch.cuda.amp.autocast(enabled=scaler is not None):
loss_dict = model(images, targets)
losses = sum(loss for loss in loss_dict.values())
# reduce losses over all GPUs for logging purposes
loss_dict_reduced = utils.reduce_dict(loss_dict)
losses_reduced = sum(loss for loss in loss_dict_reduced.values())
loss_value = losses_reduced.item()
if not math.isfinite(loss_value):
print(f"Loss is {loss_value}, stopping training")
print(loss_dict_reduced)
sys.exit(1)
Which indicates that the losses returned after the first loop are NaN ... What could be wrong here please? I am running out of ideas and don't know what's going wrong anymore.

Loading multiple csvs with mixed dtypes in tensorflow for training

I have 100s of csvs in a directory, with headers. I am trying to create a feedforward NN using tensorflow for regression.
What's the best way to import these csvs and train using tf & train it?
Also help to look at my preprocessing if I am doing it right?
Note: My features has mixed datatypes (int,float,string), My target is float
I can not concat the csv and use pandas to import, my data size is >50 GB so can not load it in-memory, have to read it iteratively from disc
Directory Path:
./data/train/ -> 100s of csvs
./data/test -> 100s of csvs
./data/valid -> 100s of csvs
Code:
Methodology:
Create Generator
Use Dataset API to load the data
Preprocess the Data (embedding, one-hot,etc)
Train fit
But, in generator I was able to give only output formats where the inputs/ outputs are homogeneous ddtypes.
Code:
def data_generator(file_list, batch_size = 2):
i = 0
while True:
if i*batch_size >= len(file_list): # This loop is used to run the generator indefinitely.
i = 0
np.random.shuffle(file_list)
else:
file_chunk = file_list[i*batch_size:(i+1)*batch_size]
data = []
labels = []
for file in file_chunk:
temp = pd.read_csv(open(file,'r')) # Change this line to read any other type of file
labels = temp.pop('ACTUAL_BOXES')
data.append(temp.values) # Convert column data to matrix like data with one channel
labels.append(labels)
data = np.asarray(data)
labels = np.asarray(labels)
yield data, labels # Here data will be mixed datatype arrays & lables will be a float dtype array
i = i + 1
#getting list of files inside the directory
train_file_list = np.sort(glob.glob('././data/train/*.csv'))
test_file_list = np.sort(glob.glob('././data/test/*.csv'))
val_file_list = np.sort(glob.glob('././data/val/*.csv'))
train_dataset = tf.data.Dataset.from_generator(data_generator,args= [train_file_list , batch_size = 2],
output_types = (tf.float32, tf.float32), #This is where I am struck
#my sample data and lables will be like this
data = ['a','b',1,2,3.14,2] #Mixed dtypes
lables = [1.0] #float
)
val_dataset = tf.data.Dataset.from_generator(data_generator,args= [val_file_list , batch_size = 2],
output_types = (tf.float32, tf.float32), #This is where I am struck
)
# Pre processing Part:
def encode_inputs(EMBEDDING_FEATURES,INDICATOR_FEATURES):
''' Function for encoding the deatures'''
encoded_features = []
for feature_name in EMBEDDING_FEATURES:
#Getting unique vocab list
vocabulary = np.array(list(flatten(vocab_list[feature_name])))
# categorical columns using the lists created above:
cat_col = tf.feature_column.categorical_column_with_vocabulary_list(
feature_name, vocabulary)
embedding_dims = int(math.sqrt(len(vocabulary)))
# create an embedding from the categorical column:
cat_emb = tf.feature_column.embedding_column(cat_col,8) #,dimension=embedding_dims
# add the embeddings to the list of feature columns
encoded_features.append(cat_emb)
for feature_name in INDICATOR_FEATURES:
#Getting unique vocab list
vocabulary = list(flatten(vocab_list[feature_name]))
# indicator columns using the lists created above:
ind_col = tf.feature_column.categorical_column_with_vocabulary_list(
feature_name, vocabulary)
# create an embedding from the categorical column:
cat_one_hot = tf.feature_column.indicator_column(ind_col)
# add the embeddings to the list of feature columns
encoded_features.append(cat_one_hot)
# create the input layer for the model
feature_layer = tf.keras.layers.DenseFeatures(encoded_features)
return feature_layer
# Opening JSON file that contains vocab list for str cols
f = open('./vocab_list.json') # File that contains the unique values of each feature
vocab_list = json.load(f)
features_layer = encode_inputs(EMBEDDING_FEATURES,INDICATOR_FEATURES)
# Model Part
model = tf.keras.models.Sequential([
features_layer,
tf.keras.layers.Dense(30, activation = 'relu'),
tf.keras.layers.Dense(1)
])
m_loss = tf.keras.losses.mean_squared_error
m_optimizer = tf.keras.optimizers.SGD(lr = 1e-3)
batch_size = 32
model.compile(loss = m_loss, optimizer = m_optimizer, metrics = ['accuracy'])
model.fit(train_dataset ,epochs = 10, validation_data = val_dataset )

Why is RandomCrop with size 84 and padding 8 returning an image size of 84 and not 100 in pytorch?

I was using the mini-imagenet data set and noticed this line of code:
elif data_augmentation == 'lee2019:
normalize = Normalize(
mean=[120.39586422 / 255.0, 115.59361427 / 255.0, 104.54012653 / 255.0],
std=[70.68188272 / 255.0, 68.27635443 / 255.0, 72.54505529 / 255.0],
)
train_data_transforms = Compose([
ToPILImage(),
RandomCrop(84, padding=8),
ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
RandomHorizontalFlip(),
ToTensor(),
normalize,
])
test_data_transforms = Compose([
normalize,
])
but when I checked the image size it was 84 instead of 100 (after adding padding):
X.size()=torch.Size([50, 3, 84, 84])
what is going on with this? Shouldn't it be 100?
reproduction:
import random
from typing import Callable
import learn2learn as l2l
import numpy as np
import torch
from learn2learn.data import TaskDataset, MetaDataset, DataDescription
from learn2learn.data.transforms import TaskTransform
from torch.utils.data import Dataset
class IndexableDataSet(Dataset):
def __init__(self, datasets):
self.datasets = datasets
def __len__(self) -> int:
return len(self.datasets)
def __getitem__(self, idx: int):
return self.datasets[idx]
class SingleDatasetPerTaskTransform(Callable):
"""
Transform that samples a data set first, then creates a task (e.g. n-way, k-shot) and finally
applies the remaining task transforms.
"""
def __init__(self, indexable_dataset: IndexableDataSet, cons_remaining_task_transforms: Callable):
"""
:param: cons_remaining_task_transforms; constructor that builds the remaining task transforms. Cannot be a list
of transforms because we don't know apriori which is the data set we will use. So this function should be of
type MetaDataset -> list[TaskTransforms] i.e. given the dataset it returns the transforms for it.
"""
self.indexable_dataset = MetaDataset(indexable_dataset)
self.cons_remaining_task_transforms = cons_remaining_task_transforms
def __call__(self, task_description: list):
"""
idea:
- receives the index of the dataset to use
- then use the normal NWays l2l function
"""
# - this is what I wish could have gone in a seperate callable transform, but idk how since the transforms take apriori (not dynamically) which data set to use.
i = random.randint(0, len(self.indexable_dataset) - 1)
task_description = [DataDescription(index=i)] # using this to follow the l2l convention
# - get the sampled data set
dataset_index = task_description[0].index
dataset = self.indexable_dataset[dataset_index]
dataset = MetaDataset(dataset)
# - use the sampled data set to create task
remaining_task_transforms: list[TaskTransform] = self.cons_remaining_task_transforms(dataset)
description = None
for transform in remaining_task_transforms:
description = transform(description)
return description
def sample_dataset(dataset):
def sample_random_dataset(x):
print(f'{x=}')
i = random.randint(0, len(dataset) - 1)
return [DataDescription(index=i)]
# return dataset[i]
return sample_random_dataset
def get_task_transforms(dataset: IndexableDataSet) -> list[TaskTransform]:
"""
:param dataset:
:return:
"""
transforms = [
sample_dataset(dataset),
l2l.data.transforms.NWays(dataset, n=5),
l2l.data.transforms.KShots(dataset, k=5),
l2l.data.transforms.LoadData(dataset),
l2l.data.transforms.RemapLabels(dataset),
l2l.data.transforms.ConsecutiveLabels(dataset),
]
return transforms
def print_datasets(dataset_lst: list):
for dataset in dataset_lst:
print(f'\n{dataset=}\n')
def get_indexable_list_of_datasets_mi_and_cifarfs(root: str = '~/data/l2l_data/') -> IndexableDataSet:
from learn2learn.vision.benchmarks import mini_imagenet_tasksets
datasets, transforms = mini_imagenet_tasksets(root=root)
mi = datasets[0].dataset
from learn2learn.vision.benchmarks import cifarfs_tasksets
datasets, transforms = cifarfs_tasksets(root=root)
cifarfs = datasets[0].dataset
dataset_list = [mi, cifarfs]
dataset_list = [l2l.data.MetaDataset(dataset) for dataset in dataset_list]
dataset = IndexableDataSet(dataset_list)
return dataset
# -- tests
def loop_through_l2l_indexable_datasets_test():
"""
:return:
"""
# - for determinism
random.seed(0)
torch.manual_seed(0)
np.random.seed(0)
# - options for number of tasks/meta-batch size
batch_size: int = 10
# - create indexable data set
indexable_dataset: IndexableDataSet = get_indexable_list_of_datasets_mi_and_cifarfs()
# - get task transforms
def get_remaining_transforms(dataset: MetaDataset) -> list[TaskTransform]:
remaining_task_transforms = [
l2l.data.transforms.NWays(dataset, n=5),
l2l.data.transforms.KShots(dataset, k=5),
l2l.data.transforms.LoadData(dataset),
l2l.data.transforms.RemapLabels(dataset),
l2l.data.transforms.ConsecutiveLabels(dataset),
]
return remaining_task_transforms
task_transforms: TaskTransform = SingleDatasetPerTaskTransform(indexable_dataset, get_remaining_transforms)
# -
taskset: TaskDataset = TaskDataset(dataset=indexable_dataset, task_transforms=task_transforms)
# - loop through tasks
for task_num in range(batch_size):
print(f'{task_num=}')
X, y = taskset.sample()
print(f'{X.size()=}')
print(f'{y.size()=}')
print(f'{y=}')
print()
print('-- end of test --')
# -- Run experiment
if __name__ == "__main__":
import time
from uutils import report_times
start = time.time()
# - run experiment
loop_through_l2l_indexable_datasets_test()
# - Done
print(f"\nSuccess Done!: {report_times(start)}\a")
context: https://github.com/learnables/learn2learn/issues/333
crossposted:
https://discuss.pytorch.org/t/why-is-randomcrop-with-size-84-and-padding-8-returning-an-image-size-of-84-and-not-100-in-pytorch/151463
https://www.reddit.com/r/pytorch/comments/uno1ih/why_is_randomcrop_with_size_84_and_padding_8/
The padding is applied to the input image or tensor before applying the random crop. Ultimately, the output image has a spatial size equal to that of the provided size(s) given to the T.RandomCrop function since the operation is performed after.
After all, it makes more sense to pad the input image rather than the cropped image, doesn't it?