results.pandas().xyxy[0] in only outputting data for one image rather than four - yolov5

I am trying to get the output stored in a variable so that it can be used later on for more processing.
But to get to that stage I am facing a challenge with this code
######INFERENCE ON P6 MODELS*****************************************************************************
import torch
import glob
from natsort import natsorted
import cv2
import numpy as np
import matplotlib.pyplot as plt
#%matplotlib inline
import pandas as pd
import os
model = torch.hub.load('/Users/yolov5', 'custom', path='/User/yolov5/runs/train/exp11/weights/best.pt', source='local', force_reload=True) # custom trained model
model.conf = 0.25 # NMS confidence threshold
Path = 'User/yolov5/data/images/'
imgs = [cv2.imread(file) for file in natsorted(glob.glob(Path+"/*.jpg"))]
# Inference
results = model(imgs,size=640)
# Results:
#results.save() # or .print() .show(), .save(), .crop(), .pandas(), etc.
results.pandas()
#print(results.print())
#print(results.pandas().xyxy[:])
# results.show()
#results.pandas().xyxy[0]
#print(results)
#print(results.pandas().xyxy[0])
# dfm = pd.DataFrame(results.pandas().xyxy[0])#, columns = ['Loss','Accuracy']
# # #dfm['Classes'] = classes.tolist()
# predict_labs = 'pred_yolo_individual.csv'
# with open(predict_labs, mode='w') as fd:
# dfm.to_csv(fd)
#results.print() # or .show(), .save(), .crop(), .pandas(), etc.
#results.render()
results.xyxy[0] # im predictions (tensor)
results.pandas().xyxy[0]
results.print()
# pred = results.pandas().xyxy[0]
# for index, row in pred.iterrows():
# print(row['class'], row['confidence'], row['name'])
As you can see I am trying loads of stuff to get this going but some major details I am missing that is not getting the right output as desired.
I would like to get the output like this format below for the folder of images that I have.
# Results
results.print() # or .show(), .save(), .crop(), .pandas(), etc.
results.xyxy[0] # im predictions (tensor)
results.pandas().xyxy[0] # im predictions (pandas)
# xmin ymin xmax ymax confidence class name
# 0 749.50 43.50 1148.0 704.5 0.874023 0 person
# 2 114.75 195.75 1095.0 708.0 0.624512 0 person
# 3 986.00 304.00 1028.0 420.0 0.286865 27 tie
*** THE ISSUE IS***
When I use the same code I am only getting one output!!!!!?!?!?!
If I do a
print(results.pandas().xyxy[0:])
I am seeing the output as demonstrated below but not in the structured format as above:
YOLOv5 🚀 v7.0-72-g064365d Python-3.10.6 torch-1.13.1 CPU
Fusing layers...
Model summary: 212 layers, 20856975 parameters, 0 gradients, 47.9 GFLOPs
Adding AutoShape...
[ xmin ymin xmax ymax confidence class name
0 539.859314 119.92907 602.884216 245.533752 0.353711 1 Stabbing, Empty DataFrame
Columns: [xmin, ymin, xmax, ymax, confidence, class, name]
Index: [], Empty DataFrame
Columns: [xmin, ymin, xmax, ymax, confidence, class, name]
Index: [], xmin ymin xmax ymax confidence class name
0 709.833496 66.843300 1025.770752 800.782593 0.771696 1 Stabbing
1 84.628845 4.153772 461.863617 833.189636 0.632551 1 Stabbing]
Please assist, and thank you in advance for acknowledging my issues.

I would suggest using the following code to get the desired output:
# Inference
results = model(imgs, size=640)
# Results:
preds = results.pandas().xyxy[0] # im predictions (pandas)
# print(preds)
# Create dataframe and write to file
dfm = pd.DataFrame(preds)
dfm.columns = ['xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name']
predict_labs = 'pred_yolo_individual.csv'
dfm.to_csv(predict_labs, index=False)
This will create a CSV file containing all the predictions in the desired format.

Related

System exit 1 error after obtaining NaN losses from finetuning Mask R-CNN in Pytorch

I am following this tutorial from Pytorch for Finetuning a pre-trained model on my own dataset. I have my annotation in the COCO format in a json file, so, I first implemented the dataloader as follows:
import torch
import json
from torch.utils.data import Dataset
from pycocotools.coco import COCO
from PIL import Image
import os
import numpy as np
from torchvision import transforms
import Config
import transforms as T
from torchvision.transforms import functional as F
class CustomDataset(Dataset):
def __init__(self, root, json_file, transform=None):
self.root = root
with open(json_file) as f:
self.data = json.load(f)
self.transform = transform
self.image_ids = [img["id"] for img in self.data["images"]]
self.imgs = list(sorted(os.listdir(os.path.join(root, "Images"))))
self.masks = list(sorted(os.listdir(os.path.join(root, "Masks"))))
def __getitem__(self, idx):
# Get image ID
img_id = self.image_ids[idx]
img = next(image for image in self.data["images"] if image["id"] == img_id)
img_path = os.path.join(self.root, "Images")
mask_path = os.path.join(self.root, "Masks")
# Load image
image = Image.open(os.path.join(img_path, img['file_name'])).convert("RGB")
# extract annotations from the json file
annotations = [ann for ann in self.data["annotations"] if ann["image_id"] == img_id]
# extract labels from annotations
labels = [ann["label"] for ann in annotations]
# convert labels to integers
labels = [label for label in labels]
labels = torch.as_tensor(labels, dtype=torch.int64)
# extract boxes and convert them to format [x1, y1, x2, y2]
boxes = [ann["bbox"] for ann in annotations]
boxes = [[bbox[0], bbox[1], bbox[2], bbox[3]] for bbox in boxes]
num_objects = len(boxes)
# read the mask and include the number of objects in the first dimension
mask = np.array(Image.open(os.path.join(mask_path, img['file_name'])).convert("L"))
# Check if mask is empty
if mask.size == 0:
mask = np.zeros((num_objects, 1, 1), dtype=np.uint8)
else:
mask = np.expand_dims(mask, axis=0)
mask = np.repeat(mask, num_objects, axis=0)
# convert the binary mask array to a torch tensor
mask = torch.as_tensor(mask, dtype=torch.uint8)
# suppose all instances are not crowd
iscrowd = torch.zeros((num_objects,), dtype=torch.int64)
# convert bboxes to tensors
boxes = torch.as_tensor(boxes, dtype=torch.float32)
# calculate the area of the bounding box
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
# convert id to tensor
image_id = torch.tensor([idx])
# create target dictionary
target = {}
target["boxes"] = boxes
target["labels"] = labels
target["masks"] = mask
target["image_id"] = image_id
target["area"] = area
target["iscrowd"] = iscrowd
# apply the transform if any
if self.transform is not None:
image, target = self.transform(image, target)
return image, target
def __len__(self):
return len(self.imgs)
and I am using this code for training:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from engine import train_one_epoch
import utils
import transforms as T
from dataloader import CustomDataset
import Config
import torch
import utils
from tqdm import tqdm
from torch.optim.lr_scheduler import StepLR
from torchvision.transforms import functional as F
def get_instance_segmentation_model(num_classes):
# load an instance segmentation model pre-trained on COCO
model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
# get the number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
# now get the number of input features for the mask classifier
in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
hidden_layer = 256
# and replace the mask predictor with a new one
model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
hidden_layer,
num_classes)
return model
def get_transform(train):
transforms = []
# converts the image, a PIL image, into a PyTorch Tensor
transforms.append(T.PILToTensor())
if train:
# during training, randomly flip the training images
# and ground-truth for data augmentation
transforms.append(T.RandomHorizontalFlip(0.5))
return T.Compose(transforms)
json_path = 'annotations.json'
# use our dataset and defined transformations
dataset = CustomDataset(root = Config.Dataset_dir, json_file=json_path, transform = get_transform(train=True))
# for image, target in dataset:
# print(image.shape)
# split the dataset in train and test set
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-500])
dataset_test = torch.utils.data.Subset(dataset, indices[-500:])
# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
dataset, batch_size=1, shuffle=True, num_workers=4,
collate_fn=utils.collate_fn)
data_loader_test = torch.utils.data.DataLoader(
dataset_test, batch_size=1, shuffle=False, num_workers=4,
collate_fn=utils.collate_fn)
device = Config.DEVICE
# # our dataset has two classes only - background and person
num_classes = 2
# get the model using our helper function
model = get_instance_segmentation_model(num_classes)
# move model to the right device
model.to(device)
# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.1,
momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
step_size=3,
gamma=0.1)
# let's train it for 10 epochs
num_epochs = 10
for epoch in range(num_epochs):
# train for one epoch, printing every 10 iterations
train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
# update the learning rate
lr_scheduler.step()
# evaluate on the test dataset
evaluate(model, data_loader_test, device=device)
This training code is as stated in the tutorial is using some helper functions which can be accessed from here. I have run the training code and the training is working for the first 10 samples in the data, but then it gives the following error:
Epoch: [0] [ 0/2759] eta: 13:29:50 lr: 0.000200 loss: -136.8811 (-136.8811) loss_classifier: 0.9397 (0.9397) loss_box_reg: 0.0017 (0.0017) loss_mask: -137.9142 (-137.9142) loss_objectness: 0.0859 (0.0859) loss_rpn_box_reg: 0.0057 (0.0057) time: 17.6117 data: 10.0775
Loss is nan, stopping training
{'loss_classifier': tensor(nan, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(nan, grad_fn=<DivBackward0>), 'loss_mask': tensor(nan, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_objectness': tensor(nan, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(nan, grad_fn=<DivBackward0>)}
An exception has occurred, use %tb to see the full traceback.
SystemExit: 1
This error is raised from the engine.py train_one_epoch function, especially from this part of the function:
with torch.cuda.amp.autocast(enabled=scaler is not None):
loss_dict = model(images, targets)
losses = sum(loss for loss in loss_dict.values())
# reduce losses over all GPUs for logging purposes
loss_dict_reduced = utils.reduce_dict(loss_dict)
losses_reduced = sum(loss for loss in loss_dict_reduced.values())
loss_value = losses_reduced.item()
if not math.isfinite(loss_value):
print(f"Loss is {loss_value}, stopping training")
print(loss_dict_reduced)
sys.exit(1)
Which indicates that the losses returned after the first loop are NaN ... What could be wrong here please? I am running out of ideas and don't know what's going wrong anymore.

Why is RandomCrop with size 84 and padding 8 returning an image size of 84 and not 100 in pytorch?

I was using the mini-imagenet data set and noticed this line of code:
elif data_augmentation == 'lee2019:
normalize = Normalize(
mean=[120.39586422 / 255.0, 115.59361427 / 255.0, 104.54012653 / 255.0],
std=[70.68188272 / 255.0, 68.27635443 / 255.0, 72.54505529 / 255.0],
)
train_data_transforms = Compose([
ToPILImage(),
RandomCrop(84, padding=8),
ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
RandomHorizontalFlip(),
ToTensor(),
normalize,
])
test_data_transforms = Compose([
normalize,
])
but when I checked the image size it was 84 instead of 100 (after adding padding):
X.size()=torch.Size([50, 3, 84, 84])
what is going on with this? Shouldn't it be 100?
reproduction:
import random
from typing import Callable
import learn2learn as l2l
import numpy as np
import torch
from learn2learn.data import TaskDataset, MetaDataset, DataDescription
from learn2learn.data.transforms import TaskTransform
from torch.utils.data import Dataset
class IndexableDataSet(Dataset):
def __init__(self, datasets):
self.datasets = datasets
def __len__(self) -> int:
return len(self.datasets)
def __getitem__(self, idx: int):
return self.datasets[idx]
class SingleDatasetPerTaskTransform(Callable):
"""
Transform that samples a data set first, then creates a task (e.g. n-way, k-shot) and finally
applies the remaining task transforms.
"""
def __init__(self, indexable_dataset: IndexableDataSet, cons_remaining_task_transforms: Callable):
"""
:param: cons_remaining_task_transforms; constructor that builds the remaining task transforms. Cannot be a list
of transforms because we don't know apriori which is the data set we will use. So this function should be of
type MetaDataset -> list[TaskTransforms] i.e. given the dataset it returns the transforms for it.
"""
self.indexable_dataset = MetaDataset(indexable_dataset)
self.cons_remaining_task_transforms = cons_remaining_task_transforms
def __call__(self, task_description: list):
"""
idea:
- receives the index of the dataset to use
- then use the normal NWays l2l function
"""
# - this is what I wish could have gone in a seperate callable transform, but idk how since the transforms take apriori (not dynamically) which data set to use.
i = random.randint(0, len(self.indexable_dataset) - 1)
task_description = [DataDescription(index=i)] # using this to follow the l2l convention
# - get the sampled data set
dataset_index = task_description[0].index
dataset = self.indexable_dataset[dataset_index]
dataset = MetaDataset(dataset)
# - use the sampled data set to create task
remaining_task_transforms: list[TaskTransform] = self.cons_remaining_task_transforms(dataset)
description = None
for transform in remaining_task_transforms:
description = transform(description)
return description
def sample_dataset(dataset):
def sample_random_dataset(x):
print(f'{x=}')
i = random.randint(0, len(dataset) - 1)
return [DataDescription(index=i)]
# return dataset[i]
return sample_random_dataset
def get_task_transforms(dataset: IndexableDataSet) -> list[TaskTransform]:
"""
:param dataset:
:return:
"""
transforms = [
sample_dataset(dataset),
l2l.data.transforms.NWays(dataset, n=5),
l2l.data.transforms.KShots(dataset, k=5),
l2l.data.transforms.LoadData(dataset),
l2l.data.transforms.RemapLabels(dataset),
l2l.data.transforms.ConsecutiveLabels(dataset),
]
return transforms
def print_datasets(dataset_lst: list):
for dataset in dataset_lst:
print(f'\n{dataset=}\n')
def get_indexable_list_of_datasets_mi_and_cifarfs(root: str = '~/data/l2l_data/') -> IndexableDataSet:
from learn2learn.vision.benchmarks import mini_imagenet_tasksets
datasets, transforms = mini_imagenet_tasksets(root=root)
mi = datasets[0].dataset
from learn2learn.vision.benchmarks import cifarfs_tasksets
datasets, transforms = cifarfs_tasksets(root=root)
cifarfs = datasets[0].dataset
dataset_list = [mi, cifarfs]
dataset_list = [l2l.data.MetaDataset(dataset) for dataset in dataset_list]
dataset = IndexableDataSet(dataset_list)
return dataset
# -- tests
def loop_through_l2l_indexable_datasets_test():
"""
:return:
"""
# - for determinism
random.seed(0)
torch.manual_seed(0)
np.random.seed(0)
# - options for number of tasks/meta-batch size
batch_size: int = 10
# - create indexable data set
indexable_dataset: IndexableDataSet = get_indexable_list_of_datasets_mi_and_cifarfs()
# - get task transforms
def get_remaining_transforms(dataset: MetaDataset) -> list[TaskTransform]:
remaining_task_transforms = [
l2l.data.transforms.NWays(dataset, n=5),
l2l.data.transforms.KShots(dataset, k=5),
l2l.data.transforms.LoadData(dataset),
l2l.data.transforms.RemapLabels(dataset),
l2l.data.transforms.ConsecutiveLabels(dataset),
]
return remaining_task_transforms
task_transforms: TaskTransform = SingleDatasetPerTaskTransform(indexable_dataset, get_remaining_transforms)
# -
taskset: TaskDataset = TaskDataset(dataset=indexable_dataset, task_transforms=task_transforms)
# - loop through tasks
for task_num in range(batch_size):
print(f'{task_num=}')
X, y = taskset.sample()
print(f'{X.size()=}')
print(f'{y.size()=}')
print(f'{y=}')
print()
print('-- end of test --')
# -- Run experiment
if __name__ == "__main__":
import time
from uutils import report_times
start = time.time()
# - run experiment
loop_through_l2l_indexable_datasets_test()
# - Done
print(f"\nSuccess Done!: {report_times(start)}\a")
context: https://github.com/learnables/learn2learn/issues/333
crossposted:
https://discuss.pytorch.org/t/why-is-randomcrop-with-size-84-and-padding-8-returning-an-image-size-of-84-and-not-100-in-pytorch/151463
https://www.reddit.com/r/pytorch/comments/uno1ih/why_is_randomcrop_with_size_84_and_padding_8/
The padding is applied to the input image or tensor before applying the random crop. Ultimately, the output image has a spatial size equal to that of the provided size(s) given to the T.RandomCrop function since the operation is performed after.
After all, it makes more sense to pad the input image rather than the cropped image, doesn't it?

Python script that can auto-annotate the images

I am using the https://github.com/mdhmz1/Auto-Annotate repo. I have tried to custom train my own dataset which has it own COCO JSON format file.
When I try to run
python3 customTrain.py train --dataset=path/to/dir --weights=coco
I get the following error:
Traceback (most recent call last):
File "customTrain.py", line 279, in
train(model)
File "customTrain.py", line 179, in train
dataset_train.load_custom(args.dataset, "train")
File "customTrain.py", line 87, in load_custom
annotations = [a for a in annotations if a['regions']]
File "customTrain.py", line 87, in
annotations = [a for a in annotations if a['regions']]
TypeError: list indices must be integers or slices, not str
My customtrain.py looks like the following:
import os
import sys
import json
import datetime
import numpy as np
import skimage.draw
Root directory of the project
ROOT_DIR = "/home/hiwi/Auto-Annotate"
Import Mask RCNN
sys.path.append(ROOT_DIR) # To find local version of the library
from mrcnn.config import Config
from mrcnn import model as modellib, utils
Path to trained weights file
COCO_WEIGHTS_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
Directory to save logs and model checkpoints, if not provided
through the command line argument --logs
DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
############################################################
Configurations
############################################################
class CustomConfig(Config):
"""Configuration for training on the toy dataset.
Derives from the base Config class and overrides some values.
"""
# Give the configuration a recognizable name
NAME = "custom"
IMAGES_PER_GPU = 1
# Number of classes (including background)
NUM_CLASSES = 1 + 2 # Background + 2 classes
# Number of training steps per epoch
STEPS_PER_EPOCH = 100
# Skip detections with < 90% confidence
DETECTION_MIN_CONFIDENCE = 0.9
############################################################
Dataset
############################################################
class CustomDataset(utils.Dataset):
def load_custom(self, dataset_dir, subset):
"""Load a subset of the Custom dataset.
dataset_dir: Root directory of the dataset.
subset: Subset to load: train or val
"""
# Add classes. We have only one class to add.
self.add_class("custom", 0, "Primary_Track")
self.add_class("custom", 1, "Secondary_Track")
# Train or validation dataset?
assert subset in ["train", "val"]
dataset_dir = os.path.join(dataset_dir, subset)
# Load annotations
# VGG Image Annotator (up to version 1.6) saves each image in the form:
# { 'filename': '28503151_5b5b7ec140_b.jpg',
# 'regions': {
# '0': {
# 'region_attributes': {},
# 'shape_attributes': {
# 'all_points_x': [...],
# 'all_points_y': [...],
# 'name': 'polygon'}},
# ... more regions ...
# },
# 'size': 100202
# }
# We mostly care about the x and y coordinates of each region
# Note: In VIA 2.0, regions was changed from a dict to a list.
annotations1 = json.load(open(os.path.join(dataset_dir, "train.json")))
annotations = list(annotations1.values()) # don't need the dict keys
# The VIA tool saves images in the JSON even if they don't have any
# annotations. Skip unannotated images.
annotations = [a for a in annotations if a['regions']]
# Add images
for a in annotations:
# Get the x, y coordinaets of points of the polygons that make up
# the outline of each object instance. These are stores in the
# shape_attributes (see json format above)
# The if condition is needed to support VIA versions 1.x and 2.x.
if type(a['regions']) is dict:
polygons = [r['shape_attributes'] for r in a['regions'].values()]
else:
polygons = [r['shape_attributes'] for r in a['regions']]
#labelling each class in the given image to a number
custom = [s['region_attributes'] for s in a['regions']]
num_ids=[]
#Add the classes according to the requirement
for n in custom:
try:
if n['name']=="Primary_Track":
num_ids.append(0)
elif n['name']=='Secondary_Track':
num_ids.append(1)
except:
pass
# load_mask() needs the image size to convert polygons to masks.
# Unfortunately, VIA doesn't include it in JSON, so we must read
# the image. This is only managable since the dataset is tiny.
image_path = os.path.join(dataset_dir, a['filename'])
image = skimage.io.imread(image_path)
height, width = image.shape[:2]
self.add_image(
"custom",
image_id=a['filename'], # use file name as a unique image id
path=image_path,
width=width, height=height,
polygons=polygons,
num_ids=num_ids)
def load_mask(self, image_id):
"""Generate instance masks for an image.
Returns:
masks: A bool array of shape [height, width, instance count] with
one mask per instance.
class_ids: a 1D array of class IDs of the instance masks.
"""
# If not a custom dataset image, delegate to parent class.
image_info = self.image_info[image_id]
if image_info["source"] != "custom":
return super(self.__class__, self).load_mask(image_id)
num_ids = image_info['num_ids']
#print("Here is the numID",num_ids)
# Convert polygons to a bitmap mask of shape
# [height, width, instance_count]
info = self.image_info[image_id]
mask = np.zeros([info["height"], info["width"], len(info["polygons"])],
dtype=np.uint8)
for i, p in enumerate(info["polygons"]):
if p['name'] == 'polygon':
# Get indexes of pixels inside the polygon and set them to 1
rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x'])
else:
rr, cc = skimage.draw.rectangle((p['y'], p['x']), extent=(p['height'], p['width']))
rr[rr > mask.shape[0]-1] = mask.shape[0]-1
cc[cc > mask.shape[1]-1] = mask.shape[1]-1
mask[rr, cc, i] = 1
# Return mask, and array of class IDs of each instance. Since we have
# one class ID only, we return an array of 1s
num_ids = np.array(num_ids, dtype=np.int32)
return mask.astype(np.bool), num_ids.astype(np.bool), np.ones([mask.shape[-1]], dtype=np.int32)
#return mask.astype(np.bool), np.ones([mask.shape[-1]], dtype=np.int32)
def image_reference(self, image_id):
"""Return the path of the image."""
info = self.image_info[image_id]
if info["source"] == "Railtrack":
return info["path"]
else:
super(self.__class__, self).image_reference(image_id)
def train(model):
"""Train the model."""
# Training dataset.
dataset_train = CustomDataset()
dataset_train.load_custom(args.dataset, "train")
dataset_train.prepare()
# Validation dataset
dataset_val = CustomDataset()
dataset_val.load_custom(args.dataset, "val")
dataset_val.prepare()
# *** This training schedule is an example. Update to your needs ***
# Since we're using a very small dataset, and starting from
# COCO trained weights, we don't need to train too long. Also,
# no need to train all layers, just the heads should do it.
print("Training network heads")
model.train(dataset_train, dataset_val,
learning_rate=config.LEARNING_RATE,
epochs=30,
layers='heads')
############################################################
Training
############################################################
if name == 'main':
import argparse
# Parse command line arguments
parser = argparse.ArgumentParser(
description='Train Mask R-CNN to detect custom objects.')
parser.add_argument("command",
metavar="<command>",
help="'train' or 'splash'")
parser.add_argument('--dataset', required=False,
metavar="/path/to/custom/dataset/",
help='Directory of the Custom dataset')
parser.add_argument('--weights', required=True,
metavar="/path/to/weights.h5",
help="Path to weights .h5 file or 'coco'")
parser.add_argument('--logs', required=False,
default=DEFAULT_LOGS_DIR,
metavar="/path/to/logs/",
help='Logs and checkpoints directory (default=logs/)')
parser.add_argument('--image', required=False,
metavar="path or URL to image",
help='Image to apply the color splash effect on')
parser.add_argument('--video', required=False,
metavar="path or URL to video",
help='Video to apply the color splash effect on')
args = parser.parse_args()
# Validate arguments
if args.command == "train":
assert args.dataset, "Argument --dataset is required for training"
elif args.command == "splash":
assert args.image or args.video,\
"Provide --image or --video to apply color splash"
print("Weights: ", args.weights)
print("Dataset: ", args.dataset)
print("Logs: ", args.logs)
# Configurations
if args.command == "train":
config = CustomConfig()
# Create model
if args.command == "train":
model = modellib.MaskRCNN(mode="training", config=config,
model_dir=args.logs)
# Select weights file to load
if args.weights.lower() == "coco":
weights_path = COCO_WEIGHTS_PATH
# Download weights file
if not os.path.exists(weights_path):
utils.download_trained_weights(weights_path)
elif args.weights.lower() == "last":
# Find last trained weights
weights_path = model.find_last()
elif args.weights.lower() == "imagenet":
# Start from ImageNet trained weights
weights_path = model.get_imagenet_weights()
else:
weights_path = args.weights
# Load weights
print("Loading weights ", weights_path)
if args.weights.lower() == "coco":
# Exclude the last layers because they require a matching
# number of classes
model.load_weights(weights_path, by_name=True, exclude=[
"mrcnn_class_logits", "mrcnn_bbox_fc",
"mrcnn_bbox", "mrcnn_mask"])
else:
model.load_weights(weights_path, by_name=True)
# Train or evaluate
if args.command == "train":
train(model)
else:
print("'{}' is not recognized. "
"Use 'train' or 'splash'".format(args.command))

How to read, save and display images, encoded in csv format

I've got some images for training and testing a tensorflow model encoded in csv format. Is there a way to extract those images and / or save them in jpg like format?
Part of the file can be seen above as a opened in excel as a screenshot. If you prefer text to hyperlinks, here is a part of it in a form of a text:
label pixel1 pixel2 ...
6 149 149 ...
5 126 128 ...
10 85 88 ...
0 203 205 ...
There are 785 columns and 7173 rows in total. I have no idea how to deal with that.
You can do it like this
# first i create a dummy dataset to work on
data = make_classification(10000, n_features=784, random_state=1234)
df = pd.DataFrame(data[0], columns=[str(f'col_{i}') for i in range(784)])
df['label'] = data[1]
# Now we create a img_vector and labels array from the dataframe
img_vector = df[[str(f'col_{i}') for i in range(784)]].values
labels = df['label'].values
# splitting the data
# Now we creating the dataset
def get_img(inputs, labels):
# here you have 784 pixels which usually represent a 28*28 image with 1 channel
# hence I reshape it that way
img = tf.reshape(inputs, (28,28,1))
# you can also add some augmentation
img = tf.image.flip_left_right(img)
img = tf.image.flip_up_down(img)
return img, labels
# We pass the img_vector and labels to the make the dataset
train_dataset = tf.data.Dataset.from_tensor_slices((train_mat, train_label))
# Map the dataset to get images form it.
train_dataset = train_dataset.map(get_img).batch(16)
# same for valid dataset
valid_dataset = tf.data.Dataset.from_tensor_slices((valid_mat, valid_label))
valid_dataset = valid_dataset.map(get_img).batch(16)
# A sanity check
import matplotlib.pyplot as plt
sample = None
for i in train_dataset:
sample = i
break
plt.imshow(sample[0][0])
# Creating a model
model = tf.keras.Sequential([
tf.keras.layers.Conv2D(3,3, input_shape=(28,28,1)),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(1, activation='sigmoid')
])
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
# Finally train the model
model.fit(train_dataset,
epochs=10,
validation_data=valid_dataset)
Also, if you ever take a dataset from Kaggle you will usually find a sample notebook for that dataset in the code section.
You can read any row, plot it and save it as image like this:
import numpy as np
import pandas as pd
# read csv file
df = pd.read_csv("data.csv")
# read pixels
images = np.array(df.iloc[:,1:])
labels = np.array(df.iloc[:,0])
# select random number between 0 and 7172
index = 2
# reshape 784 rows to 28 height x 28 width
sample_image = images[index,:].reshape(28,28)
# import plt for displaying image
from matplotlib import pyplot as plt
# plot image
plt.imshow(sample_image)
plt.axis('off')
# plot it's label
print(labels[index])
# save image
plt.savefig("./image{}_label{}".format(index,labels[index]))

How to load all 5 batches of CIFAR10 in a single data structure as on MNIST in PyTorch?

With Mnist I have a single file with the labels and a single file for the train, so I simply do:
self.data = datasets.MNIST(root='./data', train=True, download=True)
Basically I create a set of labels (from 0-9) and save the i-th position of the image in the data structure, to create my custom tasks:
def make_tasks (self):
        self.task_to_examples = {} #task 0-9
        self.all_tasks = set (self.data.train_labels.numpy ())
        for i, digit in enumerate (self.data.train_labels.numpy ()):
            if str(digit) not in self.task_to_examples:
                self.task_to_examples[str(digit)] = []
            self.task_to_examples[str(digits)].append(i)
I don't understand how to do the same thing using CIFAR10 because it is divided into 5 batches, I would like all the data in a single structure.
If your desired structure is {"class_id": [indices of the samples]}, then for CIFAR10 you can do something like this:
import numpy as np
import torchvision
# set root accordingly
cifar = torchvision.datasets.CIFAR10(root=".", train=True, download=True)
task_to_examples = {
str(task_id): np.where(cifar.targets == task_id)[0].tolist()
for task_id in np.unique(cifar.targets)
}