tf.data.Dataset: The `batch_size` argument must not be specified for the given input type - deep-learning

I'm using Talos and a Google Colab TPU to run hyperparameter tuning of a Keras model. Note that I'm using TensorFlow 1.15.0 and Keras 2.2.4-tf.
import os
import tensorflow as tf
import talos as ta
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

def iris_model(x_train, y_train, x_val, y_val, params):
    # Specify a distributed strategy to use TPU
    resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
    tf.contrib.distribute.initialize_tpu_system(resolver)
    strategy = tf.contrib.distribute.TPUStrategy(resolver)

    # Use the strategy to create and compile a Keras model
    with strategy.scope():
        model = Sequential()
        model.add(Dense(32, input_shape=(4,), activation=tf.nn.relu, name="relu"))
        model.add(Dense(3, activation=tf.nn.softmax, name="softmax"))
        model.compile(optimizer=Adam(learning_rate=0.1), loss=params['losses'])

    # Convert data type to use TPU
    x_train = x_train.astype('float32')
    x_val = x_val.astype('float32')

    dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    dataset = dataset.cache()
    dataset = dataset.shuffle(1000, reshuffle_each_iteration=True).repeat()
    dataset = dataset.batch(params['batch_size'], drop_remainder=True)

    # Fit the Keras model on the dataset
    out = model.fit(dataset, batch_size=params['batch_size'], epochs=params['epochs'],
                    validation_data=[x_val, y_val], verbose=0, steps_per_epoch=2)

    return out, model

# Load dataset
X, y = ta.templates.datasets.iris()

# Train and test set
x_train, x_val, y_train, y_val = train_test_split(X, y, test_size=0.30, shuffle=False)

# Create the hyperparameter distributions
p = {'losses': ['logcosh'], 'batch_size': [128, 256, 384, 512, 1024], 'epochs': [10, 20]}

# Use Talos to scan the best hyperparameters of the Keras model
scan_object = ta.Scan(x_train, y_train, params=p, model=iris_model, experiment_name='test',
                      x_val=x_val, y_val=y_val, fraction_limit=0.1)
After converting the train set to a Dataset using tf.data.Dataset, I get the following error when fitting the model with out = model.fit:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-3-c812209b95d0> in <module>()
8
9 # Use Talos to scan the best hyperparameters of the Keras model
---> 10 scan_object = ta.Scan(x_train, y_train, params=p, model=iris_model, experiment_name='test', x_val=x_val, y_val=y_val, fraction_limit=0.1)
8 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training.py in _validate_or_infer_batch_size(self, batch_size, steps, x)
1813 'The `batch_size` argument must not be specified for the given '
1814 'input type. Received input: {}, batch_size: {}'.format(
-> 1815 x, batch_size))
1816 return
1817
ValueError: The `batch_size` argument must not be specified for the given input type. Received input: <DatasetV1Adapter shapes: ((512, 4), (512, 3)), types: (tf.float32, tf.float32)>, batch_size: 512
Then, if I follow those instructions and don't pass the batch_size argument to model.fit, I get another error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-3-c812209b95d0> in <module>()
8
9 # Use Talos to scan the best hyperparameters of the Keras model
---> 10 scan_object = ta.Scan(x_train, y_train, params=p, model=iris_model, experiment_name='test', x_val=x_val, y_val=y_val, fraction_limit=0.1)
8 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training.py in _distribution_standardize_user_data(self, x, y, sample_weight, class_weight, batch_size, validation_split, shuffle, epochs, allow_partial_batch)
2307 strategy) and not drop_remainder:
2308 dataset_size = first_x_value.shape[0]
-> 2309 if dataset_size % batch_size == 0:
2310 drop_remainder = True
2311
TypeError: unsupported operand type(s) for %: 'int' and 'NoneType'

There seems to be an issue in Keras's distributed code. If you take a look at the traceback above,
you can see that the error is thrown at the operation dataset_size % batch_size, and it states "unsupported operand type(s) for %: 'int' and 'NoneType'". This means that at this point the batch_size variable should already have been inferred from the Dataset object, but it is still None.
If you take a look at the source code (you can access it from Colab by clicking on the path in the traceback), you will see that in the fit function
def fit(self,
        model,
        x=None,
        y=None,
        batch_size=None,
        epochs=1,
        verbose=1,
        callbacks=None,
        validation_split=0.,
        validation_data=None,
        shuffle=True,
        class_weight=None,
        sample_weight=None,
        initial_epoch=0,
        steps_per_epoch=None,
        validation_steps=None,
        validation_freq=1,
        **kwargs):
    """Fit loop for Distribution Strategies."""
    dist_utils.validate_callbacks(input_callbacks=callbacks,
                                  optimizer=model.optimizer)
    dist_utils.validate_inputs(x, y)

    batch_size, steps_per_epoch = dist_utils.process_batch_and_step_size(
        model._distribution_strategy,
        x,
        batch_size,
        steps_per_epoch,
        ModeKeys.TRAIN,
        validation_split=validation_split)
    batch_size = model._validate_or_infer_batch_size(
        batch_size, steps_per_epoch, x)
    dataset = model._distribution_standardize_user_data(
there is a step

batch_size = model._validate_or_infer_batch_size(
    batch_size, steps_per_epoch, x)

in which batch_size should change from None (its default value when not specified) to the one inferred from the Dataset object, but it doesn't (I checked by printing the variable). I think this might be related to the fact that your batch_size is in fact a list of batch sizes. If you change the source code (you can edit it directly from Colab and then click "Restart runtime" to try it) to this:
batch_size, steps_per_epoch = dist_utils.process_batch_and_step_size(
    model._distribution_strategy,
    x,
    batch_size,
    steps_per_epoch,
    ModeKeys.TRAIN,
    validation_split=validation_split)
batch_size = model._validate_or_infer_batch_size(
    batch_size, steps_per_epoch, x)
batch_size = 128
dataset = model._distribution_standardize_user_data(
(note that I manually inserted the batch_size into the source code right after the point at which it should have been inferred), the program runs with no error.
Maybe sweeping over different batch sizes for hyperparameter tuning is simply not feasible with these versions. I tried TF 2.1 and it did not work either.
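As a workaround (a minimal sketch, not verified against Talos's internals), you can sidestep the inference path entirely by keeping all batching inside the tf.data pipeline: batch the validation data as a Dataset too and omit the batch_size argument from model.fit, so Keras never needs to infer one:

# Sketch: assumes x_val and y_val are the float32 numpy arrays from above.
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
val_dataset = val_dataset.batch(params['batch_size'], drop_remainder=True)

out = model.fit(dataset,
                epochs=params['epochs'],
                steps_per_epoch=2,
                validation_data=val_dataset,
                validation_steps=1,
                verbose=0)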

Related

TypeError: conv2d() received an invalid combination of arguments

Hi everyone!
I am working on an object detection project using the VGG network on the PASCAL VOC dataset. I used custom dataset loading to load PASCAL VOC and coded the network from scratch (similar to PyTorch's torchvision implementation).
Currently, I'm getting the following error:
Traceback (most recent call last):
File "/home/khushi/Documents/deep-learning/benchmarking-deep-neural-networks/vgg/main.py", line 59, in <module>
main()
File "/home/khushi/Documents/deep-learning/benchmarking-deep-neural-networks/vgg/main.py", line 55, in main
train(data, model, num_epochs, criteria, optimizer)
File "/home/khushi/Documents/deep-learning/benchmarking-deep-neural-networks/vgg/main.py", line 21, in train
outputs = model(image)
File "/home/khushi/.local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/khushi/Documents/deep-learning/benchmarking-deep-neural-networks/vgg/vgg_torch.py", line 39, in forward
x = self.features(x)
File "/home/khushi/.local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/khushi/.local/lib/python3.9/site-packages/torch/nn/modules/container.py", line 141, in forward
input = module(input)
File "/home/khushi/.local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/khushi/.local/lib/python3.9/site-packages/torch/nn/modules/conv.py", line 446, in forward
return self._conv_forward(input, self.weight, self.bias)
File "/home/khushi/.local/lib/python3.9/site-packages/torch/nn/modules/conv.py", line 442, in _conv_forward
return F.conv2d(input, weight, bias, self.stride,
TypeError: conv2d() received an invalid combination of arguments - got (Image, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:
* (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
didn't match because some of the arguments have invalid types: (Image, Parameter, Parameter, tuple, tuple, tuple, int)
* (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
didn't match because some of the arguments have invalid types: (Image, Parameter, Parameter, tuple, tuple, tuple, int)
The code I am implementing:
import vgg_torch
import voc_loader

import time
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

num_epochs = 5
learning_rate = 0.01

# https://github.com/khushi-411/tutorials/pytorch
def train(data, model, num_epochs, criteria, optimizer):
    steps = len(data)
    for epochs in range(num_epochs):
        for i, (image, target) in enumerate(data):
            # forward pass
            # https://stackoverflow.com/questions/57237381
            #outputs = model(image[None, ...])
            outputs = model(image)
            loss = criteria(outputs, target)

            # backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (i+1) % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epochs+1, num_epochs, i+1, steps, loss.item()))

def main():
    # give absolute path to dataset
    # https://stackoverflow.com/questions/56741108
    data = voc_loader.VOCDetection('/home/khushi/Documents/deep-learning/datasets/pascal-voc/')
    """,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
    )
    """

    # Load model: vgg11, vgg11_bn, vgg13, vgg13_bn, vgg16, vgg16_bn, vgg19, vgg19_bn
    model = vgg_torch.vgg11()
    print(model)

    # Loss function and optimizer
    criteria = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # training
    start = time.time()
    train(data, model, num_epochs, criteria, optimizer)
    print("Total time taken to train: ", time.time() - start)

if __name__ == "__main__":
    main()
Dependencies
PyTorch: 1.10.0+cu102
OS: Manjaro Distro
RAM: 16GB
Can anyone please help me resolve this error? Thanks!
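The argument combination in the traceback, (Image, Parameter, Parameter, ...), shows that conv2d() is receiving a raw PIL Image: the images are never converted to tensors, because the transform block in main() is commented out. A minimal sketch of the fix, assuming voc_loader.VOCDetection accepts a transform argument the way the torchvision datasets do (the commented-out code suggests it does):

import torchvision.transforms as transforms

transform = transforms.Compose([
    transforms.Resize((224, 224)),  # VGG expects a fixed input size
    transforms.ToTensor(),          # PIL Image -> FloatTensor; resolves the conv2d() TypeError
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

data = voc_loader.VOCDetection('/home/khushi/Documents/deep-learning/datasets/pascal-voc/',
                               transform=transform)

The model would then still need a batch dimension, e.g. model(image[None, ...]) as the commented-out line in train() hints, unless the dataset is wrapped in a DataLoader.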

HuggingFace BertForMaskedLM: Expected input batch_size (3200) to match target batch_size (16)

I'm working on a multiclass classification task (Bengali language sentiment analysis) with a pretrained Hugging Face model (BertForMaskedLM).
When the error occurred, I knew I had to change the label (output) size to match the input, but I don't know how. I'm adding the code snippets below.
MAX_LEN = 200
BATCH_SIZE = 16
The pretrained models used:
from transformers import BertForMaskedLM, BertTokenizer, pipeline
model = BertForMaskedLM.from_pretrained("sagorsarker/bangla-bert-base")
tokenizer = BertTokenizer.from_pretrained("sagorsarker/bangla-bert-base")
Code to make the PyTorch dataset:
from torch.utils.data import Dataset  # needed for the class below

class GPReviewDataset(Dataset):
    def __init__(self, reviews, targets, tokenizer, max_len):
        self.reviews = reviews
        self.targets = targets
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.reviews)

    def __getitem__(self, item):
        review = str(self.reviews[item])
        target = self.targets[item]
        encoding = self.tokenizer.encode_plus(
            review,
            add_special_tokens=True,
            max_length=self.max_len,
            truncation=True,
            return_token_type_ids=False,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'review_text': review,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'targets': torch.tensor(target, dtype=torch.long)
        }
The input dimensions are:
print(data['input_ids'].shape)
print(data['attention_mask'].shape)
print(data['targets'].shape)
Which Outputs:
torch.Size([16, 200])
torch.Size([16, 200])
torch.Size([16])
The training function:
def train_epoch(model, data_loader, optimizer, device, scheduler, n_examples):
    model = model.train()  # tells your model that we are training
    losses = []
    correct_predictions = 0

    for d in data_loader:
        input_ids = d["input_ids"].to(device)
        attention_mask = d["attention_mask"].to(device)
        targets = d["targets"].to(device)

        loss, logits = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=targets
        )
        # logits = classification scores before softmax
        # loss = classification loss

        logits = logits.view(-1, 28*28).detach().cpu().numpy()
        label_ids = targets.to('cpu').numpy()

        preds = np.argmax(logits, axis=1).flatten()  # returns indices of the maximum logit
        targ = label_ids.flatten()
        correct_predictions += np.sum(preds == targ)
        losses.append(loss.item())

        loss.backward()  # performs backpropagation (computes derivatives of the loss w.r.t. the parameters)
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # clipping gradients so they don't explode
        optimizer.step()  # after gradients are computed by loss.backward(), the optimizer updates the parameters using the internally stored grads
        scheduler.step()  # makes sure the learning rate changes; without this, the learning rate stays at its initial value
        optimizer.zero_grad()  # clears old gradients from the last step

    return correct_predictions / n_examples, np.mean(losses)
Where the training starts (and where the error triggers):
%%time
# standard block
# used accuracy as the metric here
history = defaultdict(list)
best_acc = 0

for epoch in range(EPOCHS):
    print(f'Epoch {epoch + 1}/{EPOCHS}')
    print('-' * 10)

    train_acc, train_loss = train_epoch(model, train_data_loader, optimizer, device, scheduler, len(df_train))
    print(f'Train loss {train_loss} Accuracy {train_acc}')

    val_acc, val_loss = eval_model(model, valid_data_loader, device, len(df_valid))
    print(f'Val loss {val_loss} Accuracy {val_acc}')
    print()

    history['train_acc'].append(train_acc)
    history['train_loss'].append(train_loss)
    history['val_acc'].append(val_acc)
    history['val_loss'].append(val_loss)

    if val_acc > best_acc:
        torch.save(model.state_dict(), 'best_model_state_a5.bin')
        best_acc = val_acc
The error:
Epoch 1/5
----------
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-41-fb5a4d77ce37> in <module>()
----> 1 get_ipython().run_cell_magic('time', '', "# standard block\n# used accuracy as metric here\nhistory = defaultdict(list)\n\nbest_acc = 0\n\nfor epoch in range(EPOCHS):\n\n print(f'Epoch {epoch + 1}/{EPOCHS}')\n print('-' * 10)\n\n train_acc, train_loss = train_epoch(model, train_data_loader, optimizer, device, scheduler, len(df_train))\n\n print(f'Train loss {train_loss} Accuracy {train_acc}')\n\n val_acc, val_loss = eval_model(model, valid_data_loader, device, len(df_valid))\n\n print(f'Val loss {val_loss} Accuracy {val_acc}')\n print()\n\n history['train_acc'].append(train_acc)\n history['train_loss'].append(train_loss)\n history['val_acc'].append(val_acc)\n history['val_loss'].append(val_loss)\n\n if val_acc > best_acc:\n torch.save(model.state_dict(), 'best_model_state_a5.bin')\n best_acc = val_acc\n\n# We are storing state of best model indicated by highest validation accuracy")
8 frames
/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2115 magic_arg_s = self.var_expand(line, stack_depth)
2116 with self.builtin_trap:
-> 2117 result = fn(magic_arg_s, cell)
2118 return result
2119
<decorator-gen-53> in time(self, line, cell, local_ns)
/usr/local/lib/python3.7/dist-packages/IPython/core/magic.py in <lambda>(f, *a, **k)
186 # but it's overkill for just that one bit of state.
187 def magic_deco(arg):
--> 188 call = lambda f, *a, **k: f(*a, **k)
189
190 if callable(arg):
/usr/local/lib/python3.7/dist-packages/IPython/core/magics/execution.py in time(self, line, cell, local_ns)
1191 else:
1192 st = clock2()
-> 1193 exec(code, glob, local_ns)
1194 end = clock2()
1195 out = None
<timed exec> in <module>()
<ipython-input-39-948eefef2f8d> in train_epoch(model, data_loader, optimizer, device, scheduler, n_examples)
13 input_ids=input_ids,
14 attention_mask=attention_mask,
---> 15 labels = targets
16 )
17
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.7/dist-packages/transformers/models/bert/modeling_bert.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, labels, output_attentions, output_hidden_states, return_dict)
1327 if labels is not None:
1328 loss_fct = CrossEntropyLoss() # -100 index = padding token
-> 1329 masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), labels.view(-1))
1330
1331 if not return_dict:
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/loss.py in forward(self, input, target)
1119 def forward(self, input: Tensor, target: Tensor) -> Tensor:
1120 return F.cross_entropy(input, target, weight=self.weight,
-> 1121 ignore_index=self.ignore_index, reduction=self.reduction)
1122
1123
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
2822 if size_average is not None or reduce is not None:
2823 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2824 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
2825
2826
ValueError: Expected input batch_size (3200) to match target batch_size (16).
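The sizes in the error are telling: 3200 = 16 × 200. BertForMaskedLM computes a token-level loss, so it flattens the [16, 200] input into 3200 token predictions and expects one label per token, while the dataset supplies one label per sequence. For sequence-level sentiment classification, the usual choice is a sequence-classification head. A minimal sketch, where NUM_CLASSES is a placeholder for the actual number of sentiment labels:

from transformers import BertForSequenceClassification

NUM_CLASSES = 3  # placeholder: set to the number of sentiment classes

model = BertForSequenceClassification.from_pretrained(
    "sagorsarker/bangla-bert-base",
    num_labels=NUM_CLASSES,
)

With labels of shape [16], this head returns a classification loss and logits of shape [16, NUM_CLASSES]; the logits.view(-1, 28*28) reshape in train_epoch would then also have to go, since the logits are already [batch_size, NUM_CLASSES].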

categorical_crossentropy expects targets to be binary matrices

First of all, I am not a programmer; I am teaching myself deep learning in order to undertake a real project with my own dataset. My situation can be broken down as follows:
I am trying to undertake a multiclass text classification project. I have a corpus of 1000 examples, each example with 4 possible labels (A1, A2, B1, B2). They are mutually exclusive. All the examples are in separate folders and separate .txt files.
After a lot of effort and some man tears, I managed to put together this code:
import os
import string
import keras
import nltk
from nltk.corpus import stopwords
from nltk import word_tokenize
import re
import numpy as np
import tensorflow as tf
from numpy import array
from sklearn.model_selection import KFold
from numpy.random import seed

seed(1)
tf.random.set_seed(1)

root = "D:/bananaCorpus"
train_dir = os.path.join(root, "train")

texts = []
labels = []

for label in ["A1", "A2", "B1", "B2"]:
    directory = os.path.join(train_dir, label)
    for fname in os.listdir(directory):
        if fname[-4:] == ".txt":
            f = open(os.path.join(directory, fname), encoding="cp1252")
            texts.append(f.read())
            f.close()
            if label == 'A1':
                labels.append(0)
            elif label == "A2":
                labels.append(1)
            elif label == "B1":
                labels.append(2)
            else:
                labels.append(3)

print(texts)
print(labels)
print("Corpus Length", len(root), "\n")
print("The total number of reviews in the train dataset is", len(texts), "\n")

stops = set(stopwords.words("english"))
print("The number of stopwords used in the beginning: ", len(stops), "\n")
print("The words removed from the corpus will be", stops, "\n")

## This adds new words or terms from the words_to_add list to the stopwords
words_to_add = []
[stops.add(w) for w in words_to_add]  # sets have add(), not append()

## This removes the words or terms in the words_to_remove list,
## so that they are no longer included in the stopwords
words_to_remove = ["i", "having"]
[stops.remove(w) for w in words_to_remove]

texts = [[w.lower() for w in word_tokenize("".join(str(review))) if w not in stops and w not in string.punctuation and len(w) > 2 and w.isalpha()] for review in texts]

print("customized stopwords: ", stops, "\n")
print("count of customized stopwords", len(stops), "\n")
print("**********", texts, "\n")

# vectorization
# tokenizing the raw data
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

maxlen = 50
training_samples = 200
validation_samples = 10000
max_words = 10000

# delete?
tokens = keras.preprocessing.text.text_to_word_sequence(str(texts))
print("Sequence of tokens: ", tokens, "\n")

tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
print("Tokens:", sequences, "\n")

word_index = tokenizer.word_index
print("Unique tokens:", word_index, "\n")
print(' %s unique tokens in total.' % len(word_index), "\n")
print("Unique tokens: ", word_index, "\n")
print("Dictionary of words and their count:", tokenizer.word_counts, "\n")
print(" Number of docs/seqs used to fit the Tokenizer:", tokenizer.document_count, "\n")
print(tokenizer.word_index, "\n")
print("Dictionary of words and how many documents each appeared in:", tokenizer.word_docs, "\n")

data = pad_sequences(sequences, maxlen=maxlen, padding="post")
print("padded data", "\n")
print(data)

# checking the encoding with a new document
text2 = "I like to study english in the morning and play games in the afternoon"
text2 = [w.lower() for w in word_tokenize("".join(str(text2))) if w not in stops and w not in string.punctuation
         and len(w) > 2 and w.isalpha()]
sequences = tokenizer.texts_to_sequences([text2])
text2 = pad_sequences(sequences, maxlen=maxlen, padding="post")
print("padded text2", "\n")
print(text2)

# cross-validation
labels = np.asarray(labels)
print('Shape of data tensor:', data.shape, "\n")
print('Shape of label tensor:', labels.shape, "\n")
print("labels", labels, "\n")

kf = KFold(n_splits=4, random_state=None, shuffle=True)
kf.get_n_splits(data)
print(kf)

for train_index, test_index in kf.split(data):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = data[train_index], data[test_index]
    y_train, y_test = labels[train_index], labels[test_index]

# Pretrained embedding
glove_dir = 'D:\\glove'

embeddings_index = {}
f = open(os.path.join(glove_dir, 'glove.6B.100d.txt'), encoding="utf-8")
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()

print("Found %s word vectors from GloVe." % len(embeddings_index))

# Preparing the GloVe word-embeddings matrix to pass to the embedding layer (max_words, embedding_dim)
embedding_dim = 100
embedding_matrix = np.zeros((max_words, embedding_dim))
for word, i in word_index.items():
    if i < max_words:
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

# define vocabulary size (largest integer value)
# define model
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense
from keras import layers
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D

model = Sequential()
model.add(Embedding(max_words, embedding_dim, input_length=maxlen))  # vocabulary size + the size of the GloVe version + max length of the input documents
model.add(Conv1D(filters=32, kernel_size=8, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(10, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
print(model.summary())

# Loading pretrained word embeddings and freezing the Embedding layer
model.layers[0].set_weights([embedding_matrix])
model.layers[0].trainable = False

# compile network
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# fit network
history = model.fit(X_train, y_train, epochs=6, verbose=2)

# evaluate
loss, acc = model.evaluate(X_test, y_test, verbose=0)
print('Test Accuracy: %f' % (acc*100))
However, I am getting this error:
Traceback (most recent call last):
File "D:/banana.py", line 177, in <module>
history=model.fit(X_train, y_train, epochs=6,verbose=2)
File "D:\ProgramData\Miniconda3\envs\Env_DLexp1\lib\site-packages\keras\engine\training.py", line 1154, in fit
batch_size=batch_size)
File "D:\ProgramData\Miniconda3\envs\Env_DLexp1\lib\site-packages\keras\engine\training.py", line 642, in _standardize_user_data
y, self._feed_loss_fns, feed_output_shapes)
File "D:\ProgramData\Miniconda3\envs\Env_DLexp1\lib\site-packages\keras\engine\training_utils.py", line 284, in check_loss_and_target_compatibility
' while using as loss `categorical_crossentropy`. '
ValueError: You are passing a target array of shape (3, 1) while using as loss `categorical_crossentropy`. `categorical_crossentropy` expects targets to be binary matrices (1s and 0s) of shape (samples, classes). If your targets are integer classes, you can convert them to the expected format via:
```
from keras.utils import to_categorical
y_binary = to_categorical(y_int)
```
Alternatively, you can use the loss function `sparse_categorical_crossentropy` instead, which does expect integer targets.
I tried everything the error message says, but to no avail. After some research, I came to the conclusion that the model is not trying to predict multiple classes, which is why the categorical_crossentropy loss is not being accepted. I then realized that if I change it to binary cross-entropy the error goes away, which really confirms that this is not working as a multiclass classification model.
What can I do to adjust my code to make it work as intended? Am I s*it out of luck and do I have to start a whole different project?
Any type of guidance would be of immense help for me and my mental health.
You should make two changes. First, the number of neurons in the output layer of your network should match the number of classes, and it should use the softmax activation:
model.add(Dense(4, activation='softmax'))
Then you should use the sparse_categorical_crossentropy loss, since you are not one-hot encoding the labels:
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
Then the model should be able to train without errors.
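Alternatively, if you want to keep categorical_crossentropy, the error message's own suggestion works: one-hot encode the integer labels before fitting (a sketch, assuming the 4 classes from the question):

from keras.utils import to_categorical

y_train = to_categorical(y_train, num_classes=4)
y_test = to_categorical(y_test, num_classes=4)

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])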

unknown resampling filter error when trying to create my own dataset with pytorch

I am trying to create a CNN with data augmentation in PyTorch to classify dogs and cats. The issue I am having is that when I try to load my dataset and enumerate through it, I keep getting this error:
Traceback (most recent call last):
File "<ipython-input-55-6337e0536bae>", line 75, in <module>
for i, (inputs, labels) in enumerate(trainloader):
File "/usr/local/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 188, in __next__
batch = self.collate_fn([self.dataset[i] for i in indices])
File "/usr/local/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 188, in <listcomp>
batch = self.collate_fn([self.dataset[i] for i in indices])
File "/usr/local/lib/python3.6/site-packages/torchvision/datasets/folder.py", line 124, in __getitem__
img = self.transform(img)
File "/usr/local/lib/python3.6/site-packages/torchvision/transforms/transforms.py", line 42, in __call__
img = t(img)
File "/usr/local/lib/python3.6/site-packages/torchvision/transforms/transforms.py", line 147, in __call__
return F.resize(img, self.size, self.interpolation)
File "/usr/local/lib/python3.6/site-packages/torchvision/transforms/functional.py", line 197, in resize
return img.resize((ow, oh), interpolation)
File "/usr/local/lib/python3.6/site-packages/PIL/Image.py", line 1724, in resize
raise ValueError("unknown resampling filter")
ValueError: unknown resampling filter
and I really don't know what's wrong with my code. I have provided the code below:
# Creating the CNN

# Importing the libraries
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import torchvision
from torchvision import transforms

# Creating the CNN Model
class CNN(nn.Module):
    def __init__(self, nb_outputs):
        super(CNN, self).__init__()  # activates the inheritance and allows the use of all the tools in nn.Module
        # making the 2 convolutional layers that will be used in the convolutional neural network
        self.convolution1 = nn.Conv2d(in_channels = 1, out_channels = 32, kernel_size = 5)  # kernel_size -> the dimension of the feature detector, e.g. kernel_size = 5 => feature detector of size 5x5
        self.convolution2 = nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 2)
        # making 2 full connections: one to connect the inputs of the ANN to the hidden layer and another to connect the hidden layer to the outputs of the ANN
        self.fc1 = nn.Linear(in_features = self.count_neurons((1, 64, 64)), out_features = 40)
        self.fc2 = nn.Linear(in_features = 40, out_features = nb_outputs)

    def count_neurons(self, image_dim):
        x = Variable(torch.rand(1, *image_dim))  # this variable represents a fake image that lets us compute the number of neurons
        # to pass the elements of the tuple image_dim into the function as a list of arguments, we add a * before image_dim
        # since x will be going into the neural network, we convert it into a torch Variable using the Variable() function
        x = F.relu(F.max_pool2d(self.convolution1(x), 3, 2))  # apply the convolution to x, then max pooling, then activate all the neurons in the pooling layer
        x = F.relu(F.max_pool2d(self.convolution2(x), 3, 2))  # the signals are now propagated to the second convolutional layer
        # now flatten x to obtain the number of neurons in the flattening layer
        return x.data.view(1, -1).size(1)  # flattens x into a huge vector and returns its size; that size is the number of neurons that will be input to the ANN
        # even though x is not a real image, the size of the flattened vector only depends on the dimensions of the input image, so x just needs the same dimensions as the image

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.convolution1(x), 3, 2))
        x = F.relu(F.max_pool2d(self.convolution2(x), 3, 2))
        # flattening layer of the CNN
        x = x.view(x.size(0), -1)
        # x is now the input to the ANN
        x = F.relu(self.fc1(x))  # propagate the signals from the flattening layer to the fully connected layer and break the linearity with relu
        x = F.sigmoid(self.fc2(x))
        # x is now the output neurons of the ANN
        return x

train_tf = transforms.Compose([transforms.RandomHorizontalFlip(),
                               transforms.Resize(64,64),
                               transforms.RandomRotation(20),
                               transforms.RandomGrayscale(.2),
                               transforms.ToTensor()])

test_tf = transforms.Compose([transforms.Resize(64,64),
                              transforms.ToTensor()])

training_set = torchvision.datasets.ImageFolder(root = './dataset/training_set',
                                                transform = train_tf)
test_set = torchvision.datasets.ImageFolder(root = './dataset/test_set',
                                            transform = test_tf)  # reuse the transform defined above

trainloader = torch.utils.data.DataLoader(training_set, batch_size=32,
                                          shuffle=True, num_workers=0)
testloader = torch.utils.data.DataLoader(test_set, batch_size=32,
                                         shuffle=False, num_workers=0)

# training the model
cnn = CNN(1)
cnn.train()
loss = nn.BCELoss()
optimizer = optim.Adam(cnn.parameters(), lr = 0.001)  # the optimizer => Adam optimizer
nb_epochs = 25

for epoch in range(nb_epochs):
    train_loss = 0.0
    train_acc = 0.0
    total = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = Variable(inputs), Variable(labels)
        cnn.zero_grad()
        outputs = cnn(inputs)
        loss_error = loss(outputs, labels)
        loss_error.backward()  # backpropagate before stepping the optimizer
        optimizer.step()
        _, pred = torch.max(outputs.data, 1)
        total += labels.size(0)
        train_loss += loss_error.data[0]
        train_acc += (pred == labels).sum()
    train_loss = train_loss / len(trainloader)
    train_acc = train_acc / total
    print('Epoch: %d, loss: %.4f, accuracy: %.4f' % (epoch+1, train_loss, train_acc))
The folder arrangement for the code is /dataset/training_set, and inside the training_set folder are two more folders, one for all the cat images and the other for all the dog images. Each image is named either dog.xxxx.jpg or cat.xxxx.jpg, where xxxx represents the number; the first cat image is cat.1.jpg, up to cat.4000.jpg. The test_set folder has the same format. The number of training images is 8000 and the number of test images is 2000. If anyone can point out my error, I would greatly appreciate it.
Thank you
Try to set the desired size in transforms.Resize as a tuple:
transforms.Resize((64, 64))
PIL is using the second argument (in your case 64) as the interpolation method.
In torchvision.transforms.Compose([...]), put every transform inside the brackets; written this way, the code will not give the error.
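For example, the training pipeline from the question would become (only the Resize calls change):

train_tf = transforms.Compose([transforms.RandomHorizontalFlip(),
                               transforms.Resize((64, 64)),  # tuple: (height, width)
                               transforms.RandomRotation(20),
                               transforms.RandomGrayscale(.2),
                               transforms.ToTensor()])

test_tf = transforms.Compose([transforms.Resize((64, 64)),
                              transforms.ToTensor()])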

Difference between Caffe and Keras

I have trained LeNet for MNIST using Caffe, and now I would like to export this model to be used within Keras.
To this end, I tried to extract the weights from caffe.Net and use them to initialize Keras's network. However, I received different predictions from the two models, so I tried to debug them layer by layer, starting with the first one. The code I tested is as follows:
# import caffe and load caffe.Net from the prototxt and caffemodel files
import sys
sys.path.append('/opt/caffe/python')
import caffe
net = caffe.Net('lenet_train_test.prototxt', 'save/mnist_iter_500000.caffemodel', caffe.TEST)
# this should be the kernel weights and bias for the first convolution layer 'conv1'
c1_w = net.params['conv1'][0].data
c1_b = net.params['conv1'][1].data
# import Keras and build a convolution layer using the same parameters as in lenet_train_test.prototxt and instantiate it with the weights above
import keras
from keras.layers import Convolution2D
conv1 = Convolution2D(20, 5, 5, border_mode='valid', input_shape=(1, 28, 28), weights=[c1_w,c1_b], activation='linear')
from keras.models import Sequential
model=Sequential()
model.add(conv1)
model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])
# load MNIST data and do scaling like I did when training Caffe model
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_test = X_test.reshape(X_test.shape[0], 1, 28, 28)
X_test = X_test.astype('float32')
X_test *= 0.00392157
# set the first test example and get the prediction
net.blobs['data'].data[0] = X_test[0][0]
net.forward()
out0 = net.blobs['conv1'].data[0] # this is the prediction for 20 kernels
m0 = out0[0] # just consider the first one
m0.shape # >>> (24,24)
# pass the same example through the conv1 layer in Keras model
import numpy as np
a = np.zeros( (1,1,28,28) )
a[0] = X_test[0]
out1 = model.predict_on_batch(a) # this is the prediction for 20 kernels
m1 = out1[0][0] # just consider the first one
m1.shape # >>> (24,24) the same size
# I get lots of 'False'
m0 == m1
Have I done anything wrong in the layer construction? Or do Caffe and Keras implement 2D convolution differently?
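Two things are worth checking before concluding the weight transfer is wrong (a hedged sketch, not a confirmed diagnosis). First, floating-point outputs should be compared with a tolerance rather than with ==. Second, Caffe computes cross-correlation, while some Keras backends (e.g. Theano) implement true convolution with flipped kernels, so the Caffe kernels may need to be flipped along both spatial axes:

import numpy as np

# Compare with a tolerance instead of exact equality.
print(np.allclose(m0, m1, atol=1e-5))

# If the outputs still differ substantially, try flipping the Caffe kernels
# spatially before handing them to Keras (needed when the backend implements
# true convolution rather than cross-correlation).
c1_w_flipped = c1_w[:, :, ::-1, ::-1]
conv1_flipped = Convolution2D(20, 5, 5, border_mode='valid', input_shape=(1, 28, 28),
                              weights=[c1_w_flipped, c1_b], activation='linear')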