tfa.optimizers.MultiOptimizer - TypeError: 'Not JSON Serializable:' - json

I'm trying to use tfa.optimizers.MultiOptimizer(). I did everything according to the docs (https://www.tensorflow.org/addons/api_docs/python/tfa/optimizers/MultiOptimizer) yet I'm getting the following error:
TypeError: ('Not JSON Serializable:', <tf.Tensor 'gradient_tape/model_80/dense_3/Tensordot/MatMul/MatMul:0' shape=(1, 1) dtype=float32>)
Below is a minimal, working example that reproduces the error; just copy and paste it. The error occurs when the first epoch finishes and the callback tries to save the model.
##############################################################################
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow.keras.layers as l
import tensorflow_addons.layers as la
import tensorflow.keras as ke
import numpy as np
##############################################################################
def build_model_1():
    """Build and compile the single-Dense-layer model used in the repro.

    The model feeds an input of shape ``(32, 1)`` through ``Dense(1)`` and is
    compiled with a ``tfa.optimizers.MultiOptimizer`` wrapping two Adam
    optimizers, using ``'mse'`` as both loss and metric.

    Returns:
        The compiled Keras model.
    """
    model_input = l.Input(shape=(32, 1))
    x = l.Dense(1)(model_input)
    model = ke.Model(inputs=model_input, outputs=x)

    # One optimizer per slice of ``model.layers``.
    # NOTE(review): this model appears to have only an input layer and one
    # Dense layer, so the ``[5:]`` slice is empty and the second optimizer
    # is given no layers -- confirm this is intended for the repro.
    optimizers = [
        tf.keras.optimizers.Adam(),
        tf.keras.optimizers.Adam(),
    ]
    optimizers_and_layers = [
        (optimizers[0], model.layers[:5]),
        (optimizers[1], model.layers[5:]),
    ]
    optimizer = tfa.optimizers.MultiOptimizer(optimizers_and_layers)
    model.compile(optimizer=optimizer, loss='mse', metrics='mse')

    # Serializing the optimizer on its own; the result is not used afterwards.
    test = tf.keras.optimizers.serialize(optimizer)
    return model
##############################################################################
# Toy regression data: inputs 0..9999 and targets -10000..-1, each reshaped
# into a (10000, 1) column.
input_data = np.arange( 0, 10000, 1).reshape(10000,1)
target_data = np.arange(-10000, 0, 1).reshape(10000,1)
model = build_model_1()
# Checkpoint to 'best_model.h5' only when the monitored 'val_mse' improves
# (mode='min': lower is better).
model_checkpoint = ke.callbacks.ModelCheckpoint('best_model.h5',
monitor='val_mse',
mode='min',
save_best_only=True,
verbose=1)
# Train for 5 epochs, holding out 20% of the data for validation; the
# checkpoint callback runs during training.
training_history = model.fit(x = input_data,
y = target_data,
validation_split = 0.2,
epochs = 5,
verbose = 1,
callbacks = [model_checkpoint])
##############################################################################

When saving a complete Keras model (with its own structure in the .h5 file) the tf.keras.Model object is completely serialized as a JSON: this means that every property of the model should be JSON serializable.
NOTE: tf.Tensor are NOT JSON serializable.
When using this multi optimizer from tfa you're adding properties to the model that the JSON serializer will try (and fail) to serialize.
In particular, there is an attribute gv that, I think, comes from the custom optimizer used:
'gv': [(<tf.Tensor 'gradient_tape/model/dense/Tensordot/MatMul/MatMul:0' shape=(1, 1) dtype=float32>, <tf.Variable 'dense/kernel:0' shape=(1, 1) dtype=float32, numpy=array([[-0.55191684]], dtype=float32)>), (<tf.Tensor 'gradient_tape/model/dense/BiasAdd/BiasAddGrad:0' shape=(1,) dtype=float32>, <tf.Variable 'dense/bias:0' shape=(1,) dtype=float32, numpy=array([-0.23444518], dtype=float32)>)]},
None of these tf.Tensor objects are JSON serializable, which is why saving fails.
The only option is not to save the complete model (with all its attributes, which would have to be defined as Keras layers, and that is not possible in this case) but to save only the model parameters.
In short, if you add the save_weights_only=True to the callback your training (and checkpoint of the weights) will work fine.
# Same checkpoint callback as in the question, but storing weights only.
model_checkpoint = ke.callbacks.ModelCheckpoint(
"best_model.h5",
monitor="val_mse",
mode="min",
save_best_only=True,
verbose=1,
# Save just the weights instead of the full (JSON-serialized) model.
save_weights_only=True,
)

Related

NotImplementedError on using torch.onnx.export

I am trying to convert a pre-saved PyTorch model into a TensorFlow one via ONNX. For now, the following code is to export the model into .onnx format. The neural network has 2 inputs, one hidden layer with 5 neurons and a scalar output.
Here's the code I'm working with:
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
class Model(nn.Module):
    """Fully connected ReLU network wrapped in a single ``nn.Sequential``.

    Layer order inside ``self.model``:
    ``Linear(dim_in, n_h_neurons)``, then ``n_h_layers - 1`` repetitions of
    ``ReLU -> Linear(n_h_neurons, n_h_neurons)``, then
    ``ReLU -> Linear(n_h_neurons, dim_out) -> Hardtanh(in_bound, out_bound)``.

    NOTE: this class intentionally defines no ``forward`` method; that
    omission is what triggers the ``NotImplementedError`` discussed in the
    surrounding text.
    """

    def __init__(self, n_h_layers, n_h_neurons, dim_in, dim_out, in_bound, out_bound):
        """Store the hyper-parameters and build the layer stack.

        Args:
            n_h_layers: number of hidden layers (at least 1).
            n_h_neurons: width of every hidden layer.
            dim_in: number of input features.
            dim_out: number of output features.
            in_bound: lower clamp value passed to ``nn.Hardtanh``.
            out_bound: upper clamp value passed to ``nn.Hardtanh``.
        """
        super(Model, self).__init__()
        self.n_h_layers = n_h_layers
        self.n_h_neurons = n_h_neurons
        self.dim_in = dim_in
        self.dim_out = dim_out
        self.in_bound = in_bound
        self.out_bound = out_bound

        # Modules are created in the same order as before (input, output,
        # then hidden) so seeded weight initialisation is unchanged.
        layer_input = [nn.Linear(dim_in, n_h_neurons, bias=True)]
        layer_output = [
            nn.ReLU(),
            nn.Linear(n_h_neurons, dim_out, bias=True),
            nn.Hardtanh(in_bound, out_bound),
        ]

        # hidden layers: a flat list of ReLU/Linear pairs, built directly
        # instead of round-tripping module objects through a numpy array.
        layer_hidden = []
        for _ in range(n_h_layers - 1):
            layer_hidden += [nn.ReLU(), nn.Linear(n_h_neurons, n_h_neurons, bias=True)]

        # nn model
        layers = layer_input + layer_hidden + layer_output
        self.model = nn.Sequential(*layers)
        print(self.model)
# The snippet only imports ``torch.nn``; the calls below need the top-level
# package as well.
import torch

# Load the saved state dict and build a fresh model with matching sizes
# (1 hidden layer, 5 neurons, 2 inputs, 1 output, output clamped to [-1, 1]).
trained_nn = torch.load('path')
trained_model = Model(1, 5, 2, 1, -1, 1)
# strict=False means keys that do not match the model's parameter names are
# skipped instead of raising an error.
trained_model.load_state_dict(trained_nn, strict=False)
# Dummy input of shape (1, 2), used to run the model during export.
dummy_input = Variable(torch.randn(1, 2))
torch.onnx.export(trained_model, dummy_input, 'file.onnx', verbose=True)
I have two problems:
Running this snippet raises "NotImplementedError" in _forward_unimplemented in module.py as follows:
File ".../anaconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 201, in _forward_unimplemented
raise NotImplementedError
NotImplementedError
I am not familiar with exception handling in Python and I do not know what I must change in order to fix the error.
When I print trained_nn, this is what it gives me:
OrderedDict([('0.weight',
tensor([[ 0.2035, -0.7679],
[ 1.6368, -0.4135],
[-0.0908, -0.2335],
[ 1.3731, -0.3135],
[ 0.6361, 0.2521]])),
('0.bias', tensor([-1.6907, 0.7262, 1.4032, 1.2551, 0.8013])),
('2.weight',
tensor([[-0.4603, -0.0719, 0.4082, -1.0235, -0.0538]])),
('2.bias', tensor([-1.1568]))])
However, printing trained_model.state_dict() gives me a neural network with a completely different set of weights and biases, although I believe that it should be giving me the exact same model as before as this is what I need to save as onnx file?
OrderedDict([('model.0.weight',
tensor([[ 0.4817, 0.0928],
[-0.4313, 0.1253],
[ 0.6681, -0.4029],
[ 0.6474, 0.0029],
[-0.4663, 0.5029]])),
('model.0.bias',
tensor([-0.2292, 0.6674, -0.3755, 0.0778, 0.0527])),
('model.2.weight',
tensor([[-0.2097, -0.3029, 0.2792, 0.2596, 0.1362]])),
('model.2.bias', tensor([-0.1835]))])
Not sure what mistakes I'm making. Any help is appreciated.
When you subclass nn.Module you need to implement the forward method. In your case you need to add:
class Model(nn.Module):
    """Sketch of the fix: the same module, now with a ``forward`` method."""

    def __init__(self, n_h_layers, n_h_neurons, dim_in, dim_out, in_bound, out_bound):
        super(Model, self).__init__()
        # Layer construction elided; it must assign ``self.model``.
        ...

    def forward(self, x):
        """Run ``x`` through the wrapped ``self.model`` and return the result."""
        return self.model(x)
The names of the parameters do not match:
model.0.weight != 0.weight
model.0.bias != 0.bias
The prefix "model." is missing from the saved keys.
So when you call load_state_dict() with strict=False, the mismatched parameters are silently skipped and the model keeps its randomly initialized weights.
You can rename the parameters to match the model:
# Load the saved state dict (keys such as '0.weight', '0.bias', ...).
trained_nn = torch.load('path')
# Prepend 'model.' to every key so the names match the wrapper module's
# parameters ('model.0.weight', ...).
trained_nn = {f'model.{name}': w for name, w in trained_nn.items()}
# strict=True so that any remaining name mismatch raises instead of being ignored.
trained_model.load_state_dict(trained_nn, strict=True)

How to solve data type error using pickle and pandas udf for XGBoost model deployment python?

I have created a pandas udf() which splits the dataset, fits an XGBoost model, saves it using pickle, and returns a DataFrame with the saved model as a string column. The problem is that calling the pandas udf() gives an 'unsupported data type' error, whereas the same code runs successfully without the pandas udf() framework. Does anyone have any ideas on this?
#pandas_udf(schema,PandasUDFType.GROUPED_MAP)
def pickle_model(df1):
    """Fit an XGBoost classifier on one group of rows and return it pickled.

    Args:
        df1: pandas DataFrame for a single group. Columns 1..49 (by position)
            are used as features and column 50 as the label.

    Returns:
        A one-row pandas DataFrame with a single column ``model_str`` that
        holds the ``pickle.dumps`` bytes of the fitted model.
    """
    X = df1.iloc[:, 1:50]
    Y = df1.iloc[:, 50]
    seed = 7
    test_size = 0.30
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=test_size, random_state=seed)
    model = XGBClassifier(
        max_depth=3,
        # ... remaining hyper-parameters were elided in the original post
    )
    # NOTE(review): ``eval_set`` is not referenced by the visible code;
    # presumably it was passed through the elided ``fit`` arguments.
    eval_set = [(X_train, y_train), (X_test, y_test)]
    model.fit(
        X_train,
        y_train,
        early_stopping_rounds=8,
        # ... remaining fit arguments were elided in the original post
    )
    model_str = pickle.dumps(model)
    model_saved = pd.DataFrame([model_str], columns=['model_str'])
    return model_saved
# Run pickle_model once per 'id' group of df2, then collect the results.
pickled_model = df2.groupby('id').apply(pickle_model)
pickled_model.collect()
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 691.0 failed 4 times, most recent failure: Lost task 0.3 in stage 691.0 (TID 38146, 10.139.64.9, executor 40): java.lang.UnsupportedOperationException: Unsupported data type: struct<type:tinyint,size:int,indices:array<int>,values:array<double>>
For dumping model pickle files use
# Decode the pickle bytes as latin-1 (every byte maps to one character) so
# the model can be stored as a plain string.
model_str = str(pickle.dumps(model),'latin1')
And when loading it back, use
# Re-encode with the same latin-1 codec to recover the original bytes.
# NOTE: pickle.loads executes arbitrary code; only load strings you trust.
pickle.loads(bytes(model_str,'latin1'))

Keras Functional API and loss function with multiple inputs

I am trying to use a custom Keras loss function that apart from the usual signature (y_true, y_pred) takes another parameter sigma (which is also produced by the last layer of the network).
The training works fine, but then I am not sure how to perform forward propagation and return sigma (while mu is the output of the model.predict method).
This is the code I am using, which features a custom layer GaussianLayer that returns the list [mu, sigma].
import tensorflow as tf
from keras import backend as K
from keras.layers import Input, Dense, Layer, Dropout
from keras.models import Model
from keras.initializers import glorot_normal
import numpy as np
def custom_loss(sigma):
    """Return a Keras-style loss function that closes over ``sigma``.

    Args:
        sigma: value used as the variance term of the loss; presumably a
            tensor compatible with ``y_true - y_pred`` (confirm with caller).

    Returns:
        A function ``gaussian_loss(y_true, y_pred)`` computing
        ``mean(0.5 * log(sigma) + 0.5 * (y_true - y_pred)**2 / sigma) + 10``.
    """
    def gaussian_loss(y_true, y_pred):
        return tf.reduce_mean(0.5*tf.log(sigma) + 0.5*tf.div(tf.square(y_true - y_pred), sigma)) + 10

    return gaussian_loss
class GaussianLayer(Layer):
    """Keras layer with two linear heads that returns ``[mu, sigma]``.

    ``mu`` is ``x . kernel_1 + bias_1``; ``sigma`` is
    ``log(1 + exp(x . kernel_2 + bias_2)) + 1e-06``, which keeps it positive.
    """

    def __init__(self, output_dim, **kwargs):
        """Store ``output_dim`` and forward remaining kwargs to ``Layer``."""
        self.output_dim = output_dim
        super(GaussianLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the two kernels and two biases as trainable weights."""
        # NOTE(review): the kernels' first dimension is hard-coded to 30, so
        # the layer presumably only works after a 30-unit layer -- confirm.
        self.kernel_1 = self.add_weight(name='kernel_1',
                                        shape=(30, self.output_dim),
                                        initializer=glorot_normal(),
                                        trainable=True)
        self.kernel_2 = self.add_weight(name='kernel_2',
                                        shape=(30, self.output_dim),
                                        initializer=glorot_normal(),
                                        trainable=True)
        self.bias_1 = self.add_weight(name='bias_1',
                                      shape=(self.output_dim, ),
                                      initializer=glorot_normal(),
                                      trainable=True)
        self.bias_2 = self.add_weight(name='bias_2',
                                      shape=(self.output_dim, ),
                                      initializer=glorot_normal(),
                                      trainable=True)
        super(GaussianLayer, self).build(input_shape)

    def call(self, x):
        """Return ``[output_mu, output_sig_pos]`` for input ``x``."""
        output_mu = K.dot(x, self.kernel_1) + self.bias_1
        output_sig = K.dot(x, self.kernel_2) + self.bias_2
        # log(1 + exp(.)) is positive; the small constant keeps it away from 0.
        output_sig_pos = K.log(1 + K.exp(output_sig)) + 1e-06
        return [output_mu, output_sig_pos]

    def compute_output_shape(self, input_shape):
        """Return one ``(input_shape[0], output_dim)`` shape per output."""
        return [(input_shape[0], self.output_dim), (input_shape[0], self.output_dim)]
# Input placeholder tensor with one feature per sample.
inputs = Input(shape=(1,))
# Stack of Dense/Dropout layers; the last Dense has 30 units.
x = Dense(30, activation='relu')(inputs)
x = Dropout(0.3)(x)
x = Dense(30, activation='relu')(x)
x = Dense(40, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(30, activation='relu')(x)
# The custom layer returns two tensors, unpacked here as mu and sigma.
mu, sigma = GaussianLayer(1)(x)
# Only mu is a model output; sigma reaches training through the loss closure.
model = Model(inputs, mu)
model.compile(loss=custom_loss(sigma), optimizer='adam')
model.fit(train_x, train_y, epochs=150)
Since your model returns two tensors as output, you also need to pass a list of two arrays as the output when calling fit() method. That's essentially what the error is trying to convey:
Error when checking model target:
So the error is in targets (i.e. labels). What is wrong?
the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 2 array(s), but instead got the following list of 1 arrays:
I may have found the answer among Keras FAQs.
I found out that it is possible to retrieve intermediate steps' output using the code snippet below:
# Name of the layer whose output should be exposed.
layer_name = 'main_output'
# Second model sharing the trained model's input, but ending at that layer.
intermediate_layer_model = Model(inputs=model.input,
outputs=model.get_layer(layer_name).output)
intermediate_output = intermediate_layer_model.predict(train_x[0])
# Bare expression: displays the value in an interactive session.
intermediate_output
In this case intermediate_output is a list of two values [mu, sigma] (just needed to name the output layer main_output and retrieve it later)

Train Caffe CNN to output multidimensional features

I'd like to build a feature extractor using Caffe's CNNs and I already have a large sample of input features and desired output features.
Now I need to train some convolutional layers to learn how to transform the input features into the output.
My question is: How can I achieve this on Caffe?
As a minimal example, suppose I wanted to train a CNN that inverts the values of a 2D array.
For example, if my input is
[[0,1,0],
[1,1,1],
[0,1,0]]
the CNN should output
[[1,0,1],
[0,0,0],
[1,0,1]].
For
[[0,0,0],
[0,1,0],
[0,0,0]]
the output should be
[[1,1,1],
[1,0,1],
[1,1,1]]
and so on.
Of course this is just a minimal example to share, the actual problem is nearly impossible to tackle without the use of multiple convolutions.
I was able to create this code for this problem. I used the Euclidean Loss at the end, but unfortunately the CNN is not learning anything.
ROOT_DIR = '/home'
from os.path import join
import numpy as np
import h5py
from itertools import product
import caffe
from caffe import layers
from caffe.proto import caffe_pb2
#%% GENERATE DATA
# Every binary 3x3 grid (2**9 of them), each shaped (1, 3, 3), and its
# element-wise inversion (1 - x) as the target.
data_in = np.array([np.array(seq).reshape(1,3,3) for seq in product([0,1], repeat=9)])
data_out = np.array([-1*array+1 for array in data_in])
# data.txt holds the path of the HDF5 file; it is the HDF5Data source below.
with open(join(ROOT_DIR, 'data.txt'), 'w') as ftxt:
    with h5py.File(join(ROOT_DIR, 'data.hdf5'), 'w') as fhdf5:
        fhdf5['data'] = data_in.astype(np.float32)
        fhdf5['label'] = data_out.astype(np.float32)
    ftxt.write(join(ROOT_DIR, 'data.hdf5'))

#%%DEFINE NET
# Two 1x1 convolutions, each followed by an in-place ReLU, with a Euclidean
# loss between the final activation and the label.
net = caffe.NetSpec()
net.data, net.label = layers.HDF5Data(batch_size=64, source=join(ROOT_DIR, 'data.txt'), ntop=2)
net.conv1 = layers.Convolution(net.data, kernel_size=1, num_output=128)
net.relu1 = layers.ReLU(net.conv1, in_place=True)
net.conv2 = layers.Convolution(net.relu1, kernel_size=1, num_output=1)
net.relu2 = layers.ReLU(net.conv2, in_place=True)
net.loss = layers.EuclideanLoss(net.relu2, net.label)
# The prototxt is generated once here (a separate discarded to_proto() call
# was dropped as redundant).
with open(join(ROOT_DIR, 'invert_net.prototxt'), 'w') as f:
    f.write(str(net.to_proto()))

#%% DEFINE SOLVER
solver = caffe_pb2.SolverParameter()
solver.train_net = join(ROOT_DIR, 'invert_net.prototxt')
solver.max_iter = 10000
solver.base_lr = 0.01
solver.lr_policy = 'fixed'
with open(join(ROOT_DIR, 'solver.prototxt'), 'w') as f:
    f.write(str(solver))

#%% TRAIN NET
caffe.set_mode_cpu()
# ``solver`` is rebound from the SolverParameter message to the SGD solver.
solver = caffe.SGDSolver(join(ROOT_DIR, 'solver.prototxt'))
solver.solve()

How to test the correctness of a Keras custom layer?

After creating a Keras custom layer with training weight, how can one test the correctness of the code? It does not seem to be described in Keras' manual.
For example, to test the expected behavior of a function, one can write a unit test. How can we do this for a Keras custom layer?
You can still do something like a unit test by getting the output of the custom layer for a given input and verifying it against the manually calculated output.
Let's say your custom layer Custom takes (None, 3, 200) as input shape and returns (None, 3)
from keras.layers import Input
from keras.models import Model
# Wrap the custom layer in a minimal model: one input of shape (3, 200) per
# sample, passed straight through the layer under test.
inp = Input(shape=(3, 200))
out = Custom()(inp)
model = Model(inp, out)
# Layer output for a known input, to compare with a hand-computed result.
output = model.predict(your_input)
You can then verify the layer output (output) against your expected output for a known input (your_input).
There is also layer_test in the Keras utils:
https://github.com/keras-team/keras/blob/master/keras/utils/test_utils.py
It provides the following code, which tests the shape, the actual result, serialization and training:
def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None,
               input_data=None, expected_output=None,
               expected_output_dtype=None, fixed_batch_size=False):
    """Test routine for a layer with a single input tensor
    and single output tensor.

    Args:
        layer_cls: layer class to instantiate and test.
        kwargs: keyword arguments passed to ``layer_cls``; the dict is only
            unpacked here, never modified.
        input_shape: full input shape including the batch dimension; required
            when ``input_data`` is None. ``None`` entries are replaced by a
            random size in ``[1, 4)``.
        input_dtype: dtype of the generated input; defaults to ``K.floatx()``
            when data is generated, or to ``input_data.dtype`` otherwise.
        input_data: optional array to feed instead of random data.
        expected_output: optional array the prediction is compared against
            (``rtol=1e-3``).
        expected_output_dtype: expected dtype of the layer output; defaults
            to ``input_dtype``.
        fixed_batch_size: if True, build the ``Input`` with ``batch_shape``
            instead of ``shape``.

    Returns:
        The model's prediction for ``input_data``, for further checks by the
        caller.

    Raises:
        AssertionError: if any dtype, shape or value check fails.
    """
    # generate input data
    if input_data is None:
        assert input_shape
        if not input_dtype:
            input_dtype = K.floatx()
        input_data_shape = list(input_shape)
        for i, e in enumerate(input_data_shape):
            if e is None:
                input_data_shape[i] = np.random.randint(1, 4)
        input_data = (10 * np.random.random(input_data_shape))
        input_data = input_data.astype(input_dtype)
    else:
        if input_shape is None:
            input_shape = input_data.shape
        if input_dtype is None:
            input_dtype = input_data.dtype
    if expected_output_dtype is None:
        expected_output_dtype = input_dtype

    # instantiation
    layer = layer_cls(**kwargs)

    # test get_weights , set_weights at layer level
    weights = layer.get_weights()
    layer.set_weights(weights)

    expected_output_shape = layer.compute_output_shape(input_shape)

    # test in functional API
    if fixed_batch_size:
        x = Input(batch_shape=input_shape, dtype=input_dtype)
    else:
        x = Input(shape=input_shape[1:], dtype=input_dtype)
    y = layer(x)
    assert K.dtype(y) == expected_output_dtype

    # check with the functional API
    model = Model(x, y)

    actual_output = model.predict(input_data)
    actual_output_shape = actual_output.shape
    # Dimensions reported as None by compute_output_shape are not checked.
    for expected_dim, actual_dim in zip(expected_output_shape,
                                        actual_output_shape):
        if expected_dim is not None:
            assert expected_dim == actual_dim

    if expected_output is not None:
        assert_allclose(actual_output, expected_output, rtol=1e-3)

    # test serialization, weight setting at model level
    model_config = model.get_config()
    recovered_model = model.__class__.from_config(model_config)
    if model.weights:
        weights = model.get_weights()
        recovered_model.set_weights(weights)
        _output = recovered_model.predict(input_data)
        assert_allclose(_output, actual_output, rtol=1e-3)

    # test training mode (e.g. useful when the layer has a
    # different behavior at training and testing time).
    if has_arg(layer.call, 'training'):
        model.compile('rmsprop', 'mse')
        model.train_on_batch(input_data, actual_output)

    # test instantiation from layer config
    layer_config = layer.get_config()
    layer_config['batch_input_shape'] = input_shape
    layer = layer.__class__.from_config(layer_config)

    # for further checks in the caller function
    return actual_output