ValueError: Layer count mismatch when loading weights from file. Model expected 241 layers, found 242 saved layers

I'm trying to create the base pre-trained model with the following code:
base_model = DenseNet121(weights='/Users/awabe/Desktop/Project/PapilaDB/ClinicalData/densenet121_weights_tf_dim_ordering_tf_kernels.h5', include_top=False)
x = base_model.output
# add a global spatial average pooling layer
x = GlobalAveragePooling2D()(x)
# and a logistic layer
predictions = Dense(len(labels), activation="sigmoid")(x)
model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer='adam', loss=get_weighted_loss(pos_weights, neg_weights))
It gives me an error with this message:
ValueError Traceback (most recent call last)
Cell In[73], line 2
1 # create the base pre-trained model
----> 2 base_model = DenseNet121(weights='/Users/awabe/Desktop/Project/PapilaDB/ClinicalData/densenet121_weights_tf_dim_ordering_tf_kernels.h5', include_top=False)
4 x = base_model.output
6 # add a global spatial average pooling layer
File /opt/anaconda3/envs/tensorflow/lib/python3.10/site-packages/keras/applications/densenet.py:358, in DenseNet121(include_top, weights, input_tensor, input_shape, pooling, classes, classifier_activation)
345 @keras_export(
346 "keras.applications.densenet.DenseNet121", "keras.applications.DenseNet121"
347 )
(...)
355 classifier_activation="softmax",
356 ):
357 """Instantiates the Densenet121 architecture."""
--> 358 return DenseNet(
359 [6, 12, 24, 16],
360 include_top,
361 weights,
362 input_tensor,
363 input_shape,
364 pooling,
365 classes,
366 classifier_activation,
367 )
File /opt/anaconda3/envs/tensorflow/lib/python3.10/site-packages/keras/applications/densenet.py:340, in DenseNet(blocks, include_top, weights, input_tensor, input_shape, pooling, classes, classifier_activation)
338 model.load_weights(weights_path)
339 elif weights is not None:
--> 340 model.load_weights(weights)
342 return model
File /opt/anaconda3/envs/tensorflow/lib/python3.10/site-packages/keras/utils/traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
67 filtered_tb = _process_traceback_frames(e.__traceback__)
68 # To get the full stack trace, call:
69 # tf.debugging.disable_traceback_filtering()
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
File /opt/anaconda3/envs/tensorflow/lib/python3.10/site-packages/keras/saving/hdf5_format.py:817, in load_weights_from_hdf5_group(f, model)
815 layer_names = filtered_layer_names
816 if len(layer_names) != len(filtered_layers):
--> 817 raise ValueError(
818 f"Layer count mismatch when loading weights from file. "
819 f"Model expected {len(filtered_layers)} layers, found "
820 f"{len(layer_names)} saved layers."
821 )
823 # We batch weight value assignments in a single backend call
824 # which provides a speedup in TensorFlow.
825 weight_value_tuples = []
ValueError: Layer count mismatch when loading weights from file. Model expected 241 layers, found 242 saved layers.
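For reference, a mismatch of 241 expected vs. 242 saved layers is the classic symptom of loading a weights file that includes the classification top (the filename above ends in tf_kernels.h5) into a model built with include_top=False. A minimal sketch of the usual workaround, assuming either internet access for the standard ImageNet weights or a locally downloaded "notop" file (the local path below is hypothetical):

from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

# Option 1: let Keras fetch the ImageNet weights that match include_top=False.
base_model = DenseNet121(weights='imagenet', include_top=False)

# Option 2 (hypothetical local path): point at the matching "notop" weights file
# instead of the full-model file used in the question.
# base_model = DenseNet121(
#     weights='/path/to/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5',
#     include_top=False)

x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(len(labels), activation='sigmoid')(x)  # `labels` as defined in the question
model = Model(inputs=base_model.input, outputs=predictions)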

Related

Type Error when trying to save model in tensorflow python, getting trace 'Unrecognized type <class 'tensorflow.python.framework.ops.EagerTensor'>.'

I'm relatively new to Python and machine learning. I'm trying to classify chest X-ray scans, and I created a model that does that. However, when I try to save the model I get this error:
TypeError: Unable to serialize [2.0896919 2.1128857 2.1081853] to JSON. Unrecognized type <class 'tensorflow.python.framework.ops.EagerTensor'>.
The full Stack Trace is:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_22092\3924096485.py in <module>
1 working_dir=os.getcwd()
2 subject='chest scans'
----> 3 save_model(subject, classes, img_size, f1score, working_dir)
~\AppData\Local\Temp\ipykernel_22092\541087647.py in save_model(subject, classes, img_size, f1score, working_dir)
3 save_id=f'{name}-{f1score:5.2f}.h5'
4 model_save_loc=os.path.join(working_dir, save_id)
----> 5 model.save(model_save_loc)
6 msg= f'model was saved as {model_save_loc}'
7 print_in_color(msg, (0,255,255), (100,100,100)) # cyan foreground
~\anaconda3\lib\site-packages\keras\utils\traceback_utils.py in error_handler(*args, **kwargs)
68 # To get the full stack trace, call:
69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
~\anaconda3\lib\json\__init__.py in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
232 if cls is None:
233 cls = JSONEncoder
--> 234 return cls(
235 skipkeys=skipkeys, ensure_ascii=ensure_ascii,
236 check_circular=check_circular, allow_nan=allow_nan, indent=indent,
~\anaconda3\lib\json\encoder.py in encode(self, o)
197 # exceptions aren't as detailed. The list call should be roughly
198 # equivalent to the PySequence_Fast that ''.join() would do.
--> 199 chunks = self.iterencode(o, _one_shot=True)
200 if not isinstance(chunks, (list, tuple)):
201 chunks = list(chunks)
~\anaconda3\lib\json\encoder.py in iterencode(self, o, _one_shot)
255 self.key_separator, self.item_separator, self.sort_keys,
256 self.skipkeys, _one_shot)
--> 257 return _iterencode(o, 0)
258
259 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
TypeError: Unable to serialize [2.0896919 2.1128857 2.1081853] to JSON. Unrecognized type <class 'tensorflow.python.framework.ops.EagerTensor'>.
I have created this function to save the model:
def save_model(subject, classes, img_size, f1score, working_dir):
    name = subject + '-' + str(len(classes)) + '-(' + str(img_size[0]) + ' X ' + str(img_size[1]) + ')'
    save_id = f'{name}-{f1score:5.2f}.h5'
    model_save_loc = os.path.join(working_dir, save_id)
    model.save(model_save_loc)
    msg = f'model was saved as {model_save_loc}'
    print_in_color(msg, (0, 255, 255), (100, 100, 100))  # cyan foreground
I have the following packages installed for tensorflow.
tensorboard 2.8.0
tensorboard-data-server 0.6.1
tensorboard-plugin-wit 1.8.1
tensorflow 2.8.1
tensorflow-estimator 2.8.0
tensorflow-io-gcs-filesystem 0.28.0
termcolor 1.1.0
keras 2.8.0
keras-nightly 2.5.0.dev2021032900
Keras-Preprocessing 1.1.2
Any help would be great! I can't figure out how to get past the EagerTensor JSON serialization error when saving the model. Thanks!
I tried to save the ML model I created, but it fails with this error while converting the configuration to JSON.
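For reference, this message usually means an EagerTensor (for example, class or loss weights kept as a tensor) ended up in something Keras serializes to JSON when writing the .h5 file. A hedged sketch of two common workarounds, with weights_tensor standing in for whatever tensor is being serialized:

import tensorflow as tf

# Stand-in for the tensor shown in the error message.
weights_tensor = tf.constant([2.0896919, 2.1128857, 2.1081853])

# Workaround 1: convert to plain Python numbers before the values reach
# anything that ends up in the serialized model configuration.
weights_list = weights_tensor.numpy().tolist()

# Workaround 2: skip config serialization entirely and save only the weights.
# model.save_weights(model_save_loc)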

In tensorflow fit function, I am getting this error: TypeError: __array__() takes 1 positional argument but 2 were given

In a cat-and-dog classification problem, I got an error on:
cnn.fit(training_set, validation_data = test_set, batch_size=32, epochs = 30)
TypeError Traceback (most recent call last)
<ipython-input-20-ee0b03f0e8d6> in <module>
1 # training training set and evaluate test set
----> 2 cnn.fit(training_set, validation_data = test_set, batch_size=32, epochs = 30)
~\anaconda\envs\TF\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1145 use_multiprocessing=use_multiprocessing,
1146 model=self,
-> 1147 steps_per_execution=self._steps_per_execution)
1148
1149 # Container that configures and calls `tf.keras.Callback`s.
~\anaconda\envs\TF\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py in get_data_handler(*args, **kwargs)
1362 if getattr(kwargs["model"], "_cluster_coordinator", None):
1363 return _ClusterCoordinatorDataHandler(*args, **kwargs)
-> 1364 return DataHandler(*args, **kwargs)
1365
1366
~\anaconda\envs\TF\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py in __init__(self, x, y, sample_weight, batch_size, steps_per_epoch, initial_epoch, epochs, shuffle, class_weight, max_queue_size, workers, use_multiprocessing, model, steps_per_execution, distribute)
1164 use_multiprocessing=use_multiprocessing,
1165 distribution_strategy=ds_context.get_strategy(),
-> 1166 model=model)
1167
1168 strategy = ds_context.get_strategy()
~\anaconda\envs\TF\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py in __init__(self, x, y, sample_weights, shuffle, workers, use_multiprocessing, max_queue_size, model, **kwargs)
937 max_queue_size=max_queue_size,
938 model=model,
--> 939 **kwargs)
940
941 @staticmethod
~\anaconda\envs\TF\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py in __init__(self, x, y, sample_weights, workers, use_multiprocessing, max_queue_size, model, **kwargs)
807 # Since we have to know the dtype of the python generator when we build the
808 # dataset, we have to look at a batch to infer the structure.
--> 809 peek, x = self._peek_and_restore(x)
810 peek = self._standardize_batch(peek)
811 peek = _process_tensorlike(peek)
~\anaconda\envs\TF\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py in _peek_and_restore(x)
941 @staticmethod
942 def _peek_and_restore(x):
--> 943 return x[0], x
944
945 def _handle_multiprocessing(self, x, workers, use_multiprocessing,
~\anaconda\envs\TF\lib\site-packages\keras_preprocessing\image\iterator.py in __getitem__(self, idx)
63 index_array = self.index_array[self.batch_size * idx:
64 self.batch_size * (idx + 1)]
---> 65 return self._get_batches_of_transformed_samples(index_array)
66
67 def __len__(self):
~\anaconda\envs\TF\lib\site-packages\keras_preprocessing\image\iterator.py in _get_batches_of_transformed_samples(self, index_array)
229 target_size=self.target_size,
230 interpolation=self.interpolation)
--> 231 x = img_to_array(img, data_format=self.data_format)
232 # Pillow images should be closed after `load_img`,
233 # but not PIL images.
~\anaconda\envs\TF\lib\site-packages\keras_preprocessing\image\utils.py in img_to_array(img, data_format, dtype)
307 # or (channel, height, width)
308 # but original PIL image has format (width, height, channel)
--> 309 x = np.asarray(img, dtype=dtype)
310 if len(x.shape) == 3:
311 if data_format == 'channels_first':
~\anaconda\envs\TF\lib\site-packages\numpy\core\_asarray.py in asarray(a, dtype, order)
81 UPDATEIFCOPY : False
82
---> 83 >>> y = np.require(x, dtype=np.float32, requirements=['A', 'O', 'W', 'F'])
84 >>> y.flags
85 C_CONTIGUOUS : False
TypeError: __array__() takes 1 positional argument but 2 were given
I think there is some problem with numpy or with Pillow, but I am not sure. I am using tensorflow 2.5.0, numpy 1.21.0, and CUDA 11.0. Please check whether I am using the right versions, or whether the problem could be in the CUDA version.
I had the same question; my environment is CUDA 10.1, tensorflow 1.13.1, numpy 1.21.0. I thought maybe something was wrong with numpy, but testing that didn't help. Searching again suggested something is wrong with Pillow, and I fixed it with pip install pillow==8.2.0.
It seems something is wrong with the Pillow package.
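For what it's worth, the traceback bottoms out in keras_preprocessing's img_to_array, which calls np.asarray(img, dtype=dtype) on a PIL image, so the failure is in the Pillow/NumPy interaction rather than in TensorFlow itself. A quick standalone check (the image path is hypothetical) reproduces the problem without Keras; if this runs cleanly, the Pillow/NumPy pairing is fine:

import numpy as np
from PIL import Image

# Hypothetical path; use any image from the training set.
img = Image.open('some_image.jpg')

# This is exactly what keras_preprocessing does internally; on an incompatible
# Pillow/NumPy pair it raises the same
# "TypeError: __array__() takes 1 positional argument but 2 were given".
arr = np.asarray(img, dtype='float32')
print(arr.shape, arr.dtype)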

How to use shared memory and global memory, and is it possible to use shared memory as an intermediate stage in a calculation?

I am trying to write code in Numba CUDA. I have seen a lot of examples that deal with device memory and shared memory separately, and I got stuck and confused. Can one kernel use both? For example, can it multiply numbers using shared memory at one stage and device memory at another?
Another question: as I build the code up step by step to calculate a fitness function, I used a shared-memory array sD as an intermediate stage, with a reduction step in the style of Mark Harris's presentation (half the threads each step, adding with stride s):
Sdata[tid] += Sdata[tid+s]
When I wrote the following code, I got errors and I can't figure out why.
import numpy as np
import math
from numba import cuda, float32

@cuda.jit
def fast_matmul(A, C):
    sA = cuda.shared.array(shape=(1, TPB), dtype=float32)
    sD = cuda.shared.array(shape=(1, TPB), dtype=float32)
    thread_idx_x = cuda.threadIdx.x
    thread_idx_y = cuda.threadIdx.y
    totla_No_of_threads_x = cuda.blockDim.x
    totla_No_of_threads_y = cuda.blockDim.y
    block_idx_x = cuda.blockIdx.x
    block_idx_y = cuda.blockIdx.y
    x, y = cuda.grid(2)
    if x >= A.shape[1]:  # and y >= C.shape[1]:
        return
    s = 0
    index_1 = 1
    for i in range(int(A.shape[1] / TPB)):
        sA[thread_idx_x, thread_idx_y] = A[x, thread_idx_y + i * TPB]
        cuda.syncthreads()
        if thread_idx_y <= (totla_No_of_threads_y - index_1):
            sD[thread_idx_x, thread_idx_y] = sA[thread_idx_x, (thread_idx_y + index_1)] - sA[thread_idx_x, thread_idx_y]
        cuda.syncthreads()
        for s in range(totla_No_of_threads_y // 2):
            if thread_idx_y < s:
                sD[thread_idx_x, thread_idx_y] += sD[thread_idx_x, thread_idx_y + s]
            cuda.syncthreads()
    C[x, y] = sD[x, y]

A = np.full((1, 16), 3, dtype=np.float32)
C = np.zeros((1, 16))
print('A:', A, 'C:', C)
TPB = 32
dA = cuda.to_device(A)
dC = cuda.to_device(C)
fast_matmul[(1, 1), (32, 32)](dA, dC)
res = dC.copy_to_host()
print(res)
The error appears as:
CudaAPIError Traceback (most recent call last)
<ipython-input-214-780fde9bbab5> in <module>
5 TPB = 32
6
----> 7 dA = cuda.to_device(A)
8 dC= cuda.to_device(C)
9 fast_matmul[(8, 8), (32, 32)](dA, dC)
~\Anaconda3\lib\site-packages\numba\cuda\cudadrv\devices.py in _require_cuda_context(*args, **kws)
222 def _require_cuda_context(*args, **kws):
223 with _runtime.ensure_context():
--> 224 return fn(*args, **kws)
225
226 return _require_cuda_context
~\Anaconda3\lib\site-packages\numba\cuda\api.py in to_device(obj, stream, copy, to)
108 """
109 if to is None:
--> 110 to, new = devicearray.auto_device(obj, stream=stream, copy=copy)
111 return to
112 if copy:
~\Anaconda3\lib\site-packages\numba\cuda\cudadrv\devicearray.py in auto_device(obj, stream, copy)
764 subok=True)
765 sentry_contiguous(obj)
--> 766 devobj = from_array_like(obj, stream=stream)
767 if copy:
768 devobj.copy_to_device(obj, stream=stream)
~\Anaconda3\lib\site-packages\numba\cuda\cudadrv\devicearray.py in from_array_like(ary, stream, gpu_data)
686 "Create a DeviceNDArray object that is like ary."
687 return DeviceNDArray(ary.shape, ary.strides, ary.dtype,
--> 688 writeback=ary, stream=stream, gpu_data=gpu_data)
689
690
~\Anaconda3\lib\site-packages\numba\cuda\cudadrv\devicearray.py in __init__(self, shape, strides, dtype, stream, writeback, gpu_data)
102 self.strides,
103 self.dtype.itemsize)
--> 104 gpu_data = devices.get_context().memalloc(self.alloc_size)
105 else:
106 self.alloc_size = _driver.device_memory_size(gpu_data)
~\Anaconda3\lib\site-packages\numba\cuda\cudadrv\driver.py in memalloc(self, bytesize)
1099
1100 def memalloc(self, bytesize):
-> 1101 return self.memory_manager.memalloc(bytesize)
1102
1103 def memhostalloc(self, bytesize, mapped=False, portable=False, wc=False):
~\Anaconda3\lib\site-packages\numba\cuda\cudadrv\driver.py in memalloc(self, size)
849 driver.cuMemAlloc(byref(ptr), size)
850
--> 851 self._attempt_allocation(allocator)
852
853 finalizer = _alloc_finalizer(self, ptr, size)
~\Anaconda3\lib\site-packages\numba\cuda\cudadrv\driver.py in _attempt_allocation(self, allocator)
709 """
710 try:
--> 711 allocator()
712 except CudaAPIError as e:
713 # is out-of-memory?
~\Anaconda3\lib\site-packages\numba\cuda\cudadrv\driver.py in allocator()
847
848 def allocator():
--> 849 driver.cuMemAlloc(byref(ptr), size)
850
851 self._attempt_allocation(allocator)
~\Anaconda3\lib\site-packages\numba\cuda\cudadrv\driver.py in safe_cuda_api_call(*args)
300 _logger.debug('call driver api: %s', libfn.__name__)
301 retcode = libfn(*args)
--> 302 self._check_error(fname, retcode)
303 return safe_cuda_api_call
304
~\Anaconda3\lib\site-packages\numba\cuda\cudadrv\driver.py in _check_error(self, fname, retcode)
335 _logger.critical(msg, _getpid(), self.pid)
336 raise CudaDriverError("CUDA initialized before forking")
--> 337 raise CudaAPIError(retcode, msg)
338
339 def get_device(self, devnum=0):
CudaAPIError: [700] Call to cuMemAlloc results in UNKNOWN_CUDA_ERROR
Yes, you can use both. When you copy data from host to device, it will start out in "device memory" (global memory). Thereafter, if you want to use shared memory, you will have to explicitly copy data into it from your kernel code. Likewise, when you want to return results back to host code (copy data from device to host), that data must be in "device memory".
Shared memory is a smaller, scratchpad-style resource.
This provides a good example/comparison.
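To make that flow concrete, here is a minimal sketch (not the poster's fitness computation) in which a kernel stages values from global (device) memory into shared memory, synchronizes the block, and writes a result back to global memory:

import numpy as np
from numba import cuda, float32

TPB = 32  # threads per block; must be a compile-time constant for shared arrays

@cuda.jit
def scale_with_shared(src, dst):
    # One shared-memory tile per block, visible to every thread in the block.
    tile = cuda.shared.array(shape=TPB, dtype=float32)
    i = cuda.grid(1)
    tx = cuda.threadIdx.x
    if i < src.shape[0]:
        tile[tx] = src[i]           # global (device) memory -> shared memory
    cuda.syncthreads()              # make the tile visible to the whole block
    if i < src.shape[0]:
        dst[i] = tile[tx] * 2.0     # shared memory -> global (device) memory

a = np.arange(64, dtype=np.float32)
d_a = cuda.to_device(a)                     # host -> device (global memory)
d_out = cuda.device_array_like(d_a)
blocks = (a.shape[0] + TPB - 1) // TPB
scale_with_shared[blocks, TPB](d_a, d_out)
print(d_out.copy_to_host())                 # device -> host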
I don't know if this will solve your error, as it looks like you aren't using multiprocessing. But I hit the exact same error, "CudaDriverError: CUDA initialized before forking", and the issue was that Python multiprocessing was using "fork" instead of "spawn".
multiprocessing.set_start_method('spawn')
This fixed the issue for me. It may not help you, but perhaps it will help others who are searching based on this Numba error.
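For completeness, a sketch of where that call normally goes: it must run once, in the main-module guard, before any worker pools or CUDA work are started.

import multiprocessing

if __name__ == '__main__':
    multiprocessing.set_start_method('spawn')
    # ... create worker processes / launch CUDA work here ...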

How to fix "OperatorNotAllowedInGraphError" in TensorFlow 2.0

I'm learning TensorFlow 2.0 from the official tutorials. I can understand the result of the code below:
def square_if_positive(x):
    return [i ** 2 if i > 0 else i for i in x]

square_if_positive(range(-5, 5))
# result
[-5, -4, -3, -2, -1, 0, 1, 4, 9, 16]
But if I change the input to a tensor instead of a Python range, like this:
@tf.function
def square_if_positive(x):
    return [i ** 2 if i > 0 else i for i in x]

square_if_positive(tf.range(-5, 5))
I get the error below:
OperatorNotAllowedInGraphError Traceback (most recent call last)
<ipython-input-39-6c17f29a3443> in <module>
2 def square_if_positive(x):
3 return [i**2 if i > 0 else i for i in x]
----> 4 square_if_positive(tf.range(10))
5 # measure_graph_size(square_if_positive, range(10))
~/tf2_workspace/tf2.0/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds)
437 # This is the first call of __call__, so we have to initialize.
438 initializer_map = {}
--> 439 self._initialize(args, kwds, add_initializers_to=initializer_map)
440 if self._created_variables:
441 try:
~/tf2_workspace/tf2.0/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
380 self._concrete_stateful_fn = (
381 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 382 *args, **kwds))
383
384 def invalid_creator_scope(*unused_args, **unused_kwds):
~/tf2_workspace/tf2.0/lib/python3.6/site-packages/tensorflow_core/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
1793 if self.input_signature:
1794 args, kwargs = None, None
-> 1795 graph_function, _, _ = self._maybe_define_function(args, kwargs)
1796 return graph_function
1797
~/tf2_workspace/tf2.0/lib/python3.6/site-packages/tensorflow_core/python/eager/function.py in _maybe_define_function(self, args, kwargs)
2093 graph_function = self._function_cache.primary.get(cache_key, None)
2094 if graph_function is None:
-> 2095 graph_function = self._create_graph_function(args, kwargs)
2096 self._function_cache.primary[cache_key] = graph_function
2097 return graph_function, args, kwargs
~/tf2_workspace/tf2.0/lib/python3.6/site-packages/tensorflow_core/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
1984 arg_names=arg_names,
1985 override_flat_arg_shapes=override_flat_arg_shapes,
-> 1986 capture_by_value=self._capture_by_value),
1987 self._function_attributes,
1988 # Tell the ConcreteFunction to clean up its graph once it goes out of
~/tf2_workspace/tf2.0/lib/python3.6/site-packages/tensorflow_core/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
851 converted_func)
852
--> 853 func_outputs = python_func(*func_args, **func_kwargs)
854
855 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
~/tf2_workspace/tf2.0/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py in wrapped_fn(*args, **kwds)
323 # __wrapped__ allows AutoGraph to swap in a converted function. We give
324 # the function a weak reference to itself to avoid a reference cycle.
--> 325 return weak_wrapped_fn().__wrapped__(*args, **kwds)
326 weak_wrapped_fn = weakref.ref(wrapped_fn)
327
~/tf2_workspace/tf2.0/lib/python3.6/site-packages/tensorflow_core/python/framework/func_graph.py in wrapper(*args, **kwargs)
841 except Exception as e: # pylint:disable=broad-except
842 if hasattr(e, "ag_error_metadata"):
--> 843 raise e.ag_error_metadata.to_exception(type(e))
844 else:
845 raise
OperatorNotAllowedInGraphError: in converted code:
<ipython-input-37-6c17f29a3443>:3 square_if_positive *
return [i**2 if i > 0 else i for i in x]
/Users/zhangpan/tf2_workspace/tf2.0/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:547 __iter__
self._disallow_iteration()
/Users/zhangpan/tf2_workspace/tf2.0/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:540 _disallow_iteration
self._disallow_when_autograph_enabled("iterating over `tf.Tensor`")
/Users/zhangpan/tf2_workspace/tf2.0/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:518 _disallow_when_autograph_enabled
" decorating it directly with #tf.function.".format(task))
OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed: AutoGraph did not convert this function. Try decorating it directly with #tf.function.
I can't find any documentation about this error. I don't think the real reason is "iterating over tf.Tensor is not allowed", because I can write it like this:
@tf.function
def square_if_positive(x):
    for i in x:
        if i > 0:
            tf.print(i**2)
        else:
            tf.print(i)

square_if_positive(tf.range(10))
That code iterates over a tensor just like the list comprehension above. So my question is: what is the real reason for this error? Any suggestions would help; I really can't understand this error even though I have read a lot of material.
The root cause is that AutoGraph doesn't yet support list comprehensions (primarily because it's difficult to determine the dtype of the result in all cases). As a workaround, you can use tf.map_fn for the comprehension:
return tf.map_fn(lambda i: i ** 2 if i > 0 else i, x)
For more information please take a look at this issue
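Not part of the original answer, but an element-wise alternative that avoids both the list comprehension and the Python conditional is tf.where, which traces cleanly inside a tf.function:

import tensorflow as tf

@tf.function
def square_if_positive(x):
    # Element-wise: square where x > 0, otherwise keep x unchanged.
    return tf.where(x > 0, x ** 2, x)

print(square_if_positive(tf.range(-5, 5)))
# -> [-5 -4 -3 -2 -1  0  1  4  9 16]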
In case it helps someone: I had the same problem with code that did:
for index, image in enumerate(inputs):
    ... My code ...
The solution was just to do:
index = 0
for image in inputs:
    ... My code ...
    index += 1
I had a similar issue when using tf.range() instead of Python's range() for a list comprehension inside a TensorFlow graph function. I was training a 3D segmentation neural net and had to use range() for the code to work.
Check the pseudo-code below:
Y = ...          # [Batch, Height, Width, Depth, Channels]
y_predict = ...  # [B, H, W, D, C, MC_Runs]; MC_Runs = Monte Carlo runs

@tf.function
def train_loss(Y, y_predict):
    # calculate loss and return a scalar value
    ...

@tf.function
def train_step():
    loss = [train_loss(Y, y_predict[:, :, :, :, :, id_]) for id_ in range(MC_RUNS)]
    loss = tf.math.reduce_mean(loss)

Error while exporting a Dask dataframe to CSV

My Dask dataframe has about 120 million rows and 4 columns:
df_final.dtypes
cust_id int64
score float64
total_qty float64
update_score float64
dtype: object
and I'm doing this operation in a Jupyter notebook connected to a Linux machine:
%time df_final.to_csv('/path/claritin-files-*.csv')
and it throws this error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-24-46468ae45023> in <module>()
----> 1 get_ipython().magic(u"time df_final.to_csv('path/claritin-files-*.csv')")
/home/mspra/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in magic(self, arg_s)
2334 magic_name, _, magic_arg_s = arg_s.partition(' ')
2335 magic_name = magic_name.lstrip(prefilter.ESC_MAGIC)
-> 2336 return self.run_line_magic(magic_name, magic_arg_s)
2337
2338 #-------------------------------------------------------------------------
/home/mspra/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in run_line_magic(self, magic_name, line)
2255 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
2256 with self.builtin_trap:
-> 2257 result = fn(*args,**kwargs)
2258 return result
2259
/home/mspra/anaconda2/lib/python2.7/site-packages/IPython/core/magics/execution.pyc in time(self, line, cell, local_ns)
/home/mspra/anaconda2/lib/python2.7/site-packages/IPython/core/magic.pyc in <lambda>(f, *a, **k)
191 # but it's overkill for just that one bit of state.
192 def magic_deco(arg):
--> 193 call = lambda f, *a, **k: f(*a, **k)
194
195 if callable(arg):
/home/mspra/anaconda2/lib/python2.7/site-packages/IPython/core/magics/execution.pyc in time(self, line, cell, local_ns)
1161 if mode=='eval':
1162 st = clock2()
-> 1163 out = eval(code, glob, local_ns)
1164 end = clock2()
1165 else:
<timed eval> in <module>()
/home/mspra/anaconda2/lib/python2.7/site-packages/dask/dataframe/core.pyc in to_csv(self, filename, **kwargs)
936 """ See dd.to_csv docstring for more information """
937 from .io import to_csv
--> 938 return to_csv(self, filename, **kwargs)
939
940 def to_delayed(self):
/home/mspra/anaconda2/lib/python2.7/site-packages/dask/dataframe/io/csv.pyc in to_csv(df, filename, name_function, compression, compute, get, **kwargs)
411 if compute:
412 from dask import compute
--> 413 compute(*values, get=get)
414 else:
415 return values
/home/mspra/anaconda2/lib/python2.7/site-packages/dask/base.pyc in compute(*args, **kwargs)
177 dsk = merge(var.dask for var in variables)
178 keys = [var._keys() for var in variables]
--> 179 results = get(dsk, keys, **kwargs)
180
181 results_iter = iter(results)
/home/mspra/anaconda2/lib/python2.7/site-packages/dask/threaded.pyc in get(dsk, result, cache, num_workers, **kwargs)
74 results = get_async(pool.apply_async, len(pool._pool), dsk, result,
75 cache=cache, get_id=_thread_get_id,
---> 76 **kwargs)
77
78 # Cleanup pools associated to dead threads
/home/mspra/anaconda2/lib/python2.7/site-packages/dask/async.pyc in get_async(apply_async, num_workers, dsk, result, cache, get_id, raise_on_exception, rerun_exceptions_locally, callbacks, dumps, loads, **kwargs)
491 _execute_task(task, data) # Re-execute locally
492 else:
--> 493 raise(remote_exception(res, tb))
494 state['cache'][key] = res
495 finish_task(dsk, key, state, results, keyorder.get)
ValueError: invalid literal for long() with base 10: 'total_qty'
Traceback
---------
File "/home/mspra/anaconda2/lib/python2.7/site-packages/dask/async.py", line 268, in execute_task
result = _execute_task(task, data)
File "/home/mspra/anaconda2/lib/python2.7/site-packages/dask/async.py", line 249, in _execute_task
return func(*args2)
File "/home/mspra/anaconda2/lib/python2.7/site-packages/dask/dataframe/io/csv.py", line 55, in pandas_read_text
coerce_dtypes(df, dtypes)
File "/home/mspra/anaconda2/lib/python2.7/site-packages/dask/dataframe/io/csv.py", line 83, in coerce_dtypes
df[c] = df[c].astype(dtypes[c])
File "/home/mspra/anaconda2/lib/python2.7/site-packages/pandas/core/generic.py", line 3054, in astype
raise_on_error=raise_on_error, **kwargs)
File "/home/mspra/anaconda2/lib/python2.7/site-packages/pandas/core/internals.py", line 3189, in astype
return self.apply('astype', dtype=dtype, **kwargs)
File "/home/mspra/anaconda2/lib/python2.7/site-packages/pandas/core/internals.py", line 3056, in apply
applied = getattr(b, f)(**kwargs)
File "/home/mspra/anaconda2/lib/python2.7/site-packages/pandas/core/internals.py", line 461, in astype
values=values, **kwargs)
File "/home/mspra/anaconda2/lib/python2.7/site-packages/pandas/core/internals.py", line 504, in _astype
values = _astype_nansafe(values.ravel(), dtype, copy=True)
File "/home/mspra/anaconda2/lib/python2.7/site-packages/pandas/types/cast.py", line 534, in _astype_nansafe
return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)
File "pandas/lib.pyx", line 980, in pandas.lib.astype_intsafe (pandas/lib.c:17409)
File "pandas/src/util.pxd", line 93, in util.set_value_at_unsafe (pandas/lib.c:72777)
I have a couple of questions:
1) First of all, this export was working fine on Friday; it spit out 100 CSV files (since it has 100 partitions), which I later aggregated. So what is wrong today -- anything from the error log?
2) Maybe this question is for the creators of this package: what is the most time-efficient way to get a CSV extract out of a Dask dataframe of this size? It was taking about 1.5 to 2 hours the last time it was working.
I'm not using dask.distributed, and this is on a single core of a Linux cluster.
This error likely has little to do with to_csv and more to do with something else in your computation. The call to df.to_csv was just the first time you forced the computation to roll through all of the data.
Given the error, I actually suspect that this is failing in read_csv. Dask.dataframe reads the first few hundred kilobytes of your first file to guess at the datatypes, but it seems to have guessed incorrectly. You might want to try specifying dtypes explicitly in the read_csv call.
In regard to the second question about writing to CSV quickly, my first answer would be "use Parquet or HDF5 instead". They're much faster and more accurate in almost every respect.
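As an illustration of both suggestions (the paths are hypothetical; the column names are taken from the dtypes listing in the question), the dtypes can be pinned when the data is first read, and the output written to Parquet instead of CSV:

import dask.dataframe as dd

# Hypothetical input path; dtypes match the listing in the question.
df_final = dd.read_csv(
    '/path/input-files-*.csv',
    dtype={
        'cust_id': 'int64',
        'score': 'float64',
        'total_qty': 'float64',
        'update_score': 'float64',
    },
)

# Parquet is generally much faster to write and read than CSV at this scale
# (it needs a Parquet engine such as pyarrow or fastparquet installed).
df_final.to_parquet('/path/claritin-parquet/')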