I am trying to implement a residual layer for a CNN (using Caffe and Python).
This is a simple block diagram for residual learning:
This is the code I've written:
def res(self, bottom, args):
    'residual layer'
    rp = {'negative_slope': 0}
    if len(args) != 6:
        raise Exception('res requires 6 arguments: ks, stride, pad, group, nout, bias')
    ks, stride, pad, group, nout, bias = [int(x) for x in args]
    bias = bool(bias)
    # first convolution of the residual branch
    cp = {'kernel_size': [1, ks],
          'stride': [1, stride],
          'pad': [0, pad],
          'group': group,
          'num_output': nout,
          'bias_term': bias,
          'axis': 1,
          'weight_filler': {'type': 'xavier'},
          'bias_filler': {'type': 'constant', 'value': 0.0},
          }
    # multipliers for learning rate and decay of weights and bias
    p = [{'lr_mult': 1, 'decay_mult': 1}]
    if bias:
        p.append({'lr_mult': 2, 'decay_mult': 0})
    myconv1 = L.Convolution(bottom, param=p, convolution_param=cp)
    rconv1 = L.ReLU(myconv1, relu_param=rp, in_place=True)
    # second convolution of the residual branch
    cp2 = {'kernel_size': [1, ks],
           'stride': [1, stride],
           'pad': [0, pad + 2],
           'group': group,
           'num_output': nout,
           'bias_term': bias,
           'axis': 1,
           'weight_filler': {'type': 'xavier'},
           'bias_filler': {'type': 'constant', 'value': 0.0},
           }
    myconv2 = L.Convolution(rconv1, param=p, convolution_param=cp2)
    # element-wise sum of the shortcut (bottom) and the branch output
    forSum = [bottom, myconv2]
    ep = {'operation': 1}  # 1 = SUM
    return L.Eltwise(*forSum, eltwise_param=ep)
And this is the error I get for this architecture c:3:1:0:1:16:0 cr mp:2:2 res:3:1:0:1:16:0 cr mp:2:2 fc:20:0:
python /afs/in2p3.fr/home/n/nhatami/sps/spectroML/src/python/makeSpectroNet.py -label label -n CNN_062 -bs 10 res/2048_1e5_0.00_s/CNN_062_bs10/CNN_062_tmp/CNN_062 data/2048_1e5_0.00/2048_1e5_0.00_s c:3:1:0:1:16:0 cr mp:2:2 res:3:1:0:1:16:0 cr mp:2:2 fc:20:0
Namespace(batchSize=10, droot='data/2048_1e5_0.00/2048_1e5_0.00_s', label='label', layers=['c:3:1:0:1:16:0', 'cr', 'mp:2:2', 'res:3:1:0:1:16:0', 'cr', 'mp:2:2', 'fc:20:0'], name='CNN_062', oroot='res/2048_1e5_0.00_s/CNN_062_bs10/CNN_062_tmp/CNN_062')
data/2048_1e5_0.00/2048_1e5_0.00_s data/2048_1e5_0.00/2048_1e5_0.00_s_train_list.txt data/2048_1e5_0.00/2048_1e5_0.00_s_val_list.txt
WARNING: Logging before InitGoogleLogging() is written to STDERR
I0208 18:00:05.952062 194649 upgrade_proto.cpp:67] Attempting to upgrade input file specified using deprecated input fields: res/2048_1e5_0.00_s/CNN_062_bs10/CNN_062_tmp/CNN_062_deploy.txt
I0208 18:00:05.952121 194649 upgrade_proto.cpp:70] Successfully upgraded file specified using deprecated input fields.
W0208 18:00:05.952126 194649 upgrade_proto.cpp:72] Note that future Caffe releases will only support input layers and not input fields.
I0208 18:00:06.349092 194649 net.cpp:51] Initializing net from parameters:
name: "CNN_062"
state {
phase: TEST
level: 0
}
layer {
name: "input"
type: "Input"
top: "data"
input_param {
shape {
dim: 1
dim: 2
dim: 1
dim: 2048
}
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 16
bias_term: false
pad: 0
pad: 0
kernel_size: 1
kernel_size: 3
group: 1
stride: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
axis: 1
}
}
layer {
name: "Scale1"
type: "Scale"
bottom: "conv1"
top: "Scale1"
param {
lr_mult: 0
decay_mult: 0
}
scale_param {
filler {
type: "constant"
value: -1
}
}
}
layer {
name: "ReLU1"
type: "ReLU"
bottom: "Scale1"
top: "ReLU1"
relu_param {
negative_slope: 0
}
}
layer {
name: "Scale2"
type: "Scale"
bottom: "ReLU1"
top: "Scale2"
param {
lr_mult: 0
decay_mult: 0
}
scale_param {
filler {
type: "constant"
value: -1
}
}
}
layer {
name: "ReLU2"
type: "ReLU"
bottom: "conv1"
top: "ReLU2"
relu_param {
negative_slope: 0
}
}
layer {
name: "crelu1"
type: "Concat"
bottom: "Scale2"
bottom: "ReLU2"
top: "crelu1"
}
layer {
name: "maxPool1"
type: "Pooling"
bottom: "crelu1"
top: "maxPool1"
pooling_param {
pool: MAX
kernel_h: 1
kernel_w: 2
stride_h: 1
stride_w: 2
pad_h: 0
pad_w: 0
}
}
layer {
name: "Convolution1"
type: "Convolution"
bottom: "maxPool1"
top: "Convolution1"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 16
bias_term: false
pad: 0
pad: 0
kernel_size: 1
kernel_size: 3
group: 1
stride: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
axis: 1
}
}
layer {
name: "ReLU3"
type: "ReLU"
bottom: "Convolution1"
top: "Convolution1"
relu_param {
negative_slope: 0
}
}
layer {
name: "Convolution2"
type: "Convolution"
bottom: "Convolution1"
top: "Convolution2"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 16
bias_term: false
pad: 0
pad: 2
kernel_size: 1
kernel_size: 3
group: 1
stride: 1
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
axis: 1
}
}
layer {
name: "res1"
type: "Eltwise"
bottom: "maxPool1"
bottom: "Convolution2"
top: "res1"
eltwise_param {
operation: SUM
}
}
layer {
name: "Scale3"
type: "Scale"
bottom: "res1"
top: "Scale3"
param {
lr_mult: 0
decay_mult: 0
}
scale_param {
filler {
type: "constant"
value: -1
}
}
}
layer {
name: "ReLU4"
type: "ReLU"
bottom: "Scale3"
top: "ReLU4"
relu_param {
negative_slope: 0
}
}
layer {
name: "Scale4"
type: "Scale"
bottom: "ReLU4"
top: "Scale4"
param {
lr_mult: 0
decay_mult: 0
}
scale_param {
filler {
type: "constant"
value: -1
}
}
}
layer {
name: "ReLU5"
type: "ReLU"
bottom: "res1"
top: "ReLU5"
relu_param {
negative_slope: 0
}
}
layer {
name: "crelu2"
type: "Concat"
bottom: "Scale4"
bottom: "ReLU5"
top: "crelu2"
}
layer {
name: "maxPool2"
type: "Pooling"
bottom: "crelu2"
top: "maxPool2"
pooling_param {
pool: MAX
kernel_h: 1
kernel_w: 2
stride_h: 1
stride_w: 2
pad_h: 0
pad_w: 0
}
}
layer {
name: "ampl"
type: "InnerProduct"
bottom: "maxPool2"
top: "ampl"
param {
lr_mult: 1
decay_mult: 1
}
inner_product_param {
num_output: 20
bias_term: false
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.2
}
}
}
I0208 18:00:06.349267 194649 layer_factory.hpp:77] Creating layer input
I0208 18:00:06.349287 194649 net.cpp:84] Creating Layer input
I0208 18:00:06.349298 194649 net.cpp:380] input -> data
I0208 18:00:06.349334 194649 net.cpp:122] Setting up input
I0208 18:00:06.349346 194649 net.cpp:129] Top shape: 1 2 1 2048 (4096)
I0208 18:00:06.349351 194649 net.cpp:137] Memory required for data: 16384
I0208 18:00:06.349356 194649 layer_factory.hpp:77] Creating layer conv1
I0208 18:00:06.349371 194649 net.cpp:84] Creating Layer conv1
I0208 18:00:06.349376 194649 net.cpp:406] conv1 <- data
...
I0208 18:00:06.349556 194649 net.cpp:380] conv1_conv1_0_split -> conv1_conv1_0_split_1
I0208 18:00:06.349568 194649 net.cpp:122] Setting up conv1_conv1_0_split
I0208 18:00:06.349575 194649 net.cpp:129] Top shape: 1 16 1 2046 (32736)
I0208 18:00:06.349580 194649 net.cpp:129] Top shape: 1 16 1 2046 (32736)
I0208 18:00:06.349583 194649 net.cpp:137] Memory required for data: 409216
I0208 18:00:06.349587 194649 layer_factory.hpp:77] Creating layer Scale1
I0208 18:00:06.349598 194649 net.cpp:84] Creating Layer Scale1
I0208 18:00:06.349603 194649 net.cpp:406] Scale1 <- conv1_conv1_0_split_0
I0208 18:00:06.349611 194649 net.cpp:380] Scale1 -> Scale1
I0208 18:00:06.349642 194649 net.cpp:122] Setting up Scale1
I0208 18:00:06.349647 194649 net.cpp:129] Top shape: 1 16 1 2046 (32736)
I0208 18:00:06.349651 194649 net.cpp:137] Memory required for data: 540160
I0208 18:00:06.349659 194649 layer_factory.hpp:77] Creating layer ReLU1
I0208 18:00:06.349668 194649 net.cpp:84] Creating Layer ReLU1
I0208 18:00:06.349673 194649 net.cpp:406] ReLU1 <- Scale1
I0208 18:00:06.349679 194649 net.cpp:380] ReLU1 -> ReLU1
I0208 18:00:06.349689 194649 net.cpp:122] Setting up ReLU1
I0208 18:00:06.349694 194649 net.cpp:129] Top shape: 1 16 1 2046 (32736)
I0208 18:00:06.349699 194649 net.cpp:137] Memory required for data: 671104
I0208 18:00:06.349702 194649 layer_factory.hpp:77] Creating layer Scale2
I0208 18:00:06.349709 194649 net.cpp:84] Creating Layer Scale2
I0208 18:00:06.349714 194649 net.cpp:406] Scale2 <- ReLU1
I0208 18:00:06.349720 194649 net.cpp:380] Scale2 -> Scale2
I0208 18:00:06.349741 194649 net.cpp:122] Setting up Scale2
I0208 18:00:06.349747 194649 net.cpp:129] Top shape: 1 16 1 2046 (32736)
I0208 18:00:06.349751 194649 net.cpp:137] Memory required for data: 802048
I0208 18:00:06.349758 194649 layer_factory.hpp:77] Creating layer ReLU2
I0208 18:00:06.349771 194649 net.cpp:84] Creating Layer ReLU2
I0208 18:00:06.349776 194649 net.cpp:406] ReLU2 <- conv1_conv1_0_split_1
I0208 18:00:06.349782 194649 net.cpp:380] ReLU2 -> ReLU2
I0208 18:00:06.349789 194649 net.cpp:122] Setting up ReLU2
I0208 18:00:06.349795 194649 net.cpp:129] Top shape: 1 16 1 2046 (32736)
I0208 18:00:06.349799 194649 net.cpp:137] Memory required for data: 932992
I0208 18:00:06.349803 194649 layer_factory.hpp:77] Creating layer crelu1
I0208 18:00:06.349812 194649 net.cpp:84] Creating Layer crelu1
I0208 18:00:06.349815 194649 net.cpp:406] crelu1 <- Scale2
I0208 18:00:06.349822 194649 net.cpp:406] crelu1 <- ReLU2
I0208 18:00:06.349829 194649 net.cpp:380] crelu1 -> crelu1
I0208 18:00:06.349843 194649 net.cpp:122] Setting up crelu1
I0208 18:00:06.349848 194649 net.cpp:129] Top shape: 1 32 1 2046 (65472)
I0208 18:00:06.349853 194649 net.cpp:137] Memory required for data: 1194880
I0208 18:00:06.349856 194649 layer_factory.hpp:77] Creating layer maxPool1
I0208 18:00:06.349864 194649 net.cpp:84] Creating Layer maxPool1
I0208 18:00:06.349870 194649 net.cpp:406] maxPool1 <- crelu1
I0208 18:00:06.349876 194649 net.cpp:380] maxPool1 -> maxPool1
I0208 18:00:06.349891 194649 net.cpp:122] Setting up maxPool1
I0208 18:00:06.349897 194649 net.cpp:129] Top shape: 1 32 1 1023 (32736)
I0208 18:00:06.349901 194649 net.cpp:137] Memory required for data: 1325824
I0208 18:00:06.349905 194649 layer_factory.hpp:77] Creating layer maxPool1_maxPool1_0_split
I0208 18:00:06.349911 194649 net.cpp:84] Creating Layer maxPool1_maxPool1_0_split
I0208 18:00:06.349915 194649 net.cpp:406] maxPool1_maxPool1_0_split <- maxPool1
I0208 18:00:06.349925 194649 net.cpp:380] maxPool1_maxPool1_0_split -> maxPool1_maxPool1_0_split_0
I0208 18:00:06.349931 194649 net.cpp:380] maxPool1_maxPool1_0_split -> maxPool1_maxPool1_0_split_1
I0208 18:00:06.349937 194649 net.cpp:122] Setting up maxPool1_maxPool1_0_split
I0208 18:00:06.349943 194649 net.cpp:129] Top shape: 1 32 1 1023 (32736)
I0208 18:00:06.349948 194649 net.cpp:129] Top shape: 1 32 1 1023 (32736)
I0208 18:00:06.349952 194649 net.cpp:137] Memory required for data: 1587712
I0208 18:00:06.349962 194649 layer_factory.hpp:77] Creating layer Convolution1
I0208 18:00:06.349973 194649 net.cpp:84] Creating Layer Convolution1
I0208 18:00:06.349983 194649 net.cpp:406] Convolution1 <- maxPool1_maxPool1_0_split_0
I0208 18:00:06.349999 194649 net.cpp:380] Convolution1 -> Convolution1
I0208 18:00:06.350034 194649 net.cpp:122] Setting up Convolution1
I0208 18:00:06.350040 194649 net.cpp:129] Top shape: 1 16 1 1021 (16336)
I0208 18:00:06.350044 194649 net.cpp:137] Memory required for data: 1653056
I0208 18:00:06.350050 194649 layer_factory.hpp:77] Creating layer ReLU3
I0208 18:00:06.350056 194649 net.cpp:84] Creating Layer ReLU3
I0208 18:00:06.350061 194649 net.cpp:406] ReLU3 <- Convolution1
I0208 18:00:06.350067 194649 net.cpp:367] ReLU3 -> Convolution1 (in-place)
I0208 18:00:06.350075 194649 net.cpp:122] Setting up ReLU3
I0208 18:00:06.350080 194649 net.cpp:129] Top shape: 1 16 1 1021 (16336)
I0208 18:00:06.350083 194649 net.cpp:137] Memory required for data: 1718400
I0208 18:00:06.350087 194649 layer_factory.hpp:77] Creating layer Convolution2
I0208 18:00:06.350095 194649 net.cpp:84] Creating Layer Convolution2
I0208 18:00:06.350100 194649 net.cpp:406] Convolution2 <- Convolution1
I0208 18:00:06.350108 194649 net.cpp:380] Convolution2 -> Convolution2
I0208 18:00:06.350132 194649 net.cpp:122] Setting up Convolution2
I0208 18:00:06.350138 194649 net.cpp:129] Top shape: 1 16 1 1023 (16368)
I0208 18:00:06.350142 194649 net.cpp:137] Memory required for data: 1783872
I0208 18:00:06.350149 194649 layer_factory.hpp:77] Creating layer res1
I0208 18:00:06.350158 194649 net.cpp:84] Creating Layer res1
I0208 18:00:06.350163 194649 net.cpp:406] res1 <- maxPool1_maxPool1_0_split_1
I0208 18:00:06.350168 194649 net.cpp:406] res1 <- Convolution2
I0208 18:00:06.350178 194649 net.cpp:380] res1 -> res1
F0208 18:00:06.350195 194649 eltwise_layer.cpp:34] Check failed: bottom[0]->shape() == bottom[i]->shape() bottom[0]: 1 32 1 1023 (32736), bottom[1]: 1 16 1 1023 (16368)
*** Check failure stack trace: ***
I would really appreciate your help!
The tricky thing about residual blocks is that x and F(x) must have the same shape, otherwise you cannot sum them up: x + F(x).
In your example x has 32 channels while F(x) has only 16, so the Eltwise layer fails.
It is common practice to place a 1x1 conv layer on the residual (shortcut) link whenever the dimensions of F(x) differ from the dimensions of x (see the sketch below):
- when stride != 1 (spatial dimensions differ)
- when changing the number of channels (usually at the start of a new "block" in a ResNet)
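For example, in pycaffe the projection on the shortcut could look roughly like this. This is only a minimal sketch of the idea, not your generator code: the layer arguments (num_output, kernel sizes, fillers) are assumptions you would adapt. The point is that the shortcut is projected to the same number of channels as the branch before the Eltwise sum:

from caffe import layers as L

def res_proj(bottom, nout):
    # residual branch: two (1 x 3) convolutions with a ReLU in between,
    # padded so that the width is preserved
    branch = L.Convolution(bottom, num_output=nout, kernel_size=[1, 3],
                           stride=[1, 1], pad=[0, 1],
                           weight_filler={'type': 'xavier'})
    branch = L.ReLU(branch, in_place=True)
    branch = L.Convolution(branch, num_output=nout, kernel_size=[1, 3],
                           stride=[1, 1], pad=[0, 1],
                           weight_filler={'type': 'xavier'})
    # 1x1 projection on the shortcut so its channel count matches the branch
    shortcut = L.Convolution(bottom, num_output=nout, kernel_size=1,
                             weight_filler={'type': 'xavier'})
    # both inputs now have identical shapes, so the sum is valid
    return L.Eltwise(shortcut, branch, eltwise_param={'operation': 1})  # 1 = SUM

In your net, bottom is maxPool1 with 32 channels and the branch produces 16, so you would either raise the branch's num_output to 32 or project the shortcut down to 16; with the projection above, both sides end up with nout channels either way.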
When changing the pad size, I get this error. I tried different pad sizes, but I keep getting similar errors. Here are more details:
layer {
name: "Pooling1"
type: "Pooling"
bottom: "Convolution2"
top: "Pooling1"
pooling_param {
pool: MAX
kernel_h: 1
kernel_w: 2
stride_h: 1
stride_w: 2
pad_h: 0
pad_w: 3
}
}
This is the error:
...
Creating layer Convolution2
I0525 10:45:37.403520 20575 net.cpp:84] Creating Layer Convolution2
I0525 10:45:37.403524 20575 net.cpp:406] Convolution2 <- Concat1
I0525 10:45:37.403529 20575 net.cpp:380] Convolution2 -> Convolution2
I0525 10:45:37.403555 20575 net.cpp:122] Setting up Convolution2
I0525 10:45:37.403560 20575 net.cpp:129] Top shape: 1 16 1 4076 (65216)
I0525 10:45:37.403563 20575 net.cpp:137] Memory required for data: 3022080
I0525 10:45:37.403568 20575 layer_factory.hpp:77] Creating layer Pooling1
I0525 10:45:37.403571 20575 net.cpp:84] Creating Layer Pooling1
I0525 10:45:37.403575 20575 net.cpp:406] Pooling1 <- Convolution2
I0525 10:45:37.403581 20575 net.cpp:380] Pooling1 -> Pooling1
F0525 10:45:37.403594 20575 pooling_layer.cpp:74] Check failed: pad_w_ < kernel_w_ (3 vs. 2)
thanks in advance!
The problem is clear from the error message you get: you set the pooling pad (pad_w: 3) to be larger than the pooling kernel size (kernel_w: 2). Caffe requires the pad to be smaller than the kernel; reduce the pad and you should be okay.
I am getting into programming networks with Caffe, and since I am used to more comfortable and "lazy" solutions, I am a bit overwhelmed by the problems that can occur.
Right now I am getting the error
Check failed: status == CUDNN_STATUS_SUCCESS (3 vs. 0) CUDNN_STATUS_BAD_PARAM
This one is quite well known to be caused by bad CUDA or cuDNN versions.
So I checked those and they are up to date (CUDA 8.0.61, cuDNN 6.0.21).
Since I only get this error when I add this ReLU layer, I suppose it is caused by me confusing a parameter:
layer {
name: "relu1"
type: "ReLU"
bottom: "pool1"
top: "relu1"
}
And to give you all the information, here is the error message I get:
I0319 09:41:09.484148 6909 solver.cpp:44] Initializing solver from parameters:
test_iter: 10
test_interval: 1000
base_lr: 0.001
display: 20
max_iter: 800
lr_policy: "step"
gamma: 0.1
momentum: 0.9
weight_decay: 0.04
stepsize: 200
snapshot: 10000
snapshot_prefix: "models/train"
solver_mode: GPU
net: "train_val.prototxt"
I0319 09:41:09.484392 6909 solver.cpp:87] Creating training net from net file: train_val.prototxt
I0319 09:41:09.485164 6909 net.cpp:294] The NetState phase (0) differed from the phase (1) specified by a rule in layer feed2
I0319 09:41:09.485183 6909 net.cpp:51] Initializing net from parameters:
name: "CaffeNet"
state {
phase: TRAIN
}
layer {
name: "feed"
type: "HDF5Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
hdf5_data_param {
source: "train_h5_list.txt"
batch_size: 50
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 1
kernel_size: 3
stride: 1
weight_filler {
type: "gaussian"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 1
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "pool1"
top: "relu1"
}
layer {
name: "conv2"
type: "Convolution"
bottom: "relu1"
top: "conv2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 1
kernel_size: 3
stride: 1
weight_filler {
type: "gaussian"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "ip2"
type: "InnerProduct"
bottom: "conv2"
top: "ip2"
param {
lr_mult: 1
decay_mult: 1
}
inner_product_param {
num_output: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "sig1"
type: "Sigmoid"
bottom: "ip2"
top: "sig1"
}
layer {
name: "loss"
type: "EuclideanLoss"
bottom: "sig1"
bottom: "label"
top: "loss"
}
I0319 09:41:09.485752 6909 layer_factory.hpp:77] Creating layer feed
I0319 09:41:09.485780 6909 net.cpp:84] Creating Layer feed
I0319 09:41:09.485792 6909 net.cpp:380] feed -> data
I0319 09:41:09.485819 6909 net.cpp:380] feed -> label
I0319 09:41:09.485836 6909 hdf5_data_layer.cpp:80] Loading list of HDF5 filenames from: train_h5_list.txt
I0319 09:41:09.485860 6909 hdf5_data_layer.cpp:94] Number of HDF5 files: 1
I0319 09:41:09.486469 6909 hdf5.cpp:32] Datatype class: H5T_FLOAT
I0319 09:41:09.500986 6909 net.cpp:122] Setting up feed
I0319 09:41:09.501011 6909 net.cpp:129] Top shape: 50 227 227 3 (7729350)
I0319 09:41:09.501027 6909 net.cpp:129] Top shape: 50 1 (50)
I0319 09:41:09.501039 6909 net.cpp:137] Memory required for data: 30917600
I0319 09:41:09.501051 6909 layer_factory.hpp:77] Creating layer conv1
I0319 09:41:09.501080 6909 net.cpp:84] Creating Layer conv1
I0319 09:41:09.501087 6909 net.cpp:406] conv1 <- data
I0319 09:41:09.501101 6909 net.cpp:380] conv1 -> conv1
I0319 09:41:09.880740 6909 net.cpp:122] Setting up conv1
I0319 09:41:09.880765 6909 net.cpp:129] Top shape: 50 1 225 1 (11250)
I0319 09:41:09.880781 6909 net.cpp:137] Memory required for data: 30962600
I0319 09:41:09.880808 6909 layer_factory.hpp:77] Creating layer pool1
I0319 09:41:09.880836 6909 net.cpp:84] Creating Layer pool1
I0319 09:41:09.880846 6909 net.cpp:406] pool1 <- conv1
I0319 09:41:09.880861 6909 net.cpp:380] pool1 -> pool1
I0319 09:41:09.880888 6909 net.cpp:122] Setting up pool1
I0319 09:41:09.880899 6909 net.cpp:129] Top shape: 50 1 224 0 (0)
I0319 09:41:09.880913 6909 net.cpp:137] Memory required for data: 30962600
I0319 09:41:09.880921 6909 layer_factory.hpp:77] Creating layer relu1
I0319 09:41:09.880934 6909 net.cpp:84] Creating Layer relu1
I0319 09:41:09.880941 6909 net.cpp:406] relu1 <- pool1
I0319 09:41:09.880952 6909 net.cpp:380] relu1 -> relu1
F0319 09:41:09.881192 6909 cudnn.hpp:80] Check failed: status == CUDNN_STATUS_SUCCESS (3 vs. 0) CUDNN_STATUS_BAD_PARAM
EDIT: Tried setting the solver mode to CPU, I still get this error.
I found out one of the problems.
I0319 09:41:09.880765 6909 net.cpp:129] Top shape: 50 1 225 1 (11250)
I0319 09:41:09.880781 6909 net.cpp:137] Memory required for data: 30962600
I0319 09:41:09.880808 6909 layer_factory.hpp:77] Creating layer pool1
I0319 09:41:09.880836 6909 net.cpp:84] Creating Layer pool1
I0319 09:41:09.880846 6909 net.cpp:406] pool1 <- conv1
I0319 09:41:09.880861 6909 net.cpp:380] pool1 -> pool1
I0319 09:41:09.880888 6909 net.cpp:122] Setting up pool1
I0319 09:41:09.880899 6909 net.cpp:129] Top shape: 50 1 224 0 (0)
As you can see, the first convolutional layer takes an input of size (50 227 227 3), which is a bit problematic, since Caffe assumes that the second dimension contains the channels.
It's only natural that this convolutional layer simply butchers the dimensions that way, and no layer after it will get proper input dimensions.
I managed to solve the problem by simply reshaping the input this way:
layer {
name: "reshape"
type: "Reshape"
bottom: "data"
top: "res"
reshape_param {
shape {
dim: 50
dim: 3
dim: 227
dim: 227
}
}
}
The first dimension in this is the batch size, so whoever reads this has to remember to set this dim to 1 in the deploy .prototxt for the classification phase (since that one won't work with batches).
EDIT: I will mark this as an answer since it covers the basic solution to the problem I had and no other solution is in sight. If anyone wants to shine more light on the matter, please do so.
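An alternative to reshaping inside the net is to write the HDF5 file with the data already in Caffe's N x C x H x W order. A minimal sketch with h5py; the file name, dataset names, and the placeholder arrays are just examples:

import h5py
import numpy as np

# placeholder arrays standing in for the real images/labels,
# with the images in the (N, H, W, C) order they were loaded in
images = np.zeros((50, 227, 227, 3), dtype=np.float32)
labels = np.zeros((50, 1), dtype=np.float32)

# transpose to Caffe's (N, C, H, W) layout before writing
images_nchw = images.transpose(0, 3, 1, 2)

with h5py.File('train.h5', 'w') as f:
    f.create_dataset('data', data=images_nchw)
    f.create_dataset('label', data=labels)

With the data stored this way, conv1 sees 3 channels of 227x227 and neither the Reshape layer nor its hard-coded batch size is needed.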
The reason why it is throwing this error is that you have no more room to "shrink". From your error message: 50 1 224 0 (0)
This indicates that one of the blob dimensions has collapsed to 0.
To fix this error, you can tweak some of the parameters, including (S)tride, (K)ernel size, and (P)adding. To calculate the dimensions of your next layer (W_new), you can use the formula:
W_new = (W_old - K + 2*P)/S + 1
So, if we have an input that is 227x227x3 and our first layer has K = 5, S = 2, P = 1, and numOutputs = N, conv1 then has a dimension of:
(227 - 5 + 2*1)/2 + 1 = 113, i.e. 113x113xN.
Note: if the division is not exact, Caffe rounds down (floor) for convolution layers and rounds up (ceil) for pooling layers.
Edit: The reason why it's showing up at the ReLU layer is likely that the ReLU layer receives a zero-sized blob and has nothing to pass through, hence it throws an error.
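If you want to sanity-check the shapes programmatically, a small helper that follows Caffe's floor-for-convolution / ceil-for-pooling rules reproduces the collapsed dimension from your log (a sketch, applied to one spatial axis at a time):

import math

def conv_out(w, kernel, stride=1, pad=0):
    # Caffe convolution: floor((W + 2*pad - kernel) / stride) + 1
    return (w + 2 * pad - kernel) // stride + 1

def pool_out(w, kernel, stride=1, pad=0):
    # Caffe pooling: ceil((W + 2*pad - kernel) / stride) + 1
    return int(math.ceil(float(w + 2 * pad - kernel) / stride)) + 1

print(conv_out(227, kernel=5, stride=2, pad=1))  # 113
print(conv_out(3, kernel=3, stride=1))           # 1  (the last axis of conv1 in the log)
print(pool_out(1, kernel=2, stride=1))           # 0  (pool1 collapses that axis to zero)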
I created a caffe model on top of segnet/deepobservationEO
I compiled caffe successfully. I used the model proposed here:
https://github.com/nshaud/DeepNetsForEO/blob/master/models/segnet_isprs_vaihingen_irrg.prototxt
I get this issue:
....
I0221 05:26:39.852553 1508 net.cpp:408] conv1_1 <- data
I0221 05:26:39.852568 1508 net.cpp:382] conv1_1 -> conv1_1
I0221 05:26:39.855118 1508 net.cpp:124] Setting up conv1_1
I0221 05:26:39.855144 1508 net.cpp:131] Top shape: 10 64 128 128 (10485760)
I0221 05:26:39.855150 1508 net.cpp:139] Memory required for data: 45875200
I0221 05:26:39.855170 1508 layer_factory.hpp:77] Creating layer conv1_1_bn
I0221 05:26:39.856801 1508 net.cpp:86] Creating Layer conv1_1_bn
I0221 05:26:39.856830 1508 net.cpp:408] conv1_1_bn <- conv1_1
I0221 05:26:39.856847 1508 net.cpp:369] conv1_1_bn -> conv1_1 (in-place)
F0221 05:26:39.856976 1508 batch_norm_layer.cpp:44] Check failed: this-> layer_param_.param(i).lr_mult() == 0.f (0.5 vs. 0) Cannot configure batch normalization statistics as layer parameters.
*** Check failure stack trace: ***
Any idea what to do or where to check?
thx
colin
It seems like you have lr_mult: 0.5 for layer conv1_1_bn of type "BatchNorm".
Caffe only supports lr_mult: 0 for the parameters of a "BatchNorm" layer, since its internal statistics are accumulated during the forward pass and must not be updated by the solver.
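If the prototxt is generated from Python, the usual pattern is to give all three internal BatchNorm blobs lr_mult: 0 and to put the learnable scale/shift into a separate "Scale" layer. A rough sketch (conv stands for whatever layer precedes the batch norm):

from caffe import layers as L

# BatchNorm statistics are not learned by the solver, hence lr_mult: 0 on all three blobs
bn = L.BatchNorm(conv, in_place=True,
                 param=[dict(lr_mult=0, decay_mult=0)] * 3)
# the learnable per-channel scale and shift live in a separate Scale layer
sc = L.Scale(bn, in_place=True, scale_param=dict(bias_term=True))

In the linked prototxt this would mean removing (or setting to 0) the lr_mult: 0.5 param entries on the "BatchNorm" layers.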
I know this may be better asked in the Caffe users group, but I cannot access the user group and I am not sure whether this should instead be raised as an issue on GitHub. In any case, what I'm doing is this:
I have a set of grayscale images that I want to use to train a CNN using Caffe. I'm using the provided caffenet model definitions with minor modifications (i.e. channels = 1 instead of 3, since I have grayscale images). So far, I used the provided ImageNet mean image to train the CNN and it trained and generated results. Now I wanted to compute the image mean of my own train/test dataset and use that as the mean image, so I used the tool in build/tools/ to do this. It needs the images to be in LMDB, so I first converted the images to LMDB using convert_imageset and then used compute_image_mean to compute the mean. I made sure to use the --gray flag with convert_imageset, since my images are grayscale. When I rerun caffe, I get the following error. From what I can understand, it's a channel mismatch, but I have no idea how to fix it. Any help on this is very much appreciated.
I0829 20:41:50.429733 17065 layer_factory.hpp:77] Creating layer data
I0829 20:41:50.429764 17065 net.cpp:100] Creating Layer data
I0829 20:41:50.429769 17065 net.cpp:408] data -> data
I0829 20:41:50.429790 17065 net.cpp:408] data -> label
I0829 20:41:50.429805 17065 data_transformer.cpp:25] Loading mean file from: data/flickr_style/train_mean.binaryproto
I0829 20:41:50.438251 17065 image_data_layer.cpp:38] Opening file data/flickr_style/train.txt
I0829 20:41:50.446666 17065 image_data_layer.cpp:58] A total of 31740 images.
I0829 20:41:50.451941 17065 image_data_layer.cpp:85] output data size: 10,3,227,227
I0829 20:41:50.459661 17065 net.cpp:150] Setting up data
I0829 20:41:50.459692 17065 net.cpp:157] Top shape: 10 3 227 227 (1545870)
I0829 20:41:50.459697 17065 net.cpp:157] Top shape: 10 (10)
I0829 20:41:50.459699 17065 net.cpp:165] Memory required for data: 6183520
I0829 20:41:50.459707 17065 layer_factory.hpp:77] Creating layer conv1
I0829 20:41:50.459728 17065 net.cpp:100] Creating Layer conv1
I0829 20:41:50.459733 17065 net.cpp:434] conv1 <- data
I0829 20:41:50.459744 17065 net.cpp:408] conv1 -> conv1
F0829 20:41:50.463794 17106 data_transformer.cpp:257] Check failed: img_channels == data_mean_.channels() (3 vs. 1)
*** Check failure stack trace: ***
# 0x7f0712106daa (unknown)
# 0x7f0712106ce4 (unknown)
# 0x7f07121066e6 (unknown)
# 0x7f0712109687 (unknown)
# 0x7f071287d6cd caffe::DataTransformer<>::Transform()
# 0x7f07127fde60 caffe::ImageDataLayer<>::load_batch()
# 0x7f0712839539 caffe::BasePrefetchingDataLayer<>::InternalThreadEntry()
# 0x7f0712886020 caffe::InternalThread::entry()
# 0x7f070a762a4a (unknown)
# 0x7f070603e184 start_thread
# 0x7f07111eb37d (unknown)
# (nil) (unknown)
I have the following in train_val.prototxt
name: "FlickrStyleCaffeNet"
layer {
name: "data"
type: "ImageData"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
mirror: true
crop_size: 227
mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
}
image_data_param {
source: "data/flickr_style/mri_train.txt"
batch_size: 10
new_height: 256
new_width: 256
}
}
and this in deploy.prototxt
name: "FlickrStyleCaffeNet"
layer {
name: "data"
type: "Input"
top: "data"
input_param { shape: { dim: 10 dim: 3 dim: 227 dim: 227 } }
}
You (or the interface) have failed to adjust the input for grayscale. Grayscale has only 1 input channel (value); the model expects 3 channels (RGB). That 3 in the layer's top shape should be 1.
Look in your *.prototxt files for something like this near the top (input layer):
shape {
dim: 10
dim: 3
dim: 227
dim: 227
}
These dimensions are batch_size, channels, rows, and columns. Wherever you have something of this form (there should be only one, and only in input files), change the 3 to 1.
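A quick way to confirm which side is off is to load the mean file and look at its channel count; a minimal sketch using pycaffe and the mean path from your log:

from caffe.proto import caffe_pb2
import caffe

blob = caffe_pb2.BlobProto()
with open('data/flickr_style/train_mean.binaryproto', 'rb') as f:
    blob.ParseFromString(f.read())

mean = caffe.io.blobproto_to_array(blob)
# shape is (num, channels, height, width): a mean computed from a --gray LMDB
# has channels == 1, while the ImageData layer loads 3-channel images unless
# is_color: false is set
print(mean.shape)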
I figured out how to do this. In train_val.prototxt, there's an image_data_param section under the data layer. I had to add is_color: false to it, and that fixed the issue.
Thanks everyone for comments and replies, appreciate it.
I am trying to implement pixel-wise binary classification for images using Caffe. For each image of dimension 3x256x256, I have a 256x256 label array in which each entry is marked as either 0 or 1. Also, when I read my HDF5 file using the code below,
dirname = "examples/hdf5_classification/data"
f = h5py.File(os.path.join(dirname, 'train.h5'), "r")
ks = f.keys()
data = np.array(f[ks[0]])
label = np.array(f[ks[1]])
print "Data dimension from HDF5", np.shape(data)
print "Label dimension from HDF5", np.shape(label)
I get the data and label dimension as
Data dimension from HDF5 (402, 3, 256, 256)
Label dimension from HDF5 (402, 256, 256)
I am trying to feed this data into the given HDF5 classification network, and while training, I get the following output (using the default solver, but in GPU mode).
!cd /home/unni/MTPMain/caffe-master/ && ./build/tools/caffe train -solver examples/hdf5_classification/solver.prototxt
gives
I1119 01:29:02.222512 11910 caffe.cpp:184] Using GPUs 0
I1119 01:29:02.509752 11910 solver.cpp:47] Initializing solver from parameters:
train_net: "examples/hdf5_classification/train_val.prototxt"
test_net: "examples/hdf5_classification/train_val.prototxt"
test_iter: 250
test_interval: 1000
base_lr: 0.01
display: 1000
max_iter: 10000
lr_policy: "step"
gamma: 0.1
momentum: 0.9
weight_decay: 0.0005
stepsize: 5000
snapshot: 10000
snapshot_prefix: "examples/hdf5_classification/data/train"
solver_mode: GPU
device_id: 0
I1119 01:29:02.519805 11910 solver.cpp:80] Creating training net from train_net file: examples/hdf5_classification/train_val.prototxt
I1119 01:29:02.520031 11910 net.cpp:322] The NetState phase (0) differed from the phase (1) specified by a rule in layer data
I1119 01:29:02.520053 11910 net.cpp:322] The NetState phase (0) differed from the phase (1) specified by a rule in layer accuracy
I1119 01:29:02.520104 11910 net.cpp:49] Initializing net from parameters:
name: "LogisticRegressionNet"
state {
phase: TRAIN
}
layer {
name: "data"
type: "HDF5Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
hdf5_data_param {
source: "examples/hdf5_classification/data/train.txt"
batch_size: 10
}
}
layer {
name: "fc1"
type: "InnerProduct"
bottom: "data"
top: "fc1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "fc1"
bottom: "label"
top: "loss"
}
I1119 01:29:02.520256 11910 layer_factory.hpp:76] Creating layer data
I1119 01:29:02.520277 11910 net.cpp:106] Creating Layer data
I1119 01:29:02.520290 11910 net.cpp:411] data -> data
I1119 01:29:02.520331 11910 net.cpp:411] data -> label
I1119 01:29:02.520352 11910 hdf5_data_layer.cpp:80] Loading list of HDF5 filenames from: examples/hdf5_classification/data/train.txt
I1119 01:29:02.529341 11910 hdf5_data_layer.cpp:94] Number of HDF5 files: 1
I1119 01:29:02.542645 11910 hdf5.cpp:32] Datatype class: H5T_FLOAT
I1119 01:29:10.601307 11910 net.cpp:150] Setting up data
I1119 01:29:10.612926 11910 net.cpp:157] Top shape: 10 3 256 256 (1966080)
I1119 01:29:10.612963 11910 net.cpp:157] Top shape: 10 256 256 (655360)
I1119 01:29:10.612969 11910 net.cpp:165] Memory required for data: 10485760
I1119 01:29:10.612983 11910 layer_factory.hpp:76] Creating layer fc1
I1119 01:29:10.624948 11910 net.cpp:106] Creating Layer fc1
I1119 01:29:10.625015 11910 net.cpp:454] fc1 <- data
I1119 01:29:10.625039 11910 net.cpp:411] fc1 -> fc1
I1119 01:29:10.645814 11910 net.cpp:150] Setting up fc1
I1119 01:29:10.645864 11910 net.cpp:157] Top shape: 10 2 (20)
I1119 01:29:10.645875 11910 net.cpp:165] Memory required for data: 10485840
I1119 01:29:10.645912 11910 layer_factory.hpp:76] Creating layer loss
I1119 01:29:10.657094 11910 net.cpp:106] Creating Layer loss
I1119 01:29:10.657133 11910 net.cpp:454] loss <- fc1
I1119 01:29:10.657147 11910 net.cpp:454] loss <- label
I1119 01:29:10.657163 11910 net.cpp:411] loss -> loss
I1119 01:29:10.657189 11910 layer_factory.hpp:76] Creating layer loss
F1119 01:29:14.883095 11910 softmax_loss_layer.cpp:42] Check failed: outer_num_ * inner_num_ == bottom[1]->count() (10 vs. 655360) Number of labels must match number of predictions; e.g., if softmax axis == 1 and prediction shape is (N, C, H, W), label count (number of labels) must be N*H*W, with integer values in {0, 1, ..., C-1}.
*** Check failure stack trace: ***
# 0x7f0652e1adaa (unknown)
# 0x7f0652e1ace4 (unknown)
# 0x7f0652e1a6e6 (unknown)
# 0x7f0652e1d687 (unknown)
# 0x7f0653494219 caffe::SoftmaxWithLossLayer<>::Reshape()
# 0x7f065353f50f caffe::Net<>::Init()
# 0x7f0653541f05 caffe::Net<>::Net()
# 0x7f06535776cf caffe::Solver<>::InitTrainNet()
# 0x7f0653577beb caffe::Solver<>::Init()
# 0x7f0653578007 caffe::Solver<>::Solver()
# 0x7f06535278b3 caffe::Creator_SGDSolver<>()
# 0x410831 caffe::SolverRegistry<>::CreateSolver()
# 0x40a16b train()
# 0x406908 main
# 0x7f065232cec5 (unknown)
# 0x406e28 (unknown)
# (nil) (unknown)
Aborted
Basically the error is
softmax_loss_layer.cpp:42] Check failed:
outer_num_ * inner_num_ == bottom[1]->count() (10 vs. 655360)
Number of labels must match number of predictions;
e.g., if softmax axis == 1 and prediction shape is (N, C, H, W),
label count (number of labels) must be N*H*W,
with integer values in {0, 1, ..., C-1}.
I am not able to understand why the number of labels expected is just the same as my batch size. How exactly should I tackle this problem? Is this a problem with my labeling method?
Your problem is that the "SoftmaxWithLoss" layer tries to compare a prediction vector of 2 elements per input image to a label of size 256-by-256 per image.
This makes no sense.
Root cause of the error: I guess what you tried to do is to have a binary classifier applied to each pixel of the image. To that end you defined "fc1" as an "InnerProduct" layer with num_output: 2. However, the way Caffe sees this, you have a single binary classifier applied to the entire image; thus Caffe gives you a single binary prediction for the entire image.
How to solve: when working on pixel-wise predictions you no longer need "InnerProduct" layers; instead you want a "fully convolutional net". If you replace "fc1" with a conv layer (for instance, a kernel that examines the 5-by-5 neighborhood of each pixel and makes a decision based on this patch):
layer {
name: "bin_class"
type: "Convolution"
bottom: "data"
top: "bin_class"
convolution_param {
num_output: 2 # binary class output
kernel_size: 5 # 5-by-5 patch for prediction
pad: 2 # make sure spatial output size equals size of label
}
}
Now applying "SoftmaxWithLoss" with bottom: "bin_class" and bottom: "label" should work.
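The same idea expressed with pycaffe's NetSpec, in case you generate the prototxt from Python (a minimal sketch; the HDF5 list file path is the one from your log and the layer names are illustrative):

from caffe import layers as L, NetSpec

n = NetSpec()
n.data, n.label = L.HDF5Data(source='examples/hdf5_classification/data/train.txt',
                             batch_size=10, ntop=2)
# per-pixel binary classifier: a convolution instead of an InnerProduct layer,
# padded so the spatial output matches the 256x256 label maps
n.bin_class = L.Convolution(n.data, num_output=2, kernel_size=5, pad=2,
                            weight_filler={'type': 'xavier'})
# softmax over the channel axis (axis 1); the label blob is (N, H, W) with values in {0, 1}
n.loss = L.SoftmaxWithLoss(n.bin_class, n.label)
print(str(n.to_proto()))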