Is num_epochs limited in tensorflow's csv file reader string_input_producer()? - csv

I have a dummy csv file (y=-x+1)
x,y
1,0
2,-1
3,-2
I try to feed that into a linear regression model. Since I have only so few examples, I want to iterate the training like 1000 times over that file, so I set num_epochs=1000.
However, it seems that Tensorflow limits this number. It works fine if I use num_epochs=5 or 10, but beyond 33 it is capped to 33 epochs. Is that true or am Im doing anything wrong?
# model = W*x+b
...
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)
# reading input from csv
filename_queue = tf.train.string_input_producer(["/tmp/testinput.csv"], num_epochs=1000)
reader = tf.TextLineReader(skip_header_lines=1)
...
col_x, col_label = tf.decode_csv(csv_row, record_defaults=record_defaults)
with tf.Session() as sess:
sess.run(tf.local_variables_initializer())
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
while True:
try:
input_x, input_y = sess.run([col_x, col_label])
sess.run(train, feed_dict={x:input_x, y:input_y})
...
Side question, do I need to do:
input_x, input_y = sess.run([col_x, col_label])
sess.run(train, feed_dict={x:input_x, y:input_y})
I have tried sess.run(train, feed_dict={x:col_x, y:col_y}) directly to avoid the friction but it doesn't work (they are nodes, and feed_dict expects regular data)

The following snippets works perfectly (with your input):
import tensorflow as tf
filename_queue = tf.train.string_input_producer(["/tmp/input.csv"], num_epochs=1000)
reader = tf.TextLineReader(skip_header_lines=1)
_, csv_row = reader.read(filename_queue)
col_x, col_label = tf.decode_csv(csv_row, record_defaults=[[0], [0]])
with tf.Session() as sess:
sess.run(tf.local_variables_initializer())
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
num = 0
try:
while True:
sess.run([col_x, col_label])
num += 1
except:
print(num)
Which gives the following output:
edb#lapelidb:/tmp$ python csv.py
3000

Related

RuntimeError: shape '[128, -1]' is invalid for input of size 378 pytorch

I'm running a spiking neural network for data that has 21 features with a batch size of 128. I get the following error after many iterations of training (this error doesn't arise immediately!):
RuntimeError: shape '[128, -1]' is invalid for input of size 378 pytorch
When I went to go print out what the shapes of the tensors are before, I get the following:
Train
torch.Size([128, 21])
Test
torch.Size([128, 21])
This is my network:
class SpikingNeuralNetwork(nn.Module):
"""
Parameters in SpikingNeuralNetwork class:
1. number_inputs: Number of inputs to the SNN.
2. number_hidden: Number of hidden layers.
3. number_outputs: Number of output classes.
4. beta: Decay rate.
"""
def __init__(self, number_inputs, number_hidden, number_outputs, beta):
super().__init__()
self.number_inputs = number_inputs
self.number_hidden = number_hidden
self.number_outputs = number_outputs
self.beta = beta
# Initialize layers
self.fc1 = nn.Linear(self.number_inputs, self.number_hidden) # Applies linear transformation to all input points
self.lif1 = snn.Leaky(beta = self.beta) # Integrates weighted input over time, emitting a spike if threshold condition is met
self.fc2 = nn.Linear(self.number_hidden, self.number_outputs) # Applies linear transformation to output spikes of lif1
self.lif2 = snn.Leaky(beta = self.beta) # Another spiking neuron, integrating the weighted spikes over time
"""
Forward propagation of SNN. The code below function will only be called once the input argument x
is explicitly passed into net.
#param x: input passed into the network
#return layer of output after applying final spiking neuron
"""
def forward(self, x):
num_steps = 25
# Initialize hidden states at t = 0
mem1 = self.lif1.init_leaky()
mem2 = self.lif2.init_leaky()
# Record the final layer
spk2_rec = []
mem2_rec = []
for step in range(num_steps):
cur1 = self.fc1(x)
spk1, mem1 = self.lif1(cur1, mem1)
cur2 = self.fc2(spk1)
spk2, mem2 = self.lif2(cur2, mem2)
spk2_rec.append(spk2)
mem2_rec.append(mem2)
return torch.stack(spk2_rec, dim = 0), torch.stack(mem2_rec, dim = 0)
This is my training loop:
def training_loop(net, train_loader, test_loader, dtype, device, optimizer):
num_epochs = 1
loss_history = []
test_loss_history = []
counter = 0
# Temporal dynamics
num_steps = 25
# Outer training loop
for epoch in range(num_epochs):
iter_counter = 0
train_batch = iter(train_loader)
# Minibatch training loop
for data, targets in train_batch:
data = data.to(device)
targets = targets.to(device)
# Forward pass
net.train()
print("Train")
print(data.size())
spk_rec, mem_rec = net(data.view(batch_size, -1))
# Initialize the loss and sum over time
loss_val = torch.zeros((1), dtype = dtype, device = device)
for step in range(num_steps):
loss_val += loss_function(mem_rec[step], targets.long().flatten().to(device))
# Gradient calculation and weight update
optimizer.zero_grad()
loss_val.backward()
optimizer.step()
# Store loss history for future plotting
loss_history.append(loss_val.item())
# Test set
with torch.no_grad():
net.eval()
test_data, test_targets = next(iter(test_loader))
test_data = test_data.to(device)
test_targets = test_targets.to(device)
# Test set forward pass
print("Test")
print(test_data.size())
test_spk, test_mem = net(test_data.view(batch_size, -1))
# Test set loss
test_loss = torch.zeros((1), dtype = dtype, device = device)
for step in range(num_steps):
test_loss += loss_function(test_mem[step], test_targets.long().flatten().to(device))
test_loss_history.append(test_loss.item())
# Print train/test loss and accuracy
if counter % 50 == 0:
train_printer(epoch, iter_counter, counter, loss_history, data, targets, test_data, test_targets)
counter = counter + 1
iter_counter = iter_counter + 1
return loss_history, test_loss_history
The error occurs on spk_rec, mem_rec = net(data.view(batch_size, -1)).
The code was adopted from https://snntorch.readthedocs.io/en/latest/tutorials/tutorial_5.html, where it was originally used for the MNIST dataset. However, I am not working with an image dataset. I am working with a dataset that has 21 features and predicts just one target (with 100 classes). I tried to change data.view(batch_size, -1) and test_data.view(batch_size, -1) to data.view(batch_size, 21) and test_data.view(batch_size, 21) based on some other forum answers that I saw, and my program is running for now through the training loop. Does anyone have any suggestions for how I can run through the training with no errors?
EDIT: I now get the error RuntimeError: shape '[128, 21]' is invalid for input of size 378 from spk_rec, mem_rec = net(data.view(batch_size, -1)).
Here are my DataLoaders:
train_loader = DataLoader(dataset = train, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(dataset = test, batch_size = batch_size, shuffle = True)
My batch size is 128.
Tryng to run it by myself to try to solve your problem I luck also: net params and snn.snn.Leaky
import torch
from torch import nn
from torch.utils.data import DataLoader
class SpikingNeuralNetwork(nn.Module):
"""
Parameters in SpikingNeuralNetwork class:
1. number_inputs: Number of inputs to the SNN.
2. number_hidden: Number of hidden layers.
3. number_outputs: Number of output classes.
4. beta: Decay rate.
"""
def __init__(self, number_inputs, number_hidden, number_outputs, beta):
super().__init__()
self.number_inputs = number_inputs
self.number_hidden = number_hidden
self.number_outputs = number_outputs
self.beta = beta
# Initialize layers
self.fc1 = nn.Linear(self.number_inputs,
self.number_hidden) # Applies linear transformation to all input points
self.lif1 = snn.Leaky(
beta=self.beta) # Integrates weighted input over time, emitting a spike if threshold condition is met
self.fc2 = nn.Linear(self.number_hidden,
self.number_outputs) # Applies linear transformation to output spikes of lif1
self.lif2 = snn.Leaky(beta=self.beta) # Another spiking neuron, integrating the weighted spikes over time
"""
Forward propagation of SNN. The code below function will only be called once the input argument x
is explicitly passed into net.
#param x: input passed into the network
#return layer of output after applying final spiking neuron
"""
def forward(self, x):
num_steps = 25
# Initialize hidden states at t = 0
mem1 = self.lif1.init_leaky()
mem2 = self.lif2.init_leaky()
# Record the final layer
spk2_rec = []
mem2_rec = []
for step in range(num_steps):
cur1 = self.fc1(x)
spk1, mem1 = self.lif1(cur1, mem1)
cur2 = self.fc2(spk1)
spk2, mem2 = self.lif2(cur2, mem2)
spk2_rec.append(spk2)
mem2_rec.append(mem2)
return torch.stack(spk2_rec, dim=0), torch.stack(mem2_rec, dim=0)
batch_size = 2
train = torch.rand(128, 21)
test = torch.rand(128, 21)
train_loader = DataLoader(dataset=train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test, batch_size=batch_size, shuffle=True)
net = SpikingNeuralNetwork(number_inputs=1)
loss_function = nn.CrossEntropyLoss()
optimizer = nn.optim.Adam(net.parameters(), lr=0.1)
def training_loop(net, train_loader, test_loader, dtype, device, optimizer):
num_epochs = 1
loss_history = []
test_loss_history = []
counter = 0
# Temporal dynamics
num_steps = 25
# Outer training loop
for epoch in range(num_epochs):
iter_counter = 0
train_batch = iter(train_loader)
# Minibatch training loop
for data, targets in train_batch:
data = data.to(device)
targets = targets.to(device)
# Forward pass
net.train()
print("Train")
print(data.size())
spk_rec, mem_rec = net(data.view(batch_size, -1))
# Initialize the loss and sum over time
loss_val = torch.zeros((1), dtype=dtype, device=device)
for step in range(num_steps):
loss_val += loss_function(mem_rec[step], targets.long().flatten().to(device))
# Gradient calculation and weight update
optimizer.zero_grad()
loss_val.backward()
optimizer.step()
# Store loss history for future plotting
loss_history.append(loss_val.item())
# Test set
with torch.no_grad():
net.eval()
test_data, test_targets = next(iter(test_loader))
test_data = test_data.to(device)
test_targets = test_targets.to(device)
# Test set forward pass
print("Test")
print(test_data.size())
test_spk, test_mem = net(test_data.view(batch_size, -1))
# Test set loss
test_loss = torch.zeros((1), dtype=dtype, device=device)
for step in range(num_steps):
test_loss += loss_function(test_mem[step], test_targets.long().flatten().to(device))
test_loss_history.append(test_loss.item())
# Print train/test loss and accuracy
if counter % 50 == 0:
train_printer(epoch, iter_counter, counter, loss_history, data, targets, test_data, test_targets)
counter = counter + 1
iter_counter = iter_counter + 1
return loss_history, test_loss_history
Your code works just fine on the MNIST dataset, so I think it might be a problem with how the DataLoader is being called. My guess is that the total dataset is not evenly divisible by your batch_size. If this is true, then you have two options:
Instead of spk_rec, mem_rec = net(data.view(batch_size, -1)), try spk_rec, mem_rec = net(data.flatten(1)) which preserves the first dimension of your data.
Alternatively, you may need to set drop_last=True in the DataLoader functions.

Why model's loss is always revolving around 1 in every epoch?

During training, loss of my model is revolving around "1". It is not converging.
I tried various optimizer but it still showing the same pattern. I am using keras with tensorflow backend. What could be possible reasons? Any help or reference link will be appreciable.
here is my model:
def model_vgg19():
vgg_model = VGG19(weights="imagenet", include_top=False, input_shape=(128,128,3))
for layer in vgg_model.layers[:10]:
layer.trainable = False
intermediate_layer_outputs = get_layers_output_by_name(vgg_model, ["block1_pool", "block2_pool", "block3_pool", "block4_pool"])
convnet_output = GlobalAveragePooling2D()(vgg_model.output)
for layer_name, output in intermediate_layer_outputs.items():
output = GlobalAveragePooling2D()(output)
convnet_output = concatenate([convnet_output, output])
convnet_output = Dense(2048, activation='relu')(convnet_output)
convnet_output = Dropout(0.6)(convnet_output)
convnet_output = Dense(2048, activation='relu')(convnet_output)
convnet_output = Lambda(lambda x: K.l2_normalize(x,axis=1)(convnet_output)
final_model = Model(inputs=[vgg_model.input], outputs=convnet_output)
return final_model
model=model_vgg19()
here is my loss function:
def hinge_loss(y_true, y_pred):
y_pred = K.clip(y_pred, _EPSILON, 1.0-_EPSILON)
loss = tf.convert_to_tensor(0,dtype=tf.float32)
g = tf.constant(1.0, shape=[1], dtype=tf.float32)
for i in range(0, batch_size, 3):
try:
q_embedding = y_pred[i+0]
p_embedding = y_pred[i+1]
n_embedding = y_pred[i+2]
D_q_p = K.sqrt(K.sum((q_embedding - p_embedding)**2))
D_q_n = K.sqrt(K.sum((q_embedding - n_embedding)**2))
loss = (loss + g + D_q_p - D_q_n)
except:
continue
loss = loss/(batch_size/3)
zero = tf.constant(0.0, shape=[1], dtype=tf.float32)
return tf.maximum(loss,zero)
What is definitely a problem is that you shuffle your data and then try to learn triplets out of this.
As you can see here: https://keras.io/models/model/ model.fit shuffles your data in each epoch, making your triplet setup obsolete. Try to set the shuffle parameter to false and see what happens, there might be different errors as well.

How do I get CSV files into an Estimator in Tensorflow 1.6

I am new to tensorflow (and my first question in StackOverflow)
As a learning tool, I am trying to do something simple. (4 days later I am still confused)
I have one CSV file with 36 columns (3500 records) with 0s and 1s.
I am envisioning this file as a flattened 6x6 matrix.
I have another CSV file with 1 columnn of ground truth 0 or 1 (3500 records) which indicates if at least 4 of the 6 of elements in the 6x6 matrix's diagonal are 1's.
I am not sure I have processed the CSV files correctly.
I am confused as to how I create the features dictionary and Labels and how that fits into the DNNClassifier
I am using TensorFlow 1.6, Python 3.6
Below is the small amount of code I have so far.
import tensorflow as tf
import os
def x_map(line):
rDefaults = [[] for cl in range(36)]
x_row = tf.decode_csv(line, record_defaults=rDefaults)
return x_row
def y_map(line):
line = tf.string_to_number(line, out_type=tf.int32)
y_row = tf.one_hot(line, depth=2)
return y_row
x_path_file = os.path.join('D:', 'Diag', '6x6_train.csv')
y_path_file = os.path.join('D:', 'Diag', 'HasDiag_train.csv')
filenames = [x_path_file]
x_dataset = tf.data.TextLineDataset(filenames)
x_dataset = x_dataset.map(x_map)
x_dataset = x_dataset.batch(1)
x_iter = x_dataset.make_one_shot_iterator()
x_next_el = x_iter.get_next()
filenames = [y_path_file]
y_dataset = tf.data.TextLineDataset(filenames)
y_dataset = y_dataset.map(y_map)
y_dataset = y_dataset.batch(1)
y_iter = y_dataset.make_one_shot_iterator()
y_next_el = y_iter.get_next()
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
x_el = (sess.run(x_next_el))
y_el = (sess.run(y_next_el))
The output for x_el is:
(array([1.], dtype=float32), array([1.], dtype=float32), array([1.], dtype=float32), array([1.], dtype=float32), array([1.], dtype=float32), array([0.] ... it goes on...
The output for y_el is:
[[1. 0.]]
You're pretty much there for a minimal working model. The main issue I see is that tf.decode_csv returns a tuple of tensors, where as I expect you want a single tensor with all values. Easy fix:
x_row = tf.stack(tf.decode_csv(line, record_defaults=rDefaults))
That should work... but it fails to take advantage of many of the awesome things the tf.data.Dataset API has to offer, like shuffling, parallel threading etc. For example, if you shuffle each dataset, those shuffling operations won't be consistent. This is because you've created two separate datasets and manipulated them independently. If you create them independently, zip them together then manipulate, those manipulations will be consistent.
Try something along these lines:
def get_inputs(
count=None, shuffle=True, buffer_size=1000, batch_size=32,
num_parallel_calls=8, x_paths=[x_path_file], y_paths=[y_path_file]):
"""
Get x, y inputs.
Args:
count: number of epochs. None indicates infinite epochs.
shuffle: whether or not to shuffle the dataset
buffer_size: used in shuffle
batch_size: size of batch. See outputs below
num_parallel_calls: used in map. Note if > 1, intra-batch ordering
will be shuffled
x_paths: list of paths to x-value files.
y_paths: list of paths to y-value files.
Returns:
x: (batch_size, 6, 6) tensor
y: (batch_size, 2) tensor of 1-hot labels
"""
def x_map(line):
rDefaults = [[] for cl in range(n_dims**2)]
x_row = tf.stack(tf.decode_csv(line, record_defaults=rDefaults))
return x_row
def y_map(line):
line = tf.string_to_number(line, out_type=tf.int32)
y_row = tf.one_hot(line, depth=2)
return y_row
def xy_map(x, y):
return x_map(x), y_map(y)
x_ds = tf.data.TextLineDataset(x_paths)
y_ds = tf.data.TextLineDataset(y_paths)
combined = tf.data.Dataset.zip((x_ds, y_ds))
combined = combined.repeat(count=count)
if shuffle:
combined = combined.shuffle(buffer_size)
combined = combined.map(xy_map, num_parallel_calls=num_parallel_calls)
combined = combined.batch(batch_size)
x, y = combined.make_one_shot_iterator().get_next()
return x, y
To experiment/debug,
x, y = get_inputs()
with tf.Session() as sess:
xv, yv = sess.run((x, y))
print(xv.shape, yv.shape)
For use in an estimator, pass the function itself.
estimator.train(get_inputs, max_steps=10000)
def get_eval_inputs():
return get_inputs(
count=1, shuffle=False
x_paths=[x_eval_paths],
y_paths=[y_eval_paths])
estimator.eval(get_eval_inputs)

Tensorflow feed_dict not learning

Copying and pasting code from tensorflow's MNIST tutorial works just fine, resulting in a ~92% accuracy, as expected.
When I read MNIST data as a CSV, and convert to an np array using pd.DataFrame.values, this process breaks down. I get a ~10% (no better than random) accuracy from this.
Below is the code (tutorial code works well, my CSV reader fails to learn):
Working MNIST tutorial:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x, W) + b)
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
for i in range(1000):
batch_xs, batch_ys = mnist.train.next_batch(100)
sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
Not working (read CSV and feed np array):
import pandas as pd
from sklearn.cross_validation import train_test_split
import numpy as np
# read csv file
MNIST = pd.read_csv("/data.csv")
# pop label column and create training label array
train_label = MNIST.pop("label")
# converts from dataframe to np array
MNIST=MNIST.values
# convert train labels to one hots
train_labels = pd.get_dummies(train_label)
# make np array
train_labels = train_labels.values
x_train,x_test,y_train,y_test = train_test_split(MNIST,train_labels,test_size=0.2)
# we now have features (x_train) and y values, separated into test and train
# convert to dtype float 32
x_train,x_test,y_train,y_test = np.array(x_train,dtype='float32'), np.array(x_test,dtype='float32'),np.array(y_train,dtype='float32'),np.array(y_test,dtype='float32')
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x, W) + b)
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
def get_mini_batch(x,y):
# choose 100 random row values
rows=np.random.choice(x.shape[0], 100)
# return arrays of 100 random rows (for features and labels)
return x[rows], y[rows]
# train
for i in range(100):
# get mini batch
a,b=get_mini_batch(x_train,y_train)
# run train step, feeding arrays of 100 rows each time
sess.run(train_step, feed_dict={x: a, y_: b})
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: x_test, y_: y_test}))
Help would be greatly appreciated. (CSV file here.)
I am pretty sure batches shouldn't be 100 random rows but should be 100 rows that come after each other, for example, 0:99 and 100:199 would be your first two batches. Try this code for the batches. Check this kernel out for training Mnist from csv in TF
epochs_completed = 0
index_in_epoch = 0
num_examples = train_images.shape[0]
# serve data by batches
def next_batch(batch_size):
global train_images
global train_labels
global index_in_epoch
global epochs_completed
start = index_in_epoch
index_in_epoch += batch_size
# when all trainig data have been already used, it is reorder randomly
if index_in_epoch > num_examples:
# finished epoch
epochs_completed += 1
# shuffle the data
perm = np.arange(num_examples)
np.random.shuffle(perm)
train_images = train_images[perm]
train_labels = train_labels[perm]
# start next epoch
start = 0
index_in_epoch = batch_size
assert batch_size <= num_examples
end = index_in_epoch
return train_images[start:end], train_labels[start:end]
Did you try training it for more iterations? I see that the original code is training over 1000 iterations
for i in range(1000):
Whereas the csv code only trains for 100 iterations:
for i in range(100):
If that's not the reason, it would be helpful if you could also share your CSV file, than we can easily test your code.
Edit:
I have tested your code and it seems to be caused by numerical instabilities in the simple cross_entropy calculation (see this SO question). Replacing your cross_entropy definition by the following line, you be able to resolve the issue:
cross_entropy = tf.reduce_mean(tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(
y, y_, name='xentropy')))
By also visualizing the returned cross_entropy, you will see that your code returns NaN, whereas with this code you will get real numbers...
The complete working code which also prints out the cross_entropy per iteration:
import pandas as pd
from sklearn.cross_validation import train_test_split
import numpy as np
# read csv file
MNIST = pd.read_csv("data.csv")
# pop label column and create training label array
train_label = MNIST.pop("label")
# converts from dataframe to np array
MNIST=MNIST.values
# convert train labels to one hots
train_labels = pd.get_dummies(train_label)
# make np array
train_labels = train_labels.values
x_train,x_test,y_train,y_test = train_test_split(MNIST,train_labels,test_size=0.2)
# we now have features (x_train) and y values, separated into test and train
# convert to dtype float 32
x_train,x_test,y_train,y_test = np.array(x_train,dtype='float32'), np.array(x_test,dtype='float32'),np.array(y_train,dtype='float32'),np.array(y_test,dtype='float32')
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x, W) + b)
y_ = tf.placeholder(tf.float32, [None, 10])
print y.get_shape()
print y_.get_shape()
cross_entropy = tf.reduce_mean(tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(y, y_, name='xentropy')))
train_step = tf.train.GradientDescentOptimizer(0.0001).minimize(cross_entropy)
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
def get_mini_batch(x,y):
# choose 100 random row values
rows=np.random.choice(x.shape[0], 100)
# return arrays of 100 random rows (for features and labels)
return x[rows], y[rows]
# train
for i in range(1000):
# get mini batch
a,b=get_mini_batch(x_train,y_train)
# run train step, feeding arrays of 100 rows each time
_, cost =sess.run([train_step,cross_entropy], feed_dict={x: a, y_: b})
print cost
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(sess.run(accuracy, feed_dict={x: x_test, y_: y_test}))
You still need to optimize the learning rate and the #iterations further, but with this setting you should already get ~70% accuracy.

Simple Multilayer Perceptron model does not converge in TensorFlow

I am new to TensorFlow. Today I tried to implement my first model in TF but it returned strange results. I know that I am missing something here but I was not able to figure it out. Here is the story.
Model
I have a simple Multilayer Perceptron model with only a single hidden layer applied on MNIST databse. Layers are defined like [input(784) , hidden_layer(470) , output_layer(10)] with tanh as non-linearity for hidden layer and softmax as the loss for output layer. The optimizer I am using is Gradient Descent Algorithm with learning rate of 0.01. My mini batch size is 1 (I am training model with samples one by one).
My implementations :
First I implemented my model in C++ and got around 96% accuracy.Here is the repository : https://github.com/amin2ros/Artificog
I implemented the exact model in TensorFlow but surprisingly the model didn't converge at all. Here is the code.
Code:
import sys
import input_data
import matplotlib.pyplot as plt
from pylab import *
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
import tensorflow as tf
# Parameters
learning_rate = 0.1
training_epochs = 1
batch_size = 1
display_step = 1
# Network Parameters
n_hidden_1 = 470 # 1st layer num features
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])
# Create model
def multilayer_perceptron(_X, _weights, _biases):
layer_1 = tf.tanh(tf.add(tf.matmul(_X, _weights['h1']), _biases['b1']))
return tf.matmul(layer_1, _weights['out']) + _biases['out']
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes]))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
# Construct model
pred = multilayer_perceptron(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax(pred)) # Softmax loss
optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(cost) #
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
# Training cycle
for epoch in range(training_epochs):
avg_cost = 0.
m= 0
total_batch = int(mnist.train.num_examples/batch_size)
counter=0
#print 'count = ' , total_batch
#sys.stdin.read(1)
# Loop over all batches
for i in range(total_batch):
batch_xs, batch_ys = mnist.train.next_batch(batch_size)
label = tf.argmax(batch_ys,1).eval()[0]
counter+=1
sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys})
wrong_prediction = tf.not_equal(tf.argmax(pred, 1), tf.argmax(y, 1))
missed=tf.cast(wrong_prediction, "float")
m += missed.eval({x: batch_xs, y: batch_ys})[0]
print "Sample #", counter , " - Label : " , label , " - Prediction :" , tf.argmax(pred, 1).eval({x: batch_xs, y: batch_ys})[0] ,\
"- Missed = " , m , " - Error Rate = " , 100 * float(m)/counter
print "Optimization Finished!"
I am very curious why this happens. Any help is appreciated.
Edit:
As commented below definition of cost function was incorrect so it should be like
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred,y))
Now model converges :)