Actor Critic model returns NaN for action probabilities - reinforcement-learning

I am new to RL and walking through the Keras implementation of Actor Critic.
As a variant of it, I am trying to learn the strategy for WORDLE. However, after a few runs, my action spaces all go down to nan.
actions = [nan nan nan ... nan nan nan]
Not sure what's happening. Could someone have any insights or pointers?
Attaching my code for reference.
Thanks
import pandas as pd
import numpy as np
import random
import string
import random
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Configuration parameters for the whole setup
gamma = 0.9 # Discount factor for past rewards
max_runs = 10000
eps = np.finfo(np.float32).eps.item() # Smallest number such that 1.0 + eps != 1.0
my_file = open("<wordle set of words data path>", "r")
content = my_file.read()
content = list(content.split('\n'))
lower_alphabet = list(string.ascii_letters)[:26]
def get_secret_word():
return random.choice(content)
def reset_available_action_space():
return [1 for i in range(len(content))]
def reset_guessed_alphabet_state():
return [0 for i in range(len(lower_alphabet))]
# array of 26 which represents which alphabet is available in word
def reset_contains_alphabet_state():
return [0 for i in range(len(lower_alphabet))]
# Array of 26*5.
# First 26 represent which alphabet was correctly guessed at the first slot
# Second 26 represent which alphabet was correctly guessed at the second slot. And so on for the next 5 slots.
def reset_correct_alphabet_pos_state():
return [0 for i in range(len(lower_alphabet)*5)]
def select_and_update_AVAILABLE_ACTION_SPACE(actions):
action_index = 0
while AVAILABLE_ACTION_SPACE[actions[action_index]] == False:
action_index += 1
AVAILABLE_ACTION_SPACE[actions[action_index]] = 0
return actions[action_index]
def env_reset():
AVAILABLE_ACTION_SPACE = reset_available_action_space()
guessed_alphabet_state = reset_guessed_alphabet_state()
contains_alphabet_state = reset_contains_alphabet_state()
correct_alphabet_pos_state = reset_correct_alphabet_pos_state()
state = guessed_alphabet_state + contains_alphabet_state + correct_alphabet_pos_state
SECRET_WORD = get_secret_word()
return state, SECRET_WORD, AVAILABLE_ACTION_SPACE
def env_step(action, state):
guessed_word = content[action]
guessed_alphabet_state = state[:26]
contains_alphabet_state = state[26:52]
correct_alphabet_pos_state = state[52:]
done = False
reward = -10
if SECRET_WORD == guessed_word:
done = True
reward = 10
secret_word = list(SECRET_WORD)
guessed_word = list(guessed_word)
for index_, char_ in enumerate(guessed_word):
alphabet_index = lower_alphabet.index(char_)
guessed_alphabet_state[alphabet_index] = 1
if char_ in secret_word:
contains_alphabet_state[alphabet_index] = 1
if secret_word[index_] == char_:
correct_alphabet_pos_state[26*index_ + alphabet_index] = 1
state = guessed_alphabet_state + contains_alphabet_state + correct_alphabet_pos_state
return state, reward, done
num_inputs = 182
num_actions = len(content)
num_hidden_1 = 256
num_hidden_2 = 128
inputs = layers.Input(shape=(num_inputs,))
common = layers.Dense(num_hidden_1, activation="relu")(inputs)
common = layers.Dense(num_hidden_2, activation="relu")(common)
action = layers.Dense(num_actions, activation="softmax")(common)
critic = layers.Dense(1)(common)
model = keras.Model(inputs=inputs, outputs=[action, critic])
optimizer = keras.optimizers.Adam(learning_rate=0.001)
huber_loss = keras.losses.Huber()
action_probs_history = []
critic_value_history = []
rewards_history = []
running_reward = 0
episode_count = 0
for runs in range(max_runs):
max_steps_per_episode = 6
state, SECRET_WORD, AVAILABLE_ACTION_SPACE = env_reset()
episode_reward = 0
with tf.GradientTape() as tape:
for timestep in range(max_steps_per_episode):
state_tensor = tf.convert_to_tensor(state)
state_tensor = tf.expand_dims(state, 0)
action_probs, critic_value = model(state_tensor)
critic_value_history.append(critic_value[0, 0])
actions = np.random.choice(num_actions, size=max_steps_per_episode+1, replace = False, p=np.squeeze(action_probs))
action = select_and_update_AVAILABLE_ACTION_SPACE(actions)
action_probs_history.append(tf.math.log(action_probs[0, action]))
state, reward, done = env_step(action, state)
rewards_history.append(reward)
episode_reward += reward
if done:
break
# Update running reward to check condition for solving
running_reward = 0.05 * episode_reward + (1 - 0.05) * running_reward
# Calculate expected value from rewards
# - At each timestep what was the total reward received after that timestep
# - Rewards in the past are discounted by multiplying them with gamma
# - These are the labels for our critic
returns = []
discounted_sum = 0
for r in rewards_history[::-1]:
discounted_sum = r + gamma * discounted_sum
returns.insert(0, discounted_sum)
# Normalize
returns = np.array(returns)
returns = (returns - np.mean(returns)) / (np.std(returns) + eps)
returns = returns.tolist()
# Calculating loss values to update our network
history = zip(action_probs_history, critic_value_history, returns)
actor_losses = []
critic_losses = []
for log_prob, value, ret in history:
# At this point in history, the critic estimated that we would get a
# total reward = `value` in the future. We took an action with log probability
# of `log_prob` and ended up recieving a total reward = `ret`.
# The actor must be updated so that it predicts an action that leads to
# high rewards (compared to critic's estimate) with high probability.
diff = ret - value
actor_losses.append(-log_prob * diff) # actor loss
# The critic must be updated so that it predicts a better estimate of
# the future rewards.
critic_losses.append(
huber_loss(tf.expand_dims(value, 0), tf.expand_dims(ret, 0))
)
# Backpropagation
loss_value = sum(actor_losses) + sum(critic_losses)
grads = tape.gradient(loss_value, model.trainable_variables)
optimizer.apply_gradients(zip(grads, model.trainable_variables))
# Clear the loss and reward history
action_probs_history.clear()
critic_value_history.clear()
rewards_history.clear()
# Log details
episode_count += 1
if episode_count % 10 == 0:
template = "running reward: {:.2f} at episode {}"
print(template.format(running_reward, episode_count))
wordle set of words data path => https://gist.github.com/cfreshman/a03ef2cba789d8cf00c08f767e0fad7b
My State Space is [guessed alphabets, alphabets contained in the secret word, alphabet in the correct position]
guessed alphabets => Array of 26 size (a-z)
alphabets contained in the secret word => Array of 26 size (a-z)
alphabet in the correct position => Array of 26 * 5 [(a-z), (a-z), (a-z), (a-z), (a-z)] (as each word is 5 letters)
The Available action spaces get updated after every action. The previously taken actions are no longer available for future actions.
I have tried both relu and tanh for activation
Observation: Critic Value keeps increasing to an extremely large values

Related

RuntimeError: shape '[128, -1]' is invalid for input of size 378 pytorch

I'm running a spiking neural network for data that has 21 features with a batch size of 128. I get the following error after many iterations of training (this error doesn't arise immediately!):
RuntimeError: shape '[128, -1]' is invalid for input of size 378 pytorch
When I went to go print out what the shapes of the tensors are before, I get the following:
Train
torch.Size([128, 21])
Test
torch.Size([128, 21])
This is my network:
class SpikingNeuralNetwork(nn.Module):
"""
Parameters in SpikingNeuralNetwork class:
1. number_inputs: Number of inputs to the SNN.
2. number_hidden: Number of hidden layers.
3. number_outputs: Number of output classes.
4. beta: Decay rate.
"""
def __init__(self, number_inputs, number_hidden, number_outputs, beta):
super().__init__()
self.number_inputs = number_inputs
self.number_hidden = number_hidden
self.number_outputs = number_outputs
self.beta = beta
# Initialize layers
self.fc1 = nn.Linear(self.number_inputs, self.number_hidden) # Applies linear transformation to all input points
self.lif1 = snn.Leaky(beta = self.beta) # Integrates weighted input over time, emitting a spike if threshold condition is met
self.fc2 = nn.Linear(self.number_hidden, self.number_outputs) # Applies linear transformation to output spikes of lif1
self.lif2 = snn.Leaky(beta = self.beta) # Another spiking neuron, integrating the weighted spikes over time
"""
Forward propagation of SNN. The code below function will only be called once the input argument x
is explicitly passed into net.
#param x: input passed into the network
#return layer of output after applying final spiking neuron
"""
def forward(self, x):
num_steps = 25
# Initialize hidden states at t = 0
mem1 = self.lif1.init_leaky()
mem2 = self.lif2.init_leaky()
# Record the final layer
spk2_rec = []
mem2_rec = []
for step in range(num_steps):
cur1 = self.fc1(x)
spk1, mem1 = self.lif1(cur1, mem1)
cur2 = self.fc2(spk1)
spk2, mem2 = self.lif2(cur2, mem2)
spk2_rec.append(spk2)
mem2_rec.append(mem2)
return torch.stack(spk2_rec, dim = 0), torch.stack(mem2_rec, dim = 0)
This is my training loop:
def training_loop(net, train_loader, test_loader, dtype, device, optimizer):
num_epochs = 1
loss_history = []
test_loss_history = []
counter = 0
# Temporal dynamics
num_steps = 25
# Outer training loop
for epoch in range(num_epochs):
iter_counter = 0
train_batch = iter(train_loader)
# Minibatch training loop
for data, targets in train_batch:
data = data.to(device)
targets = targets.to(device)
# Forward pass
net.train()
print("Train")
print(data.size())
spk_rec, mem_rec = net(data.view(batch_size, -1))
# Initialize the loss and sum over time
loss_val = torch.zeros((1), dtype = dtype, device = device)
for step in range(num_steps):
loss_val += loss_function(mem_rec[step], targets.long().flatten().to(device))
# Gradient calculation and weight update
optimizer.zero_grad()
loss_val.backward()
optimizer.step()
# Store loss history for future plotting
loss_history.append(loss_val.item())
# Test set
with torch.no_grad():
net.eval()
test_data, test_targets = next(iter(test_loader))
test_data = test_data.to(device)
test_targets = test_targets.to(device)
# Test set forward pass
print("Test")
print(test_data.size())
test_spk, test_mem = net(test_data.view(batch_size, -1))
# Test set loss
test_loss = torch.zeros((1), dtype = dtype, device = device)
for step in range(num_steps):
test_loss += loss_function(test_mem[step], test_targets.long().flatten().to(device))
test_loss_history.append(test_loss.item())
# Print train/test loss and accuracy
if counter % 50 == 0:
train_printer(epoch, iter_counter, counter, loss_history, data, targets, test_data, test_targets)
counter = counter + 1
iter_counter = iter_counter + 1
return loss_history, test_loss_history
The error occurs on spk_rec, mem_rec = net(data.view(batch_size, -1)).
The code was adopted from https://snntorch.readthedocs.io/en/latest/tutorials/tutorial_5.html, where it was originally used for the MNIST dataset. However, I am not working with an image dataset. I am working with a dataset that has 21 features and predicts just one target (with 100 classes). I tried to change data.view(batch_size, -1) and test_data.view(batch_size, -1) to data.view(batch_size, 21) and test_data.view(batch_size, 21) based on some other forum answers that I saw, and my program is running for now through the training loop. Does anyone have any suggestions for how I can run through the training with no errors?
EDIT: I now get the error RuntimeError: shape '[128, 21]' is invalid for input of size 378 from spk_rec, mem_rec = net(data.view(batch_size, -1)).
Here are my DataLoaders:
train_loader = DataLoader(dataset = train, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(dataset = test, batch_size = batch_size, shuffle = True)
My batch size is 128.
Tryng to run it by myself to try to solve your problem I luck also: net params and snn.snn.Leaky
import torch
from torch import nn
from torch.utils.data import DataLoader
class SpikingNeuralNetwork(nn.Module):
"""
Parameters in SpikingNeuralNetwork class:
1. number_inputs: Number of inputs to the SNN.
2. number_hidden: Number of hidden layers.
3. number_outputs: Number of output classes.
4. beta: Decay rate.
"""
def __init__(self, number_inputs, number_hidden, number_outputs, beta):
super().__init__()
self.number_inputs = number_inputs
self.number_hidden = number_hidden
self.number_outputs = number_outputs
self.beta = beta
# Initialize layers
self.fc1 = nn.Linear(self.number_inputs,
self.number_hidden) # Applies linear transformation to all input points
self.lif1 = snn.Leaky(
beta=self.beta) # Integrates weighted input over time, emitting a spike if threshold condition is met
self.fc2 = nn.Linear(self.number_hidden,
self.number_outputs) # Applies linear transformation to output spikes of lif1
self.lif2 = snn.Leaky(beta=self.beta) # Another spiking neuron, integrating the weighted spikes over time
"""
Forward propagation of SNN. The code below function will only be called once the input argument x
is explicitly passed into net.
#param x: input passed into the network
#return layer of output after applying final spiking neuron
"""
def forward(self, x):
num_steps = 25
# Initialize hidden states at t = 0
mem1 = self.lif1.init_leaky()
mem2 = self.lif2.init_leaky()
# Record the final layer
spk2_rec = []
mem2_rec = []
for step in range(num_steps):
cur1 = self.fc1(x)
spk1, mem1 = self.lif1(cur1, mem1)
cur2 = self.fc2(spk1)
spk2, mem2 = self.lif2(cur2, mem2)
spk2_rec.append(spk2)
mem2_rec.append(mem2)
return torch.stack(spk2_rec, dim=0), torch.stack(mem2_rec, dim=0)
batch_size = 2
train = torch.rand(128, 21)
test = torch.rand(128, 21)
train_loader = DataLoader(dataset=train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test, batch_size=batch_size, shuffle=True)
net = SpikingNeuralNetwork(number_inputs=1)
loss_function = nn.CrossEntropyLoss()
optimizer = nn.optim.Adam(net.parameters(), lr=0.1)
def training_loop(net, train_loader, test_loader, dtype, device, optimizer):
num_epochs = 1
loss_history = []
test_loss_history = []
counter = 0
# Temporal dynamics
num_steps = 25
# Outer training loop
for epoch in range(num_epochs):
iter_counter = 0
train_batch = iter(train_loader)
# Minibatch training loop
for data, targets in train_batch:
data = data.to(device)
targets = targets.to(device)
# Forward pass
net.train()
print("Train")
print(data.size())
spk_rec, mem_rec = net(data.view(batch_size, -1))
# Initialize the loss and sum over time
loss_val = torch.zeros((1), dtype=dtype, device=device)
for step in range(num_steps):
loss_val += loss_function(mem_rec[step], targets.long().flatten().to(device))
# Gradient calculation and weight update
optimizer.zero_grad()
loss_val.backward()
optimizer.step()
# Store loss history for future plotting
loss_history.append(loss_val.item())
# Test set
with torch.no_grad():
net.eval()
test_data, test_targets = next(iter(test_loader))
test_data = test_data.to(device)
test_targets = test_targets.to(device)
# Test set forward pass
print("Test")
print(test_data.size())
test_spk, test_mem = net(test_data.view(batch_size, -1))
# Test set loss
test_loss = torch.zeros((1), dtype=dtype, device=device)
for step in range(num_steps):
test_loss += loss_function(test_mem[step], test_targets.long().flatten().to(device))
test_loss_history.append(test_loss.item())
# Print train/test loss and accuracy
if counter % 50 == 0:
train_printer(epoch, iter_counter, counter, loss_history, data, targets, test_data, test_targets)
counter = counter + 1
iter_counter = iter_counter + 1
return loss_history, test_loss_history
Your code works just fine on the MNIST dataset, so I think it might be a problem with how the DataLoader is being called. My guess is that the total dataset is not evenly divisible by your batch_size. If this is true, then you have two options:
Instead of spk_rec, mem_rec = net(data.view(batch_size, -1)), try spk_rec, mem_rec = net(data.flatten(1)) which preserves the first dimension of your data.
Alternatively, you may need to set drop_last=True in the DataLoader functions.

How are n dimensional vectors state vectors represented in Q Learning?

Using this code:
import gym
import numpy as np
import time
"""
SARSA on policy learning python implementation.
This is a python implementation of the SARSA algorithm in the Sutton and Barto's book on
RL. It's called SARSA because - (state, action, reward, state, action). The only difference
between SARSA and Qlearning is that SARSA takes the next action based on the current policy
while qlearning takes the action with maximum utility of next state.
Using the simplest gym environment for brevity: https://gym.openai.com/envs/FrozenLake-v0/
"""
def init_q(s, a, type="ones"):
"""
#param s the number of states
#param a the number of actions
#param type random, ones or zeros for the initialization
"""
if type == "ones":
return np.ones((s, a))
elif type == "random":
return np.random.random((s, a))
elif type == "zeros":
return np.zeros((s, a))
def epsilon_greedy(Q, epsilon, n_actions, s, train=False):
"""
#param Q Q values state x action -> value
#param epsilon for exploration
#param s number of states
#param train if true then no random actions selected
"""
if train or np.random.rand() < epsilon:
action = np.argmax(Q[s, :])
else:
action = np.random.randint(0, n_actions)
return action
def sarsa(alpha, gamma, epsilon, episodes, max_steps, n_tests, render = True, test=False):
"""
#param alpha learning rate
#param gamma decay factor
#param epsilon for exploration
#param max_steps for max step in each episode
#param n_tests number of test episodes
"""
env = gym.make('Taxi-v3')
n_states, n_actions = env.observation_space.n, env.action_space.n
Q = init_q(n_states, n_actions, type="ones")
print('Q shape:' , Q.shape)
timestep_reward = []
for episode in range(episodes):
print(f"Episode: {episode}")
total_reward = 0
s = env.reset()
print('s:' , s)
a = epsilon_greedy(Q, epsilon, n_actions, s)
t = 0
done = False
while t < max_steps:
if render:
env.render()
t += 1
s_, reward, done, info = env.step(a)
total_reward += reward
a_ = epsilon_greedy(Q, epsilon, n_actions, s_)
if done:
Q[s, a] += alpha * ( reward - Q[s, a] )
else:
Q[s, a] += alpha * ( reward + (gamma * Q[s_, a_] ) - Q[s, a] )
s, a = s_, a_
if done:
if render:
print(f"This episode took {t} timesteps and reward {total_reward}")
timestep_reward.append(total_reward)
break
# print('Updated Q values:' , Q)
if render:
print(f"Here are the Q values:\n{Q}\nTesting now:")
if test:
test_agent(Q, env, n_tests, n_actions)
return timestep_reward
def test_agent(Q, env, n_tests, n_actions, delay=0.1):
for test in range(n_tests):
print(f"Test #{test}")
s = env.reset()
done = False
epsilon = 0
total_reward = 0
while True:
time.sleep(delay)
env.render()
a = epsilon_greedy(Q, epsilon, n_actions, s, train=True)
print(f"Chose action {a} for state {s}")
s, reward, done, info = env.step(a)
total_reward += reward
if done:
print(f"Episode reward: {total_reward}")
time.sleep(1)
break
if __name__ =="__main__":
alpha = 0.4
gamma = 0.999
epsilon = 0.9
episodes = 200
max_steps = 20
n_tests = 20
timestep_reward = sarsa(alpha, gamma, epsilon, episodes, max_steps, n_tests)
print(timestep_reward)
from :
https://towardsdatascience.com/reinforcement-learning-temporal-difference-sarsa-q-learning-expected-sarsa-on-python-9fecfda7467e
A sample Q table generated is :
[[ 1. 1. 1. 1. 1. 1. ]
[ 0.5996 0.5996 0.5996 0.35936 0.5996 1. ]
[ 0.19936016 0.35936 0.10336026 0.35936 0.35936 -5.56063984]
...
[ 0.35936 0.5996 0.35936 0.5996 1. 1. ]
[ 1. 0.5996 1. 1. 1. 1. ]
[ 0.35936 0.5996 1. 1. 1. 1. ]]
The columns representing the actions and rows representing the corresponding states.
Can the state be represented by a vector ? The Q table cells are not contained by vectors of size > 1 so how should these states be represented ? For example if I'm in state [2] can this be represented as an n dimensional vector ?
Put another way if Q[1,3] = 4 can the Q state 1 with action 3 be represented as vector[1,3,2,12,3] ? If so then is the state_number->state_attributes mapping stored in a separate lookup table ?
Yes, states can be represented by anything you want, including vectors of arbitrary length. Note, however, that if you are using a tabular version of Q-learning (or SARSA as in this case), you must have a discrete set of states. Therefore, you need a way to map the representation of your state (for example, a vector of potentially continuous values) to a set of discrete states.
Expanding on the example you have given, imagine that you have three states represented by vectors:
s0 = [1, 3, 2, 12, 3]
s1 = [3, 1, 2, 2, 23]
s2 = [2, 12, 3, 2, 1]
In the end, you only have 3 states, regardless of how they are represented. You could map the vectors to the states s0, s1 and s2, and use a simple Q-table. Or you can use other data structures that use vector representation (i.e. [1, 3, 2, 12, 3]) as an index.
On the other hand, if your state space is continuous and you don't want to discretize it, then you can use function approximators (e.g., a neural network) to store Q values instead of a table. But that's another topic (more info in chapter 8 of Sutton & Barto RL book).

Function approximator and q-learning

I am trying to implement q-learning with an action-value approximation-function. I am using openai-gym and the "MountainCar-v0" enviroment to test my algorithm out. My problem is, it does not converge or find the goal at all.
Basically the approximator works like the following, you feed in the 2 features: position and velocity and one of the 3 actions in a one-hot encoding: 0 -> [1,0,0], 1 -> [0,1,0] and 2 -> [0,0,1]. The output is the action-value approximation Q_approx(s,a), for one specific action.
I know that usually, the input is the state (2 features) and the output layer contains 1 output for each action. The big difference that I see is that I have run the feed forward pass 3 times (one for each action) and take the max, while in the standard implementation you run it once and take the max over the output.
Maybe my implementation is just completely wrong and I am thinking wrong. Gonna paste the code here, it is a mess but I am just experimenting a bit:
import gym
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation
env = gym.make('MountainCar-v0')
# The mean reward over 20 episodes
mean_rewards = np.zeros(20)
# Feature numpy holder
features = np.zeros(5)
# Q_a value holder
qa_vals = np.zeros(3)
one_hot = {
0 : np.asarray([1,0,0]),
1 : np.asarray([0,1,0]),
2 : np.asarray([0,0,1])
}
model = Sequential()
model.add(Dense(20, activation="relu",input_dim=(5)))
model.add(Dense(10,activation="relu"))
model.add(Dense(1))
model.compile(optimizer='rmsprop',
loss='mse',
metrics=['accuracy'])
epsilon_greedy = 0.1
discount = 0.9
batch_size = 16
# Experience replay containing features and target
experience = np.ones((10*300,5+1))
# Ring buffer
def add_exp(features,target,index):
if index % experience.shape[0] == 0:
index = 0
global filled_once
filled_once = True
experience[index,0:5] = features
experience[index,5] = target
index += 1
return index
for e in range(0,100000):
obs = env.reset()
old_obs = None
new_obs = obs
rewards = 0
loss = 0
for i in range(0,300):
if old_obs is not None:
# Find q_a max for s_(t+1)
features[0:2] = new_obs
for i,pa in enumerate([0,1,2]):
features[2:5] = one_hot[pa]
qa_vals[i] = model.predict(features.reshape(-1,5))
rewards += reward
target = reward + discount*np.max(qa_vals)
features[0:2] = old_obs
features[2:5] = one_hot[a]
fill_index = add_exp(features,target,fill_index)
# Find new action
if np.random.random() < epsilon_greedy:
a = env.action_space.sample()
else:
a = np.argmax(qa_vals)
else:
a = env.action_space.sample()
obs, reward, done, info = env.step(a)
old_obs = new_obs
new_obs = obs
if done:
break
if filled_once:
samples_ids = np.random.choice(experience.shape[0],batch_size)
loss += model.train_on_batch(experience[samples_ids,0:5],experience[samples_ids,5].reshape(-1))[0]
mean_rewards[e%20] = rewards
print("e = {} and loss = {}".format(e,loss))
if e % 50 == 0:
print("e = {} and mean = {}".format(e,mean_rewards.mean()))
Thanks in advance!
There shouldn't be much difference between the actions as inputs to your network or as different outputs of your network. It does make a huge difference if your states are images for example. because Conv nets work very well with images and there would be no obvious way of integrating the actions to the input.
Have you tried the cartpole balancing environment? It is better to test if your model is working correctly.
Mountain climb is pretty hard. It has no reward until you reach the top, which often doesn't happen at all. The model will only start learning something useful once you get to the top once. If you are never getting to the top you should probably increase your time doing exploration. in other words take more random actions, a lot more...

Is Caffe's reported accuracy reliable?

I recently tried to get the confusion matrix for one of my trained models, to see how precise it is. I downloaded this script and fed my model. To my astonishment, the accuracy calculated by the script, is very different than the one, Caffe reports.
I have used this script to calculate the confusion matrix, this however, reports the accuracy as well, the problem is the accuracy reported by this script is way different that the one reported by Caffe! For example Caffe reports the accuracy lets say for CIFAR10, as 92.34%, while, when the model is fed to the script to calculate confusion matrix and its accuracy, it results in for example something like 86.5%!
Which one of these accuracies are the correct one, and can be reported in papers or compared with the results of other papers such as those here ?
I also saw something weird again, I trained two identical models, with only one difference, that being one used Xavier, and the other used msra for initialization. The first one reports an accuracy of 94.25 and the other reports 94.26 in Caffe. when these models are fed to the script I linked above, for confusion matrix computations. their accuracies were 89.2% and 87.4% respectively!
Is this normal? what is the cause for this? msra?
Are the accuracies reported by Caffe true and reliable?
PS: The accuracy in the script is calculated as (complete script):
for i, image, label in reader:
image_caffe = image.reshape(1, *image.shape)
out = net.forward_all(data=np.asarray([ image_caffe ]))
plabel = int(out['prob'][0].argmax(axis=0))
count += 1
iscorrect = label == plabel
correct += (1 if iscorrect else 0)
matrix[(label, plabel)] += 1
labels_set.update([label, plabel])
if not iscorrect:
print("\rError: i=%s, expected %i but predicted %i" \
% (i, label, plabel))
sys.stdout.write("\rAccuracy: %.1f%%" % (100.*correct/count))
sys.stdout.flush()
print(", %i/%i corrects" % (correct, count))
Which imho is OK and correct. the number of correct predictions, divided by the total number of instances in the dataset.
I found the reason.
The reason for the mismatch between Caffe generated accuracy and the accuract generated by the script in question, was solely because of mean-subtraction, which was done in caffe, and not in the script.
This is the modified version of script which takes this into account and hopefully everything is just fine.
# Author: Axel Angel, copyright 2015, license GPLv3.
# added mean subtraction so that, the accuracy can be reported accurately just like caffe when doing a mean subtraction
# Seyyed Hossein Hasan Pour
# Coderx7#Gmail.com
# 7/3/2016
import sys
import caffe
import numpy as np
import lmdb
import argparse
from collections import defaultdict
def flat_shape(x):
"Returns x without singleton dimension, eg: (1,28,28) -> (28,28)"
return x.reshape(filter(lambda s: s > 1, x.shape))
def lmdb_reader(fpath):
import lmdb
lmdb_env = lmdb.open(fpath)
lmdb_txn = lmdb_env.begin()
lmdb_cursor = lmdb_txn.cursor()
for key, value in lmdb_cursor:
datum = caffe.proto.caffe_pb2.Datum()
datum.ParseFromString(value)
label = int(datum.label)
image = caffe.io.datum_to_array(datum).astype(np.uint8)
yield (key, flat_shape(image), label)
def leveldb_reader(fpath):
import leveldb
db = leveldb.LevelDB(fpath)
for key, value in db.RangeIter():
datum = caffe.proto.caffe_pb2.Datum()
datum.ParseFromString(value)
label = int(datum.label)
image = caffe.io.datum_to_array(datum).astype(np.uint8)
yield (key, flat_shape(image), label)
def npz_reader(fpath):
npz = np.load(fpath)
xs = npz['arr_0']
ls = npz['arr_1']
for i, (x, l) in enumerate(np.array([ xs, ls ]).T):
yield (i, x, l)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--proto', type=str, required=True)
parser.add_argument('--model', type=str, required=True)
parser.add_argument('--mean', type=str, required=True)
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('--lmdb', type=str, default=None)
group.add_argument('--leveldb', type=str, default=None)
group.add_argument('--npz', type=str, default=None)
args = parser.parse_args()
# Extract mean from the mean image file
mean_blobproto_new = caffe.proto.caffe_pb2.BlobProto()
f = open(args.mean, 'rb')
mean_blobproto_new.ParseFromString(f.read())
mean_image = caffe.io.blobproto_to_array(mean_blobproto_new)
f.close()
count = 0
correct = 0
matrix = defaultdict(int) # (real,pred) -> int
labels_set = set()
# CNN reconstruction and loading the trained weights
net = caffe.Net(args.proto, args.model, caffe.TEST)
caffe.set_mode_cpu()
print "args", vars(args)
if args.lmdb != None:
reader = lmdb_reader(args.lmdb)
if args.leveldb != None:
reader = leveldb_reader(args.leveldb)
if args.npz != None:
reader = npz_reader(args.npz)
for i, image, label in reader:
image_caffe = image.reshape(1, *image.shape)
out = net.forward_all(data=np.asarray([ image_caffe ])- mean_image)
plabel = int(out['prob'][0].argmax(axis=0))
count += 1
iscorrect = label == plabel
correct += (1 if iscorrect else 0)
matrix[(label, plabel)] += 1
labels_set.update([label, plabel])
if not iscorrect:
print("\rError: i=%s, expected %i but predicted %i" \
% (i, label, plabel))
sys.stdout.write("\rAccuracy: %.1f%%" % (100.*correct/count))
sys.stdout.flush()
print(", %i/%i corrects" % (correct, count))
print ""
print "Confusion matrix:"
print "(r , p) | count"
for l in labels_set:
for pl in labels_set:
print "(%i , %i) | %i" % (l, pl, matrix[(l,pl)])

Simple Multilayer Perceptron model does not converge in TensorFlow

I am new to TensorFlow. Today I tried to implement my first model in TF but it returned strange results. I know that I am missing something here but I was not able to figure it out. Here is the story.
Model
I have a simple Multilayer Perceptron model with only a single hidden layer applied on MNIST databse. Layers are defined like [input(784) , hidden_layer(470) , output_layer(10)] with tanh as non-linearity for hidden layer and softmax as the loss for output layer. The optimizer I am using is Gradient Descent Algorithm with learning rate of 0.01. My mini batch size is 1 (I am training model with samples one by one).
My implementations :
First I implemented my model in C++ and got around 96% accuracy.Here is the repository : https://github.com/amin2ros/Artificog
I implemented the exact model in TensorFlow but surprisingly the model didn't converge at all. Here is the code.
Code:
import sys
import input_data
import matplotlib.pyplot as plt
from pylab import *
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
import tensorflow as tf
# Parameters
learning_rate = 0.1
training_epochs = 1
batch_size = 1
display_step = 1
# Network Parameters
n_hidden_1 = 470 # 1st layer num features
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])
# Create model
def multilayer_perceptron(_X, _weights, _biases):
layer_1 = tf.tanh(tf.add(tf.matmul(_X, _weights['h1']), _biases['b1']))
return tf.matmul(layer_1, _weights['out']) + _biases['out']
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes]))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
# Construct model
pred = multilayer_perceptron(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax(pred)) # Softmax loss
optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(cost) #
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
# Training cycle
for epoch in range(training_epochs):
avg_cost = 0.
m= 0
total_batch = int(mnist.train.num_examples/batch_size)
counter=0
#print 'count = ' , total_batch
#sys.stdin.read(1)
# Loop over all batches
for i in range(total_batch):
batch_xs, batch_ys = mnist.train.next_batch(batch_size)
label = tf.argmax(batch_ys,1).eval()[0]
counter+=1
sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys})
wrong_prediction = tf.not_equal(tf.argmax(pred, 1), tf.argmax(y, 1))
missed=tf.cast(wrong_prediction, "float")
m += missed.eval({x: batch_xs, y: batch_ys})[0]
print "Sample #", counter , " - Label : " , label , " - Prediction :" , tf.argmax(pred, 1).eval({x: batch_xs, y: batch_ys})[0] ,\
"- Missed = " , m , " - Error Rate = " , 100 * float(m)/counter
print "Optimization Finished!"
I am very curious why this happens. Any help is appreciated.
Edit:
As commented below definition of cost function was incorrect so it should be like
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred,y))
Now model converges :)