making my multi-agent environment by deep reinforcement learning

making my multi-agent environment by deep reinforcement learning - deep-learning

I should make my own environment and apply dqn algorithm in a multi-agent environment.
I have 4 agents . Each state of my environment has 5 variables state=[p1, p2, p3, p4,p5], at each time step,we update the different parameters of all states. Action is one of amount: {-2,-1,0,1,2} given the best q-value.
param0,param1,param2,param3,param4=[[0 for x in range(numframe)] for y in range(number_nodes)]
`timestep p4[agent0]=random.randint(0,2)
p4[agent1]=p4[agent0]+action
p4[agent2]=p4[agent1]+action
p4[agent3]=p4[agent2]+action
(actions find by a DNN in dqn and can be one of {-2,-1,0,1,2})`
param0..5=[[0 for x in range(numframe)] for y in range(number_nodes)]
numframe: shows amount for experience-replay, number_nodes=4 showing number of agents
I have written the following code based on [dqn-keras-code][1],
1- how I could change it to work as multi-agent?
2- how I could change to write my reset? (I should reset to 0each of parameters)
I write some code but as I am beginner in dqn and multi-agent, I saw the following error: (I know it has also some problem related to multi-agent)
line 156, in <module>
state = env.reset()
TypeError: reset() missing 1 required positional argument: 'self'
Could you please help me more than this error how I can fix my reset section and step section?
Here is my code:
import random
import numpy as np
import tensorflow as tf
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
#-----------------------------------------------------------------
global param0,param1,param2,param3,param4,state,next_state,action_space,action_size,w,m, reward,episodes,time_t,state
#--------------------------------------------------------------------------
episodes=2000
number_nodes=5 #one more than number of nodes
timemax=500
action_size=5
state_size=5
action_space=[-2,-1,0,1,2]
m=16 #4*(ltime+ftime)=16
numframe=16
#-------------------------------------------------------------------------
class env:
def __init__(self):
self.action_space=[-2,-1,0,1,2] # X=[-2,2]
self.action_size = len(self.action_space)
self.state = None
return action_space, action_size
def reset(self):
#self.action_space=[0,0,0,0,0]
for ii in range (1,4): #both sides
param1[ii]=0
param2[ii]=0
param3[ii]=0
param4[ii]=0
param0[ii]=0
reward[ii]=0
state[ii]=[param0[ii],param1[ii],param2[ii],param3[ii],param4[ii]]
return state
# def reset(self):
# self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
# self.steps_beyond_done = None
# return np.array(self.state)
def step(self,action):
state = self.state
param1, param2, param3, param4, param0 = state
param0[0]=random.randint(0,2) #produce a random param0
#relationship between parameteres for refreshing
param0[1]=param0[0]+action
param0[2]=param0[1]+action
param0[3]=param0[2]+action
param0[4]=param0[3]+action
for i in range (1,4):
param1[time_t][i]=param4[time_t][i+1]-param0[i+1]
#action[i]=agent.init(state_size, action_size)
#relationship between parameteres for refreshing
param2[time_t][i]=param0[i]+action
param3[time_t][i]=param2[time_t][i]
param4[time_t][i]=param3[time_t][i]
#param1,param3,param4,param0
next_state[i]=[param1[time_t][i],param2[time_t][i],param3[time_t][i],param4[time_t][i],param0[i]]
cp= [2, 0, 0, 0]
ch= [2, 2, 2, 2]
# reward function
if param1[i]>=0:
reward[i]+=ch[i]*param1[time_t][i]
else:
reward[i]+=cp[i]*param1[time_t][i]
return next_state, reward
#-------------------------------------------------
class DQNAgent:
def __init__(self, state_size, action_size):
self.state_size = state_size
self.action_size = action_size
self.memory = deque(maxlen=2000)
self.gamma = 0.95 # discount rate
self.epsilon = 1.0 # exploration rate
self.epsilon_min = 0.01
self.epsilon_decay = 0.995
self.learning_rate = 0.001
self.model = self._build_model()
def _build_model(self):
# Neural Net for Deep-Q learning Model
model = Sequential()
model.add(Dense(24, input_dim=self.state_size, activation='relu'))
model.add(Dense(24, activation='relu'))
model.add(Dense(self.action_size, activation='linear'))
model.compile(loss='mse',
optimizer=Adam(lr=self.learning_rate))
return model
def remember(self, state, action, reward, next_state, done):
self.memory.append((state, action, reward, next_state, done))
def act(self, state):
if np.random.rand() <= self.epsilon:
return random.randrange(self.action_size)
act_values = self.model.predict(state)
return np.argmax(act_values[0]) # returns action
def replay(self, batch_size):
minibatch = random.sample(self.memory, batch_size)
for state, action, reward, next_state, done in minibatch:
target = reward
if not done:
target = (reward + self.gamma *
np.amax(self.model.predict(next_state)[0]))
target_f = self.model.predict(state)
target_f[0][action] = target
self.model.fit(state, target_f, epochs=1, verbose=0)
if self.epsilon > self.epsilon_min:
self.epsilon *= self.epsilon_decay
def load(self, name):
self.model.load_weights(name)
def save(self, name):
self.model.save_weights(name)
if __name__ == "__main__":
#env = gym.make('CartPole-v1')
#state_size = env.observation_space.shape[0]
#action_size = env.action_space.n
state_size=4
action_size=5
agent = DQNAgent(state_size, action_size)
# agent.load("./save/cartpole-dqn.h5")
done = False
batch_size = 32
for e in range(episodes):
state = env.reset()
state = np.reshape(state, [1, state_size])
for time in range(500):
# env.render()
action = agent.act(state)
next_state, reward, done, _ = env.step(action)
reward = reward if not done else -10
next_state = np.reshape(next_state, [1, state_size])
agent.remember(state, action, reward, next_state, done)
state = next_state
if done:
print("episode: {}/{}, score: {}, e: {:.2}"
.format(e, EPISparam2DES, time, agent.epsilon))
break
if len(agent.memory) > batch_size:
agent.replay(batch_size)
# if e % 10 == 0:
# agent.save("./save/cartpole-dqn.h5")
agent = DQNAgent(state_size, action_size)
# agent.load("./save/cartpole-dqn.h5")
[1]: https://github.com/keon/deep-q-learning/blob/master/dqn.py

The DQN algorithm you linked to is for a single agent game. You have to change it quite a bit to work with multiple agents. There are multiple papers written on the subject. If you want to truly understand what your code is doing, I suggest finding a paper that tries to solve an environment similar to yours and then applying the concepts within that paper to your code.

Related

RuntimeError: shape '[128, -1]' is invalid for input of size 378 pytorch

I'm running a spiking neural network for data that has 21 features with a batch size of 128. I get the following error after many iterations of training (this error doesn't arise immediately!):
RuntimeError: shape '[128, -1]' is invalid for input of size 378 pytorch
When I went to go print out what the shapes of the tensors are before, I get the following:
Train
torch.Size([128, 21])
Test
torch.Size([128, 21])
This is my network:
class SpikingNeuralNetwork(nn.Module):
"""
Parameters in SpikingNeuralNetwork class:
1. number_inputs: Number of inputs to the SNN.
2. number_hidden: Number of hidden layers.
3. number_outputs: Number of output classes.
4. beta: Decay rate.
"""
def __init__(self, number_inputs, number_hidden, number_outputs, beta):
super().__init__()
self.number_inputs = number_inputs
self.number_hidden = number_hidden
self.number_outputs = number_outputs
self.beta = beta
# Initialize layers
self.fc1 = nn.Linear(self.number_inputs, self.number_hidden) # Applies linear transformation to all input points
self.lif1 = snn.Leaky(beta = self.beta) # Integrates weighted input over time, emitting a spike if threshold condition is met
self.fc2 = nn.Linear(self.number_hidden, self.number_outputs) # Applies linear transformation to output spikes of lif1
self.lif2 = snn.Leaky(beta = self.beta) # Another spiking neuron, integrating the weighted spikes over time
"""
Forward propagation of SNN. The code below function will only be called once the input argument x
is explicitly passed into net.
#param x: input passed into the network
#return layer of output after applying final spiking neuron
"""
def forward(self, x):
num_steps = 25
# Initialize hidden states at t = 0
mem1 = self.lif1.init_leaky()
mem2 = self.lif2.init_leaky()
# Record the final layer
spk2_rec = []
mem2_rec = []
for step in range(num_steps):
cur1 = self.fc1(x)
spk1, mem1 = self.lif1(cur1, mem1)
cur2 = self.fc2(spk1)
spk2, mem2 = self.lif2(cur2, mem2)
spk2_rec.append(spk2)
mem2_rec.append(mem2)
return torch.stack(spk2_rec, dim = 0), torch.stack(mem2_rec, dim = 0)
This is my training loop:
def training_loop(net, train_loader, test_loader, dtype, device, optimizer):
num_epochs = 1
loss_history = []
test_loss_history = []
counter = 0
# Temporal dynamics
num_steps = 25
# Outer training loop
for epoch in range(num_epochs):
iter_counter = 0
train_batch = iter(train_loader)
# Minibatch training loop
for data, targets in train_batch:
data = data.to(device)
targets = targets.to(device)
# Forward pass
net.train()
print("Train")
print(data.size())
spk_rec, mem_rec = net(data.view(batch_size, -1))
# Initialize the loss and sum over time
loss_val = torch.zeros((1), dtype = dtype, device = device)
for step in range(num_steps):
loss_val += loss_function(mem_rec[step], targets.long().flatten().to(device))
# Gradient calculation and weight update
optimizer.zero_grad()
loss_val.backward()
optimizer.step()
# Store loss history for future plotting
loss_history.append(loss_val.item())
# Test set
with torch.no_grad():
net.eval()
test_data, test_targets = next(iter(test_loader))
test_data = test_data.to(device)
test_targets = test_targets.to(device)
# Test set forward pass
print("Test")
print(test_data.size())
test_spk, test_mem = net(test_data.view(batch_size, -1))
# Test set loss
test_loss = torch.zeros((1), dtype = dtype, device = device)
for step in range(num_steps):
test_loss += loss_function(test_mem[step], test_targets.long().flatten().to(device))
test_loss_history.append(test_loss.item())
# Print train/test loss and accuracy
if counter % 50 == 0:
train_printer(epoch, iter_counter, counter, loss_history, data, targets, test_data, test_targets)
counter = counter + 1
iter_counter = iter_counter + 1
return loss_history, test_loss_history
The error occurs on spk_rec, mem_rec = net(data.view(batch_size, -1)).
The code was adopted from https://snntorch.readthedocs.io/en/latest/tutorials/tutorial_5.html, where it was originally used for the MNIST dataset. However, I am not working with an image dataset. I am working with a dataset that has 21 features and predicts just one target (with 100 classes). I tried to change data.view(batch_size, -1) and test_data.view(batch_size, -1) to data.view(batch_size, 21) and test_data.view(batch_size, 21) based on some other forum answers that I saw, and my program is running for now through the training loop. Does anyone have any suggestions for how I can run through the training with no errors?
EDIT: I now get the error RuntimeError: shape '[128, 21]' is invalid for input of size 378 from spk_rec, mem_rec = net(data.view(batch_size, -1)).
Here are my DataLoaders:
train_loader = DataLoader(dataset = train, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(dataset = test, batch_size = batch_size, shuffle = True)
My batch size is 128.

Tryng to run it by myself to try to solve your problem I luck also: net params and snn.snn.Leaky
import torch
from torch import nn
from torch.utils.data import DataLoader
class SpikingNeuralNetwork(nn.Module):
"""
Parameters in SpikingNeuralNetwork class:
1. number_inputs: Number of inputs to the SNN.
2. number_hidden: Number of hidden layers.
3. number_outputs: Number of output classes.
4. beta: Decay rate.
"""
def __init__(self, number_inputs, number_hidden, number_outputs, beta):
super().__init__()
self.number_inputs = number_inputs
self.number_hidden = number_hidden
self.number_outputs = number_outputs
self.beta = beta
# Initialize layers
self.fc1 = nn.Linear(self.number_inputs,
self.number_hidden) # Applies linear transformation to all input points
self.lif1 = snn.Leaky(
beta=self.beta) # Integrates weighted input over time, emitting a spike if threshold condition is met
self.fc2 = nn.Linear(self.number_hidden,
self.number_outputs) # Applies linear transformation to output spikes of lif1
self.lif2 = snn.Leaky(beta=self.beta) # Another spiking neuron, integrating the weighted spikes over time
"""
Forward propagation of SNN. The code below function will only be called once the input argument x
is explicitly passed into net.
#param x: input passed into the network
#return layer of output after applying final spiking neuron
"""
def forward(self, x):
num_steps = 25
# Initialize hidden states at t = 0
mem1 = self.lif1.init_leaky()
mem2 = self.lif2.init_leaky()
# Record the final layer
spk2_rec = []
mem2_rec = []
for step in range(num_steps):
cur1 = self.fc1(x)
spk1, mem1 = self.lif1(cur1, mem1)
cur2 = self.fc2(spk1)
spk2, mem2 = self.lif2(cur2, mem2)
spk2_rec.append(spk2)
mem2_rec.append(mem2)
return torch.stack(spk2_rec, dim=0), torch.stack(mem2_rec, dim=0)
batch_size = 2
train = torch.rand(128, 21)
test = torch.rand(128, 21)
train_loader = DataLoader(dataset=train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test, batch_size=batch_size, shuffle=True)
net = SpikingNeuralNetwork(number_inputs=1)
loss_function = nn.CrossEntropyLoss()
optimizer = nn.optim.Adam(net.parameters(), lr=0.1)
def training_loop(net, train_loader, test_loader, dtype, device, optimizer):
num_epochs = 1
loss_history = []
test_loss_history = []
counter = 0
# Temporal dynamics
num_steps = 25
# Outer training loop
for epoch in range(num_epochs):
iter_counter = 0
train_batch = iter(train_loader)
# Minibatch training loop
for data, targets in train_batch:
data = data.to(device)
targets = targets.to(device)
# Forward pass
net.train()
print("Train")
print(data.size())
spk_rec, mem_rec = net(data.view(batch_size, -1))
# Initialize the loss and sum over time
loss_val = torch.zeros((1), dtype=dtype, device=device)
for step in range(num_steps):
loss_val += loss_function(mem_rec[step], targets.long().flatten().to(device))
# Gradient calculation and weight update
optimizer.zero_grad()
loss_val.backward()
optimizer.step()
# Store loss history for future plotting
loss_history.append(loss_val.item())
# Test set
with torch.no_grad():
net.eval()
test_data, test_targets = next(iter(test_loader))
test_data = test_data.to(device)
test_targets = test_targets.to(device)
# Test set forward pass
print("Test")
print(test_data.size())
test_spk, test_mem = net(test_data.view(batch_size, -1))
# Test set loss
test_loss = torch.zeros((1), dtype=dtype, device=device)
for step in range(num_steps):
test_loss += loss_function(test_mem[step], test_targets.long().flatten().to(device))
test_loss_history.append(test_loss.item())
# Print train/test loss and accuracy
if counter % 50 == 0:
train_printer(epoch, iter_counter, counter, loss_history, data, targets, test_data, test_targets)
counter = counter + 1
iter_counter = iter_counter + 1
return loss_history, test_loss_history

Your code works just fine on the MNIST dataset, so I think it might be a problem with how the DataLoader is being called. My guess is that the total dataset is not evenly divisible by your batch_size. If this is true, then you have two options:
Instead of spk_rec, mem_rec = net(data.view(batch_size, -1)), try spk_rec, mem_rec = net(data.flatten(1)) which preserves the first dimension of your data.
Alternatively, you may need to set drop_last=True in the DataLoader functions.

Policy Network returning different outputs for batched states and individual states

I am implementing REINFORCE applied to the CartPole-V0 openAI gym environment. I am trying 2 different implementations of the same, and the issue I am not able to resolve is the following:
Upon passing a single state to the Policy Network, I get an output Tensor of size 2, containing the action probabilities of the 2 actions. However, when I pass a `batch of states' to the Policy Network to compute the output action probabilities of all of them, the values that I obtain are very different from when each state is individually passed to the network.
Can someone help me understand the issue?
My code for the same is below: (Note: this is NOT the complete REINFORCE algorithm -- I am aware that I need to compute the loss from the probabilities. But I am trying to understand the difference in the computation of the two probabilities, which I think should be the same, before proceeding.)
# architecture of the Policy Network
class PolicyNetwork(nn.Module):
def __init__(self, state_dim, n_actions):
super().__init__()
self.n_actions = n_actions
self.model = nn.Sequential(
nn.Linear(state_dim, 64),
nn.ReLU(),
nn.Linear(64, n_actions),
nn.Softmax(dim=0)
).float()
def forward(self, X):
return self.model(X)
def train_reinforce_agent(env, episode_length, max_episodes, gamma, visualize_step, learning_rate=0.003):
# define the parametric model for the Policy: this is an instantiation of the PolicyNetwork class
model = PolicyNetwork(env.observation_space.shape[0], env.action_space.n)
# define the optimizer for updating the weights of the Policy Network
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# hyperparameters of the reinforce agent
EPISODE_LENGTH = episode_length
MAX_EPISODES = max_episodes
GAMMA = gamma
VISUALIZE_STEP = max(1, visualize_step)
score = []
for episode in range(MAX_EPISODES):
# reset the environment
curr_state = env.reset()
done = False
episode_t = []
# rollout an entire episode from the Policy Network
pred_vals = []
for t in range(EPISODE_LENGTH):
act_prob = model(torch.from_numpy(curr_state).float())
pred_vals.append(act_prob)
action = np.random.choice(np.array(list(range(env.action_space.n))), p=act_prob.data.numpy())
prev_state = curr_state
curr_state, _, done, info = env.step(action)
episode_t.append((prev_state, action, t+1))
if done:
break
score.append(len(episode_t))
# reward_batch = torch.Tensor([r for (s,a,r) in episode_t]).flip(dims=(0,))
reward_batch = torch.Tensor([r for (s, a, r) in episode_t])
# compute the return for every state-action pair from the rewards at every time-step
batch_Gvals = []
for i in range(len(episode_t)):
new_Gval = 0
power = 0
for j in range(i, len(episode_t)):
new_Gval = new_Gval + ((GAMMA ** power) * reward_batch[j]).numpy()
power += 1
batch_Gvals.append(new_Gval)
# normalize the returns for the batch
expected_returns_batch = torch.FloatTensor(batch_Gvals)
if torch.is_nonzero(expected_returns_batch.max()):
expected_returns_batch /= expected_returns_batch.max()
# batch the states, actions, prob after the episode
state_batch = torch.Tensor([s for (s,a,r) in episode_t])
print("State batch:", state_batch)
all_states = [s for (s,a,r) in episode_t]
print("All states:", all_states)
action_batch = torch.Tensor([a for (s,a,r) in episode_t])
pred_batch_v1 = model(state_batch)
pred_batch_v2 = torch.stack(pred_vals)
print("Batched state pred_vals:", pred_batch_v1)
print("Individual state pred_vals:", pred_batch_v2) ### Why is this different from the above predicted values??
My main function where I pass the environment is:
def main():
env = gym.make('CartPole-v0')
# train a REINFORCE-agent to learn the optimal policy
episode_length = 500
n_episodes = 500
gamma = 0.99
vis_steps = 50
train_reinforce_agent(env, episode_length, n_episodes, gamma, vis_steps)

In your policy, you have Softmax over dim 0. This normalizes the probability of each action across your batch. You want to do it across actions by dim=1.

PyTorch: Confusion Matrix for Transfer Learning

I've been trying to plot a confusion matrix for the below code - check def train_alexnet(). But I keep getting this error:
IndexError: only integers, slices (`:`), ellipsis (`...`), None and long or byte Variables are valid indices (got float)
So, I tried converting my tensors to an integer tensor but then got the error:
ValueError: only one element tensors can be converted to Python scalars
Can someone suggest me what can be done to convert the tensors 'all_preds' and 'source_value' to tensors containing integer values? I found the torch no grad option but I am unaware as to how to use it because I'm new to pytorch.
Here's the link of the github repo that I'm trying to work with: https://github.com/syorami/DDC-transfer-learning/blob/master/DDC.py
from __future__ import print_function
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import warnings
warnings.filterwarnings('ignore')
import math
import model
import torch
import dataloader
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.metrics import confusion_matrix
from plotcm import plot_confusion_matrix
from torch import nn
from torch import optim
from torch.autograd import Variable
cuda = torch.cuda.is_available()
def step_decay(epoch, learning_rate):
# learning rate step decay
# :param epoch: current training epoch
# :param learning_rate: initial learning rate
# :return: learning rate after step decay
initial_lrate = learning_rate
drop = 0.8
epochs_drop = 10.0
lrate = initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))
return lrate
def train_alexnet(epoch, model, learning_rate, source_loader):
# train source on alexnet
# :param epoch: current training epoch
# :param model: defined alexnet
# :param learning_rate: initial learning rate
# :param source_loader: source loader
# :return:
log_interval = 10
LEARNING_RATE = step_decay(epoch, learning_rate)
print(f'Learning Rate: {LEARNING_RATE}')
optimizer = optim.SGD([
{'params': model.features.parameters()},
{'params': model.classifier.parameters()},
{'params': model.final_classifier.parameters(), 'lr': LEARNING_RATE}
], lr=LEARNING_RATE / 10, momentum=MOMENTUM, weight_decay=L2_DECAY)
# enter training mode
model.train()
iter_source = iter(source_loader)
num_iter = len(source_loader)
correct = 0
total_loss = 0
clf_criterion = nn.CrossEntropyLoss()
all_preds = torch.tensor([])
source_value = torch.tensor([])
for i in range(1, num_iter):
source_data, source_label = iter_source.next()
# print("source label: ", source_label)
if cuda:
source_data, source_label = source_data.cuda(), source_label.cuda()
source_data, source_label = Variable(source_data), Variable(source_label)
optimizer.zero_grad()
##
source_preds = model(source_data)
preds = source_preds.data.max(1, keepdim=True)[1]
correct += preds.eq(source_label.data.view_as(preds)).sum()
#prediction label
all_preds = torch.cat(
(all_preds, preds)
,dim=0
)
#actual label
source_value = torch.cat(
(source_value,source_label)
,dim=0
)
loss = clf_criterion(source_preds, source_label)
total_loss += loss
loss.backward()
optimizer.step()
if i % log_interval == 0:
print('Train Epoch {}: [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, i * len(source_data), len(source_loader) * BATCH_SIZE,
100. * i / len(source_loader), loss.item()))
total_loss /= len(source_loader)
acc_train = float(correct) * 100. / (len(source_loader) * BATCH_SIZE)
# print('all preds= ',int(all_preds))
# print("source value", int(source_value))
stacked = torch.stack(
(
source_value
,(all_preds.argmax(dim=1))
)
,dim=1
)
print("stacked",stacked)
cmt = torch.zeros(3
,3, dtype=torch.float64)
with torch.no_grad():
for p in stacked:
tl, pl = p.tolist()
cmt[tl, pl] = cmt[tl, pl] + 1
print("cmt: ",cmt)
print('{} set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format(
SOURCE_NAME, total_loss.item(), correct, len(source_loader.dataset), acc_train))
def test_alexnet(model, target_loader):
# test target data on fine-tuned alexnet
# :param model: trained alexnet on source data set
# :param target_loader: target dataloader
# :return: correct num
# enter evaluation mode
clf_criterion = nn.CrossEntropyLoss()
model.eval()
test_loss = 0
correct = 0
for data, target in target_test_loader:
if cuda:
data, target = data.cuda(), target.cuda()
data, target = Variable(data, volatile=True), Variable(target)
target_preds = model(data)
test_loss += clf_criterion(target_preds, target) # sum up batch loss
pred = target_preds.data.max(1)[1] # get the index of the max log-probability
correct += pred.eq(target.data.view_as(pred)).cpu().sum()
stacked = torch.stack(
(
target
,target_preds.argmax(dim=1)
)
,dim=1
)
print("stacked target",stacked)
test_loss /= len(target_loader)
print('{} set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
TARGET_NAME, test_loss.item(), correct, len(target_loader.dataset),
100. * correct / len(target_loader.dataset)))
return correct
def compute_confusion_matrix(preds, y):
#round predictions to the closest integer
rounded_preds = torch.round(torch.sigmoid(preds))
return confusion_matrix(y, rounded_preds)
if __name__ == '__main__':
ROOT_PATH = './v1234_combined/pets'
SOURCE_NAME = 'v123'
TARGET_NAME = 'v4'
BATCH_SIZE = 15
TRAIN_EPOCHS = 1
learning_rate = 1e-2
L2_DECAY = 5e-4
MOMENTUM = 0.9
source_loader = dataloader.load_training(ROOT_PATH, SOURCE_NAME, BATCH_SIZE)
#target_train_loader = dataloader.load_training(ROOT_PATH, TARGET_NAME, BATCH_SIZE)
target_test_loader = dataloader.load_testing(ROOT_PATH, TARGET_NAME, BATCH_SIZE)
print('Load data complete')
alexnet = model.Alexnet_finetune(num_classes=3)
print('Construct model complete')
# load pretrained alexnet model
alexnet = model.load_pretrained_alexnet(alexnet)
print('Load pretrained alexnet parameters complete\n')
if cuda: alexnet.cuda()
for epoch in range(1, TRAIN_EPOCHS + 1):
print(f'Train Epoch {epoch}:')
train_alexnet(epoch, alexnet, learning_rate, source_loader)
correct = test_alexnet(alexnet, target_test_loader)
print(len(source_loader.dataset))

In oder to conver all elements of a tensor from floats to ints, you need to use .to():
all_preds_int = all_preds.to(torch.int64)
Note that it appears as if your all_preds are the predicted class probabilities and not the actual labels. You might need to torch.argmax along the appropriate dimension. (BTW, the output of argmax is int - no need to convert).

PyTorch: Target 1 is out of bounds

I am new to Deep Learning and wondering how to modify my model to fix it.
It says Target 1 is out of bounds, so what parameter should I change to make it works. When the output is changed to 2, it works. However, the goal for the model is to predict 2 classes classification. Also, when output is 2, the training loss becomes nan.
The data is a dataframe with shape (15958, 4) transformed into tensor format.
Sorry Split_NN is a class:
# SplitNN
# to protect privacy and split
class SplitNN:
def __init__(self, models, optimizers):
self.models = models
self.optimizers = optimizers
self.data = []
self.remote_tensors = []
def forward(self, x):
data = []
remote_tensors = []
data.append(self.models[0](x))
if data[-1].location == self.models[1].location:
remote_tensors.append(data[-1].detach().requires_grad_())
else:
remote_tensors.append(
data[-1].detach().move(self.models[1].location).requires_grad_()
)
i = 1
while i < (len(models) - 1):
data.append(self.models[i](remote_tensors[-1]))
if data[-1].location == self.models[i + 1].location:
remote_tensors.append(data[-1].detach().requires_grad_())
else:
remote_tensors.append(
data[-1].detach().move(self.models[i + 1].location).requires_grad_()
)
i += 1
data.append(self.models[i](remote_tensors[-1]))
self.data = data
self.remote_tensors = remote_tensors
return data[-1]
def backward(self):
for i in range(len(models) - 2, -1, -1):
if self.remote_tensors[i].location == self.data[i].location:
grads = self.remote_tensors[i].grad.copy()
else:
grads = self.remote_tensors[i].grad.copy().move(self.data[i].location)
self.data[i].backward(grads)
def zero_grads(self):
for opt in self.optimizers:
opt.zero_grad()
def step(self):
for opt in self.optimizers:
opt.step()
Below are the codes:
Model set up: The Model is a sequential deep learning model, which I tried to use nn.linear to generated binary prediction.
torch.manual_seed(0)
# Define our model segments
input_size = 3
hidden_sizes = [128, 640]
output_size = 1
# original models
models = [
nn.Sequential(
nn.Linear(input_size, hidden_sizes[0]),
nn.ReLU(),
nn.Linear(hidden_sizes[0], hidden_sizes[1]),
nn.ReLU(),
),
nn.Sequential(nn.Linear(hidden_sizes[1], output_size), nn.LogSoftmax(dim=1)),
]
# Create optimisers for each segment and link to them
optimizers = [
optim.SGD(model.parameters(), lr=0.03,)
for model in models
]
Train model is here:
def train(x, target, splitNN):
#1) Zero our grads
splitNN.zero_grads()
#2) Make a prediction
pred = splitNN.forward(x)
#3) Figure out how much we missed by
criterion = nn.NLLLoss()
loss = criterion(pred, target)
#4) Backprop the loss on the end layer
loss.backward()
#5) Feed Gradients backward through the nework
splitNN.backward()
#6) Change the weights
splitNN.step()
return loss, pred
Finally the training part, also the part where problem happen:
the send function is for assigning model to the nodes, cuz this is set up to simulating federated learning.
for i in range(epochs):
running_loss = 0
correct_preds = 0
total_preds = 0
for (data, ids1), (labels, ids2) in dataloader:
# Train a model
data = data.send(models[0].location)
data = data.view(data.shape[0], -1)
labels = labels.send(models[-1].location)
# Call model
loss, preds = train(data.float(), labels, splitNN)
# Collect statistics
running_loss += loss.get()
correct_preds += preds.max(1)[1].eq(labels).sum().get().item()
total_preds += preds.get().size(0)
print(f"Epoch {i} - Training loss: {running_loss/len(dataloader):.3f} - Accuracy: {100*correct_preds/total_preds:.3f}")
The error show the problem occurs at loss, preds = train(data.float(), labels, splitNN)
The actual error message:
During handling of the above exception, another exception occurred:
IndexError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
1836 .format(input.size(0), target.size(0)))
1837 if dim == 2:
-> 1838 ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
1839 elif dim == 4:
1840 ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
IndexError: Target 1 is out of bounds.
Please help me. Thank you

Simple Multilayer Perceptron model does not converge in TensorFlow

I am new to TensorFlow. Today I tried to implement my first model in TF but it returned strange results. I know that I am missing something here but I was not able to figure it out. Here is the story.
Model
I have a simple Multilayer Perceptron model with only a single hidden layer applied on MNIST databse. Layers are defined like [input(784) , hidden_layer(470) , output_layer(10)] with tanh as non-linearity for hidden layer and softmax as the loss for output layer. The optimizer I am using is Gradient Descent Algorithm with learning rate of 0.01. My mini batch size is 1 (I am training model with samples one by one).
My implementations :
First I implemented my model in C++ and got around 96% accuracy.Here is the repository : https://github.com/amin2ros/Artificog
I implemented the exact model in TensorFlow but surprisingly the model didn't converge at all. Here is the code.
Code:
import sys
import input_data
import matplotlib.pyplot as plt
from pylab import *
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
import tensorflow as tf
# Parameters
learning_rate = 0.1
training_epochs = 1
batch_size = 1
display_step = 1
# Network Parameters
n_hidden_1 = 470 # 1st layer num features
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])
# Create model
def multilayer_perceptron(_X, _weights, _biases):
layer_1 = tf.tanh(tf.add(tf.matmul(_X, _weights['h1']), _biases['b1']))
return tf.matmul(layer_1, _weights['out']) + _biases['out']
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes]))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
# Construct model
pred = multilayer_perceptron(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax(pred)) # Softmax loss
optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(cost) #
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
# Training cycle
for epoch in range(training_epochs):
avg_cost = 0.
m= 0
total_batch = int(mnist.train.num_examples/batch_size)
counter=0
#print 'count = ' , total_batch
#sys.stdin.read(1)
# Loop over all batches
for i in range(total_batch):
batch_xs, batch_ys = mnist.train.next_batch(batch_size)
label = tf.argmax(batch_ys,1).eval()[0]
counter+=1
sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys})
wrong_prediction = tf.not_equal(tf.argmax(pred, 1), tf.argmax(y, 1))
missed=tf.cast(wrong_prediction, "float")
m += missed.eval({x: batch_xs, y: batch_ys})[0]
print "Sample #", counter , " - Label : " , label , " - Prediction :" , tf.argmax(pred, 1).eval({x: batch_xs, y: batch_ys})[0] ,\
"- Missed = " , m , " - Error Rate = " , 100 * float(m)/counter
print "Optimization Finished!"
I am very curious why this happens. Any help is appreciated.
Edit:
As commented below definition of cost function was incorrect so it should be like
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred,y))
Now model converges :)

We Keep Coding

html mysql json google-apps-script actionscript-3 ms-access google-chrome google-maps reporting-services sql-server-2008

making my multi-agent environment by deep reinforcement learning - deep-learning

Related

RuntimeError: shape '[128, -1]' is invalid for input of size 378 pytorch

Policy Network returning different outputs for batched states and individual states

PyTorch: Confusion Matrix for Transfer Learning

PyTorch: Target 1 is out of bounds

Simple Multilayer Perceptron model does not converge in TensorFlow

Categories

Resources