Using a metric predictor when modelling an ordinal predicted variable in PyMC3 - regression

I am trying to implement the ordered probit regression model from chapter 23.4 of Doing Bayesian Data Analysis (Kruschke) in PyMC3. After sampling, the posterior distributions for the intercept and slope are not really comparable to the results in the book. I think there is some fundamental issue with the model definition, but I fail to see it.
Data:
X is the metric predictor (standardized to zX); Y holds the ordinal outcomes (1-7).
nYlevels3 = df3.Y.nunique()
# Setting the thresholds for the ordinal outcomes. The outer sides are
# fixed, while the others are estimated.
thresh3 = [k + .5 for k in range(1, nYlevels3)]
thresh_obs3 = np.ma.asarray(thresh3)
thresh_obs3[1:-1] = np.ma.masked
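For a 7-level outcome this gives thresh3 = [1.5, 2.5, 3.5, 4.5, 5.5, 6.5], with only the interior four values masked and therefore estimated. A quick look at what the masked array contains (my own sanity check, not part of the original code):

print(thresh3)      # [1.5, 2.5, 3.5, 4.5, 5.5, 6.5]
print(thresh_obs3)  # [1.5 -- -- -- -- 6.5]  (masked entries are treated as unobserved)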
# Imports assumed from the notebook (not shown in the original snippet):
import numpy as np
import pymc3 as pm
import theano.tensor as tt
from theano.compile.ops import as_op
from scipy.stats import norm

@as_op(itypes=[tt.dvector, tt.dvector, tt.dscalar], otypes=[tt.dmatrix])
def outcome_probabilities(theta, mu, sigma):
    out = np.empty((mu.size, nYlevels3))
    n = norm(loc=mu, scale=sigma)
    out[:,0] = n.cdf(theta[0])
    out[:,1] = np.max([np.repeat(0,mu.size), n.cdf(theta[1]) - n.cdf(theta[0])])
    out[:,2] = np.max([np.repeat(0,mu.size), n.cdf(theta[2]) - n.cdf(theta[1])])
    out[:,3] = np.max([np.repeat(0,mu.size), n.cdf(theta[3]) - n.cdf(theta[2])])
    out[:,4] = np.max([np.repeat(0,mu.size), n.cdf(theta[4]) - n.cdf(theta[3])])
    out[:,5] = np.max([np.repeat(0,mu.size), n.cdf(theta[5]) - n.cdf(theta[4])])
    out[:,6] = 1 - n.cdf(theta[5])
    return out
with pm.Model() as ordinal_model_metric:
    theta = pm.Normal('theta', mu=thresh3, tau=np.repeat(1/2**2, len(thresh3)),
                      shape=len(thresh3), observed=thresh_obs3, testval=thresh3[1:-1])
    # Intercept
    zbeta0 = pm.Normal('zbeta0', mu=(1+nYlevels3)/2, tau=1/nYlevels3**2)
    # Slope
    zbeta = pm.Normal('zbeta', mu=0.0, tau=1/nYlevels3**2)
    # Mean of the underlying metric distribution
    mu = pm.Deterministic('mu', zbeta0 + zbeta*zX)
    zsigma = pm.Uniform('zsigma', nYlevels3/1000.0, nYlevels3*10.0)
    pr = outcome_probabilities(theta, mu, zsigma)
    y = pm.Categorical('y', pr, observed=df3.Y.cat.codes)
The full notebook is available here: http://nbviewer.jupyter.org/github/JWarmenhoven/DBDA-python/blob/master/Notebooks/Chapter%2023.ipynb
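For completeness: because outcome_probabilities is wrapped in as_op, Theano has no gradient for it, so a gradient-free step method has to be used when sampling. A minimal sketch of how sampling could be run (the exact call in the notebook may differ):

with ordinal_model_metric:
    # NUTS needs gradients, which as_op does not provide, so fall back to Metropolis
    step = pm.Metropolis()
    trace = pm.sample(10000, step=step)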
For reference, here is the JAGS model used by Kruschke on which I based my model:
Ntotal = length(y)
# Threshold 1 and nYlevels-1 are fixed; other thresholds are estimated.
# This allows all parameters to be interpretable on the response scale.
nYlevels = max(y)
thresh = rep(NA,nYlevels-1)
thresh[1] = 1 + 0.5
thresh[nYlevels-1] = nYlevels-1 + 0.5
modelString = "
model {
  for ( i in 1:Ntotal ) {
    y[i] ~ dcat( pr[i,1:nYlevels] )
    pr[i,1] <- pnorm( thresh[1] , mu[x[i]] , 1/sigma[x[i]]^2 )
    for ( k in 2:(nYlevels-1) ) {
      pr[i,k] <- max( 0 , pnorm( thresh[ k ] , mu[x[i]] , 1/sigma[x[i]]^2 )
                        - pnorm( thresh[k-1] , mu[x[i]] , 1/sigma[x[i]]^2 ) )
    }
    pr[i,nYlevels] <- 1 - pnorm( thresh[nYlevels-1] , mu[x[i]] , 1/sigma[x[i]]^2 )
  }
  for ( j in 1:2 ) { # 2 groups
    mu[j] ~ dnorm( (1+nYlevels)/2 , 1/(nYlevels)^2 )
    sigma[j] ~ dunif( nYlevels/1000 , nYlevels*10 )
  }
  for ( k in 2:(nYlevels-2) ) { # 1 and nYlevels-1 are fixed, not stochastic
    thresh[k] ~ dnorm( k+0.5 , 1/2^2 )
  }
}
" # close quote for modelString

It was not a fundamental issue after all: I had forgotten to specify the axis argument for np.max() in the function below.
@as_op(itypes=[tt.dvector, tt.dvector, tt.dscalar], otypes=[tt.dmatrix])
def outcome_probabilities(theta, mu, sigma):
    out = np.empty((mu.size, nYlevels3))
    n = norm(loc=mu, scale=sigma)
    out[:,0] = n.cdf(theta[0])
    out[:,1] = np.max([np.repeat(0,mu.size), n.cdf(theta[1]) - n.cdf(theta[0])], axis=0)
    out[:,2] = np.max([np.repeat(0,mu.size), n.cdf(theta[2]) - n.cdf(theta[1])], axis=0)
    out[:,3] = np.max([np.repeat(0,mu.size), n.cdf(theta[3]) - n.cdf(theta[2])], axis=0)
    out[:,4] = np.max([np.repeat(0,mu.size), n.cdf(theta[4]) - n.cdf(theta[3])], axis=0)
    out[:,5] = np.max([np.repeat(0,mu.size), n.cdf(theta[5]) - n.cdf(theta[4])], axis=0)
    out[:,6] = 1 - n.cdf(theta[5])
    return out
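To see why the missing axis mattered (a standalone NumPy example, not part of the model): without axis, np.max collapses the stacked 2-D array to one scalar that gets broadcast to the whole column, while axis=0 takes the element-wise maximum of the zero vector and the CDF differences. np.maximum(0, diffs) would achieve the same thing.

import numpy as np

diffs = np.array([-0.1, 0.2, 0.3])      # hypothetical CDF differences for three observations
zeros = np.repeat(0, diffs.size)

print(np.max([zeros, diffs]))           # 0.3 -> a single scalar broadcast to every row
print(np.max([zeros, diffs], axis=0))   # [0.  0.2 0.3] -> element-wise clipping at zero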

Related

Tensor conversion requested dtype int64 for Tensor with dtype float32 when creating CNN model

I tried to create a CNN but I got an error. Could you figure out what I did wrong?
# Imports assumed from context (not shown in the original snippet):
import tensorflow as tf
from tensorflow.keras import layers as L
from tensorflow.keras import models as M
from tensorflow.keras import backend as K

C1, C2 = tf.constant(70, dtype='float32'), tf.constant(1000, dtype="float32")

def score(y_true, y_pred):
    tf.dtypes.cast(y_true, tf.float32)
    tf.dtypes.cast(y_pred, tf.float32)
    sigma = y_pred[:, 2] - y_pred[:, 0]
    fvc_pred = y_pred[:, 1]
    # sigma_clip = sigma + C1
    sigma_clip = tf.maximum(sigma, C1)
    delta = tf.abs(y_true[:, 0] - fvc_pred)
    delta = tf.minimum(delta, C2)
    sq2 = tf.sqrt(tf.dtypes.cast(2, dtype=tf.float32))
    metric = (delta / sigma_clip) * sq2 + tf.math.log(sigma_clip * sq2)
    return K.mean(metric)

def mloss(_lambda):
    def loss(y_true, y_pred):
        # qloss is a quantile loss defined elsewhere (not shown here)
        return _lambda * qloss(y_true, y_pred) + (1 - _lambda) * score(y_true, y_pred)
    return loss

def make_model():
    z = L.Input((9,), name="Patient")
    x = L.Dense(100, activation="relu", name="d1")(z)
    x = L.Dense(100, activation="relu", name="d2")(x)
    p1 = L.Dense(3, activation="linear", name="p1")(x)
    p2 = L.Dense(3, activation="relu", name="p2")(x)
    preds = L.Lambda(lambda x: x[0] + tf.cumsum(x[1], axis=1), name="preds")([p1, p2])
    model = M.Model(z, preds, name="CNN")
    model.compile(loss=mloss(0.8), optimizer="adam", metrics=[score])
    return model

net = make_model()
net.fit(z[tr_idx], y[tr_idx], batch_size=200, epochs=1000,
        validation_data=(z[val_idx], y[val_idx]), verbose=0)
The error:
ValueError: Tensor conversion requested dtype int64 for Tensor with dtype float32: <tf.Tensor 'CNN/preds/add:0' shape=(None, 3) dtype=float32>
I tried a type cast, but it didn't solve the problem.
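One detail that may be relevant here (my reading of the code, not a confirmed diagnosis): tf.dtypes.cast returns a new tensor rather than casting in place, so the two casts at the top of score have no effect unless their results are assigned back, and the error message suggests the labels passed to fit are int64 while the model output is float32. A small self-contained illustration of the cast behaviour:

import tensorflow as tf

y_true = tf.constant([[1, 2, 3]], dtype=tf.int64)
tf.cast(y_true, tf.float32)            # returns a new tensor; y_true itself stays int64
y_true = tf.cast(y_true, tf.float32)   # assigning the result is what actually changes the dtype
print(y_true.dtype)                    # float32

Casting the label array to float32 before calling fit (for example y = y.astype('float32')) is another common way to avoid this kind of mismatch.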

Why is my REINFORCE algorithm not learning?

I am training a REINFORCE algorithm on the CartPole environment. Due to the simple nature of the environment, I expect it to learn quickly. However, that doesn't happen.
Here is the main portion of the algorithm -
for i in range(episodes):
    print("i = ", i)
    state = env.reset()
    done = False
    transitions = []
    tot_rewards = 0
    while not done:
        act_proba = model(torch.from_numpy(state))
        action = np.random.choice(np.array([0,1]), p = act_proba.data.numpy())
        next_state, reward, done, info = env.step(action)
        tot_rewards += 1
        transitions.append((state, action, tot_rewards))
        state = next_state
    if i%50==0:
        print("i = ", i, ",reward = ", tot_rewards)
    score.append(tot_rewards)
    reward_batch = torch.Tensor([r for (s,a,r) in transitions])
    disc_rewards = discount_rewards(reward_batch)
    nrml_disc_rewards = normalize_rewards(disc_rewards)
    state_batch = torch.Tensor([s for (s,a,r) in transitions])
    action_batch = torch.Tensor([a for (s,a,r) in transitions])
    pred_batch = model(state_batch)
    prob_batch = pred_batch.gather(dim=1, index=action_batch.long().view(-1, 1)).squeeze()
    loss = -(torch.sum(torch.log(prob_batch)*nrml_disc_rewards))
    opt.zero_grad()
    loss.backward()
    opt.step()
Here is the entire algorithm -
#I referred to this when writing the code - https://github.com/DeepReinforcementLearning/DeepReinforcementLearningInAction/blob/master/Chapter%204/Ch4_book.ipynb
import numpy as np
import gym
import torch
from torch import nn

env = gym.make('CartPole-v0')
learning_rate = 0.0001
episodes = 10000

def discount_rewards(reward, gamma = 0.99):
    return torch.pow(gamma, torch.arange(len(reward)))*reward

def normalize_rewards(disc_reward):
    return disc_reward/(disc_reward.max())

class NeuralNetwork(nn.Module):
    def __init__(self, state_size, action_size):
        super(NeuralNetwork, self).__init__()
        self.state_size = state_size
        self.action_size = action_size
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(state_size, 300),
            nn.ReLU(),
            nn.Linear(300, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, action_size),
            nn.Softmax()
        )

    def forward(self,x):
        x = self.linear_relu_stack(x)
        return x

model = NeuralNetwork(env.observation_space.shape[0], env.action_space.n)
opt = torch.optim.Adam(params = model.parameters(), lr = learning_rate)
score = []

for i in range(episodes):
    print("i = ", i)
    state = env.reset()
    done = False
    transitions = []
    tot_rewards = 0
    while not done:
        act_proba = model(torch.from_numpy(state))
        action = np.random.choice(np.array([0,1]), p = act_proba.data.numpy())
        next_state, reward, done, info = env.step(action)
        tot_rewards += 1
        transitions.append((state, action, tot_rewards))
        state = next_state
    if i%50==0:
        print("i = ", i, ",reward = ", tot_rewards)
    score.append(tot_rewards)
    reward_batch = torch.Tensor([r for (s,a,r) in transitions])
    disc_rewards = discount_rewards(reward_batch)
    nrml_disc_rewards = normalize_rewards(disc_rewards)
    state_batch = torch.Tensor([s for (s,a,r) in transitions])
    action_batch = torch.Tensor([a for (s,a,r) in transitions])
    pred_batch = model(state_batch)
    prob_batch = pred_batch.gather(dim=1, index=action_batch.long().view(-1, 1)).squeeze()
    loss = -(torch.sum(torch.log(prob_batch)*nrml_disc_rewards))
    opt.zero_grad()
    loss.backward()
    opt.step()
The mistake is in your computation of the discounted rewards.
In REINFORCE (and many other algorithms) you need to compute the sum of future discounted rewards from every step onward.
This means the sums of discounted rewards for the first and second steps should be:
G_1 = r_1 + gamma * r_2 + gamma ^ 2 * r_3 + ... + gamma ^ (T-1) * r_T
G_2 = r_2 + gamma * r_3 + gamma ^ 2 * r_4 + ... + gamma ^ (T-1) * r_T
And so on...
This gives you an array containing the sum of future rewards for every step (i.e. [G_1, G_2, G_3, ..., G_T]).
However, what your code currently computes only applies a discount to each step's own reward:
G_1 = r_1
G_2 = gamma * r_2
G_3 = gamma ^ 2 * r_3
And so on...
Here is Python code that fixes the problem. We iterate over the rewards from back to front, which is more computationally efficient.
def discount_rewards(reward, gamma=0.99):
    R = 0
    returns = []
    reward = reward.tolist()
    for r in reward[::-1]:
        R = r + gamma * R
        returns.append(R)
    returns = torch.tensor(returns[::-1])
    return returns
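As a quick sanity check of the difference (my own example, not part of the original answer): for three steps with reward 1 each and gamma = 0.99, the corrected function returns the discounted sum of all future rewards from each step on, whereas the original version only discounts each step's own reward.

import torch

rewards = torch.tensor([1.0, 1.0, 1.0])
print(discount_rewards(rewards))                                       # tensor([2.9701, 1.9900, 1.0000])
print(torch.pow(0.99, torch.arange(len(rewards)).float()) * rewards)   # tensor([1.0000, 0.9900, 0.9801])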
A figure (not reproduced here) showed the progression of the algorithm's score over the first 5000 steps.

Applying REINFORCE to easy21

I'm trying to apply the REINFORCE algorithm (with a softmax policy and undiscounted returns Gt with a baseline) to David Silver's easy21 assignment, and I am having problems with the actual implementation. Compared to a pure MC approach, the result does not converge to Q*. Here is the relevant code:
hit = True
stick = False
actions = [hit, stick]
alpha = 0.1
theta = np.random.randn(420).reshape((420,1))
def psi(state, action):
    if state.player < 1 or state.player > 21:
        return np.zeros((420, 1))
    dealers = [int(state.dealer == x + 1) for x in range(0, 10)]
    players = [int(state.player == x + 1) for x in range(0, 21)]
    actions = [int(action == hit), int(action == stick)]
    psi = [1 if (i == 1 and j == 1 and k == 1) else 0
           for i in dealers for j in players for k in actions]
    return np.array(psi).reshape((420, 1))

def Q(state, action, weight):
    return np.matmul(psi(state, action).T, weight)

def softmax(state, weight):
    allQ = [Q(state, a, weight) for a in actions]
    probs = np.exp(allQ) / np.sum(np.exp(allQ))
    return probs.reshape((2,))

def score_function(state, action, weight):
    probs = softmax(state, weight)
    expected_score = (probs[0] * psi(state, hit)) + (probs[1] * psi(state, stick))
    return psi(state, action) - expected_score

def softmax_policy(state, weight):
    probs = softmax(state, weight)
    if np.random.random() < probs[1]:
        return stick
    else:
        return hit

if __name__ == "__main__":
    Q_star = np.load('Q_star.npy')
    for k in range(1, ITERATIONS):
        terminal = False
        state = game.initialise_state()
        action = softmax_policy(state, theta)
        history = [state, action]
        while not terminal:
            state, reward = game.step(state, action)
            action = softmax_policy(state, theta)
            terminal = state.terminal
            if terminal:
                state_action_pairs = zip(history[0::3], history[1::3])
                history.append(reward)
                history.append(state)
                Gt = sum(history[2::3])
                for s, a in state_action_pairs:
                    advantage = Gt - Q(s, a, prev_theta)
                    theta += alpha * score_function(s, a, theta) * advantage
            else:
                history.append(reward)
                history.append(state)
                history.append(action)
        if k % 10000 == 0:
            print("MSE: " + str(round(np.sum((Q_star - generate_Q(theta)) ** 2),2)))
Output:
python reinforce.py
MSE: 288.18
MSE: 248.45
MSE: 227.08
MSE: 215.46
MSE: 207.3
MSE: 202.61
MSE: 197.82
MSE: 195.96
MSE: 194.01
The value function created using this algorithm was shown in a figure (not reproduced here).
Update:
Fixed the code by using a different theta initialisation:
theta = np.zeros((420,1))
The current value function (figure also not reproduced) still does not match Q*: the peak at player sum = 11 is missing.
The entire code is available at:
https://github.com/Soundpulse/easy21-rl/blob/main/reinforce.py
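One property of the score_function defined above that is easy to sanity-check (a check I added, not part of the original post): for a softmax policy the expected score is zero, i.e. the sum over actions of pi(a|s) * (psi(s,a) - E[psi(s,·)]) vanishes, so the probability-weighted sum of score_function over both actions should be (numerically) a zero vector for any non-terminal state. A small sketch, assuming the game module from the linked repository:

state = game.initialise_state()
probs = softmax(state, theta)
expected = probs[0] * score_function(state, hit, theta) + probs[1] * score_function(state, stick, theta)
print(np.allclose(expected, 0))   # should print True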

Why does Deep Adaptive Input Normalization (DAIN) normalize time series data across rows?

The DAIN paper describes how a network learns to normalize time series data by itself; here is how the authors implemented it. The code leads me to think that the normalization happens across rows, not columns. Can anyone explain why it is implemented that way? I always thought one normalizes a time series only across columns, so that each feature keeps its own information.
Here is the piece that does the normalization:
```python
class DAIN_Layer(nn.Module):
    def __init__(self, mode='adaptive_avg', mean_lr=0.00001, gate_lr=0.001, scale_lr=0.00001, input_dim=144):
        super(DAIN_Layer, self).__init__()
        print("Mode = ", mode)

        self.mode = mode
        self.mean_lr = mean_lr
        self.gate_lr = gate_lr
        self.scale_lr = scale_lr

        # Parameters for adaptive average
        self.mean_layer = nn.Linear(input_dim, input_dim, bias=False)
        self.mean_layer.weight.data = torch.FloatTensor(data=np.eye(input_dim, input_dim))

        # Parameters for adaptive std
        self.scaling_layer = nn.Linear(input_dim, input_dim, bias=False)
        self.scaling_layer.weight.data = torch.FloatTensor(data=np.eye(input_dim, input_dim))

        # Parameters for adaptive scaling
        self.gating_layer = nn.Linear(input_dim, input_dim)

        self.eps = 1e-8

    def forward(self, x):
        # Expecting (n_samples, dim, n_feature_vectors)

        # Nothing to normalize
        if self.mode == None:
            pass

        # Do simple average normalization
        elif self.mode == 'avg':
            avg = torch.mean(x, 2)
            avg = avg.resize(avg.size(0), avg.size(1), 1)
            x = x - avg

        # Perform only the first step (adaptive averaging)
        elif self.mode == 'adaptive_avg':
            avg = torch.mean(x, 2)
            adaptive_avg = self.mean_layer(avg)
            adaptive_avg = adaptive_avg.resize(adaptive_avg.size(0), adaptive_avg.size(1), 1)
            x = x - adaptive_avg

        # Perform the first + second step (adaptive averaging + adaptive scaling)
        elif self.mode == 'adaptive_scale':
            # Step 1:
            avg = torch.mean(x, 2)
            adaptive_avg = self.mean_layer(avg)
            adaptive_avg = adaptive_avg.resize(adaptive_avg.size(0), adaptive_avg.size(1), 1)
            x = x - adaptive_avg

            # Step 2:
            std = torch.mean(x ** 2, 2)
            std = torch.sqrt(std + self.eps)
            adaptive_std = self.scaling_layer(std)
            adaptive_std[adaptive_std <= self.eps] = 1
            adaptive_std = adaptive_std.resize(adaptive_std.size(0), adaptive_std.size(1), 1)
            x = x / (adaptive_std)

        elif self.mode == 'full':
            # Step 1:
            avg = torch.mean(x, 2)
            adaptive_avg = self.mean_layer(avg)
            adaptive_avg = adaptive_avg.resize(adaptive_avg.size(0), adaptive_avg.size(1), 1)
            x = x - adaptive_avg

            # Step 2:
            std = torch.mean(x ** 2, 2)
            std = torch.sqrt(std + self.eps)
            adaptive_std = self.scaling_layer(std)
            adaptive_std[adaptive_std <= self.eps] = 1
            adaptive_std = adaptive_std.resize(adaptive_std.size(0), adaptive_std.size(1), 1)
            x = x / adaptive_std

            # Step 3:
            avg = torch.mean(x, 2)
            gate = F.sigmoid(self.gating_layer(avg))
            gate = gate.resize(gate.size(0), gate.size(1), 1)
            x = x * gate

        else:
            assert False

        return x
```
I am not sure either, but they do transpose the input in the forward function of the MLP class (x = x.transpose(1, 2)). So it seems to me that they normalise over time for each feature.
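A small illustration of what torch.mean(x, 2) does given the expected (n_samples, dim, n_feature_vectors) layout (a toy example of mine, not from the repository): the mean is taken over the last dimension, i.e. over the time steps of each feature, so every feature gets its own mean rather than each time step.

import torch

# one sample, 2 features, 3 time steps
x = torch.tensor([[[ 1.,  2.,  3.],
                   [10., 20., 30.]]])
print(torch.mean(x, 2))   # tensor([[ 2., 20.]]) -> one mean per feature, averaged over time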

How to use DataParallel when there is a 'for' loop in the network?

I have a server with two GPUs. If I use one GPU, I need more than 10 days to finish 1000 epochs. But when I tried to use DataParallel, the program didn't work; it seems to be because there is a 'for' loop in my network. How can I use DataParallel in this case, or is there another way to speed up training?
class WaveNet(nn.Module):
    def __init__(self, mu, n_residue, n_skip, dilation_depth, n_repeat):
        # mu: audio quantization size
        # n_residue: residue channels
        # n_skip: skip channels
        # dilation_depth & n_repeat: dilation layer setup
        self.mu = mu
        super(WaveNet, self).__init__()
        self.dilation_depth = dilation_depth
        dilations = self.dilations = [2 ** i for i in range(dilation_depth)] * n_repeat
        self.one_hot = One_Hot(mu)
        self.from_input = nn.Conv1d(in_channels=mu, out_channels=n_residue, kernel_size=1)
        self.from_input = nn.DataParallel(self.from_input)
        self.conv_sigmoid = nn.ModuleList(
            [nn.Conv1d(in_channels=n_residue, out_channels=n_residue, kernel_size=2, dilation=d)
             for d in dilations])
        self.conv_sigmoid = nn.DataParallel(self.conv_sigmoid)
        self.conv_tanh = nn.ModuleList(
            [nn.Conv1d(in_channels=n_residue, out_channels=n_residue, kernel_size=2, dilation=d)
             for d in dilations])
        self.conv_tanh = nn.DataParallel(self.conv_tanh)
        self.skip_scale = nn.ModuleList([nn.Conv1d(in_channels=n_residue, out_channels=n_skip, kernel_size=1)
                                         for d in dilations])
        self.skip_scale = nn.DataParallel(self.skip_scale)
        self.residue_scale = nn.ModuleList([nn.Conv1d(in_channels=n_residue, out_channels=n_residue, kernel_size=1)
                                            for d in dilations])
        self.residue_scale = nn.DataParallel(self.residue_scale)
        self.conv_post_1 = nn.Conv1d(in_channels=n_skip, out_channels=n_skip, kernel_size=1)
        self.conv_post_1 = nn.DataParallel(self.conv_post_1)
        self.conv_post_2 = nn.Conv1d(in_channels=n_skip, out_channels=mu, kernel_size=1)
        self.conv_post_2 = nn.DataParallel(self.conv_post_2)

    def forward(self, input, train=True):
        output = self.preprocess(input, train)
        skip_connections = []  # save for generation purposes
        for s, t, skip_scale, residue_scale in zip(self.conv_sigmoid, self.conv_tanh, self.skip_scale,
                                                   self.residue_scale):
            output, skip = self.residue_forward(output, s, t, skip_scale, residue_scale)
            skip_connections.append(skip)
        # sum up skip connections
        output = sum([s[:, :, -output.size(2):] for s in skip_connections])
        output = self.postprocess(output, train)
        return output
Running this raises: TypeError: zip argument #1 must support iteration
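For what it's worth (my own reading, not a confirmed answer): nn.DataParallel wraps a module in another Module that is not iterable, so once conv_sigmoid and the other ModuleLists are wrapped, the zip in forward fails with exactly this error. The conventional pattern is to keep the submodules as plain nn.ModuleList objects and wrap the whole network once at the top level; the for loop then runs unchanged inside each replica. A minimal sketch under those assumptions (the hyperparameter values are placeholders):

import torch
from torch import nn

# Keep conv_sigmoid, conv_tanh, skip_scale and residue_scale as plain nn.ModuleList
# inside WaveNet (no nn.DataParallel calls in __init__), then wrap the whole model once:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = WaveNet(mu=256, n_residue=24, n_skip=128, dilation_depth=10, n_repeat=2)  # placeholder arguments
net = nn.DataParallel(net)   # splits each input batch across the available GPUs
net = net.to(device)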