How can I solve "empty() received an invalid combination of arguments - got (tuple, dtype=NoneType, device=NoneType)" in MushroomRL?

I'm using MushroomRL for a deep reinforcement learning project, with a graph representation as the RL environment, where the number of nodes gives the number of actions. The input to my neural network is a single value, e.g. tensor([[5.]]), and the output Q has one entry per node (ten in my case), e.g. tensor([[5972.4927, 8562.3330, 7443.6479, 7326.1587, 6615.2090, 6617.3145, 6911.8672, 8233.7930, 6821.0093, 7000.1182]]). I'm using this new framework called MushroomRL, and this is the code:
if __name__ == '__main__':
    from mushroom_rl.core import Core
    from mushroom_rl.algorithms.value import TrueOnlineSARSALambda
    from mushroom_rl.policy import EpsGreedy
    from mushroom_rl.features import Features
    from mushroom_rl.features.tiles import Tiles
    from mushroom_rl.utils.dataset import compute_J
    from mushroom_rl.utils.parameters import LinearParameter, Parameter
    from mushroom_rl.approximators.parametric import TorchApproximator
    from mushroom_rl.algorithms.value import DQN

    # Set the seed
    np.random.seed(1)

    # Create the toy environment with default parameters
    #mdp = Environment.make('graph_env')
    mdp = graph_env()

    # Using an epsilon-greedy policy
    epsilon = Parameter(value=0.1)
    pi = EpsGreedy(epsilon=epsilon)

    # Policy
    epsilon = LinearParameter(value=1.,
                              threshold_value=.1,
                              n=1000000)
    epsilon_test = Parameter(value=.05)
    epsilon_random = Parameter(value=1)
    pi = EpsGreedy(epsilon=epsilon_random)

    approximator_params = dict(
        network=Network,
        input_shape=(1,),
        output_shape=(1,),
        n_actions=mdp.info.action_space.n,
        optimizer=optimizer,
        loss=F.mse_loss
    )
    approximator = TorchApproximator

    algorithm_params = dict(
        batch_size=32,
        target_update_frequency=target_update_frequency // train_frequency,
        replay_memory=True,
        initial_replay_size=initial_replay_size,
        max_replay_size=max_replay_size
    )
    agent = DQN(mdp.info, pi, approximator,
                approximator_params=approximator_params,
                **algorithm_params)
    # Algorithm
    core = Core(agent, mdp)

    # RUN
    # Fill replay memory with random dataset
    print_epoch(0)
    core.learn(n_steps=initial_replay_size, n_steps_per_fit=initial_replay_size)

    # Evaluate initial policy
    pi.set_epsilon(epsilon_test)
    #mdp.set_episode_end(False)
    dataset = core.evaluate(n_steps=test_samples)
    scores.append(get_stats(dataset))

    for n_epoch in range(1, max_steps // evaluation_frequency + 1):
        print_epoch(n_epoch)
        print('- Learning:')
        # learning step
        pi.set_epsilon(epsilon)
        mdp.set_episode_end(True)
        core.learn(n_steps=evaluation_frequency,
                   n_steps_per_fit=train_frequency)

        print('- Evaluation:')
        # evaluation step
        pi.set_epsilon(epsilon_test)
        mdp.set_episode_end(False)
        dataset = core.evaluate(n_steps=test_samples)
        scores.append(get_stats(dataset))
It gives me this error when I run the code:
TypeError: empty() received an invalid combination of arguments - got (tuple, dtype=NoneType, device=NoneType), but expected one of:
* (tuple of ints size, *, tuple of names names, torch.memory_format memory_format, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
* (tuple of ints size, *, torch.memory_format memory_format, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
I believe the problem is in this part of the code. Can anyone help me fix it?
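Without the definitions of Network, optimizer, graph_env and the various hyperparameters (none of which are shown), this is only a guess at the likely culprit: this exact TypeError is what torch.empty() raises when a layer size is not a plain int, which typically happens when the input_shape/output_shape tuples reach nn.Linear unmodified inside the Network class. Below is a minimal sketch of a network in the style of MushroomRL's DQN examples (an assumption, since the actual Network is not shown); it unpacks the shape tuples into ints first. Note also that those examples set output_shape=(mdp.info.action_space.n,) rather than (1,), so the network emits one Q-value per action.

import torch
import torch.nn as nn
import torch.nn.functional as F

class Network(nn.Module):
    def __init__(self, input_shape, output_shape, **kwargs):
        super().__init__()
        n_input = input_shape[-1]   # unpack plain ints from the shape tuples;
        n_output = output_shape[0]  # passing a tuple to nn.Linear triggers the
                                    # empty() TypeError shown above
        self._h1 = nn.Linear(n_input, 80)
        self._h2 = nn.Linear(80, n_output)

    def forward(self, state, action=None):
        q = F.relu(self._h1(state.float()))
        q = self._h2(q)
        if action is None:
            return q  # Q-values of all actions
        # Q-value of the selected action only
        return torch.squeeze(q.gather(1, action.long()))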

Related

Problem with PettingZoo and Stable-Baselines3 with a ParallelEnv

I am having trouble making things work with a custom ParallelEnv I wrote using PettingZoo. I am using SuperSuit's ss.pettingzoo_env_to_vec_env_v1(env) as a wrapper to vectorize the environment and make it work with Stable-Baselines3, as documented here.
You can find attached a summary of the most relevant part of the code:
from typing import Optional
from gym import spaces
import random
import numpy as np
from pettingzoo import ParallelEnv
from pettingzoo.utils.conversions import parallel_wrapper_fn
import supersuit as ss
from gym.utils import EzPickle, seeding


def env(**kwargs):
    env_ = parallel_env(**kwargs)
    env_ = ss.pettingzoo_env_to_vec_env_v1(env_)
    #env_ = ss.concat_vec_envs_v1(env_, 1)
    return env_


petting_zoo = env


class parallel_env(ParallelEnv, EzPickle):
    metadata = {'render_modes': ['ansi'], "name": "PlayerEnv-Multi-v0"}

    def __init__(self, n_agents: int = 20, new_step_api: bool = True) -> None:
        EzPickle.__init__(
            self,
            n_agents,
            new_step_api
        )
        self._episode_ended = False
        self.n_agents = n_agents
        self.possible_agents = [
            f"player_{idx}" for idx in range(n_agents)]
        self.agents = self.possible_agents[:]
        self.agent_name_mapping = dict(
            zip(self.possible_agents, list(range(len(self.possible_agents))))
        )
        self.observation_spaces = spaces.Dict(
            {agent: spaces.Box(shape=(len(self.agents),),
                               dtype=np.float64, low=0.0, high=1.0)
             for agent in self.possible_agents}
        )
        self.action_spaces = spaces.Dict(
            {agent: spaces.Discrete(4) for agent in self.possible_agents}
        )
        self.current_step = 0

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)

    def observation_space(self, agent):
        return self.observation_spaces[agent]

    def action_space(self, agent):
        return self.action_spaces[agent]

    def __calculate_observation(self, agent_id: int) -> np.ndarray:
        return self.observation_space(agent_id).sample()

    def __calculate_observations(self) -> np.ndarray:
        observations = {
            agent: self.__calculate_observation(agent_id=agent)
            for agent in self.agents
        }
        return observations

    def observe(self, agent):
        return self.__calculate_observation(agent_id=agent)

    def step(self, actions):
        if self._episode_ended:
            return self.reset()
        observations = self.__calculate_observations()
        rewards = random.sample(range(100), self.n_agents)
        self.current_step += 1
        self._episode_ended = self.current_step >= 100
        infos = {agent: {} for agent in self.agents}
        dones = {agent: self._episode_ended for agent in self.agents}
        rewards = {
            self.agents[i]: rewards[i]
            for i in range(len(self.agents))
        }
        if self._episode_ended:
            self.agents = {}  # To satisfy `set(par_env.agents) == live_agents`
        return observations, rewards, dones, infos

    def reset(self,
              seed: Optional[int] = None,
              return_info: bool = False,
              options: Optional[dict] = None,):
        self.agents = self.possible_agents[:]
        self._episode_ended = False
        self.current_step = 0
        observations = self.__calculate_observations()
        return observations

    def render(self, mode="human"):
        # TODO: IMPLEMENT
        print("TO BE IMPLEMENTED")

    def close(self):
        pass
Unfortunately, when I try to test it with the following main procedure:
from stable_baselines3 import DQN, PPO
from stable_baselines3.common.env_checker import check_env
from dummy_env import dummy
from pettingzoo.test import parallel_api_test


if __name__ == '__main__':
    # Testing the parallel algorithm alone
    env_parallel = dummy.parallel_env()
    parallel_api_test(env_parallel)  # This works!

    # Testing the environment with the wrapper
    env = dummy.petting_zoo()

    # ERROR: AssertionError: The observation returned by the `reset()` method does not match the given observation space
    check_env(env)

    # Model initialization
    model = PPO("MlpPolicy", env, verbose=1)

    # ERROR: ValueError: could not broadcast input array from shape (20,20) into shape (20,)
    model.learn(total_timesteps=10_000)
I get the following error:
AssertionError: The observation returned by the `reset()` method does not match the given observation space
If I skip check_env() I get the following one:
ValueError: could not broadcast input array from shape (20,20) into shape (20,)
It seems that ss.pettingzoo_env_to_vec_env_v1(env) is capable of splitting the parallel environment into multiple vectorized ones, but not for the reset() function.
Does anyone know how to fix this problem?
Please find the GitHub repository to reproduce the problem.
You should double-check the reset() function in PettingZoo: it returns None instead of an observation, unlike Gym.
Thanks to a discussion I had in the issues section of the SuperSuit repository, I am able to post the solution to the problem. Thanks to jjshoots!
First of all, it is necessary to have the latest SuperSuit version. In order to get that, I needed to install Stable-Baselines3 using the instructions here to make it work with gym 0.24+.
After that, taking the code in the question as an example, it is necessary to substitute
def env(**kwargs):
    env_ = parallel_env(**kwargs)
    env_ = ss.pettingzoo_env_to_vec_env_v1(env_)
    #env_ = ss.concat_vec_envs_v1(env_, 1)
    return env_
with
def env(**kwargs):
    env_ = parallel_env(**kwargs)
    env_ = ss.pettingzoo_env_to_vec_env_v1(env_)
    env_ = ss.concat_vec_envs_v1(env_, 1, base_class="stable_baselines3")
    return env_
The outcomes are:
Outcome 1: leaving in the line with check_env(env), I got the error AssertionError: Your environment must inherit from the gym.Env class (cf. https://github.com/openai/gym/blob/master/gym/core.py)
Outcome 2: removing the line with check_env(env), the agent starts training successfully!
In the end, I think the argument base_class="stable_baselines3" made the difference.
Only the small problem with check_env remains to be reported, but I think it can be considered trivial as long as the training works.
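For completeness, here is a minimal usage sketch with the patched wrapper (assuming the same dummy module as in the question and skipping check_env, per the outcomes above):

from stable_baselines3 import PPO
from dummy_env import dummy

env = dummy.petting_zoo()  # now built with base_class="stable_baselines3"
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10_000)  # training starts successfully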

How can I extract information quickly from 130,000+ JSON files located in S3?

I have an S3 bucket with over 130k JSON files, and I need to compute numbers based on the data in those files (for example, counting the number of speakers of each gender). I am currently using the S3 paginator and json.loads to read each file and extract information from it, but it takes a very long time to process such a large number of files (2-3 files per second). How can I speed up the process? Please provide working code examples if possible. Thank you.
Here is some of my code:
client = boto3.client('s3')
paginator = client.get_paginator('list_objects_v2')
result = paginator.paginate(Bucket='bucket-name', StartAfter='')
for page in result:
    if "Contents" in page:
        for key in page["Contents"]:
            keyString = key["Key"]
            s3 = boto3.resource('s3')
            content_object = s3.Bucket('bucket-name').Object(str(keyString))
            file_content = content_object.get()['Body'].read().decode('utf-8')
            json_content = json.loads(file_content)
            x = (json_content['dict-name'])
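As an aside (separate from the main speed-up below), the snippet recreates the boto3 resource on every key; hoisting it out of the loop avoids needless client setup. A lightly reworked version of the same loop:

import json
import boto3

client = boto3.client('s3')
s3 = boto3.resource('s3')  # create once, reuse for every key
bucket = s3.Bucket('bucket-name')

paginator = client.get_paginator('list_objects_v2')
for page in paginator.paginate(Bucket='bucket-name', StartAfter=''):
    for key in page.get('Contents', []):
        file_content = bucket.Object(key['Key']).get()['Body'].read().decode('utf-8')
        x = json.loads(file_content)['dict-name']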
In order to use the code below, I'm assuming you understand pandas (if not, you may want to get to know it). Also, it's not clear whether your 2-3 files per second covers just the read or includes part of the number crunching; nonetheless, multiprocessing will speed this up dramatically. The gist is to read all the files in (as dataframes), concatenate them, then do your analysis.
To be useful for me, I run this on spot instances that have lots of vCPUs and memory. I've found that instances that are network optimized (like c5n - look for the n) and the inf1 (for machine learning) are much faster at reading/writing than T or M instance types, as examples.
My use case is reading 2000 'directories' with roughly 1200 files in each and analyzing them. The multiprocessing is orders of magnitude faster than single threading.
File 1: your main script
# create script.py file
import os
from multiprocessing import Pool
from itertools import repeat
import pandas as pd
import json
from utils_file_handling import *

ufh = file_utilities()  # instantiate the class functions - see below (second file)

bucket = 'your-bucket'
prefix = 'your-prefix/here/'  # if you don't have a prefix pass '' (empty string or function will fail)

# define multiprocessing function - get to know this to use multiple processors to read files simultaneously
def get_dflist_multiprocess(keys_list, num_proc=4):
    with Pool(num_proc) as pool:
        df_list = pool.starmap(ufh.reader_json, zip(repeat(bucket), keys_list), 15)
        pool.close()
        pool.join()
    return df_list

# create your master keys list upfront; you can loop through all or slice the list to test
keys_list = ufh.get_keys_from_prefix(bucket, prefix)
# keys_list = keys_list[0:2000]  # as an example
num_proc = os.cpu_count()  # tells you how many processors your machine has; function above defaults to 4 unless given
df_list = get_dflist_multiprocess(keys_list, num_proc=num_proc)  # collect dataframes for each file
df_new = pd.concat(df_list, sort=False)
df_new = df_new.reset_index(drop=True)
# do your analysis on the dataframe
File 2: class functions
# utils_file_handling.py
# create this in a separate file; name as you wish but change the import in the script.py file
import boto3
import json
import pandas as pd

# define client and resource
s3sr = boto3.resource('s3')
s3sc = boto3.client('s3')


class file_utilities:
    """file handling functions"""

    def get_keys_from_prefix(self, bucket, prefix):
        '''gets list of keys for given bucket and prefix'''
        keys_list = []
        paginator = s3sr.meta.client.get_paginator('list_objects_v2')
        # use Delimiter to limit search to that level of hierarchy
        for page in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/'):
            keys = [content['Key'] for content in page.get('Contents')]
            print('keys in page: ', len(keys))
            keys_list.extend(keys)
        return keys_list

    def read_json_file_from_s3(self, bucket, key):
        """read json file"""
        obj = s3sc.get_object(Bucket=bucket, Key=key)
        data = obj['Body'].read().decode('utf-8')
        return data

    # you may need to tweak this for your ['dict-name'] example; I think I have it correct
    def reader_json(self, bucket, key):
        '''returns dataframe'''
        return pd.DataFrame(json.loads(self.read_json_file_from_s3(bucket, key))['dict-name'])
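Back to the original goal of counting speakers by gender: once df_new holds every file, the tally is a one-liner. The column name 'gender' is hypothetical here, since the actual JSON schema was not shown:

# 'gender' is a hypothetical column name; adapt it to the real JSON structure
gender_counts = df_new['gender'].value_counts()
print(gender_counts)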

How to estimate the parameters of a mixture model in OpenTURNS?

I would like to estimate the parameters of a mixture model of normal distributions in OpenTURNS (that is, the distribution of a random variable drawn from one of several Gaussian distributions, each picked with a given probability). OpenTURNS can create such a mixture, but it cannot estimate its parameters. Moreover, I need to create the mixture as an OpenTURNS distribution in order to propagate uncertainty through a function.
For example, I know how to create a mixture of two normal distributions:
import openturns as ot
mu1 = 1.0
sigma1 = 0.5
mu2 = 3.0
sigma2 = 2.0
weights = [0.3, 0.7]
n1 = ot.Normal(mu1, sigma1)
n2 = ot.Normal(mu2, sigma2)
m = ot.Mixture([n1, n2], weights)
In this example, I would like to estimate mu1, sigma1, mu2, sigma2 on a given sample. In order to create a working example, it is easy to generate a sample by simulation.
s = m.getSample(100)
You can rely on scikit-learn's GaussianMixture to estimate the parameters and then use them to define a Mixture model in OpenTURNS.
The script hereafter contains a Python class MixtureFactory that estimates the parameters of a scikit-learn GaussianMixture and outputs an OpenTURNS Mixture distribution:
from sklearn.mixture import GaussianMixture
from sklearn.utils.validation import check_is_fitted
import openturns as ot
import numpy as np


class MixtureFactory(GaussianMixture):
    """
    Representation of a Gaussian mixture model probability distribution.

    This class allows to estimate the parameters of a Gaussian mixture
    distribution using scikit-learn algorithms & provides an openturns
    Mixture object.
    Read more in the scikit-learn user guide & openturns theory.

    Parameters
    ----------
    n_components : int, defaults to 1.
        The number of mixture components.
    covariance_type : {'full' (default), 'tied', 'diag', 'spherical'}
        String describing the type of covariance parameters to use.
        Must be one of:
        'full'
            each component has its own general covariance matrix
        'tied'
            all components share the same general covariance matrix
        'diag'
            each component has its own diagonal covariance matrix
        'spherical'
            each component has its own single variance
    tol : float, defaults to 1e-3.
        The convergence threshold. EM iterations will stop when the
        lower bound average gain is below this threshold.
    reg_covar : float, defaults to 1e-6.
        Non-negative regularization added to the diagonal of covariance.
        Allows to assure that the covariance matrices are all positive.
    max_iter : int, defaults to 100.
        The number of EM iterations to perform.
    n_init : int, defaults to 1.
        The number of initializations to perform. The best results are kept.
    init_params : {'kmeans', 'random'}, defaults to 'kmeans'.
        The method used to initialize the weights, the means and the
        precisions.
        Must be one of::
            'kmeans' : responsibilities are initialized using kmeans.
            'random' : responsibilities are initialized randomly.
    weights_init : array-like, shape (n_components, ), optional
        The user-provided initial weights, defaults to None.
        If None, weights are initialized using the `init_params` method.
    means_init : array-like, shape (n_components, n_features), optional
        The user-provided initial means, defaults to None.
        If None, means are initialized using the `init_params` method.
    precisions_init : array-like, optional.
        The user-provided initial precisions (inverse of the covariance
        matrices), defaults to None.
        If None, precisions are initialized using the 'init_params' method.
        The shape depends on 'covariance_type'::
            (n_components,) if 'spherical',
            (n_features, n_features) if 'tied',
            (n_components, n_features) if 'diag',
            (n_components, n_features, n_features) if 'full'
    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.
    warm_start : bool, default to False.
        If 'warm_start' is True, the solution of the last fitting is used as
        initialization for the next call of fit(). This can speed up
        convergence when fit is called several times on similar problems.
        In that case, 'n_init' is ignored and only a single initialization
        occurs upon the first call.
        See :term:`the Glossary <warm_start>`.
    verbose : int, default to 0.
        Enable verbose output. If 1 then it prints the current
        initialization and each iteration step. If greater than 1 then
        it prints also the log probability and the time needed
        for each step.
    verbose_interval : int, default to 10.
        Number of iterations done before the next print.
    """
    def __init__(self, n_components=2, covariance_type='full', tol=1e-6,
                 reg_covar=1e-6, max_iter=1000, n_init=1, init_params='kmeans',
                 weights_init=None, means_init=None, precisions_init=None,
                 random_state=41, warm_start=False,
                 verbose=0, verbose_interval=10):
        # keyword arguments for compatibility with recent scikit-learn versions
        super().__init__(n_components=n_components, covariance_type=covariance_type,
                         tol=tol, reg_covar=reg_covar, max_iter=max_iter,
                         n_init=n_init, init_params=init_params,
                         weights_init=weights_init, means_init=means_init,
                         precisions_init=precisions_init, random_state=random_state,
                         warm_start=warm_start, verbose=verbose,
                         verbose_interval=verbose_interval)
    def fit(self, X):
        """
        Fit the mixture model parameters.

        The EM algorithm is applied here to estimate the model parameters and
        build a Mixture distribution (see openturns Mixture).
        The method fits the model ``n_init`` times and sets the parameters with
        which the model has the largest likelihood or lower bound. Within each
        trial, the method iterates between E-step and M-step for ``max_iter``
        times until the change of likelihood or lower bound is less than
        ``tol``, otherwise, a ``ConvergenceWarning`` is raised.
        If ``warm_start`` is ``True``, then ``n_init`` is ignored and a single
        initialization is performed upon the first call. Upon consecutive
        calls, training starts where it left off.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.

        Returns
        -------
        self
        """
        data = np.array(X)
        # Evaluate the model parameters.
        super().fit(data)
        # n_components ==> weights of size n_components
        weights = self.weights_
        n_components = len(weights)
        # Create the collection of OpenTURNS distributions
        collection = n_components * [0]
        # Covariance matrices
        cov = self.covariances_
        mu = self.means_
        # means : n_components x n_features
        n_components, n_features = mu.shape
        # Depending on the type of covariance, we define the collection of Gaussians
        # Spherical : C_k = Identity * sigma_k
        if self.covariance_type == 'spherical':
            c = ot.CorrelationMatrix(n_features)
            for l in range(n_components):
                sigma = np.sqrt(cov[l])
                collection[l] = ot.Normal(list(mu[l]), [sigma] * n_features, c)
        elif self.covariance_type == 'diag':
            for l in range(n_components):
                c = ot.CovarianceMatrix(n_features)
                for i in range(n_features):
                    c[i, i] = cov[l, i]
                collection[l] = ot.Normal(list(mu[l]), c)
        elif self.covariance_type == 'tied':
            # Same covariance for all clusters
            c = ot.CovarianceMatrix(n_features)
            for i in range(n_features):
                for j in range(0, i + 1):
                    c[i, j] = cov[i, j]
            # Define the collection with the same covariance
            for l in range(n_components):
                collection[l] = ot.Normal(list(mu[l]), c)
        else:
            n_features = cov.shape[1]
            for l in range(n_components):
                c = ot.CovarianceMatrix(n_features)
                for i in range(n_features):
                    for j in range(0, i + 1):
                        c[i, j] = cov[l][i, j]
                collection[l] = ot.Normal(list(mu[l]), c)
        self._mixture = ot.Mixture(collection, weights)
        return self
    def get_mixture(self):
        """
        Returns the Mixture object.
        """
        check_is_fitted(self)
        return self._mixture
if __name__ == "__main__":
mu1 = 1.0
sigma1 = 0.5
mu2 = 3.0
sigma2 = 2.0
weights = [0.3, 0.7]
n1 = ot.Normal(mu1, sigma1)
n2 = ot.Normal(mu2, sigma2)
m = ot.Mixture([n1, n2], weights)
x = m.getSample(1000)
est_dist = MixtureFactory(random_state=1)
est_dist.fit(x)
print(est_dist.get_mixture())
I have actually tried this method and it works perfectly. On top of that, the fit of the model through the scikit-learn GMM and the subsequent conversion to an OpenTURNS Mixture are very fast. I recommend that future users test several numbers of components and covariance matrix structures, as it will not take a lot of time and can substantially improve the goodness of fit to the data.
Thanks for the answer.
Here is a pure OpenTURNS solution. It is probably slower than the scikit-learn-based method, but it is more generic: you could use it to estimate the parameters of any mixture model, not necessarily a mixture of normal distributions.
The idea is to retrieve the log-likelihood function from the Mixture object and minimize it.
In the following, let us assume that s is the sample we want to fit the mixture on.
First, we need to build the mixture we want to estimate the parameters of. We can specify any valid set of parameters, it does not matter. In your example, you want a mixture of 2 normal distributions.
mixture = ot.Mixture([ot.Normal()]*2, [0.5]*2)
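For a self-contained run (an assumption on our part; any data sample works), s can be drawn from the mixture defined in the question:

# Generate the data to fit on, reusing the question's example mixture
true_mixture = ot.Mixture([ot.Normal(1.0, 0.5), ot.Normal(3.0, 2.0)], [0.3, 0.7])
s = true_mixture.getSample(1000)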
There is a small hurdle. All weights sum to 1, thus one of them is determined by the others: the solver must not be allowed to freely set it. The order of the parameters of an OpenTURNS Mixture is as follows:
weight of the first distribution;
parameters of the first distribution;
weight of the second distribution;
parameters of the second distribution;
...
You can view all parameters with mixture.getParameter() and their names with mixture.getParameterDescription(). The following is a helper function that:
takes as input the list containing all mixture parameters except the weight of the first distribution;
outputs a Point containing all parameters, including the weight of the first distribution.
def full(params):
    """
    Point of all mixture parameters from a list that omits the first weight.
    """
    params = ot.Point(params)
    aux_mixture = ot.Mixture(mixture)
    dist_number = aux_mixture.getDistributionCollection().getSize()
    index = aux_mixture.getDistributionCollection()[0].getParameter().getSize()
    list_weights = []
    for num in range(1, dist_number):
        list_weights.append(params[index])
        index += 1 + aux_mixture.getDistributionCollection()[num].getParameter().getSize()
    complementary_weight = ot.Point([abs(1.0 - sum(list_weights))])
    complementary_weight.add(params)
    return complementary_weight
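For instance, with the two-Normal mixture above, the free parameter list is [mu0, sigma0, w1, mu1, sigma1], and full() prepends the complementary first weight (a quick illustrative check):

# free parameters omit the first weight
print(full([0.0, 1.0, 0.5, 3.0, 2.0]))
# -> [0.5, 0.0, 1.0, 0.5, 3.0, 2.0]: the first weight 1 - 0.5 is restored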
The next function computes the opposite of the log-likelihood of a given list of parameters (except the first weight).
For the sake of numerical stability, it divides this value by the number of observations.
We will minimize this function in order to find the Maximum Likelihood Estimate.
def minus_log_pdf(params):
    """
    - log-likelihood of a list of parameters excepting the first weight,
    divided by the number of observations
    """
    aux_mixture = ot.Mixture(mixture)
    full_params = full(params)
    try:
        aux_mixture.setParameter(full_params)
    except TypeError:
        # case where the proposed parameters are invalid:
        # return a huge value
        return [ot.SpecFunc.LogMaxScalar]
    res = -aux_mixture.computeLogPDF(s).computeMean()
    return res
To use OpenTURNS optimization facilities, we need to turn this function into a PythonFunction object.
OT_minus_log_pdf = ot.PythonFunction(mixture.getParameter().getSize()-1, 1, minus_log_pdf)
Cobyla is usually good at likelihood optimization.
problem = ot.OptimizationProblem(OT_minus_log_pdf)
algo = ot.Cobyla(problem)
In order to decrease chances of Cobyla being stuck on a local minimum, we are going to use MultiStart. We pick a starting set of parameters and randomly change the weights. The following helper function makes it easy:
def random_weights(params, nb):
    """
    List of nb Points representing mixture parameters with randomly varying weights.
    """
    aux_mixture = ot.Mixture(mixture)
    full_params = full(params)
    aux_mixture.setParameter(full_params)
    list_params = []
    for num in range(nb):
        dirichlet = ot.Dirichlet([1.0] * aux_mixture.getDistributionCollection().getSize()).getRealization()
        dirichlet.add(1.0 - sum(dirichlet))
        aux_mixture.setWeights(dirichlet)
        list_params.append(aux_mixture.getParameter()[1:])
    return list_params
We pick 10 starting points and increase the maximum number of evaluations of the log-likelihood from 100 (the default) to 10000.
init = mixture.getParameter()[1:]
starting_points = random_weights(init, 10)
algo_multistart = ot.MultiStart(algo, starting_points)
algo_multistart.setMaximumEvaluationNumber(10000)
Let's run the solver and retrieve the result.
algo_multistart.run()
result = algo_multistart.getResult()
All that remains is to set the mixture's parameters to the optimal value.
We must not forget to add the first weight back!
optimal_parameters = result.getOptimalPoint()
mixture.setParameter(full(optimal_parameters))
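As a quick sanity check (not in the original answer), the fitted values can be printed next to their names and compared with the parameters used to generate s:

print(mixture.getParameterDescription())
print(mixture.getParameter())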
Below is an alternative.
The first step creates a new GaussianMixture class, derived from PythonDistribution. The key point is to implement the computeLogPDF method and the setParameter/getParameter methods. Notice that this parametrization of the mixture has only one single weight w.
class GaussianMixture(ot.PythonDistribution):
    def __init__(self, mu1=-5.0, sigma1=1.0,
                 mu2=5.0, sigma2=1.0,
                 w=0.5):
        super(GaussianMixture, self).__init__(1)
        if w < 0.0 or w > 1.0:
            raise ValueError('The weight is not in [0, 1]. w=%s.' % (w))
        self.mu1 = mu1
        self.sigma1 = sigma1
        self.mu2 = mu2
        self.sigma2 = sigma2
        self.w = w
        collDist = [ot.Normal(mu1, sigma1), ot.Normal(mu2, sigma2)]
        weight = [w, 1.0 - w]
        self.distribution = ot.Mixture(collDist, weight)

    def computeCDF(self, x):
        p = self.distribution.computeCDF(x)
        return p

    def computePDF(self, x):
        p = self.distribution.computePDF(x)
        return p

    def computeQuantile(self, prob, tail=False):
        quantile = self.distribution.computeQuantile(prob, tail)
        return quantile

    def getSample(self, size):
        X = self.distribution.getSample(size)
        return X

    def getParameter(self):
        parameter = ot.Point([self.mu1, self.sigma1,
                              self.mu2, self.sigma2,
                              self.w])
        return parameter

    def setParameter(self, parameter):
        [mu1, sigma1, mu2, sigma2, w] = parameter
        self.__init__(mu1, sigma1, mu2, sigma2, w)
        return parameter

    def computeLogPDF(self, sample):
        logpdf = self.distribution.computeLogPDF(sample)
        return logpdf
In order to create the distribution, we use the Distribution class:
gm = ot.Distribution(GaussianMixture())
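The call factory.build(sample) further below needs a sample, which the original answer leaves undefined; one way to produce it (an assumption, chosen so the estimate can be checked against known values) is to draw from the true mixture:

# Assumed setup: draw data from a known mixture in order to recover its parameters
ot.RandomGenerator.SetSeed(0)
true_gm = ot.Distribution(GaussianMixture(-5.0, 1.0, 5.0, 1.0, 0.5))
sample = true_gm.getSample(1000)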
Estimating the parameters of this distribution is straightforward with MaximumLikelihoodFactory. However, we must set the bounds, because sigma cannot be negative and w must lie in (0, 1).
factory = ot.MaximumLikelihoodFactory(gm)
lowerBound = [0.0, 1.e-6, 0.0, 1.e-6, 0.01]
upperBound = [0.0, 0.0, 0.0, 0.0, 0.99]
finiteLowerBound = [False, True, False, True, True]
finiteUpperBound = [False, False, False, False, True]
bounds = ot.Interval(lowerBound, upperBound, finiteLowerBound, finiteUpperBound)
factory.setOptimizationBounds(bounds)
Then we configure the optimization solver.
solver = factory.getOptimizationAlgorithm()
startingPoint = [-4.0, 1.0, 7.0, 1.5, 0.3]
solver.setStartingPoint(startingPoint)
factory.setOptimizationAlgorithm(solver)
Estimating the parameters is based on the build method.
distribution = factory.build(sample)
There are two limitations with this implementation.
First, it is not as fast as it should be, because of a limitation of the PythonDistribution (see https://github.com/openturns/openturns/issues/1391).
Second, estimating the parameters may be difficult, because the problem may have local optima that cannot be escaped by the default algorithm in MaximumLikelihoodFactory. This kind of task is generally done with the EM algorithm.

Function approximator and q-learning

I am trying to implement Q-learning with an action-value approximation function. I am using openai-gym and the "MountainCar-v0" environment to test my algorithm. My problem is that it does not converge or find the goal at all.
Basically, the approximator works as follows: you feed in the 2 features, position and velocity, and one of the 3 actions in a one-hot encoding: 0 -> [1,0,0], 1 -> [0,1,0] and 2 -> [0,0,1]. The output is the action-value approximation Q_approx(s,a) for that specific action.
I know that usually the input is the state (2 features) and the output layer contains 1 output for each action. The big difference I see is that I run the feed-forward pass 3 times (once for each action) and take the max, while in the standard implementation you run it once and take the max over the outputs.
Maybe my implementation is just completely wrong and I am thinking about it wrong. I'll paste the code here; it is a mess, but I am just experimenting a bit:
import gym
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation

env = gym.make('MountainCar-v0')

# The mean reward over 20 episodes
mean_rewards = np.zeros(20)
# Feature numpy holder
features = np.zeros(5)
# Q_a value holder
qa_vals = np.zeros(3)

one_hot = {
    0: np.asarray([1, 0, 0]),
    1: np.asarray([0, 1, 0]),
    2: np.asarray([0, 0, 1])
}

model = Sequential()
model.add(Dense(20, activation="relu", input_dim=5))
model.add(Dense(10, activation="relu"))
model.add(Dense(1))
model.compile(optimizer='rmsprop',
              loss='mse',
              metrics=['accuracy'])

epsilon_greedy = 0.1
discount = 0.9
batch_size = 16

# Experience replay containing features and target
experience = np.ones((10 * 300, 5 + 1))
fill_index = 0       # write position in the buffer (missing in the original snippet)
filled_once = False  # buffer wrapped at least once (missing in the original snippet)

# Ring buffer
def add_exp(features, target, index):
    if index % experience.shape[0] == 0:
        index = 0
        global filled_once
        filled_once = True
    experience[index, 0:5] = features
    experience[index, 5] = target
    index += 1
    return index

for e in range(0, 100000):
    obs = env.reset()
    old_obs = None
    new_obs = obs
    rewards = 0
    loss = 0
    for i in range(0, 300):
        if old_obs is not None:
            # Find q_a max for s_(t+1)
            features[0:2] = new_obs
            for j, pa in enumerate([0, 1, 2]):  # renamed from i to avoid shadowing the step counter
                features[2:5] = one_hot[pa]
                qa_vals[j] = model.predict(features.reshape(-1, 5))
            rewards += reward
            target = reward + discount * np.max(qa_vals)
            features[0:2] = old_obs
            features[2:5] = one_hot[a]
            fill_index = add_exp(features, target, fill_index)
            # Find new action
            if np.random.random() < epsilon_greedy:
                a = env.action_space.sample()
            else:
                a = np.argmax(qa_vals)
        else:
            a = env.action_space.sample()
        obs, reward, done, info = env.step(a)
        old_obs = new_obs
        new_obs = obs
        if done:
            break
        if filled_once:
            samples_ids = np.random.choice(experience.shape[0], batch_size)
            loss += model.train_on_batch(experience[samples_ids, 0:5],
                                         experience[samples_ids, 5].reshape(-1))[0]
    mean_rewards[e % 20] = rewards
    print("e = {} and loss = {}".format(e, loss))
    if e % 50 == 0:
        print("e = {} and mean = {}".format(e, mean_rewards.mean()))
Thanks in advance!
There shouldn't be much difference between feeding the actions as inputs to your network and having one output per action. It does make a huge difference if your states are images, for example, because conv nets work very well with images and there would be no obvious way of integrating the actions into the image input.
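For reference, here is a minimal sketch (our addition, mirroring the Keras setup in the question) of the standard layout, where the network takes only the state and outputs one Q-value per action, so a single forward pass scores all three actions:

from keras.models import Sequential
from keras.layers import Dense

# state-only input (position, velocity), one Q-value per action
standard_model = Sequential()
standard_model.add(Dense(20, activation="relu", input_dim=2))
standard_model.add(Dense(10, activation="relu"))
standard_model.add(Dense(3))  # Q(s, a0), Q(s, a1), Q(s, a2)
standard_model.compile(optimizer='rmsprop', loss='mse')

# greedy action in a single pass:
# q_values = standard_model.predict(state.reshape(1, 2))[0]
# a = q_values.argmax()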
Have you tried the CartPole balancing environment? It is better for testing whether your model is working correctly.
MountainCar is pretty hard. It has no reward until you reach the top, which often doesn't happen at all. The model will only start learning something useful once you get to the top once. If you are never getting to the top, you should probably increase the time you spend on exploration; in other words, take more random actions. A lot more.
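One simple way to take "a lot more" random actions early on (a sketch of our own, not from the original answer) is to anneal epsilon over time instead of keeping the fixed epsilon_greedy = 0.1:

# Linearly anneal epsilon from 1.0 to 0.05 over the first 50,000 steps,
# so early episodes are almost pure exploration
epsilon_start, epsilon_end, anneal_steps = 1.0, 0.05, 50000

def epsilon_at(step):
    frac = min(1.0, step / anneal_steps)
    return epsilon_start + frac * (epsilon_end - epsilon_start)

# in the training loop, keep a running step counter (not in the original code)
# and use epsilon_at(total_steps) in place of epsilon_greedy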

Using a metamodel in a design process using a nested approach

We are interested in using a surrogate model in an aircraft design process implemented in OpenMDAO. Basically, we want to use an aerodynamic code (such as VSPaero in our case) to produce a database (using a DOE) and then build a surrogate that will be used in the design process. It looks like your proposal 2) in "use of MOE in openMDAO", and we also want access to the "gradient" information of the surrogate for use in the full design problem.
We started from the code you provided in the nested problem question and tried to build a mock-up case with a simplified component for the aerodynamics. The example code is below (using kriging), and we have two concerns to finish it:
we need to implement a "linearize" function in our component if we want to use the surrogate's gradient information: I guess we should use the "calc_gradient" function of the problem to do this. Is that right?
in our example code, the training is done each time we call the component, which is not very efficient: is there a way to call it only once, or to do the surrogate training only after the setup() of the bigger problem (the aircraft design in our case)?
Here is the code (sorry, it is a bit long):
from openmdao.api import IndepVarComp, Group, Problem, ScipyOptimizer, ExecComp, \
    DumpRecorder, Component, NLGaussSeidel, ScipyGMRES, Newton, SqliteRecorder, \
    MetaModel, KrigingSurrogate, FloatKrigingSurrogate
from openmdao.drivers.latinhypercube_driver import LatinHypercubeDriver, OptimizedLatinHypercubeDriver
from openmdao.solvers.solver_base import NonLinearSolver
import numpy as np
import sys

alpha_test = np.array([0.56, 0.24, 0.30, 0.32, 0.20])
eta_test = np.array([-0.30, -0.14, -0.19, -0.18, -0.12])
num_elem = len(alpha_test)


class SysAeroSurrogate(Component):
    """ Simulates the presence of an aero surrogate model using a linear
    aerodynamic model, coming from the pyMission code:
    https://github.com/OpenMDAO-Plugins/pyMission/blob/master/src/pyMission/aerodynamics.py """

    def __init__(self, num_elem=1):
        super(SysAeroSurrogate, self).__init__()
        self.add_param('alpha', 0.5)
        self.add_param('eta', -0.33)
        self.add_param('AR', 0.0)
        self.add_param('oswald', 0.0)
        self.add_output('CL', val=0.0)  ## Lift coefficient
        self.add_output('CD', val=0.0)  ## Drag coefficient

    def solve_nonlinear(self, params, unknowns, resids):
        """ Compute lift and drag coefficients using angle of attack and tail
        rotation angle. Linear aerodynamics is assumed. """
        alpha = params['alpha']
        eta = params['eta']
        aspect_ratio = params['AR']
        oswald = params['oswald']
        lift_c0 = 0.30
        lift_ca = 6.00
        lift_ce = 0.27
        drag_c0 = 0.015
        unknowns['CL'] = lift_c0 + lift_ca*alpha*1e-1 + lift_ce*eta*1e-1
        unknowns['CD'] = (drag_c0 + (unknowns['CL'])**2 / (np.pi * aspect_ratio * oswald)) / 1e-1


class SuroMM(Group):
    def __init__(self):
        super(SuroMM, self).__init__()
        # kriging
        AeroMM = self.add("AeroMM", MetaModel())
        AeroMM.add_param('alpha', val=0.)
        AeroMM.add_param('eta', val=0.)
        AeroMM.add_output('CL_MM', val=0., surrogate=FloatKrigingSurrogate())
        AeroMM.add_output('CD_MM', val=0., surrogate=FloatKrigingSurrogate())


class SurrogateAero(Component):
    def __init__(self):
        super(SurrogateAero, self).__init__()
        ## Inputs to this subproblem
        self.add_param('alpha', val=0.5*np.ones(num_elem))  ## Angle of attack
        self.add_param('eta', val=0.5*np.ones(num_elem))    ## Tail rotation angle
        self.add_param('AR', 0.0)
        self.add_param('oswald', 0.0)
        ## Unknowns of this subproblem
        self.add_output('CD', val=np.zeros(num_elem))
        self.add_output('CL', val=np.zeros(num_elem))

        self.problem = prob = Problem()
        prob.root = Group()
        prob.root.add('d1', SuroMM(), promotes=['*'])
        prob.setup()

        #### training of the metamodel
        prob['AeroMM.train:alpha'] = DOEX1
        prob['AeroMM.train:eta'] = DOEX2
        prob['AeroMM.train:CL_MM'] = DOEY1
        prob['AeroMM.train:CD_MM'] = DOEY2

    def solve_nonlinear(self, params, unknowns, resids):
        CL_temp = np.zeros(num_elem)
        CD_temp = np.zeros(num_elem)
        prob = self.problem
        # Pass values into our problem
        for i in range(len(params['alpha'])):
            prob['AeroMM.alpha'] = params['alpha'][i]
            prob['AeroMM.eta'] = params['eta'][i]
            # Run problem
            prob.run()
            CL_temp[i] = prob['AeroMM.CL_MM']
            CD_temp[i] = prob['AeroMM.CD_MM']
        # Pull values from problem
        unknowns['CL'] = CL_temp
        unknowns['CD'] = CD_temp
if __name__ == "__main__":
###### creation of database with DOE #####
top = Problem()
root = top.root = Group()
root.add('comp', SysAeroSurrogate(), promotes=['*'])
root.add('p1', IndepVarComp('alpha', val=0.50), promotes=['*'])
root.add('p2', IndepVarComp('eta',val=0.50), promotes=['*'])
root.add('p3', IndepVarComp('AR', 10.), promotes=['*'])
root.add('p4', IndepVarComp('oswald', 0.92), promotes=['*'])
top.driver = OptimizedLatinHypercubeDriver(num_samples=16, seed=0, population=20, generations=4, norm_method=2)
top.driver.add_desvar('alpha', lower=-5.0*(np.pi/180.0)*1e-1, upper=15.0*(np.pi/180.0)*1e-1)
top.driver.add_desvar('eta', lower=-5.0*(np.pi/180.0)*1e-1, upper=15.0*(np.pi/180.0)*1e-1)
top.driver.add_objective('CD')
recorder = SqliteRecorder('Aero')
recorder.options['record_params'] = True
recorder.options['record_unknowns'] = True
recorder.options['record_resids'] = False
recorder.options['record_metadata'] = False
top.driver.add_recorder(recorder)
top.setup()
top.run()
import sqlitedict
db = sqlitedict.SqliteDict( 'Aero', 'openmdao' )
print( list( db.keys() ) )
DOEX1 = []
DOEX2 = []
DOEY1 = []
DOEY2 = []
for i in list(db.keys()):
data = db[i]
p = data['Parameters']
DOEX1.append(p['comp.alpha'])
DOEX2.append(p['comp.eta'])
p = data['Unknowns']
DOEY1.append(p['CL'])
DOEY2.append(p['CD'])
################ use of surrogate model ######
prob2 = Problem(root=Group())
prob2.root.add('SurrAero', SurrogateAero(), promotes=['*'])
prob2.root.add('v1', IndepVarComp('alpha', val=alpha_test), promotes=['*'])
prob2.root.add('v2', IndepVarComp('eta',val=eta_test), promotes=['*'])
prob2.setup()
prob2.run()
print'CL predicted:', prob2['CL']
print'CD predicted:', prob2['CD']
The way you have your model set up seems correct. The MetaModel component will only train its data one time (on the first pass through the model), as you can see in this part of the source code. Every subsequent iteration, it just uses the trained surrogate that's already there.
The meta-model is also already set up to provide analytic derivatives of the predicted output with respect to the input independent variables. Derivatives of the prediction with respect to the training point values are not available in the base implementation. That would require a more complex setup and, at least for the moment, some custom work that is not in the standard library.