Using a metamodel in a design process using a nested approach - metamodel

We are interested in using a surrogate model in an aircraft design process implemented in OpenMDAO. Basically we want to use an aerodynamic code (such as VSPaero in our aim) to produce a database (using a DOE ) and then built a surrogate that will be used in the design process. It looks like your proposal 2) in use of MOE in openMDAO and we also want to access to the "gradient" information of the surrogate to be used in the full design problem .
We started from the code you have provided in nested problem question and try to built a mock up case with simplified component for aerodynamic . The example code is below (using kriging) and we have two concerns to finish it:
we need to implement a "linearize" function in our component if we want to use surrogate gradient information: I guess we should use the "calc_gradient" function of problem to do this . Is it right ?
in our example code, the training will be done each time we call the component what is not very efficient : is there a way to call it only once or to do the surrogate training only after the setup() of the bigger problem (aircraft design in our case )?
Here is the code (sorry it is a bit long):
from openmdao.api import IndepVarComp, Group, Problem, ScipyOptimizer, ExecComp, DumpRecorder, Component, NLGaussSeidel,ScipyGMRES, Newton,SqliteRecorder,MetaModel, \
KrigingSurrogate, FloatKrigingSurrogate
from openmdao.drivers.latinhypercube_driver import LatinHypercubeDriver, OptimizedLatinHypercubeDriver
from openmdao.solvers.solver_base import NonLinearSolver
import numpy as np
import sys
alpha_test = np.array([0.56, 0.24, 0.30, 0.32, 0.20])
eta_test = np.array([-0.30, -0.14, -0.19, -0.18, -0.12])
num_elem = len(alpha_test)
class SysAeroSurrogate(Component):
""" Simulates the presence of an aero surrogate mode using linear aerodynamic model """
""" coming from pymission code """
""" https://github.com/OpenMDAO-Plugins/pyMission/blob/master/src/pyMission/aerodynamics.py """
def __init__(self, num_elem=1):
super(SysAeroSurrogate, self).__init__()
self.add_param('alpha', 0.5)
self.add_param('eta', -0.33)
self.add_param('AR', 0.0)
self.add_param('oswald', 0.0)
self.add_output('CL', val=0.0)
self.add_output('CD', val=0.0) ## Drag Coefficient
def solve_nonlinear(self, params, unknowns, resids):
""" Compute lift and drag coefficient using angle of attack and tail
rotation angles. Linear aerodynamics is assumed."""
alpha = params['alpha']
eta = params['eta']
aspect_ratio = params['AR']
oswald = params['oswald']
lift_c0 = 0.30
lift_ca = 6.00
lift_ce = 0.27
drag_c0 = 0.015
unknowns['CL'] = lift_c0 + lift_ca*alpha*1e-1 + lift_ce*eta*1e-1
unknowns['CD'] = (drag_c0 + (unknowns['CL'])**2 /(np.pi * aspect_ratio * oswald))/1e-1
class SuroMM(Group):
def __init__(self):
super(SuroMM, self).__init__()
#kriging
AeroMM = self.add("AeroMM", MetaModel())
AeroMM.add_param('alpha', val=0.)
AeroMM.add_param('eta', val=0.)
AeroMM.add_output('CL_MM', val=0., surrogate=FloatKrigingSurrogate())
AeroMM.add_output('CD_MM', val=0., surrogate=FloatKrigingSurrogate())
class SurrogateAero(Component):
def __init__(self):
super(SurrogateAero, self).__init__()
## Inputs to this subprob
self.add_param('alpha', val=0.5*np.ones(num_elem)) ## Angle of attack
self.add_param('eta', val=0.5*np.ones(num_elem)) ## Tail rotation angle
self.add_param('AR', 0.0)
self.add_param('oswald', 0.0)
## Unknowns for this sub prob
self.add_output('CD', val=np.zeros(num_elem))
self.add_output('CL', val=np.zeros(num_elem))
#####
self.problem = prob = Problem()
prob.root = Group()
prob.root.add('d1', SuroMM(), promotes=['*'])
prob.setup()
#### training of metamodel
prob['AeroMM.train:alpha'] = DOEX1
prob['AeroMM.train:eta'] = DOEX2
prob['AeroMM.train:CL_MM'] = DOEY1
prob['AeroMM.train:CD_MM'] =DOEY2
def solve_nonlinear(self, params, unknowns, resids):
CL_temp=np.zeros(num_elem)
CD_temp=np.zeros(num_elem)
prob = self.problem
# Pass values into our problem
for i in range(len(params['alpha'])):
prob['AeroMM.alpha'] = params['alpha'][i]
prob['AeroMM.eta'] = params['eta'][i]
# Run problem
prob.run()
CL_temp[i] = prob['AeroMM.CL_MM']
CD_temp[i] = prob['AeroMM.CD_MM']
# Pull values from problem
unknowns['CL'] = CL_temp
unknowns['CD'] = CD_temp
if __name__ == "__main__":
###### creation of database with DOE #####
top = Problem()
root = top.root = Group()
root.add('comp', SysAeroSurrogate(), promotes=['*'])
root.add('p1', IndepVarComp('alpha', val=0.50), promotes=['*'])
root.add('p2', IndepVarComp('eta',val=0.50), promotes=['*'])
root.add('p3', IndepVarComp('AR', 10.), promotes=['*'])
root.add('p4', IndepVarComp('oswald', 0.92), promotes=['*'])
top.driver = OptimizedLatinHypercubeDriver(num_samples=16, seed=0, population=20, generations=4, norm_method=2)
top.driver.add_desvar('alpha', lower=-5.0*(np.pi/180.0)*1e-1, upper=15.0*(np.pi/180.0)*1e-1)
top.driver.add_desvar('eta', lower=-5.0*(np.pi/180.0)*1e-1, upper=15.0*(np.pi/180.0)*1e-1)
top.driver.add_objective('CD')
recorder = SqliteRecorder('Aero')
recorder.options['record_params'] = True
recorder.options['record_unknowns'] = True
recorder.options['record_resids'] = False
recorder.options['record_metadata'] = False
top.driver.add_recorder(recorder)
top.setup()
top.run()
import sqlitedict
db = sqlitedict.SqliteDict( 'Aero', 'openmdao' )
print( list( db.keys() ) )
DOEX1 = []
DOEX2 = []
DOEY1 = []
DOEY2 = []
for i in list(db.keys()):
data = db[i]
p = data['Parameters']
DOEX1.append(p['comp.alpha'])
DOEX2.append(p['comp.eta'])
p = data['Unknowns']
DOEY1.append(p['CL'])
DOEY2.append(p['CD'])
################ use of surrogate model ######
prob2 = Problem(root=Group())
prob2.root.add('SurrAero', SurrogateAero(), promotes=['*'])
prob2.root.add('v1', IndepVarComp('alpha', val=alpha_test), promotes=['*'])
prob2.root.add('v2', IndepVarComp('eta',val=eta_test), promotes=['*'])
prob2.setup()
prob2.run()
print'CL predicted:', prob2['CL']
print'CD predicted:', prob2['CD']

The way you have your model set up seems correct. The MetaModel component will only train its data one time (the first pass through the model), as you can see in this part of the source code. Every subsequent iteration, it just uses the trained surrogate thats already there.
The meta-model is also already setup to provide analytic derivatives of the predicted output with respect to the input independent variables. Derivatives of the prediction with respect to the training point values are not available in the base implementation. That requires a more complex setup that, at least for the moment, will require some custom setup that is not in the standard library.

Related

Problem with PettingZoo and Stable-Baselines3 with a ParallelEnv

I am having trouble in making things work with a Custom ParallelEnv I wrote by using PettingZoo. I am using SuperSuit's ss.pettingzoo_env_to_vec_env_v1(env) as a wrapper to Vectorize the environment and make it work with Stable-Baseline3 and documented here.
You can find attached a summary of the most relevant part of the code:
from typing import Optional
from gym import spaces
import random
import numpy as np
from pettingzoo import ParallelEnv
from pettingzoo.utils.conversions import parallel_wrapper_fn
import supersuit as ss
from gym.utils import EzPickle, seeding
def env(**kwargs):
env_ = parallel_env(**kwargs)
env_ = ss.pettingzoo_env_to_vec_env_v1(env_)
#env_ = ss.concat_vec_envs_v1(env_, 1)
return env_
petting_zoo = env
class parallel_env(ParallelEnv, EzPickle):
metadata = {'render_modes': ['ansi'], "name": "PlayerEnv-Multi-v0"}
def __init__(self, n_agents: int = 20, new_step_api: bool = True) -> None:
EzPickle.__init__(
self,
n_agents,
new_step_api
)
self._episode_ended = False
self.n_agents = n_agents
self.possible_agents = [
f"player_{idx}" for idx in range(n_agents)]
self.agents = self.possible_agents[:]
self.agent_name_mapping = dict(
zip(self.possible_agents, list(range(len(self.possible_agents))))
)
self.observation_spaces = spaces.Dict(
{agent: spaces.Box(shape=(len(self.agents),),
dtype=np.float64, low=0.0, high=1.0) for agent in self.possible_agents}
)
self.action_spaces = spaces.Dict(
{agent: spaces.Discrete(4) for agent in self.possible_agents}
)
self.current_step = 0
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
def observation_space(self, agent):
return self.observation_spaces[agent]
def action_space(self, agent):
return self.action_spaces[agent]
def __calculate_observation(self, agent_id: int) -> np.ndarray:
return self.observation_space(agent_id).sample()
def __calculate_observations(self) -> np.ndarray:
observations = {
agent: self.__calculate_observation(
agent_id=agent)
for agent in self.agents
}
return observations
def observe(self, agent):
return self.__calculate_observation(agent_id=agent)
def step(self, actions):
if self._episode_ended:
return self.reset()
observations = self.__calculate_observations()
rewards = random.sample(range(100), self.n_agents)
self.current_step += 1
self._episode_ended = self.current_step >= 100
infos = {agent: {} for agent in self.agents}
dones = {agent: self._episode_ended for agent in self.agents}
rewards = {
self.agents[i]: rewards[i]
for i in range(len(self.agents))
}
if self._episode_ended:
self.agents = {} # To satisfy `set(par_env.agents) == live_agents`
return observations, rewards, dones, infos
def reset(self,
seed: Optional[int] = None,
return_info: bool = False,
options: Optional[dict] = None,):
self.agents = self.possible_agents[:]
self._episode_ended = False
self.current_step = 0
observations = self.__calculate_observations()
return observations
def render(self, mode="human"):
# TODO: IMPLEMENT
print("TO BE IMPLEMENTED")
def close(self):
pass
Unfortunately when I try to test with the following main procedure:
from stable_baselines3 import DQN, PPO
from stable_baselines3.common.env_checker import check_env
from dummy_env import dummy
from pettingzoo.test import parallel_api_test
if __name__ == '__main__':
# Testing the parallel algorithm alone
env_parallel = dummy.parallel_env()
parallel_api_test(env_parallel) # This works!
# Testing the environment with the wrapper
env = dummy.petting_zoo()
# ERROR: AssertionError: The observation returned by the `reset()` method does not match the given observation space
check_env(env)
# Model initialization
model = PPO("MlpPolicy", env, verbose=1)
# ERROR: ValueError: could not broadcast input array from shape (20,20) into shape (20,)
model.learn(total_timesteps=10_000)
I get the following error:
AssertionError: The observation returned by the `reset()` method does not match the given observation space
If I skip check_env() I get the following one:
ValueError: could not broadcast input array from shape (20,20) into shape (20,)
It seems like that ss.pettingzoo_env_to_vec_env_v1(env) is capable of splitting the parallel environment in multiple vectorized ones, but not for the reset() function.
Does anyone know how to fix this problem?
Plese find the Github Repository to reproduce the problem.
You should double check the reset() function in PettingZoo. It will return None instead of an observation like GYM
Thanks to discussion I had in the issue section of the SuperSuit repository, I am able to post the solution to the problem. Thanks to jjshoots!
First of all it is necessary to have the latest SuperSuit version. In order to get that I needed to install Stable-Baseline3 using the instructions here to make it work with gym 0.24+.
After that, taking the code in the question as example, it is necessary to substitute
def env(**kwargs):
env_ = parallel_env(**kwargs)
env_ = ss.pettingzoo_env_to_vec_env_v1(env_)
#env_ = ss.concat_vec_envs_v1(env_, 1)
return env_
with
def env(**kwargs):
env_ = parallel_env(**kwargs)
env_ = ss.pettingzoo_env_to_vec_env_v1(env_)
env_ = ss.concat_vec_envs_v1(env_, 1, base_class="stable_baselines3")
return env_
The outcomes are:
Outcome 1: leaving the line with check_env(env) I got an error AssertionError: Your environment must inherit from the gym.Env class cf https://github.com/openai/gym/blob/master/gym/core.py
Outcome 2: removing the line with check_env(env), the agent starts training successfully!
In the end, I think that the argument base_class="stable_baselines3" made the difference.
Only the small problem on check_env remains to be reported, but I think it can be considered as trivial if the training works.

Why is RandomCrop with size 84 and padding 8 returning an image size of 84 and not 100 in pytorch?

I was using the mini-imagenet data set and noticed this line of code:
elif data_augmentation == 'lee2019:
normalize = Normalize(
mean=[120.39586422 / 255.0, 115.59361427 / 255.0, 104.54012653 / 255.0],
std=[70.68188272 / 255.0, 68.27635443 / 255.0, 72.54505529 / 255.0],
)
train_data_transforms = Compose([
ToPILImage(),
RandomCrop(84, padding=8),
ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
RandomHorizontalFlip(),
ToTensor(),
normalize,
])
test_data_transforms = Compose([
normalize,
])
but when I checked the image size it was 84 instead of 100 (after adding padding):
X.size()=torch.Size([50, 3, 84, 84])
what is going on with this? Shouldn't it be 100?
reproduction:
import random
from typing import Callable
import learn2learn as l2l
import numpy as np
import torch
from learn2learn.data import TaskDataset, MetaDataset, DataDescription
from learn2learn.data.transforms import TaskTransform
from torch.utils.data import Dataset
class IndexableDataSet(Dataset):
def __init__(self, datasets):
self.datasets = datasets
def __len__(self) -> int:
return len(self.datasets)
def __getitem__(self, idx: int):
return self.datasets[idx]
class SingleDatasetPerTaskTransform(Callable):
"""
Transform that samples a data set first, then creates a task (e.g. n-way, k-shot) and finally
applies the remaining task transforms.
"""
def __init__(self, indexable_dataset: IndexableDataSet, cons_remaining_task_transforms: Callable):
"""
:param: cons_remaining_task_transforms; constructor that builds the remaining task transforms. Cannot be a list
of transforms because we don't know apriori which is the data set we will use. So this function should be of
type MetaDataset -> list[TaskTransforms] i.e. given the dataset it returns the transforms for it.
"""
self.indexable_dataset = MetaDataset(indexable_dataset)
self.cons_remaining_task_transforms = cons_remaining_task_transforms
def __call__(self, task_description: list):
"""
idea:
- receives the index of the dataset to use
- then use the normal NWays l2l function
"""
# - this is what I wish could have gone in a seperate callable transform, but idk how since the transforms take apriori (not dynamically) which data set to use.
i = random.randint(0, len(self.indexable_dataset) - 1)
task_description = [DataDescription(index=i)] # using this to follow the l2l convention
# - get the sampled data set
dataset_index = task_description[0].index
dataset = self.indexable_dataset[dataset_index]
dataset = MetaDataset(dataset)
# - use the sampled data set to create task
remaining_task_transforms: list[TaskTransform] = self.cons_remaining_task_transforms(dataset)
description = None
for transform in remaining_task_transforms:
description = transform(description)
return description
def sample_dataset(dataset):
def sample_random_dataset(x):
print(f'{x=}')
i = random.randint(0, len(dataset) - 1)
return [DataDescription(index=i)]
# return dataset[i]
return sample_random_dataset
def get_task_transforms(dataset: IndexableDataSet) -> list[TaskTransform]:
"""
:param dataset:
:return:
"""
transforms = [
sample_dataset(dataset),
l2l.data.transforms.NWays(dataset, n=5),
l2l.data.transforms.KShots(dataset, k=5),
l2l.data.transforms.LoadData(dataset),
l2l.data.transforms.RemapLabels(dataset),
l2l.data.transforms.ConsecutiveLabels(dataset),
]
return transforms
def print_datasets(dataset_lst: list):
for dataset in dataset_lst:
print(f'\n{dataset=}\n')
def get_indexable_list_of_datasets_mi_and_cifarfs(root: str = '~/data/l2l_data/') -> IndexableDataSet:
from learn2learn.vision.benchmarks import mini_imagenet_tasksets
datasets, transforms = mini_imagenet_tasksets(root=root)
mi = datasets[0].dataset
from learn2learn.vision.benchmarks import cifarfs_tasksets
datasets, transforms = cifarfs_tasksets(root=root)
cifarfs = datasets[0].dataset
dataset_list = [mi, cifarfs]
dataset_list = [l2l.data.MetaDataset(dataset) for dataset in dataset_list]
dataset = IndexableDataSet(dataset_list)
return dataset
# -- tests
def loop_through_l2l_indexable_datasets_test():
"""
:return:
"""
# - for determinism
random.seed(0)
torch.manual_seed(0)
np.random.seed(0)
# - options for number of tasks/meta-batch size
batch_size: int = 10
# - create indexable data set
indexable_dataset: IndexableDataSet = get_indexable_list_of_datasets_mi_and_cifarfs()
# - get task transforms
def get_remaining_transforms(dataset: MetaDataset) -> list[TaskTransform]:
remaining_task_transforms = [
l2l.data.transforms.NWays(dataset, n=5),
l2l.data.transforms.KShots(dataset, k=5),
l2l.data.transforms.LoadData(dataset),
l2l.data.transforms.RemapLabels(dataset),
l2l.data.transforms.ConsecutiveLabels(dataset),
]
return remaining_task_transforms
task_transforms: TaskTransform = SingleDatasetPerTaskTransform(indexable_dataset, get_remaining_transforms)
# -
taskset: TaskDataset = TaskDataset(dataset=indexable_dataset, task_transforms=task_transforms)
# - loop through tasks
for task_num in range(batch_size):
print(f'{task_num=}')
X, y = taskset.sample()
print(f'{X.size()=}')
print(f'{y.size()=}')
print(f'{y=}')
print()
print('-- end of test --')
# -- Run experiment
if __name__ == "__main__":
import time
from uutils import report_times
start = time.time()
# - run experiment
loop_through_l2l_indexable_datasets_test()
# - Done
print(f"\nSuccess Done!: {report_times(start)}\a")
context: https://github.com/learnables/learn2learn/issues/333
crossposted:
https://discuss.pytorch.org/t/why-is-randomcrop-with-size-84-and-padding-8-returning-an-image-size-of-84-and-not-100-in-pytorch/151463
https://www.reddit.com/r/pytorch/comments/uno1ih/why_is_randomcrop_with_size_84_and_padding_8/
The padding is applied to the input image or tensor before applying the random crop. Ultimately, the output image has a spatial size equal to that of the provided size(s) given to the T.RandomCrop function since the operation is performed after.
After all, it makes more sense to pad the input image rather than the cropped image, doesn't it?

Why is my Transformer implementation losing to a BiLSTM?

I am dealing with a sequence tagging problem and I am using a single Transformer Encoder to obtain logits from each element of the sequence. Having experimented both with Transformer and BiLSTM it looks like in my case BiLSTM is working better, so I was wondering if maybe it is because my Transformer implementation has some problem... Below is my implementation of the Transformer Encoder and related functions for creating padding mask and positional embeddings:
def create_mask(src, lengths):
"""Create a mask hiding future tokens
Parameters:
src (tensor): the source tensor having shape [batch_size, number_of_steps, features_dimensions]
length (list): a list of integers representing the length (i.e. number_of_steps) of each sample in the batch."""
mask = []
max_len = src.shape[1]
for index, i in enumerate(src):
# The mask consists in tensors having false at the step number that doesn't need to be hidden and true otherwise
mask.append([False if (i+1)>lengths[index] else True for i in range(max_len)])
return torch.tensor(mask)
class PositionalEncoding(nn.Module):
def __init__(self, d_model, dropout=0.1, max_len=5000, device = 'cpu'):
super().__init__()
self.dropout = nn.Dropout(p=dropout)
self.device = device
position = torch.arange(max_len).unsqueeze(1)
div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
pe = torch.zeros(1, max_len, d_model)
pe[0, :, 0::2] = torch.sin(position * div_term)
pe[0, :, 1::2] = torch.cos(position * div_term)
self.register_buffer('pe', pe)
def forward(self, x):
x = x + self.pe[:, :x.size(1), :].to(self.device)
return self.dropout(x)
class Transformer(nn.Module):
"""Class implementing transformer ecnoder, partially based on
https://pytorch.org/tutorials/beginner/transformer_tutorial.html"""
def __init__(self, in_dim, h_dim, n_heads, n_layers, dropout=0.2, drop_out = 0.0, batch_first = True, device = 'cpu', positional_encoding = True):
super(Transformer, self).__init__()
self.model_type = 'Transformer'
self.pos_encoder = PositionalEncoding(in_dim, dropout, device = device)
encoder_layers = nn.TransformerEncoderLayer(in_dim, n_heads, h_dim, dropout)
self.transformer_encoder = nn.TransformerEncoder(encoder_layers, n_layers, norm=nn.LayerNorm(in_dim))
self.in_dim = in_dim
self.drop_out = drop_out
self.positional_encoding = positional_encoding
def forward(self, src, mask = None, line_len=None):
src = src * math.sqrt(self.in_dim)
if self.positional_encoding:
src = self.pos_encoder(src)
if line_len is not None and mask is None:
mask = create_mask(src, line_len)
else:
mask = None
output = self.transformer_encoder(src, src_key_padding_mask = mask)
if self.drop_out:
output = F.dropout(output, p = self.drop_out)
return src, output
As it can be seen, the above network outputs the hidden states and then I pass them into an additional linear layer and train with a CrossEntropy loss over two classes and Adam optimizer. I have tried multiple combinations of hyperparameters but the BiLSTM still performs better. Can anyone spot anything off in my Transformer or suggest why I experience such a counterintuitive result?
This may be surprising, but Transformers don't always beat LSTMs. For example, Language Models with Transformers states:
Transformer architectures are suboptimal for language model itself. Neither self-attention nor the positional encoding in the Transformer is able to efficiently incorporate the word-level sequential context crucial to language modeling.
If you run the Transformer tutorial code itself (on which your code is based), you'll also see LSTM do better there. See this thread on stats.SE for more discussion on this topic (disclaimer: both the question and the answer there are mine)

convert pytorch model with multiple networks to onnx

I am trying to convert pytorch model with multiple networks to ONNX, and encounter some problem.
The git repo: https://github.com/InterDigitalInc/HRFAE
The Trainer Class:
class Trainer(nn.Module):
def __init__(self, config):
super(Trainer, self).__init__()
# Load Hyperparameters
self.config = config
# Networks
self.enc = Encoder()
self.dec = Decoder()
self.mlp_style = Mod_Net()
self.dis = Dis_PatchGAN()
...
Here is how the trained model process image:
def gen_encode(self, x_a, age_a, age_b=0, training=False, target_age=0):
if target_age:
self.target_age = target_age
age_modif = self.target_age*torch.ones(age_a.size()).type_as(age_a)
else:
age_modif = self.random_age(age_a, diff_val=25)
# Generate modified image
self.content_code_a, skip_1, skip_2 = self.enc(x_a)
style_params_a = self.mlp_style(age_a)
style_params_b = self.mlp_style(age_modif)
x_a_recon = self.dec(self.content_code_a, style_params_a, skip_1, skip_2)
x_a_modif = self.dec(self.content_code_a, style_params_b, skip_1, skip_2)
return x_a_recon, x_a_modif, age_modif
And as following is how I did to convert to onnx:
enc = Encoder()
dec = Decoder()
mlp = Mod_Net()
layers = [enc, mlp, dec]
model = torch.nn.Sequential(*layers)
# here is my confusion: how do I specify the inputs of each layer??
# E.g. one of the outputs of 'enc' layer should be input of 'mlp' layer,
# or the outputs of 'enc' layer should be part of inputs of 'dec' layer...
params = torch.load('./logs/001/checkpoint')
model[0].load_state_dict(params['enc_state_dict'])
model[1].load_state_dict(params['mlp_style_state_dict'])
model[2].load_state_dict(params['dec_state_dict'])
torch.onnx.export(model, torch.randn([1, 3, 1024, 1024]), 'trained_hrfae.onnx', do_constant_folding=True)
Maybe the convert-part code is in wrong way??
Could anyone help, many thanks!
#20210629-11:52GMT Edit:
I found there's constraint of using torch.nn.Sequential. The output of former layer in Sequential should be consistent with latter input.
So my code shouldn't work at all because the output of 'enc' layer is not consistent with input of 'mlp' layer.
Could anyone help how to convert this type of pytorch model to onnx? Many thanks, again :)
After research and try, I found a method which maybe in correct way:
Convert each net(Encoder, Mod_Net, Decoder) to onnx model, and handle their input/output in latter logic-process or any further procedure (e.g convert to tflite model).
I'm trying to port onto Android using this method.
#Edit 20210705-03:52GMT#
Another approach may be better: write a new net combines the three nets. I've prove the output is same as origin pytorch model.
class HRFAE(nn.Module):
def __init__(self):
super(HRFAE, self).__init__()
self.enc = Encoder()
self.mlp_style = Mod_Net()
self.dec = Decoder()
def forward(self, x, age_modif):
content_code_a, skip_1, skip_2 = self.enc(x)
style_params_b = self.mlp_style(age_modif)
x_a_modif = self.dec(content_code_a, style_params_b, skip_1, skip_2)
return x_a_modif
and then convert use following:
net = HRFAE()
params = torch.load('./logs/002/checkpoint')
net.enc.load_state_dict(params['enc_state_dict'])
net.mlp_style.load_state_dict(params['mlp_style_state_dict'])
net.dec.load_state_dict(params['dec_state_dict'])
net.eval()
torch.onnx.export(net, (torch.randn([1, 3, 512, 512]), torch.randn([1]).type(torch.long)), 'test_hrfae.onnx')
This should be the answer.

Function approximator and q-learning

I am trying to implement q-learning with an action-value approximation-function. I am using openai-gym and the "MountainCar-v0" enviroment to test my algorithm out. My problem is, it does not converge or find the goal at all.
Basically the approximator works like the following, you feed in the 2 features: position and velocity and one of the 3 actions in a one-hot encoding: 0 -> [1,0,0], 1 -> [0,1,0] and 2 -> [0,0,1]. The output is the action-value approximation Q_approx(s,a), for one specific action.
I know that usually, the input is the state (2 features) and the output layer contains 1 output for each action. The big difference that I see is that I have run the feed forward pass 3 times (one for each action) and take the max, while in the standard implementation you run it once and take the max over the output.
Maybe my implementation is just completely wrong and I am thinking wrong. Gonna paste the code here, it is a mess but I am just experimenting a bit:
import gym
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation
env = gym.make('MountainCar-v0')
# The mean reward over 20 episodes
mean_rewards = np.zeros(20)
# Feature numpy holder
features = np.zeros(5)
# Q_a value holder
qa_vals = np.zeros(3)
one_hot = {
0 : np.asarray([1,0,0]),
1 : np.asarray([0,1,0]),
2 : np.asarray([0,0,1])
}
model = Sequential()
model.add(Dense(20, activation="relu",input_dim=(5)))
model.add(Dense(10,activation="relu"))
model.add(Dense(1))
model.compile(optimizer='rmsprop',
loss='mse',
metrics=['accuracy'])
epsilon_greedy = 0.1
discount = 0.9
batch_size = 16
# Experience replay containing features and target
experience = np.ones((10*300,5+1))
# Ring buffer
def add_exp(features,target,index):
if index % experience.shape[0] == 0:
index = 0
global filled_once
filled_once = True
experience[index,0:5] = features
experience[index,5] = target
index += 1
return index
for e in range(0,100000):
obs = env.reset()
old_obs = None
new_obs = obs
rewards = 0
loss = 0
for i in range(0,300):
if old_obs is not None:
# Find q_a max for s_(t+1)
features[0:2] = new_obs
for i,pa in enumerate([0,1,2]):
features[2:5] = one_hot[pa]
qa_vals[i] = model.predict(features.reshape(-1,5))
rewards += reward
target = reward + discount*np.max(qa_vals)
features[0:2] = old_obs
features[2:5] = one_hot[a]
fill_index = add_exp(features,target,fill_index)
# Find new action
if np.random.random() < epsilon_greedy:
a = env.action_space.sample()
else:
a = np.argmax(qa_vals)
else:
a = env.action_space.sample()
obs, reward, done, info = env.step(a)
old_obs = new_obs
new_obs = obs
if done:
break
if filled_once:
samples_ids = np.random.choice(experience.shape[0],batch_size)
loss += model.train_on_batch(experience[samples_ids,0:5],experience[samples_ids,5].reshape(-1))[0]
mean_rewards[e%20] = rewards
print("e = {} and loss = {}".format(e,loss))
if e % 50 == 0:
print("e = {} and mean = {}".format(e,mean_rewards.mean()))
Thanks in advance!
There shouldn't be much difference between the actions as inputs to your network or as different outputs of your network. It does make a huge difference if your states are images for example. because Conv nets work very well with images and there would be no obvious way of integrating the actions to the input.
Have you tried the cartpole balancing environment? It is better to test if your model is working correctly.
Mountain climb is pretty hard. It has no reward until you reach the top, which often doesn't happen at all. The model will only start learning something useful once you get to the top once. If you are never getting to the top you should probably increase your time doing exploration. in other words take more random actions, a lot more...