Refactor policy config

Cadene 2024-02-25 18:26:44 +00:00
parent b16c334825
commit 5a219fed6e
4 changed files with 83 additions and 85 deletions

@@ -1,10 +1,13 @@
defaults:
  - _self_
+  - env: simxarm
+  - policy: tdmpc
hydra:
  run:
-    dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.name}
+    dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${env.name}_${policy.name}_${hydra.job.name}
  job:
    name: default
seed: 1337
device: cuda
@@ -20,82 +23,7 @@ fps: ???
env: ???
-policy:
-  name: tdmpc
-  reward_scale: 1.0
-  # xarm_lift
-  train_steps: ${train_steps}
-  episode_length: ${env.episode_length}
-  discount: 0.9
-  modality: 'all'
-  # pixels
-  frame_stack: 1
-  num_channels: 32
-  img_size: ${env.image_size}
-  state_dim: ???
-  action_dim: ???
-  # planning
-  mpc: true
-  iterations: 6
-  num_samples: 512
-  num_elites: 50
-  mixture_coef: 0.1
-  min_std: 0.05
-  max_std: 2.0
-  temperature: 0.5
-  momentum: 0.1
-  uncertainty_cost: 1
-  # actor
-  log_std_min: -10
-  log_std_max: 2
-  # learning
-  batch_size: 256
-  max_buffer_size: 10000
-  horizon: 5
-  reward_coef: 0.5
-  value_coef: 0.1
-  consistency_coef: 20
-  rho: 0.5
-  kappa: 0.1
-  lr: 3e-4
-  std_schedule: ${policy.min_std}
-  horizon_schedule: ${policy.horizon}
-  per: true
-  per_alpha: 0.6
-  per_beta: 0.4
-  grad_clip_norm: 10
-  seed_steps: 0
-  update_freq: 2
-  tau: 0.01
-  utd: 1
-  # offline rl
-  # dataset_dir: ???
-  data_first_percent: 1.0
-  is_data_clip: true
-  data_clip_eps: 1e-5
-  expectile: 0.9
-  A_scaling: 3.0
-  # offline->online
-  offline_steps: 25000 # ${train_steps}/2
-  pretrained_model_path: ""
-  # pretrained_model_path: "/home/rcadene/code/fowm/logs/xarm_lift/all/default/2/models/offline.pt"
-  # pretrained_model_path: "/home/rcadene/code/fowm/logs/xarm_lift/all/default/2/models/final.pt"
-  balanced_sampling: true
-  demo_schedule: 0.5
-  # architecture
-  enc_dim: 256
-  num_q: 5
-  mlp_dim: 512
-  latent_dim: 50
+policy: ???
wandb:
  enable: true
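In the hunks above, the inline policy block is replaced by a `policy` entry in the defaults list, and the run directory now embeds `${env.name}` and `${policy.name}` so runs for different env/policy pairs land in distinct folders. Below is a minimal, self-contained sketch of how that directory pattern resolves; it uses plain OmegaConf, re-registers the `now` resolver that Hydra normally provides, and stands in a local `job_name` key for `${hydra.job.name}`, which only exists inside a Hydra run.

```python
from datetime import datetime

from omegaconf import OmegaConf

# Hydra registers `now` itself; re-register it here only so the snippet runs standalone.
OmegaConf.register_new_resolver("now", lambda fmt: datetime.now().strftime(fmt))

cfg = OmegaConf.create({
    "env": {"name": "simxarm"},
    "policy": {"name": "tdmpc"},
    "job_name": "default",  # stand-in for ${hydra.job.name}
    "dir": "outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${env.name}_${policy.name}_${job_name}",
})

# Accessing the key resolves the interpolations,
# e.g. outputs/2024-02-25/18-26-44_simxarm_tdmpc_default
print(cfg.dir)
```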

@@ -1,9 +1,5 @@
# @package _global_
-hydra:
-  job:
-    name: pusht
eval_episodes: 50
eval_freq: 7500
save_freq: 75000
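This env config, and the simxarm one below, begin with the `# @package _global_` directive, so Hydra merges their keys at the root of the composed config rather than under the group name; `eval_episodes`, `eval_freq`, and `save_freq` end up as top-level fields next to `seed` and `device`. A self-contained sketch of that behavior follows; the throwaway config tree and file names are illustrative, not part of this commit.

```python
import pathlib
import tempfile

from hydra import compose, initialize_config_dir

# Build a throwaway config tree that mirrors the layout above.
root = pathlib.Path(tempfile.mkdtemp())
(root / "env").mkdir()
(root / "default.yaml").write_text("defaults:\n  - _self_\n  - env: pusht\n")
(root / "env" / "pusht.yaml").write_text(
    "# @package _global_\neval_episodes: 50\neval_freq: 7500\nsave_freq: 75000\n"
)

with initialize_config_dir(config_dir=str(root), version_base=None):
    cfg = compose(config_name="default")

print(cfg.eval_episodes)  # 50 -- merged at the root, not under cfg.env
```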

@@ -1,9 +1,5 @@
# @package _global_
-hydra:
-  job:
-    name: simxarm
eval_episodes: 20
eval_freq: 1000
save_freq: 10000

@@ -0,0 +1,78 @@
# @package _global_
policy:
  name: tdmpc
  reward_scale: 1.0
  # xarm_lift
  train_steps: ${train_steps}
  episode_length: ${env.episode_length}
  discount: 0.9
  modality: 'all'
  # pixels
  frame_stack: 1
  num_channels: 32
  img_size: ${env.image_size}
  state_dim: ???
  action_dim: ???
  # planning
  mpc: true
  iterations: 6
  num_samples: 512
  num_elites: 50
  mixture_coef: 0.1
  min_std: 0.05
  max_std: 2.0
  temperature: 0.5
  momentum: 0.1
  uncertainty_cost: 1
  # actor
  log_std_min: -10
  log_std_max: 2
  # learning
  batch_size: 256
  max_buffer_size: 10000
  horizon: 5
  reward_coef: 0.5
  value_coef: 0.1
  consistency_coef: 20
  rho: 0.5
  kappa: 0.1
  lr: 3e-4
  std_schedule: ${policy.min_std}
  horizon_schedule: ${policy.horizon}
  per: true
  per_alpha: 0.6
  per_beta: 0.4
  grad_clip_norm: 10
  seed_steps: 0
  update_freq: 2
  tau: 0.01
  utd: 1
  # offline rl
  # dataset_dir: ???
  data_first_percent: 1.0
  is_data_clip: true
  data_clip_eps: 1e-5
  expectile: 0.9
  A_scaling: 3.0
  # offline->online
  offline_steps: 25000 # ${train_steps}/2
  pretrained_model_path: ""
  # pretrained_model_path: "/home/rcadene/code/fowm/logs/xarm_lift/all/default/2/models/offline.pt"
  # pretrained_model_path: "/home/rcadene/code/fowm/logs/xarm_lift/all/default/2/models/final.pt"
  balanced_sampling: true
  demo_schedule: 0.5
  # architecture
  enc_dim: 256
  num_q: 5
  mlp_dim: 512
  latent_dim: 50
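Two OmegaConf mechanisms do real work in this new policy file: `???` marks mandatory values (`state_dim`, `action_dim`) that must be filled in, for example from the environment, before the config is used, and `${policy.min_std}` / `${policy.horizon}` make the schedule fields track their source keys. A small self-contained sketch, using plain OmegaConf and values copied from above:

```python
from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "policy": {
        "min_std": 0.05,
        "horizon": 5,
        "state_dim": "???",                       # mandatory, not yet provided
        "std_schedule": "${policy.min_std}",      # follows min_std
        "horizon_schedule": "${policy.horizon}",  # follows horizon
    }
})

print(cfg.policy.std_schedule)                        # 0.05
print(cfg.policy.horizon_schedule)                    # 5
print(OmegaConf.is_missing(cfg.policy, "state_dim"))  # True -- must be set before use

cfg.policy.min_std = 0.1
print(cfg.policy.std_schedule)                        # 0.1 -- interpolation re-resolves
```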