diff --git a/lerobot/configs/default.yaml b/lerobot/configs/default.yaml index 1bd7dd89..83cbc5a3 100644 --- a/lerobot/configs/default.yaml +++ b/lerobot/configs/default.yaml @@ -1,10 +1,13 @@ defaults: - _self_ - env: simxarm + - policy: tdmpc hydra: run: - dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${hydra.job.name} + dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}_${env.name}_${policy.name}_${hydra.job.name} + job: + name: default seed: 1337 device: cuda @@ -20,82 +23,7 @@ fps: ??? env: ??? -policy: - name: tdmpc - - reward_scale: 1.0 - - # xarm_lift - train_steps: ${train_steps} - episode_length: ${env.episode_length} - discount: 0.9 - modality: 'all' - - # pixels - frame_stack: 1 - num_channels: 32 - img_size: ${env.image_size} - state_dim: ??? - action_dim: ??? - - # planning - mpc: true - iterations: 6 - num_samples: 512 - num_elites: 50 - mixture_coef: 0.1 - min_std: 0.05 - max_std: 2.0 - temperature: 0.5 - momentum: 0.1 - uncertainty_cost: 1 - - # actor - log_std_min: -10 - log_std_max: 2 - - # learning - batch_size: 256 - max_buffer_size: 10000 - horizon: 5 - reward_coef: 0.5 - value_coef: 0.1 - consistency_coef: 20 - rho: 0.5 - kappa: 0.1 - lr: 3e-4 - std_schedule: ${policy.min_std} - horizon_schedule: ${policy.horizon} - per: true - per_alpha: 0.6 - per_beta: 0.4 - grad_clip_norm: 10 - seed_steps: 0 - update_freq: 2 - tau: 0.01 - utd: 1 - - # offline rl - # dataset_dir: ??? - data_first_percent: 1.0 - is_data_clip: true - data_clip_eps: 1e-5 - expectile: 0.9 - A_scaling: 3.0 - - # offline->online - offline_steps: 25000 # ${train_steps}/2 - pretrained_model_path: "" - # pretrained_model_path: "/home/rcadene/code/fowm/logs/xarm_lift/all/default/2/models/offline.pt" - # pretrained_model_path: "/home/rcadene/code/fowm/logs/xarm_lift/all/default/2/models/final.pt" - balanced_sampling: true - demo_schedule: 0.5 - - # architecture - enc_dim: 256 - num_q: 5 - mlp_dim: 512 - latent_dim: 50 +policy: ??? wandb: enable: true diff --git a/lerobot/configs/env/pusht.yaml b/lerobot/configs/env/pusht.yaml index 7b2ac7ba..1a500120 100644 --- a/lerobot/configs/env/pusht.yaml +++ b/lerobot/configs/env/pusht.yaml @@ -1,9 +1,5 @@ # @package _global_ -hydra: - job: - name: pusht - eval_episodes: 50 eval_freq: 7500 save_freq: 75000 diff --git a/lerobot/configs/env/simxarm.yaml b/lerobot/configs/env/simxarm.yaml index 80324e78..3972631c 100644 --- a/lerobot/configs/env/simxarm.yaml +++ b/lerobot/configs/env/simxarm.yaml @@ -1,9 +1,5 @@ # @package _global_ -hydra: - job: - name: simxarm - eval_episodes: 20 eval_freq: 1000 save_freq: 10000 diff --git a/lerobot/configs/policy/tdmpc.yaml b/lerobot/configs/policy/tdmpc.yaml new file mode 100644 index 00000000..1c2140f8 --- /dev/null +++ b/lerobot/configs/policy/tdmpc.yaml @@ -0,0 +1,78 @@ +# @package _global_ + +policy: + name: tdmpc + + reward_scale: 1.0 + + # xarm_lift + train_steps: ${train_steps} + episode_length: ${env.episode_length} + discount: 0.9 + modality: 'all' + + # pixels + frame_stack: 1 + num_channels: 32 + img_size: ${env.image_size} + state_dim: ??? + action_dim: ??? + + # planning + mpc: true + iterations: 6 + num_samples: 512 + num_elites: 50 + mixture_coef: 0.1 + min_std: 0.05 + max_std: 2.0 + temperature: 0.5 + momentum: 0.1 + uncertainty_cost: 1 + + # actor + log_std_min: -10 + log_std_max: 2 + + # learning + batch_size: 256 + max_buffer_size: 10000 + horizon: 5 + reward_coef: 0.5 + value_coef: 0.1 + consistency_coef: 20 + rho: 0.5 + kappa: 0.1 + lr: 3e-4 + std_schedule: ${policy.min_std} + horizon_schedule: ${policy.horizon} + per: true + per_alpha: 0.6 + per_beta: 0.4 + grad_clip_norm: 10 + seed_steps: 0 + update_freq: 2 + tau: 0.01 + utd: 1 + + # offline rl + # dataset_dir: ??? + data_first_percent: 1.0 + is_data_clip: true + data_clip_eps: 1e-5 + expectile: 0.9 + A_scaling: 3.0 + + # offline->online + offline_steps: 25000 # ${train_steps}/2 + pretrained_model_path: "" + # pretrained_model_path: "/home/rcadene/code/fowm/logs/xarm_lift/all/default/2/models/offline.pt" + # pretrained_model_path: "/home/rcadene/code/fowm/logs/xarm_lift/all/default/2/models/final.pt" + balanced_sampling: true + demo_schedule: 0.5 + + # architecture + enc_dim: 256 + num_q: 5 + mlp_dim: 512 + latent_dim: 50 \ No newline at end of file