# Flat experiment configuration: one top-level key per setting, grouped into
# commented sections. Values written as `${key}` are interpolations of other
# keys in this file (resolved by the config loader — presumably
# OmegaConf/Hydra; confirm against the code that loads this file).

seed: 1337
log_dir: logs/2024_01_26_train
exp_name: default
device: cuda
buffer_device: cuda
eval_freq: 1000
save_freq: 10000
eval_episodes: 20
save_video: false
save_model: false
save_buffer: false

# env
env: simxarm
task: lift
from_pixels: true
pixels_only: false
image_size: 84

reward_scale: 1.0

# xarm_lift
episode_length: 25
modality: 'all'
action_repeat: 2  # TODO(rcadene): verify we use this
discount: 0.9
train_steps: 50000

# pixels
frame_stack: 1
num_channels: 32
# NOTE(review): `img_size` carries the same value as `image_size` above —
# confirm both keys are actually read, and keep them in sync if so.
img_size: 84

# TDMPC

# planning
mpc: true
iterations: 6
num_samples: 512
num_elites: 50
mixture_coef: 0.1
min_std: 0.05
max_std: 2.0
temperature: 0.5
momentum: 0.1
uncertainty_cost: 1

# actor
log_std_min: -10
log_std_max: 2

# learning
batch_size: 256
max_buffer_size: 10000
horizon: 5
reward_coef: 0.5
value_coef: 0.1
consistency_coef: 20
rho: 0.5
kappa: 0.1
# Exponent-form floats are written with an explicit decimal point so that
# YAML 1.1 loaders also resolve them as floats rather than strings.
lr: 3.0e-4
std_schedule: ${min_std}
horizon_schedule: ${horizon}
per: true
per_alpha: 0.6
per_beta: 0.4
grad_clip_norm: 10
seed_steps: 0
update_freq: 2
tau: 0.01
utd: 1

# offline rl
# dataset_dir: ???
data_first_percent: 1.0
is_data_clip: true
data_clip_eps: 1.0e-5
expectile: 0.9
A_scaling: 3.0

# offline->online
# NOTE(review): after interpolation this is the text "50000/2", not a number;
# presumably the consumer evaluates the expression — confirm.
offline_steps: ${train_steps}/2
pretrained_model_path: ""
balanced_sampling: true
demo_schedule: 0.5

# architecture
enc_dim: 256
num_q: 5
mlp_dim: 512
latent_dim: 50

# wandb
use_wandb: false
wandb_project: FOWM
wandb_entity: rcadene  # insert your own