diff --git a/lerobot/configs/env/maniskill_example.yaml b/lerobot/configs/env/maniskill_example.yaml
index 3df23b2e..2beaa8a6 100644
--- a/lerobot/configs/env/maniskill_example.yaml
+++ b/lerobot/configs/env/maniskill_example.yaml
@@ -1,6 +1,6 @@
 # @package _global_
 
-fps: 20
+fps: 400
 
 env:
   name: maniskill/pushcube
diff --git a/lerobot/configs/policy/sac_maniskill.yaml b/lerobot/configs/policy/sac_maniskill.yaml
index c9bbca44..cf20d059 100644
--- a/lerobot/configs/policy/sac_maniskill.yaml
+++ b/lerobot/configs/policy/sac_maniskill.yaml
@@ -8,22 +8,23 @@
 #     env.gym.obs_type=environment_state_agent_pos \
 
 seed: 1
-dataset_repo_id: "AdilZtn/Maniskill-Pushcube-demonstration-medium"
+# dataset_repo_id: "AdilZtn/Maniskill-Pushcube-demonstration-medium"
+dataset_repo_id: null
 
 training:
   # Offline training dataloader
   num_workers: 4
 
   batch_size: 512
-  grad_clip_norm: 10.0
+  grad_clip_norm: 40.0
   lr: 3e-4
-  storage_device: "cpu"
+  storage_device: "cuda"
 
   eval_freq: 2500
   log_freq: 10
-  save_freq: 2000000
+  save_freq: 1000000
 
   online_steps: 1000000
   online_rollout_n_episodes: 10
@@ -32,17 +33,12 @@ training:
   online_sampling_ratio: 1.0
   online_env_seed: 10000
   online_buffer_capacity: 200000
+  offline_buffer_capacity: 100000
   online_buffer_seed_size: 0
   online_step_before_learning: 500
   do_online_rollout_async: false
   policy_update_freq: 1
 
-  # delta_timestamps:
-  #   observation.environment_state: "[i / ${fps} for i in range(${policy.horizon} + 1)]"
-  #   observation.state: "[i / ${fps} for i in range(${policy.horizon} + 1)]"
-  #   action: "[i / ${fps} for i in range(${policy.horizon})]"
-  #   next.reward: "[i / ${fps} for i in range(${policy.horizon})]"
-
 policy:
   name: sac
@@ -68,28 +64,33 @@ policy:
   camera_number: 1
 
   # Normalization / Unnormalization
-  input_normalization_modes: null
-  # input_normalization_modes:
-  #   observation.state: min_max
-  input_normalization_params: null
-  # observation.state:
-  #   min: [-1.9361e+00, -7.7640e-01, -7.7094e-01, -2.9709e+00, -8.5656e-01,
-  #         1.0764e+00, -1.2680e+00, 0.0000e+00, 0.0000e+00, -9.3448e+00,
-  #         -3.3828e+00, -3.8420e+00, -5.2553e+00, -3.4154e+00, -6.5082e+00,
-  #         -6.0500e+00, -8.7193e+00, -8.2337e+00, -3.4650e-01, -4.9441e-01,
-  #         8.3516e-03, -3.1114e-01, -9.9700e-01, -2.3471e-01, -2.7137e-01]
+  # input_normalization_modes: null
+  input_normalization_modes:
+    observation.state: min_max
+    observation.image: mean_std
+  # input_normalization_params: null
+  input_normalization_params:
+    observation.state:
+      min: [-1.9361e+00, -7.7640e-01, -7.7094e-01, -2.9709e+00, -8.5656e-01,
+            1.0764e+00, -1.2680e+00, 0.0000e+00, 0.0000e+00, -9.3448e+00,
+            -3.3828e+00, -3.8420e+00, -5.2553e+00, -3.4154e+00, -6.5082e+00,
+            -6.0500e+00, -8.7193e+00, -8.2337e+00, -3.4650e-01, -4.9441e-01,
+            8.3516e-03, -3.1114e-01, -9.9700e-01, -2.3471e-01, -2.7137e-01]
+      max: [ 0.8644, 1.4306, 1.8520, -0.7578, 0.9508, 3.4901, 1.9381, 0.0400,
+             0.0400, 5.0885, 4.7156, 7.9393, 7.9100, 2.9796, 5.7720, 4.7163,
+             7.8145, 9.7415, 0.2422, 0.4505, 0.6306, 0.2622, 1.0000, 0.5135,
+             0.4001]
 
-  # max: [ 0.8644, 1.4306, 1.8520, -0.7578, 0.9508, 3.4901, 1.9381, 0.0400,
-  #        0.0400, 5.0885, 4.7156, 7.9393, 7.9100, 2.9796, 5.7720, 4.7163,
-  #        7.8145, 9.7415, 0.2422, 0.4505, 0.6306, 0.2622, 1.0000, 0.5135,
-  #        0.4001]
+    observation.image:
+      mean: [0.485, 0.456, 0.406]
+      std: [0.229, 0.224, 0.225]
 
   output_normalization_modes:
     action: min_max
   output_normalization_params:
     action:
-      min: [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0]
-      max: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
+      min: [-0.03, -0.03, -0.03, -0.03, -0.03, -0.03, -0.03]
+      max: [0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03]
   output_normalization_shapes:
     action: [7]
@@ -99,8 +100,8 @@ policy:
   # discount: 0.99
   discount: 0.80
   temperature_init: 1.0
-  num_critics: 10 #10
-  num_subsample_critics: 2
+  num_critics: 2 #10
+  num_subsample_critics: null
   critic_lr: 3e-4
   actor_lr: 3e-4
   temperature_lr: 3e-4
@@ -111,7 +112,7 @@ policy:
   actor_learner_config:
     learner_host: "127.0.0.1"
     learner_port: 50051
-    policy_parameters_push_frequency: 1
+    policy_parameters_push_frequency: 4
   concurrency:
-    actor: 'processes'
-    learner: 'processes'
+    actor: 'threads'
+    learner: 'threads'
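A note on the normalization block above: the new min/max vectors for observation.state look like per-dimension dataset statistics, while the observation.image mean/std values are the standard ImageNet statistics. A minimal sketch of how these two modes are conventionally applied (an assumption based on LeRobot's usual semantics of min_max rescaling into [-1, 1] and mean_std standardizing per channel; the names below are illustrative, and the state vectors are truncated to three of the 25 dimensions):

    import torch

    # First 3 of the 25 per-dimension statistics from the config above.
    STATE_MIN = torch.tensor([-1.9361, -0.7764, -0.7709])
    STATE_MAX = torch.tensor([0.8644, 1.4306, 1.8520])
    # Standard ImageNet channel statistics, reshaped for (C, H, W) images.
    IMAGE_MEAN = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    IMAGE_STD = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

    def min_max_normalize(state: torch.Tensor) -> torch.Tensor:
        # Rescale each dimension from [min, max] into [-1, 1].
        return (state - STATE_MIN) / (STATE_MAX - STATE_MIN) * 2.0 - 1.0

    def mean_std_normalize(image: torch.Tensor) -> torch.Tensor:
        # Standardize a float image in [0, 1], layout (C, H, W).
        return (image - IMAGE_MEAN) / IMAGE_STD

ImageNet statistics are the usual choice when the image encoder is initialized from ImageNet-pretrained weights, which is presumably why mean_std is enabled for observation.image here.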
diff --git a/lerobot/scripts/server/learner_server.py b/lerobot/scripts/server/learner_server.py
index d1235980..713fc2a8 100644
--- a/lerobot/scripts/server/learner_server.py
+++ b/lerobot/scripts/server/learner_server.py
@@ -202,6 +202,7 @@ def initialize_offline_replay_buffer(
         action_delta=cfg.env.wrapper.delta_action,
         storage_device=storage_device,
         optimize_memory=True,
+        capacity=cfg.training.offline_buffer_capacity,
     )
 
     return offline_replay_buffer
diff --git a/lerobot/scripts/server/maniskill_manipulator.py b/lerobot/scripts/server/maniskill_manipulator.py
index e4d55955..495042de 100644
--- a/lerobot/scripts/server/maniskill_manipulator.py
+++ b/lerobot/scripts/server/maniskill_manipulator.py
@@ -159,7 +159,7 @@ def make_maniskill(
     env.unwrapped.metadata["render_fps"] = 20
     env = ManiSkillCompat(env)
     env = ManiSkillActionWrapper(env)
-    env = ManiSkillMultiplyActionWrapper(env, multiply_factor=1)
+    env = ManiSkillMultiplyActionWrapper(env, multiply_factor=0.03)
     return env
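The multiply_factor=0.03 change works together with the new output_normalization_params in sac_maniskill.yaml: both now describe joint deltas bounded by +/-0.03 instead of +/-1.0. A minimal sketch of one way such a wrapper can be structured (an assumption, not the actual ManiSkillMultiplyActionWrapper code: it presumes a gymnasium Box action space and an underlying controller that consumes normalized [-1, 1] actions, and ScaleActionSpaceWrapper is a hypothetical name):

    import gymnasium as gym
    import numpy as np

    class ScaleActionSpaceWrapper(gym.ActionWrapper):
        """Advertise a +/-multiply_factor action space to the policy and map
        incoming actions back to the controller's native [-1, 1] range."""

        def __init__(self, env: gym.Env, multiply_factor: float = 0.03):
            super().__init__(env)
            assert isinstance(env.action_space, gym.spaces.Box)
            self.multiply_factor = multiply_factor
            # Shrink the bounds the policy sees, e.g. [-1, 1] -> [-0.03, 0.03].
            self.action_space = gym.spaces.Box(
                low=env.action_space.low * multiply_factor,
                high=env.action_space.high * multiply_factor,
                dtype=np.float32,
            )

        def action(self, action: np.ndarray) -> np.ndarray:
            # A +/-0.03 joint delta from the policy becomes a +/-1.0
            # normalized command for the wrapped env.
            return (action / self.multiply_factor).astype(np.float32)

Whatever the exact implementation, the invariant the diff maintains is that the bounds advertised to the policy, the output_normalization_params in the config, and the factor passed to the wrapper all agree: +/-0.03 in the config, 0.03 in make_maniskill.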