Update configuration files for improved performance and flexibility
- Increased frame rate (`fps`) in `maniskill_example.yaml` from 20 to 400 for faster simulation.
- Updated `sac_maniskill.yaml` to set `dataset_repo_id` to null and raised `grad_clip_norm` from 10.0 to 40.0.
- Changed `storage_device` from "cpu" to "cuda" to keep replay storage on the GPU.
- Reduced `save_freq` from 2000000 to 1000000 to checkpoint more often.
- Enabled input normalization for `observation.state` (min_max) and `observation.image` (mean_std) in the SAC policy, and tightened the output action bounds from ±1.0 to ±0.03.
- Reduced `num_critics` from 10 to 2, set `num_subsample_critics` to null, and raised `policy_parameters_push_frequency` from 1 to 4.
- Switched actor/learner concurrency from 'processes' to 'threads'.
- Updated `learner_server.py` to pass `offline_buffer_capacity` when initializing the offline replay buffer.
- Lowered the action multiplier in `maniskill_manipulator.py` from 1 to 0.03 for finer control over actions.
Parent: f899edb57f
Commit: b7bd13570f
maniskill_example.yaml
@@ -1,6 +1,6 @@
 # @package _global_

-fps: 20
+fps: 400

 env:
   name: maniskill/pushcube
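For context: `fps` caps how often the rollout loop steps the environment. Below is a minimal sketch of fps-based pacing, assuming a sleep-padded loop (whether this codebase paces the ManiSkill rollout this way is an assumption); at 20 fps each step is padded to 50 ms of wall-clock time, at 400 fps the padding all but disappears.

```python
import time

def paced_rollout(env, policy, fps: float, num_steps: int):
    """Step the environment at most `fps` times per second."""
    dt = 1.0 / fps
    obs, _ = env.reset()
    for _ in range(num_steps):
        start = time.perf_counter()
        action = policy(obs)
        obs, reward, terminated, truncated, info = env.step(action)
        if terminated or truncated:
            obs, _ = env.reset()
        # Sleep off whatever remains of the control period, if anything.
        remaining = dt - (time.perf_counter() - start)
        if remaining > 0:
            time.sleep(remaining)
```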
sac_maniskill.yaml
@@ -8,22 +8,23 @@
 # env.gym.obs_type=environment_state_agent_pos \

 seed: 1
-dataset_repo_id: "AdilZtn/Maniskill-Pushcube-demonstration-medium"
+# dataset_repo_id: "AdilZtn/Maniskill-Pushcube-demonstration-medium"
+dataset_repo_id: null

 training:
   # Offline training dataloader
   num_workers: 4

   batch_size: 512
-  grad_clip_norm: 10.0
+  grad_clip_norm: 40.0
   lr: 3e-4

-  storage_device: "cpu"
+  storage_device: "cuda"

   eval_freq: 2500
   log_freq: 10
-  save_freq: 2000000
+  save_freq: 1000000

   online_steps: 1000000
   online_rollout_n_episodes: 10
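Keeping `storage_device: "cuda"` places the replay storage tensors in GPU memory, so sampled batches skip the host-to-device copy at the cost of VRAM. A minimal sketch of that trade-off with a hypothetical storage class (not the repository's `ReplayBuffer`):

```python
import torch

class TensorStorage:
    """Hypothetical preallocated transition storage on a configurable device."""

    def __init__(self, capacity: int, obs_dim: int, act_dim: int, storage_device: str = "cpu"):
        self.device = torch.device(storage_device)
        self.capacity = capacity
        self.idx = 0
        self.full = False
        self.obs = torch.empty(capacity, obs_dim, device=self.device)
        self.actions = torch.empty(capacity, act_dim, device=self.device)
        self.rewards = torch.empty(capacity, device=self.device)

    def add(self, obs: torch.Tensor, action: torch.Tensor, reward: float):
        self.obs[self.idx] = obs.to(self.device)
        self.actions[self.idx] = action.to(self.device)
        self.rewards[self.idx] = reward
        self.idx = (self.idx + 1) % self.capacity
        self.full = self.full or self.idx == 0

    def sample(self, batch_size: int, train_device: str = "cuda"):
        high = self.capacity if self.full else self.idx
        ix = torch.randint(high, (batch_size,), device=self.device)
        # With storage_device="cuda" the gather stays on the GPU and the
        # .to(train_device) calls are no-ops; with "cpu" every batch crosses PCIe.
        return (
            self.obs[ix].to(train_device),
            self.actions[ix].to(train_device),
            self.rewards[ix].to(train_device),
        )
```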
@@ -32,17 +33,12 @@ training:
   online_sampling_ratio: 1.0
   online_env_seed: 10000
   online_buffer_capacity: 200000
+  offline_buffer_capacity: 100000
   online_buffer_seed_size: 0
   online_step_before_learning: 500
   do_online_rollout_async: false
   policy_update_freq: 1

-  # delta_timestamps:
-  #   observation.environment_state: "[i / ${fps} for i in range(${policy.horizon} + 1)]"
-  #   observation.state: "[i / ${fps} for i in range(${policy.horizon} + 1)]"
-  #   action: "[i / ${fps} for i in range(${policy.horizon})]"
-  #   next.reward: "[i / ${fps} for i in range(${policy.horizon})]"
-
 policy:
   name: sac
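The new `offline_buffer_capacity` sizes a second buffer that holds demonstration transitions, separate from the `online_buffer_capacity`-bounded buffer filled by rollouts. A minimal sketch of drawing a batch from both (the 50/50 mix and the `sample` interface are assumptions, not the repository's exact logic):

```python
import torch

def sample_mixed_batch(online_buffer, offline_buffer, batch_size: int, offline_fraction: float = 0.5):
    """Draw one training batch mixing online rollouts with offline demonstrations.

    Each buffer is assumed to expose sample(n) -> dict[str, torch.Tensor]; the
    online buffer is capped at 200000 transitions and the offline one at the
    new offline_buffer_capacity of 100000.
    """
    n_offline = int(batch_size * offline_fraction)
    n_online = batch_size - n_offline
    online = online_buffer.sample(n_online)
    offline = offline_buffer.sample(n_offline)
    # Concatenate field by field so the learner sees one homogeneous batch.
    return {key: torch.cat([online[key], offline[key]], dim=0) for key in online}
```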
@@ -68,28 +64,33 @@ policy:
   camera_number: 1

   # Normalization / Unnormalization
-  input_normalization_modes: null
-  # input_normalization_modes:
-  #   observation.state: min_max
-  input_normalization_params: null
-  # observation.state:
-  #   min: [-1.9361e+00, -7.7640e-01, -7.7094e-01, -2.9709e+00, -8.5656e-01,
-  #          1.0764e+00, -1.2680e+00,  0.0000e+00,  0.0000e+00, -9.3448e+00,
-  #         -3.3828e+00, -3.8420e+00, -5.2553e+00, -3.4154e+00, -6.5082e+00,
-  #         -6.0500e+00, -8.7193e+00, -8.2337e+00, -3.4650e-01, -4.9441e-01,
-  #          8.3516e-03, -3.1114e-01, -9.9700e-01, -2.3471e-01, -2.7137e-01]
-  #   max: [ 0.8644, 1.4306, 1.8520, -0.7578, 0.9508, 3.4901, 1.9381, 0.0400,
-  #          0.0400, 5.0885, 4.7156, 7.9393, 7.9100, 2.9796, 5.7720, 4.7163,
-  #          7.8145, 9.7415, 0.2422, 0.4505, 0.6306, 0.2622, 1.0000, 0.5135,
-  #          0.4001]
+  # input_normalization_modes: null
+  input_normalization_modes:
+    observation.state: min_max
+    observation.image: mean_std
+  # input_normalization_params: null
+  input_normalization_params:
+    observation.state:
+      min: [-1.9361e+00, -7.7640e-01, -7.7094e-01, -2.9709e+00, -8.5656e-01,
+             1.0764e+00, -1.2680e+00,  0.0000e+00,  0.0000e+00, -9.3448e+00,
+            -3.3828e+00, -3.8420e+00, -5.2553e+00, -3.4154e+00, -6.5082e+00,
+            -6.0500e+00, -8.7193e+00, -8.2337e+00, -3.4650e-01, -4.9441e-01,
+             8.3516e-03, -3.1114e-01, -9.9700e-01, -2.3471e-01, -2.7137e-01]
+      max: [ 0.8644, 1.4306, 1.8520, -0.7578, 0.9508, 3.4901, 1.9381, 0.0400,
+             0.0400, 5.0885, 4.7156, 7.9393, 7.9100, 2.9796, 5.7720, 4.7163,
+             7.8145, 9.7415, 0.2422, 0.4505, 0.6306, 0.2622, 1.0000, 0.5135,
+             0.4001]
+    observation.image:
+      mean: [0.485, 0.456, 0.406]
+      std: [0.229, 0.224, 0.225]

   output_normalization_modes:
     action: min_max
   output_normalization_params:
     action:
-      min: [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0]
-      max: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
+      min: [-0.03, -0.03, -0.03, -0.03, -0.03, -0.03, -0.03]
+      max: [0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03]
   output_normalization_shapes:
     action: [7]
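The inputs are now normalized instead of passed through: `observation.state` is min/max-scaled with the bounds recorded above and `observation.image` is standardized with ImageNet-style statistics, while the output action bounds shrink from ±1.0 to ±0.03 to match the new `multiply_factor=0.03` further down. A minimal sketch of what these modes compute (standalone functions, not the repository's normalization module):

```python
import torch

def normalize_min_max(x: torch.Tensor, min_val: torch.Tensor, max_val: torch.Tensor) -> torch.Tensor:
    """Scale x from [min_val, max_val] into [-1, 1] (mode: min_max)."""
    return 2.0 * (x - min_val) / (max_val - min_val) - 1.0

def normalize_mean_std(x: torch.Tensor, mean: torch.Tensor, std: torch.Tensor) -> torch.Tensor:
    """Standardize x with per-channel statistics (mode: mean_std)."""
    return (x - mean) / std

def unnormalize_action(a: torch.Tensor, min_val: torch.Tensor, max_val: torch.Tensor) -> torch.Tensor:
    """Map a policy output in [-1, 1] back to the configured action range."""
    return (a + 1.0) / 2.0 * (max_val - min_val) + min_val

# Example with the config's image statistics and the new +/-0.03 action bounds.
image = torch.rand(3, 128, 128)  # image resolution is an assumption
image_norm = normalize_mean_std(
    image,
    mean=torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1),
    std=torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1),
)
squashed = torch.tanh(torch.randn(7))  # SAC's tanh-squashed output in [-1, 1]
env_action = unnormalize_action(
    squashed, min_val=torch.full((7,), -0.03), max_val=torch.full((7,), 0.03)
)
```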
@@ -99,8 +100,8 @@ policy:
   # discount: 0.99
   discount: 0.80
   temperature_init: 1.0
-  num_critics: 10 #10
-  num_subsample_critics: 2
+  num_critics: 2 #10
+  num_subsample_critics: null
   critic_lr: 3e-4
   actor_lr: 3e-4
   temperature_lr: 3e-4
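The old settings (`num_critics: 10`, `num_subsample_critics: 2`) amount to a REDQ-style ensemble whose target is the minimum over a randomly drawn pair of critics; the new settings (`num_critics: 2`, `num_subsample_critics: null`) fall back to standard clipped double-Q SAC. A minimal sketch of the target reduction under both settings (tensor shapes assumed, not the repository's critic class):

```python
import torch

def ensemble_target_q(q_values: torch.Tensor, num_subsample_critics: int | None) -> torch.Tensor:
    """Reduce per-critic Q(s', a') estimates of shape (num_critics, batch) to a target.

    Old config: num_critics=10 with num_subsample_critics=2 draws a random pair
    and min-reduces it (REDQ-style). New config: num_critics=2 with
    num_subsample_critics=None is plain clipped double-Q.
    """
    if num_subsample_critics is not None:
        idx = torch.randperm(q_values.shape[0])[:num_subsample_critics]
        q_values = q_values[idx]
    return q_values.min(dim=0).values

q_old = ensemble_target_q(torch.randn(10, 256), num_subsample_critics=2)    # old behaviour
q_new = ensemble_target_q(torch.randn(2, 256), num_subsample_critics=None)  # new behaviour
```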
@@ -111,7 +112,7 @@ policy:
   actor_learner_config:
     learner_host: "127.0.0.1"
     learner_port: 50051
-    policy_parameters_push_frequency: 1
+    policy_parameters_push_frequency: 4
   concurrency:
-    actor: 'processes'
-    learner: 'processes'
+    actor: 'threads'
+    learner: 'threads'
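Pushing parameters every 4 learner steps instead of every step reduces how often weights are serialized and shipped to the actor, and running both sides as threads keeps them in one process. A minimal sketch of the push cadence (the queue is only a stand-in for the actual transport implied by `learner_host`/`learner_port`):

```python
import queue

def learner_loop(policy, optimizer_step, param_queue: queue.Queue, total_steps: int, push_frequency: int = 4):
    """Run optimization and periodically publish weights for the actor.

    push_frequency mirrors actor_learner_config.policy_parameters_push_frequency:
    at 1 the actor gets fresh weights after every optimization step, at 4 only
    every fourth step, trading policy freshness for less copying and transfer.
    """
    for step in range(1, total_steps + 1):
        optimizer_step(policy)  # one gradient update on a sampled batch
        if step % push_frequency == 0:
            # Snapshot weights on CPU so the actor thread can load them safely.
            state = {k: v.detach().cpu().clone() for k, v in policy.state_dict().items()}
            param_queue.put(state)
```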
learner_server.py
@@ -202,6 +202,7 @@ def initialize_offline_replay_buffer(
         action_delta=cfg.env.wrapper.delta_action,
         storage_device=storage_device,
         optimize_memory=True,
+        capacity=cfg.training.offline_buffer_capacity,
     )
     return offline_replay_buffer
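Passing `capacity=cfg.training.offline_buffer_capacity` bounds the offline replay buffer instead of letting it grow with the dataset. A minimal sketch of what such a bound does while loading (illustrative only; the repository's buffer internals are not shown in this diff):

```python
from collections import deque

def build_offline_buffer(dataset, capacity: int):
    """Illustrative stand-in: a bounded FIFO over offline dataset transitions.

    With capacity=100000 (training.offline_buffer_capacity) the oldest entries
    are dropped once the bound is reached, keeping the footprint on the
    configured storage device fixed regardless of dataset size.
    """
    buffer = deque(maxlen=capacity)
    for transition in dataset:
        buffer.append(transition)
    return buffer
```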
maniskill_manipulator.py
@@ -159,7 +159,7 @@ def make_maniskill(
     env.unwrapped.metadata["render_fps"] = 20
     env = ManiSkillCompat(env)
     env = ManiSkillActionWrapper(env)
-    env = ManiSkillMultiplyActionWrapper(env, multiply_factor=1)
+    env = ManiSkillMultiplyActionWrapper(env, multiply_factor=0.03)

     return env
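Multiplying actions by 0.03 instead of 1 turns near-full-scale policy outputs into small control deltas, in line with the ±0.03 action bounds configured above. A minimal sketch of such a wrapper (the real `ManiSkillMultiplyActionWrapper` may also adjust the advertised action space or handle tuple actions):

```python
import gymnasium as gym
import numpy as np

class ScaleActionWrapper(gym.ActionWrapper):
    """Illustrative action-damping wrapper: forward only a fraction of each action."""

    def __init__(self, env: gym.Env, multiply_factor: float = 0.03):
        super().__init__(env)
        self.multiply_factor = multiply_factor

    def action(self, action):
        scaled = np.asarray(action, dtype=np.float32) * self.multiply_factor
        # Keep the scaled command within the underlying controller's bounds.
        return np.clip(scaled, self.env.action_space.low, self.env.action_space.high)

# Usage, mirroring the call in make_maniskill:
# env = ScaleActionWrapper(env, multiply_factor=0.03)
```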