diff --git a/lerobot/common/policies/sac/configuration_sac.py b/lerobot/common/policies/sac/configuration_sac.py
index 18ceee24..1d296bf1 100644
--- a/lerobot/common/policies/sac/configuration_sac.py
+++ b/lerobot/common/policies/sac/configuration_sac.py
@@ -95,6 +95,6 @@ class SACConfig:
             "use_tanh_squash": True,
             "log_std_min": -5,
             "log_std_max": 2,
-            "init_final": 0.01,
+            "init_final": 0.005,
         }
     )
diff --git a/lerobot/scripts/server/actor_server.py b/lerobot/scripts/server/actor_server.py
index fa4e34af..8284f024 100644
--- a/lerobot/scripts/server/actor_server.py
+++ b/lerobot/scripts/server/actor_server.py
@@ -163,10 +163,10 @@ def serve_actor_service(port=50052):
 
 def update_policy_parameters(policy: SACPolicy, parameters_queue: queue.Queue, device):
     if not parameters_queue.empty():
-        logging.debug("[ACTOR] Load new parameters from Learner.")
+        logging.info("[ACTOR] Load new parameters from Learner.")
         state_dict = parameters_queue.get()
         state_dict = move_state_dict_to_device(state_dict, device=device)
-        policy.load_state_dict(state_dict)
+        policy.load_state_dict(state_dict, strict=False)
 
 
 def act_with_policy(cfg: DictConfig, robot: Robot, reward_classifier: nn.Module):