From 95de8e273dffd9e2f5991a6c73d84d02cf880c2b Mon Sep 17 00:00:00 2001
From: Michel Aractingi
Date: Thu, 13 Feb 2025 17:12:57 +0100
Subject: [PATCH] nit

Co-authored-by: Adil Zouitine
---
 lerobot/common/policies/sac/configuration_sac.py | 2 +-
 lerobot/scripts/server/actor_server.py           | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lerobot/common/policies/sac/configuration_sac.py b/lerobot/common/policies/sac/configuration_sac.py
index 18ceee24..1d296bf1 100644
--- a/lerobot/common/policies/sac/configuration_sac.py
+++ b/lerobot/common/policies/sac/configuration_sac.py
@@ -95,6 +95,6 @@ class SACConfig:
             "use_tanh_squash": True,
             "log_std_min": -5,
             "log_std_max": 2,
-            "init_final": 0.01,
+            "init_final": 0.005,
         }
     )
diff --git a/lerobot/scripts/server/actor_server.py b/lerobot/scripts/server/actor_server.py
index fa4e34af..8284f024 100644
--- a/lerobot/scripts/server/actor_server.py
+++ b/lerobot/scripts/server/actor_server.py
@@ -163,10 +163,10 @@ def serve_actor_service(port=50052):
 
 def update_policy_parameters(policy: SACPolicy, parameters_queue: queue.Queue, device):
     if not parameters_queue.empty():
-        logging.debug("[ACTOR] Load new parameters from Learner.")
+        logging.info("[ACTOR] Load new parameters from Learner.")
         state_dict = parameters_queue.get()
         state_dict = move_state_dict_to_device(state_dict, device=device)
-        policy.load_state_dict(state_dict)
+        policy.load_state_dict(state_dict, strict=False)
 
 
 def act_with_policy(cfg: DictConfig, robot: Robot, reward_classifier: nn.Module):