From 1a8b99e360d89b0caf296b4f845ab050a34c3baa Mon Sep 17 00:00:00 2001 From: Michel Aractingi Date: Tue, 17 Dec 2024 18:03:46 +0100 Subject: [PATCH] added comments from kewang --- lerobot/common/policies/sac/modeling_sac.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lerobot/common/policies/sac/modeling_sac.py b/lerobot/common/policies/sac/modeling_sac.py index 7d451b4e..de8283de 100644 --- a/lerobot/common/policies/sac/modeling_sac.py +++ b/lerobot/common/policies/sac/modeling_sac.py @@ -128,6 +128,7 @@ class SACPolicy( # perform image augmentation # reward bias + # from HIL-SERL code base # add_or_replace={"rewards": batch["rewards"] + self.config["reward_bias"]} in reward_batch @@ -207,6 +208,7 @@ class SACPolicy( def update(self): self.critic_target.lerp_(self.critic_ensemble, self.config.critic_target_update_weight) # TODO: implement UTD update + # First update only critics for utd_ratio-1 times #for critic_step in range(self.config.utd_ratio - 1): # only update critic and critic target # Then update critic, critic target, actor and temperature