From 9ceb68ee90bc1f7bbafefc66d389b034acc6da54 Mon Sep 17 00:00:00 2001 From: KeWang1017 Date: Sat, 28 Dec 2024 22:11:34 +0000 Subject: [PATCH] Refine SAC configuration and policy for enhanced performance - Updated standard deviation parameterization in SACConfig to 'softplus' with defined min and max values for improved stability. - Modified action sampling in SACPolicy to use reparameterized sampling, ensuring better gradient flow and log probability calculations. - Cleaned up log probability calculations in TanhMultivariateNormalDiag for clarity and efficiency. - Raised the evaluation interval (eval_freq) in the YAML configuration from 10000 to 50000, so evaluation runs less often, for more efficient training cycles. These changes aim to enhance the robustness and performance of the SAC implementation during training and inference. --- lerobot/configs/policy/sac_pusht_keypoints.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lerobot/configs/policy/sac_pusht_keypoints.yaml b/lerobot/configs/policy/sac_pusht_keypoints.yaml index 19af60d4..6d8971a2 100644 --- a/lerobot/configs/policy/sac_pusht_keypoints.yaml +++ b/lerobot/configs/policy/sac_pusht_keypoints.yaml @@ -19,7 +19,7 @@ training: grad_clip_norm: 10.0 lr: 3e-4 - eval_freq: 10000 + eval_freq: 50000 log_freq: 500 save_freq: 50000