From c85f88fb62ab427b783822182a134300e7fba391 Mon Sep 17 00:00:00 2001
From: AdilZouitine
Date: Mon, 17 Feb 2025 10:08:49 +0000
Subject: [PATCH] Improve wandb logging and custom step tracking in logger

- Modify logger to support multiple custom step keys
- Update logging method to handle custom step keys more flexibly
- Enhance logging of optimization step and frequency
- Temporarily disable the parameter push thread in the learner server

Co-authored-by: michel-aractingi
---
 lerobot/common/logger.py                 | 27 ++++++++++++++-------------
 lerobot/scripts/server/learner_server.py | 15 ++++++++++-----
 2 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/lerobot/common/logger.py b/lerobot/common/logger.py
index 6dc724db..3a9cb2a5 100644
--- a/lerobot/common/logger.py
+++ b/lerobot/common/logger.py
@@ -128,7 +128,7 @@ class Logger:
                 resume="must" if cfg.resume else None,
             )
             # Handle custom step key for rl asynchronous training.
-            self._wandb_custom_step_key = None
+            self._wandb_custom_step_key: set[str] | None = None
             print(colored("Logs will be synced with wandb.", "blue", attrs=["bold"]))
             logging.info(f"Track this run --> {colored(wandb.run.get_url(), 'yellow', attrs=['bold'])}")
             self._wandb = wandb
@@ -264,11 +264,13 @@ class Logger:
         # multiple time steps is possible for example, the interaction step with the environment,
         # the training step, the evaluation step, etc. So we need to define a custom step key
         # to log the correct step for each metric.
-        if custom_step_key is not None and self._wandb_custom_step_key is None:
-            # NOTE: Define the custom step key, once for the moment this implementation support only one
-            # custom step.
-            self._wandb_custom_step_key = f"{mode}/{custom_step_key}"
-            self._wandb.define_metric(self._wandb_custom_step_key, hidden=True)
+        if custom_step_key is not None:
+            if self._wandb_custom_step_key is None:
+                self._wandb_custom_step_key = set()
+            new_custom_key = f"{mode}/{custom_step_key}"
+            if new_custom_key not in self._wandb_custom_step_key:
+                self._wandb_custom_step_key.add(new_custom_key)
+                self._wandb.define_metric(new_custom_key, hidden=True)
 
         for k, v in d.items():
             if not isinstance(v, (int, float, str, wandb.Table)):
@@ -277,17 +279,16 @@ class Logger:
                 )
                 continue
 
-            # We don't want to log the custom step
-            if k == custom_step_key:
+            # Do not log the custom step key itself.
+            if self._wandb_custom_step_key is not None and k in self._wandb_custom_step_key:
                 continue
 
-            if self._wandb_custom_step_key is not None and custom_step_key is not None:
-                # NOTE: Log the metric with the custom step key.
-                value_custom_step_key = d[custom_step_key]
-                self._wandb.log({f"{mode}/{k}": v, self._wandb_custom_step_key: value_custom_step_key})
+            if custom_step_key is not None:
+                value_custom_step = d[custom_step_key]
+                self._wandb.log({f"{mode}/{k}": v, f"{mode}/{custom_step_key}": value_custom_step})
                 continue
 
-            self._wandb.log({f"{mode}/{k}": v}, step=step)
+            self._wandb.log(data={f"{mode}/{k}": v}, step=step)
 
     def log_video(self, video_path: str, step: int, mode: str = "train"):
         assert mode in {"train", "eval"}
diff --git a/lerobot/scripts/server/learner_server.py b/lerobot/scripts/server/learner_server.py
index 78b5d7b8..29afca7b 100644
--- a/lerobot/scripts/server/learner_server.py
+++ b/lerobot/scripts/server/learner_server.py
@@ -206,9 +206,9 @@ def start_learner_threads(
 
     server_thread.start()
     transition_thread.start()
-    param_push_thread.start()
+    # param_push_thread.start()
 
-    param_push_thread.join()
+    # param_push_thread.join()
     transition_thread.join()
     server_thread.join()
 
@@ -448,7 +448,9 @@ def add_actor_information_and_train(
             policy.update_target_networks()
 
         if optimization_step % cfg.training.log_freq == 0:
-            logger.log_dict(training_infos, step=optimization_step, mode="train")
+            training_infos["Optimization step"] = optimization_step
+            logger.log_dict(d=training_infos, mode="train", custom_step_key="Optimization step")
+            # logging.info(f"Training infos: {training_infos}")
 
         time_for_one_optimization_step = time.time() - time_for_one_optimization_step
         frequency_for_one_optimization_step = 1 / (time_for_one_optimization_step + 1e-9)
@@ -456,9 +458,12 @@ def add_actor_information_and_train(
         logging.info(f"[LEARNER] Optimization frequency loop [Hz]: {frequency_for_one_optimization_step}")
 
         logger.log_dict(
-            {"Optimization frequency loop [Hz]": frequency_for_one_optimization_step},
-            step=optimization_step,
+            {
+                "Optimization frequency loop [Hz]": frequency_for_one_optimization_step,
+                "Optimization step": optimization_step,
+            },
             mode="train",
+            custom_step_key="Optimization step",
         )
 
         optimization_step += 1
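
Reviewer note (not part of the patch): the logger-side change reduces to the standalone sketch below, assuming an active wandb run. The project name, the helper log_with_custom_step, and the metric values are illustrative, not taken from the patch.

    import wandb

    wandb.init(project="lerobot-demo", name="custom-step-sketch")

    custom_step_keys: set[str] = set()  # mirrors Logger._wandb_custom_step_key

    def log_with_custom_step(d: dict, mode: str, custom_step_key: str) -> None:
        key = f"{mode}/{custom_step_key}"
        if key not in custom_step_keys:
            custom_step_keys.add(key)
            # Register the step key once and hide it from the charts; it only
            # serves as an alternative x-axis.
            wandb.define_metric(key, hidden=True)
        step_value = d[custom_step_key]
        for k, v in d.items():
            if k == custom_step_key:
                continue  # never plot the step key as a metric of its own
            # Each metric is logged together with its step value, so wandb can
            # plot it against the custom axis instead of the global step.
            wandb.log({f"{mode}/{k}": v, key: step_value})

    # Two asynchronous writers can now advance on independent axes in one run:
    log_with_custom_step({"loss": 0.37, "Optimization step": 100}, "train", "Optimization step")
    log_with_custom_step({"reward": 1.0, "Interaction step": 2500}, "train", "Interaction step")

Logging the step value next to each metric, instead of passing wandb's global step=, is what lets the actor and the learner each keep their own x-axis inside a single run.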
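The learner-side call pattern then reads as in the sketch below; logger is the patched Logger, while cfg, total_steps, and run_optimization_step are hypothetical stand-ins for the real training loop in learner_server.py.

    import time

    optimization_step = 0
    total_steps = 1_000  # illustrative bound; the real loop runs until shutdown

    while optimization_step < total_steps:
        t0 = time.time()
        # Hypothetical helper: one gradient update returning a dict of losses.
        training_infos = run_optimization_step()

        if optimization_step % cfg.training.log_freq == 0:
            # The step value travels inside the dict; log_dict reads it back
            # out through custom_step_key and logs it next to every metric.
            training_infos["Optimization step"] = optimization_step
            logger.log_dict(d=training_infos, mode="train", custom_step_key="Optimization step")

        # Loop frequency, logged against the same custom axis.
        frequency = 1 / (time.time() - t0 + 1e-9)
        logger.log_dict(
            {
                "Optimization frequency loop [Hz]": frequency,
                "Optimization step": optimization_step,
            },
            mode="train",
            custom_step_key="Optimization step",
        )
        optimization_step += 1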