From 7ae368e98365f881790c519a39a8e31baf4a7234 Mon Sep 17 00:00:00 2001
From: Michel Aractingi
Date: Fri, 14 Feb 2025 15:17:16 +0100
Subject: [PATCH] Fixed bug in the action scale of the intervention actions and
 offline dataset actions. (scale by inverse delta)

Co-authored-by: Adil Zouitine
---
 lerobot/configs/policy/sac_real.yaml       |  4 +--
 lerobot/scripts/server/buffer.py           |  4 +++
 lerobot/scripts/server/crop_dataset_roi.py |  8 ++---
 lerobot/scripts/server/gym_manipulator.py  | 41 +++++++++++++++++-----
 lerobot/scripts/server/learner_server.py   |  1 +
 5 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/lerobot/configs/policy/sac_real.yaml b/lerobot/configs/policy/sac_real.yaml
index 5d248aef..4b021aaa 100644
--- a/lerobot/configs/policy/sac_real.yaml
+++ b/lerobot/configs/policy/sac_real.yaml
@@ -8,8 +8,8 @@
 #   env.gym.obs_type=environment_state_agent_pos \
 
 seed: 1
-dataset_repo_id: aractingi/push_cube_square_offline_demo_cropped_resized
-#aractingi/push_cube_square_light_offline_demo_cropped_resized
+dataset_repo_id: aractingi/push_cube_overfit_cropped_resized
+#aractingi/push_cube_square_offline_demo_cropped_resized
 
 training:
   # Offline training dataloader
diff --git a/lerobot/scripts/server/buffer.py b/lerobot/scripts/server/buffer.py
index dcfc259c..6a290e6e 100644
--- a/lerobot/scripts/server/buffer.py
+++ b/lerobot/scripts/server/buffer.py
@@ -209,6 +209,7 @@ class ReplayBuffer:
         state_keys: Optional[Sequence[str]] = None,
         capacity: Optional[int] = None,
         action_mask: Optional[Sequence[int]] = None,
+        action_delta: Optional[float] = None,
     ) -> "ReplayBuffer":
         """
         Convert a LeRobotDataset into a ReplayBuffer.
@@ -249,6 +250,9 @@ class ReplayBuffer:
             else:
                 data["action"] = data["action"][:, action_mask]
 
+            if action_delta is not None:
+                data["action"] = data["action"] / action_delta
+
             replay_buffer.add(
                 state=data["state"],
                 action=data["action"],
diff --git a/lerobot/scripts/server/crop_dataset_roi.py b/lerobot/scripts/server/crop_dataset_roi.py
index 172eb22c..fb9077c9 100644
--- a/lerobot/scripts/server/crop_dataset_roi.py
+++ b/lerobot/scripts/server/crop_dataset_roi.py
@@ -260,12 +260,8 @@ if __name__ == "__main__":
             rois = json.load(f)
 
     # rois = {
-    #     "observation.images.side": (92, 123, 379, 349),
-    #     "observation.images.front": (109, 37, 361, 557),
-    # }
-    # rois = {
-    #     "observation.images.front": [109, 37, 361, 557],
-    #     "observation.images.side": [94, 161, 372, 315],
+    #     "observation.images.front": [102, 43, 358, 523],
+    #     "observation.images.side": [92, 123, 379, 349],
     # }
 
     # Print the selected rectangular ROIs
diff --git a/lerobot/scripts/server/gym_manipulator.py b/lerobot/scripts/server/gym_manipulator.py
index a43f07ca..b3d71d4d 100644
--- a/lerobot/scripts/server/gym_manipulator.py
+++ b/lerobot/scripts/server/gym_manipulator.py
@@ -213,18 +213,18 @@ class HILSerlRobotEnv(gym.Env):
             # When applying the delta action space, convert teleop absolute values to relative differences.
             if self.use_delta_action_space:
-                teleop_action = teleop_action - self.current_joint_positions
-                if torch.any(teleop_action < -self.delta_relative_bounds_size * self.delta) and torch.any(
-                    teleop_action > self.delta_relative_bounds_size
+                teleop_action = (teleop_action - self.current_joint_positions) / self.delta
+                if torch.any(teleop_action < -self.relative_bounds_size) and torch.any(
+                    teleop_action > self.relative_bounds_size
                 ):
                     logging.debug(
-                        f"Relative teleop delta exceeded bounds {self.delta_relative_bounds_size}, teleop_action {teleop_action}\n"
-                        f"lower bounds condition {teleop_action < -self.delta_relative_bounds_size}\n"
-                        f"upper bounds condition {teleop_action > self.delta_relative_bounds_size}"
+                        f"Relative teleop delta exceeded bounds {self.relative_bounds_size}, teleop_action {teleop_action}\n"
+                        f"lower bounds condition {teleop_action < -self.relative_bounds_size}\n"
+                        f"upper bounds condition {teleop_action > self.relative_bounds_size}"
                     )
 
                     teleop_action = torch.clamp(
-                        teleop_action, -self.delta_relative_bounds_size, self.delta_relative_bounds_size
+                        teleop_action, -self.relative_bounds_size, self.relative_bounds_size
                     )
 
             # NOTE: To mimic the shape of a neural network output, we add a batch dimension to the teleop action.
             if teleop_action.dim() == 1:
@@ -312,7 +312,7 @@ class RewardWrapper(gym.Wrapper):
         start_time = time.perf_counter()
         with torch.inference_mode():
             reward = (
-                self.reward_classifier.predict_reward(images, threshold=0.6)
+                self.reward_classifier.predict_reward(images, threshold=0.8)
                 if self.reward_classifier is not None
                 else 0.0
             )
@@ -726,6 +726,24 @@ def get_classifier(pretrained_path, config_path, device="mps"):
     return model
 
 
+def replay_episode(env, repo_id, root=None, episode=0):
+    from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
+
+    local_files_only = root is not None
+    dataset = LeRobotDataset(repo_id, root=root, episodes=[episode], local_files_only=local_files_only)
+    actions = dataset.hf_dataset.select_columns("action")
+
+    for idx in range(dataset.num_frames):
+        start_episode_t = time.perf_counter()
+
+        action = actions[idx]["action"][:4]
+        print(action)
+        env.step((action / env.unwrapped.delta, False))
+
+        dt_s = time.perf_counter() - start_episode_t
+        busy_wait(1 / 10 - dt_s)
+
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--fps", type=int, default=30, help="control frequency")
@@ -776,6 +794,9 @@ if __name__ == "__main__":
     parser.add_argument("--env-overrides", type=str, default=None, help="Overrides for the env yaml file")
     parser.add_argument("--control-time-s", type=float, default=20, help="Maximum episode length in seconds")
     parser.add_argument("--reset-follower-pos", type=int, default=1, help="Reset follower between episodes")
+    parser.add_argument("--replay-repo-id", type=str, default=None, help="Repo ID of the episode to replay")
+    parser.add_argument("--replay-root", type=str, default=None, help="Root of the dataset to replay")
+    parser.add_argument("--replay-episode", type=int, default=0, help="Episode to replay")
     args = parser.parse_args()
 
     robot_cfg = init_hydra_config(args.robot_path, args.robot_overrides)
@@ -795,6 +816,10 @@ if __name__ == "__main__":
 
     env.reset()
 
+    if args.replay_repo_id is not None:
+        replay_episode(env, args.replay_repo_id, root=args.replay_root, episode=args.replay_episode)
+        exit()
+
     # Retrieve the robot's action space for joint commands.
     action_space_robot = env.action_space.spaces[0]
 
diff --git a/lerobot/scripts/server/learner_server.py b/lerobot/scripts/server/learner_server.py
index 31976537..2d8eab67 100644
--- a/lerobot/scripts/server/learner_server.py
+++ b/lerobot/scripts/server/learner_server.py
@@ -600,6 +600,7 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
             device=device,
             state_keys=cfg.policy.input_shapes.keys(),
             action_mask=active_action_dims,
+            action_delta=cfg.env.wrapper.delta_action,
         )
         batch_size: int = batch_size // 2  # We will sample from both replay buffer
 
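Below is a minimal sketch, not part of the patch, of the scaling convention the fix enforces: offline-dataset and teleop intervention actions are recorded as absolute joint-position differences, while the policy acts in a delta action space that the environment multiplies by delta at execution time, so stored actions must be divided by delta (scaled by the inverse delta) before training or replay. The delta value and tensor below are illustrative placeholders, not values taken from the repository.

import torch

# Illustrative placeholder for the environment's delta scale (cfg.env.wrapper.delta_action in the patch).
delta = 0.1

# Example joint-position differences, as stored in the offline dataset or produced by a teleop intervention.
recorded_action = torch.tensor([0.02, -0.01, 0.03, 0.00])

# The fix: divide by delta so the action lives in the policy's delta action space.
# This mirrors ReplayBuffer.from_lerobot_dataset with action_delta set, and
# replay_episode's action / env.unwrapped.delta.
policy_action = recorded_action / delta

# At execution time the environment scales back by delta, recovering the original joint motion.
assert torch.allclose(policy_action * delta, recorded_action)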