Fixed a bug in the action scale of the intervention actions and of the offline dataset actions (both are now scaled by the inverse of delta).
Co-authored-by: Adil Zouitine <adizouitinegm@gmail.com>
parent 36711d766a
commit 7ae368e983
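In short, both sources of non-policy actions (teleoperated interventions and offline dataset actions) are now divided by delta so that they live in the same delta action space as the policy output. A minimal, hypothetical sketch of that scaling (the delta value and the tensors below are illustrative, not values from the repository):

import torch

delta = 0.1  # assumed scale of the delta action space

# Teleop provides absolute joint positions; the policy outputs scaled deltas.
current_joint_positions = torch.tensor([0.0, 0.5, -0.3, 0.2])
teleop_absolute = torch.tensor([0.05, 0.55, -0.25, 0.2])
teleop_action = (teleop_absolute - current_joint_positions) / delta

# Offline dataset actions are assumed to be stored in raw units, so they receive the same rescaling.
dataset_action = torch.tensor([0.02, -0.01, 0.03, 0.0])
dataset_action_scaled = dataset_action / delta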
@@ -8,8 +8,8 @@
 # env.gym.obs_type=environment_state_agent_pos \
 seed: 1
-dataset_repo_id: aractingi/push_cube_square_offline_demo_cropped_resized
-#aractingi/push_cube_square_light_offline_demo_cropped_resized
+dataset_repo_id: aractingi/push_cube_overfit_cropped_resized
+#aractingi/push_cube_square_offline_demo_cropped_resized

 training:
   # Offline training dataloader
@@ -209,6 +209,7 @@ class ReplayBuffer:
         state_keys: Optional[Sequence[str]] = None,
         capacity: Optional[int] = None,
         action_mask: Optional[Sequence[int]] = None,
+        action_delta: Optional[float] = None,
     ) -> "ReplayBuffer":
         """
         Convert a LeRobotDataset into a ReplayBuffer.

@@ -249,6 +250,9 @@ class ReplayBuffer:
         else:
             data["action"] = data["action"][:, action_mask]

+        if action_delta is not None:
+            data["action"] = data["action"] / action_delta
+
         replay_buffer.add(
             state=data["state"],
             action=data["action"],
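A hedged sketch of how the extended conversion might be invoked; only the keyword arguments visible in this diff (state_keys, capacity, action_mask, action_delta) and in the train() change below (device) come from the source, everything else is an illustrative assumption:

# Hypothetical call; `dataset` is assumed to be a LeRobotDataset instance.
offline_buffer = ReplayBuffer.from_lerobot_dataset(
    dataset,
    device="cpu",
    state_keys=["observation.state"],
    capacity=100_000,
    action_mask=[0, 1, 2, 3],   # keep only the controlled action dimensions
    action_delta=0.1,           # divide stored actions by delta to match the delta action space
)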
@@ -260,12 +260,8 @@ if __name__ == "__main__":
         rois = json.load(f)

-    # rois = {
-    #     "observation.images.side": (92, 123, 379, 349),
-    #     "observation.images.front": (109, 37, 361, 557),
-    # }
     # rois = {
-    #     "observation.images.front": [109, 37, 361, 557],
-    #     "observation.images.side": [94, 161, 372, 315],
+    #     "observation.images.front": [102, 43, 358, 523],
+    #     "observation.images.side": [92, 123, 379, 349],
     # }

     # Print the selected rectangular ROIs
@@ -213,18 +213,18 @@ class HILSerlRobotEnv(gym.Env):

         # When applying the delta action space, convert teleop absolute values to relative differences.
         if self.use_delta_action_space:
-            teleop_action = teleop_action - self.current_joint_positions
-            if torch.any(teleop_action < -self.delta_relative_bounds_size * self.delta) and torch.any(
-                teleop_action > self.delta_relative_bounds_size
+            teleop_action = (teleop_action - self.current_joint_positions) / self.delta
+            if torch.any(teleop_action < -self.relative_bounds_size) and torch.any(
+                teleop_action > self.relative_bounds_size
             ):
                 logging.debug(
-                    f"Relative teleop delta exceeded bounds {self.delta_relative_bounds_size}, teleop_action {teleop_action}\n"
-                    f"lower bounds condition {teleop_action < -self.delta_relative_bounds_size}\n"
-                    f"upper bounds condition {teleop_action > self.delta_relative_bounds_size}"
+                    f"Relative teleop delta exceeded bounds {self.relative_bounds_size}, teleop_action {teleop_action}\n"
+                    f"lower bounds condition {teleop_action < -self.relative_bounds_size}\n"
+                    f"upper bounds condition {teleop_action > self.relative_bounds_size}"
                 )

             teleop_action = torch.clamp(
-                teleop_action, -self.delta_relative_bounds_size, self.delta_relative_bounds_size
+                teleop_action, -self.relative_bounds_size, self.relative_bounds_size
             )
             # NOTE: To mimic the shape of a neural network output, we add a batch dimension to the teleop action.
             if teleop_action.dim() == 1:
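The key change is that the teleop action is re-centered and divided by delta before the bounds check and the clamp, so the action and the bounds are compared in the same scaled units. A standalone sketch of that conversion (all values, including delta and the bounds, are assumed for illustration):

import torch

delta = 0.1
relative_bounds_size = torch.tensor([0.5, 0.5, 0.5, 0.5])  # assumed per-joint bounds

current_joint_positions = torch.tensor([0.0, 0.5, -0.3, 0.2])
teleop_absolute = torch.tensor([0.08, 0.45, -0.31, 0.2])

# Absolute teleop reading -> relative difference, expressed in delta units.
teleop_action = (teleop_absolute - current_joint_positions) / delta

# Clamp to the relative bounds, which are in the same scaled units.
teleop_action = torch.clamp(teleop_action, -relative_bounds_size, relative_bounds_size)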
@@ -312,7 +312,7 @@ class RewardWrapper(gym.Wrapper):
         start_time = time.perf_counter()
         with torch.inference_mode():
             reward = (
-                self.reward_classifier.predict_reward(images, threshold=0.6)
+                self.reward_classifier.predict_reward(images, threshold=0.8)
                 if self.reward_classifier is not None
                 else 0.0
             )
@@ -726,6 +726,24 @@ def get_classifier(pretrained_path, config_path, device="mps"):
     return model


+def replay_episode(env, repo_id, root=None, episode=0):
+    from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
+
+    local_files_only = root is not None
+    dataset = LeRobotDataset(repo_id, root=root, episodes=[episode], local_files_only=local_files_only)
+    actions = dataset.hf_dataset.select_columns("action")
+
+    for idx in range(dataset.num_frames):
+        start_episode_t = time.perf_counter()
+
+        action = actions[idx]["action"][:4]
+        print(action)
+        env.step((action / env.unwrapped.delta, False))
+
+        dt_s = time.perf_counter() - start_episode_t
+        busy_wait(1 / 10 - dt_s)
+
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--fps", type=int, default=30, help="control frequency")
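A minimal usage sketch for the new helper; the environment construction is elided and the repo id is only an example taken from the config change above:

# Hypothetical call; `env` must be an already-constructed environment wrapper stack
# whose unwrapped environment exposes `delta`, as replay_episode assumes.
replay_episode(
    env,
    repo_id="aractingi/push_cube_overfit_cropped_resized",
    root=None,       # None -> load from the hub; a local path switches to local_files_only
    episode=0,
)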
@@ -776,6 +794,9 @@ if __name__ == "__main__":
     parser.add_argument("--env-overrides", type=str, default=None, help="Overrides for the env yaml file")
     parser.add_argument("--control-time-s", type=float, default=20, help="Maximum episode length in seconds")
     parser.add_argument("--reset-follower-pos", type=int, default=1, help="Reset follower between episodes")
+    parser.add_argument("--replay-repo-id", type=str, default=None, help="Repo ID of the episode to replay")
+    parser.add_argument("--replay-root", type=str, default=None, help="Root of the dataset to replay")
+    parser.add_argument("--replay-episode", type=int, default=0, help="Episode to replay")
     args = parser.parse_args()

     robot_cfg = init_hydra_config(args.robot_path, args.robot_overrides)
@@ -795,6 +816,10 @@ if __name__ == "__main__":

     env.reset()

+    if args.replay_repo_id is not None:
+        replay_episode(env, args.replay_repo_id, root=args.replay_root, episode=args.replay_episode)
+        exit()
+
     # Retrieve the robot's action space for joint commands.
     action_space_robot = env.action_space.spaces[0]

@@ -600,6 +600,7 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
            device=device,
            state_keys=cfg.policy.input_shapes.keys(),
            action_mask=active_action_dims,
+            action_delta=cfg.env.wrapper.delta_action,
        )
        batch_size: int = batch_size // 2  # We will sample from both replay buffer

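The halved batch size in the surrounding context suggests that each training step draws half of its samples from the online buffer and half from the offline buffer converted above. A hedged sketch of that sampling pattern (the buffer variable names and the sample() call are assumptions about the training loop, which this hunk does not show):

import torch

def sample_mixed_batch(online_buffer, offline_buffer, batch_size):
    # Half from online interaction data, half from the offline dataset
    # that was rescaled with the same action_delta.
    half = batch_size // 2
    online_batch = online_buffer.sample(half)
    offline_batch = offline_buffer.sample(half)
    # Assumes both buffers return dicts of tensors with matching keys.
    return {key: torch.cat([online_batch[key], offline_batch[key]], dim=0) for key in online_batch}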