From 7ae368e98365f881790c519a39a8e31baf4a7234 Mon Sep 17 00:00:00 2001
From: Michel Aractingi
Date: Fri, 14 Feb 2025 15:17:16 +0100
Subject: [PATCH] Fixed bug in the action scale of the intervention actions and
 offline dataset actions. (scale by inverse delta)

Co-authored-by: Adil Zouitine
---
 lerobot/configs/policy/sac_real.yaml       |  4 +--
 lerobot/scripts/server/buffer.py           |  4 +++
 lerobot/scripts/server/crop_dataset_roi.py |  8 ++---
 lerobot/scripts/server/gym_manipulator.py  | 41 +++++++++++++++++-----
 lerobot/scripts/server/learner_server.py   |  1 +
 5 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/lerobot/configs/policy/sac_real.yaml b/lerobot/configs/policy/sac_real.yaml
index 5d248aef..4b021aaa 100644
--- a/lerobot/configs/policy/sac_real.yaml
+++ b/lerobot/configs/policy/sac_real.yaml
@@ -8,8 +8,8 @@
 #   env.gym.obs_type=environment_state_agent_pos \
 
 seed: 1
-dataset_repo_id: aractingi/push_cube_square_offline_demo_cropped_resized
-#aractingi/push_cube_square_light_offline_demo_cropped_resized
+dataset_repo_id: aractingi/push_cube_overfit_cropped_resized
+#aractingi/push_cube_square_offline_demo_cropped_resized
 
 training:
   # Offline training dataloader
diff --git a/lerobot/scripts/server/buffer.py b/lerobot/scripts/server/buffer.py
index dcfc259c..6a290e6e 100644
--- a/lerobot/scripts/server/buffer.py
+++ b/lerobot/scripts/server/buffer.py
@@ -209,6 +209,7 @@ class ReplayBuffer:
         state_keys: Optional[Sequence[str]] = None,
         capacity: Optional[int] = None,
         action_mask: Optional[Sequence[int]] = None,
+        action_delta: Optional[float] = None,
     ) -> "ReplayBuffer":
         """
         Convert a LeRobotDataset into a ReplayBuffer.
@@ -249,6 +250,9 @@ class ReplayBuffer:
             else:
                 data["action"] = data["action"][:, action_mask]
 
+            if action_delta is not None:
+                data["action"] = data["action"] / action_delta
+
             replay_buffer.add(
                 state=data["state"],
                 action=data["action"],
diff --git a/lerobot/scripts/server/crop_dataset_roi.py b/lerobot/scripts/server/crop_dataset_roi.py
index 172eb22c..fb9077c9 100644
--- a/lerobot/scripts/server/crop_dataset_roi.py
+++ b/lerobot/scripts/server/crop_dataset_roi.py
@@ -260,12 +260,8 @@ if __name__ == "__main__":
             rois = json.load(f)
 
     # rois = {
-    #     "observation.images.side": (92, 123, 379, 349),
-    #     "observation.images.front": (109, 37, 361, 557),
-    # }
-    # rois = {
-    #     "observation.images.front": [109, 37, 361, 557],
-    #     "observation.images.side": [94, 161, 372, 315],
+    #     "observation.images.front": [102, 43, 358, 523],
+    #     "observation.images.side": [92, 123, 379, 349],
     # }
 
     # Print the selected rectangular ROIs
diff --git a/lerobot/scripts/server/gym_manipulator.py b/lerobot/scripts/server/gym_manipulator.py
index a43f07ca..b3d71d4d 100644
--- a/lerobot/scripts/server/gym_manipulator.py
+++ b/lerobot/scripts/server/gym_manipulator.py
@@ -213,18 +213,18 @@ class HILSerlRobotEnv(gym.Env):
             # When applying the delta action space, convert teleop absolute values to relative differences.
             if self.use_delta_action_space:
-                teleop_action = teleop_action - self.current_joint_positions
-                if torch.any(teleop_action < -self.delta_relative_bounds_size * self.delta) and torch.any(
-                    teleop_action > self.delta_relative_bounds_size
+                teleop_action = (teleop_action - self.current_joint_positions) / self.delta
+                if torch.any(teleop_action < -self.relative_bounds_size) and torch.any(
+                    teleop_action > self.relative_bounds_size
                 ):
                     logging.debug(
-                        f"Relative teleop delta exceeded bounds {self.delta_relative_bounds_size}, teleop_action {teleop_action}\n"
-                        f"lower bounds condition {teleop_action < -self.delta_relative_bounds_size}\n"
-                        f"upper bounds condition {teleop_action > self.delta_relative_bounds_size}"
+                        f"Relative teleop delta exceeded bounds {self.relative_bounds_size}, teleop_action {teleop_action}\n"
+                        f"lower bounds condition {teleop_action < -self.relative_bounds_size}\n"
+                        f"upper bounds condition {teleop_action > self.relative_bounds_size}"
                     )
 
                     teleop_action = torch.clamp(
-                        teleop_action, -self.delta_relative_bounds_size, self.delta_relative_bounds_size
+                        teleop_action, -self.relative_bounds_size, self.relative_bounds_size
                     )
 
             # NOTE: To mimic the shape of a neural network output, we add a batch dimension to the teleop action.
             if teleop_action.dim() == 1:
@@ -312,7 +312,7 @@ class RewardWrapper(gym.Wrapper):
         start_time = time.perf_counter()
         with torch.inference_mode():
             reward = (
-                self.reward_classifier.predict_reward(images, threshold=0.6)
+                self.reward_classifier.predict_reward(images, threshold=0.8)
                 if self.reward_classifier is not None
                 else 0.0
             )
@@ -726,6 +726,24 @@ def get_classifier(pretrained_path, config_path, device="mps"):
     return model
 
 
+def replay_episode(env, repo_id, root=None, episode=0):
+    from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
+
+    local_files_only = root is not None
+    dataset = LeRobotDataset(repo_id, root=root, episodes=[episode], local_files_only=local_files_only)
+    actions = dataset.hf_dataset.select_columns("action")
+
+    for idx in range(dataset.num_frames):
+        start_episode_t = time.perf_counter()
+
+        action = actions[idx]["action"][:4]
+        print(action)
+        env.step((action / env.unwrapped.delta, False))
+
+        dt_s = time.perf_counter() - start_episode_t
+        busy_wait(1 / 10 - dt_s)
+
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--fps", type=int, default=30, help="control frequency")
@@ -776,6 +794,9 @@ if __name__ == "__main__":
     parser.add_argument("--env-overrides", type=str, default=None, help="Overrides for the env yaml file")
     parser.add_argument("--control-time-s", type=float, default=20, help="Maximum episode length in seconds")
     parser.add_argument("--reset-follower-pos", type=int, default=1, help="Reset follower between episodes")
+    parser.add_argument("--replay-repo-id", type=str, default=None, help="Repo ID of the episode to replay")
+    parser.add_argument("--replay-root", type=str, default=None, help="Root of the dataset to replay")
+    parser.add_argument("--replay-episode", type=int, default=0, help="Episode to replay")
     args = parser.parse_args()
 
     robot_cfg = init_hydra_config(args.robot_path, args.robot_overrides)
@@ -795,6 +816,10 @@ if __name__ == "__main__":
 
     env.reset()
 
+    if args.replay_repo_id is not None:
+        replay_episode(env, args.replay_repo_id, root=args.replay_root, episode=args.replay_episode)
+        exit()
+
     # Retrieve the robot's action space for joint commands.
     action_space_robot = env.action_space.spaces[0]
 
diff --git a/lerobot/scripts/server/learner_server.py b/lerobot/scripts/server/learner_server.py
index 31976537..2d8eab67 100644
--- a/lerobot/scripts/server/learner_server.py
+++ b/lerobot/scripts/server/learner_server.py
@@ -600,6 +600,7 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
             device=device,
             state_keys=cfg.policy.input_shapes.keys(),
             action_mask=active_action_dims,
+            action_delta=cfg.env.wrapper.delta_action,
         )
         batch_size: int = batch_size // 2  # We will sample from both replay buffer
 
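Below is a minimal sketch, not part of the patch, of the scaling convention the fix enforces: offline-dataset and teleop intervention actions are recorded as absolute joint-position differences, while the policy acts in a delta action space that the environment multiplies by delta at execution time, so stored actions must be divided by delta (scaled by the inverse delta) before training or replay. The delta value and tensor below are illustrative placeholders, not values taken from the repository.

import torch

# Illustrative placeholder for the environment's delta scale (cfg.env.wrapper.delta_action in the patch).
delta = 0.1

# Example joint-position differences, as stored in the offline dataset or produced by a teleop intervention.
recorded_action = torch.tensor([0.02, -0.01, 0.03, 0.00])

# The fix: divide by delta so the action lives in the policy's delta action space.
# This mirrors ReplayBuffer.from_lerobot_dataset with action_delta set, and
# replay_episode's action / env.unwrapped.delta.
policy_action = recorded_action / delta

# At execution time the environment scales back by delta, recovering the original joint motion.
assert torch.allclose(policy_action * delta, recorded_action)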