From d8b5fae62290d2cae2b95d975acb4e551bf89274 Mon Sep 17 00:00:00 2001
From: Michel Aractingi
Date: Mon, 9 Dec 2024 19:17:47 +0100
Subject: [PATCH] Add human intervention mechanism and eval_robot script to evaluate policy on the robot (#541)

Co-authored-by: Yoel
---
 lerobot/configs/robot/koch.yaml  |   4 +-
 lerobot/configs/robot/so100.yaml |   2 +-
 lerobot/scripts/eval_on_robot.py | 335 +++++++++++++++++++++++++++++++
 3 files changed, 338 insertions(+), 3 deletions(-)
 create mode 100644 lerobot/scripts/eval_on_robot.py

diff --git a/lerobot/configs/robot/koch.yaml b/lerobot/configs/robot/koch.yaml
index 40969dc7..334db830 100644
--- a/lerobot/configs/robot/koch.yaml
+++ b/lerobot/configs/robot/koch.yaml
@@ -10,7 +10,7 @@ max_relative_target: null
 leader_arms:
   main:
     _target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
-    port: /dev/tty.usbmodem575E0031751
+    port: /dev/tty.usbmodem58760430441
     motors:
       # name: (index, model)
       shoulder_pan: [1, "xl330-m077"]
@@ -23,7 +23,7 @@ leader_arms:
 follower_arms:
   main:
     _target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
-    port: /dev/tty.usbmodem575E0032081
+    port: /dev/tty.usbmodem585A0083391
     motors:
       # name: (index, model)
       shoulder_pan: [1, "xl430-w250"]
diff --git a/lerobot/configs/robot/so100.yaml b/lerobot/configs/robot/so100.yaml
index ec6f3e3f..0978de64 100644
--- a/lerobot/configs/robot/so100.yaml
+++ b/lerobot/configs/robot/so100.yaml
@@ -18,7 +18,7 @@ max_relative_target: null
 leader_arms:
   main:
     _target_: lerobot.common.robot_devices.motors.feetech.FeetechMotorsBus
-    port: /dev/tty.usbmodem585A0077581
+    port: /dev/tty.usbmodem58760433331
     motors:
       # name: (index, model)
       shoulder_pan: [1, "sts3215"]
diff --git a/lerobot/scripts/eval_on_robot.py b/lerobot/scripts/eval_on_robot.py
new file mode 100644
index 00000000..6a790f0a
--- /dev/null
+++ b/lerobot/scripts/eval_on_robot.py
@@ -0,0 +1,335 @@
+#!/usr/bin/env python

# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluate a policy by running rollouts on the real robot and computing metrics.

Usage example: evaluate a checkpoint from the LeRobot training script for 10 episodes.

```
python lerobot/scripts/eval_on_robot.py \
    -p outputs/train/model/checkpoints/005000/pretrained_model \
    eval.n_episodes=10
```

**NOTE** (michel-aractingi): This script is incomplete and is being prepared
for running training on the real robot.
"""

import argparse
import logging
import time
from copy import deepcopy

import numpy as np
import torch
from tqdm import trange

from lerobot.common.policies.policy_protocol import Policy
from lerobot.common.robot_devices.control_utils import busy_wait, is_headless
from lerobot.common.robot_devices.robots.factory import Robot, make_robot
from lerobot.common.utils.utils import (
    init_hydra_config,
    init_logging,
    log_say,
)


def rollout(robot: Robot, policy: Policy, fps: int, control_time_s: float = 20, use_amp: bool = True) -> dict:
    """Run a policy rollout on the real robot.

    The return dictionary contains:
        "observation": A dictionary of (batch, sequence + 1, *) tensors mapped to observation
            keys. NOTE that this has an extra sequence element relative to the other keys in the
            dictionary. This is because an extra observation is included for after the environment is
            terminated or truncated.
        "action": A (batch, sequence, action_dim) tensor of actions applied based on the observations (not
            including the last observations).
        "next.reward": A (batch, sequence) tensor of rewards received for applying the actions.
        "next.success": A (batch, sequence) tensor of success conditions (the only time this can be True is
            upon environment termination/truncation).
        "done": A (batch, sequence) tensor of **cumulative** done conditions. For any given batch element,
            the first True is followed by Trues all the way to the end. This can be used for masking
            extraneous elements from the sequences above.

    Args:
        robot: The robot class that defines the interface with the real robot.
        policy: The policy. Must be a PyTorch nn module.
        fps: Number of control steps per second.
        control_time_s: Maximum duration of the rollout in seconds.
        use_amp: Whether to use automatic mixed precision when querying the policy.

    Returns:
        The dictionary described above.
    """
    # assert isinstance(policy, nn.Module), "Policy must be a PyTorch nn module."
    # device = get_device_from_parameters(policy)

    # define keyboard listener
    listener, events = init_keyboard_listener()

    # Reset the policy. TODO (michel-aractingi) add real policy evaluation once the code is ready.
    # policy.reset()

    # Get observation from real robot
    observation = robot.capture_observation()

    # Calculate reward. TODO (michel-aractingi)
    # in HIL-SERL it will be with a reward classifier
    reward = calculate_reward(observation)
    all_observations = []
    all_actions = []
    all_rewards = []
    all_successes = []

    start_episode_t = time.perf_counter()
    timestamp = 0.0
    while timestamp < control_time_s:
        start_loop_t = time.perf_counter()

        all_observations.append(deepcopy(observation))
        # observation = {key: observation[key].to(device, non_blocking=True) for key in observation}

        # Apply the next action.
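        # Human-in-the-loop control flow (see init_keyboard_listener below): a first press of the
        # space key sets events["pause_policy"], which holds the rollout in the loop below so the
        # operator can place the leader arm in a pose similar to the follower's; a second press sets
        # events["human_intervention_step"], after which actions come from teleoperation instead of
        # the policy. Tapping the right arrow key sets events["exit_early"] and ends the episode.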
        while events["pause_policy"] and not events["human_intervention_step"]:
            busy_wait(0.5)

        if events["human_intervention_step"]:
            # take over the robot's actions
            observation, action = robot.teleop_step(record_data=True)
            action = action["action"]  # teleop step returns torch tensors but in a dict
        else:
            # explore with policy
            with torch.inference_mode():
                action = robot.follower_arms["main"].read("Present_Position")
                action = torch.from_numpy(action)
                robot.send_action(action)
                # action = predict_action(observation, policy, device, use_amp)

        observation = robot.capture_observation()
        # Calculate reward
        # in HIL-SERL it will be with a reward classifier
        reward = calculate_reward(observation)

        all_actions.append(action)
        all_rewards.append(torch.from_numpy(reward))
        all_successes.append(torch.tensor([False]))

        dt_s = time.perf_counter() - start_loop_t
        busy_wait(1 / fps - dt_s)
        timestamp = time.perf_counter() - start_episode_t
        if events["exit_early"]:
            events["exit_early"] = False
            events["human_intervention_step"] = False
            events["pause_policy"] = False
            break
    all_observations.append(deepcopy(observation))

    dones = torch.tensor([False] * len(all_actions))
    dones[-1] = True
    # Stack the sequence along the first dimension so that we have (batch, sequence, *) tensors.
    ret = {
        "action": torch.stack(all_actions, dim=1),
        "next.reward": torch.stack(all_rewards, dim=1),
        "next.success": torch.stack(all_successes, dim=1),
        "done": dones,
    }
    stacked_observations = {}
    for key in all_observations[0]:
        stacked_observations[key] = torch.stack([obs[key] for obs in all_observations], dim=1)
    ret["observation"] = stacked_observations

    listener.stop()

    return ret


def eval_policy(
    robot: Robot,
    policy: torch.nn.Module,
    fps: float,
    n_episodes: int,
    control_time_s: int = 20,
    use_amp: bool = True,
) -> dict:
    """
    Args:
        robot: The robot interface on which to run the rollouts.
        policy: The policy.
        n_episodes: The number of episodes to evaluate.
    Returns:
        Dictionary with metrics and data regarding the rollouts.
    """
    # TODO (michel-aractingi) these checks are commented out for testing with a fixed policy
    # assert isinstance(policy, Policy)
    # policy.eval()

    sum_rewards = []
    max_rewards = []
    successes = []
    rollouts = []

    start_eval = time.perf_counter()
    progbar = trange(n_episodes, desc="Evaluating policy on real robot")
    for _batch_idx in progbar:
        rollout_data = rollout(robot, policy, fps, control_time_s, use_amp)

        rollouts.append(rollout_data)
        sum_rewards.append(sum(rollout_data["next.reward"]))
        max_rewards.append(max(rollout_data["next.reward"]))
        successes.append(rollout_data["next.success"][-1])

    info = {
        "per_episode": [
            {
                "episode_ix": i,
                "sum_reward": sum_reward,
                "max_reward": max_reward,
                "pc_success": success * 100,
            }
            for i, (sum_reward, max_reward, success) in enumerate(
                zip(
                    sum_rewards[:n_episodes],
                    max_rewards[:n_episodes],
                    successes[:n_episodes],
                    strict=False,
                )
            )
        ],
        "aggregated": {
            "avg_sum_reward": float(np.nanmean(torch.cat(sum_rewards[:n_episodes]))),
            "avg_max_reward": float(np.nanmean(torch.cat(max_rewards[:n_episodes]))),
            "pc_success": float(np.nanmean(torch.cat(successes[:n_episodes])) * 100),
            "eval_s": time.perf_counter() - start_eval,
            "eval_ep_s": (time.perf_counter() - start_eval) / n_episodes,
        },
    }

    if robot.is_connected:
        robot.disconnect()

    return info


def calculate_reward(observation):
    """
    Placeholder for computing the reward from the current observation.
    In HIL-SERL this will be done with a learned reward classifier.
    """
    # reward = reward_classifier(observation)
    return np.array([0.0])


def init_keyboard_listener():
    # Allow exiting early while running an episode or resetting the environment,
    # by tapping the right arrow key '->'. This might require sudo permission
    # to allow your terminal to monitor keyboard events.
    events = {}
    events["exit_early"] = False
    events["rerecord_episode"] = False
    events["pause_policy"] = False
    events["human_intervention_step"] = False

    if is_headless():
        logging.warning(
            "Headless environment detected. On-screen camera display and keyboard input will not be available."
        )
        listener = None
        return listener, events

    # Only import pynput if not in a headless environment
    from pynput import keyboard

    def on_press(key):
        try:
            if key == keyboard.Key.right:
                print("Right arrow key pressed. Exiting loop...")
                events["exit_early"] = True
            elif key == keyboard.Key.left:
                print("Left arrow key pressed. Exiting loop and rerecording the last episode...")
                events["rerecord_episode"] = True
                events["exit_early"] = True
            elif key == keyboard.Key.space:
                # On the first space press, pause the policy so the user can get ready;
                # on the second space press, the user is ready to start the intervention.
                if not events["pause_policy"]:
                    print(
                        "Space key pressed. Human intervention required.\n"
                        "Place the leader in a pose similar to the follower and press space again."
                    )
                    events["pause_policy"] = True
                    log_say("Human intervention stage. Get ready to take over.", play_sounds=True)
                else:
                    events["human_intervention_step"] = True
                    print("Space key pressed. Human intervention starting.")
                    log_say("Starting human intervention.", play_sounds=True)

        except Exception as e:
            print(f"Error handling key press: {e}")

    listener = keyboard.Listener(on_press=on_press)
    listener.start()

    return listener, events


if __name__ == "__main__":
    init_logging()

    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        "--robot-path",
        type=str,
        default="lerobot/configs/robot/koch.yaml",
        help="Path to robot yaml file used to instantiate the robot using `make_robot` factory function.",
    )
    parser.add_argument(
        "--robot-overrides",
        type=str,
        nargs="*",
        help="Any key=value arguments to override config values (use dots for nested overrides).",
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        "-p",
        "--pretrained-policy-name-or-path",
        help=(
            "Either the repo ID of a model hosted on the Hub or a path to a directory containing weights "
            "saved using `Policy.save_pretrained`. If not provided, the policy is initialized from scratch "
            "(useful for debugging). This argument is mutually exclusive with `--config`."
        ),
    )
    group.add_argument(
        "--config",
        help=(
            "Path to a yaml config you want to use for initializing a policy from scratch (useful for "
            "debugging). This argument is mutually exclusive with `--pretrained-policy-name-or-path` (`-p`)."
        ),
    )
    parser.add_argument("--revision", help="Optionally provide the Hugging Face Hub revision ID.")
    parser.add_argument(
        "--out-dir",
        help=(
            "Where to save the evaluation outputs. If not provided, outputs are saved in "
            "outputs/eval/{timestamp}_{env_name}_{policy_name}"
        ),
    )

    args = parser.parse_args()

    robot_cfg = init_hydra_config(args.robot_path, args.robot_overrides)
    robot = make_robot(robot_cfg)
    if not robot.is_connected:
        robot.connect()

    # The policy is not loaded yet (see the NOTE in the module docstring); rollouts read the
    # follower arm positions instead of querying a policy, so None is passed here for now.
    eval_policy(robot, None, fps=40, n_episodes=2, control_time_s=100)
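
Note on the reward stub: `calculate_reward` above always returns 0. The TODOs indicate it will eventually call a reward classifier, as in HIL-SERL. The sketch below is one illustrative way such a classifier could be wired in; it is not part of this patch, and the `SimpleRewardClassifier` architecture and the `observation.images.main` key are assumptions made for the example.

```
import numpy as np
import torch
from torch import nn


class SimpleRewardClassifier(nn.Module):
    """Hypothetical binary success classifier over a single camera image (illustrative only)."""

    def __init__(self, in_channels: int = 3):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_channels, 16, kernel_size=5, stride=4),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(16, 1),
        )

    def forward(self, image: torch.Tensor) -> torch.Tensor:
        # image: (batch, channels, height, width), float values in [0, 1]
        return torch.sigmoid(self.net(image))


def calculate_reward_with_classifier(observation, classifier, image_key="observation.images.main"):
    # Assumes the observation dict holds an (H, W, C) uint8 image tensor under `image_key`.
    image = observation[image_key].float().permute(2, 0, 1).unsqueeze(0) / 255.0
    with torch.inference_mode():
        success_prob = classifier(image).item()
    # Sparse reward: 1.0 once the classifier is confident the task has succeeded.
    return np.array([1.0 if success_prob > 0.5 else 0.0])
```

With a trained classifier, `rollout` would call this in place of `calculate_reward`, keeping the same `np.array([...])` return shape so the rest of the script is unchanged.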