From 76234b7d14f5a9be06ea69eb032bf21ed38f689e Mon Sep 17 00:00:00 2001
From: Michel Aractingi
Date: Mon, 9 Dec 2024 19:17:47 +0100
Subject: [PATCH] Add human intervention mechanism and eval_robot script to evaluate policy on the robot (#541)

Co-authored-by: Yoel
---
 lerobot/configs/robot/koch.yaml  |  53 +++++
 lerobot/configs/robot/so100.yaml |  56 ++++++
 lerobot/scripts/eval_on_robot.py | 335 +++++++++++++++++++++++++++++++
 3 files changed, 444 insertions(+)
 create mode 100644 lerobot/configs/robot/koch.yaml
 create mode 100644 lerobot/configs/robot/so100.yaml
 create mode 100644 lerobot/scripts/eval_on_robot.py

diff --git a/lerobot/configs/robot/koch.yaml b/lerobot/configs/robot/koch.yaml
new file mode 100644
index 00000000..334db830
--- /dev/null
+++ b/lerobot/configs/robot/koch.yaml
@@ -0,0 +1,53 @@
+_target_: lerobot.common.robot_devices.robots.manipulator.ManipulatorRobot
+robot_type: koch
+calibration_dir: .cache/calibration/koch
+
+# `max_relative_target` limits the magnitude of the relative positional target vector for safety purposes.
+# Set this to a positive scalar to have the same value for all motors, or a list that is the same length as
+# the number of motors in your follower arms.
+max_relative_target: null
+
+leader_arms:
+  main:
+    _target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
+    port: /dev/tty.usbmodem58760430441
+    motors:
+      # name: (index, model)
+      shoulder_pan: [1, "xl330-m077"]
+      shoulder_lift: [2, "xl330-m077"]
+      elbow_flex: [3, "xl330-m077"]
+      wrist_flex: [4, "xl330-m077"]
+      wrist_roll: [5, "xl330-m077"]
+      gripper: [6, "xl330-m077"]
+
+follower_arms:
+  main:
+    _target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
+    port: /dev/tty.usbmodem585A0083391
+    motors:
+      # name: (index, model)
+      shoulder_pan: [1, "xl430-w250"]
+      shoulder_lift: [2, "xl430-w250"]
+      elbow_flex: [3, "xl330-m288"]
+      wrist_flex: [4, "xl330-m288"]
+      wrist_roll: [5, "xl330-m288"]
+      gripper: [6, "xl330-m288"]
+
+cameras:
+  laptop:
+    _target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera
+    camera_index: 0
+    fps: 30
+    width: 640
+    height: 480
+  phone:
+    _target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera
+    camera_index: 1
+    fps: 30
+    width: 640
+    height: 480
+
+# ~ Koch specific settings ~
+# Sets the leader arm in torque mode with the gripper motor set to this angle. This makes it possible
+# to squeeze the gripper and have it spring back to an open position on its own.
+gripper_open_degree: 35.156
diff --git a/lerobot/configs/robot/so100.yaml b/lerobot/configs/robot/so100.yaml
new file mode 100644
index 00000000..0978de64
--- /dev/null
+++ b/lerobot/configs/robot/so100.yaml
@@ -0,0 +1,56 @@
+# [SO-100 robot arm](https://github.com/TheRobotStudio/SO-ARM100)
+
+# Requires installing extras packages
+# With pip: `pip install -e ".[feetech]"`
+# With poetry: `poetry install --sync --extras "feetech"`
+
+# See [tutorial](https://github.com/huggingface/lerobot/blob/main/examples/10_use_so100.md)
+
+_target_: lerobot.common.robot_devices.robots.manipulator.ManipulatorRobot
+robot_type: so100
+calibration_dir: .cache/calibration/so100
+
+# `max_relative_target` limits the magnitude of the relative positional target vector for safety purposes.
+# Set this to a positive scalar to have the same value for all motors, or a list that is the same length as
+# the number of motors in your follower arms.
+max_relative_target: null
+
+leader_arms:
+  main:
+    _target_: lerobot.common.robot_devices.motors.feetech.FeetechMotorsBus
+    port: /dev/tty.usbmodem58760433331
+    motors:
+      # name: (index, model)
+      shoulder_pan: [1, "sts3215"]
+      shoulder_lift: [2, "sts3215"]
+      elbow_flex: [3, "sts3215"]
+      wrist_flex: [4, "sts3215"]
+      wrist_roll: [5, "sts3215"]
+      gripper: [6, "sts3215"]
+
+follower_arms:
+  main:
+    _target_: lerobot.common.robot_devices.motors.feetech.FeetechMotorsBus
+    port: /dev/tty.usbmodem585A0080971
+    motors:
+      # name: (index, model)
+      shoulder_pan: [1, "sts3215"]
+      shoulder_lift: [2, "sts3215"]
+      elbow_flex: [3, "sts3215"]
+      wrist_flex: [4, "sts3215"]
+      wrist_roll: [5, "sts3215"]
+      gripper: [6, "sts3215"]
+
+cameras:
+  laptop:
+    _target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera
+    camera_index: 0
+    fps: 30
+    width: 640
+    height: 480
+  phone:
+    _target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera
+    camera_index: 1
+    fps: 30
+    width: 640
+    height: 480
diff --git a/lerobot/scripts/eval_on_robot.py b/lerobot/scripts/eval_on_robot.py
new file mode 100644
index 00000000..6a790f0a
--- /dev/null
+++ b/lerobot/scripts/eval_on_robot.py
@@ -0,0 +1,335 @@
+#!/usr/bin/env python
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Evaluate a policy by running rollouts on the real robot and computing metrics.
+
+Usage example: evaluate a checkpoint from the LeRobot training script for 10 episodes.
+
+```
+python lerobot/scripts/eval_on_robot.py \
+    -p outputs/train/model/checkpoints/005000/pretrained_model \
+    eval.n_episodes=10
+```
+
+**NOTE** (michel-aractingi): This script is incomplete and is being prepared
+for running training on the real robot.
+"""
+
+import argparse
+import logging
+import time
+from copy import deepcopy
+
+import numpy as np
+import torch
+from tqdm import trange
+
+from lerobot.common.policies.policy_protocol import Policy
+from lerobot.common.robot_devices.control_utils import busy_wait, is_headless
+from lerobot.common.robot_devices.robots.factory import Robot, make_robot
+from lerobot.common.utils.utils import (
+    init_hydra_config,
+    init_logging,
+    log_say,
+)
+
+
+def rollout(robot: Robot, policy: Policy, fps: int, control_time_s: float = 20, use_amp: bool = True) -> dict:
+    """Run a policy rollout on the real robot.
+
+    The return dictionary contains:
+        "observation": A dictionary of (batch, sequence + 1, *) tensors mapped to observation
+            keys. NOTE that this has an extra sequence element relative to the other keys in the
+            dictionary. This is because an extra observation is included for after the environment is
+            terminated or truncated.
+        "action": A (batch, sequence, action_dim) tensor of actions applied based on the observations (not
+            including the last observations).
+        "reward": A (batch, sequence) tensor of rewards received for applying the actions.
+        "success": A (batch, sequence) tensor of success conditions (the only time this can be True is upon
+            environment termination/truncation).
+        "done": A (batch, sequence) tensor of **cumulative** done conditions. For any given batch element,
+            the first True is followed by Trues all the way till the end. This can be used for masking
+            extraneous elements from the sequences above.
+
+    Args:
+        robot: The robot class that defines the interface with the real robot.
+        policy: The policy. Must be a PyTorch nn module.
+
+    Returns:
+        The dictionary described above.
+    """
+    # assert isinstance(policy, nn.Module), "Policy must be a PyTorch nn module."
+    # device = get_device_from_parameters(policy)
+
+    # define keyboard listener
+    listener, events = init_keyboard_listener()
+
+    # Reset the policy. TODO (michel-aractingi) add real policy evaluation once the code is ready.
+    # policy.reset()
+
+    # Get observation from real robot
+    observation = robot.capture_observation()
+
+    # Calculate reward. TODO (michel-aractingi)
+    # in HIL-SERL it will be with a reward classifier
+    reward = calculate_reward(observation)
+    all_observations = []
+    all_actions = []
+    all_rewards = []
+    all_successes = []
+
+    start_episode_t = time.perf_counter()
+    timestamp = 0.0
+    while timestamp < control_time_s:
+        start_loop_t = time.perf_counter()
+
+        all_observations.append(deepcopy(observation))
+        # observation = {key: observation[key].to(device, non_blocking=True) for key in observation}
+
+        # Apply the next action.
+        while events["pause_policy"] and not events["human_intervention_step"]:
+            busy_wait(0.5)
+
+        if events["human_intervention_step"]:
+            # take over the robot's actions
+            observation, action = robot.teleop_step(record_data=True)
+            action = action["action"]  # teleop_step returns torch tensors in a dict
+        else:
+            # explore with policy
+            with torch.inference_mode():
+                action = robot.follower_arms["main"].read("Present_Position")
+                action = torch.from_numpy(action)
+                robot.send_action(action)
+                # action = predict_action(observation, policy, device, use_amp)
+
+        observation = robot.capture_observation()
+        # Calculate reward
+        # in HIL-SERL it will be with a reward classifier
+        reward = calculate_reward(observation)
+
+        all_actions.append(action)
+        all_rewards.append(torch.from_numpy(reward))
+        all_successes.append(torch.tensor([False]))
+
+        dt_s = time.perf_counter() - start_loop_t
+        busy_wait(1 / fps - dt_s)
+        timestamp = time.perf_counter() - start_episode_t
+        if events["exit_early"]:
+            events["exit_early"] = False
+            events["human_intervention_step"] = False
+            events["pause_policy"] = False
+            break
+    all_observations.append(deepcopy(observation))
+
+    dones = torch.tensor([False] * len(all_actions))
+    dones[-1] = True
+    # Stack the sequence along the first dimension so that we have (batch, sequence, *) tensors.
+    ret = {
+        "action": torch.stack(all_actions, dim=1),
+        "next.reward": torch.stack(all_rewards, dim=1),
+        "next.success": torch.stack(all_successes, dim=1),
+        "done": dones,
+    }
+    stacked_observations = {}
+    for key in all_observations[0]:
+        stacked_observations[key] = torch.stack([obs[key] for obs in all_observations], dim=1)
+    ret["observation"] = stacked_observations
+
+    listener.stop()
+
+    return ret
+
+
+def eval_policy(
+    robot: Robot,
+    policy: torch.nn.Module,
+    fps: float,
+    n_episodes: int,
+    control_time_s: int = 20,
+    use_amp: bool = True,
+) -> dict:
+    """Evaluate the policy by running `n_episodes` rollouts on the real robot.
+
+    Args:
+        robot: The robot class that defines the interface with the real robot.
+        policy: The policy.
+        n_episodes: The number of episodes to evaluate.
+    Returns:
+        Dictionary with metrics and data regarding the rollouts.
+    """
+    # TODO (michel-aractingi) comment this out for testing with a fixed policy
+    # assert isinstance(policy, Policy)
+    # policy.eval()
+
+    sum_rewards = []
+    max_rewards = []
+    successes = []
+    rollouts = []
+
+    start_eval = time.perf_counter()
+    progbar = trange(n_episodes, desc="Evaluating policy on real robot")
+    for _batch_idx in progbar:
+        rollout_data = rollout(robot, policy, fps, control_time_s, use_amp)
+
+        rollouts.append(rollout_data)
+        sum_rewards.append(sum(rollout_data["next.reward"]))
+        max_rewards.append(max(rollout_data["next.reward"]))
+        successes.append(rollout_data["next.success"][-1])
+
+    info = {
+        "per_episode": [
+            {
+                "episode_ix": i,
+                "sum_reward": sum_reward,
+                "max_reward": max_reward,
+                "pc_success": success * 100,
+            }
+            for i, (sum_reward, max_reward, success) in enumerate(
+                zip(
+                    sum_rewards[:n_episodes],
+                    max_rewards[:n_episodes],
+                    successes[:n_episodes],
+                    strict=False,
+                )
+            )
+        ],
+        "aggregated": {
+            "avg_sum_reward": float(np.nanmean(torch.cat(sum_rewards[:n_episodes]))),
+            "avg_max_reward": float(np.nanmean(torch.cat(max_rewards[:n_episodes]))),
+            "pc_success": float(np.nanmean(torch.cat(successes[:n_episodes])) * 100),
+            "eval_s": time.perf_counter() - start_eval,
+            "eval_ep_s": (time.perf_counter() - start_eval) / n_episodes,
+        },
+    }
+
+    if robot.is_connected:
+        robot.disconnect()
+
+    return info
+
+
+def calculate_reward(observation):
+    """
+    Placeholder for the reward computation.
+    In HIL-SERL this is done with a learned reward classifier.
+    """
+    # reward = reward_classifier(observation)
+    return np.array([0.0])
+
+
+def init_keyboard_listener():
+    # Allow exiting early while recording an episode or resetting the environment
+    # by tapping the right arrow key '->'. This might require sudo permission
+    # to allow your terminal to monitor keyboard events.
+    events = {}
+    events["exit_early"] = False
+    events["rerecord_episode"] = False
+    events["pause_policy"] = False
+    events["human_intervention_step"] = False
+
+    if is_headless():
+        logging.warning(
+            "Headless environment detected. On-screen camera display and keyboard inputs will not be available."
+        )
+        listener = None
+        return listener, events
+
+    # Only import pynput if not in a headless environment
+    from pynput import keyboard
+
+    def on_press(key):
+        try:
+            if key == keyboard.Key.right:
+                print("Right arrow key pressed. Exiting loop...")
+                events["exit_early"] = True
+            elif key == keyboard.Key.left:
+                print("Left arrow key pressed. Exiting loop and rerecording the last episode...")
+                events["rerecord_episode"] = True
+                events["exit_early"] = True
+            elif key == keyboard.Key.space:
+                # First space press: pause the policy so the user can get ready.
+                # Second space press: the user is ready to start the intervention.
+                if not events["pause_policy"]:
+                    print(
+                        "Space key pressed. Human intervention required.\n"
+                        "Place the leader in a similar pose to the follower and press space again."
+                    )
+                    events["pause_policy"] = True
+                    log_say("Human intervention stage. Get ready to take over.", play_sounds=True)
+                else:
+                    events["human_intervention_step"] = True
+                    print("Space key pressed. Human intervention starting.")
+                    log_say("Starting human intervention.", play_sounds=True)

+        except Exception as e:
+            print(f"Error handling key press: {e}")
+
+    listener = keyboard.Listener(on_press=on_press)
+    listener.start()
+
+    return listener, events
+
+
+if __name__ == "__main__":
+    init_logging()
+
+    parser = argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    group = parser.add_mutually_exclusive_group(required=True)
+    parser.add_argument(
+        "--robot-path",
+        type=str,
+        default="lerobot/configs/robot/koch.yaml",
+        help="Path to robot yaml file used to instantiate the robot using `make_robot` factory function.",
+    )
+    parser.add_argument(
+        "--robot-overrides",
+        type=str,
+        nargs="*",
+        help="Any key=value arguments to override config values (use dots for nested overrides)",
+    )
+    group.add_argument(
+        "-p",
+        "--pretrained-policy-name-or-path",
+        help=(
+            "Either the repo ID of a model hosted on the Hub or a path to a directory containing weights "
+            "saved using `Policy.save_pretrained`. If not provided, the policy is initialized from scratch "
+            "(useful for debugging). This argument is mutually exclusive with `--config`."
+        ),
+    )
+    group.add_argument(
+        "--config",
+        help=(
+            "Path to a yaml config you want to use for initializing a policy from scratch (useful for "
+            "debugging). This argument is mutually exclusive with `--pretrained-policy-name-or-path` (`-p`)."
+        ),
+    )
+    parser.add_argument("--revision", help="Optionally provide the Hugging Face Hub revision ID.")
+    parser.add_argument(
+        "--out-dir",
+        help=(
+            "Where to save the evaluation outputs. If not provided, outputs are saved in "
+            "outputs/eval/{timestamp}_{env_name}_{policy_name}"
+        ),
+    )
+
+    args = parser.parse_args()
+
+    robot_cfg = init_hydra_config(args.robot_path, args.robot_overrides)
+    robot = make_robot(robot_cfg)
+    if not robot.is_connected:
+        robot.connect()
+
+    eval_policy(robot, None, fps=40, n_episodes=2, control_time_s=100)
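
Note: `calculate_reward` in the patch above is a stub that always returns `0.0`; the HIL-SERL reward classifier is left as a TODO. Below is a minimal sketch of how a learned classifier could be plugged in. It is illustrative only: `DummyRewardClassifier`, the `image_key` default, and the assumption that camera observations arrive as HWC uint8 torch tensors are placeholders, not part of this patch.

```python
import numpy as np
import torch


class DummyRewardClassifier(torch.nn.Module):
    """Hypothetical binary success classifier over a single camera frame."""

    def __init__(self):
        super().__init__()
        # Tiny conv probe; a real classifier would use a pretrained vision encoder plus a head.
        self.net = torch.nn.Sequential(
            torch.nn.Conv2d(3, 8, kernel_size=3, stride=2),
            torch.nn.ReLU(),
            torch.nn.AdaptiveAvgPool2d(1),
            torch.nn.Flatten(),
            torch.nn.Linear(8, 1),
        )

    def forward(self, image: torch.Tensor) -> torch.Tensor:
        # image: (batch, 3, height, width) float tensor scaled to [0, 1]
        return torch.sigmoid(self.net(image))


def calculate_reward(
    observation: dict,
    classifier: torch.nn.Module,
    image_key: str = "observation.images.laptop",
) -> np.ndarray:
    """Predict a reward in [0, 1] from the latest camera frame (assumed HWC uint8)."""
    image = observation[image_key].float() / 255.0
    if image.ndim == 3:
        image = image.unsqueeze(0)  # add a batch dimension for a single frame
    image = image.permute(0, 3, 1, 2)  # HWC -> CHW
    with torch.inference_mode():
        reward = classifier(image)
    return reward.squeeze(-1).cpu().numpy()
```

With something along these lines, the rollout loop could call `calculate_reward(observation, classifier)` in place of the zero-reward stub.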