Add human intervention mechanism and eval_robot script to evaluate policy on the robot (#541)
Co-authored-by: Yoel <yoel.chornton@gmail.com>
parent e35546f58e
commit 7fcf638c0d
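The three config hunks below only change the `port:` value of a motors bus so it points at a different USB serial device. If you adapt them to your own arms, the available `/dev/tty.usbmodem*` devices can be listed with pyserial; this is a minimal sketch assuming `pyserial` is installed, and it is not part of this commit:

```
# Hypothetical helper, not part of this commit: list candidate serial devices
# whose names can be copied into the `port:` fields changed below.
from serial.tools import list_ports  # requires `pip install pyserial`

for port in list_ports.comports():
    print(port.device, "-", port.description)
```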
@@ -10,7 +10,7 @@ max_relative_target: null
 leader_arms:
   main:
     _target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
-    port: /dev/tty.usbmodem575E0031751
+    port: /dev/tty.usbmodem58760430441
     motors:
       # name: (index, model)
       shoulder_pan: [1, "xl330-m077"]
@@ -23,7 +23,7 @@ leader_arms:
 follower_arms:
   main:
     _target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
-    port: /dev/tty.usbmodem575E0032081
+    port: /dev/tty.usbmodem585A0083391
     motors:
       # name: (index, model)
       shoulder_pan: [1, "xl430-w250"]
@@ -18,7 +18,7 @@ max_relative_target: null
 leader_arms:
   main:
     _target_: lerobot.common.robot_devices.motors.feetech.FeetechMotorsBus
-    port: /dev/tty.usbmodem585A0077581
+    port: /dev/tty.usbmodem58760433331
     motors:
       # name: (index, model)
       shoulder_pan: [1, "sts3215"]
@@ -0,0 +1,335 @@
#!/usr/bin/env python

# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluate a policy by running rollouts on the real robot and computing metrics.

Usage example: evaluate a checkpoint from the LeRobot training script for 10 episodes.

```
python lerobot/scripts/eval_on_robot.py \
    -p outputs/train/model/checkpoints/005000/pretrained_model \
    eval.n_episodes=10
```

**NOTE** (michel-aractingi): This script is incomplete and is being prepared
for running training on the real robot.
"""

import argparse
import logging
import time
from copy import deepcopy

import numpy as np
import torch
from tqdm import trange

from lerobot.common.policies.policy_protocol import Policy
from lerobot.common.robot_devices.control_utils import busy_wait, is_headless
from lerobot.common.robot_devices.robots.factory import Robot, make_robot
from lerobot.common.utils.utils import (
    init_hydra_config,
    init_logging,
    log_say,
)


def rollout(robot: Robot, policy: Policy, fps: int, control_time_s: float = 20, use_amp: bool = True) -> dict:
    """Run a batched policy rollout on the real robot.

    The return dictionary contains:
        "robot": A dictionary of (batch, sequence + 1, *) tensors mapped to observation
            keys. NOTE that this has an extra sequence element relative to the other keys in the
            dictionary. This is because an extra observation is included for after the environment is
            terminated or truncated.
        "action": A (batch, sequence, action_dim) tensor of actions applied based on the observations (not
            including the last observations).
        "reward": A (batch, sequence) tensor of rewards received for applying the actions.
        "success": A (batch, sequence) tensor of success conditions (the only time this can be True is upon
            environment termination/truncation).
        "done": A (batch, sequence) tensor of **cumulative** done conditions. For any given batch element,
            the first True is followed by True's all the way till the end. This can be used for masking
            extraneous elements from the sequences above.

    Args:
        robot: The robot class that defines the interface with the real robot.
        policy: The policy. Must be a PyTorch nn module.

    Returns:
        The dictionary described above.
    """
    # assert isinstance(policy, nn.Module), "Policy must be a PyTorch nn module."
    # device = get_device_from_parameters(policy)

    # Define the keyboard listener used for human intervention.
    listener, events = init_keyboard_listener()

    # Reset the policy. TODO (michel-aractingi) add real policy evaluation once the code is ready.
    # policy.reset()

    # Get observation from real robot
    observation = robot.capture_observation()

    # Calculate reward. TODO (michel-aractingi)
    # in HIL-SERL it will be with a reward classifier
    reward = calculate_reward(observation)

    all_observations = []
    all_actions = []
    all_rewards = []
    all_successes = []

    start_episode_t = time.perf_counter()
    timestamp = 0.0
    while timestamp < control_time_s:
        start_loop_t = time.perf_counter()

        all_observations.append(deepcopy(observation))
        # observation = {key: observation[key].to(device, non_blocking=True) for key in observation}

        # Apply the next action.
        while events["pause_policy"] and not events["human_intervention_step"]:
            busy_wait(0.5)

        if events["human_intervention_step"]:
            # take over the robot's actions
            observation, action = robot.teleop_step(record_data=True)
            action = action["action"]  # teleop step returns torch tensors but in a dict
        else:
            # explore with policy
            with torch.inference_mode():
                action = robot.follower_arms["main"].read("Present_Position")
                action = torch.from_numpy(action)
                robot.send_action(action)
                # action = predict_action(observation, policy, device, use_amp)

        observation = robot.capture_observation()
        # Calculate reward
        # in HIL-SERL it will be with a reward classifier
        reward = calculate_reward(observation)

        all_actions.append(action)
        all_rewards.append(torch.from_numpy(reward))
        all_successes.append(torch.tensor([False]))

        dt_s = time.perf_counter() - start_loop_t
        busy_wait(1 / fps - dt_s)
        timestamp = time.perf_counter() - start_episode_t
        if events["exit_early"]:
            events["exit_early"] = False
            events["human_intervention_step"] = False
            events["pause_policy"] = False
            break
    all_observations.append(deepcopy(observation))

    dones = torch.tensor([False] * len(all_actions))
    dones[-1] = True
    # Stack the sequence along the first dimension so that we have (batch, sequence, *) tensors.
    ret = {
        "action": torch.stack(all_actions, dim=1),
        "next.reward": torch.stack(all_rewards, dim=1),
        "next.success": torch.stack(all_successes, dim=1),
        "done": dones,
    }
    stacked_observations = {}
    for key in all_observations[0]:
        stacked_observations[key] = torch.stack([obs[key] for obs in all_observations], dim=1)
    ret["observation"] = stacked_observations

    listener.stop()

    return ret


def eval_policy(
    robot: Robot,
    policy: torch.nn.Module,
    fps: float,
    n_episodes: int,
    control_time_s: int = 20,
    use_amp: bool = True,
) -> dict:
    """
    Args:
        robot: The robot class that defines the interface with the real robot.
        policy: The policy.
        n_episodes: The number of episodes to evaluate.
    Returns:
        Dictionary with metrics and data regarding the rollouts.
    """
    # TODO (michel-aractingi) comment this out for testing with a fixed policy
    # assert isinstance(policy, Policy)
    # policy.eval()

    sum_rewards = []
    max_rewards = []
    successes = []
    rollouts = []

    start_eval = time.perf_counter()
    progbar = trange(n_episodes, desc="Evaluating policy on real robot")
    for _batch_idx in progbar:
        rollout_data = rollout(robot, policy, fps, control_time_s, use_amp)

        rollouts.append(rollout_data)
        sum_rewards.append(sum(rollout_data["next.reward"]))
        max_rewards.append(max(rollout_data["next.reward"]))
        successes.append(rollout_data["next.success"][-1])

    info = {
        "per_episode": [
            {
                "episode_ix": i,
                "sum_reward": sum_reward,
                "max_reward": max_reward,
                "pc_success": success * 100,
            }
            for i, (sum_reward, max_reward, success) in enumerate(
                zip(
                    sum_rewards[:n_episodes],
                    max_rewards[:n_episodes],
                    successes[:n_episodes],
                    strict=False,
                )
            )
        ],
        "aggregated": {
            "avg_sum_reward": float(np.nanmean(torch.cat(sum_rewards[:n_episodes]))),
            "avg_max_reward": float(np.nanmean(torch.cat(max_rewards[:n_episodes]))),
            "pc_success": float(np.nanmean(torch.cat(successes[:n_episodes])) * 100),
            "eval_s": time.time() - start_eval,
            "eval_ep_s": (time.time() - start_eval) / n_episodes,
        },
    }

    if robot.is_connected:
        robot.disconnect()

    return info


def calculate_reward(observation):
    """
    Method to calculate the reward. In HIL-SERL this is done with a learned reward classifier.
    """
    # reward = reward_classifier(observation)
    return np.array([0.0])


def init_keyboard_listener():
    # Allow exiting early while recording an episode or resetting the environment
    # by tapping the right arrow key '->'. This might require a sudo permission
    # to allow your terminal to monitor keyboard events.
    events = {}
    events["exit_early"] = False
    events["rerecord_episode"] = False
    events["pause_policy"] = False
    events["human_intervention_step"] = False

    if is_headless():
        logging.warning(
            "Headless environment detected. On-screen cameras display and keyboard inputs will not be available."
        )
        listener = None
        return listener, events

    # Only import pynput if not in a headless environment
    from pynput import keyboard

    def on_press(key):
        try:
            if key == keyboard.Key.right:
                print("Right arrow key pressed. Exiting loop...")
                events["exit_early"] = True
            elif key == keyboard.Key.left:
                print("Left arrow key pressed. Exiting loop and rerecord the last episode...")
                events["rerecord_episode"] = True
                events["exit_early"] = True
            elif key == keyboard.Key.space:
                # On the first space press, pause the policy so the user can get ready;
                # on the second space press, the user is ready to start the intervention.
                if not events["pause_policy"]:
                    print(
                        "Space key pressed. Human intervention required.\n"
                        "Place the leader in similar pose to the follower and press space again."
                    )
                    events["pause_policy"] = True
                    log_say("Human intervention stage. Get ready to take over.", play_sounds=True)
                else:
                    events["human_intervention_step"] = True
                    print("Space key pressed. Human intervention starting.")
                    log_say("Starting human intervention.", play_sounds=True)

        except Exception as e:
            print(f"Error handling key press: {e}")

    listener = keyboard.Listener(on_press=on_press)
    listener.start()

    return listener, events


if __name__ == "__main__":
    init_logging()

    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        "--robot-path",
        type=str,
        default="lerobot/configs/robot/koch.yaml",
        help="Path to robot yaml file used to instantiate the robot using `make_robot` factory function.",
    )
    group.add_argument(
        "--robot-overrides",
        type=str,
        nargs="*",
        help="Any key=value arguments to override config values (use dots for nested overrides).",
    )
    group.add_argument(
        "-p",
        "--pretrained-policy-name-or-path",
        help=(
            "Either the repo ID of a model hosted on the Hub or a path to a directory containing weights "
            "saved using `Policy.save_pretrained`. If not provided, the policy is initialized from scratch "
            "(useful for debugging). This argument is mutually exclusive with `--config`."
        ),
    )
    group.add_argument(
        "--config",
        help=(
            "Path to a yaml config you want to use for initializing a policy from scratch (useful for "
            "debugging). This argument is mutually exclusive with `--pretrained-policy-name-or-path` (`-p`)."
        ),
    )
    parser.add_argument("--revision", help="Optionally provide the Hugging Face Hub revision ID.")
    parser.add_argument(
        "--out-dir",
        help=(
            "Where to save the evaluation outputs. If not provided, outputs are saved in "
            "outputs/eval/{timestamp}_{env_name}_{policy_name}"
        ),
    )

    args = parser.parse_args()

    robot_cfg = init_hydra_config(args.robot_path, args.robot_overrides)
    robot = make_robot(robot_cfg)
    if not robot.is_connected:
        robot.connect()

    eval_policy(robot, None, fps=40, n_episodes=2, control_time_s=100)
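For reference, the evaluation loop added in this commit can also be driven programmatically rather than through the CLI shown in the docstring. This is a minimal sketch mirroring the script's `__main__` block, assuming the file above lives at `lerobot/scripts/eval_on_robot.py` and a connected robot is described by the default `koch.yaml` config:

```
# Programmatic sketch of the new evaluation loop (assumptions: module importable as
# lerobot.scripts.eval_on_robot, robot config available at lerobot/configs/robot/koch.yaml).
from lerobot.common.robot_devices.robots.factory import make_robot
from lerobot.common.utils.utils import init_hydra_config, init_logging
from lerobot.scripts.eval_on_robot import eval_policy

init_logging()
robot_cfg = init_hydra_config("lerobot/configs/robot/koch.yaml", None)
robot = make_robot(robot_cfg)
if not robot.is_connected:
    robot.connect()

# policy=None: for now rollout() replays the follower arm's present position
# (or hands control to the leader arm during human intervention) instead of a learned policy.
info = eval_policy(robot, None, fps=30, n_episodes=1, control_time_s=20)
print(info["aggregated"])
```

During a rollout, pressing the space bar pauses the policy, a second press hands control to the leader arm for human intervention, and the right arrow key ends the episode early.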