# lerobot/lerobot/scripts/eval.py
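"""Evaluate a pretrained (or random) policy on a batched simulation environment.

Configuration is handled by Hydra (see `eval_cli` at the bottom of this file). A typical
invocation looks roughly like the following; the exact override keys depend on the config
files under `lerobot/configs`:

    python lerobot/scripts/eval.py eval_episodes=10 policy.pretrained_model_path=<path>

Rollout videos are written under `<hydra_output_dir>/eval`.
"""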

import logging
import threading
import time
from pathlib import Path

import einops
import hydra
import imageio
import numpy as np
import torch
import tqdm
from tensordict.nn import TensorDictModule
from torchrl.envs import EnvBase
from torchrl.envs.batched_envs import BatchedEnvBase

from lerobot.common.datasets.factory import make_offline_buffer
from lerobot.common.envs.factory import make_env
from lerobot.common.logger import log_output_dir
from lerobot.common.policies.abstract import AbstractPolicy
from lerobot.common.policies.factory import make_policy
from lerobot.common.utils import init_logging, set_seed


def write_video(video_path, stacked_frames, fps):
    imageio.mimsave(video_path, stacked_frames, fps=fps)


def eval_policy(
    env: BatchedEnvBase,
    policy: AbstractPolicy,
    num_episodes: int = 10,
    max_steps: int = 30,
    save_video: bool = False,
    video_dir: Path = None,
    fps: int = 15,
    return_first_video: bool = False,
):
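    """Roll out `policy` in the batched `env` and aggregate metrics over `num_episodes` episodes.

    Returns a dict with the average sum reward, average max reward, success percentage and
    timing info. If `save_video` is set, one video per episode is written to `video_dir`
    in background threads; if `return_first_video` is set, the frames of the first episode
    are also returned as a second value.
    """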
    policy.eval()

    start = time.time()
    sum_rewards = []
    max_rewards = []
    successes = []
    threads = []  # for video saving threads
    episode_counter = 0  # for saving the correct number of videos
    # TODO(alexander-soare): if num_episodes is not evenly divisible by the batch size, this will do more work than
    # needed as I'm currently taking a ceil.
    for i in tqdm.tqdm(range(-(-num_episodes // env.batch_size[0]))):
        ep_frames = []

        def maybe_render_frame(env: EnvBase, _):
            if save_video or (return_first_video and i == 0):  # noqa: B023
                ep_frames.append(env.render())  # noqa: B023

        with torch.inference_mode():
            # TODO(alexander-soare): When `break_when_any_done == False` this rolls out for max_steps even when all
            # envs are done the first time. But we only use the first rollout. This is a waste of compute.
            policy.clear_action_queue()
            rollout = env.rollout(
                max_steps=max_steps,
                policy=policy,
                auto_cast_to_device=True,
                callback=maybe_render_frame,
                break_when_any_done=env.batch_size[0] == 1,
            )
        # Figure out where in each rollout sequence the first done condition was encountered (results after this won't
        # be included).
        # Note: this assumes that the shape of the done key is (batch_size, max_steps, 1).
        # Note: this relies on a property of argmax: that it returns the first occurrence as a tiebreaker.
        rollout_steps = rollout["next", "done"].shape[1]
        done_indices = torch.argmax(rollout["next", "done"].to(int), axis=1)  # (batch_size, 1)
        mask = (torch.arange(rollout_steps) <= done_indices).unsqueeze(-1)  # (batch_size, rollout_steps, 1)

        batch_sum_reward = einops.reduce((rollout["next", "reward"] * mask), "b n 1 -> b", "sum")
        batch_max_reward = einops.reduce((rollout["next", "reward"] * mask), "b n 1 -> b", "max")
        batch_success = einops.reduce((rollout["next", "success"] * mask), "b n 1 -> b", "any")
        sum_rewards.extend(batch_sum_reward.tolist())
        max_rewards.extend(batch_max_reward.tolist())
        successes.extend(batch_success.tolist())
        if save_video or (return_first_video and i == 0):
            batch_stacked_frames = np.stack(ep_frames)  # (t, b, *)
            batch_stacked_frames = batch_stacked_frames.transpose(
                1, 0, *range(2, batch_stacked_frames.ndim)
            )  # (b, t, *)

            if save_video:
                for stacked_frames, done_index in zip(
                    batch_stacked_frames, done_indices.flatten().tolist(), strict=False
                ):
                    if episode_counter >= num_episodes:
                        continue
                    video_dir.mkdir(parents=True, exist_ok=True)
                    video_path = video_dir / f"eval_episode_{episode_counter}.mp4"
                    thread = threading.Thread(
                        target=write_video,
                        args=(str(video_path), stacked_frames[:done_index], fps),
                    )
                    thread.start()
                    threads.append(thread)
                    episode_counter += 1

            if return_first_video and i == 0:
                # channel-last frames (t, h, w, c) -> channel-first (t, c, h, w)
                first_video = batch_stacked_frames[0].transpose(0, 3, 1, 2)

    for thread in threads:
        thread.join()
    info = {
        "avg_sum_reward": np.nanmean(sum_rewards[:num_episodes]),
        "avg_max_reward": np.nanmean(max_rewards[:num_episodes]),
        "pc_success": np.nanmean(successes[:num_episodes]) * 100,
        "eval_s": time.time() - start,
        "eval_ep_s": (time.time() - start) / num_episodes,
    }
    if return_first_video:
        return info, first_video
    return info


@hydra.main(version_base=None, config_name="default", config_path="../configs")
def eval_cli(cfg: dict):
    eval(cfg, out_dir=hydra.core.hydra_config.HydraConfig.get().runtime.output_dir)


def eval(cfg: dict, out_dir=None):
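    """Construct the environment and policy from the Hydra config and run `eval_policy`.

    `out_dir` must be provided; evaluation videos are written under `out_dir/eval`.
    """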
    if out_dir is None:
        raise NotImplementedError()

    init_logging()

    if cfg.device == "cuda":
        assert torch.cuda.is_available()
    else:
        logging.warning("Using CPU, this will be slow.")

    torch.backends.cudnn.benchmark = True
    torch.backends.cuda.matmul.allow_tf32 = True
    set_seed(cfg.seed)

    log_output_dir(out_dir)

    logging.info("make_offline_buffer")
    offline_buffer = make_offline_buffer(cfg)

    logging.info("make_env")
    env = make_env(cfg, transform=offline_buffer.transform)

    if cfg.policy.pretrained_model_path:
        policy = make_policy(cfg)
        policy = TensorDictModule(
            policy,
            in_keys=["observation", "step_count"],
            out_keys=["action"],
        )
    else:
        # when policy is None, rollout a random policy
        policy = None

    metrics = eval_policy(
        env,
        policy=policy,
        save_video=True,
        video_dir=Path(out_dir) / "eval",
        fps=cfg.env.fps,
        max_steps=cfg.env.episode_length,
        num_episodes=cfg.eval_episodes,
    )
    print(metrics)

    logging.info("End of eval")


if __name__ == "__main__":
    eval_cli()