lerobot/lerobot/scripts/eval.py
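"""Evaluate a policy on an environment and report rollout metrics.

When `cfg.pretrained_model_path` is set, a pretrained policy is loaded and wrapped in a
TensorDictModule; otherwise the rollouts use random actions. The script runs
`cfg.eval_episodes` episodes of at most `cfg.env.episode_length` steps each, saves the
episode videos under `<output_dir>/eval`, and prints average sum/max reward and the
success rate.
"""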

import threading
from pathlib import Path
import hydra
import imageio
import numpy as np
import torch
import tqdm
from tensordict.nn import TensorDictModule
from termcolor import colored
from torchrl.envs import EnvBase
from lerobot.common.envs.factory import make_env
from lerobot.common.policies.factory import make_policy
from lerobot.common.utils import set_seed


def write_video(video_path, stacked_frames, fps):
    imageio.mimsave(video_path, stacked_frames, fps=fps)


def eval_policy(
    env: EnvBase,
    policy: TensorDictModule = None,
    num_episodes: int = 10,
    max_steps: int = 30,
    save_video: bool = False,
    video_dir: Path = None,
    fps: int = 15,
    return_first_video: bool = False,
):
    """Roll out `policy` in `env` for `num_episodes` episodes and return aggregate reward/success metrics."""
    sum_rewards = []
    max_rewards = []
    successes = []
    threads = []  # background threads writing videos to disk
    for i in tqdm.tqdm(range(num_episodes)):
        tensordict = env.reset()
        ep_frames = []
        if save_video or (return_first_video and i == 0):

            def rendering_callback(env, td=None):
                ep_frames.append(env.render())

            # render first frame before rollout
            rendering_callback(env)
        else:
            rendering_callback = None

        with torch.inference_mode():
            rollout = env.rollout(
                max_steps=max_steps,
                policy=policy,
                callback=rendering_callback,
                auto_reset=False,
                tensordict=tensordict,
                auto_cast_to_device=True,
            )
        # print(", ".join([f"{x:.3f}" for x in rollout["next", "reward"][:,0].tolist()]))
        ep_sum_reward = rollout["next", "reward"].sum()
        ep_max_reward = rollout["next", "reward"].max()
        ep_success = rollout["next", "success"].any()
        sum_rewards.append(ep_sum_reward.item())
        max_rewards.append(ep_max_reward.item())
        successes.append(ep_success.item())

        if save_video or (return_first_video and i == 0):
            stacked_frames = np.stack(ep_frames)
            if save_video:
                video_dir.mkdir(parents=True, exist_ok=True)
                video_path = video_dir / f"eval_episode_{i}.mp4"
                # encode the video in a background thread so disk I/O does not block the rollout loop
                thread = threading.Thread(
                    target=write_video,
                    args=(str(video_path), stacked_frames, fps),
                )
                thread.start()
                threads.append(thread)
            if return_first_video and i == 0:
                # channels-last (t, h, w, c) -> channels-first (t, c, h, w)
                first_video = stacked_frames.transpose(0, 3, 1, 2)

    # wait for all pending video writes before reporting metrics
    for thread in threads:
        thread.join()

    metrics = {
        "avg_sum_reward": np.nanmean(sum_rewards),
        "avg_max_reward": np.nanmean(max_rewards),
        "pc_success": np.nanmean(successes) * 100,
    }
    if return_first_video:
        return metrics, first_video
    return metrics


@hydra.main(version_base=None, config_name="default", config_path="../configs")
def eval_cli(cfg: dict):
    eval(cfg, out_dir=hydra.core.hydra_config.HydraConfig.get().runtime.output_dir)


def eval(cfg: dict, out_dir=None):
    if out_dir is None:
        raise NotImplementedError()

    assert torch.cuda.is_available()
    torch.backends.cudnn.benchmark = True
    set_seed(cfg.seed)
    print(colored("Log dir:", "yellow", attrs=["bold"]), out_dir)

    env = make_env(cfg)

    if cfg.pretrained_model_path:
        policy = make_policy(cfg)
        policy = TensorDictModule(
            policy,
            in_keys=["observation", "step_count"],
            out_keys=["action"],
        )
    else:
        # when policy is None, env.rollout() samples random actions
        policy = None

    metrics = eval_policy(
        env,
        policy=policy,
        save_video=True,
        video_dir=Path(out_dir) / "eval",
        fps=cfg.env.fps,
        max_steps=cfg.env.episode_length,
        num_episodes=cfg.eval_episodes,
    )
    print(metrics)


if __name__ == "__main__":
    eval_cli()