From 8db94f73a19a2d18d2869da7f07943e022472126 Mon Sep 17 00:00:00 2001 From: Michel Aractingi Date: Sun, 27 Oct 2024 12:12:56 +0100 Subject: [PATCH] added success rate to envs --- lerobot/configs/robot/koch.yaml | 2 +- lerobot/scripts/control_robot.py | 1 + lerobot/scripts/control_sim_robot.py | 30 +++++++++++++++++++--------- lerobot/scripts/eval.py | 2 +- lerobot/scripts/train.py | 2 +- 5 files changed, 25 insertions(+), 12 deletions(-) diff --git a/lerobot/configs/robot/koch.yaml b/lerobot/configs/robot/koch.yaml index 5fcd5596..0e69593e 100644 --- a/lerobot/configs/robot/koch.yaml +++ b/lerobot/configs/robot/koch.yaml @@ -10,7 +10,7 @@ max_relative_target: null leader_arms: main: _target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus - port: /dev/tty.usbmodem585A0078211 + port: /dev/tty.usbmodem58760430441 motors: # name: (index, model) shoulder_pan: [1, "xl330-m077"] diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index 3b6345b4..2682734c 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -1056,6 +1056,7 @@ if __name__ == "__main__": control_mode = args.mode robot_path = args.robot_path robot_overrides = args.robot_overrides + kwargs = vars(args) del kwargs["mode"] del kwargs["robot_path"] diff --git a/lerobot/scripts/control_sim_robot.py b/lerobot/scripts/control_sim_robot.py index 99268cf4..1e95c5b7 100644 --- a/lerobot/scripts/control_sim_robot.py +++ b/lerobot/scripts/control_sim_robot.py @@ -87,7 +87,7 @@ import gymnasium as gym import multiprocessing from contextlib import nullcontext - +import importlib import cv2 import torch import numpy as np @@ -142,6 +142,7 @@ def say(text, blocking=False): os.system(cmd) + def save_image(img_arr, key, frame_index, episode_index, videos_dir): img = Image.fromarray(img_arr) path = videos_dir / f"{key}_episode_{episode_index:06d}" / f"frame_{frame_index:06d}.png" @@ -155,7 +156,7 @@ def show_image_observations(observation_queue:multiprocessing.Queue): images = [] if keys is None: keys = [k for k in observations if 'image' in k] for key in keys: - images.append(observations[key].squeeze(0)) + images.append(observations[key])#.squeeze(0)) cat_image = np.concatenate(images, 1) cv2.imshow('observations', cv2.cvtColor(cat_image, cv2.COLOR_RGB2BGR)) cv2.waitKey(1) @@ -273,6 +274,8 @@ def create_rl_hf_dataset(data_dict): features["next.reward"] = Value(dtype="float32", id=None) features["seed"] = Value(dtype="int64", id=None) + features["next.success"] = Value(dtype="bool", id=None) + features["episode_index"] = Value(dtype="int64", id=None) features["frame_index"] = Value(dtype="int64", id=None) features["timestamp"] = Value(dtype="float32", id=None) @@ -417,7 +420,7 @@ def record( while episode_index < num_episodes: logging.info(f"Recording episode {episode_index}") say(f"Recording episode {episode_index}") - ep_dict = {'action':[], 'next.reward':[]} + ep_dict = {'action':[], 'next.reward':[], 'next.success':[]} for k in state_keys_dict: ep_dict[k] = [] frame_index = 0 @@ -441,7 +444,7 @@ def record( str_key = key if key.startswith('observation.images.') else 'observation.images.' + key futures += [ executor.submit( - save_image, observation[key].squeeze(0), str_key, frame_index, episode_index, videos_dir) + save_image, observation[key], str_key, frame_index, episode_index, videos_dir) ] if not is_headless() and visualize_images: @@ -453,15 +456,19 @@ def record( # Advance the sim environment if len(action.shape) == 1: action = np.expand_dims(action, 0) - observation, reward, _, _ , info = env.step(action) + observation, reward, terminated, _ , info = env.step(action) + + success = info.get('is_success', False) + ep_dict['action'].append(torch.from_numpy(action)) ep_dict['next.reward'].append(torch.tensor(reward)) - print(reward) + ep_dict['next.success'].append(torch.tensor(success)) frame_index += 1 timestamp = time.perf_counter() - start_episode_t - if exit_early: + + if exit_early or terminated: exit_early = False break @@ -506,6 +513,7 @@ def record( ep_dict[key] = torch.vstack(ep_dict[key]) * 180.0 / np.pi ep_dict['action'] = torch.vstack(ep_dict['action']) * 180.0 / np.pi ep_dict['next.reward'] = torch.stack(ep_dict['next.reward']) + ep_dict['next.success'] = torch.stack(ep_dict['next.success']) ep_dict["seed"] = torch.tensor([seed] * num_frames) ep_dict["episode_index"] = torch.tensor([episode_index] * num_frames) @@ -656,7 +664,7 @@ def replay(env, action = items[idx]["action"] - env.step(action.unsqueeze(0).numpy() * np.pi / 180.0) + env.step(action.numpy() * np.pi / 180.0) dt_s = time.perf_counter() - start_episode_t busy_wait(1 / fps - dt_s) @@ -806,7 +814,11 @@ if __name__ == "__main__": # make gym env env_cfg = init_hydra_config(env_config_path) - env_fn = lambda: make_env(env_cfg, n_envs=1) + #env_fn = lambda: make_env(env_cfg, n_envs=1) + package_name = f"gym_{env_cfg.env.name}" + + importlib.import_module(f"gym_{env_cfg.env.name}") + env_fn = lambda: gym.make(env_cfg.env.handle, disable_env_checker=True, **env_cfg.env.gym) robot = None if control_mode != 'replay': diff --git a/lerobot/scripts/eval.py b/lerobot/scripts/eval.py index 90cbd280..ed5cd8ee 100644 --- a/lerobot/scripts/eval.py +++ b/lerobot/scripts/eval.py @@ -165,7 +165,7 @@ def rollout( # VectorEnv stores is_success in `info["final_info"][env_index]["is_success"]`. "final_info" isn't # available of none of the envs finished. - if False and "final_info" in info: + if "final_info" in info: successes = [info["is_success"] if info is not None else False for info in info["final_info"]] else: successes = [False] * env.num_envs diff --git a/lerobot/scripts/train.py b/lerobot/scripts/train.py index 795189b0..6406295b 100644 --- a/lerobot/scripts/train.py +++ b/lerobot/scripts/train.py @@ -482,7 +482,7 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No **{k: {"shape": v, "dtype": np.dtype("float32")} for k, v in policy.config.output_shapes.items()}, "next.reward": {"shape": (), "dtype": np.dtype("float32")}, "next.done": {"shape": (), "dtype": np.dtype("?")}, - #"next.success": {"shape": (), "dtype": np.dtype("?")}, + "next.success": {"shape": (), "dtype": np.dtype("?")}, }, buffer_capacity=cfg.training.online_buffer_capacity, fps=online_env.unwrapped.metadata["render_fps"],