added success rate to envs

This commit is contained in:
Michel Aractingi 2024-10-27 12:12:56 +01:00
parent 5e01c21692
commit 8db94f73a1
5 changed files with 25 additions and 12 deletions

View File

@ -10,7 +10,7 @@ max_relative_target: null
leader_arms: leader_arms:
main: main:
_target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus _target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
port: /dev/tty.usbmodem585A0078211 port: /dev/tty.usbmodem58760430441
motors: motors:
# name: (index, model) # name: (index, model)
shoulder_pan: [1, "xl330-m077"] shoulder_pan: [1, "xl330-m077"]

View File

@ -1056,6 +1056,7 @@ if __name__ == "__main__":
control_mode = args.mode control_mode = args.mode
robot_path = args.robot_path robot_path = args.robot_path
robot_overrides = args.robot_overrides robot_overrides = args.robot_overrides
kwargs = vars(args) kwargs = vars(args)
del kwargs["mode"] del kwargs["mode"]
del kwargs["robot_path"] del kwargs["robot_path"]

View File

@ -87,7 +87,7 @@ import gymnasium as gym
import multiprocessing import multiprocessing
from contextlib import nullcontext from contextlib import nullcontext
import importlib
import cv2 import cv2
import torch import torch
import numpy as np import numpy as np
@ -142,6 +142,7 @@ def say(text, blocking=False):
os.system(cmd) os.system(cmd)
def save_image(img_arr, key, frame_index, episode_index, videos_dir): def save_image(img_arr, key, frame_index, episode_index, videos_dir):
img = Image.fromarray(img_arr) img = Image.fromarray(img_arr)
path = videos_dir / f"{key}_episode_{episode_index:06d}" / f"frame_{frame_index:06d}.png" path = videos_dir / f"{key}_episode_{episode_index:06d}" / f"frame_{frame_index:06d}.png"
@ -155,7 +156,7 @@ def show_image_observations(observation_queue:multiprocessing.Queue):
images = [] images = []
if keys is None: keys = [k for k in observations if 'image' in k] if keys is None: keys = [k for k in observations if 'image' in k]
for key in keys: for key in keys:
images.append(observations[key].squeeze(0)) images.append(observations[key])#.squeeze(0))
cat_image = np.concatenate(images, 1) cat_image = np.concatenate(images, 1)
cv2.imshow('observations', cv2.cvtColor(cat_image, cv2.COLOR_RGB2BGR)) cv2.imshow('observations', cv2.cvtColor(cat_image, cv2.COLOR_RGB2BGR))
cv2.waitKey(1) cv2.waitKey(1)
@ -273,6 +274,8 @@ def create_rl_hf_dataset(data_dict):
features["next.reward"] = Value(dtype="float32", id=None) features["next.reward"] = Value(dtype="float32", id=None)
features["seed"] = Value(dtype="int64", id=None) features["seed"] = Value(dtype="int64", id=None)
features["next.success"] = Value(dtype="bool", id=None)
features["episode_index"] = Value(dtype="int64", id=None) features["episode_index"] = Value(dtype="int64", id=None)
features["frame_index"] = Value(dtype="int64", id=None) features["frame_index"] = Value(dtype="int64", id=None)
features["timestamp"] = Value(dtype="float32", id=None) features["timestamp"] = Value(dtype="float32", id=None)
@ -417,7 +420,7 @@ def record(
while episode_index < num_episodes: while episode_index < num_episodes:
logging.info(f"Recording episode {episode_index}") logging.info(f"Recording episode {episode_index}")
say(f"Recording episode {episode_index}") say(f"Recording episode {episode_index}")
ep_dict = {'action':[], 'next.reward':[]} ep_dict = {'action':[], 'next.reward':[], 'next.success':[]}
for k in state_keys_dict: for k in state_keys_dict:
ep_dict[k] = [] ep_dict[k] = []
frame_index = 0 frame_index = 0
@ -441,7 +444,7 @@ def record(
str_key = key if key.startswith('observation.images.') else 'observation.images.' + key str_key = key if key.startswith('observation.images.') else 'observation.images.' + key
futures += [ futures += [
executor.submit( executor.submit(
save_image, observation[key].squeeze(0), str_key, frame_index, episode_index, videos_dir) save_image, observation[key], str_key, frame_index, episode_index, videos_dir)
] ]
if not is_headless() and visualize_images: if not is_headless() and visualize_images:
@ -453,15 +456,19 @@ def record(
# Advance the sim environment # Advance the sim environment
if len(action.shape) == 1: if len(action.shape) == 1:
action = np.expand_dims(action, 0) action = np.expand_dims(action, 0)
observation, reward, _, _ , info = env.step(action) observation, reward, terminated, _ , info = env.step(action)
success = info.get('is_success', False)
ep_dict['action'].append(torch.from_numpy(action)) ep_dict['action'].append(torch.from_numpy(action))
ep_dict['next.reward'].append(torch.tensor(reward)) ep_dict['next.reward'].append(torch.tensor(reward))
print(reward) ep_dict['next.success'].append(torch.tensor(success))
frame_index += 1 frame_index += 1
timestamp = time.perf_counter() - start_episode_t timestamp = time.perf_counter() - start_episode_t
if exit_early:
if exit_early or terminated:
exit_early = False exit_early = False
break break
@ -506,6 +513,7 @@ def record(
ep_dict[key] = torch.vstack(ep_dict[key]) * 180.0 / np.pi ep_dict[key] = torch.vstack(ep_dict[key]) * 180.0 / np.pi
ep_dict['action'] = torch.vstack(ep_dict['action']) * 180.0 / np.pi ep_dict['action'] = torch.vstack(ep_dict['action']) * 180.0 / np.pi
ep_dict['next.reward'] = torch.stack(ep_dict['next.reward']) ep_dict['next.reward'] = torch.stack(ep_dict['next.reward'])
ep_dict['next.success'] = torch.stack(ep_dict['next.success'])
ep_dict["seed"] = torch.tensor([seed] * num_frames) ep_dict["seed"] = torch.tensor([seed] * num_frames)
ep_dict["episode_index"] = torch.tensor([episode_index] * num_frames) ep_dict["episode_index"] = torch.tensor([episode_index] * num_frames)
@ -656,7 +664,7 @@ def replay(env,
action = items[idx]["action"] action = items[idx]["action"]
env.step(action.unsqueeze(0).numpy() * np.pi / 180.0) env.step(action.numpy() * np.pi / 180.0)
dt_s = time.perf_counter() - start_episode_t dt_s = time.perf_counter() - start_episode_t
busy_wait(1 / fps - dt_s) busy_wait(1 / fps - dt_s)
@ -806,7 +814,11 @@ if __name__ == "__main__":
# make gym env # make gym env
env_cfg = init_hydra_config(env_config_path) env_cfg = init_hydra_config(env_config_path)
env_fn = lambda: make_env(env_cfg, n_envs=1) #env_fn = lambda: make_env(env_cfg, n_envs=1)
package_name = f"gym_{env_cfg.env.name}"
importlib.import_module(f"gym_{env_cfg.env.name}")
env_fn = lambda: gym.make(env_cfg.env.handle, disable_env_checker=True, **env_cfg.env.gym)
robot = None robot = None
if control_mode != 'replay': if control_mode != 'replay':

View File

@ -165,7 +165,7 @@ def rollout(
# VectorEnv stores is_success in `info["final_info"][env_index]["is_success"]`. "final_info" isn't # VectorEnv stores is_success in `info["final_info"][env_index]["is_success"]`. "final_info" isn't
# available if none of the envs finished. # available if none of the envs finished.
if False and "final_info" in info: if "final_info" in info:
successes = [info["is_success"] if info is not None else False for info in info["final_info"]] successes = [info["is_success"] if info is not None else False for info in info["final_info"]]
else: else:
successes = [False] * env.num_envs successes = [False] * env.num_envs

View File

@ -482,7 +482,7 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
**{k: {"shape": v, "dtype": np.dtype("float32")} for k, v in policy.config.output_shapes.items()}, **{k: {"shape": v, "dtype": np.dtype("float32")} for k, v in policy.config.output_shapes.items()},
"next.reward": {"shape": (), "dtype": np.dtype("float32")}, "next.reward": {"shape": (), "dtype": np.dtype("float32")},
"next.done": {"shape": (), "dtype": np.dtype("?")}, "next.done": {"shape": (), "dtype": np.dtype("?")},
#"next.success": {"shape": (), "dtype": np.dtype("?")}, "next.success": {"shape": (), "dtype": np.dtype("?")},
}, },
buffer_capacity=cfg.training.online_buffer_capacity, buffer_capacity=cfg.training.online_buffer_capacity,
fps=online_env.unwrapped.metadata["render_fps"], fps=online_env.unwrapped.metadata["render_fps"],