From 8db94f73a19a2d18d2869da7f07943e022472126 Mon Sep 17 00:00:00 2001
From: Michel Aractingi <michel.aractingi@huggingface.co>
Date: Sun, 27 Oct 2024 12:12:56 +0100
Subject: [PATCH] added success rate to envs

---
 lerobot/configs/robot/koch.yaml      |  2 +-
 lerobot/scripts/control_robot.py     |  1 +
 lerobot/scripts/control_sim_robot.py | 30 +++++++++++++++++++---------
 lerobot/scripts/eval.py              |  2 +-
 lerobot/scripts/train.py             |  2 +-
 5 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/lerobot/configs/robot/koch.yaml b/lerobot/configs/robot/koch.yaml
index 5fcd5596..0e69593e 100644
--- a/lerobot/configs/robot/koch.yaml
+++ b/lerobot/configs/robot/koch.yaml
@@ -10,7 +10,7 @@ max_relative_target: null
 leader_arms:
   main:
     _target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus
-    port: /dev/tty.usbmodem585A0078211
+    port: /dev/tty.usbmodem58760430441
     motors:
       # name: (index, model)
       shoulder_pan: [1, "xl330-m077"]
diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py
index 3b6345b4..2682734c 100644
--- a/lerobot/scripts/control_robot.py
+++ b/lerobot/scripts/control_robot.py
@@ -1056,6 +1056,7 @@ if __name__ == "__main__":
     control_mode = args.mode
     robot_path = args.robot_path
     robot_overrides = args.robot_overrides
+    
     kwargs = vars(args)
     del kwargs["mode"]
     del kwargs["robot_path"]
diff --git a/lerobot/scripts/control_sim_robot.py b/lerobot/scripts/control_sim_robot.py
index 99268cf4..1e95c5b7 100644
--- a/lerobot/scripts/control_sim_robot.py
+++ b/lerobot/scripts/control_sim_robot.py
@@ -87,7 +87,7 @@ import gymnasium as gym
 import multiprocessing 
 from contextlib import nullcontext
 
-
+import importlib
 import cv2
 import torch
 import numpy as np
@@ -142,6 +142,7 @@ def say(text, blocking=False):
     os.system(cmd)
 
 
+
 def save_image(img_arr, key, frame_index, episode_index, videos_dir):
     img = Image.fromarray(img_arr)
     path = videos_dir / f"{key}_episode_{episode_index:06d}" / f"frame_{frame_index:06d}.png"
@@ -155,7 +156,7 @@ def show_image_observations(observation_queue:multiprocessing.Queue):
         images = []
         if keys is None: keys = [k for k in observations if 'image' in k]
         for key in keys:
-            images.append(observations[key].squeeze(0))
+            images.append(observations[key])
         cat_image = np.concatenate(images, 1)
         cv2.imshow('observations', cv2.cvtColor(cat_image, cv2.COLOR_RGB2BGR))
         cv2.waitKey(1)
@@ -273,6 +274,8 @@ def create_rl_hf_dataset(data_dict):
     features["next.reward"] = Value(dtype="float32", id=None)
 
     features["seed"] = Value(dtype="int64", id=None)
+    features["next.success"] = Value(dtype="bool", id=None)
+
     features["episode_index"] = Value(dtype="int64", id=None)
     features["frame_index"] = Value(dtype="int64", id=None)
     features["timestamp"] = Value(dtype="float32", id=None)
@@ -417,7 +420,7 @@ def record(
         while episode_index < num_episodes:
             logging.info(f"Recording episode {episode_index}")
             say(f"Recording episode {episode_index}")
-            ep_dict = {'action':[], 'next.reward':[]}
+            ep_dict = {'action':[], 'next.reward':[], 'next.success':[]}
             for k in state_keys_dict:
                 ep_dict[k] = []
             frame_index = 0
@@ -441,7 +444,7 @@ def record(
                     str_key = key if key.startswith('observation.images.') else 'observation.images.' + key
                     futures += [
                         executor.submit(
-                            save_image, observation[key].squeeze(0), str_key, frame_index, episode_index, videos_dir)
+                            save_image, observation[key], str_key, frame_index, episode_index, videos_dir)
                     ]
 
                 if not is_headless() and visualize_images:
@@ -453,15 +456,19 @@ def record(
                 # Advance the sim environment
                 if len(action.shape) == 1:
                     action = np.expand_dims(action, 0)
-                observation, reward, _, _ , info = env.step(action)
+                observation, reward, terminated, truncated, info = env.step(action)
+
+                success = info.get('is_success', False)
+
                 ep_dict['action'].append(torch.from_numpy(action))
                 ep_dict['next.reward'].append(torch.tensor(reward))
-                print(reward)
+                ep_dict['next.success'].append(torch.tensor(success))
 
                 frame_index += 1
 
                 timestamp = time.perf_counter() - start_episode_t
-                if exit_early:
+                # End the episode on either end-of-episode signal: the env must be reset after both.
+                if exit_early or terminated or truncated:
                     exit_early = False
                     break
 
@@ -506,6 +513,7 @@ def record(
                 ep_dict[key] = torch.vstack(ep_dict[key]) * 180.0 / np.pi
             ep_dict['action'] = torch.vstack(ep_dict['action']) * 180.0 / np.pi
             ep_dict['next.reward'] = torch.stack(ep_dict['next.reward'])
+            ep_dict['next.success'] = torch.stack(ep_dict['next.success'])
 
             ep_dict["seed"] = torch.tensor([seed] * num_frames)
             ep_dict["episode_index"] = torch.tensor([episode_index] * num_frames)
@@ -656,7 +664,7 @@ def replay(env,
     
             action = items[idx]["action"]
     
-            env.step(action.unsqueeze(0).numpy() * np.pi / 180.0)
+            env.step(action.numpy() * np.pi / 180.0)
     
             dt_s = time.perf_counter() - start_episode_t
             busy_wait(1 / fps - dt_s)
@@ -806,7 +814,11 @@ if __name__ == "__main__":
 
     # make gym env
     env_cfg = init_hydra_config(env_config_path)
-    env_fn = lambda: make_env(env_cfg, n_envs=1)
+    # Build the env with gym.make directly rather than through make_env(env_cfg, n_envs=1).
+    # NOTE(review): importing gym_<name> presumably registers env_cfg.env.handle - confirm.
+    package_name = f"gym_{env_cfg.env.name}"
+    importlib.import_module(package_name)
+    env_fn = lambda: gym.make(env_cfg.env.handle, disable_env_checker=True, **env_cfg.env.gym)
     
     robot = None
     if control_mode != 'replay':
diff --git a/lerobot/scripts/eval.py b/lerobot/scripts/eval.py
index 90cbd280..ed5cd8ee 100644
--- a/lerobot/scripts/eval.py
+++ b/lerobot/scripts/eval.py
@@ -165,7 +165,7 @@ def rollout(
 
         # VectorEnv stores is_success in `info["final_info"][env_index]["is_success"]`. "final_info" isn't
         # available of none of the envs finished.
-        if False and "final_info" in info:
+        if "final_info" in info:
             successes = [info["is_success"] if info is not None else False for info in info["final_info"]]
         else:
             successes = [False] * env.num_envs
diff --git a/lerobot/scripts/train.py b/lerobot/scripts/train.py
index 795189b0..6406295b 100644
--- a/lerobot/scripts/train.py
+++ b/lerobot/scripts/train.py
@@ -482,7 +482,7 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No
             **{k: {"shape": v, "dtype": np.dtype("float32")} for k, v in policy.config.output_shapes.items()},
             "next.reward": {"shape": (), "dtype": np.dtype("float32")},
             "next.done": {"shape": (), "dtype": np.dtype("?")},
-            #"next.success": {"shape": (), "dtype": np.dtype("?")},
+            "next.success": {"shape": (), "dtype": np.dtype("?")},
         },
         buffer_capacity=cfg.training.online_buffer_capacity,
         fps=online_env.unwrapped.metadata["render_fps"],