From 797f79f182c2191d1153e53362eacdcae65289a2 Mon Sep 17 00:00:00 2001
From: Thomas Wolf
Date: Sat, 8 Jun 2024 17:54:06 +0200
Subject: [PATCH] update

---
 README.md                                          |  11 +-
 .../0_record_training_data.py                      | 118 ++++++++++++------
 examples/real_robot_example/README.md              |  58 +++++++++
 .../__init__.py                                    |   4 +-
 .../dynamixel.py                                   |   4 +
 .../gym_environment.py}                            |   0
 .../{gym_real_env => gym_real_world}/robot.py      |   7 +-
 7 files changed, 157 insertions(+), 45 deletions(-)
 create mode 100644 examples/real_robot_example/README.md
 rename examples/real_robot_example/{gym_real_env => gym_real_world}/__init__.py (55%)
 rename examples/real_robot_example/{gym_real_env => gym_real_world}/dynamixel.py (98%)
 rename examples/real_robot_example/{gym_real_env/env.py => gym_real_world/gym_environment.py} (100%)
 rename examples/real_robot_example/{gym_real_env => gym_real_world}/robot.py (96%)

diff --git a/README.md b/README.md
index 9f3ceff4..ee0e06e1 100644
--- a/README.md
+++ b/README.md
@@ -127,13 +127,21 @@ wandb login
 
 Check out [example 1](./examples/1_load_lerobot_dataset.py) that illustrates how to use our dataset class which automatically downloads data from the Hugging Face hub.
 
-You can also locally visualize episodes from a dataset by executing our script from the command line:
+You can also locally visualize episodes from a dataset on the hub by executing our script from the command line:
 ```bash
 python lerobot/scripts/visualize_dataset.py \
     --repo-id lerobot/pusht \
     --episode-index 0
 ```
 
+or from a dataset in a local folder with the root `DATA_DIR` environment variable:
+```bash
+DATA_DIR='./my_local_data_dir' python lerobot/scripts/visualize_dataset.py \
+    --repo-id lerobot/pusht \
+    --episode-index 0
+```
+
 It will open `rerun.io` and display the camera streams, robot states and actions, like this:
 
 https://github-production-user-asset-6210df.s3.amazonaws.com/4681518/328035972-fd46b787-b532-47e2-bb6f-fd536a55a7ed.mov?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAVCODYLSA53PQK4ZA%2F20240505%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240505T172924Z&X-Amz-Expires=300&X-Amz-Signature=d680b26c532eeaf80740f08af3320d22ad0b8a4e4da1bcc4f33142c15b509eda&X-Amz-SignedHeaders=host&actor_id=24889239&key_id=0&repo_id=748713144
@@ -184,6 +192,7 @@ A `LeRobotDataset` is serialised using several widespread file formats for each
 - stats saved using `safetensor` tensor serialization format
 - info is saved using JSON
 
+Datasets can be uploaded/downloaded from the Hugging Face hub seamlessly. To work on a local dataset, set the `DATA_DIR` environment variable to your root dataset folder, as illustrated in the above section on dataset visualization.
 
 ### Evaluate a pretrained policy
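For reference, the `DATA_DIR` switch added above also applies when loading datasets programmatically, not only through the visualization script. A minimal sketch; the module path and the assumption that `DATA_DIR` is read at import time come from the lerobot layout this patch references and are not guaranteed by the patch itself:

```python
import os

# Set DATA_DIR before importing the dataset module, since the variable
# is read when the module is imported (assumption).
os.environ["DATA_DIR"] = "./my_local_data_dir"

from lerobot.common.datasets.lerobot_dataset import LeRobotDataset  # assumed module path

# With DATA_DIR set, the repo id resolves to a local folder instead of the Hugging Face hub.
dataset = LeRobotDataset("lerobot/pusht")
print(len(dataset))  # number of frames in the dataset
```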
diff --git a/examples/real_robot_example/0_record_training_data.py b/examples/real_robot_example/0_record_training_data.py
index 6f483e4b..9fb42ed5 100644
--- a/examples/real_robot_example/0_record_training_data.py
+++ b/examples/real_robot_example/0_record_training_data.py
@@ -3,7 +3,7 @@ import copy
 import os
 import time
 
-import gym_real_env  # noqa: F401
+import gym_real_world  # noqa: F401
 import gymnasium as gym
 import numpy as np
 import torch
@@ -21,12 +21,22 @@ from lerobot.scripts.push_dataset_to_hub import push_meta_data_to_hub, push_vide
 
 # parse the repo_id name via command line
 parser = argparse.ArgumentParser()
-parser.add_argument("--repo-id", type=str, default="blue_red_sort")
+parser.add_argument("--repo-id", type=str, default="thomwolf/blue_red_sort")
 parser.add_argument("--num-episodes", type=int, default=2)
 parser.add_argument("--num-frames", type=int, default=400)
 parser.add_argument("--num-workers", type=int, default=16)
 parser.add_argument("--keep-last", action="store_true")
 parser.add_argument("--push-to-hub", action="store_true")
+parser.add_argument(
+    "--fps",
+    type=int,
+    default=30,
+    help="Frames per second of the recording. "
+    "If we are not able to record at this fps, we will adjust the fps in the metadata.",
+)
+parser.add_argument(
+    "--tolerance", type=float, default=0.01, help="Tolerance in seconds for the recording time."
+)
 parser.add_argument(
     "--revision", type=str, default=CODEBASE_VERSION, help="Codebase version used to generate the dataset."
 )
@@ -36,12 +46,14 @@ repo_id = args.repo_id
 num_episodes = args.num_episodes
 num_frames = args.num_frames
 revision = args.revision
+fps = args.fps
+tolerance = args.tolerance
 
 out_data = DATA_DIR / repo_id
 
 # During data collection, frames are stored as png images in `images_dir`
 images_dir = out_data / "images"
-# After data collection, png images of each episode are encoded into a mp4 file stored in `videos_dir`
+# After data collection, png images of each episode are encoded into an mp4 file stored in `videos_dir`
 videos_dir = out_data / "videos"
 meta_data_dir = out_data / "meta_data"
 
@@ -53,8 +65,8 @@ if not os.path.exists(videos_dir):
     os.makedirs(videos_dir, exist_ok=True)
 
 if __name__ == "__main__":
-    # Create the gym environment - check the kwargs in gym_real_env/src/env.py
-    gym_handle = "gym_real_env/RealEnv-v0"
+    # Create the gym environment - check the kwargs in gym_real_world/gym_environment.py
+    gym_handle = "gym_real_world/RealEnv-v0"
     env = gym.make(gym_handle, disable_env_checker=True, record=True)
 
     ep_dicts = []
@@ -62,9 +74,10 @@ if __name__ == "__main__":
     ep_fps = []
     id_from = 0
     id_to = 0
-    os.system('spd-say "env created"')
+    os.system('spd-say "gym environment created"')
 
-    for ep_idx in range(num_episodes):
+    ep_idx = 0
+    while ep_idx < num_episodes:
         # bring the follower to the leader and start camera
         env.reset()
 
@@ -84,48 +97,71 @@ if __name__ == "__main__":
             # store data
             for key in observation:
                 obs_replay[key].append(copy.deepcopy(observation[key]))
-            timestamps.append(time.time() - starting_time)
+
+            recording_time = time.time() - starting_time
+            timestamps.append(recording_time)
+
+            # Check if we are able to keep up with the desired fps
+            if recording_time > len(timestamps) / fps + tolerance:
+                print(
+                    f"Error: recording time {recording_time:.2f} is greater than expected {len(timestamps) / fps:.2f}"
+                    f" + tolerance {tolerance:.2f}"
+                    f" at frame {len(timestamps)}"
+                    f" in episode {ep_idx}."
+                    f" Dropping the rest of the episode."
+                )
+                break
+
+            # wait the right amount of time to stay at the desired fps
+            time.sleep(max(0, len(timestamps) / fps - (time.time() - starting_time)))
+
             # if cv2.waitKey(1) & 0xFF == ord('q'):
             #     break
 
         os.system('spd-say "stop"')
 
-        ep_dict = {}
-        # store images in png and create the video
-        for img_key in env.cameras:
-            save_images_concurrently(
-                obs_replay[f"images.{img_key}"],
-                images_dir / f"{img_key}_episode_{ep_idx:06d}",
-                args.num_workers,
-            )
-            # for i in tqdm(range(num_frames)):
-            #     cv2.imwrite(str(images_dir / f"{img_key}_episode_{ep_idx:06d}" / f"frame_{i:06d}.png"),
-            #                 obs_replay[i]['pixels'][img_key])
-            fname = f"{img_key}_episode_{ep_idx:06d}.mp4"
-            # store the reference to the video frame
-            ep_dict[img_key] = [{"path": f"videos/{fname}", "timestamp": tstp} for tstp in timestamps]
-            # shutil.rmtree(tmp_imgs_dir)
-
-        state = torch.tensor(np.array(obs_replay["agent_pos"]))
-        action = torch.tensor(np.array(obs_replay["leader_pos"]))
-        next_done = torch.zeros(num_frames, dtype=torch.bool)
-        next_done[-1] = True
-
-        ep_dict["observation.state"] = state
-        ep_dict["action"] = action
-        ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames, dtype=torch.int64)
-        ep_dict["frame_index"] = torch.arange(0, num_frames, 1)
-        ep_dict["timestamp"] = torch.tensor(timestamps)
-        ep_dict["next.done"] = next_done
-        ep_fps.append(num_frames / timestamps[-1])
-        ep_dicts.append(ep_dict)
-        print(f"Episode {ep_idx} done, fps: {ep_fps[-1]:.2f}")
-
-        episode_data_index["from"].append(id_from)
-        episode_data_index["to"].append(id_from + num_frames if args.keep_last else id_from + num_frames - 1)
-
-        id_to = id_from + num_frames if args.keep_last else id_from + num_frames - 1
-        id_from = id_to
+        if len(timestamps) == num_frames:
+            os.system(f'spd-say "saving episode {ep_idx}"')
+            ep_dict = {}
+            # store images in png and create the video
+            for img_key in env.cameras:
+                save_images_concurrently(
+                    obs_replay[f"images.{img_key}"],
+                    images_dir / f"{img_key}_episode_{ep_idx:06d}",
+                    args.num_workers,
+                )
+                # for i in tqdm(range(num_frames)):
+                #     cv2.imwrite(str(images_dir / f"{img_key}_episode_{ep_idx:06d}" / f"frame_{i:06d}.png"),
+                #                 obs_replay[i]['pixels'][img_key])
+                fname = f"{img_key}_episode_{ep_idx:06d}.mp4"
+                # store the reference to the video frame
+                ep_dict[img_key] = [{"path": f"videos/{fname}", "timestamp": tstp} for tstp in timestamps]
+                # shutil.rmtree(tmp_imgs_dir)
+
+            state = torch.tensor(np.array(obs_replay["agent_pos"]))
+            action = torch.tensor(np.array(obs_replay["leader_pos"]))
+            next_done = torch.zeros(num_frames, dtype=torch.bool)
+            next_done[-1] = True
+
+            ep_dict["observation.state"] = state
+            ep_dict["action"] = action
+            ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames, dtype=torch.int64)
+            ep_dict["frame_index"] = torch.arange(0, num_frames, 1)
+            ep_dict["timestamp"] = torch.tensor(timestamps)
+            ep_dict["next.done"] = next_done
+            ep_fps.append(num_frames / timestamps[-1])
+            ep_dicts.append(ep_dict)
+            print(f"Episode {ep_idx} done, fps: {ep_fps[-1]:.2f}")
+
+            episode_data_index["from"].append(id_from)
+            episode_data_index["to"].append(
+                id_from + num_frames if args.keep_last else id_from + num_frames - 1
+            )
+
+            id_to = id_from + num_frames if args.keep_last else id_from + num_frames - 1
+            id_from = id_to
+
+            ep_idx += 1
 
     env.close()
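The frame-pacing logic added in this file reduces to the following self-contained sketch (placeholder values, no camera or environment required), which may help when reviewing the hunk above:

```python
import time

fps = 30          # target recording rate
tolerance = 0.01  # allowed lag in seconds before the episode is dropped
num_frames = 120  # shortened for the sketch

timestamps = []
starting_time = time.time()
for _ in range(num_frames):
    # ... capture one observation here ...
    recording_time = time.time() - starting_time
    timestamps.append(recording_time)
    # drop the episode as soon as we lag more than `tolerance` behind the ideal schedule
    if recording_time > len(timestamps) / fps + tolerance:
        break
    # sleep until the next frame boundary to hold the target fps
    time.sleep(max(0, len(timestamps) / fps - (time.time() - starting_time)))

episode_complete = len(timestamps) == num_frames
print(f"kept {len(timestamps)}/{num_frames} frames, complete: {episode_complete}")
```

An episode that breaks out early is simply re-recorded, since `ep_idx` is only incremented on success.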
diff --git a/examples/real_robot_example/README.md b/examples/real_robot_example/README.md
new file mode 100644
index 00000000..f2567fd4
--- /dev/null
+++ b/examples/real_robot_example/README.md
@@ -0,0 +1,58 @@
+# Using `lerobot` on a real world arm
+
+In this example, we'll be using `lerobot` on a real world arm to:
+- record a dataset in the `lerobot` format
+- (soon) train a policy on it
+- (soon) run the policy in the real world
+
+## Which robotic arm to use
+
+In this example we're using the [open-source low-cost arm from Alexander Koch](https://github.com/AlexanderKoch-Koch/low_cost_robot) in the specific setup of:
+- having 6 servos per arm, i.e. using the elbow-to-wrist extension
+- adding two cameras around it, one on top and one in the front
+- having a teleoperation arm as well (build the leader and the follower arms in A. Koch repo, both with elbow-to-wrist extensions)
+
+I'm using these cameras (but the setup should not be sensitive to the exact cameras you're using):
+- C922 Pro Stream Webcam
+- Intel(R) RealSense D455 (using only the RGB input)
+
+In general, this example should be easily extendable to any type of arm using Dynamixel servos with at least one camera, by changing a couple of configurations in the gym env.
+
+## Install the example
+
+Follow these steps:
+- install `lerobot`
+- install the Dynamixel SDK: `pip install dynamixel-sdk`
+
+## 0 - record examples
+
+Run the `0_record_training_data.py` example, selecting the duration and number of episodes you want to record, e.g.
+```bash
+DATA_DIR='./data' python 0_record_training_data.py \
+    --repo-id=thomwolf/blue_red_sort \
+    --num-episodes=50 \
+    --num-frames=400
+```
+
+TODO:
+- variable-length episodes
+- being able to drop episodes
+- checking uploading to the hub
+
+## 1 - visualize the dataset
+
+Use the standard dataset visualization script, pointing it to the right folder:
+```bash
+DATA_DIR='./data' python lerobot/scripts/visualize_dataset.py \
+    --repo-id thomwolf/blue_red_sort \
+    --episode-index 0
+```
+
+## (soon) Train a policy
+
+Run the `1_train_real_policy.py` example
+
+## (soon) Evaluate the policy in the real world
+
+Run the `2_evaluate_real_policy.py` example
diff --git a/examples/real_robot_example/gym_real_env/__init__.py b/examples/real_robot_example/gym_real_world/__init__.py
similarity index 55%
rename from examples/real_robot_example/gym_real_env/__init__.py
rename to examples/real_robot_example/gym_real_world/__init__.py
index 50cc2a7f..713d5c8a 100644
--- a/examples/real_robot_example/gym_real_env/__init__.py
+++ b/examples/real_robot_example/gym_real_world/__init__.py
@@ -1,8 +1,8 @@
 from gymnasium.envs.registration import register
 
 register(
-    id="gym_real_env/RealEnv-v0",
-    entry_point="gym_real_env.env:RealEnv",
+    id="gym_real_world/RealEnv-v0",
+    entry_point="gym_real_world.gym_environment:RealEnv",
     max_episode_steps=300,
     nondeterministic=True,
 )
diff --git a/examples/real_robot_example/gym_real_env/dynamixel.py b/examples/real_robot_example/gym_real_world/dynamixel.py
similarity index 98%
rename from examples/real_robot_example/gym_real_env/dynamixel.py
rename to examples/real_robot_example/gym_real_world/dynamixel.py
index 5271acbb..7bc9730f 100644
--- a/examples/real_robot_example/gym_real_env/dynamixel.py
+++ b/examples/real_robot_example/gym_real_world/dynamixel.py
@@ -1,4 +1,8 @@
 # ruff: noqa
+"""From Alexander Koch's low_cost_robot codebase at https://github.com/AlexanderKoch-Koch/low_cost_robot.
+Dynamixel class to control the Dynamixel servos.
+"""
+
 from __future__ import annotations
 
 import enum
diff --git a/examples/real_robot_example/gym_real_env/env.py b/examples/real_robot_example/gym_real_world/gym_environment.py
similarity index 100%
rename from examples/real_robot_example/gym_real_env/env.py
rename to examples/real_robot_example/gym_real_world/gym_environment.py
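With the registration above, downstream code such as `0_record_training_data.py` constructs the environment through the new id. A minimal sketch; the `disable_env_checker` and `record` kwargs are copied from the recording script in this patch, and the full kwarg list lives in `gym_environment.py`:

```python
import gym_real_world  # noqa: F401  # importing the package runs the register() call above
import gymnasium as gym

env = gym.make("gym_real_world/RealEnv-v0", disable_env_checker=True, record=True)
observation, info = env.reset()  # brings the follower to the leader and starts the camera
env.close()
```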
diff --git a/examples/real_robot_example/gym_real_env/robot.py b/examples/real_robot_example/gym_real_world/robot.py
similarity index 96%
rename from examples/real_robot_example/gym_real_env/robot.py
rename to examples/real_robot_example/gym_real_world/robot.py
index 6c95ff16..21014f67 100644
--- a/examples/real_robot_example/gym_real_env/robot.py
+++ b/examples/real_robot_example/gym_real_world/robot.py
@@ -1,11 +1,16 @@
 # ruff: noqa
+"""From Alexander Koch's low_cost_robot codebase at https://github.com/AlexanderKoch-Koch/low_cost_robot.
+Class to control the robot using Dynamixel servos.
+"""
+
 from enum import Enum, auto
 from typing import Union
 
 import numpy as np
-from dynamixel import Dynamixel, OperatingMode, ReadAttribute
 from dynamixel_sdk import DXL_HIBYTE, DXL_HIWORD, DXL_LOBYTE, DXL_LOWORD, GroupSyncRead, GroupSyncWrite
 
+from .dynamixel import Dynamixel, OperatingMode, ReadAttribute
+
 
 class MotorControlType(Enum):
     PWM = auto()
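A note on the import change in `robot.py`: the bare `from dynamixel import ...` only resolved when the file was run from its own directory, whereas the relative `from .dynamixel import ...` lets the module be imported as part of the `gym_real_world` package. A sketch of the resulting usage, assuming it is run from `examples/real_robot_example`; `MotorControlType` is the only name visible in this hunk, anything beyond it would be an assumption:

```python
# The package-relative import inside robot.py now resolves correctly.
from gym_real_world.robot import MotorControlType

print(list(MotorControlType))  # enum members, e.g. MotorControlType.PWM
```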