From 797f79f182c2191d1153e53362eacdcae65289a2 Mon Sep 17 00:00:00 2001
From: Thomas Wolf
Date: Sat, 8 Jun 2024 17:54:06 +0200
Subject: [PATCH] update

---
 README.md                                          |  11 +-
 .../0_record_training_data.py                      | 118 ++++++++++++------
 examples/real_robot_example/README.md              |  58 +++++++++
 .../__init__.py                                    |   4 +-
 .../dynamixel.py                                   |   4 +
 .../gym_environment.py}                            |   0
 .../{gym_real_env => gym_real_world}/robot.py      |   7 +-
 7 files changed, 157 insertions(+), 45 deletions(-)
 create mode 100644 examples/real_robot_example/README.md
 rename examples/real_robot_example/{gym_real_env => gym_real_world}/__init__.py (55%)
 rename examples/real_robot_example/{gym_real_env => gym_real_world}/dynamixel.py (98%)
 rename examples/real_robot_example/{gym_real_env/env.py => gym_real_world/gym_environment.py} (100%)
 rename examples/real_robot_example/{gym_real_env => gym_real_world}/robot.py (96%)

diff --git a/README.md b/README.md
index 9f3ceff4..ee0e06e1 100644
--- a/README.md
+++ b/README.md
@@ -127,13 +127,21 @@ wandb login
 
 Check out [example 1](./examples/1_load_lerobot_dataset.py) that illustrates how to use our dataset class which automatically downloads data from the Hugging Face hub.
 
-You can also locally visualize episodes from a dataset by executing our script from the command line:
+You can also locally visualize episodes from a dataset on the hub by executing our script from the command line:
 ```bash
 python lerobot/scripts/visualize_dataset.py \
     --repo-id lerobot/pusht \
     --episode-index 0
 ```
 
+or from a dataset in a local folder with the root `DATA_DIR` environment variable:
+```bash
+DATA_DIR='./my_local_data_dir' python lerobot/scripts/visualize_dataset.py \
+    --repo-id lerobot/pusht \
+    --episode-index 0
+```
+
 It will open `rerun.io` and display the camera streams, robot states and actions, like this:
 
 https://github-production-user-asset-6210df.s3.amazonaws.com/4681518/328035972-fd46b787-b532-47e2-bb6f-fd536a55a7ed.mov?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAVCODYLSA53PQK4ZA%2F20240505%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240505T172924Z&X-Amz-Expires=300&X-Amz-Signature=d680b26c532eeaf80740f08af3320d22ad0b8a4e4da1bcc4f33142c15b509eda&X-Amz-SignedHeaders=host&actor_id=24889239&key_id=0&repo_id=748713144
@@ -184,6 +192,7 @@ A `LeRobotDataset` is serialised using several widespread file formats for each
 - stats saved using `safetensor` tensor serialization format
 - info is saved using JSON
 
+Datasets can be uploaded/downloaded from the Hugging Face hub seamlessly. To work on a local dataset, set the `DATA_DIR` environment variable to your root dataset folder, as illustrated in the above section on dataset visualization.
 
 ### Evaluate a pretrained policy
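For reference, the `DATA_DIR` switch added above also applies when loading datasets programmatically, not only through the visualization script. A minimal sketch; the module path and the assumption that `DATA_DIR` is read at import time come from the lerobot layout this patch references and are not guaranteed by the patch itself:

```python
import os

# Set DATA_DIR before importing the dataset module, since the variable
# is read when the module is imported (assumption).
os.environ["DATA_DIR"] = "./my_local_data_dir"

from lerobot.common.datasets.lerobot_dataset import LeRobotDataset  # assumed module path

# With DATA_DIR set, the repo id resolves to a local folder instead of the Hugging Face hub.
dataset = LeRobotDataset("lerobot/pusht")
print(len(dataset))  # number of frames in the dataset
```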
diff --git a/examples/real_robot_example/0_record_training_data.py b/examples/real_robot_example/0_record_training_data.py
index 6f483e4b..9fb42ed5 100644
--- a/examples/real_robot_example/0_record_training_data.py
+++ b/examples/real_robot_example/0_record_training_data.py
@@ -3,7 +3,7 @@ import copy
 import os
 import time
 
-import gym_real_env  # noqa: F401
+import gym_real_world  # noqa: F401
 import gymnasium as gym
 import numpy as np
 import torch
@@ -21,12 +21,22 @@ from lerobot.scripts.push_dataset_to_hub import push_meta_data_to_hub, push_vide
 
 # parse the repo_id name via command line
 parser = argparse.ArgumentParser()
-parser.add_argument("--repo-id", type=str, default="blue_red_sort")
+parser.add_argument("--repo-id", type=str, default="thomwolf/blue_red_sort")
 parser.add_argument("--num-episodes", type=int, default=2)
 parser.add_argument("--num-frames", type=int, default=400)
 parser.add_argument("--num-workers", type=int, default=16)
 parser.add_argument("--keep-last", action="store_true")
 parser.add_argument("--push-to-hub", action="store_true")
+parser.add_argument(
+    "--fps",
+    type=int,
+    default=30,
+    help="Frames per second of the recording. "
+    "If we are not able to record at this fps, we will adjust the fps in the metadata.",
+)
+parser.add_argument(
+    "--tolerance", type=float, default=0.01, help="Tolerance in seconds for the recording time."
+)
 parser.add_argument(
     "--revision", type=str, default=CODEBASE_VERSION, help="Codebase version used to generate the dataset."
 )
@@ -36,12 +46,14 @@ repo_id = args.repo_id
 num_episodes = args.num_episodes
 num_frames = args.num_frames
 revision = args.revision
+fps = args.fps
+tolerance = args.tolerance
 
 out_data = DATA_DIR / repo_id
 
 # During data collection, frames are stored as png images in `images_dir`
 images_dir = out_data / "images"
-# After data collection, png images of each episode are encoded into a mp4 file stored in `videos_dir`
+# After data collection, png images of each episode are encoded into an mp4 file stored in `videos_dir`
 videos_dir = out_data / "videos"
 meta_data_dir = out_data / "meta_data"
 
@@ -53,8 +65,8 @@ if not os.path.exists(videos_dir):
     os.makedirs(videos_dir, exist_ok=True)
 
 if __name__ == "__main__":
-    # Create the gym environment - check the kwargs in gym_real_env/src/env.py
-    gym_handle = "gym_real_env/RealEnv-v0"
+    # Create the gym environment - check the kwargs in gym_real_world/gym_environment.py
+    gym_handle = "gym_real_world/RealEnv-v0"
     env = gym.make(gym_handle, disable_env_checker=True, record=True)
 
     ep_dicts = []
@@ -62,9 +74,10 @@ if __name__ == "__main__":
     ep_fps = []
     id_from = 0
     id_to = 0
-    os.system('spd-say "env created"')
+    os.system('spd-say "gym environment created"')
 
-    for ep_idx in range(num_episodes):
+    ep_idx = 0
+    while ep_idx < num_episodes:
         # bring the follower to the leader and start camera
         env.reset()
 
@@ -84,48 +97,71 @@ if __name__ == "__main__":
             # store data
             for key in observation:
                 obs_replay[key].append(copy.deepcopy(observation[key]))
-            timestamps.append(time.time() - starting_time)
+
+            recording_time = time.time() - starting_time
+            timestamps.append(recording_time)
+
+            # Check if we are able to keep up with the desired fps
+            if recording_time > len(timestamps) / fps + tolerance:
+                print(
+                    f"Error: recording time {recording_time:.2f} is greater than expected {len(timestamps) / fps:.2f}"
+                    f" + tolerance {tolerance:.2f}"
+                    f" at frame {len(timestamps)}"
+                    f" in episode {ep_idx}."
+                    f" Dropping the rest of the episode."
+                )
+                break
+
+            # wait the right amount of time to stay at the desired fps
+            time.sleep(max(0, len(timestamps) / fps - (time.time() - starting_time)))
+
             # if cv2.waitKey(1) & 0xFF == ord('q'):
             #     break
 
         os.system('spd-say "stop"')
 
-        ep_dict = {}
-        # store images in png and create the video
-        for img_key in env.cameras:
-            save_images_concurrently(
-                obs_replay[f"images.{img_key}"],
-                images_dir / f"{img_key}_episode_{ep_idx:06d}",
-                args.num_workers,
-            )
-            # for i in tqdm(range(num_frames)):
-            #     cv2.imwrite(str(images_dir / f"{img_key}_episode_{ep_idx:06d}" / f"frame_{i:06d}.png"),
-            #                 obs_replay[i]['pixels'][img_key])
-            fname = f"{img_key}_episode_{ep_idx:06d}.mp4"
-            # store the reference to the video frame
-            ep_dict[img_key] = [{"path": f"videos/{fname}", "timestamp": tstp} for tstp in timestamps]
-            # shutil.rmtree(tmp_imgs_dir)
-
-        state = torch.tensor(np.array(obs_replay["agent_pos"]))
-        action = torch.tensor(np.array(obs_replay["leader_pos"]))
-        next_done = torch.zeros(num_frames, dtype=torch.bool)
-        next_done[-1] = True
-
-        ep_dict["observation.state"] = state
-        ep_dict["action"] = action
-        ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames, dtype=torch.int64)
-        ep_dict["frame_index"] = torch.arange(0, num_frames, 1)
-        ep_dict["timestamp"] = torch.tensor(timestamps)
-        ep_dict["next.done"] = next_done
-        ep_fps.append(num_frames / timestamps[-1])
-        ep_dicts.append(ep_dict)
-        print(f"Episode {ep_idx} done, fps: {ep_fps[-1]:.2f}")
-
-        episode_data_index["from"].append(id_from)
-        episode_data_index["to"].append(id_from + num_frames if args.keep_last else id_from + num_frames - 1)
-
-        id_to = id_from + num_frames if args.keep_last else id_from + num_frames - 1
-        id_from = id_to
+        if len(timestamps) == num_frames:
+            os.system(f'spd-say "saving episode {ep_idx}"')
+            ep_dict = {}
+            # store images in png and create the video
+            for img_key in env.cameras:
+                save_images_concurrently(
+                    obs_replay[f"images.{img_key}"],
+                    images_dir / f"{img_key}_episode_{ep_idx:06d}",
+                    args.num_workers,
+                )
+                # for i in tqdm(range(num_frames)):
+                #     cv2.imwrite(str(images_dir / f"{img_key}_episode_{ep_idx:06d}" / f"frame_{i:06d}.png"),
+                #                 obs_replay[i]['pixels'][img_key])
+                fname = f"{img_key}_episode_{ep_idx:06d}.mp4"
+                # store the reference to the video frame
+                ep_dict[img_key] = [{"path": f"videos/{fname}", "timestamp": tstp} for tstp in timestamps]
+                # shutil.rmtree(tmp_imgs_dir)
+
+            state = torch.tensor(np.array(obs_replay["agent_pos"]))
+            action = torch.tensor(np.array(obs_replay["leader_pos"]))
+            next_done = torch.zeros(num_frames, dtype=torch.bool)
+            next_done[-1] = True
+
+            ep_dict["observation.state"] = state
+            ep_dict["action"] = action
+            ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames, dtype=torch.int64)
+            ep_dict["frame_index"] = torch.arange(0, num_frames, 1)
+            ep_dict["timestamp"] = torch.tensor(timestamps)
+            ep_dict["next.done"] = next_done
+            ep_fps.append(num_frames / timestamps[-1])
+            ep_dicts.append(ep_dict)
+            print(f"Episode {ep_idx} done, fps: {ep_fps[-1]:.2f}")
+
+            episode_data_index["from"].append(id_from)
+            episode_data_index["to"].append(
+                id_from + num_frames if args.keep_last else id_from + num_frames - 1
+            )
+
+            id_to = id_from + num_frames if args.keep_last else id_from + num_frames - 1
+            id_from = id_to
+
+            ep_idx += 1
 
     env.close()
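The frame-pacing logic added in this file reduces to the following self-contained sketch (placeholder values, no camera or environment required), which may help when reviewing the hunk above:

```python
import time

fps = 30          # target recording rate
tolerance = 0.01  # allowed lag in seconds before the episode is dropped
num_frames = 120  # shortened for the sketch

timestamps = []
starting_time = time.time()
for _ in range(num_frames):
    # ... capture one observation here ...
    recording_time = time.time() - starting_time
    timestamps.append(recording_time)
    # drop the episode as soon as we lag more than `tolerance` behind the ideal schedule
    if recording_time > len(timestamps) / fps + tolerance:
        break
    # sleep until the next frame boundary to hold the target fps
    time.sleep(max(0, len(timestamps) / fps - (time.time() - starting_time)))

episode_complete = len(timestamps) == num_frames
print(f"kept {len(timestamps)}/{num_frames} frames, complete: {episode_complete}")
```

An episode that breaks out early is simply re-recorded, since `ep_idx` is only incremented on success.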
diff --git a/examples/real_robot_example/README.md b/examples/real_robot_example/README.md
new file mode 100644
index 00000000..f2567fd4
--- /dev/null
+++ b/examples/real_robot_example/README.md
@@ -0,0 +1,58 @@
+# Using `lerobot` on a real world arm
+
+In this example, we'll be using `lerobot` on a real world arm to:
+- record a dataset in the `lerobot` format
+- (soon) train a policy on it
+- (soon) run the policy in the real world
+
+## Which robotic arm to use
+
+In this example we're using the [open-source low-cost arm from Alexander Koch](https://github.com/AlexanderKoch-Koch/low_cost_robot) in the specific setup of:
+- having 6 servos per arm, i.e. using the elbow-to-wrist extension
+- adding two cameras around it, one on top and one in the front
+- having a teleoperation arm as well (build the leader and the follower arms in A. Koch repo, both with elbow-to-wrist extensions)
+
+I'm using these cameras (but the setup should not be sensitive to the exact cameras you're using):
+- C922 Pro Stream Webcam
+- Intel(R) RealSense D455 (using only the RGB input)
+
+In general, this example should be easily extendable to any type of arm using Dynamixel servos with at least one camera, by changing a couple of configurations in the gym env.
+
+## Install the example
+
+Follow these steps:
+- install `lerobot`
+- install the Dynamixel SDK: `pip install dynamixel-sdk`
+
+## 0 - record examples
+
+Run the `0_record_training_data.py` example, selecting the duration and number of episodes you want to record, e.g.
+```bash
+DATA_DIR='./data' python 0_record_training_data.py \
+    --repo-id=thomwolf/blue_red_sort \
+    --num-episodes=50 \
+    --num-frames=400
+```
+
+TODO:
+- variable-length episodes
+- being able to drop episodes
+- checking uploading to the hub
+
+## 1 - visualize the dataset
+
+Use the standard dataset visualization script, pointing it to the right folder:
+```bash
+DATA_DIR='./data' python lerobot/scripts/visualize_dataset.py \
+    --repo-id thomwolf/blue_red_sort \
+    --episode-index 0
+```
+
+## (soon) Train a policy
+
+Run the `1_train_real_policy.py` example
+
+## (soon) Evaluate the policy in the real world
+
+Run the `2_evaluate_real_policy.py` example
diff --git a/examples/real_robot_example/gym_real_env/__init__.py b/examples/real_robot_example/gym_real_world/__init__.py
similarity index 55%
rename from examples/real_robot_example/gym_real_env/__init__.py
rename to examples/real_robot_example/gym_real_world/__init__.py
index 50cc2a7f..713d5c8a 100644
--- a/examples/real_robot_example/gym_real_env/__init__.py
+++ b/examples/real_robot_example/gym_real_world/__init__.py
@@ -1,8 +1,8 @@
 from gymnasium.envs.registration import register
 
 register(
-    id="gym_real_env/RealEnv-v0",
-    entry_point="gym_real_env.env:RealEnv",
+    id="gym_real_world/RealEnv-v0",
+    entry_point="gym_real_world.gym_environment:RealEnv",
     max_episode_steps=300,
     nondeterministic=True,
 )
diff --git a/examples/real_robot_example/gym_real_env/dynamixel.py b/examples/real_robot_example/gym_real_world/dynamixel.py
similarity index 98%
rename from examples/real_robot_example/gym_real_env/dynamixel.py
rename to examples/real_robot_example/gym_real_world/dynamixel.py
index 5271acbb..7bc9730f 100644
--- a/examples/real_robot_example/gym_real_env/dynamixel.py
+++ b/examples/real_robot_example/gym_real_world/dynamixel.py
@@ -1,4 +1,8 @@
 # ruff: noqa
+"""From Alexander Koch's low_cost_robot codebase at https://github.com/AlexanderKoch-Koch/low_cost_robot.
+Dynamixel class to control the Dynamixel servos.
+"""
+
 from __future__ import annotations
 
 import enum
diff --git a/examples/real_robot_example/gym_real_env/env.py b/examples/real_robot_example/gym_real_world/gym_environment.py
similarity index 100%
rename from examples/real_robot_example/gym_real_env/env.py
rename to examples/real_robot_example/gym_real_world/gym_environment.py
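With the registration above, downstream code such as `0_record_training_data.py` constructs the environment through the new id. A minimal sketch; the `disable_env_checker` and `record` kwargs are copied from the recording script in this patch, and the full kwarg list lives in `gym_environment.py`:

```python
import gym_real_world  # noqa: F401  # importing the package runs the register() call above
import gymnasium as gym

env = gym.make("gym_real_world/RealEnv-v0", disable_env_checker=True, record=True)
observation, info = env.reset()  # brings the follower to the leader and starts the camera
env.close()
```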
diff --git a/examples/real_robot_example/gym_real_env/robot.py b/examples/real_robot_example/gym_real_world/robot.py
similarity index 96%
rename from examples/real_robot_example/gym_real_env/robot.py
rename to examples/real_robot_example/gym_real_world/robot.py
index 6c95ff16..21014f67 100644
--- a/examples/real_robot_example/gym_real_env/robot.py
+++ b/examples/real_robot_example/gym_real_world/robot.py
@@ -1,11 +1,16 @@
 # ruff: noqa
+"""From Alexander Koch's low_cost_robot codebase at https://github.com/AlexanderKoch-Koch/low_cost_robot.
+Class to control the robot using Dynamixel servos.
+"""
+
 from enum import Enum, auto
 from typing import Union
 
 import numpy as np
-from dynamixel import Dynamixel, OperatingMode, ReadAttribute
 from dynamixel_sdk import DXL_HIBYTE, DXL_HIWORD, DXL_LOBYTE, DXL_LOWORD, GroupSyncRead, GroupSyncWrite
 
+from .dynamixel import Dynamixel, OperatingMode, ReadAttribute
+
 
 class MotorControlType(Enum):
     PWM = auto()
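A note on the import change in `robot.py`: the bare `from dynamixel import ...` only resolved when the file was run from its own directory, whereas the relative `from .dynamixel import ...` lets the module be imported as part of the `gym_real_world` package. A sketch of the resulting usage, assuming it is run from `examples/real_robot_example`; `MotorControlType` is the only name visible in this hunk, anything beyond it would be an assumption:

```python
# The package-relative import inside robot.py now resolves correctly.
from gym_real_world.robot import MotorControlType

print(list(MotorControlType))  # enum members, e.g. MotorControlType.PWM
```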