diff --git a/README.md b/README.md index b7c3af23..7d7b80d4 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ conda activate lerobot Install 🤗 LeRobot: ```bash -pip install . +pip install -e . ``` > **NOTE:** Depending on your platform, If you encounter any build errors during this step @@ -91,7 +91,7 @@ For simulations, 🤗 LeRobot comes with gymnasium environments that can be inst For instance, to install 🤗 LeRobot with aloha and pusht, use: ```bash -pip install ".[aloha, pusht]" +pip install -e ".[aloha, pusht]" ``` To use [Weights and Biases](https://docs.wandb.ai/quickstart) for experiment tracking, log in with @@ -116,10 +116,12 @@ wandb login | | ├── datasets # various datasets of human demonstrations: aloha, pusht, xarm | | ├── envs # various sim environments: aloha, pusht, xarm | | ├── policies # various policies: act, diffusion, tdmpc +| | ├── robot_devices # various real devices: dynamixel motors, opencv cameras, koch robots | | └── utils # various utilities | └── scripts # contains functions to execute via command line | ├── eval.py # load policy and evaluate it on an environment | ├── train.py # train a policy via imitation learning and/or reinforcement learning +| ├── control_robot.py # teleoperate a real robot, record data, run a policy | ├── push_dataset_to_hub.py # convert your dataset into LeRobot dataset format and upload it to the Hugging Face hub | └── visualize_dataset.py # load a dataset and render its demonstrations ├── outputs # contains results of scripts execution: logs, videos, model checkpoints diff --git a/docker/lerobot-cpu/Dockerfile b/docker/lerobot-cpu/Dockerfile index 885a7752..7ad3bf6e 100644 --- a/docker/lerobot-cpu/Dockerfile +++ b/docker/lerobot-cpu/Dockerfile @@ -9,6 +9,7 @@ ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential cmake \ libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \ + speech-dispatcher \ && apt-get clean && rm -rf /var/lib/apt/lists/* # Create virtual environment diff --git a/docker/lerobot-gpu-dev/Dockerfile b/docker/lerobot-gpu-dev/Dockerfile index f2c06fd3..1f525576 100644 --- a/docker/lerobot-gpu-dev/Dockerfile +++ b/docker/lerobot-gpu-dev/Dockerfile @@ -13,6 +13,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ sed gawk grep curl wget zip unzip \ tcpdump sysstat screen tmux \ libglib2.0-0 libgl1-mesa-glx libegl1-mesa \ + speech-dispatcher \ python${PYTHON_VERSION} python${PYTHON_VERSION}-venv \ && apt-get clean && rm -rf /var/lib/apt/lists/* diff --git a/docker/lerobot-gpu/Dockerfile b/docker/lerobot-gpu/Dockerfile index 7a27e8f0..4d6f772b 100644 --- a/docker/lerobot-gpu/Dockerfile +++ b/docker/lerobot-gpu/Dockerfile @@ -9,6 +9,7 @@ ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential cmake \ libglib2.0-0 libgl1-mesa-glx libegl1-mesa ffmpeg \ + speech-dispatcher \ python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \ && apt-get clean && rm -rf /var/lib/apt/lists/* diff --git a/examples/7_get_started_with_real_robot.md b/examples/7_get_started_with_real_robot.md new file mode 100644 index 00000000..482587c7 --- /dev/null +++ b/examples/7_get_started_with_real_robot.md @@ -0,0 +1,1023 @@ +# Getting Started with Real-World Robots + +This tutorial will guide you through the process of setting up and training a neural network to autonomously control a real robot. + +**What You'll Learn:** +1. How to order and assemble your robot. +2. 
How to connect, configure, and calibrate your robot. +3. How to record and visualize your dataset. +4. How to train a policy using your data and prepare it for evaluation. +5. How to evaluate your policy and visualize the results. + +By following these steps, you'll be able to replicate tasks like picking up a Lego block and placing it in a bin with a high success rate, as demonstrated in [this video](https://x.com/RemiCadene/status/1814680760592572934). + +Although this tutorial is general and can be easily adapted to various types of robots by changing the configuration, it is specifically based on the [Koch v1.1](https://github.com/jess-moss/koch-v1-1), an affordable robot. The Koch v1.1 consists of a leader arm and a follower arm, each with 6 motors. It can work with one or several cameras to record the scene, which serve as visual sensors for the robot. + +During the data collection phase, you'll control the follower arm by moving the leader arm, a process known as "teleoperation." This technique is used to collect robot trajectories. Afterward, you'll train a neural network to imitate these trajectories and deploy the network to enable your robot to operate autonomously. + +If you encounter any issues at any step of the tutorial, feel free to seek help on [Discord](https://discord.com/invite/s3KuuzsPFb). + +## 1. Order and Assemble your Koch v1.1 + +Follow the sourcing and assembling instructions provided on the [Koch v1.1 Github page](https://github.com/jess-moss/koch-v1-1). This will guide you through setting up both the follower and leader arms, as shown in the image below. + +
[Image: Koch v1.1 leader and follower arms]
For a visual walkthrough of the assembly process, you can refer to [this video tutorial](https://youtu.be/5mdxvMlxoos).

## 2. Configure motors, Calibrate arms, Teleoperate your Koch v1.1

First, install the additional dependencies required for Koch v1.1 by running one of the following commands.

Using `pip`:
```bash
pip install -e ".[koch]"
```

Or using `poetry`:
```bash
poetry install --sync --extras "koch"
```

You are now ready to plug the 5V power supply to the motor bus of the leader arm (the smaller one), since all its motors only require 5V.

Then plug the 12V power supply to the motor bus of the follower arm. It has two motors that need 12V, and the rest will be powered with 5V through the voltage converter.

Finally, connect both arms to your computer via USB. Note that the USB doesn't provide any power, and both arms need to be plugged in with their associated power supply to be detected by your computer.

*Copy-pasting Python code*

In the upcoming sections, you'll learn about our classes and functions by running some Python code in an interactive session, or by copy-pasting it into a Python file. If it's your first time going through the tutorial, we highly recommend following these steps to build a deeper intuition about how things work. Once you're more familiar, you can streamline the process by directly running the teleoperate script (which is detailed further in the tutorial):
```bash
python lerobot/scripts/control_robot.py teleoperate \
  --robot-path lerobot/configs/robot/koch.yaml \
  --robot-overrides '~cameras'  # do not instantiate the cameras
```

It will automatically:
1. Detect and help you correct any motor configuration issues.
2. Identify any missing calibrations and initiate the calibration procedure.
3. Connect the robot and start teleoperation.

### a. Control your motors with DynamixelMotorsBus

You can use the [`DynamixelMotorsBus`](../lerobot/common/robot_devices/motors/dynamixel.py) to communicate with the motors connected as a chain to the corresponding USB bus. This class leverages the Python [Dynamixel SDK](https://emanual.robotis.com/docs/en/software/dynamixel/dynamixel_sdk/sample_code/python_read_write_protocol_2_0/#python-read-write-protocol-20) to facilitate reading from and writing to the motors.

**Instantiate the DynamixelMotorsBus**

To begin, you'll need to create two instances of the [`DynamixelMotorsBus`](../lerobot/common/robot_devices/motors/dynamixel.py), one for each arm, using their corresponding USB ports (e.g. `DynamixelMotorsBus(port="/dev/tty.usbmodem575E0031751")`).

To find the correct ports for each arm, run the utility script twice:
```bash
python lerobot/common/robot_devices/motors/dynamixel.py
```

Example output when identifying the leader arm's port (e.g., `/dev/tty.usbmodem575E0031751` on Mac, or possibly `/dev/ttyACM0` on Linux):
```
Finding all available ports for the DynamixelMotorsBus.
['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751']
Remove the usb cable from your DynamixelMotorsBus and press Enter when done.

[...Disconnect leader arm and press Enter...]

The port of this DynamixelMotorsBus is /dev/tty.usbmodem575E0031751
Reconnect the usb cable.
```

Example output when identifying the follower arm's port (e.g., `/dev/tty.usbmodem575E0032081`, or possibly `/dev/ttyACM1` on Linux):
```
Finding all available ports for the DynamixelMotorsBus.
+['/dev/tty.usbmodem575E0032081', '/dev/tty.usbmodem575E0031751'] +Remove the usb cable from your DynamixelMotorsBus and press Enter when done. + +[...Disconnect follower arm and press Enter...] + +The port of this DynamixelMotorsBus is /dev/tty.usbmodem575E0032081 +Reconnect the usb cable. +``` + +Troubleshooting: On Linux, you might need to give access to the USB ports by running: +```bash +sudo chmod 666 /dev/ttyACM0 +sudo chmod 666 /dev/ttyACM1 +``` + +*Listing and Configuring Motors* + +Next, you'll need to list the motors for each arm, including their name, index, and model. Initially, each motor is assigned the factory default index `1`. Since each motor requires a unique index to function correctly when connected in a chain on a common bus, you'll need to assign different indices. It's recommended to use an ascending index order, starting from `1` (e.g., `1, 2, 3, 4, 5, 6`). These indices will be saved in the persistent memory of each motor during the first connection. + +To assign indices to the motors, run this code in an interactive Python session. Replace the `port` values with the ones you identified earlier: +```python +from lerobot.common.robot_devices.motors.dynamixel import DynamixelMotorsBus + +leader_port = "/dev/tty.usbmodem575E0031751" +follower_port = "/dev/tty.usbmodem575E0032081" + +leader_arm = DynamixelMotorsBus( + port=leader_port, + motors={ + # name: (index, model) + "shoulder_pan": (1, "xl330-m077"), + "shoulder_lift": (2, "xl330-m077"), + "elbow_flex": (3, "xl330-m077"), + "wrist_flex": (4, "xl330-m077"), + "wrist_roll": (5, "xl330-m077"), + "gripper": (6, "xl330-m077"), + }, +) + +follower_arm = DynamixelMotorsBus( + port=follower_port, + motors={ + # name: (index, model) + "shoulder_pan": (1, "xl430-w250"), + "shoulder_lift": (2, "xl430-w250"), + "elbow_flex": (3, "xl330-m288"), + "wrist_flex": (4, "xl330-m288"), + "wrist_roll": (5, "xl330-m288"), + "gripper": (6, "xl330-m288"), + }, +) +``` + +*Updating the YAML Configuration File* + +Then, update the port values in the YAML configuration file for the Koch robot at [`lerobot/configs/robot/koch.yaml`](../lerobot/configs/robot/koch.yaml) with the ports you've identified: +```yaml +[...] +leader_arms: + main: + _target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus + port: /dev/tty.usbmodem575E0031751 # <- Update + motors: + # name: (index, model) + shoulder_pan: [1, "xl330-m077"] + shoulder_lift: [2, "xl330-m077"] + elbow_flex: [3, "xl330-m077"] + wrist_flex: [4, "xl330-m077"] + wrist_roll: [5, "xl330-m077"] + gripper: [6, "xl330-m077"] +follower_arms: + main: + _target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus + port: /dev/tty.usbmodem575E0032081 # <- Update + motors: + # name: (index, model) + shoulder_pan: [1, "xl430-w250"] + shoulder_lift: [2, "xl430-w250"] + elbow_flex: [3, "xl330-m288"] + wrist_flex: [4, "xl330-m288"] + wrist_roll: [5, "xl330-m288"] + gripper: [6, "xl330-m288"] +[...] +``` + +This configuration file is used to instantiate your robot across all scripts. We'll cover how this works later on. + +**Connect and Configure your Motors** + +Before you can start using your motors, you'll need to configure them to ensure proper communication. When you first connect the motors, the [`DynamixelMotorsBus`](../lerobot/common/robot_devices/motors/dynamixel.py) automatically detects any mismatch between the current motor indices (factory set to `1`) and the specified indices (e.g., `1, 2, 3, 4, 5, 6`). 
This triggers a configuration procedure that requires you to unplug the power cord and motors, then reconnect each motor sequentially, starting from the one closest to the bus.

For a visual guide, refer to the [video tutorial of the configuration procedure](https://youtu.be/U78QQ9wCdpY).

To connect and configure the leader arm, run the following code in the same Python interactive session as earlier in the tutorial:
```python
leader_arm.connect()
```

When you connect the leader arm for the first time, you might see an output similar to this:
```
Read failed due to communication error on port /dev/tty.usbmodem575E0032081 for group_key ID_shoulder_pan_shoulder_lift_elbow_flex_wrist_flex_wrist_roll_gripper: [TxRxResult] There is no status packet!

/!\ A configuration issue has been detected with your motors:
If it's the first time that you use these motors, press enter to configure your motors... but before verify that all the cables are connected the proper way. If you find an issue, before making a modification, kill the python process, unplug the power cord to not damage the motors, rewire correctly, then plug the power again and relaunch the script.

Motor indices detected: {9600: [1]}

1. Unplug the power cord
2. Plug/unplug minimal number of cables to only have the first 1 motor(s) (['shoulder_pan']) connected.
3. Re-plug the power cord
Press Enter to continue...

*Follow the procedure*

Setting expected motor indices: [1, 2, 3, 4, 5, 6]
```

Once the leader arm is configured, repeat the process for the follower arm by running:
```python
follower_arm.connect()
```

Congratulations! Both arms are now properly configured and connected. You won't need to go through the configuration procedure again in the future.

**Troubleshooting**:

If the configuration process fails, you may need to configure the motors via the Dynamixel Wizard instead.

Known failure modes:
- Calling `arm.connect()` raises `OSError: No motor found, but one new motor expected. Verify power cord is plugged in and retry` on Ubuntu 22.

Steps:
1. Visit https://emanual.robotis.com/docs/en/software/dynamixel/dynamixel_wizard2/#connect-dynamixel.
2. Follow the software installation instructions in section 3 of the web page.
3. Launch the software.
4. Configure the device scanning options in the menu under `Tools` > `Options` > `Scan`. Check only Protocol 2.0, select only the USB port identifier of interest, select all baudrates, and set the ID range to `[0, 10]`. _While this step is not strictly necessary, it greatly speeds up scanning._
5. For each motor in turn:
   - Disconnect the power to the driver board.
   - Connect **only** the motor of interest to the driver board, making sure to disconnect it from any other motors.
   - Reconnect the power to the driver board.
   - From the software menu, select `Device` > `Scan` and let the scan run. A device should appear.
   - If the device has an asterisk (*) next to it, its firmware is outdated. From the software menu, select `Tools` > `Firmware Update` and follow the prompts.
   - The main panel should show a table with the various parameters of the device (refer to the web page, section 5). Select the row with `ID`, set the desired ID in the bottom-right panel, and click `Save`.
   - Just like you did with the ID, also set the `Baud Rate` to 1 Mbps.
6. Check everything has been done right:
   - Rewire the arms in their final configuration and power both of them.
   - Scan for devices. All 12 motors should appear.
   - Select the motors one by one and move the arm. Check that the graphical indicator near the top right shows the movement.

**Read and Write with DynamixelMotorsBus**

To get familiar with how `DynamixelMotorsBus` communicates with the motors, you can start by reading data from them. Copy-paste this code into the same interactive Python session:
```python
leader_pos = leader_arm.read("Present_Position")
follower_pos = follower_arm.read("Present_Position")
print(leader_pos)
print(follower_pos)
```

Expected output might look like:
```
array([2054, 523, 3071, 1831, 3049, 2441], dtype=int32)
array([2003, 1601, 56, 2152, 3101, 2283], dtype=int32)
```

Try moving the arms to various positions and observe how the values change.

Now let's enable torque in the follower arm by copy-pasting this code:
```python
from lerobot.common.robot_devices.motors.dynamixel import TorqueMode

follower_arm.write("Torque_Enable", TorqueMode.ENABLED.value)
```

With torque enabled, the follower arm will be locked in its current position. Do not attempt to manually move the arm while torque is enabled, as this could damage the motors.

Now, to get more familiar with reading and writing, let's move the arm programmatically by copy-pasting the following example code:
```python
# Get the current position
position = follower_arm.read("Present_Position")

# Update the first motor (shoulder_pan) position by +10 steps
position[0] += 10
follower_arm.write("Goal_Position", position)

# Update all motors' positions by -30 steps
position -= 30
follower_arm.write("Goal_Position", position)

# Update the gripper by +30 steps
position[-1] += 30
follower_arm.write("Goal_Position", position[-1], "gripper")
```

When you're done playing, you can disable the torque, but make sure you hold your robot so that it doesn't fall:
```python
follower_arm.write("Torque_Enable", TorqueMode.DISABLED.value)
```

Finally, disconnect the arms:
```python
leader_arm.disconnect()
follower_arm.disconnect()
```

Alternatively, you can unplug the power cord, which will automatically disable torque and disconnect the motors.

*/!\ Warning*: These motors tend to overheat, especially under torque or if left plugged in for too long. Unplug after use.

### b. Teleoperate your Koch v1.1 with KochRobot

**Instantiate the KochRobot**

Before you can teleoperate your robot, you need to instantiate the [`KochRobot`](../lerobot/common/robot_devices/robots/koch.py) using the previously defined `leader_arm` and `follower_arm`.

For the Koch robot, we only have one leader arm, so we refer to it as `"main"` and define it as `leader_arms={"main": leader_arm}`. We do the same for the follower arm. For other robots (like the Aloha), which may have two pairs of leader and follower arms, you would define them like this: `leader_arms={"left": left_leader_arm, "right": right_leader_arm}`, and the same for the follower arms (see the sketch below).

You also need to provide a path to a calibration file, such as `calibration_path=".cache/calibration/koch.pkl"`. More on this in the next section.
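For reference, a bimanual, Aloha-style layout would follow the same pattern. The sketch below is hypothetical: the four arm variables and the calibration path are placeholders, assuming you created one `DynamixelMotorsBus` per arm as shown earlier:
```python
from lerobot.common.robot_devices.robots.koch import KochRobot

# Hypothetical bimanual sketch: `left_leader_arm`, `right_leader_arm`,
# `left_follower_arm` and `right_follower_arm` are assumed to be
# DynamixelMotorsBus instances created beforehand.
bimanual_robot = KochRobot(
    leader_arms={"left": left_leader_arm, "right": right_leader_arm},
    follower_arms={"left": left_follower_arm, "right": right_follower_arm},
    calibration_path=".cache/calibration/bimanual.pkl",  # hypothetical path
)
```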
+ +Run the following code to instantiate your Koch robot: +```python +from lerobot.common.robot_devices.robots.koch import KochRobot + +robot = KochRobot( + leader_arms={"main": leader_arm}, + follower_arms={"main": follower_arm}, + calibration_path=".cache/calibration/koch.pkl", +) +``` + +**Calibrate and Connect the KochRobot** + +Next, you'll need to calibrate your robot to ensure that the leader and follower arms have the same position values when they are in the same physical position. This calibration is essential because it allows a neural network trained on one Koch robot to work on another. + +When you connect your robot for the first time, the [`KochRobot`](../lerobot/common/robot_devices/robots/koch.py) will detect if the calibration file is missing and trigger the calibration procedure. During this process, you will be guided to move each arm to three different positions. + +Here are the positions you'll move the follower arm to: + +
1. Zero position (image: Koch v1.1 follower arm zero position)
2. Rotated position (image: Koch v1.1 follower arm rotated position)
3. Rest position (image: Koch v1.1 follower arm rest position)

And here are the corresponding positions for the leader arm:
1. Zero position (image: Koch v1.1 leader arm zero position)
2. Rotated position (image: Koch v1.1 leader arm rotated position)
3. Rest position (image: Koch v1.1 leader arm rest position)

You can watch a [video tutorial of the calibration procedure](https://youtu.be/8drnU9uRY24) for more details.

During calibration, we count the number of full 360-degree rotations your motors have made since they were first used. That's why we ask you to move to this arbitrary "zero" position. We don't actually "set" the zero position, so you don't need to be accurate. After calculating these "offsets" to shift the motor values around 0, we need to assess the rotation direction of each motor, which might differ. That's why we ask you to rotate all motors to roughly 90 degrees, to measure whether the values change negatively or positively.
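To make the offsets and rotation directions concrete, here is an illustrative sketch of the underlying arithmetic. It is not the actual calibration code (that lives in [`lerobot/common/robot_devices/robots/koch.py`](../lerobot/common/robot_devices/robots/koch.py)); the helper and the numbers are an assumption-based example using the 4096-step resolution of the X-series motors:
```python
RESOLUTION = 4096  # steps per full 360-degree rotation for X-series motors

def steps_to_degrees(raw_steps: int, offset: int, direction: int) -> float:
    # `offset` shifts the motor value so the "zero" position reads 0 degrees;
    # `direction` is +1 or -1 depending on the motor's rotation direction.
    return direction * (raw_steps - offset) / RESOLUTION * 360

# A motor that read 2054 steps at the "zero" position (offset=2054)
# and now reads 3078 steps has rotated roughly +90 degrees:
print(steps_to_degrees(3078, offset=2054, direction=1))  # 90.0
```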
Finally, the rest position ensures that the follower and leader arms are roughly aligned after calibration, preventing sudden movements that could damage the motors when starting teleoperation.

Importantly, once calibrated, all Koch robots will move to the same positions (e.g. zero and rotated position) when commanded.

Run the following code to calibrate and connect your robot:
```python
robot.connect()
```

The output will look like this:
```
Connecting main follower arm
Connecting main leader arm
Missing calibration file '.cache/calibration/koch.pkl'. Starting calibration procedure.

Running calibration of main follower...

Move arm to zero position
[...]
Move arm to rotated position
[...]
Move arm to rest position
[...]

Running calibration of main leader...

Move arm to zero position
[...]
Move arm to rotated position
[...]
Move arm to rest position
[...]

Calibration is done! Saving calibration file '.cache/calibration/koch.pkl'
```

*Verifying Calibration*

Once calibration is complete, you can check the positions of the leader and follower arms to ensure they match. If the calibration was successful, the positions should be very similar.

Run this code to get the positions in degrees:
```python
leader_pos = robot.leader_arms["main"].read("Present_Position")
follower_pos = robot.follower_arms["main"].read("Present_Position")

print(leader_pos)
print(follower_pos)
```

Example output:
```
array([-0.43945312, 133.94531, 179.82422, -18.984375, -1.9335938, 34.541016], dtype=float32)
array([-0.58723712, 131.72314, 174.98743, -16.872612, 0.786213, 35.271973], dtype=float32)
```

These values are in degrees, which makes them easier to interpret and debug. The zero position used during calibration should roughly correspond to 0 degrees for each motor, and the rotated position should roughly correspond to 90 degrees for each motor.
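As a quick sanity check, you can compare the two arrays programmatically. This is a minimal sketch; the few-degrees tolerance is an assumption, not an official threshold:
```python
import numpy as np

# After calibration, leader and follower positions (in degrees) should be
# close when the arms are held in roughly the same pose.
discrepancy = np.abs(leader_pos - follower_pos)
print(discrepancy.max())  # expect at most a few degrees
```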
**Teleoperate your Koch v1.1**

You can easily teleoperate your robot by reading the positions from the leader arm and sending them as goal positions to the follower arm.

To teleoperate your robot for 30 seconds at a frequency of approximately 200Hz, run the following code:
```python
import tqdm

seconds = 30
frequency = 200
for _ in tqdm.tqdm(range(seconds * frequency)):
    leader_pos = robot.leader_arms["main"].read("Present_Position")
    robot.follower_arms["main"].write("Goal_Position", leader_pos)
```

*Using `teleop_step` for Teleoperation*

Alternatively, you can teleoperate the robot using the `teleop_step` method from [`KochRobot`](../lerobot/common/robot_devices/robots/koch.py).

Run this code to teleoperate:
```python
for _ in tqdm.tqdm(range(seconds * frequency)):
    robot.teleop_step()
```

*Recording data during Teleoperation*

Teleoperation is particularly useful for recording data. You can use `teleop_step(record_data=True)`, which returns both the follower arm's position as `"observation.state"` and the leader arm's position as `"action"`. It also converts the numpy arrays into PyTorch tensors. If you're working with a robot that has two leader and two follower arms (like the Aloha), the positions are concatenated.

Run the following code to see how slowly moving the leader arm affects the observation and action:
```python
leader_pos = robot.leader_arms["main"].read("Present_Position")
follower_pos = robot.follower_arms["main"].read("Present_Position")
observation, action = robot.teleop_step(record_data=True)

print(follower_pos)
print(observation)
print(leader_pos)
print(action)
```

Expected output:
```
array([7.8223, 131.1328, 165.5859, -23.4668, -0.9668, 32.4316], dtype=float32)
{'observation.state': tensor([7.8223, 131.1328, 165.5859, -23.4668, -0.9668, 32.4316])}
array([3.4277, 134.1211, 179.8242, -18.5449, -1.5820, 34.7168], dtype=float32)
{'action': tensor([3.4277, 134.1211, 179.8242, -18.5449, -1.5820, 34.7168])}
```

*Asynchronous Frame Recording*

Additionally, `teleop_step` can asynchronously record frames from multiple cameras and include them in the observation dictionary as `"observation.images.CAMERA_NAME"`. This feature will be covered in more detail in the next section.

*Disconnecting the Robot*

When you're finished, make sure to disconnect your robot by running:
```python
robot.disconnect()
```

Alternatively, you can unplug the power cord, which will also disable torque.

*/!\ Warning*: These motors tend to overheat, especially under torque or if left plugged in for too long. Unplug after use.

### c. Add your cameras with OpenCVCamera

**(Optional) Use your phone as a camera on Linux**

If you want to use your phone as a camera on Linux, follow these steps to set up a virtual camera:

1. *Install `v4l2loopback-dkms` and `v4l-utils`*. Those packages are required to create virtual camera devices (`v4l2loopback`) and verify their settings with the `v4l2-ctl` utility from `v4l-utils`. Install them using:
```bash
sudo apt install v4l2loopback-dkms v4l-utils
```
2. *Install [DroidCam](https://droidcam.app) on your phone*. This app is available for both iOS and Android.
3. *Install [OBS Studio](https://obsproject.com)*. This software will help you manage the camera feed. Install it using [Flatpak](https://flatpak.org):
```bash
flatpak install flathub com.obsproject.Studio
```
4. *Install the DroidCam OBS plugin*. This plugin integrates DroidCam with OBS Studio. Install it with:
```bash
flatpak install flathub com.obsproject.Studio.Plugin.DroidCam
```
5. *Start OBS Studio*. Launch with:
```bash
flatpak run com.obsproject.Studio
```
6. *Add your phone as a source*. Follow the instructions [here](https://droidcam.app/obs/usage). Be sure to set the resolution to `640x480`.
7. *Adjust resolution settings*. In OBS Studio, go to `File > Settings > Video`. Change the `Base(Canvas) Resolution` and the `Output(Scaled) Resolution` to `640x480` by manually typing it in.
8. *Start the virtual camera*. In OBS Studio, follow the instructions [here](https://obsproject.com/kb/virtual-camera-guide).
9. *Verify the virtual camera setup*. Use `v4l2-ctl` to list the devices:
```bash
v4l2-ctl --list-devices
```
You should see an entry like:
```
VirtualCam (platform:v4l2loopback-000):
/dev/video1
```
10. *Check the camera resolution*.
Use `v4l2-ctl` to ensure that the virtual camera output resolution is `640x480`. Change `/dev/video1` to the port of your virtual camera from the output of `v4l2-ctl --list-devices`.
```bash
v4l2-ctl -d /dev/video1 --get-fmt-video
```
You should see an entry like:
```
>>> Format Video Capture:
>>> Width/Height : 640/480
>>> Pixel Format : 'YUYV' (YUYV 4:2:2)
```

Troubleshooting: If the resolution is not correct, you will have to delete the virtual camera port and try again, as it cannot be changed.

If everything is set up correctly, you can proceed with the rest of the tutorial.

**(Optional) Use your iPhone as a camera on macOS**

To use your iPhone as a camera on macOS, enable the Continuity Camera feature:
- Ensure your Mac is running macOS 13 or later, and your iPhone is on iOS 16 or later.
- Sign in to both devices with the same Apple ID.
- Connect your devices with a USB cable, or turn on Wi-Fi and Bluetooth for a wireless connection.

For more details, visit [Apple support](https://support.apple.com/en-gb/guide/mac-help/mchl77879b8a/mac).

Your iPhone should be detected automatically when running the camera setup script in the next section.

**Instantiate an OpenCVCamera**

The [`OpenCVCamera`](../lerobot/common/robot_devices/cameras/opencv.py) class allows you to efficiently record frames from most cameras using the [`opencv2`](https://docs.opencv.org) library. For more details on compatibility, see [Video I/O with OpenCV Overview](https://docs.opencv.org/4.x/d0/da7/videoio_overview.html).

To instantiate an [`OpenCVCamera`](../lerobot/common/robot_devices/cameras/opencv.py), you need a camera index (e.g. `OpenCVCamera(camera_index=0)`). When you only have one camera, such as a laptop webcam, the camera index is usually `0`, but it might differ, and it might change if you reboot your computer or re-plug your camera. This behavior depends on your operating system.

To find the camera indices, run the following utility script, which will save a few frames from each detected camera:
```bash
python lerobot/common/robot_devices/cameras/opencv.py \
    --images-dir outputs/images_from_opencv_cameras
```

The output will look something like this if you have two cameras connected:
```
Mac or Windows detected. Finding available camera indices through scanning all indices from 0 to 60
[...]
Camera found at index 0
Camera found at index 1
[...]
Connecting cameras
OpenCVCamera(0, fps=30.0, width=1920.0, height=1080.0, color_mode=rgb)
OpenCVCamera(1, fps=24.0, width=1920.0, height=1080.0, color_mode=rgb)
Saving images to outputs/images_from_opencv_cameras
Frame: 0000 Latency (ms): 39.52
[...]
Frame: 0046 Latency (ms): 40.07
Images have been saved to outputs/images_from_opencv_cameras
```

Check the saved images in `outputs/images_from_opencv_cameras` to identify which camera index corresponds to which physical camera (e.g. `0` for `camera_00` or `1` for `camera_01`):
```
camera_00_frame_000000.png
[...]
camera_00_frame_000047.png
camera_01_frame_000000.png
[...]
camera_01_frame_000047.png
```

Note: Some cameras may take a few seconds to warm up, and the first frame might be black or green.

Finally, run this code to instantiate and connect your camera:
```python
from lerobot.common.robot_devices.cameras.opencv import OpenCVCamera

camera = OpenCVCamera(camera_index=0)
camera.connect()
color_image = camera.read()

print(color_image.shape)
print(color_image.dtype)
```

Expected output for a laptop camera on a MacBook Pro:
```
(1080, 1920, 3)
uint8
```

Or like this if you followed our tutorial to set up a virtual camera:
```
(480, 640, 3)
uint8
```

With certain cameras, you can also specify additional parameters like frame rate, resolution, and color mode during instantiation. For instance:
```python
camera = OpenCVCamera(camera_index=0, fps=30, width=640, height=480)
```

If the provided arguments are not compatible with the camera, an exception will be raised.

*Disconnecting the camera*

When you're done using the camera, disconnect it by running:
```python
camera.disconnect()
```

**Instantiate your robot with cameras**

Additionally, you can set up your robot to work with your cameras.

Modify the following Python code with the appropriate camera names and configurations:
```python
robot = KochRobot(
    leader_arms={"main": leader_arm},
    follower_arms={"main": follower_arm},
    calibration_path=".cache/calibration/koch.pkl",
    cameras={
        "laptop": OpenCVCamera(0, fps=30, width=640, height=480),
        "phone": OpenCVCamera(1, fps=30, width=640, height=480),
    },
)
robot.connect()
```

As a result, `teleop_step(record_data=True)` will return a frame for each camera following the PyTorch "channel first" convention, but we keep images in `uint8` with pixels in range [0,255] to easily save them.

Modify this code with the names of your cameras and run it:
```python
observation, action = robot.teleop_step(record_data=True)
print(observation["observation.images.laptop"].shape)
print(observation["observation.images.phone"].shape)
print(observation["observation.images.laptop"].min().item())
print(observation["observation.images.laptop"].max().item())
```

The output should look like this:
```
torch.Size([3, 480, 640])
torch.Size([3, 480, 640])
0
255
```

Also, update the following lines of the yaml file for the Koch robot, [`lerobot/configs/robot/koch.yaml`](../lerobot/configs/robot/koch.yaml), with the names and configurations of your cameras:
```yaml
[...]
cameras:
  laptop:
    _target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera
    camera_index: 0
    fps: 30
    width: 640
    height: 480
  phone:
    _target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera
    camera_index: 1
    fps: 30
    width: 640
    height: 480
```

This file is used to instantiate your robot in all our scripts. We will explain how this works in the next section.

### d. Use `koch.yaml` and our `teleoperate` function

Instead of manually running the python code in a terminal window, you can use [`lerobot/scripts/control_robot.py`](../lerobot/scripts/control_robot.py) to instantiate your robot by providing the path to the robot yaml file (e.g. [`lerobot/configs/robot/koch.yaml`](../lerobot/configs/robot/koch.yaml)) and control your robot with various modes, as explained next.

Try running this code to teleoperate your robot (if you don't have a camera, keep reading):
```bash
python lerobot/scripts/control_robot.py teleoperate \
  --robot-path lerobot/configs/robot/koch.yaml
```

You will see a lot of lines appearing like this one:
```
INFO 2024-08-10 11:15:03 ol_robot.py:209 dt: 5.12 (195.1hz) dtRlead: 4.93 (203.0hz) dtRfoll: 0.19 (5239.0hz)
```

It contains:
- `2024-08-10 11:15:03`, which is the date and time of the call to the print function.
- `ol_robot.py:209`, which is the end of the file name and the line number where the print function is called (`lerobot/scripts/control_robot.py`, line `209`).
- `dt: 5.12 (195.1hz)`, which is the "delta time", or the number of milliseconds spent between the previous call to `robot.teleop_step()` and the current one, together with the corresponding frequency (5.12 ms equals 195.1 Hz); note that you can cap the frequency by adding `--fps 30` as an argument.
- `dtRlead: 4.93 (203.0hz)`, which is the number of milliseconds it took to read the position of the leader arm using `leader_arm.read("Present_Position")`.
- `dtWfoll: 0.22 (4446.9hz)`, which is the number of milliseconds it took to set a new goal position for the follower arm using `follower_arm.write("Goal_position", leader_pos)`; note that writing is done asynchronously, so it takes less time than reading.

Note: you can override any entry in the yaml file using `--robot-overrides` and the [hydra.cc](https://hydra.cc/docs/advanced/override_grammar/basic) syntax. If needed, you can override the ports like this:
```bash
python lerobot/scripts/control_robot.py teleoperate \
  --robot-path lerobot/configs/robot/koch.yaml \
  --robot-overrides \
    leader_arms.main.port=/dev/tty.usbmodem575E0031751 \
    follower_arms.main.port=/dev/tty.usbmodem575E0032081
```

Importantly: if you don't have any camera, you can remove them dynamically with the [hydra.cc](https://hydra.cc/docs/advanced/override_grammar/basic) syntax `'~cameras'`:
```bash
python lerobot/scripts/control_robot.py teleoperate \
  --robot-path lerobot/configs/robot/koch.yaml \
  --robot-overrides \
    '~cameras'
```

We advise creating a new yaml file when the command becomes too long.

## 3. Record your Dataset and Visualize it

Using what you've learned previously, you can now easily record a dataset of states and actions for one episode. You can use `busy_wait` to control the speed of teleoperation and record at a fixed `fps` (frames per second).

Try this code to record 30 seconds at 60 fps:
```python
import time
from lerobot.scripts.control_robot import busy_wait

record_time_s = 30
fps = 60

states = []
actions = []
for _ in range(record_time_s * fps):
    start_time = time.perf_counter()
    observation, action = robot.teleop_step(record_data=True)

    states.append(observation["observation.state"])
    actions.append(action["action"])

    dt_s = time.perf_counter() - start_time
    busy_wait(1 / fps - dt_s)

# Note that observation and action are available in RAM, but
# you could potentially store them on disk with pickle/hdf5 or
# our optimized format `LeRobotDataset`. More on this next.
```

Importantly, many utilities are still missing. For instance, if you have cameras, you will need to save the images on disk so that you don't run out of RAM, and to do so in threads so that you don't slow down communication with your robot.
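Here is a hedged sketch of that threaded-saving idea. It reuses `record_time_s`, `fps` and the `"laptop"` camera key from above; the `save_frame` helper and the output layout are illustrative, not the actual implementation in `control_robot.py`:
```python
import concurrent.futures
from pathlib import Path

from PIL import Image

def save_frame(image_tensor, path: Path):
    # Convert the channel-first uint8 tensor back to height x width x channel
    # and write it to disk.
    Image.fromarray(image_tensor.permute(1, 2, 0).numpy()).save(path)

images_dir = Path("outputs/recorded_frames")
images_dir.mkdir(parents=True, exist_ok=True)

with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    for i in range(record_time_s * fps):
        observation, action = robot.teleop_step(record_data=True)
        # Saving happens in background threads, so the loop keeps
        # communicating with the robot at the requested rate.
        executor.submit(
            save_frame, observation["observation.images.laptop"], images_dir / f"frame_{i:06d}.png"
        )
```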
Also, you will need to store your data in a format optimized for training and web sharing, like [`LeRobotDataset`](../lerobot/common/datasets/lerobot_dataset.py). More on this in the next section.

### a. Use `koch.yaml` and the `record` function

You can use the `record` function from [`lerobot/scripts/control_robot.py`](../lerobot/scripts/control_robot.py) to achieve efficient data recording. It encompasses many recording utilities:
1. Frames from cameras are saved to disk in threads, and encoded into videos at the end of recording.
2. Video streams from cameras are displayed in a window so that you can verify them.
3. Data is stored in the [`LeRobotDataset`](../lerobot/common/datasets/lerobot_dataset.py) format and pushed to your Hugging Face page (unless `--push-to-hub 0` is provided).
4. Checkpoints are made during recording, so if any issue occurs, you can resume recording by re-running the same command. You can also use `--force-override 1` to start recording from scratch.
5. Set the flow of data recording using command line arguments:
   - `--warmup-time-s` defines the number of seconds before starting data collection. It allows the robot devices to warm up and synchronize (10 seconds by default).
   - `--episode-time-s` defines the number of seconds of data recording for each episode (60 seconds by default).
   - `--reset-time-s` defines the number of seconds for resetting the environment after each episode (60 seconds by default).
   - `--num-episodes` defines the number of episodes to record (50 by default).
6. Control the flow during data recording using keyboard keys:
   - Press the right arrow `->` at any time during episode recording to stop early and go to resetting, or during resetting to stop early and go to the next episode recording.
   - Press the left arrow `<-` at any time during episode recording or resetting to stop early, cancel the current episode, and re-record it.
   - Press escape `ESC` at any time during episode recording to end the session early and go straight to video encoding and dataset uploading.
7. Similarly to `teleoperate`, you can also use `--robot-path` and `--robot-overrides` to specify your robot.

Before trying `record`, if you want to push your dataset to the hub, make sure you've logged in using a write-access token, which can be generated from the [Hugging Face settings](https://huggingface.co/settings/tokens):
```bash
huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
```
Also, store your Hugging Face repository name in a variable (e.g. `cadene` or `lerobot`). For instance, run this to use your Hugging Face user name as repository:
```bash
HF_USER=$(huggingface-cli whoami | head -n 1)
echo $HF_USER
```
If you don't want to push to hub, use `--push-to-hub 0`.

Now run this to record 2 episodes:
```bash
python lerobot/scripts/control_robot.py record \
  --robot-path lerobot/configs/robot/koch.yaml \
  --fps 30 \
  --root data \
  --repo-id ${HF_USER}/koch_test \
  --warmup-time-s 5 \
  --episode-time-s 30 \
  --reset-time-s 30 \
  --num-episodes 2
```

This will write your dataset to `{root}/{repo-id}` (e.g. `data/cadene/koch_test`).

Remember to add `--robot-overrides '~cameras'` if you don't have any cameras and you still use the default `koch.yaml` configuration.

You will see a lot of lines appearing like this one:
```
INFO 2024-08-10 15:02:58 ol_robot.py:219 dt:33.34 (30.0hz) dtRlead: 5.06 (197.5hz) dtWfoll: 0.25 (3963.7hz) dtRfoll: 6.22 (160.7hz) dtRlaptop: 32.57 (30.7hz) dtRphone: 33.84 (29.5hz)
```
It contains:
- `2024-08-10 15:02:58`, which is the date and time of the call to the print function.
- `ol_robot.py:219`, which is the end of the file name and the line number where the print function is called (`lerobot/scripts/control_robot.py`, line `219`).
- `dt:33.34 (30.0hz)`, which is the "delta time", or the number of milliseconds spent between the previous call to `robot.teleop_step(record_data=True)` and the current one, together with the corresponding frequency (33.34 ms equals 30.0 Hz); note that we use `--fps 30`, so we expect 30.0 Hz; when a step takes more time, the line appears in yellow.
- `dtRlead: 5.06 (197.5hz)`, which is the delta time of reading the present position of the leader arm.
- `dtWfoll: 0.25 (3963.7hz)`, which is the delta time of writing the goal position on the follower arm; writing is asynchronous, so it takes less time than reading.
- `dtRfoll: 6.22 (160.7hz)`, which is the delta time of reading the present position of the follower arm.
- `dtRlaptop: 32.57 (30.7hz)`, which is the delta time of capturing an image from the laptop camera in a thread running asynchronously.
- `dtRphone: 33.84 (29.5hz)`, which is the delta time of capturing an image from the phone camera in a thread running asynchronously.

Troubleshooting:
- On Linux, if you encounter a hanging issue when using cameras, uninstall opencv and re-install it with conda:
```bash
pip uninstall opencv-python
conda install -c conda-forge opencv=4.10.0
```
- On Linux, if you encounter any issue during video encoding with `ffmpeg: unknown encoder libsvtav1`, you can:
  - install with conda-forge by running `conda install -c conda-forge ffmpeg` (it should be compiled with `libsvtav1`),
  - or, install [Homebrew](https://brew.sh) and run `brew install ffmpeg` (it should be compiled with `libsvtav1`),
  - or, install [ffmpeg build dependencies](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#GettheDependencies) and [compile ffmpeg from source with libsvtav1](https://trac.ffmpeg.org/wiki/CompilationGuide/Ubuntu#libsvtav1),
  - and, make sure you use the ffmpeg binary corresponding to your install with `which ffmpeg`.
- On Linux, if the left and right arrow keys and escape key don't have any effect during data recording, make sure you've set the `$DISPLAY` environment variable. See [pynput limitations](https://pynput.readthedocs.io/en/latest/limitations.html#linux).

At the end of data recording, your dataset will be uploaded to your Hugging Face page (e.g. https://huggingface.co/datasets/cadene/koch_test), whose URL you can obtain by running:
```bash
echo https://huggingface.co/datasets/${HF_USER}/koch_test
```
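Before recording a larger dataset, you can sanity-check a small one by loading it back with `LeRobotDataset`. This is a sketch; it assumes you recorded with `--root data` as above, the attribute names follow the dataset examples elsewhere in this repo, and you should replace the repo id with your own:
```python
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset

dataset = LeRobotDataset("cadene/koch_test", root="data")

print(dataset.fps)           # should match --fps 30
print(dataset.num_episodes)  # should match --num-episodes
frame = dataset[0]           # a dict with "observation.state", "action", ...
print(frame["observation.state"].shape)
```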
### b. Advice for recording datasets

Once you're comfortable with data recording, it's time to create a larger dataset for training. A good starting task is grasping an object at different locations and placing it in a bin. We suggest recording at least 50 episodes, with 10 episodes per location. Keep the cameras fixed and maintain consistent grasping behavior throughout the recordings.

In the following sections, you'll train your neural network. After achieving reliable grasping performance, you can start introducing more variations during data collection, such as additional grasp locations, different grasping techniques, and different camera positions.

Avoid adding too much variation too quickly, as it may hinder your results.

In the coming months, we plan to release a foundational model for robotics. We anticipate that fine-tuning this model will enhance generalization, reducing the need for strict consistency during data collection.

### c. Visualize all episodes

You can visualize your dataset by running:
```bash
python lerobot/scripts/visualize_dataset_html.py \
  --root data \
  --repo-id ${HF_USER}/koch_test
```

This will launch a local web server that looks like this:
[Image: dataset visualization web page]

### d. Replay episode on your robot with the `replay` function

A useful feature of [`lerobot/scripts/control_robot.py`](../lerobot/scripts/control_robot.py) is the `replay` function, which allows you to replay on your robot any episode that you've recorded, or episodes from any dataset out there. This function helps you test the repeatability of your robot's actions and assess transferability across robots of the same model.

To replay the first episode of the dataset you just recorded, run the following command:
```bash
python lerobot/scripts/control_robot.py replay \
  --robot-path lerobot/configs/robot/koch.yaml \
  --fps 30 \
  --root data \
  --repo-id ${HF_USER}/koch_test \
  --episode 0
```

Your robot should replicate movements similar to those you recorded. For example, check out [this video](https://x.com/RemiCadene/status/1793654950905680090) where we use `replay` on an Aloha robot from [Trossen Robotics](https://www.trossenrobotics.com).

## 4. Train a policy on your data

### a. Use the `train` script

To train a policy to control your robot, use the [`python lerobot/scripts/train.py`](../lerobot/scripts/train.py) script. A few arguments are required. Here is an example command:
```bash
DATA_DIR=data python lerobot/scripts/train.py \
  dataset_repo_id=${HF_USER}/koch_test \
  policy=act_koch_real \
  env=koch_real \
  hydra.run.dir=outputs/train/act_koch_test \
  hydra.job.name=act_koch_test \
  device=cuda \
  wandb.enable=true
```

Let's explain it:
1. We provided the dataset as an argument with `dataset_repo_id=${HF_USER}/koch_test`.
2. We provided the policy with `policy=act_koch_real`. This loads configurations from [`lerobot/configs/policy/act_koch_real.yaml`](../lerobot/configs/policy/act_koch_real.yaml). Importantly, this policy uses 2 cameras as input, `laptop` and `phone`. If your dataset has different cameras, update the yaml file to account for them in the following parts:
```yaml
...
override_dataset_stats:
  observation.images.laptop:
    # stats from imagenet, since we use a pretrained vision model
    mean: [[[0.485]], [[0.456]], [[0.406]]]  # (c,1,1)
    std: [[[0.229]], [[0.224]], [[0.225]]]  # (c,1,1)
  observation.images.phone:
    # stats from imagenet, since we use a pretrained vision model
    mean: [[[0.485]], [[0.456]], [[0.406]]]  # (c,1,1)
    std: [[[0.229]], [[0.224]], [[0.225]]]  # (c,1,1)
...
  input_shapes:
    observation.images.laptop: [3, 480, 640]
    observation.images.phone: [3, 480, 640]
...
  input_normalization_modes:
    observation.images.laptop: mean_std
    observation.images.phone: mean_std
...
```
3. We provided an environment as an argument with `env=koch_real`. This loads configurations from [`lerobot/configs/env/koch_real.yaml`](../lerobot/configs/env/koch_real.yaml). It looks like this:
```yaml
fps: 30
env:
  name: real_world
  task: null
  state_dim: 6
  action_dim: 6
  fps: ${fps}
```
It should match your dataset (e.g. `fps: 30`) and your robot (e.g. `state_dim: 6` and `action_dim: 6`). We are still working on simplifying this in future versions of `lerobot`.
4. We provided `device=cuda` since we are training on an Nvidia GPU, but you could use `device=mps` to train on Apple silicon.
5. We provided `wandb.enable=true` to use [Weights and Biases](https://docs.wandb.ai/quickstart) for visualizing training plots. This is optional, but if you use it, make sure you are logged in by running `wandb login`.
6. We added `DATA_DIR=data` to access your dataset stored in your local `data` directory.
If you don't provide `DATA_DIR`, your dataset will be downloaded from the Hugging Face hub to your cache folder `$HOME/.cache/huggingface`. In future versions of `lerobot`, both directories will be in sync.

For more information on the `train` script, see the previous tutorial: [`examples/4_train_policy_with_script.md`](../examples/4_train_policy_with_script.md)

### b. (Optional) Upload policy checkpoints to the hub

Once training is done, upload the latest checkpoint with:
```bash
huggingface-cli upload ${HF_USER}/act_koch_test \
  outputs/train/act_koch_test/checkpoints/last/pretrained_model
```

You can also upload intermediate checkpoints with:
```bash
CKPT=010000
huggingface-cli upload ${HF_USER}/act_koch_test_${CKPT} \
  outputs/train/act_koch_test/checkpoints/${CKPT}/pretrained_model
```

## 5. Evaluate your policy

Now that you have a policy checkpoint, you can easily control your robot with it using methods from [`KochRobot`](../lerobot/common/robot_devices/robots/koch.py) and the policy.

Try this code for running inference for 60 seconds at 30 fps:
```python
import time

import torch

from lerobot.common.policies.act.modeling_act import ACTPolicy
from lerobot.scripts.control_robot import busy_wait

inference_time_s = 60
fps = 30
device = "cuda"  # TODO: On Mac, use "mps" or "cpu"

ckpt_path = "outputs/train/act_koch_test/checkpoints/last/pretrained_model"
policy = ACTPolicy.from_pretrained(ckpt_path)
policy.to(device)

for _ in range(inference_time_s * fps):
    start_time = time.perf_counter()

    # Read the follower state and access the frames from the cameras
    observation = robot.capture_observation()

    # Convert to pytorch format: channel first and float32 in [0,1]
    # with batch dimension
    for name in observation:
        if "image" in name:
            observation[name] = observation[name].type(torch.float32) / 255
            observation[name] = observation[name].permute(2, 0, 1).contiguous()
        observation[name] = observation[name].unsqueeze(0)
        observation[name] = observation[name].to(device)

    # Compute the next action with the policy
    # based on the current observation
    action = policy.select_action(observation)
    # Remove batch dimension
    action = action.squeeze(0)
    # Move to cpu, if not already the case
    action = action.to("cpu")
    # Order the robot to move
    robot.send_action(action)

    dt_s = time.perf_counter() - start_time
    busy_wait(1 / fps - dt_s)
```

### a. Use `koch.yaml` and our `record` function

Ideally, when controlling your robot with your neural network, you would want to record evaluation episodes and be able to visualize them later on, or even train on them like in Reinforcement Learning. This pretty much corresponds to recording a new dataset, but with a neural network providing the actions instead of teleoperation.

To this end, you can use the `record` function from [`lerobot/scripts/control_robot.py`](../lerobot/scripts/control_robot.py), but with a policy checkpoint as input. For instance, run this command to record 10 evaluation episodes:
```bash
python lerobot/scripts/control_robot.py record \
  --robot-path lerobot/configs/robot/koch.yaml \
  --fps 30 \
  --root data \
  --repo-id ${HF_USER}/eval_koch_test \
  --warmup-time-s 5 \
  --episode-time-s 30 \
  --reset-time-s 30 \
  --num-episodes 10 \
  -p outputs/train/act_koch_test/checkpoints/last/pretrained_model
```

As you can see, it's almost the same command as previously used to record your training dataset. Two things changed:
1. There is an additional `-p` argument which indicates the path to your policy checkpoint (e.g.
`-p outputs/train/act_koch_test/checkpoints/last/pretrained_model`). You can also use the model repository if you uploaded a model checkpoint to the hub (e.g. `-p ${HF_USER}/act_koch_test`).
2. The dataset name begins with `eval` to reflect that you are running inference (e.g. `--repo-id ${HF_USER}/eval_koch_test`).

### b. Visualize evaluation afterwards

You can then visualize your evaluation dataset by running the same command as before, but with the new inference dataset as argument:
```bash
python lerobot/scripts/visualize_dataset_html.py \
  --root data \
  --repo-id ${HF_USER}/eval_koch_test
```

## 6. Next step

Join our [Discord](https://discord.com/invite/s3KuuzsPFb) to collaborate on data collection and help us train a fully open-source foundational model for robotics!
diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py
index af1a3db6..fca18db1 100644
--- a/lerobot/common/datasets/utils.py
+++ b/lerobot/common/datasets/utils.py
@@ -385,3 +385,18 @@ def cycle(iterable):
             yield next(iterator)
         except StopIteration:
             iterator = iter(iterable)
+
+
+def create_branch(repo_id, *, branch: str, repo_type: str | None = None):
+    """Create a branch on an existing Hugging Face repo. Delete the branch if it already
+    exists before creating it.
+    """
+    api = HfApi()
+
+    branches = api.list_repo_refs(repo_id, repo_type=repo_type).branches
+    refs = [branch.ref for branch in branches]
+    ref = f"refs/heads/{branch}"
+    if ref in refs:
+        api.delete_branch(repo_id, repo_type=repo_type, branch=branch)
+
+    api.create_branch(repo_id, repo_type=repo_type, branch=branch)
diff --git a/lerobot/common/datasets/video_utils.py b/lerobot/common/datasets/video_utils.py
index d2cc5f83..4d4ac6b0 100644
--- a/lerobot/common/datasets/video_utils.py
+++ b/lerobot/common/datasets/video_utils.py
@@ -210,6 +210,12 @@ def encode_video_frames(
     # redirect stdin to subprocess.DEVNULL to prevent reading random keyboard inputs from terminal
     subprocess.run(ffmpeg_cmd, check=True, stdin=subprocess.DEVNULL)
 
+    if not video_path.exists():
+        raise OSError(
+            f"Video encoding did not work. File not found: {video_path}. "
+            f"Try running the command manually to debug: `{' '.join(ffmpeg_cmd)}`"
+        )
+
 
 @dataclass
 class VideoFrame:
diff --git a/lerobot/common/robot_devices/cameras/opencv.py b/lerobot/common/robot_devices/cameras/opencv.py
index 68a0e4f5..5556dcce 100644
--- a/lerobot/common/robot_devices/cameras/opencv.py
+++ b/lerobot/common/robot_devices/cameras/opencv.py
@@ -5,6 +5,7 @@ This file contains utilities for recording frames from cameras. For more info lo
 import argparse
 import concurrent.futures
 import math
+import platform
 import shutil
 import threading
 import time
@@ -33,8 +34,22 @@ MAX_OPENCV_INDEX = 60
 
 
 def find_camera_indices(raise_when_empty=False, max_index_search_range=MAX_OPENCV_INDEX):
+    if platform.system() == "Linux":
+        # Linux uses camera ports
+        print("Linux detected. Finding available camera indices through scanning '/dev/video*' ports")
+        possible_camera_ids = []
+        for port in Path("/dev").glob("video*"):
+            camera_idx = int(str(port).replace("/dev/video", ""))
+            possible_camera_ids.append(camera_idx)
+    else:
+        print(
+            "Mac or Windows detected. Finding available camera indices through "
+            f"scanning all indices from 0 to {MAX_OPENCV_INDEX}"
+        )
+        possible_camera_ids = range(max_index_search_range)
+
     camera_ids = []
-    for camera_idx in range(max_index_search_range):
+    for camera_idx in possible_camera_ids:
         camera = cv2.VideoCapture(camera_idx)
         is_open = camera.isOpened()
         camera.release()
@@ -45,7 +60,8 @@
 
     if raise_when_empty and len(camera_ids) == 0:
         raise OSError(
-            "Not a single camera was detected. Try re-plugging, or re-installing `opencv2`, or your camera driver, or make sure your camera is compatible with opencv2."
+            "Not a single camera was detected. Try re-plugging, or re-installing `opencv2`, "
+            "or your camera driver, or make sure your camera is compatible with opencv2."
         )
 
     return camera_ids
@@ -59,10 +75,9 @@ def save_image(img_array, camera_index, frame_index, images_dir):
 
 
 def save_images_from_cameras(
-    images_dir: Path, camera_ids=None, fps=None, width=None, height=None, record_time_s=2
+    images_dir: Path, camera_ids: list[int] | None = None, fps=None, width=None, height=None, record_time_s=2
 ):
     if camera_ids is None:
-        print("Finding available camera indices")
         camera_ids = find_camera_indices()
 
     print("Connecting cameras")
@@ -71,13 +86,12 @@
         camera = OpenCVCamera(cam_idx, fps=fps, width=width, height=height)
         camera.connect()
         print(
-            f"OpenCVCamera({camera.camera_index}, fps={camera.fps}, width={camera.width}, height={camera.height}, color_mode={camera.color_mode})"
+            f"OpenCVCamera({camera.camera_index}, fps={camera.fps}, width={camera.width}, "
+            f"height={camera.height}, color_mode={camera.color_mode})"
         )
         cameras.append(camera)
 
-    images_dir = Path(
-        images_dir,
-    )
+    images_dir = Path(images_dir)
     if images_dir.exists():
         shutil.rmtree(
             images_dir,
@@ -160,7 +174,7 @@ class OpenCVCamera:
     When an OpenCVCamera is instantiated, if no specific config is provided, the default fps, width, height
     and color_mode of the given camera will be used.
 
-    Example of usage of the class:
+    Example of usage:
     ```python
     camera = OpenCVCamera(camera_index=0)
     camera.connect()
@@ -194,11 +208,6 @@
         self.height = config.height
         self.color_mode = config.color_mode
 
-        if not isinstance(self.camera_index, int):
-            raise ValueError(
-                f"Camera index must be provided as an int, but {self.camera_index} was given instead."
-            )
-
         self.camera = None
         self.is_connected = False
         self.thread = None
@@ -212,7 +221,13 @@
         # First create a temporary camera trying to access `camera_index`,
         # and verify it is a valid camera by calling `isOpened`.
-        tmp_camera = cv2.VideoCapture(self.camera_index)
+
+        if platform.system() == "Linux":
+            # Linux uses ports for connecting to cameras
+            tmp_camera = cv2.VideoCapture(f"/dev/video{self.camera_index}")
+        else:
+            tmp_camera = cv2.VideoCapture(self.camera_index)
+
         is_camera_open = tmp_camera.isOpened()
         # Release camera to make it accessible for `find_camera_indices`
         del tmp_camera
@@ -224,7 +239,8 @@
             available_cam_ids = find_camera_indices()
             if self.camera_index not in available_cam_ids:
                 raise ValueError(
-                    f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, but {self.camera_index} is provided instead."
+                    f"`camera_index` is expected to be one of these available cameras {available_cam_ids}, but {self.camera_index} is provided instead. "
+                    "To find the camera index you should use, run `python lerobot/common/robot_devices/cameras/opencv.py`."
                 )
 
             raise OSError(f"Can't access camera {self.camera_index}.")
@@ -232,7 +248,10 @@
         # Secondly, create the camera that will be used downstream.
         # Note: For some unknown reason, calling `isOpened` blocks the camera which then
         # needs to be re-created.
-        self.camera = cv2.VideoCapture(self.camera_index)
+        if platform.system() == "Linux":
+            self.camera = cv2.VideoCapture(f"/dev/video{self.camera_index}")
+        else:
+            self.camera = cv2.VideoCapture(self.camera_index)
 
         if self.fps is not None:
             self.camera.set(cv2.CAP_PROP_FPS, self.fps)
diff --git a/lerobot/common/robot_devices/cameras/utils.py b/lerobot/common/robot_devices/cameras/utils.py
index 08c9465f..0f329d9f 100644
--- a/lerobot/common/robot_devices/cameras/utils.py
+++ b/lerobot/common/robot_devices/cameras/utils.py
@@ -2,6 +2,7 @@ from pathlib import Path
 from typing import Protocol
 
 import cv2
+import einops
 import numpy as np
@@ -39,6 +40,16 @@ def save_depth_image(depth, path, write_shape=False):
     cv2.imwrite(str(path), depth_image)
 
 
+def convert_torch_image_to_cv2(tensor, rgb_to_bgr=True):
+    assert tensor.ndim == 3
+    c, h, w = tensor.shape
+    assert c < h and c < w
+    color_image = einops.rearrange(tensor, "c h w -> h w c").numpy()
+    if rgb_to_bgr:
+        color_image = cv2.cvtColor(color_image, cv2.COLOR_RGB2BGR)
+    return color_image
+
+
 # Defines a camera type
 class Camera(Protocol):
     def connect(self): ...
diff --git a/lerobot/common/robot_devices/motors/dynamixel.py b/lerobot/common/robot_devices/motors/dynamixel.py
index 0135d8f2..687db174 100644
--- a/lerobot/common/robot_devices/motors/dynamixel.py
+++ b/lerobot/common/robot_devices/motors/dynamixel.py
@@ -5,6 +5,7 @@ from copy import deepcopy
 from pathlib import Path
 
 import numpy as np
+import tqdm
 from dynamixel_sdk import (
     COMM_SUCCESS,
     DXL_HIBYTE,
@@ -21,9 +22,11 @@ from lerobot.common.robot_devices.utils import RobotDeviceAlreadyConnectedError,
 from lerobot.common.utils.utils import capture_timestamp_utc
 
 PROTOCOL_VERSION = 2.0
-BAUD_RATE = 1_000_000
+BAUDRATE = 1_000_000
 TIMEOUT_MS = 1000
 
+MAX_ID_RANGE = 252
+
 # https://emanual.robotis.com/docs/en/dxl/x/xl330-m077
 # https://emanual.robotis.com/docs/en/dxl/x/xl330-m288
 # https://emanual.robotis.com/docs/en/dxl/x/xl430-w250
@@ -86,6 +89,16 @@ X_SERIES_CONTROL_TABLE = {
     "Present_Temperature": (146, 1),
 }
 
+X_SERIES_BAUDRATE_TABLE = {
+    0: 9_600,
+    1: 57_600,
+    2: 115_200,
+    3: 1_000_000,
+    4: 2_000_000,
+    5: 3_000_000,
+    6: 4_000_000,
+}
+
 CALIBRATION_REQUIRED = ["Goal_Position", "Present_Position"]
 CONVERT_UINT32_TO_INT32_REQUIRED = ["Goal_Position", "Present_Position"]
 
@@ -98,7 +111,67 @@
 MODEL_CONTROL_TABLE = {
     "x_series": X_SERIES_CONTROL_TABLE,
     "xl330-m077": X_SERIES_CONTROL_TABLE,
     "xl330-m288": X_SERIES_CONTROL_TABLE,
     "xl430-w250": X_SERIES_CONTROL_TABLE,
     "xm430-w350": X_SERIES_CONTROL_TABLE,
     "xm540-w270": X_SERIES_CONTROL_TABLE,
 }
 
+MODEL_RESOLUTION = {
+    "x_series": 4096,
+    "xl330-m077": 4096,
+    "xl330-m288": 4096,
+    "xl430-w250": 4096,
+    "xm430-w350": 4096,
+    "xm540-w270": 4096,
+}
+
+MODEL_BAUDRATE_TABLE = {
+    "x_series": X_SERIES_BAUDRATE_TABLE,
+    "xl330-m077": X_SERIES_BAUDRATE_TABLE,
+    "xl330-m288": X_SERIES_BAUDRATE_TABLE,
+    "xl430-w250": X_SERIES_BAUDRATE_TABLE,
+    "xm430-w350": X_SERIES_BAUDRATE_TABLE,
+    "xm540-w270": X_SERIES_BAUDRATE_TABLE,
+}
+
 NUM_READ_RETRY = 10
+NUM_WRITE_RETRY = 10
+
+
+def convert_degrees_to_steps(degrees: float | np.ndarray, models: str | list[str]):
+    """This function converts the degree range to the step range for indicating motor rotation.
+    It assumes a motor achieves a full rotation by going from the -180 degree position to +180.
+    The motor resolution (e.g.
4096) corresponds to the number of steps needed to achieve a full rotation. + """ + if isinstance(degrees, float): + degrees = np.array(degrees) + + resolutions = [MODEL_RESOLUTION[model] for model in models] + steps = degrees / 180 * np.array(resolutions) / 2 + steps = steps.astype(int) + return steps + + +def convert_to_bytes(value, bytes): + # Note: No need to convert back into unsigned int, since this byte preprocessing + # already handles it for us. + if bytes == 1: + data = [ + DXL_LOBYTE(DXL_LOWORD(value)), + ] + elif bytes == 2: + data = [ + DXL_LOBYTE(DXL_LOWORD(value)), + DXL_HIBYTE(DXL_LOWORD(value)), + ] + elif bytes == 4: + data = [ + DXL_LOBYTE(DXL_LOWORD(value)), + DXL_HIBYTE(DXL_LOWORD(value)), + DXL_LOBYTE(DXL_HIWORD(value)), + DXL_HIBYTE(DXL_HIWORD(value)), + ] + else: + raise NotImplementedError( + f"Value of the number of bytes to be sent is expected to be in [1, 2, 4], but " + f"{bytes} is provided instead." + ) + return data def get_group_sync_key(data_name, motor_names): @@ -207,13 +280,12 @@ class DynamixelMotorsBus: >>> The port of this DynamixelMotorsBus is /dev/tty.usbmodem575E0031751. >>> Reconnect the usb cable. ``` - To find the motor indices, use [DynamixelWizzard2](https://emanual.robotis.com/docs/en/software/dynamixel/dynamixel_wizard2). Example of usage for 1 motor connected to the bus: ```python motor_name = "gripper" motor_index = 6 - motor_model = "xl330-m077" + motor_model = "xl330-m288" motors_bus = DynamixelMotorsBus( port="/dev/tty.usbmodem575E0031751", @@ -221,7 +293,11 @@ class DynamixelMotorsBus: ) motors_bus.connect() - motors_bus.teleop_step() + position = motors_bus.read("Present_Position") + + # move from a few motor steps as an example + few_steps = 30 + motors_bus.write("Goal_Position", position + few_steps) # when done, consider disconnecting motors_bus.disconnect() @@ -233,6 +309,7 @@ class DynamixelMotorsBus: port: str, motors: dict[str, tuple[int, str]], extra_model_control_table: dict[str, list[tuple]] | None = None, + extra_model_resolution: dict[str, int] | None = None, ): self.port = port self.motors = motors @@ -241,6 +318,10 @@ class DynamixelMotorsBus: if extra_model_control_table: self.model_ctrl_table.update(extra_model_control_table) + self.model_resolution = deepcopy(MODEL_RESOLUTION) + if extra_model_resolution: + self.model_resolution.update(extra_model_resolution) + self.port_handler = None self.packet_handler = None self.calibration = None @@ -268,52 +349,286 @@ class DynamixelMotorsBus: ) raise - self.port_handler.setBaudRate(BAUD_RATE) - self.port_handler.setPacketTimeoutMillis(TIMEOUT_MS) - + # Allow to read and write self.is_connected = True + self.port_handler.setPacketTimeoutMillis(TIMEOUT_MS) + + # Set expected baudrate for the bus + self.set_bus_baudrate(BAUDRATE) + + if not self.are_motors_configured(): + input( + "\n/!\\ A configuration issue has been detected with your motors: \n" + "If it's the first time that you use these motors, press enter to configure your motors... but before " + "verify that all the cables are connected the proper way. 
If you find an issue, before making a modification, " + "kill the python process, unplug the power cord to not damage the motors, rewire correctly, then plug the power " + "again and relaunch the script.\n" + ) + print() + self.configure_motors() + + def reconnect(self): + self.port_handler = PortHandler(self.port) + self.packet_handler = PacketHandler(PROTOCOL_VERSION) + if not self.port_handler.openPort(): + raise OSError(f"Failed to open port '{self.port}'.") + self.is_connected = True + + def are_motors_configured(self): + # Only check the motor indices and not baudrate, since if the motor baudrates are incorrect, + # a ConnectionError will be raised anyway. + try: + return (self.motor_indices == self.read("ID")).all() + except ConnectionError as e: + print(e) + return False + + def configure_motors(self): + # TODO(rcadene): This script assumes motors follow the X_SERIES baudrates + # TODO(rcadene): Refactor this function with intermediate high-level functions + + print("Scanning all baudrates and motor indices") + all_baudrates = set(X_SERIES_BAUDRATE_TABLE.values()) + ids_per_baudrate = {} + for baudrate in all_baudrates: + self.set_bus_baudrate(baudrate) + present_ids = self.find_motor_indices() + if len(present_ids) > 0: + ids_per_baudrate[baudrate] = present_ids + print(f"Motor indices detected: {ids_per_baudrate}") + print() + + possible_baudrates = list(ids_per_baudrate.keys()) + possible_ids = list({idx for sublist in ids_per_baudrate.values() for idx in sublist}) + untaken_ids = list(set(range(MAX_ID_RANGE)) - set(possible_ids) - set(self.motor_indices)) + + # Connect successively one motor to the chain and write a unique random index for each + for i in range(len(self.motors)): + self.disconnect() + input( + "1. Unplug the power cord\n" + "2. Plug/unplug minimal number of cables to only have the first " + f"{i+1} motor(s) ({self.motor_names[:i+1]}) connected.\n" + "3. Re-plug the power cord\n" + "Press Enter to continue..." + ) + print() + self.reconnect() + + if i > 0: + try: + self._read_with_motor_ids(self.motor_models, untaken_ids[:i], "ID") + except ConnectionError: + print(f"Failed to read from {untaken_ids[:i+1]}. Make sure the power cord is plugged in.") + input("Press Enter to continue...") + print() + self.reconnect() + + print("Scanning possible baudrates and motor indices") + motor_found = False + for baudrate in possible_baudrates: + self.set_bus_baudrate(baudrate) + present_ids = self.find_motor_indices(possible_ids) + if len(present_ids) == 1: + present_idx = present_ids[0] + print(f"Detected motor with index {present_idx}") + + if baudrate != BAUDRATE: + print(f"Setting its baudrate to {BAUDRATE}") + baudrate_idx = list(X_SERIES_BAUDRATE_TABLE.values()).index(BAUDRATE) + + # The write can fail, so we allow retries + for _ in range(NUM_WRITE_RETRY): + self._write_with_motor_ids( + self.motor_models, present_idx, "Baud_Rate", baudrate_idx + ) + time.sleep(0.5) + self.set_bus_baudrate(BAUDRATE) + try: + present_baudrate_idx = self._read_with_motor_ids( + self.motor_models, present_idx, "Baud_Rate" + ) + except ConnectionError: + print("Failed to write baudrate. 
Retrying.") + self.set_bus_baudrate(baudrate) + continue + break + else: + raise + + if present_baudrate_idx != baudrate_idx: + raise OSError("Failed to write baudrate.") + + print(f"Setting its index to a temporary untaken index ({untaken_ids[i]})") + self._write_with_motor_ids(self.motor_models, present_idx, "ID", untaken_ids[i]) + + present_idx = self._read_with_motor_ids(self.motor_models, untaken_ids[i], "ID") + if present_idx != untaken_ids[i]: + raise OSError("Failed to write index.") + + motor_found = True + break + elif len(present_ids) > 1: + raise OSError(f"More than one motor detected ({present_ids}), but only one was expected.") + + if not motor_found: + raise OSError( + "No motor found, but one new motor expected. Verify power cord is plugged in and retry." + ) + print() + + print(f"Setting expected motor indices: {self.motor_indices}") + self.set_bus_baudrate(BAUDRATE) + self._write_with_motor_ids( + self.motor_models, untaken_ids[: len(self.motors)], "ID", self.motor_indices + ) + print() + + if (self.read("ID") != self.motor_indices).any(): + raise OSError("Failed to write motors indices.") + + print("Configuration is done!") + + def find_motor_indices(self, possible_ids=None): + if possible_ids is None: + possible_ids = range(MAX_ID_RANGE) + + indices = [] + for idx in tqdm.tqdm(possible_ids): + try: + present_idx = self._read_with_motor_ids(self.motor_models, [idx], "ID")[0] + except ConnectionError: + continue + + if idx != present_idx: + # sanity check + raise OSError( + "Motor index used to communicate through the bus is not the same as the one present in the motor memory. The motor memory might be damaged." + ) + indices.append(idx) + + return indices + + def set_bus_baudrate(self, baudrate): + present_bus_baudrate = self.port_handler.getBaudRate() + if present_bus_baudrate != baudrate: + print(f"Setting bus baud rate to {baudrate}. Previously {present_bus_baudrate}.") + self.port_handler.setBaudRate(baudrate) + + if self.port_handler.getBaudRate() != baudrate: + raise OSError("Failed to write bus baud rate.") + @property - def motor_names(self) -> list[int]: + def motor_names(self) -> list[str]: return list(self.motors.keys()) + @property + def motor_models(self) -> list[str]: + return [model for _, model in self.motors.values()] + + @property + def motor_indices(self) -> list[int]: + return [idx for idx, _ in self.motors.values()] + def set_calibration(self, calibration: dict[str, tuple[int, bool]]): self.calibration = calibration def apply_calibration(self, values: np.ndarray | list, motor_names: list[str] | None): - if not self.calibration: - return values + """Convert from unsigned int32 joint position range [0, 2**32[ to the universal float32 nominal degree range ]-180.0, 180.0[ with + a "zero position" at 0 degree. + Note: We say "nominal degree range" since the motors can take values outside this range. For instance, 190 degrees, if the motor + rotate more than a half a turn from the zero position. However, most motors can't rotate more than 180 degrees and will stay in this range. + + Joints values are original in [0, 2**32[ (unsigned int32). Each motor are expected to complete a full rotation + when given a goal position that is + or - their resolution. For instance, dynamixel xl330-m077 have a resolution of 4096, and + at any position in their original range, let's say the position 56734, they complete a full rotation clockwise by moving to 60830, + or anticlockwise by moving to 52638. 
The position in the original range is arbitrary and might change a lot between each motor. + To harmonize between motors of the same model, different robots, or even models of different brands, we propose to work + in the centered nominal degree range ]-180, 180[. + """ if motor_names is None: motor_names = self.motor_names + # Convert from unsigned int32 original range [0, 2**32[ to centered signed int32 range [-2**31, 2**31[ + values = values.astype(np.int32) + for i, name in enumerate(motor_names): homing_offset, drive_mode = self.calibration[name] - if values[i] is not None: - if drive_mode: - values[i] *= -1 - values[i] += homing_offset + # Update direction of rotation of the motor to match between leader and follower. In fact, the motor of the leader for a given joint + # can be assembled in an opposite direction in term of rotation than the motor of the follower on the same joint. + if drive_mode: + values[i] *= -1 + + # Convert from range [-2**31, 2**31[ to nominal range ]-resolution, resolution[ (e.g. ]-2048, 2048[) + values[i] += homing_offset + + # Convert from range ]-resolution, resolution[ to the universal float32 centered degree range ]-180, 180[ + values = values.astype(np.float32) + for i, name in enumerate(motor_names): + _, model = self.motors[name] + resolution = self.model_resolution[model] + values[i] = values[i] / (resolution // 2) * 180 return values def revert_calibration(self, values: np.ndarray | list, motor_names: list[str] | None): - if not self.calibration: - return values - + """Inverse of `apply_calibration`.""" if motor_names is None: motor_names = self.motor_names + # Convert from the universal float32 centered degree range ]-180, 180[ to resolution range ]-resolution, resolution[ + for i, name in enumerate(motor_names): + _, model = self.motors[name] + resolution = self.model_resolution[model] + values[i] = values[i] / 180 * (resolution // 2) + + values = np.round(values).astype(np.int32) + + # Convert from nominal range ]-resolution, resolution[ to centered signed int32 range [-2**31, 2**31[ for i, name in enumerate(motor_names): homing_offset, drive_mode = self.calibration[name] + values[i] -= homing_offset - if values[i] is not None: - values[i] -= homing_offset - if drive_mode: - values[i] *= -1 + # Update direction of rotation of the motor that was matching between leader and follower to their original direction. + # In fact, the motor of the leader for a given joint can be assembled in an opposite direction in term of rotation + # than the motor of the follower on the same joint. 
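+            # Illustrative example (made-up numbers): with homing_offset == -2048 and drive_mode == 1,
+            # a nominal value of 0 becomes 0 - (-2048) = 2048 after the offset subtraction above, then
+            # -2048 after the sign flip below, which is the raw step value this motor actually expects.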
+ if drive_mode: + values[i] *= -1 return values + def _read_with_motor_ids(self, motor_models, motor_ids, data_name): + return_list = True + if not isinstance(motor_ids, list): + return_list = False + motor_ids = [motor_ids] + + assert_same_address(self.model_ctrl_table, self.motor_models, data_name) + addr, bytes = self.model_ctrl_table[motor_models[0]][data_name] + group = GroupSyncRead(self.port_handler, self.packet_handler, addr, bytes) + for idx in motor_ids: + group.addParam(idx) + + comm = group.txRxPacket() + if comm != COMM_SUCCESS: + raise ConnectionError( + f"Read failed due to communication error on port {self.port_handler.port_name} for indices {motor_ids}: " + f"{self.packet_handler.getTxRxResult(comm)}" + ) + + values = [] + for idx in motor_ids: + value = group.getData(idx, addr, bytes) + values.append(value) + + if return_list: + return values + else: + return values[0] + def read(self, data_name, motor_names: str | list[str] | None = None): if not self.is_connected: raise RobotDeviceNotConnectedError( @@ -367,9 +682,21 @@ class DynamixelMotorsBus: if data_name in CONVERT_UINT32_TO_INT32_REQUIRED: values = values.astype(np.int32) - if data_name in CALIBRATION_REQUIRED: + if data_name in CALIBRATION_REQUIRED and self.calibration is not None: values = self.apply_calibration(values, motor_names) + # We expect our motors to stay in a nominal range of [-180, 180] degrees + # which corresponds to a half turn rotation. + # However, some motors can turn a bit more, hence we extend the nominal range to [-270, 270] + # which is less than a full 360 degree rotation. + if not np.all((values > -270) & (values < 270)): + raise ValueError( + f"Wrong motor position range detected. " + f"Expected to be in [-270, +270] but in [{values.min()}, {values.max()}]. " + "This might be due to a cable connection issue creating an artificial 360 degrees jump in motor values. 
" + "You need to recalibrate by running: `python lerobot/scripts/control_robot.py calibrate`" + ) + # log the number of seconds it took to read the data from the motors delta_ts_name = get_log_name("delta_timestamp_s", "read", data_name, motor_names) self.logs[delta_ts_name] = time.perf_counter() - start_time @@ -380,6 +707,26 @@ class DynamixelMotorsBus: return values + def _write_with_motor_ids(self, motor_models, motor_ids, data_name, values): + if not isinstance(motor_ids, list): + motor_ids = [motor_ids] + if not isinstance(values, list): + values = [values] + + assert_same_address(self.model_ctrl_table, motor_models, data_name) + addr, bytes = self.model_ctrl_table[motor_models[0]][data_name] + group = GroupSyncWrite(self.port_handler, self.packet_handler, addr, bytes) + for idx, value in zip(motor_ids, values, strict=True): + data = convert_to_bytes(value, bytes) + group.addParam(idx, data) + + comm = group.txPacket() + if comm != COMM_SUCCESS: + raise ConnectionError( + f"Write failed due to communication error on port {self.port_handler.port_name} for indices {motor_ids}: " + f"{self.packet_handler.getTxRxResult(comm)}" + ) + def write(self, data_name, values: int | float | np.ndarray, motor_names: str | list[str] | None = None): if not self.is_connected: raise RobotDeviceNotConnectedError( @@ -406,7 +753,7 @@ class DynamixelMotorsBus: motor_ids.append(motor_idx) models.append(model) - if data_name in CALIBRATION_REQUIRED: + if data_name in CALIBRATION_REQUIRED and self.calibration is not None: values = self.revert_calibration(values, motor_names) values = values.tolist() @@ -422,30 +769,7 @@ class DynamixelMotorsBus: ) for idx, value in zip(motor_ids, values, strict=True): - # Note: No need to convert back into unsigned int, since this byte preprocessing - # already handles it for us. - if bytes == 1: - data = [ - DXL_LOBYTE(DXL_LOWORD(value)), - ] - elif bytes == 2: - data = [ - DXL_LOBYTE(DXL_LOWORD(value)), - DXL_HIBYTE(DXL_LOWORD(value)), - ] - elif bytes == 4: - data = [ - DXL_LOBYTE(DXL_LOWORD(value)), - DXL_HIBYTE(DXL_LOWORD(value)), - DXL_LOBYTE(DXL_HIWORD(value)), - DXL_HIBYTE(DXL_HIWORD(value)), - ] - else: - raise NotImplementedError( - f"Value of the number of bytes to be sent is expected to be in [1, 2, 4], but " - f"{bytes} is provided instead." 
- ) - + data = convert_to_bytes(value, bytes) if init_group: self.group_writers[group_key].addParam(idx, data) else: diff --git a/lerobot/common/robot_devices/robots/factory.py b/lerobot/common/robot_devices/robots/factory.py index 749a1d85..2edcd292 100644 --- a/lerobot/common/robot_devices/robots/factory.py +++ b/lerobot/common/robot_devices/robots/factory.py @@ -1,46 +1,7 @@ -def make_robot(name): - if name == "koch": - # TODO(rcadene): Add configurable robot from command line and yaml config - # TODO(rcadene): Add example with and without cameras - from lerobot.common.robot_devices.cameras.opencv import OpenCVCamera - from lerobot.common.robot_devices.motors.dynamixel import DynamixelMotorsBus - from lerobot.common.robot_devices.robots.koch import KochRobot +import hydra +from omegaconf import DictConfig - robot = KochRobot( - leader_arms={ - "main": DynamixelMotorsBus( - port="/dev/tty.usbmodem575E0031751", - motors={ - # name: (index, model) - "shoulder_pan": (1, "xl330-m077"), - "shoulder_lift": (2, "xl330-m077"), - "elbow_flex": (3, "xl330-m077"), - "wrist_flex": (4, "xl330-m077"), - "wrist_roll": (5, "xl330-m077"), - "gripper": (6, "xl330-m077"), - }, - ), - }, - follower_arms={ - "main": DynamixelMotorsBus( - port="/dev/tty.usbmodem575E0032081", - motors={ - # name: (index, model) - "shoulder_pan": (1, "xl430-w250"), - "shoulder_lift": (2, "xl430-w250"), - "elbow_flex": (3, "xl330-m288"), - "wrist_flex": (4, "xl330-m288"), - "wrist_roll": (5, "xl330-m288"), - "gripper": (6, "xl330-m288"), - }, - ), - }, - cameras={ - "laptop": OpenCVCamera(0, fps=30, width=640, height=480), - "phone": OpenCVCamera(1, fps=30, width=640, height=480), - }, - ) - else: - raise ValueError(f"Robot '{name}' not found.") +def make_robot(cfg: DictConfig): + robot = hydra.utils.instantiate(cfg) return robot diff --git a/lerobot/common/robot_devices/robots/koch.py b/lerobot/common/robot_devices/robots/koch.py index c6d1a4d4..f5966999 100644 --- a/lerobot/common/robot_devices/robots/koch.py +++ b/lerobot/common/robot_devices/robots/koch.py @@ -8,122 +8,43 @@ import torch from lerobot.common.robot_devices.cameras.utils import Camera from lerobot.common.robot_devices.motors.dynamixel import ( - DriveMode, - DynamixelMotorsBus, OperatingMode, TorqueMode, + convert_degrees_to_steps, ) from lerobot.common.robot_devices.motors.utils import MotorsBus from lerobot.common.robot_devices.utils import RobotDeviceAlreadyConnectedError, RobotDeviceNotConnectedError -URL_HORIZONTAL_POSITION = { - "follower": "https://raw.githubusercontent.com/huggingface/lerobot/main/media/koch/follower_horizontal.png", - "leader": "https://raw.githubusercontent.com/huggingface/lerobot/main/media/koch/leader_horizontal.png", -} -URL_90_DEGREE_POSITION = { - "follower": "https://raw.githubusercontent.com/huggingface/lerobot/main/media/koch/follower_90_degree.png", - "leader": "https://raw.githubusercontent.com/huggingface/lerobot/main/media/koch/leader_90_degree.png", -} - ######################################################################## # Calibration logic ######################################################################## -TARGET_HORIZONTAL_POSITION = np.array([0, -1024, 1024, 0, -1024, 0]) -TARGET_90_DEGREE_POSITION = np.array([1024, 0, 0, 1024, 0, -1024]) -GRIPPER_OPEN = np.array([-400]) +URL_TEMPLATE = ( + "https://raw.githubusercontent.com/huggingface/lerobot/main/media/{robot}/{arm}_{position}.webp" +) + +# In nominal degree range ]-180, +180[ +ZERO_POSITION_DEGREE = 0 +ROTATED_POSITION_DEGREE = 90 +GRIPPER_OPEN_DEGREE = 
35.156 -def apply_homing_offset(values: np.array, homing_offset: np.array) -> np.array: - for i in range(len(values)): - if values[i] is not None: - values[i] += homing_offset[i] - return values +def assert_drive_mode(drive_mode): + # `drive_mode` is in [0,1] with 0 means original rotation direction for the motor, and 1 means inverted. + if not np.all(np.isin(drive_mode, [0, 1])): + raise ValueError(f"`drive_mode` contains values other than 0 or 1: ({drive_mode})") -def apply_drive_mode(values: np.array, drive_mode: np.array) -> np.array: - for i in range(len(values)): - if values[i] is not None and drive_mode[i]: - values[i] = -values[i] - return values +def apply_drive_mode(position, drive_mode): + assert_drive_mode(drive_mode) + # Convert `drive_mode` from [0, 1] with 0 indicates original rotation direction and 1 inverted, + # to [-1, 1] with 1 indicates original rotation direction and -1 inverted. + signed_drive_mode = -(drive_mode * 2 - 1) + position *= signed_drive_mode + return position -def apply_calibration(values: np.array, homing_offset: np.array, drive_mode: np.array) -> np.array: - values = apply_drive_mode(values, drive_mode) - values = apply_homing_offset(values, homing_offset) - return values - - -def revert_calibration(values: np.array, homing_offset: np.array, drive_mode: np.array) -> np.array: - """ - Transform working position into real position for the robot. - """ - values = apply_homing_offset( - values, - np.array([-homing_offset if homing_offset is not None else None for homing_offset in homing_offset]), - ) - values = apply_drive_mode(values, drive_mode) - return values - - -def revert_appropriate_positions(positions: np.array, drive_mode: list[bool]) -> np.array: - for i, revert in enumerate(drive_mode): - if not revert and positions[i] is not None: - positions[i] = -positions[i] - return positions - - -def compute_corrections(positions: np.array, drive_mode: list[bool], target_position: np.array) -> np.array: - correction = revert_appropriate_positions(positions, drive_mode) - - for i in range(len(positions)): - if correction[i] is not None: - if drive_mode[i]: - correction[i] -= target_position[i] - else: - correction[i] += target_position[i] - - return correction - - -def compute_nearest_rounded_positions(positions: np.array) -> np.array: - return np.array( - [ - round(positions[i] / 1024) * 1024 if positions[i] is not None else None - for i in range(len(positions)) - ] - ) - - -def compute_homing_offset( - arm: DynamixelMotorsBus, drive_mode: list[bool], target_position: np.array -) -> np.array: - # Get the present positions of the servos - present_positions = apply_calibration( - arm.read("Present_Position"), np.array([0, 0, 0, 0, 0, 0]), drive_mode - ) - - nearest_positions = compute_nearest_rounded_positions(present_positions) - correction = compute_corrections(nearest_positions, drive_mode, target_position) - return correction - - -def compute_drive_mode(arm: DynamixelMotorsBus, offset: np.array): - # Get current positions - present_positions = apply_calibration( - arm.read("Present_Position"), offset, np.array([False, False, False, False, False, False]) - ) - - nearest_positions = compute_nearest_rounded_positions(present_positions) - - # construct 'drive_mode' list comparing nearest_positions and TARGET_90_DEGREE_POSITION - drive_mode = [] - for i in range(len(nearest_positions)): - drive_mode.append(nearest_positions[i] != TARGET_90_DEGREE_POSITION[i]) - return drive_mode - - -def reset_arm(arm: MotorsBus): +def reset_torque_mode(arm: MotorsBus): # To 
be configured, all servos must be in "torque disable" mode
     arm.write("Torque_Enable", TorqueMode.DISABLED.value)
 
@@ -132,55 +53,95 @@ def reset_arm(arm: MotorsBus):
     # you could end up with a servo with a position 0 or 4095 at a crucial point See [
     # https://emanual.robotis.com/docs/en/dxl/x/x_series/#operating-mode11]
     all_motors_except_gripper = [name for name in arm.motor_names if name != "gripper"]
-    arm.write("Operating_Mode", OperatingMode.EXTENDED_POSITION.value, all_motors_except_gripper)
+    if len(all_motors_except_gripper) > 0:
+        arm.write("Operating_Mode", OperatingMode.EXTENDED_POSITION.value, all_motors_except_gripper)
 
-    # TODO(rcadene): why?
-    # Use 'position control current based' for gripper
+    # Use 'position control current based' for the gripper so that it is limited by its current.
+    # For the follower gripper, this means it can grasp an object without applying too much force, even
+    # though its goal position is a complete grasp (both gripper fingers are commanded to join and touch).
+    # For the leader gripper, this means we can use it as a physical trigger, since we can force it to
+    # move with our finger, and it will move back to its original target position when we release.
    arm.write("Operating_Mode", OperatingMode.CURRENT_CONTROLLED_POSITION.value, "gripper")
 
-    # Make sure the native calibration (homing offset abd drive mode) is disabled, since we use our own calibration layer to be more generic
-    arm.write("Homing_Offset", 0)
-    arm.write("Drive_Mode", DriveMode.NON_INVERTED.value)
-
 
 def run_arm_calibration(arm: MotorsBus, name: str, arm_type: str):
-    """Example of usage:
+    """This function ensures that a neural network trained on data collected on a given robot
+    can work on another robot. For instance, before calibration, sending the same goal position
+    to each motor of two different robots would produce two very different poses. After calibration,
+    the two robots will move to the same position. To this end, this function computes the homing offset
+    and the drive mode for each motor of a given robot.
+
+    Homing offset is used to shift the motor position into a ]-2048, +2048[ nominal range (when the motor uses 2048 steps
+    to complete half a turn). This range is set around an arbitrary "zero position" corresponding to all motor positions
+    being 0. During the calibration process, you will need to manually move the robot to this "zero position".
+
+    Drive mode is used to invert the rotation direction of a motor. This is useful when some motors have been assembled
+    in the opposite orientation on some robots. During the calibration process, you will need to manually move the robot
+    to the "rotated position".
+
+    After calibration, the homing offsets and drive modes are stored in a cache file. 
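+
+    For instance (illustrative numbers): if a motor with a 4096 step resolution reads position 2080 while the arm
+    is held in the "zero position", the nearest quarter-turn multiple is 2048, so its homing offset is -2048 and,
+    once calibrated, the motor will report approximately 0 steps (0 degrees) in that pose.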
+
+    Example of usage:
     ```python
     run_arm_calibration(arm, "left", "follower")
     ```
     """
-    reset_arm(arm)
+    reset_torque_mode(arm)
 
-    # TODO(rcadene): document what position 1 mean
-    print(
-        f"Please move the '{name} {arm_type}' arm to the horizontal position (gripper fully closed, see {URL_HORIZONTAL_POSITION[arm_type]})"
-    )
+    print(f"\nRunning calibration of {name} {arm_type}...")
+
+    print("\nMove arm to zero position")
+    print("See: " + URL_TEMPLATE.format(robot="koch", arm=arm_type, position="zero"))
     input("Press Enter to continue...")
 
-    horizontal_homing_offset = compute_homing_offset(
-        arm, [False, False, False, False, False, False], TARGET_HORIZONTAL_POSITION
-    )
+    # We arbitrarily chose our zero target position to be a straight horizontal position with the gripper upwards and closed.
+    # It is easy to identify, and all motors sit at a "quarter turn" position. Once calibration is done, this position will
+    # correspond to every motor angle being 0. If you set 0 as the Goal Position for every motor, the arm will move to this position.
+    zero_position = convert_degrees_to_steps(ZERO_POSITION_DEGREE, arm.motor_models)
 
-    # TODO(rcadene): document what position 2 mean
-    print(
-        f"Please move the '{name} {arm_type}' arm to the 90 degree position (gripper fully open, see {URL_90_DEGREE_POSITION[arm_type]})"
-    )
+    def _compute_nearest_rounded_position(position, models):
+        # TODO(rcadene): Rework this function since some motors can't physically rotate a quarter turn
+        # (e.g. the gripper of Aloha arms can only rotate ~50 degrees)
+        quarter_turn_degree = 90
+        quarter_turn = convert_degrees_to_steps(quarter_turn_degree, models)
+        nearest_pos = np.round(position.astype(float) / quarter_turn) * quarter_turn
+        return nearest_pos.astype(position.dtype)
+
+    # Compute homing offset so that `present_position + homing_offset ~= target_position`.
+    position = arm.read("Present_Position")
+    position = _compute_nearest_rounded_position(position, arm.motor_models)
+    homing_offset = zero_position - position
+
+    print("\nMove arm to rotated target position")
+    print("See: " + URL_TEMPLATE.format(robot="koch", arm=arm_type, position="rotated"))
     input("Press Enter to continue...")
 
-    drive_mode = compute_drive_mode(arm, horizontal_homing_offset)
-    homing_offset = compute_homing_offset(arm, drive_mode, TARGET_90_DEGREE_POSITION)
+    # The rotated target position corresponds to a rotation of a quarter turn from the zero position.
+    # This allows us to identify the rotation direction of each motor.
+    # For instance, if a motor rotates 90 degrees and its value is -90 after applying the homing offset, then we know its rotation
+    # direction is inverted. However, for the calibration to be successful, every motor needs to reach the same target position.
+    # Sometimes, there is only one possible rotation direction. For instance, if the gripper is closed, there is only one direction which
+    # corresponds to opening the gripper. When the rotation direction is ambiguous, we arbitrarily rotate clockwise from the point of view
+    # of the previous motor in the kinematic chain.
+    rotated_position = convert_degrees_to_steps(ROTATED_POSITION_DEGREE, arm.motor_models)
 
-    # Invert offset for all drive_mode servos
-    for i in range(len(drive_mode)):
-        if drive_mode[i]:
-            homing_offset[i] = -homing_offset[i]
+    # Find drive mode by rotating each motor by a quarter of a turn.
+    # Drive mode indicates if the motor rotation direction should be inverted (=1) or not (=0).
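+    # E.g. for a 4096 step motor, a quarter turn is 1024 steps: a motor reading +1024 after the homing
+    # offset matches the rotated target (drive_mode=0), while one reading -1024 is inverted (drive_mode=1).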
+    position = arm.read("Present_Position")
+    position += homing_offset
+    position = _compute_nearest_rounded_position(position, arm.motor_models)
+    drive_mode = (position != rotated_position).astype(np.int32)
 
-    print("Calibration is done!")
+    # Re-compute homing offset to take into account drive mode
+    position = arm.read("Present_Position")
+    position = apply_drive_mode(position, drive_mode)
+    position = _compute_nearest_rounded_position(position, arm.motor_models)
+    homing_offset = rotated_position - position
 
-    print("=====================================")
-    print("      HOMING_OFFSET: ", " ".join([str(i) for i in homing_offset]))
-    print("      DRIVE_MODE: ", " ".join([str(i) for i in drive_mode]))
-    print("=====================================")
+    print("\nMove arm to rest position")
+    print("See: " + URL_TEMPLATE.format(robot="koch", arm=arm_type, position="rest"))
+    input("Press Enter to continue...")
+    print()
 
     return homing_offset, drive_mode
 
@@ -207,7 +168,12 @@ class KochRobotConfig:
 class KochRobot:
     # TODO(rcadene): Implement force feedback
-    """Tau Robotics: https://tau-robotics.com
+    """This class can control any Koch robot with a variable number of motors.
+
+    A few versions are available:
+    - [Koch v1.0](https://github.com/AlexanderKoch-Koch/low_cost_robot), with and without the wrist-to-elbow expansion, which was developed
+    by Alexander Koch from [Tau Robotics](https://tau-robotics.com).
+    - [Koch v1.1](https://github.com/jess-moss/koch-v1-1), which was developed by Jess Moss.
 
     Example of highest frequency teleoperation without camera:
     ```python
@@ -261,12 +227,12 @@ class KochRobot:
     Example of highest frequency data collection with cameras:
     ```python
     # Defines how to communicate with 2 cameras connected to the computer.
-    # Here, the webcam of the mackbookpro and the iphone (connected in USB to the macbookpro)
+    # Here, the webcam of the laptop and the phone (connected in USB to the laptop)
     # can be reached respectively using the camera indices 0 and 1. These indices can be
     # arbitrary. See the documentation of `OpenCVCamera` to find your own camera indices.
     cameras = {
-        "macbookpro": OpenCVCamera(camera_index=0, fps=30, width=640, height=480),
-        "iphone": OpenCVCamera(camera_index=1, fps=30, width=640, height=480),
+        "laptop": OpenCVCamera(camera_index=0, fps=30, width=640, height=480),
+        "phone": OpenCVCamera(camera_index=1, fps=30, width=640, height=480),
     }
 
     # Assumes leader and follower arms have been instantiated already (see first example)
@@ -330,23 +296,27 @@ class KochRobot:
 
         # Connect the arms
         for name in self.follower_arms:
+            print(f"Connecting {name} follower arm.")
             self.follower_arms[name].connect()
+            print(f"Connecting {name} leader arm.")
             self.leader_arms[name].connect()
 
         # Reset the arms and load or run calibration
         if self.calibration_path.exists():
             # Reset all arms before setting calibration
             for name in self.follower_arms:
-                reset_arm(self.follower_arms[name])
+                reset_torque_mode(self.follower_arms[name])
             for name in self.leader_arms:
-                reset_arm(self.leader_arms[name])
+                reset_torque_mode(self.leader_arms[name])
 
             with open(self.calibration_path, "rb") as f:
                 calibration = pickle.load(f)
         else:
+            print(f"Missing calibration file '{self.calibration_path}'. Starting calibration procedure.")
             # Run calibration process which begins by resetting all arms
             calibration = self.run_calibration()
 
+            print(f"Calibration is done! 
Saving calibration file '{self.calibration_path}'") self.calibration_path.parent.mkdir(parents=True, exist_ok=True) with open(self.calibration_path, "wb") as f: pickle.dump(calibration, f) @@ -366,13 +336,14 @@ class KochRobot: # Enable torque on all motors of the follower arms for name in self.follower_arms: + print(f"Activating torque on {name} follower arm.") self.follower_arms[name].write("Torque_Enable", 1) # Enable torque on the gripper of the leader arms, and move it to 45 degrees, # so that we can use it as a trigger to close the gripper of the follower arms. for name in self.leader_arms: self.leader_arms[name].write("Torque_Enable", 1, "gripper") - self.leader_arms[name].write("Goal_Position", GRIPPER_OPEN, "gripper") + self.leader_arms[name].write("Goal_Position", GRIPPER_OPEN_DEGREE, "gripper") # Connect the cameras for name in self.cameras: @@ -407,12 +378,12 @@ class KochRobot: "KochRobot is not connected. You need to run `robot.connect()`." ) - # Prepare to assign the positions of the leader to the follower + # Prepare to assign the position of the leader to the follower leader_pos = {} for name in self.leader_arms: - now = time.perf_counter() + before_lread_t = time.perf_counter() leader_pos[name] = self.leader_arms[name].read("Present_Position") - self.logs[f"read_leader_{name}_pos_dt_s"] = time.perf_counter() - now + self.logs[f"read_leader_{name}_pos_dt_s"] = time.perf_counter() - before_lread_t follower_goal_pos = {} for name in self.leader_arms: @@ -420,9 +391,9 @@ class KochRobot: # Send action for name in self.follower_arms: - now = time.perf_counter() + before_fwrite_t = time.perf_counter() self.follower_arms[name].write("Goal_Position", follower_goal_pos[name]) - self.logs[f"write_follower_{name}_goal_pos_dt_s"] = time.perf_counter() - now + self.logs[f"write_follower_{name}_goal_pos_dt_s"] = time.perf_counter() - before_fwrite_t # Early exit when recording data is not requested if not record_data: @@ -432,9 +403,9 @@ class KochRobot: # Read follower position follower_pos = {} for name in self.follower_arms: - now = time.perf_counter() + before_fread_t = time.perf_counter() follower_pos[name] = self.follower_arms[name].read("Present_Position") - self.logs[f"read_follower_{name}_pos_dt_s"] = time.perf_counter() - now + self.logs[f"read_follower_{name}_pos_dt_s"] = time.perf_counter() - before_fread_t # Create state by concatenating follower current position state = [] @@ -453,10 +424,10 @@ class KochRobot: # Capture images from cameras images = {} for name in self.cameras: - now = time.perf_counter() + before_camread_t = time.perf_counter() images[name] = self.cameras[name].async_read() self.logs[f"read_camera_{name}_dt_s"] = self.cameras[name].logs["delta_timestamp_s"] - self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - now + self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t # Populate output dictionnaries and format to pytorch obs_dict, action_dict = {}, {} @@ -477,9 +448,9 @@ class KochRobot: # Read follower position follower_pos = {} for name in self.follower_arms: - now = time.perf_counter() + before_fread_t = time.perf_counter() follower_pos[name] = self.follower_arms[name].read("Present_Position") - self.logs[f"read_follower_{name}_pos_dt_s"] = time.perf_counter() - now + self.logs[f"read_follower_{name}_pos_dt_s"] = time.perf_counter() - before_fread_t # Create state by concatenating follower current position state = [] @@ -491,20 +462,16 @@ class KochRobot: # Capture images from cameras images = {} for 
name in self.cameras: - now = time.perf_counter() + before_camread_t = time.perf_counter() images[name] = self.cameras[name].async_read() self.logs[f"read_camera_{name}_dt_s"] = self.cameras[name].logs["delta_timestamp_s"] - self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - now + self.logs[f"async_read_camera_{name}_dt_s"] = time.perf_counter() - before_camread_t # Populate output dictionnaries and format to pytorch obs_dict = {} obs_dict["observation.state"] = torch.from_numpy(state) for name in self.cameras: - # Convert to pytorch format: channel first and float32 in [0,1] - img = torch.from_numpy(images[name]) - img = img.type(torch.float32) / 255 - img = img.permute(2, 0, 1).contiguous() - obs_dict[f"observation.images.{name}"] = img + obs_dict[f"observation.images.{name}"] = torch.from_numpy(images[name]) return obs_dict def send_action(self, action: torch.Tensor): diff --git a/lerobot/common/utils/utils.py b/lerobot/common/utils/utils.py index 79db627a..ef5e8375 100644 --- a/lerobot/common/utils/utils.py +++ b/lerobot/common/utils/utils.py @@ -158,6 +158,7 @@ def init_hydra_config(config_path: str, overrides: list[str] | None = None) -> D version_base="1.2", ) cfg = hydra.compose(Path(config_path).stem, overrides) + return cfg diff --git a/lerobot/configs/robot/koch.yaml b/lerobot/configs/robot/koch.yaml new file mode 100644 index 00000000..224040ab --- /dev/null +++ b/lerobot/configs/robot/koch.yaml @@ -0,0 +1,39 @@ +_target_: lerobot.common.robot_devices.robots.koch.KochRobot +calibration_path: .cache/calibration/koch.pkl +leader_arms: + main: + _target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus + port: /dev/tty.usbmodem575E0031751 + motors: + # name: (index, model) + shoulder_pan: [1, "xl330-m077"] + shoulder_lift: [2, "xl330-m077"] + elbow_flex: [3, "xl330-m077"] + wrist_flex: [4, "xl330-m077"] + wrist_roll: [5, "xl330-m077"] + gripper: [6, "xl330-m077"] +follower_arms: + main: + _target_: lerobot.common.robot_devices.motors.dynamixel.DynamixelMotorsBus + port: /dev/tty.usbmodem575E0032081 + motors: + # name: (index, model) + shoulder_pan: [1, "xl430-w250"] + shoulder_lift: [2, "xl430-w250"] + elbow_flex: [3, "xl330-m288"] + wrist_flex: [4, "xl330-m288"] + wrist_roll: [5, "xl330-m288"] + gripper: [6, "xl330-m288"] +cameras: + laptop: + _target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera + camera_index: 0 + fps: 30 + width: 640 + height: 480 + phone: + _target_: lerobot.common.robot_devices.cameras.opencv.OpenCVCamera + camera_index: 1 + fps: 30 + width: 640 + height: 480 diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index 078e3067..5b502e69 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -1,9 +1,22 @@ """ +Utilities to control a robot. + +Useful to record a dataset, replay a recorded episode, run the policy on your robot +and record an evaluation dataset, and to recalibrate your robot if needed. + Examples of usage: +- Recalibrate your robot: +```bash +python lerobot/scripts/control_robot.py calibrate +``` + - Unlimited teleoperation at highest frequency (~200 Hz is expected), to exit with CTRL+C: ```bash python lerobot/scripts/control_robot.py teleoperate + +# Remove the cameras from the robot definition. They are not used in 'teleoperate' anyway. 
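+# (The '~cameras' argument uses Hydra's override syntax for deleting a config key.)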
+python lerobot/scripts/control_robot.py teleoperate --robot-overrides '~cameras' ``` - Unlimited teleoperation at a limited frequency of 30 Hz, to simulate data recording frequency: @@ -14,7 +27,7 @@ python lerobot/scripts/control_robot.py teleoperate \ - Record one episode in order to test replay: ```bash -python lerobot/scripts/control_robot.py record_dataset \ +python lerobot/scripts/control_robot.py record \ --fps 30 \ --root tmp/data \ --repo-id $USER/koch_test \ @@ -32,7 +45,7 @@ python lerobot/scripts/visualize_dataset.py \ - Replay this test episode: ```bash -python lerobot/scripts/control_robot.py replay_episode \ +python lerobot/scripts/control_robot.py replay \ --fps 30 \ --root tmp/data \ --repo-id $USER/koch_test \ @@ -42,12 +55,11 @@ python lerobot/scripts/control_robot.py replay_episode \ - Record a full dataset in order to train a policy, with 2 seconds of warmup, 30 seconds of recording for each episode, and 10 seconds to reset the environment in between episodes: ```bash -python lerobot/scripts/control_robot.py record_dataset \ +python lerobot/scripts/control_robot.py record \ --fps 30 \ --root data \ --repo-id $USER/koch_pick_place_lego \ --num-episodes 50 \ - --run-compute-stats 1 \ --warmup-time-s 2 \ --episode-time-s 30 \ --reset-time-s 10 @@ -74,7 +86,14 @@ DATA_DIR=data python lerobot/scripts/train.py \ - Run the pretrained policy on the robot: ```bash -python lerobot/scripts/control_robot.py run_policy \ +python lerobot/scripts/control_robot.py record \ + --fps 30 \ + --root data \ + --repo-id $USER/eval_act_koch_real \ + --num-episodes 10 \ + --warmup-time-s 2 \ + --episode-time-s 30 \ + --reset-time-s 10 -p outputs/train/act_koch_real/checkpoints/080000/pretrained_model ``` """ @@ -87,12 +106,14 @@ import os import platform import shutil import time +import traceback from contextlib import nullcontext +from functools import cache from pathlib import Path +import cv2 import torch import tqdm -from huggingface_hub import create_branch from omegaconf import DictConfig from PIL import Image from termcolor import colored @@ -102,7 +123,7 @@ from lerobot.common.datasets.compute_stats import compute_stats from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset from lerobot.common.datasets.push_dataset_to_hub.aloha_hdf5_format import to_hf_dataset from lerobot.common.datasets.push_dataset_to_hub.utils import concatenate_episodes, get_default_encoding -from lerobot.common.datasets.utils import calculate_episode_data_index +from lerobot.common.datasets.utils import calculate_episode_data_index, create_branch from lerobot.common.datasets.video_utils import encode_video_frames from lerobot.common.policies.factory import make_policy from lerobot.common.robot_devices.robots.factory import make_robot @@ -116,6 +137,26 @@ from lerobot.scripts.push_dataset_to_hub import push_meta_data_to_hub, push_vide ######################################################################################## +def say(text, blocking=False): + # Check if mac, linux, or windows. 
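+    # macOS ships the `say` command natively; on Linux, `spd-say` is provided by the `speech-dispatcher`
+    # package (installed in the repo's Dockerfiles); Windows falls back to PowerShell's System.Speech synthesizer.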
+ if platform.system() == "Darwin": + cmd = f'say "{text}"' + elif platform.system() == "Linux": + cmd = f'spd-say "{text}"' + elif platform.system() == "Windows": + cmd = ( + 'PowerShell -Command "Add-Type -AssemblyName System.Speech; ' + f"(New-Object System.Speech.Synthesis.SpeechSynthesizer).Speak('{text}')\"" + ) + + if not blocking and platform.system() in ["Darwin", "Linux"]: + # TODO(rcadene): Make it work for Windows + # Use the ampersand to run command in the background + cmd += " &" + + os.system(cmd) + + def save_image(img_tensor, key, frame_index, episode_index, videos_dir): img = Image.fromarray(img_tensor.numpy()) path = videos_dir / f"{key}_episode_{episode_index:06d}" / f"frame_{frame_index:06d}.png" @@ -160,11 +201,11 @@ def log_control_info(robot, dt_s, episode_index=None, frame_index=None, fps=None for name in robot.follower_arms: key = f"write_follower_{name}_goal_pos_dt_s" if key in robot.logs: - log_dt("dtRfoll", robot.logs[key]) + log_dt("dtWfoll", robot.logs[key]) key = f"read_follower_{name}_pos_dt_s" if key in robot.logs: - log_dt("dtWfoll", robot.logs[key]) + log_dt("dtRfoll", robot.logs[key]) for name in robot.cameras: key = f"read_camera_{name}_dt_s" @@ -179,12 +220,23 @@ def log_control_info(robot, dt_s, episode_index=None, frame_index=None, fps=None logging.info(info_str) -def get_is_headless(): - if platform.system() == "Linux": - display = os.environ.get("DISPLAY") - if display is None or display == "": - return True - return False +@cache +def is_headless(): + """Detects if python is running without a monitor.""" + try: + import pynput # noqa + + return False + except Exception: + print( + "Error trying to import pynput. Switching to headless mode. " + "As a result, the video stream from the cameras won't be shown, " + "and you won't be able to change the control flow with keyboards. 
" + "For more info, see traceback below.\n" + ) + traceback.print_exc() + print() + return True ######################################################################################## @@ -192,29 +244,44 @@ def get_is_headless(): ######################################################################################## +def calibrate(robot: Robot): + if robot.calibration_path.exists(): + print(f"Removing '{robot.calibration_path}'") + robot.calibration_path.unlink() + + if robot.is_connected: + robot.disconnect() + + # Calling `connect` automatically runs calibration + # when the calibration file is missing + robot.connect() + + def teleoperate(robot: Robot, fps: int | None = None, teleop_time_s: float | None = None): # TODO(rcadene): Add option to record logs if not robot.is_connected: robot.connect() - start_time = time.perf_counter() + start_teleop_t = time.perf_counter() while True: - now = time.perf_counter() + start_loop_t = time.perf_counter() robot.teleop_step() if fps is not None: - dt_s = time.perf_counter() - now + dt_s = time.perf_counter() - start_loop_t busy_wait(1 / fps - dt_s) - dt_s = time.perf_counter() - now + dt_s = time.perf_counter() - start_loop_t log_control_info(robot, dt_s, fps=fps) - if teleop_time_s is not None and time.perf_counter() - start_time > teleop_time_s: + if teleop_time_s is not None and time.perf_counter() - start_teleop_t > teleop_time_s: break -def record_dataset( +def record( robot: Robot, + policy: torch.nn.Module | None = None, + hydra_cfg: DictConfig | None = None, fps: int | None = None, root="data", repo_id="lerobot/debug", @@ -229,6 +296,13 @@ def record_dataset( force_override=False, ): # TODO(rcadene): Add option to record logs + # TODO(rcadene): Clean this function via decomposition in higher level functions + + _, dataset_name = repo_id.split("/") + if dataset_name.startswith("eval_") and policy is None: + raise ValueError( + f"Your dataset name begins by 'eval_' ({dataset_name}) but no policy is provided ({policy})." + ) if not video: raise NotImplementedError() @@ -255,32 +329,10 @@ def record_dataset( else: episode_index = 0 - is_headless = get_is_headless() - - # Execute a few seconds without recording data, to give times - # to the robot devices to connect and start synchronizing. - timestamp = 0 - start_time = time.perf_counter() - is_warmup_print = False - while timestamp < warmup_time_s: - if not is_warmup_print: - logging.info("Warming up (no data recording)") - os.system('say "Warmup" &') - is_warmup_print = True - - now = time.perf_counter() - observation, action = robot.teleop_step(record_data=True) - - if not is_headless: - image_keys = [key for key in observation if "image" in key] - - dt_s = time.perf_counter() - now - busy_wait(1 / fps - dt_s) - - dt_s = time.perf_counter() - now - log_control_info(robot, dt_s, fps=fps) - - timestamp = time.perf_counter() - start_time + if is_headless(): + logging.info( + "Headless environment detected. On-screen cameras display and keyboard inputs will not be available." + ) # Allow to exit early while recording an episode or resetting the environment, # by tapping the right arrow key '->'. This might require a sudo permission @@ -290,9 +342,7 @@ def record_dataset( stop_recording = False # Only import pynput if not in a headless environment - if is_headless: - logging.info("Headless environment detected. 
Keyboard input will not be available.") - else: + if not is_headless(): from pynput import keyboard def on_press(key): @@ -315,6 +365,53 @@ def record_dataset( listener = keyboard.Listener(on_press=on_press) listener.start() + # Load policy if any + if policy is not None: + # Check device is available + device = get_safe_torch_device(hydra_cfg.device, log=True) + + policy.eval() + policy.to(device) + + torch.backends.cudnn.benchmark = True + torch.backends.cuda.matmul.allow_tf32 = True + set_global_seed(hydra_cfg.seed) + + # override fps using policy fps + fps = hydra_cfg.env.fps + + # Execute a few seconds without recording data, to give times + # to the robot devices to connect and start synchronizing. + timestamp = 0 + start_warmup_t = time.perf_counter() + is_warmup_print = False + while timestamp < warmup_time_s: + if not is_warmup_print: + logging.info("Warming up (no data recording)") + say("Warming up") + is_warmup_print = True + + start_loop_t = time.perf_counter() + + if policy is None: + observation, action = robot.teleop_step(record_data=True) + else: + observation = robot.capture_observation() + + if not is_headless(): + image_keys = [key for key in observation if "image" in key] + for key in image_keys: + cv2.imshow(key, cv2.cvtColor(observation[key].numpy(), cv2.COLOR_RGB2BGR)) + cv2.waitKey(1) + + dt_s = time.perf_counter() - start_loop_t + busy_wait(1 / fps - dt_s) + + dt_s = time.perf_counter() - start_loop_t + log_control_info(robot, dt_s, fps=fps) + + timestamp = time.perf_counter() - start_warmup_t + # Save images using threads to reach high fps (30 and more) # Using `with` to exist smoothly if an execption is raised. # Using only 4 worker threads to avoid blocking the main thread. @@ -323,14 +420,18 @@ def record_dataset( # Start recording all episodes while episode_index < num_episodes: logging.info(f"Recording episode {episode_index}") - os.system(f'say "Recording episode {episode_index}" &') + say(f"Recording episode {episode_index}") ep_dict = {} frame_index = 0 timestamp = 0 - start_time = time.perf_counter() + start_episode_t = time.perf_counter() while timestamp < episode_time_s: - now = time.perf_counter() - observation, action = robot.teleop_step(record_data=True) + start_loop_t = time.perf_counter() + + if policy is None: + observation, action = robot.teleop_step(record_data=True) + else: + observation = robot.capture_observation() image_keys = [key for key in observation if "image" in key] not_image_keys = [key for key in observation if "image" not in key] @@ -342,11 +443,46 @@ def record_dataset( ) ] + if not is_headless(): + image_keys = [key for key in observation if "image" in key] + for key in image_keys: + cv2.imshow(key, cv2.cvtColor(observation[key].numpy(), cv2.COLOR_RGB2BGR)) + cv2.waitKey(1) + for key in not_image_keys: if key not in ep_dict: ep_dict[key] = [] ep_dict[key].append(observation[key]) + if policy is not None: + with ( + torch.inference_mode(), + torch.autocast(device_type=device.type) + if device.type == "cuda" and hydra_cfg.use_amp + else nullcontext(), + ): + # Convert to pytorch format: channel first and float32 in [0,1] with batch dimension + for name in observation: + if "image" in name: + observation[name] = observation[name].type(torch.float32) / 255 + observation[name] = observation[name].permute(2, 0, 1).contiguous() + observation[name] = observation[name].unsqueeze(0) + observation[name] = observation[name].to(device) + + # Compute the next action with the policy + # based on the current observation + action = 
policy.select_action(observation) + + # Remove batch dimension + action = action.squeeze(0) + + # Move to cpu, if not already the case + action = action.to("cpu") + + # Order the robot to move + robot.send_action(action) + action = {"action": action} + for key in action: if key not in ep_dict: ep_dict[key] = [] @@ -354,14 +490,13 @@ def record_dataset( frame_index += 1 - dt_s = time.perf_counter() - now + dt_s = time.perf_counter() - start_loop_t busy_wait(1 / fps - dt_s) - dt_s = time.perf_counter() - now + dt_s = time.perf_counter() - start_loop_t log_control_info(robot, dt_s, fps=fps) - timestamp = time.perf_counter() - start_time - + timestamp = time.perf_counter() - start_episode_t if exit_early: exit_early = False break @@ -369,10 +504,10 @@ def record_dataset( if not stop_recording: # Start resetting env while the executor are finishing logging.info("Reset the environment") - os.system('say "Reset the environment" &') + say("Reset the environment") timestamp = 0 - start_time = time.perf_counter() + start_vencod_t = time.perf_counter() # During env reset we save the data and encode the videos num_frames = frame_index @@ -418,7 +553,7 @@ def record_dataset( with tqdm.tqdm(total=reset_time_s, desc="Waiting") as pbar: while timestamp < reset_time_s and not is_last_episode: time.sleep(1) - timestamp = time.perf_counter() - start_time + timestamp = time.perf_counter() - start_vencod_t pbar.update(1) if exit_early: exit_early = False @@ -433,8 +568,8 @@ def record_dataset( if is_last_episode: logging.info("Done recording") - os.system('say "Done recording"') - if not is_headless: + say("Done recording", blocking=True) + if not is_headless(): listener.stop() logging.info("Waiting for threads writing the images on disk to terminate...") @@ -444,10 +579,14 @@ def record_dataset( pass break + robot.disconnect() + if not is_headless(): + cv2.destroyAllWindows() + num_episodes = episode_index logging.info("Encoding videos") - os.system('say "Encoding videos" &') + say("Encoding videos") # Use ffmpeg to convert frames stored as png into mp4 videos for episode_index in tqdm.tqdm(range(num_episodes)): for key in image_keys: @@ -455,6 +594,7 @@ def record_dataset( fname = f"{key}_episode_{episode_index:06d}.mp4" video_path = local_dir / "videos" / fname if video_path.exists(): + # Skip if video is already encoded. Could be the case when resuming data recording. continue # note: `encode_video_frames` is a blocking call. Making it asynchronous shouldn't speedup encoding, # since video encoding with ffmpeg is already using multithreading. 
@@ -491,11 +631,12 @@ def record_dataset(
     )
     if run_compute_stats:
         logging.info("Computing dataset statistics")
-        os.system('say "Computing dataset statistics" &')
+        say("Computing dataset statistics")
         stats = compute_stats(lerobot_dataset)
         lerobot_dataset.stats = stats
     else:
-        logging.info("Skipping computation of the dataset statistrics")
+        stats = {}
+        logging.info("Skipping computation of the dataset statistics")

     hf_dataset = hf_dataset.with_format(None)  # to remove transforms that can't be saved
     hf_dataset.save_to_disk(str(local_dir / "train"))
@@ -511,12 +652,11 @@ def record_dataset(
         create_branch(repo_id, repo_type="dataset", branch=CODEBASE_VERSION)

     logging.info("Exiting")
-    os.system('say "Exiting" &')
-
+    say("Exiting")
     return lerobot_dataset


-def replay_episode(robot: Robot, episode: int, fps: int | None = None, root="data", repo_id="lerobot/debug"):
+def replay(robot: Robot, episode: int, fps: int | None = None, root="data", repo_id="lerobot/debug"):
     # TODO(rcadene): Add option to record logs
     local_dir = Path(root) / repo_id
     if not local_dir.exists():
@@ -531,76 +671,20 @@ def replay_episode(robot: Robot, episode: int, fps: int | None = None, root="dat
     robot.connect()

     logging.info("Replaying episode")
-    os.system('say "Replaying episode"')
-
+    say("Replaying episode", blocking=True)
     for idx in range(from_idx, to_idx):
-        now = time.perf_counter()
+        start_loop_t = time.perf_counter()

         action = items[idx]["action"]
         robot.send_action(action)

-        dt_s = time.perf_counter() - now
+        dt_s = time.perf_counter() - start_loop_t
         busy_wait(1 / fps - dt_s)

-        dt_s = time.perf_counter() - now
+        dt_s = time.perf_counter() - start_loop_t
         log_control_info(robot, dt_s, fps=fps)


-def run_policy(robot: Robot, policy: torch.nn.Module, hydra_cfg: DictConfig, run_time_s: float | None = None):
-    # TODO(rcadene): Add option to record eval dataset and logs
-
-    # Check device is available
-    device = get_safe_torch_device(hydra_cfg.device, log=True)
-
-    policy.eval()
-    policy.to(device)
-
-    torch.backends.cudnn.benchmark = True
-    torch.backends.cuda.matmul.allow_tf32 = True
-    set_global_seed(hydra_cfg.seed)
-
-    fps = hydra_cfg.env.fps
-
-    if not robot.is_connected:
-        robot.connect()
-
-    start_time = time.perf_counter()
-    while True:
-        now = time.perf_counter()
-
-        observation = robot.capture_observation()
-
-        with (
-            torch.inference_mode(),
-            torch.autocast(device_type=device.type)
-            if device.type == "cuda" and hydra_cfg.use_amp
-            else nullcontext(),
-        ):
-            # add batch dimension to 1
-            for name in observation:
-                observation[name] = observation[name].unsqueeze(0)
-
-            if device.type == "mps":
-                for name in observation:
-                    observation[name] = observation[name].to(device)
-
-            action = policy.select_action(observation)
-
-            # remove batch dimension
-            action = action.squeeze(0)
-
-            robot.send_action(action.to("cpu"))
-
-        dt_s = time.perf_counter() - now
-        busy_wait(1 / fps - dt_s)
-
-        dt_s = time.perf_counter() - now
-        log_control_info(robot, dt_s, fps=fps)
-
-        if run_time_s is not None and time.perf_counter() - start_time > run_time_s:
-            break
-
-
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     subparsers = parser.add_subparsers(dest="mode", required=True)
@@ -608,18 +692,26 @@ if __name__ == "__main__":
     # Set common options for all the subparsers
     base_parser = argparse.ArgumentParser(add_help=False)
     base_parser.add_argument(
-        "--robot",
+        "--robot-path",
         type=str,
-        default="koch",
-        help="Name of the robot provided to the `make_robot(name)` factory function.",
+        default="lerobot/configs/robot/koch.yaml",
+        help="Path to the robot yaml file used to instantiate the robot via the `make_robot` factory function.",
     )
+    base_parser.add_argument(
+        "--robot-overrides",
+        type=str,
+        nargs="*",
+        help="Any key=value arguments to override config values (use dots for nested.overrides)",
+    )
+
+    parser_calib = subparsers.add_parser("calibrate", parents=[base_parser])

     parser_teleop = subparsers.add_parser("teleoperate", parents=[base_parser])
     parser_teleop.add_argument(
         "--fps", type=none_or_int, default=None, help="Frames per second (set to None to disable)"
     )

-    parser_record = subparsers.add_parser("record_dataset", parents=[base_parser])
+    parser_record = subparsers.add_parser("record", parents=[base_parser])
     parser_record.add_argument(
         "--fps", type=none_or_int, default=None, help="Frames per second (set to None to disable)"
     )
@@ -638,19 +730,19 @@ if __name__ == "__main__":
     parser_record.add_argument(
         "--warmup-time-s",
         type=int,
-        default=2,
+        default=10,
         help="Number of seconds before starting data collection. It allows the robot devices to warm up and synchronize.",
     )
     parser_record.add_argument(
         "--episode-time-s",
         type=int,
-        default=10,
+        default=60,
         help="Number of seconds for data recording for each episode.",
     )
     parser_record.add_argument(
         "--reset-time-s",
         type=int,
-        default=5,
+        default=60,
         help="Number of seconds for resetting the environment after each episode.",
     )
     parser_record.add_argument("--num-episodes", type=int, default=50, help="Number of episodes to record.")
@@ -678,8 +770,23 @@ if __name__ == "__main__":
         default=0,
         help="By default, data recording is resumed. When set to 1, delete the local directory and start data recording from scratch.",
     )
+    parser_record.add_argument(
+        "-p",
+        "--pretrained-policy-name-or-path",
+        type=str,
+        help=(
+            "Either the repo ID of a model hosted on the Hub or a path to a directory containing weights "
+            "saved using `Policy.save_pretrained`."
+        ),
+    )
+    parser_record.add_argument(
+        "--policy-overrides",
+        type=str,
+        nargs="*",
+        help="Any key=value arguments to override config values (use dots for nested.overrides)",
+    )

-    parser_replay = subparsers.add_parser("replay_episode", parents=[base_parser])
+    parser_replay = subparsers.add_parser("replay", parents=[base_parser])
     parser_replay.add_argument(
         "--fps", type=none_or_int, default=None, help="Frames per second (set to None to disable)"
     )
@@ -697,41 +804,46 @@ if __name__ == "__main__":
     )
     parser_replay.add_argument("--episode", type=int, default=0, help="Index of the episode to replay.")

-    parser_policy = subparsers.add_parser("run_policy", parents=[base_parser])
-    parser_policy.add_argument(
-        "-p",
-        "--pretrained-policy-name-or-path",
-        type=str,
-        help=(
-            "Either the repo ID of a model hosted on the Hub or a path to a directory containing weights "
-            "saved using `Policy.save_pretrained`."
-        ),
-    )
-    parser_policy.add_argument(
-        "overrides",
-        nargs="*",
-        help="Any key=value arguments to override config values (use dots for.nested=overrides)",
-    )

     args = parser.parse_args()

     init_logging()

     control_mode = args.mode
-    robot_name = args.robot
+    robot_path = args.robot_path
+    robot_overrides = args.robot_overrides
     kwargs = vars(args)
     del kwargs["mode"]
-    del kwargs["robot"]
+    del kwargs["robot_path"]
+    del kwargs["robot_overrides"]

-    robot = make_robot(robot_name)
-    if control_mode == "teleoperate":
+    robot_cfg = init_hydra_config(robot_path, robot_overrides)
+    robot = make_robot(robot_cfg)
+
+    if control_mode == "calibrate":
+        calibrate(robot, **kwargs)
+
+    elif control_mode == "teleoperate":
         teleoperate(robot, **kwargs)
-    elif control_mode == "record_dataset":
-        record_dataset(robot, **kwargs)
-    elif control_mode == "replay_episode":
-        replay_episode(robot, **kwargs)
-    elif control_mode == "run_policy":
-        pretrained_policy_path = get_pretrained_policy_path(args.pretrained_policy_name_or_path)
-        hydra_cfg = init_hydra_config(pretrained_policy_path / "config.yaml", args.overrides)
-        policy = make_policy(hydra_cfg=hydra_cfg, pretrained_policy_name_or_path=pretrained_policy_path)
-        run_policy(robot, policy, hydra_cfg)
+
+    elif control_mode == "record":
+        pretrained_policy_name_or_path = args.pretrained_policy_name_or_path
+        policy_overrides = args.policy_overrides
+        del kwargs["pretrained_policy_name_or_path"]
+        del kwargs["policy_overrides"]
+
+        policy_cfg = None
+        if pretrained_policy_name_or_path is not None:
+            pretrained_policy_path = get_pretrained_policy_path(pretrained_policy_name_or_path)
+            policy_cfg = init_hydra_config(pretrained_policy_path / "config.yaml", policy_overrides)
+            policy = make_policy(hydra_cfg=policy_cfg, pretrained_policy_name_or_path=pretrained_policy_path)
+            record(robot, policy, policy_cfg, **kwargs)
+        else:
+            record(robot, **kwargs)
+
+    elif control_mode == "replay":
+        replay(robot, **kwargs)
+
+    if robot.is_connected:
+        # Disconnect manually to avoid a "Core dump" during process
+        # termination caused by camera threads not exiting properly.
+        robot.disconnect()
diff --git a/lerobot/scripts/push_dataset_to_hub.py b/lerobot/scripts/push_dataset_to_hub.py
index 7969b61e..898a1c42 100644
--- a/lerobot/scripts/push_dataset_to_hub.py
+++ b/lerobot/scripts/push_dataset_to_hub.py
@@ -56,7 +56,7 @@ from safetensors.torch import save_file
 from lerobot.common.datasets.compute_stats import compute_stats
 from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
 from lerobot.common.datasets.push_dataset_to_hub.utils import check_repo_id
-from lerobot.common.datasets.utils import flatten_dict
+from lerobot.common.datasets.utils import create_branch, flatten_dict


 def get_from_raw_to_lerobot_format_fn(raw_format: str):
@@ -215,8 +215,7 @@ def push_dataset_to_hub(
         push_meta_data_to_hub(repo_id, meta_data_dir, revision="main")
         if video:
             push_videos_to_hub(repo_id, videos_dir, revision="main")
-        api = HfApi()
-        api.create_branch(repo_id, repo_type="dataset", branch=CODEBASE_VERSION)
+        create_branch(repo_id, repo_type="dataset", branch=CODEBASE_VERSION)

     if tests_data_dir:
         # get the first episode
diff --git a/media/koch/follower_90_degree.png b/media/koch/follower_90_degree.png
deleted file mode 100644
index 66af68a1..00000000
Binary files a/media/koch/follower_90_degree.png and /dev/null differ
diff --git a/media/koch/follower_horizontal.png b/media/koch/follower_horizontal.png
deleted file mode 100644
index d2ffb6c5..00000000
Binary files a/media/koch/follower_horizontal.png and /dev/null differ
diff --git a/media/koch/follower_rest.webp b/media/koch/follower_rest.webp
new file mode 100644
index 00000000..0a14d074
Binary files /dev/null and b/media/koch/follower_rest.webp differ
diff --git a/media/koch/follower_rotated.webp b/media/koch/follower_rotated.webp
new file mode 100644
index 00000000..3a91d249
Binary files /dev/null and b/media/koch/follower_rotated.webp differ
diff --git a/media/koch/follower_zero.webp b/media/koch/follower_zero.webp
new file mode 100644
index 00000000..aa107ed3
Binary files /dev/null and b/media/koch/follower_zero.webp differ
diff --git a/media/koch/leader_90_degree.png b/media/koch/leader_90_degree.png
deleted file mode 100644
index 3b802617..00000000
Binary files a/media/koch/leader_90_degree.png and /dev/null differ
diff --git a/media/koch/leader_horizontal.png b/media/koch/leader_horizontal.png
deleted file mode 100644
index c55b51a8..00000000
Binary files a/media/koch/leader_horizontal.png and /dev/null differ
diff --git a/media/koch/leader_rest.webp b/media/koch/leader_rest.webp
new file mode 100644
index 00000000..e0454cfd
Binary files /dev/null and b/media/koch/leader_rest.webp differ
diff --git a/media/koch/leader_rotated.webp b/media/koch/leader_rotated.webp
new file mode 100644
index 00000000..183e4206
Binary files /dev/null and b/media/koch/leader_rotated.webp differ
diff --git a/media/koch/leader_zero.webp b/media/koch/leader_zero.webp
new file mode 100644
index 00000000..f3b885ac
Binary files /dev/null and b/media/koch/leader_zero.webp differ
diff --git a/media/tutorial/koch_v1_1_leader_follower.webp b/media/tutorial/koch_v1_1_leader_follower.webp
new file mode 100644
index 00000000..f576a531
Binary files /dev/null and b/media/tutorial/koch_v1_1_leader_follower.webp differ
diff --git a/media/tutorial/visualize_dataset_html.webp b/media/tutorial/visualize_dataset_html.webp
new file mode 100644
index 00000000..e71bc562
Binary files /dev/null and b/media/tutorial/visualize_dataset_html.webp differ
diff --git a/poetry.lock b/poetry.lock
index 084af6b6..56a318ef 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1373,6 +1373,7 @@ files = [
 filelock = "*"
 fsspec = ">=2023.5.0"
 hf-transfer = {version = ">=0.1.4", optional = true, markers = "extra == \"hf-transfer\""}
+InquirerPy = {version = "0.3.4", optional = true, markers = "extra == \"cli\""}
 packaging = ">=20.9"
 pyyaml = ">=5.1"
 requests = "*"
@@ -1559,6 +1560,24 @@ files = [
     {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
 ]

+[[package]]
+name = "inquirerpy"
+version = "0.3.4"
+description = "Python port of Inquirer.js (A collection of common interactive command-line user interfaces)"
+optional = false
+python-versions = ">=3.7,<4.0"
+files = [
+    {file = "InquirerPy-0.3.4-py3-none-any.whl", hash = "sha256:c65fdfbac1fa00e3ee4fb10679f4d3ed7a012abf4833910e63c295827fe2a7d4"},
+    {file = "InquirerPy-0.3.4.tar.gz", hash = "sha256:89d2ada0111f337483cb41ae31073108b2ec1e618a49d7110b0d7ade89fc197e"},
+]
+
+[package.dependencies]
+pfzy = ">=0.3.1,<0.4.0"
+prompt-toolkit = ">=3.0.1,<4.0.0"
+
+[package.extras]
+docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17-beta.43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"]
+
 [[package]]
 name = "intel-openmp"
 version = "2021.4.0"
@@ -2564,6 +2583,20 @@ other = ["pillow (>=8.0.1)"]
 sisl = ["box2d-py (==2.3.5)", "pygame (==2.3.0)", "pymunk (==6.2.0)", "scipy (>=1.4.1)"]
 testing = ["AutoROM", "pre-commit", "pynput", "pytest", "pytest-cov", "pytest-markdown-docs", "pytest-xdist"]

+[[package]]
+name = "pfzy"
+version = "0.3.4"
+description = "Python port of the fzy fuzzy string matching algorithm"
+optional = false
+python-versions = ">=3.7,<4.0"
+files = [
+    {file = "pfzy-0.3.4-py3-none-any.whl", hash = "sha256:5f50d5b2b3207fa72e7ec0ef08372ef652685470974a107d0d4999fc5a903a96"},
+    {file = "pfzy-0.3.4.tar.gz", hash = "sha256:717ea765dd10b63618e7298b2d98efd819e0b30cd5905c9707223dceeb94b3f1"},
+]
+
+[package.extras]
+docs = ["Sphinx (>=4.1.2,<5.0.0)", "furo (>=2021.8.17-beta.43,<2022.0.0)", "myst-parser (>=0.15.1,<0.16.0)", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.4.0,<0.5.0)"]
+
 [[package]]
 name = "pillow"
 version = "10.4.0"
@@ -2710,6 +2743,20 @@ nodeenv = ">=0.11.1"
 pyyaml = ">=5.1"
 virtualenv = ">=20.10.0"

+[[package]]
+name = "prompt-toolkit"
+version = "3.0.47"
+description = "Library for building powerful interactive command lines in Python"
+optional = false
+python-versions = ">=3.7.0"
+files = [
+    {file = "prompt_toolkit-3.0.47-py3-none-any.whl", hash = "sha256:0d7bfa67001d5e39d02c224b663abc33687405033a8c422d0d675a5a13361d10"},
+    {file = "prompt_toolkit-3.0.47.tar.gz", hash = "sha256:1e1b29cb58080b1e69f207c893a1a7bf16d127a5c30c9d17a25a5d77792e5360"},
+]
+
+[package.dependencies]
+wcwidth = "*"
+
 [[package]]
 name = "protobuf"
 version = "5.27.2"
@@ -4216,6 +4263,17 @@ perf = ["orjson"]
 sweeps = ["sweeps (>=0.2.0)"]
 workspaces = ["wandb-workspaces"]

+[[package]]
+name = "wcwidth"
+version = "0.2.13"
+description = "Measures the displayed width of unicode strings in a terminal"
+optional = false
+python-versions = "*"
+files = [
+    {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"},
+    {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"},
+]
+
 [[package]]
 name = "werkzeug"
 version = "3.0.3"
@@ -4503,4 +4561,4 @@ xarm = ["gym-xarm"]

 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.13"
-content-hash = "25d5a270d770d37b13a93bf72868d3b9e683f8af5252b6332ec926a26fd0c096"
+content-hash = "a340f2ed23db2f3c371c494cbc9a33392e122ed6713e6098277a87b3fb805f2b"
diff --git a/pyproject.toml b/pyproject.toml
index 8d4ea792..999a7203 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,7 +43,7 @@ opencv-python = ">=4.9.0"
 diffusers = ">=0.27.2"
 torchvision = ">=0.17.1"
 h5py = ">=3.10.0"
-huggingface-hub = {extras = ["hf-transfer"], version = ">=0.23.0"}
+huggingface-hub = {extras = ["hf-transfer", "cli"], version = ">=0.23.0"}
 gymnasium = ">=0.29.1"
 cmake = ">=3.29.0.1"
 gym-dora = { git = "https://github.com/dora-rs/dora-lerobot.git", subdirectory = "gym_dora", optional = true }
diff --git a/tests/conftest.py b/tests/conftest.py
index 9d58b7f9..eaf1b476 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -15,7 +15,9 @@
 # limitations under the License.
 import pytest

-from .utils import DEVICE
+from lerobot.common.utils.utils import init_hydra_config
+
+from .utils import DEVICE, KOCH_ROBOT_CONFIG_PATH


 def pytest_collection_finish():
@@ -27,11 +29,12 @@ def is_koch_available():
     try:
         from lerobot.common.robot_devices.robots.factory import make_robot

-        robot = make_robot("koch")
+        robot_cfg = init_hydra_config(KOCH_ROBOT_CONFIG_PATH)
+        robot = make_robot(robot_cfg)
         robot.connect()
         del robot
         return True
     except Exception as e:
-        print("An alexander koch robot is not available.")
+        print("A koch robot is not available.")
         print(e)
         return False
diff --git a/tests/test_control_robot.py b/tests/test_control_robot.py
index ae97fe1f..5dae28e4 100644
--- a/tests/test_control_robot.py
+++ b/tests/test_control_robot.py
@@ -3,13 +3,20 @@ from pathlib import Path
 from lerobot.common.policies.factory import make_policy
 from lerobot.common.robot_devices.robots.factory import make_robot
 from lerobot.common.utils.utils import init_hydra_config
-from lerobot.scripts.control_robot import record_dataset, replay_episode, run_policy, teleoperate
-from tests.utils import DEFAULT_CONFIG_PATH, DEVICE, require_koch
+from lerobot.scripts.control_robot import calibrate, record, replay, teleoperate
+from tests.utils import DEFAULT_CONFIG_PATH, DEVICE, KOCH_ROBOT_CONFIG_PATH, require_koch
+
+
+def make_robot_(overrides=None):
+    robot_cfg = init_hydra_config(KOCH_ROBOT_CONFIG_PATH, overrides)
+    robot = make_robot(robot_cfg)
+    return robot


 @require_koch
+# `require_koch` uses `request` to access the `is_koch_available` fixture
 def test_teleoperate(request):
-    robot = make_robot("koch")
+    robot = make_robot_()
     teleoperate(robot, teleop_time_s=1)
     teleoperate(robot, fps=30, teleop_time_s=1)
     teleoperate(robot, fps=60, teleop_time_s=1)
@@ -17,20 +24,35 @@


 @require_koch
-def test_record_dataset_and_replay_episode_and_run_policy(tmpdir, request):
-    robot_name = "koch"
+def test_calibrate(request):
+    robot = make_robot_()
+    calibrate(robot)
+    del robot
+
+
+@require_koch
+def test_record_without_cameras(tmpdir, request):
+    root = Path(tmpdir)
+    repo_id = "lerobot/debug"
+
+    robot = make_robot_(overrides=["~cameras"])
+    record(robot, fps=30, root=root, repo_id=repo_id, warmup_time_s=1, episode_time_s=1, num_episodes=2)
+
+
+@require_koch
+def test_record_and_replay_and_policy(tmpdir, request):
     env_name = "koch_real"
     policy_name = "act_koch_real"

     root = Path(tmpdir)
     repo_id = "lerobot/debug"

-    robot = make_robot(robot_name)
-    dataset = record_dataset(
+    robot = make_robot_()
+    dataset = record(
         robot, fps=30, root=root, repo_id=repo_id, warmup_time_s=1, episode_time_s=1, num_episodes=2
     )

-    replay_episode(robot, episode=0, fps=30, root=root, repo_id=repo_id)
+    replay(robot, episode=0, fps=30, root=root, repo_id=repo_id)

     cfg = init_hydra_config(
         DEFAULT_CONFIG_PATH,
@@ -43,6 +65,6 @@

     policy = make_policy(hydra_cfg=cfg, dataset_stats=dataset.stats)

-    run_policy(robot, policy, cfg, run_time_s=1)
+    record(robot, policy, cfg, run_time_s=1)

     del robot
diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index da0ae755..d587ef6b 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -23,6 +23,7 @@ import einops
 import pytest
 import torch
 from datasets import Dataset
+from huggingface_hub import HfApi
 from safetensors.torch import load_file

 import lerobot
@@ -34,6 +35,7 @@ from lerobot.common.datasets.compute_stats import (
 from lerobot.common.datasets.factory import make_dataset
 from lerobot.common.datasets.lerobot_dataset import LeRobotDataset, MultiLeRobotDataset
 from lerobot.common.datasets.utils import (
+    create_branch,
     flatten_dict,
     hf_transform_to_torch,
     load_previous_and_future_frames,
@@ -385,3 +387,29 @@ def test_aggregate_stats():
         for agg_fn in ["mean", "min", "max"]:
             assert torch.allclose(stats[data_key][agg_fn], einops.reduce(data, "n -> 1", agg_fn))
         assert torch.allclose(stats[data_key]["std"], torch.std(data, correction=0))
+
+
+@pytest.mark.skip("Requires internet access")
+def test_create_branch():
+    api = HfApi()
+
+    repo_id = "cadene/test_create_branch"
+    repo_type = "dataset"
+    branch = "test"
+    ref = f"refs/heads/{branch}"
+
+    # Prepare a repo with a test branch
+    api.delete_repo(repo_id, repo_type=repo_type, missing_ok=True)
+    api.create_repo(repo_id, repo_type=repo_type)
+    create_branch(repo_id, repo_type=repo_type, branch=branch)
+
+    # Make sure the test branch exists
+    branches = api.list_repo_refs(repo_id, repo_type=repo_type).branches
+    refs = [branch.ref for branch in branches]
+    assert ref in refs
+
+    # Overwrite it
+    create_branch(repo_id, repo_type=repo_type, branch=branch)
+
+    # Clean
+    api.delete_repo(repo_id, repo_type=repo_type)
diff --git a/tests/test_motors.py b/tests/test_motors.py
index 87c000f5..db9ca1f9 100644
--- a/tests/test_motors.py
+++ b/tests/test_motors.py
@@ -1,33 +1,54 @@
+# TODO(rcadene): measure fps in nightly?
+# TODO(rcadene): test logs
+# TODO(rcadene): test calibration
+# TODO(rcadene): add compatibility with other motors bus
+
 import time

+import hydra
 import numpy as np
 import pytest

 from lerobot.common.robot_devices.utils import RobotDeviceAlreadyConnectedError, RobotDeviceNotConnectedError
-from tests.utils import require_koch
+from lerobot.common.utils.utils import init_hydra_config
+from tests.utils import KOCH_ROBOT_CONFIG_PATH, require_koch
+
+
+def make_motors_bus():
+    robot_cfg = init_hydra_config(KOCH_ROBOT_CONFIG_PATH)
+    # Instantiate a common motors structure,
+    # here the one from the Alexander Koch leader arm.
+    motors_bus = hydra.utils.instantiate(robot_cfg.leader_arms.main)
+    return motors_bus
+
+
+@require_koch
+def test_find_port(request):
+    from lerobot.common.robot_devices.motors.dynamixel import find_port
+
+    find_port()
+
+
+@require_koch
+def test_configure_motors_all_ids_1(request):
+    # This test expects the configuration to already be correct.
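+    # It then deliberately scrambles the baudrate and gives every motor ID 1, so the
+    # following `connect()` is expected to restore a configuration for which
+    # `are_motors_configured()` holds.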
+    motors_bus = make_motors_bus()
+    motors_bus.connect()
+    motors_bus.write("Baud_Rate", [0] * len(motors_bus.motors))
+    motors_bus.set_bus_baudrate(9_600)
+    motors_bus.write("ID", [1] * len(motors_bus.motors))
+    del motors_bus
+
+    # Test configure
+    motors_bus = make_motors_bus()
+    motors_bus.connect()
+    assert motors_bus.are_motors_configured()
+    del motors_bus


 @require_koch
 def test_motors_bus(request):
-    # TODO(rcadene): measure fps in nightly?
-    # TODO(rcadene): test logs
-    # TODO(rcadene): test calibration
-    # TODO(rcadene): add compatibility with other motors bus
-
-    from lerobot.common.robot_devices.motors.dynamixel import DynamixelMotorsBus
-
-    # Test instantiating a common motors structure.
-    # Here the one from Alexander Koch follower arm.
-    port = "/dev/tty.usbmodem575E0032081"
-    motors = {
-        # name: (index, model)
-        "shoulder_pan": (1, "xl430-w250"),
-        "shoulder_lift": (2, "xl430-w250"),
-        "elbow_flex": (3, "xl330-m288"),
-        "wrist_flex": (4, "xl330-m288"),
-        "wrist_roll": (5, "xl330-m288"),
-        "gripper": (6, "xl330-m288"),
-    }
-    motors_bus = DynamixelMotorsBus(port, motors)
+    motors_bus = make_motors_bus()

     # Test reading and writing before connecting raises an error
     with pytest.raises(RobotDeviceNotConnectedError):
@@ -41,7 +62,7 @@ def test_motors_bus(request):
     del motors_bus

     # Test connecting
-    motors_bus = DynamixelMotorsBus(port, motors)
+    motors_bus = make_motors_bus()
     motors_bus.connect()

     # Test connecting twice raises an error
@@ -52,7 +73,7 @@ def test_motors_bus(request):
     motors_bus.write("Torque_Enable", 0)
     values = motors_bus.read("Torque_Enable")
     assert isinstance(values, np.ndarray)
-    assert len(values) == len(motors)
+    assert len(values) == len(motors_bus.motors)
     assert (values == 0).all()

     # Test writing torque on a specific motor
@@ -83,10 +104,3 @@ def test_motors_bus(request):
     time.sleep(1)
     new_values = motors_bus.read("Present_Position")
     assert (new_values == values).all()
-
-
-@require_koch
-def test_find_port(request):
-    from lerobot.common.robot_devices.motors.dynamixel import find_port
-
-    find_port()
diff --git a/tests/utils.py b/tests/utils.py
index ff732478..f79ad249 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -23,6 +23,7 @@ from lerobot.common.utils.import_utils import is_package_available

 # Pass this as the first argument to init_hydra_config.
 DEFAULT_CONFIG_PATH = "lerobot/configs/default.yaml"
+KOCH_ROBOT_CONFIG_PATH = "lerobot/configs/robot/koch.yaml"

 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

@@ -161,6 +162,7 @@ def require_koch(func):
         if request is None:
             raise ValueError("The 'request' fixture must be passed to the test function as a parameter.")

+        # The function `is_koch_available` is defined in `tests/conftest.py`
         if not request.getfixturevalue("is_koch_available"):
             pytest.skip("An Alexander Koch robot is not available.")
         return func(*args, **kwargs)
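Usage note (not part of the patch): the snippet below is a minimal sketch of driving the renamed `control_robot.py` subcommands, assembled only from the argparse options visible in this diff; any flags not listed here keep the defaults defined above, and the values shown are illustrative.

```bash
# Teleoperate the follower arm with the leader arm
python lerobot/scripts/control_robot.py teleoperate \
    --robot-path lerobot/configs/robot/koch.yaml --fps 30

# Record episodes (optionally add `-p <pretrained policy>` to record while a policy acts)
python lerobot/scripts/control_robot.py record \
    --robot-path lerobot/configs/robot/koch.yaml \
    --fps 30 --warmup-time-s 10 --episode-time-s 60 --reset-time-s 60 --num-episodes 50

# Replay the first recorded episode on the real robot
python lerobot/scripts/control_robot.py replay \
    --robot-path lerobot/configs/robot/koch.yaml --fps 30 --episode 0
```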