From 0caa33309a8046a5f1e8dfc7cdb5cdee904722ea Mon Sep 17 00:00:00 2001
From: Cadene
Date: Sun, 5 May 2024 17:01:50 +0000
Subject: [PATCH] image_keys -> camera_keys, add visualization video and wandb
 image to README

---
 .github/PULL_REQUEST_TEMPLATE.md              | 10 +-
 README.md                                     | 96 ++++++-------------
 examples/1_load_lerobot_dataset.py            |  2 +-
 lerobot/common/datasets/lerobot_dataset.py    | 21 ++--
 .../push_dataset_to_hub/aloha_hdf5_format.py  |  8 +-
 lerobot/scripts/push_dataset_to_hub.py        |  4 +-
 lerobot/scripts/visualize_dataset.py          |  4 +-
 tests/test_datasets.py                        |  4 +-
 8 files changed, 59 insertions(+), 90 deletions(-)

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 17d4e530..4063e395 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -27,10 +27,8 @@ DATA_DIR=tests/data pytest -sx tests/test_stuff.py::test_something
 python lerobot/scripts/train.py --some.option=true
 ```
 
-## Before submitting
-Please read the [contributor guideline](https://github.com/huggingface/lerobot/blob/main/CONTRIBUTING.md#submitting-a-pull-request-pr).
-
-
-## Who can review?
-Anyone in the community is free to review the PR once the tests have passed. Feel free to tag
+## SECTION TO REMOVE BEFORE SUBMITTING YOUR PR
+**Note**: Anyone in the community is free to review the PR once the tests have passed. Feel free to tag
 members/contributors who may be interested in your PR. Try to avoid tagging more than 3 people.
+
+**Note**: Before submitting this PR, please read the [contributor guideline](https://github.com/huggingface/lerobot/blob/main/CONTRIBUTING.md#submitting-a-pull-request-pr).
diff --git a/README.md b/README.md
index 7659d016..cfdb7508 100644
--- a/README.md
+++ b/README.md
@@ -117,21 +117,25 @@ wandb login
 
 ### Visualize datasets
 
-You can easily visualize episodes from a dataset by executing our script from the command line:
+Check out [example 1](./examples/1_load_lerobot_dataset.py), which illustrates how to use our dataset class to automatically download data from the Hugging Face hub.
+
+You can also visualize episodes from a dataset locally by executing our script from the command line:
 ```bash
 python lerobot/scripts/visualize_dataset.py \
     --repo-id lerobot/pusht \
     --episode-index 0
 ```
 
-Check out [example 1](./examples/1_load_lerobot_dataset.py) to learn how you can import and use our dataset class and download the data from the Hugging Face hub.
+It will open `rerun.io` and display the camera streams, robot states, and actions:
+
+![](media/battery-720p.mov)
+
+Our script can also visualize datasets stored on a remote server. See `python lerobot/scripts/visualize_dataset.py --help` for more instructions.
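+
+For a quick programmatic check, the same camera streams are accessible through the dataset class directly. Below is a minimal sketch in the spirit of example 1 (the `lerobot/pusht` repo id is just an illustration):
+```python
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
+
+# Instantiating the class downloads the data from the Hugging Face hub if needed
+dataset = LeRobotDataset("lerobot/pusht")
+
+# Keys to access image and video streams from cameras
+print(dataset.camera_keys)
+
+# Frames are returned as float32 tensors normalized to [0, 1]
+frame = dataset[0][dataset.camera_keys[0]]
+print(frame.shape, frame.dtype)
+```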
 
 ### Evaluate a pretrained policy
 
-Check out [example 2](./examples/2_evaluate_pretrained_policy.py) to see how you can load a pretrained policy from Hugging Face hub, load up the corresponding environment and model, and run an evaluation.
+Check out [example 2](./examples/2_evaluate_pretrained_policy.py), which illustrates how to download a pretrained policy from the Hugging Face hub and run an evaluation in its corresponding environment.
 
-Or you can achieve the same result by executing our script from the command line:
+We also provide a more capable script that parallelizes the evaluation over multiple environments within the same rollout. Here is an example with a pretrained model hosted on [lerobot/diffusion_pusht](https://huggingface.co/lerobot/diffusion_pusht):
 ```bash
 python lerobot/scripts/eval.py \
     -p lerobot/diffusion_pusht \
     eval.n_episodes=10 \
     eval.batch_size=10
 ```
 
@@ -139,8 +143,7 @@
-After training your own policy, you can also re-evaluate the checkpoints with:
-
+Note: After training your own policy, you can re-evaluate the checkpoints with:
 ```bash
 python lerobot/scripts/eval.py \
     -p PATH/TO/TRAIN/OUTPUT/FOLDER
 ```
 
@@ -150,20 +153,29 @@ See `python lerobot/scripts/eval.py --help` for more instructions.
 
 ### Train your own policy
 
-Check out [example 3](./examples/3_train_policy.py) to see how you can start training a model on a dataset, which will be automatically downloaded if needed.
+Check out [example 3](./examples/3_train_policy.py), which illustrates how to start training a model.
 
-In general, you can use our training script to easily train any policy on its environment:
+In general, you can use our training script to easily train any policy. To use wandb for logging training and evaluation curves, make sure you have run `wandb login`. Here is an example of training the ACT policy on human-collected trajectories from the Aloha simulation environment for the insertion task:
 ```bash
-# TODO(aliberts): not working
 python lerobot/scripts/train.py \
-    env=aloha \
-    task=sim_insertion \
-    repo_id=lerobot/aloha_sim_insertion_scripted \
     policy=act \
+    env=aloha \
+    env.task=AlohaInsertion-v0 \
+    dataset_repo_id=lerobot/aloha_sim_insertion_human \
     hydra.run.dir=outputs/train/aloha_act
 ```
 
-After training, you may want to revisit model evaluation to change the evaluation settings. In fact, during training every checkpoint is already evaluated but on a low number of episodes for efficiency. Check out [example](./examples) to evaluate any model checkpoint on more episodes to increase statistical significance.
+Here is an example of logs from wandb:
+
+![](media/battery-720p.mov)
+
+You can deactivate wandb by adding these arguments to the command line:
+```
+    wandb.disable_artifact=true \
+    wandb.enable=false
+```
+
+Note: During training, every checkpoint is evaluated on a low number of episodes for efficiency. After training, you may want to re-evaluate your best checkpoints on more episodes or change the evaluation settings. See `python lerobot/scripts/eval.py --help` for more instructions.
 
 ## Contribute
 
@@ -176,57 +188,18 @@ To add a dataset to the hub, begin by logging in with a token that has write access:
 ```bash
 huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
 ```
 
-Then, push your dataset to the hub using the following command:
-
+Then move your dataset folder into the `data` directory (e.g. `data/aloha_ping_pong`), and push your dataset to the hub with the following command:
 ```bash
 python lerobot/scripts/push_dataset_to_hub.py \
 --data-dir data \
---dataset-id pusht \
---raw-format pusht_zarr \
---community-id lerobot \
---revision v1.3 \
---dry-run 0 \
---save-to-disk 0 \
---save-tests-to-disk 0 \
---debug 0
+--dataset-id aloha_ping_pong \
+--raw-format aloha_hdf5 \
+--community-id lerobot
 ```
 
-For detailed explanations of the arguments, consult the help command:
+See `python lerobot/scripts/push_dataset_to_hub.py --help` for more instructions.
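+
+Once the upload finishes, you can sanity-check the result by loading the dataset back from the hub. This is a minimal sketch (the `lerobot/aloha_ping_pong` repo id assumes the community id and dataset id used above):
+```python
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
+
+dataset = LeRobotDataset("lerobot/aloha_ping_pong")
+
+# The printed summary includes the number of episodes, fps and camera keys
+print(dataset)
+```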
-```bash
-python lerobot/scripts/push_dataset_to_hub.py --help
-```
-
-We currently support the following raw formats:
-
-```
-pusht_zarr | umi_zarr | aloha_hdf5 | xarm_pkl
-```
-
-For the `revision` parameter, set the version to match `CODEBASE_VERSION` using:
-
-```bash
-python -c "from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION; print(CODEBASE_VERSION)"
-```
-
-If there is a need to update the unit tests, set `save-tests-to-disk` to 1 to mock the dataset:
-
-```bash
-python lerobot/scripts/push_dataset_to_hub.py \
---data-dir data \
---dataset-id pusht \
---raw-format pusht_zarr \
---community-id lerobot \
---revision v1.3 \
---dry-run 0 \
---save-to-disk 0 \
---save-tests-to-disk 1 \
---debug 0
-```
-
-The mock dataset will be located in `tests/data/$COMMUNITY_ID/$DATASET_ID/`, which can be used to update the unit tests.
-
-To implement a new raw format, create a file in `lerobot/common/datasets/push_dataset_to_hub/{raw_format}_format.py` and implement the functions: `check_format`, `load_from_raw`, and `to_hf_dataset`. Combine these functions in `from_raw_to_lerobot_format`. You can find examples here: [pusht_zarr](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/pusht_zarr_format.py), [umi_zarr](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/umi_zarr_format.py), [aloha_hdf5](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/aloha_hdf5_format.py), and [xarm_pkl](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/xarm_pkl_format.py). Then, add the new format to [`get_from_raw_to_lerobot_format_fn`](https://github.com/huggingface/lerobot/blob/main/lerobot/scripts/push_dataset_to_hub.py#L69) in [`lerobot/scripts/push_dataset_to_hub.py`](https://github.com/huggingface/lerobot/blob/main/lerobot/scripts/push_dataset_to_hub.py). Et voilà! You are now ready to use this new format in [`push_dataset_to_hub.py`](https://github.com/huggingface/lerobot/blob/main/lerobot/scripts/push_dataset_to_hub.py) and can submit a PR to add it 🤗.
+If your dataset format is not supported, implement your own in `lerobot/common/datasets/push_dataset_to_hub/${raw_format}_format.py` by copying examples like [pusht_zarr](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/pusht_zarr_format.py), [umi_zarr](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/umi_zarr_format.py), [aloha_hdf5](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/aloha_hdf5_format.py), or [xarm_pkl](https://github.com/huggingface/lerobot/blob/main/lerobot/common/datasets/push_dataset_to_hub/xarm_pkl_format.py); the expected module surface is sketched below.
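+
+The sketch below lists the functions such a module is expected to provide. The function names come from the existing formats; the exact signatures are assumptions inferred from `aloha_hdf5_format.py`, so mirror one of the linked examples for the authoritative versions:
+```python
+from datasets import Dataset
+
+
+def check_format(raw_dir):
+    """Sanity-check that the raw files in `raw_dir` are complete and well-formed."""
+    raise NotImplementedError
+
+
+def load_from_raw(raw_dir, out_dir, fps, video, debug):
+    """Read the raw files and build a data_dict of tensors, one key per modality."""
+    raise NotImplementedError
+
+
+def to_hf_dataset(data_dict, video) -> Dataset:
+    """Declare the features (Image or VideoFrame for cameras) and build the Hugging Face dataset."""
+    raise NotImplementedError
+
+
+def from_raw_to_lerobot_format(raw_dir, out_dir, fps, video, debug):
+    """Chain the steps above; this is the entry point registered in push_dataset_to_hub.py."""
+    raise NotImplementedError
+```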
 
 ### Add a pretrained policy
 
@@ -283,10 +256,3 @@ with profile(
     prof.step() # insert code to profile, potentially whole body of eval_policy function
 ```
-
-```bash
-python lerobot/scripts/eval.py \
-    --config outputs/pusht/.hydra/config.yaml \
-    pretrained_model_path=outputs/pusht/model.pt \
-    eval_episodes=7
-```
diff --git a/examples/1_load_lerobot_dataset.py b/examples/1_load_lerobot_dataset.py
index c5f172ca..3846926a 100644
--- a/examples/1_load_lerobot_dataset.py
+++ b/examples/1_load_lerobot_dataset.py
@@ -39,7 +39,7 @@ print(dataset.hf_dataset)
 # And provides additional utilities for robotics and compatibility with Pytorch
 print(f"\naverage number of frames per episode: {dataset.num_samples / dataset.num_episodes:.3f}")
 print(f"frames per second used during data collection: {dataset.fps=}")
-print(f"keys to access images from cameras: {dataset.image_keys=}\n")
+print(f"keys to access images from cameras: {dataset.camera_keys=}\n")
 
 # Access frame indexes associated to first episode
 episode_index = 0
diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py
index d17c4307..f7bc5bd2 100644
--- a/lerobot/common/datasets/lerobot_dataset.py
+++ b/lerobot/common/datasets/lerobot_dataset.py
@@ -62,17 +62,22 @@ class LeRobotDataset(torch.utils.data.Dataset):
         return self.hf_dataset.features
 
     @property
-    def image_keys(self) -> list[str]:
-        """Keys to access images from cameras."""
-        image_keys = []
+    def camera_keys(self) -> list[str]:
+        """Keys to access image and video streams from cameras."""
+        keys = []
         for key, feats in self.hf_dataset.features.items():
-            if isinstance(feats, datasets.Image):
-                image_keys.append(key)
-        return image_keys + self.video_frame_keys
+            if isinstance(feats, (datasets.Image, VideoFrame)):
+                keys.append(key)
+        return keys
 
     @property
     def video_frame_keys(self) -> list[str]:
-        """Keys to access video frames from cameras."""
+        """Keys to access video frames that require decoding into images.
+
+        Note: It is empty if the dataset contains images only,
+        equal to `self.camera_keys` if the dataset contains videos only,
+        or a subset of `self.camera_keys` in the case of a mixed image/video dataset.
+        """
         video_frame_keys = []
         for key, feats in self.hf_dataset.features.items():
             if isinstance(feats, VideoFrame):
@@ -136,7 +141,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
             f"  Number of Episodes: {self.num_episodes},\n"
             f"  Type: {'video (.mp4)' if self.video else 'image (.png)'},\n"
             f"  Recorded Frames per Second: {self.fps},\n"
-            f"  Image Keys: {self.image_keys},\n"
+            f"  Camera Keys: {self.camera_keys},\n"
             f"  Video Frame Keys: {self.video_frame_keys if self.video else 'N/A'},\n"
             f"  Transformations: {self.transform},\n"
             f")"
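
The docstring above implies an invariant between the two properties that downstream code can rely on. A small illustrative sketch (the `lerobot/pusht` repo id is only an example; any `LeRobotDataset` instance behaves the same):

```python
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset

dataset = LeRobotDataset("lerobot/pusht")

# video_frame_keys is a (possibly empty) subset of camera_keys: empty for image-only
# datasets, equal for video-only datasets, a strict subset for mixed ones.
assert set(dataset.video_frame_keys) <= set(dataset.camera_keys)
```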
diff --git a/lerobot/common/datasets/push_dataset_to_hub/aloha_hdf5_format.py b/lerobot/common/datasets/push_dataset_to_hub/aloha_hdf5_format.py
index db9cd036..eed37634 100644
--- a/lerobot/common/datasets/push_dataset_to_hub/aloha_hdf5_format.py
+++ b/lerobot/common/datasets/push_dataset_to_hub/aloha_hdf5_format.py
@@ -120,12 +120,12 @@ def load_from_raw(raw_dir, out_dir, fps, video, debug):
 def to_hf_dataset(data_dict, video) -> Dataset:
     features = {}
 
-    image_keys = [key for key in data_dict if "observation.images." in key]
-    for image_key in image_keys:
+    keys = [key for key in data_dict if "observation.images." in key]
+    for key in keys:
         if video:
-            features[image_key] = VideoFrame()
+            features[key] = VideoFrame()
         else:
-            features[image_key] = Image()
+            features[key] = Image()
 
     features["observation.state"] = Sequence(
         length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None)
diff --git a/lerobot/scripts/push_dataset_to_hub.py b/lerobot/scripts/push_dataset_to_hub.py
index ca8c4600..dfac410b 100644
--- a/lerobot/scripts/push_dataset_to_hub.py
+++ b/lerobot/scripts/push_dataset_to_hub.py
@@ -60,7 +60,7 @@ import torch
 from huggingface_hub import HfApi
 from safetensors.torch import save_file
 
-from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
+from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
 from lerobot.common.datasets.push_dataset_to_hub._download_raw import download_raw
 from lerobot.common.datasets.push_dataset_to_hub.compute_stats import compute_stats
 from lerobot.common.datasets.utils import flatten_dict
@@ -252,7 +252,7 @@ def main():
     parser.add_argument(
         "--revision",
         type=str,
-        default="v1.2",
+        default=CODEBASE_VERSION,
         help="Codebase version used to generate the dataset.",
     )
     parser.add_argument(
diff --git a/lerobot/scripts/visualize_dataset.py b/lerobot/scripts/visualize_dataset.py
index 44acd416..1835e90c 100644
--- a/lerobot/scripts/visualize_dataset.py
+++ b/lerobot/scripts/visualize_dataset.py
@@ -32,7 +32,7 @@ local$ rerun lerobot_pusht_episode_0.rrd
 ```
 
 - Visualize data stored on a distant machine through streaming:
-(You need to forward the websocket port to the distant machine, with 
+(You need to forward the websocket port to the distant machine, with
 `ssh -L 9087:localhost:9087 username@remote-host`)
 ```
 distant$ python lerobot/scripts/visualize_dataset.py \
@@ -131,7 +131,7 @@ def visualize_dataset(
         rr.set_time_seconds("timestamp", batch["timestamp"][i].item())
 
         # display each camera image
-        for key in dataset.image_keys:
+        for key in dataset.camera_keys:
             # TODO(rcadene): add `.compress()`? is it lossless?
             rr.log(key, rr.Image(to_hwc_uint8_numpy(batch[key][i])))
 
diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index 22b271be..1d93d48f 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -41,7 +41,7 @@ def test_factory(env_name, repo_id, policy_name):
     )
     dataset = make_dataset(cfg)
     delta_timestamps = dataset.delta_timestamps
-    image_keys = dataset.image_keys
+    camera_keys = dataset.camera_keys
 
     item = dataset[0]
 
@@ -71,7 +71,7 @@ def test_factory(env_name, repo_id, policy_name):
     else:
         assert item[key].ndim == ndim, f"{key}"
 
-    if key in image_keys:
+    if key in camera_keys:
         assert item[key].dtype == torch.float32, f"{key}"
         # TODO(rcadene): we assume for now that image normalization takes place in the model
         assert item[key].max() <= 1.0, f"{key}"
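
Lastly, note the `push_dataset_to_hub.py` change above: `--revision` now defaults to the library's `CODEBASE_VERSION` instead of a hard-coded `v1.2`, so pushed datasets are tagged with the version of the code that generated them. You can print the current value with a one-liner carried over from the README section removed above:

```python
# Print the codebase version that push_dataset_to_hub.py now uses as its default --revision
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION

print(CODEBASE_VERSION)
```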