From 6d2bc11365d3ac9f0ebd04e6aea2e49e50400027 Mon Sep 17 00:00:00 2001
From: Simon Alibert
Date: Fri, 11 Oct 2024 10:59:38 +0200
Subject: [PATCH] Add doc, scrap video_frame_keys attribute

---
 lerobot/common/datasets/lerobot_dataset.py | 107 +++++++++++++++------
 lerobot/common/datasets/utils.py           |   4 +
 lerobot/scripts/push_dataset_to_hub.py     |   2 +-
 lerobot/scripts/visualize_dataset_html.py  |   3 +-
 4 files changed, 85 insertions(+), 31 deletions(-)

diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py
index b91eb75f..52d3377c 100644
--- a/lerobot/common/datasets/lerobot_dataset.py
+++ b/lerobot/common/datasets/lerobot_dataset.py
@@ -54,6 +54,83 @@ class LeRobotDataset(torch.utils.data.Dataset):
         tolerance_s: float = 1e-4,
         video_backend: str | None = None,
     ):
+        """LeRobotDataset encapsulates 3 main things:
+        - metadata:
+            - info contains various information about the dataset like shapes, keys, fps etc.
+            - stats stores the dataset statistics of the different modalities for normalization
+            - tasks contains the prompts for each task of the dataset, which can be used for
+              task-conditioned training.
+        - hf_dataset (from datasets.Dataset), which reads any values from parquet files.
+        - (optional) videos from which frames are loaded to be synchronous with data from parquet files.
+
+        3 modes are available for this class, depending on 3 different use cases:
+
+        1. Your dataset already exists on the Hugging Face Hub at the address
+           https://huggingface.co/datasets/{repo_id} and is not on your local disk in the 'root' folder:
+           Instantiating this class with this 'repo_id' will download the dataset from that address and load
+           it, provided your dataset is compliant with codebase_version v2.0. If your dataset was created
+           before this new format, you will be prompted to convert it using our conversion script from v1.6
+           to v2.0, which you can find at [TODO(aliberts): move conversion script & add location here].
+
+        2. Your dataset already exists on your local disk in the 'root' folder:
+           This is typically the case when you recorded your dataset locally, whether or not you have
+           pushed it to the hub yet. Instantiating this class with 'root' will load your dataset directly
+           from disk. This also works while you're offline (no internet connection).
+
+        3. Your dataset doesn't already exist (either on local disk or on the Hub):
+           [TODO(aliberts): add classmethod for this case?]
+
+        In terms of files, a typical LeRobotDataset looks like this from its root path:
+        .
+        ├── README.md
+        ├── data
+        │   ├── train-00000-of-00050.parquet
+        │   ├── train-00001-of-00050.parquet
+        │   ├── train-00002-of-00050.parquet
+        │   ...
+        ├── meta
+        │   ├── info.json
+        │   ├── stats.json
+        │   └── tasks.json
+        └── videos (optional)
+            ├── observation.images.laptop_episode_000000.mp4
+            ├── observation.images.laptop_episode_000001.mp4
+            ├── observation.images.laptop_episode_000002.mp4
+            ...
+            ├── observation.images.phone_episode_000000.mp4
+            ├── observation.images.phone_episode_000001.mp4
+            ├── observation.images.phone_episode_000002.mp4
+            ...
+
+        Note that this file-based structure is designed to be as versatile as possible. The files are split
+        by episode, which allows more granular control over which episodes to use and download. The
+        structure of the dataset is entirely described in the info.json file, which can be easily downloaded
+        or viewed directly on the hub before downloading any actual data. The file types used are also very
+        simple and do not require complex tools to be read: only .parquet, .json and .mp4 files are used
+        (plus .md for the README).
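+
+        A minimal usage sketch for the most common case (mode 1 above). The repo_id here is purely
+        illustrative; any dataset repo compliant with codebase_version v2.0 works the same way:
+
+            dataset = LeRobotDataset("lerobot/some_dataset")
+            item = dataset[0]  # dict of torch tensors; video frames are decoded on the fly if needed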
+
+        Args:
+            repo_id (str): This is the repo id that will be used to fetch the dataset. Locally, the dataset
+                will be stored under root/repo_id.
+            root (Path | None, optional): Local directory to use for downloading/writing files. You can also
+                set the LEROBOT_HOME environment variable to point to a different location. Defaults to
+                '~/.cache/huggingface/lerobot'.
+            episodes (list[int] | None, optional): If specified, only the episodes with these episode_index
+                values will be loaded. Defaults to None.
+            split (str, optional): The split of the dataset to load. Defaults to "train".
+            image_transforms (Callable | None, optional): You can pass standard v2 image transforms from
+                torchvision.transforms.v2 here which will be applied to visual modalities (whether they come
+                from videos or images). Defaults to None.
+            delta_timestamps (dict[str, list[float]] | None, optional): For each key in this dictionary, the
+                returned item will additionally include frames shifted in time relative to the queried frame
+                by the given deltas (in seconds), e.g. past observations or future actions. Each delta must
+                be a multiple of 1/fps (see check_delta_timestamps). Defaults to None.
+            tolerance_s (float, optional): Tolerance in seconds used to ensure data timestamps are actually
+                in sync with the fps value. It is used at the init of the dataset to make sure that each
+                timestamp is separated from the next by 1/fps +/- tolerance_s. This also applies to frames
+                decoded from video files. It is also used to check that `delta_timestamps` (when provided)
+                are multiples of 1/fps. Defaults to 1e-4.
+            video_backend (str | None, optional): Video backend to use for decoding videos. There is
+                currently a single option, the pyav decoder used by Torchvision. Defaults to pyav.
+        """
         super().__init__()
         self.repo_id = repo_id
         self.root = root if root is not None else LEROBOT_HOME / repo_id
@@ -88,6 +165,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
         # TODO(aliberts):
         # - [X] Move delta_timestamp logic outside __get_item__
         # - [X] Update __get_item__
+        # - [/] Add doc
         # - [ ] Add self.add_frame()
         # - [ ] Add self.consolidate() for:
         #     - [X] Check timestamps sync
@@ -168,23 +246,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
         """Keys to access image and video streams from cameras (regardless of their storage method)."""
         return self.image_keys + self.video_keys
 
-    @property
-    def video_frame_keys(self) -> list[str]:
-        """
-        DEPRECATED, USE 'video_keys' INSTEAD
-        Keys to access video frames that requires to be decoded into images.
-
-        Note: It is empty if the dataset contains images only,
-        or equal to `self.cameras` if the dataset contains videos only,
-        or can even be a subset of `self.cameras` in a case of a mixed image/video dataset.
-        """
-        # TODO(aliberts): remove
-        video_frame_keys = []
-        for key, feats in self.hf_dataset.features.items():
-            if isinstance(feats, VideoFrame):
-                video_frame_keys.append(key)
-        return video_frame_keys
-
     @property
     def num_samples(self) -> int:
         """Number of samples/frames."""
@@ -200,16 +261,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
         """Total number of episodes available."""
         return self.info["total_episodes"]
 
-    # @property
-    # def tolerance_s(self) -> float:
-    #     """Tolerance in seconds used to discard loaded frames when their timestamps
-    #     are not close enough from the requested frames. It is used at the init of the dataset to make sure
-    #     that each timestamps is separated to the next by 1/fps +/- tolerance. It is only used when
-    #     `delta_timestamps` is provided or when loading video frames from mp4 files.
-    #     """
-    #     # 1e-4 to account for possible numerical error
-    #     return 1e-4
-
     @property
     def shapes(self) -> dict:
         """Shapes for the different features."""
@@ -308,7 +359,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
             f"  Type: {'video (.mp4)' if self.video else 'image (.png)'},\n"
             f"  Recorded Frames per Second: {self.fps},\n"
             f"  Camera Keys: {self.camera_keys},\n"
-            f"  Video Frame Keys: {self.video_frame_keys if self.video else 'N/A'},\n"
+            f"  Video Frame Keys: {self.camera_keys if self.video else 'N/A'},\n"
             f"  Transformations: {self.image_transforms},\n"
             f"  Codebase Version: {self.info.get('codebase_version', '< v1.6')},\n"
             f")"
diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py
index 9b70d4f6..b20b63fe 100644
--- a/lerobot/common/datasets/utils.py
+++ b/lerobot/common/datasets/utils.py
@@ -263,6 +263,10 @@ def check_timestamps_sync(
 def check_delta_timestamps(
     delta_timestamps: dict[str, list[float]], fps: int, tolerance_s: float, raise_value_error: bool = True
 ) -> bool:
+    """This checks that all the values in delta_timestamps are multiples of 1/fps, within tolerance.
+    This is to ensure that these delta_timestamps, added to any timestamp from the dataset, will themselves
+    be actual timestamps from the dataset.
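+    For example (illustrative values, assuming fps=30): delta_timestamps={"action": [-1/30, 0.0, 1/30]}
+    would pass this check, whereas a delta of 0.025 is not a multiple of 1/30 and would fail it.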
+    """
     outside_tolerance = {}
     for key, delta_ts in delta_timestamps.items():
         abs_delta_ts = torch.abs(torch.tensor(delta_ts))
diff --git a/lerobot/scripts/push_dataset_to_hub.py b/lerobot/scripts/push_dataset_to_hub.py
index adc4c72a..6eac4d0e 100644
--- a/lerobot/scripts/push_dataset_to_hub.py
+++ b/lerobot/scripts/push_dataset_to_hub.py
@@ -260,7 +260,7 @@ def push_dataset_to_hub(
         episode_index = 0
         tests_videos_dir = tests_data_dir / repo_id / "videos"
         tests_videos_dir.mkdir(parents=True, exist_ok=True)
-        for key in lerobot_dataset.video_frame_keys:
+        for key in lerobot_dataset.camera_keys:
             fname = f"{key}_episode_{episode_index:06d}.mp4"
             shutil.copy(videos_dir / fname, tests_videos_dir / fname)
 
diff --git a/lerobot/scripts/visualize_dataset_html.py b/lerobot/scripts/visualize_dataset_html.py
index c035e562..d9d153a0 100644
--- a/lerobot/scripts/visualize_dataset_html.py
+++ b/lerobot/scripts/visualize_dataset_html.py
@@ -171,8 +171,7 @@ def get_episode_video_paths(dataset: LeRobotDataset, ep_index: int) -> list[str]
     # get first frame of episode (hack to get video_path of the episode)
     first_frame_idx = dataset.episode_data_index["from"][ep_index].item()
     return [
-        dataset.hf_dataset.select_columns(key)[first_frame_idx][key]["path"]
-        for key in dataset.video_frame_keys
+        dataset.hf_dataset.select_columns(key)[first_frame_idx][key]["path"] for key in dataset.camera_keys
     ]