From 38c14571cc09a2a87c9e62086f584781180227c4 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Mon, 10 Feb 2025 16:39:34 +0100 Subject: [PATCH 01/28] Bump CODEBASE_VERSION --- lerobot/common/datasets/lerobot_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 9483bf0a..c1a4a6d5 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -67,7 +67,7 @@ from lerobot.common.datasets.video_utils import ( from lerobot.common.robot_devices.robots.utils import Robot # For maintainers, see lerobot/common/datasets/push_dataset_to_hub/CODEBASE_VERSION.md -CODEBASE_VERSION = "v2.0" +CODEBASE_VERSION = "v2.1" LEROBOT_HOME = Path(os.getenv("LEROBOT_HOME", "~/.cache/huggingface/lerobot")).expanduser() From 9d6886dd086a1ed469e7b49ed86a17999aad7fab Mon Sep 17 00:00:00 2001 From: Remi Date: Fri, 14 Feb 2025 14:22:22 +0100 Subject: [PATCH 02/28] Add frame level task (#693) Co-authored-by: Simon Alibert <75076266+aliberts@users.noreply.github.com> --- examples/port_datasets/pusht_zarr.py | 3 +- lerobot/common/datasets/lerobot_dataset.py | 117 +++++++++++------- lerobot/common/datasets/utils.py | 4 +- lerobot/common/robot_devices/control_utils.py | 8 +- lerobot/scripts/control_robot.py | 5 +- tests/test_datasets.py | 18 +++ 6 files changed, 105 insertions(+), 50 deletions(-) diff --git a/examples/port_datasets/pusht_zarr.py b/examples/port_datasets/pusht_zarr.py index 1506f427..e9015d2c 100644 --- a/examples/port_datasets/pusht_zarr.py +++ b/examples/port_datasets/pusht_zarr.py @@ -180,6 +180,7 @@ def main(raw_dir: Path, repo_id: str, mode: str = "video", push_to_hub: bool = T # Shift reward and success by +1 until the last item of the episode "next.reward": reward[i + (frame_idx < num_frames - 1)], "next.success": success[i + (frame_idx < num_frames - 1)], + "task": PUSHT_TASK, } frame["observation.state"] = torch.from_numpy(agent_pos[i]) @@ -191,7 +192,7 @@ def main(raw_dir: Path, repo_id: str, mode: str = "video", push_to_hub: bool = T dataset.add_frame(frame) - dataset.save_episode(task=PUSHT_TASK) + dataset.save_episode() dataset.consolidate() diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index c1a4a6d5..877703d7 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -87,7 +87,7 @@ class LeRobotDatasetMetadata: self.pull_from_repo(allow_patterns="meta/") self.info = load_info(self.root) self.stats = load_stats(self.root) - self.tasks = load_tasks(self.root) + self.tasks, self.task_to_task_index = load_tasks(self.root) self.episodes = load_episodes(self.root) def pull_from_repo( @@ -202,31 +202,35 @@ class LeRobotDatasetMetadata: """Max number of episodes per chunk.""" return self.info["chunks_size"] - @property - def task_to_task_index(self) -> dict: - return {task: task_idx for task_idx, task in self.tasks.items()} - - def get_task_index(self, task: str) -> int: + def get_task_index(self, task: str) -> int | None: """ Given a task in natural language, returns its task_index if the task already exists in the dataset, - otherwise creates a new task_index. + otherwise return None. 
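+        Example (illustrative, not part of the original patch): on a dataset whose first
+        recorded task is "pick up the cube", `meta.get_task_index("pick up the cube")`
+        returns 0, while querying a task that was never recorded returns None (callers are
+        then expected to register it with `add_task`).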
""" - task_index = self.task_to_task_index.get(task, None) - return task_index if task_index is not None else self.total_tasks + return self.task_to_task_index.get(task, None) - def save_episode(self, episode_index: int, episode_length: int, task: str, task_index: int) -> None: + def add_task(self, task: str): + """ + Given a task in natural language, add it to the dictionnary of tasks. + """ + if task in self.task_to_task_index: + raise ValueError(f"The task '{task}' already exists and can't be added twice.") + + task_index = self.info["total_tasks"] + self.task_to_task_index[task] = task_index + self.tasks[task_index] = task + self.info["total_tasks"] += 1 + + task_dict = { + "task_index": task_index, + "task": task, + } + append_jsonlines(task_dict, self.root / TASKS_PATH) + + def save_episode(self, episode_index: int, episode_length: int, episode_tasks: list[str]) -> None: self.info["total_episodes"] += 1 self.info["total_frames"] += episode_length - if task_index not in self.tasks: - self.info["total_tasks"] += 1 - self.tasks[task_index] = task - task_dict = { - "task_index": task_index, - "task": task, - } - append_jsonlines(task_dict, self.root / TASKS_PATH) - chunk = self.get_episode_chunk(episode_index) if chunk >= self.total_chunks: self.info["total_chunks"] += 1 @@ -237,7 +241,7 @@ class LeRobotDatasetMetadata: episode_dict = { "episode_index": episode_index, - "tasks": [task], + "tasks": episode_tasks, "length": episode_length, } self.episodes.append(episode_dict) @@ -313,7 +317,8 @@ class LeRobotDatasetMetadata: features = {**features, **DEFAULT_FEATURES} - obj.tasks, obj.stats, obj.episodes = {}, {}, [] + obj.tasks, obj.task_to_task_index = {}, {} + obj.stats, obj.episodes = {}, [] obj.info = create_empty_dataset_info(CODEBASE_VERSION, fps, robot_type, features, use_videos) if len(obj.video_keys) > 0 and not use_videos: raise ValueError() @@ -691,10 +696,13 @@ class LeRobotDataset(torch.utils.data.Dataset): def create_episode_buffer(self, episode_index: int | None = None) -> dict: current_ep_idx = self.meta.total_episodes if episode_index is None else episode_index - return { - "size": 0, - **{key: current_ep_idx if key == "episode_index" else [] for key in self.features}, - } + ep_buffer = {} + # size and task are special cases that are not in self.features + ep_buffer["size"] = 0 + ep_buffer["task"] = [] + for key in self.features: + ep_buffer[key] = current_ep_idx if key == "episode_index" else [] + return ep_buffer def _get_image_file_path(self, episode_index: int, image_key: str, frame_index: int) -> Path: fpath = DEFAULT_IMAGE_PATH.format( @@ -718,6 +726,8 @@ class LeRobotDataset(torch.utils.data.Dataset): """ # TODO(aliberts, rcadene): Add sanity check for the input, check it's numpy or torch, # check the dtype and shape matches, etc. 
+        if "task" not in frame:
+            raise ValueError("The mandatory feature 'task' wasn't found in `frame` dictionary.")
+
         if self.episode_buffer is None:
             self.episode_buffer = self.create_episode_buffer()
 
@@ -728,13 +738,17 @@ class LeRobotDataset(torch.utils.data.Dataset):
         self.episode_buffer["timestamp"].append(timestamp)
 
         for key in frame:
-            if key not in self.features:
-                raise ValueError(key)
+            if key == "task":
+                # Note: we associate the task in natural language to its task index during `save_episode`
+                self.episode_buffer["task"].append(frame["task"])
+                continue
 
-            if self.features[key]["dtype"] not in ["image", "video"]:
-                item = frame[key].numpy() if isinstance(frame[key], torch.Tensor) else frame[key]
-                self.episode_buffer[key].append(item)
-            elif self.features[key]["dtype"] in ["image", "video"]:
+            if key not in self.features:
+                raise ValueError(
+                    f"An element of the frame is not in the features. '{key}' not in '{self.features.keys()}'."
+                )
+
+            if self.features[key]["dtype"] in ["image", "video"]:
                 img_path = self._get_image_file_path(
                     episode_index=self.episode_buffer["episode_index"], image_key=key, frame_index=frame_index
                 )
@@ -742,10 +756,13 @@ class LeRobotDataset(torch.utils.data.Dataset):
                 img_path.parent.mkdir(parents=True, exist_ok=True)
                 self._save_image(frame[key], img_path)
                 self.episode_buffer[key].append(str(img_path))
+            else:
+                item = frame[key].numpy() if isinstance(frame[key], torch.Tensor) else frame[key]
+                self.episode_buffer[key].append(item)
 
         self.episode_buffer["size"] += 1
 
-    def save_episode(self, task: str, encode_videos: bool = True, episode_data: dict | None = None) -> None:
+    def save_episode(self, encode_videos: bool = True, episode_data: dict | None = None) -> None:
         """
         This will save to disk the current episode in self.episode_buffer. Note that since it affects files on
         disk, it sets self.consolidated to False to ensure proper consolidation later on before uploading to
@@ -758,7 +775,11 @@ class LeRobotDataset(torch.utils.data.Dataset):
 
         if not episode_data:
            episode_buffer = self.episode_buffer
 
+        # size and task are special cases that won't be added to hf_dataset
         episode_length = episode_buffer.pop("size")
+        tasks = episode_buffer.pop("task")
+        episode_tasks = list(set(tasks))
+
         episode_index = episode_buffer["episode_index"]
         if episode_index != self.meta.total_episodes:
             # TODO(aliberts): Add option to use existing episode_index
@@ -772,21 +793,27 @@ class LeRobotDataset(torch.utils.data.Dataset):
                 "You must add one or several frames with `add_frame` before calling `save_episode`."
             )
 
-        task_index = self.meta.get_task_index(task)
-
         if not set(episode_buffer.keys()) == set(self.features):
-            raise ValueError()
+            raise ValueError(
+                f"Features from `episode_buffer` don't match the ones in `self.features`: '{set(episode_buffer.keys())}' vs '{set(self.features)}'"
+            )
+
+        episode_buffer["index"] = np.arange(self.meta.total_frames, self.meta.total_frames + episode_length)
+        episode_buffer["episode_index"] = np.full((episode_length,), episode_index)
+
+        # Add new tasks to the tasks dictionary
+        for task in episode_tasks:
+            task_index = self.meta.get_task_index(task)
+            if task_index is None:
+                self.meta.add_task(task)
+
+        # Given tasks in natural language, find their corresponding task indices
+        episode_buffer["task_index"] = np.array([self.meta.get_task_index(task) for task in tasks])
 
         for key, ft in self.features.items():
-            if key == "index":
-                episode_buffer[key] = np.arange(
-                    self.meta.total_frames, self.meta.total_frames + episode_length
-                )
-            elif key == "episode_index":
-                episode_buffer[key] = np.full((episode_length,), episode_index)
-            elif key == "task_index":
-                episode_buffer[key] = np.full((episode_length,), task_index)
-            elif ft["dtype"] in ["image", "video"]:
+            # index, episode_index, task_index are already processed above, and image and video
+            # are processed separately by storing image path and frame info as metadata
+            if key in ["index", "episode_index", "task_index"] or ft["dtype"] in ["image", "video"]:
                 continue
             elif len(ft["shape"]) == 1 and ft["shape"][0] == 1:
                 episode_buffer[key] = np.array(episode_buffer[key], dtype=ft["dtype"])
@@ -798,7 +825,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
         self._wait_image_writer()
         self._save_episode_table(episode_buffer, episode_index)
 
-        self.meta.save_episode(episode_index, episode_length, task, task_index)
+        self.meta.save_episode(episode_index, episode_length, episode_tasks)
 
         if encode_videos and len(self.meta.video_keys) > 0:
             video_paths = self.encode_episode_videos(episode_index)
diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py
index 612bac39..e6ec169e 100644
--- a/lerobot/common/datasets/utils.py
+++ b/lerobot/common/datasets/utils.py
@@ -170,7 +170,9 @@ def load_stats(local_dir: Path) -> dict:
 
 def load_tasks(local_dir: Path) -> dict:
     tasks = load_jsonlines(local_dir / TASKS_PATH)
-    return {item["task_index"]: item["task"] for item in sorted(tasks, key=lambda x: x["task_index"])}
+    tasks = {item["task_index"]: item["task"] for item in sorted(tasks, key=lambda x: x["task_index"])}
+    task_to_task_index = {task: task_index for task_index, task in tasks.items()}
+    return tasks, task_to_task_index
 
 
 def load_episodes(local_dir: Path) -> dict:
diff --git a/lerobot/common/robot_devices/control_utils.py b/lerobot/common/robot_devices/control_utils.py
index 9368b89d..5dcafa69 100644
--- a/lerobot/common/robot_devices/control_utils.py
+++ b/lerobot/common/robot_devices/control_utils.py
@@ -183,6 +183,7 @@ def record_episode(
     device,
     use_amp,
     fps,
+    single_task,
 ):
     control_loop(
         robot=robot,
@@ -195,6 +196,7 @@ def record_episode(
         use_amp=use_amp,
         fps=fps,
         teleoperate=policy is None,
+        single_task=single_task,
     )
 
 
@@ -210,6 +212,7 @@ def control_loop(
     device: torch.device | str | None = None,
     use_amp: bool | None = None,
     fps: int | None = None,
+    single_task: str | None = None,
 ):
     # TODO(rcadene): Add option to record logs
     if not robot.is_connected:
@@ -224,6 +227,9 @@ def control_loop(
     if teleoperate and policy is not None:
        raise ValueError("When `teleoperate` is True, `policy` should be None.")
+    if dataset is not None and single_task is None:
+        raise ValueError("You need to provide a task as an argument in `single_task`.")
+
     if dataset is not None and fps is not None and dataset.fps != fps:
         raise ValueError(f"The dataset fps should be equal to requested fps ({dataset.fps} != {fps}).")
 
@@ -248,7 +254,7 @@ def control_loop(
             action = {"action": action}
 
         if dataset is not None:
-            frame = {**observation, **action}
+            frame = {**observation, **action, "task": single_task}
             dataset.add_frame(frame)
 
         if display_cameras and not is_headless():
diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py
index 3fdb0acc..de67e331 100644
--- a/lerobot/scripts/control_robot.py
+++ b/lerobot/scripts/control_robot.py
@@ -263,8 +263,8 @@ def record(
         log_say(f"Recording episode {dataset.num_episodes}", cfg.play_sounds)
         record_episode(
-            dataset=dataset,
             robot=robot,
+            dataset=dataset,
             events=events,
             episode_time_s=cfg.episode_time_s,
             display_cameras=cfg.display_cameras,
@@ -272,6 +272,7 @@ def record(
             device=cfg.device,
             use_amp=cfg.use_amp,
             fps=cfg.fps,
+            single_task=cfg.single_task,
         )
 
         # Execute a few seconds without recording to give time to manually reset the environment
@@ -291,7 +292,7 @@ def record(
             dataset.clear_episode_buffer()
             continue
 
-        dataset.save_episode(cfg.single_task)
+        dataset.save_episode()
         recorded_episodes += 1
 
         if events["stop_recording"]:
diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index 2945df41..460954c1 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -93,6 +93,24 @@ def test_dataset_initialization(lerobot_dataset_factory, tmp_path):
     assert dataset.num_frames == len(dataset)
 
 
+def test_add_frame_no_task(tmp_path):
+    features = {"1d": {"dtype": "float32", "shape": (1,), "names": None}}
+    dataset = LeRobotDataset.create(repo_id=DUMMY_REPO_ID, fps=30, root=tmp_path / "test", features=features)
+    with pytest.raises(ValueError, match="The mandatory feature 'task' wasn't found in `frame` dictionary."):
+        dataset.add_frame({"1d": torch.randn(1)})
+
+
+def test_add_frame(tmp_path):
+    features = {"1d": {"dtype": "float32", "shape": (1,), "names": None}}
+    dataset = LeRobotDataset.create(repo_id=DUMMY_REPO_ID, fps=30, root=tmp_path / "test", features=features)
+    dataset.add_frame({"1d": torch.randn(1), "task": "dummy"})
+    dataset.save_episode(encode_videos=False)
+    dataset.consolidate(run_compute_stats=False)
+    assert len(dataset) == 1
+    assert dataset[0]["task"] == "dummy"
+    assert dataset[0]["task_index"] == 0
+
+
 # TODO(aliberts):
 # - [ ] test various attributes & state from init and create
 # - [ ] test init with episodes and check num_frames
From 7c2bbee6136cca964383f9c1eb3e4e82ab818312 Mon Sep 17 00:00:00 2001
From: Remi
Date: Fri, 14 Feb 2025 19:59:48 +0100
Subject: [PATCH 03/28] Validate features during `add_frame` + Add 2D-to-5D +
 Add string (#720)

---
 lerobot/common/datasets/image_writer.py    |  28 ++-
 lerobot/common/datasets/lerobot_dataset.py |  21 +-
 lerobot/common/datasets/utils.py           | 103 +++++++-
 lerobot/common/utils/utils.py              |  16 +-
 tests/fixtures/constants.py                |   2 +
 tests/fixtures/dataset_factories.py        |  18 ++
 tests/test_datasets.py                     | 263 +++++++++++++++++++--
 tests/test_image_writer.py                 |  50 ++--
 8 files changed, 448 insertions(+), 53 deletions(-)

diff --git a/lerobot/common/datasets/image_writer.py b/lerobot/common/datasets/image_writer.py
index 85dd6830..6fc0ee2f 100644
--- a/lerobot/common/datasets/image_writer.py
+++ b/lerobot/common/datasets/image_writer.py
@@ -38,22 +38,40 @@ def safe_stop_image_writer(func):
return wrapper -def image_array_to_image(image_array: np.ndarray) -> PIL.Image.Image: +def image_array_to_pil_image(image_array: np.ndarray, range_check: bool = True) -> PIL.Image.Image: # TODO(aliberts): handle 1 channel and 4 for depth images - if image_array.ndim == 3 and image_array.shape[0] in [1, 3]: + if image_array.ndim != 3: + raise ValueError(f"The array has {image_array.ndim} dimensions, but 3 is expected for an image.") + + if image_array.shape[0] == 3: # Transpose from pytorch convention (C, H, W) to (H, W, C) image_array = image_array.transpose(1, 2, 0) + + elif image_array.shape[-1] != 3: + raise NotImplementedError( + f"The image has {image_array.shape[-1]} channels, but 3 is required for now." + ) + if image_array.dtype != np.uint8: - # Assume the image is in [0, 1] range for floating-point data - image_array = np.clip(image_array, 0, 1) + if range_check: + max_ = image_array.max().item() + min_ = image_array.min().item() + if max_ > 1.0 or min_ < 0.0: + raise ValueError( + "The image data type is float, which requires values in the range [0.0, 1.0]. " + f"However, the provided range is [{min_}, {max_}]. Please adjust the range or " + "provide a uint8 image with values in the range [0, 255]." + ) + image_array = (image_array * 255).astype(np.uint8) + return PIL.Image.fromarray(image_array) def write_image(image: np.ndarray | PIL.Image.Image, fpath: Path): try: if isinstance(image, np.ndarray): - img = image_array_to_image(image) + img = image_array_to_pil_image(image) elif isinstance(image, PIL.Image.Image): img = image else: diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 877703d7..5c4ae68e 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -39,6 +39,7 @@ from lerobot.common.datasets.utils import ( TASKS_PATH, append_jsonlines, check_delta_timestamps, + check_frame_features, check_timestamps_sync, check_version_compatibility, create_branch, @@ -724,10 +725,12 @@ class LeRobotDataset(torch.utils.data.Dataset): temporary directory — nothing is written to disk. To save those frames, the 'save_episode()' method then needs to be called. """ - # TODO(aliberts, rcadene): Add sanity check for the input, check it's numpy or torch, - # check the dtype and shape matches, etc. 
-        if "task" not in frame:
-            raise ValueError("The mandatory feature 'task' wasn't found in `frame` dictionary.")
+        # Convert torch to numpy if needed
+        for name in frame:
+            if isinstance(frame[name], torch.Tensor):
+                frame[name] = frame[name].numpy()
+
+        check_frame_features(frame, self.features)
 
         if self.episode_buffer is None:
             self.episode_buffer = self.create_episode_buffer()
@@ -757,8 +760,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
                 self._save_image(frame[key], img_path)
                 self.episode_buffer[key].append(str(img_path))
             else:
-                item = frame[key].numpy() if isinstance(frame[key], torch.Tensor) else frame[key]
-                self.episode_buffer[key].append(item)
+                self.episode_buffer[key].append(frame[key])
 
         self.episode_buffer["size"] += 1
 
@@ -815,12 +817,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
             # are processed separately by storing image path and frame info as metadata
             if key in ["index", "episode_index", "task_index"] or ft["dtype"] in ["image", "video"]:
                 continue
-            elif len(ft["shape"]) == 1 and ft["shape"][0] == 1:
-                episode_buffer[key] = np.array(episode_buffer[key], dtype=ft["dtype"])
-            elif len(ft["shape"]) == 1 and ft["shape"][0] > 1:
-                episode_buffer[key] = np.stack(episode_buffer[key])
-            else:
-                raise ValueError(key)
+            episode_buffer[key] = np.stack(episode_buffer[key])
 
         self._wait_image_writer()
         self._save_episode_table(episode_buffer, episode_index)
diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py
index e6ec169e..505a5492 100644
--- a/lerobot/common/datasets/utils.py
+++ b/lerobot/common/datasets/utils.py
@@ -35,6 +35,7 @@ from PIL import Image as PILImage
 from torchvision import transforms
 
 from lerobot.common.robot_devices.robots.utils import Robot
+from lerobot.common.utils.utils import is_valid_numpy_dtype_string
 from lerobot.configs.types import DictLike, FeatureType, PolicyFeature
 
 DEFAULT_CHUNK_SIZE = 1000  # Max number of episodes per chunk
@@ -203,7 +204,7 @@ def hf_transform_to_torch(items_dict: dict[torch.Tensor | None]):
         elif first_item is None:
             pass
         else:
-            items_dict[key] = [torch.tensor(x) for x in items_dict[key]]
+            items_dict[key] = [x if isinstance(x, str) else torch.tensor(x) for x in items_dict[key]]
     return items_dict
 
 
@@ -285,11 +286,20 @@ def get_hf_features_from_features(features: dict) -> datasets.Features:
             hf_features[key] = datasets.Image()
         elif ft["shape"] == (1,):
             hf_features[key] = datasets.Value(dtype=ft["dtype"])
-        else:
-            assert len(ft["shape"]) == 1
+        elif len(ft["shape"]) == 1:
             hf_features[key] = datasets.Sequence(
                 length=ft["shape"][0], feature=datasets.Value(dtype=ft["dtype"])
             )
+        elif len(ft["shape"]) == 2:
+            hf_features[key] = datasets.Array2D(shape=ft["shape"], dtype=ft["dtype"])
+        elif len(ft["shape"]) == 3:
+            hf_features[key] = datasets.Array3D(shape=ft["shape"], dtype=ft["dtype"])
+        elif len(ft["shape"]) == 4:
+            hf_features[key] = datasets.Array4D(shape=ft["shape"], dtype=ft["dtype"])
+        elif len(ft["shape"]) == 5:
+            hf_features[key] = datasets.Array5D(shape=ft["shape"], dtype=ft["dtype"])
+        else:
+            raise ValueError(f"Corresponding feature is not valid: {ft}")
 
     return datasets.Features(hf_features)
 
@@ -606,3 +616,90 @@ class IterableNamespace(SimpleNamespace):
 
     def keys(self):
         return vars(self).keys()
+
+
+def check_frame_features(frame: dict, features: dict):
+    optional_features = {"timestamp"}
+    expected_features = (set(features) - set(DEFAULT_FEATURES.keys())) | {"task"}
+    actual_features = set(frame.keys())
+
+    error_message = check_features_presence(actual_features, expected_features,
optional_features) + + if "task" in frame: + error_message += check_feature_string("task", frame["task"]) + + common_features = actual_features & (expected_features | optional_features) + for name in common_features - {"task"}: + error_message += check_feature_dtype_and_shape(name, features[name], frame[name]) + + if error_message: + raise ValueError(error_message) + + +def check_features_presence( + actual_features: set[str], expected_features: set[str], optional_features: set[str] +): + error_message = "" + missing_features = expected_features - actual_features + extra_features = actual_features - (expected_features | optional_features) + + if missing_features or extra_features: + error_message += "Feature mismatch in `frame` dictionary:\n" + if missing_features: + error_message += f"Missing features: {missing_features}\n" + if extra_features: + error_message += f"Extra features: {extra_features}\n" + + return error_message + + +def check_feature_dtype_and_shape(name: str, feature: dict, value: np.ndarray | PILImage.Image | str): + expected_dtype = feature["dtype"] + expected_shape = feature["shape"] + if is_valid_numpy_dtype_string(expected_dtype): + return check_feature_numpy_array(name, expected_dtype, expected_shape, value) + elif expected_dtype in ["image", "video"]: + return check_feature_image_or_video(name, expected_shape, value) + elif expected_dtype == "string": + return check_feature_string(name, value) + else: + raise NotImplementedError(f"The feature dtype '{expected_dtype}' is not implemented yet.") + + +def check_feature_numpy_array(name: str, expected_dtype: str, expected_shape: list[int], value: np.ndarray): + error_message = "" + if isinstance(value, np.ndarray): + actual_dtype = value.dtype + actual_shape = value.shape + + if actual_dtype != np.dtype(expected_dtype): + error_message += f"The feature '{name}' of dtype '{actual_dtype}' is not of the expected dtype '{expected_dtype}'.\n" + + if actual_shape != expected_shape: + error_message += f"The feature '{name}' of shape '{actual_shape}' does not have the expected shape '{expected_shape}'.\n" + else: + error_message += f"The feature '{name}' is not a 'np.ndarray'. Expected type is '{expected_dtype}', but type '{type(value)}' provided instead.\n" + + return error_message + + +def check_feature_image_or_video(name: str, expected_shape: list[str], value: np.ndarray | PILImage.Image): + # Note: The check of pixels range ([0,1] for float and [0,255] for uint8) is done by the image writer threads. 
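+    # Illustrative (editor's note, not part of the original patch): for an expected shape of
+    # (3, 96, 128), both np.zeros((3, 96, 128)) and np.zeros((96, 128, 3)) pass this check,
+    # while np.zeros((3, 128, 96)) or a 2D array is rejected with the error message below.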
+ error_message = "" + if isinstance(value, np.ndarray): + actual_shape = value.shape + c, h, w = expected_shape + if len(actual_shape) != 3 or (actual_shape != (c, h, w) and actual_shape != (h, w, c)): + error_message += f"The feature '{name}' of shape '{actual_shape}' does not have the expected shape '{(c, h, w)}' or '{(h, w, c)}'.\n" + elif isinstance(value, PILImage.Image): + pass + else: + error_message += f"The feature '{name}' is expected to be of type 'PIL.Image' or 'np.ndarray' channel first or channel last, but type '{type(value)}' provided instead.\n" + + return error_message + + +def check_feature_string(name: str, value: str): + if not isinstance(value, str): + return f"The feature '{name}' is expected to be of type 'str', but type '{type(value)}' provided instead.\n" + return "" diff --git a/lerobot/common/utils/utils.py b/lerobot/common/utils/utils.py index 015d1ede..d0c12b30 100644 --- a/lerobot/common/utils/utils.py +++ b/lerobot/common/utils/utils.py @@ -21,6 +21,7 @@ from copy import copy from datetime import datetime, timezone from pathlib import Path +import numpy as np import torch @@ -200,5 +201,18 @@ def get_channel_first_image_shape(image_shape: tuple) -> tuple: return shape -def has_method(cls: object, method_name: str): +def has_method(cls: object, method_name: str) -> bool: return hasattr(cls, method_name) and callable(getattr(cls, method_name)) + + +def is_valid_numpy_dtype_string(dtype_str: str) -> bool: + """ + Return True if a given string can be converted to a numpy dtype. + """ + try: + # Attempt to convert the string to a numpy dtype + np.dtype(dtype_str) + return True + except TypeError: + # If a TypeError is raised, the string is not a valid dtype + return False diff --git a/tests/fixtures/constants.py b/tests/fixtures/constants.py index bfe6c339..7d80d2b7 100644 --- a/tests/fixtures/constants.py +++ b/tests/fixtures/constants.py @@ -27,3 +27,5 @@ DUMMY_VIDEO_INFO = { "video.is_depth_map": False, "has_audio": False, } +DUMMY_CHW = (3, 96, 128) +DUMMY_HWC = (96, 128, 3) diff --git a/tests/fixtures/dataset_factories.py b/tests/fixtures/dataset_factories.py index c28a1165..bdd2dc54 100644 --- a/tests/fixtures/dataset_factories.py +++ b/tests/fixtures/dataset_factories.py @@ -17,6 +17,7 @@ from lerobot.common.datasets.utils import ( get_hf_features_from_features, hf_transform_to_torch, ) +from lerobot.common.robot_devices.robots.utils import Robot from tests.fixtures.constants import ( DEFAULT_FPS, DUMMY_CAMERA_FEATURES, @@ -394,3 +395,20 @@ def lerobot_dataset_factory( return LeRobotDataset(repo_id=repo_id, root=root, **kwargs) return _create_lerobot_dataset + + +@pytest.fixture(scope="session") +def empty_lerobot_dataset_factory(): + def _create_empty_lerobot_dataset( + root: Path, + repo_id: str = DUMMY_REPO_ID, + fps: int = DEFAULT_FPS, + robot: Robot | None = None, + robot_type: str | None = None, + features: dict | None = None, + ): + return LeRobotDataset.create( + repo_id=repo_id, fps=fps, root=root, robot=robot, robot_type=robot_type, features=features + ) + + return _create_empty_lerobot_dataset diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 460954c1..54d92125 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -15,15 +15,18 @@ # limitations under the License. 
 import json
 import logging
+import re
 from copy import deepcopy
 from itertools import chain
 from pathlib import Path
 
 import einops
+import numpy as np
 import pytest
 import torch
 from datasets import Dataset
 from huggingface_hub import HfApi
+from PIL import Image
 from safetensors.torch import load_file
 
 import lerobot
@@ -33,6 +36,7 @@ from lerobot.common.datasets.compute_stats import (
     get_stats_einops_patterns,
 )
 from lerobot.common.datasets.factory import make_dataset
+from lerobot.common.datasets.image_writer import image_array_to_pil_image
 from lerobot.common.datasets.lerobot_dataset import (
     LeRobotDataset,
     MultiLeRobotDataset,
@@ -49,11 +53,27 @@ from lerobot.common.robot_devices.robots.utils import make_robot
 from lerobot.common.utils.random_utils import seeded_context
 from lerobot.configs.default import DatasetConfig
 from lerobot.configs.train import TrainPipelineConfig
-from tests.fixtures.constants import DUMMY_REPO_ID
+from tests.fixtures.constants import DUMMY_CHW, DUMMY_HWC, DUMMY_REPO_ID
 from tests.utils import DEVICE, require_x86_64_kernel
 
 
-def test_same_attributes_defined(lerobot_dataset_factory, tmp_path):
+@pytest.fixture
+def image_dataset(tmp_path, empty_lerobot_dataset_factory):
+    features = {
+        "image": {
+            "dtype": "image",
+            "shape": DUMMY_CHW,
+            "names": [
+                "channels",
+                "height",
+                "width",
+            ],
+        }
+    }
+    return empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
+
+
+def test_same_attributes_defined(tmp_path, lerobot_dataset_factory):
     """
     Instantiate a LeRobotDataset both ways with '__init__()' and 'create()' and verify that instantiated
     objects have the same sets of attributes defined.
@@ -76,14 +96,14 @@ def test_same_attributes_defined(lerobot_dataset_factory, tmp_path):
     assert init_attr == create_attr
 
 
-def test_dataset_initialization(lerobot_dataset_factory, tmp_path):
+def test_dataset_initialization(tmp_path, lerobot_dataset_factory):
     kwargs = {
         "repo_id": DUMMY_REPO_ID,
         "total_episodes": 10,
         "total_frames": 400,
         "episodes": [2, 5, 6],
     }
-    dataset = lerobot_dataset_factory(root=tmp_path, **kwargs)
+    dataset = lerobot_dataset_factory(root=tmp_path / "test", **kwargs)
 
     assert dataset.repo_id == kwargs["repo_id"]
     assert dataset.meta.total_episodes == kwargs["total_episodes"]
@@ -93,28 +113,243 @@ def test_dataset_initialization(lerobot_dataset_factory, tmp_path):
     assert dataset.num_frames == len(dataset)
 
 
-def test_add_frame_no_task(tmp_path):
-    features = {"1d": {"dtype": "float32", "shape": (1,), "names": None}}
-    dataset = LeRobotDataset.create(repo_id=DUMMY_REPO_ID, fps=30, root=tmp_path / "test", features=features)
-    with pytest.raises(ValueError, match="The mandatory feature 'task' wasn't found in `frame` dictionary."):
-        dataset.add_frame({"1d": torch.randn(1)})
+def test_add_frame_missing_task(tmp_path, empty_lerobot_dataset_factory):
+    features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
+    dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
+    with pytest.raises(
+        ValueError, match="Feature mismatch in `frame` dictionary:\nMissing features: {'task'}\n"
+    ):
+        dataset.add_frame({"state": torch.randn(1)})
 
 
-def test_add_frame(tmp_path):
-    features = {"1d": {"dtype": "float32", "shape": (1,), "names": None}}
-    dataset = LeRobotDataset.create(repo_id=DUMMY_REPO_ID, fps=30, root=tmp_path / "test", features=features)
-    dataset.add_frame({"1d": torch.randn(1), "task": "dummy"})
-    dataset.save_episode(encode_videos=False)
-    dataset.consolidate(run_compute_stats=False)
-    assert len(dataset) == 1
-    assert dataset[0]["task"] == "dummy"
-    assert dataset[0]["task_index"] == 0
+def test_add_frame_missing_feature(tmp_path, empty_lerobot_dataset_factory):
+    features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
"float32", "shape": (1,), "names": None}} + dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) + with pytest.raises( + ValueError, match="Feature mismatch in `frame` dictionary:\nMissing features: {'state'}\n" + ): + dataset.add_frame({"task": "Dummy task"}) + + +def test_add_frame_extra_feature(tmp_path, empty_lerobot_dataset_factory): + features = {"state": {"dtype": "float32", "shape": (1,), "names": None}} + dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) + with pytest.raises( + ValueError, match="Feature mismatch in `frame` dictionary:\nExtra features: {'extra'}\n" + ): + dataset.add_frame({"state": torch.randn(1), "task": "Dummy task", "extra": "dummy_extra"}) + + +def test_add_frame_wrong_type(tmp_path, empty_lerobot_dataset_factory): + features = {"state": {"dtype": "float32", "shape": (1,), "names": None}} + dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) + with pytest.raises( + ValueError, match="The feature 'state' of dtype 'float16' is not of the expected dtype 'float32'.\n" + ): + dataset.add_frame({"state": torch.randn(1, dtype=torch.float16), "task": "Dummy task"}) + + +def test_add_frame_wrong_shape(tmp_path, empty_lerobot_dataset_factory): + features = {"state": {"dtype": "float32", "shape": (2,), "names": None}} + dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) + with pytest.raises( + ValueError, + match=re.escape("The feature 'state' of shape '(1,)' does not have the expected shape '(2,)'.\n"), + ): + dataset.add_frame({"state": torch.randn(1), "task": "Dummy task"}) + + +def test_add_frame_wrong_shape_python_float(tmp_path, empty_lerobot_dataset_factory): + features = {"state": {"dtype": "float32", "shape": (1,), "names": None}} + dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) + with pytest.raises( + ValueError, + match=re.escape( + "The feature 'state' is not a 'np.ndarray'. Expected type is 'float32', but type '' provided instead.\n" + ), + ): + dataset.add_frame({"state": 1.0, "task": "Dummy task"}) + + +def test_add_frame_wrong_shape_torch_ndim_0(tmp_path, empty_lerobot_dataset_factory): + features = {"state": {"dtype": "float32", "shape": (1,), "names": None}} + dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) + with pytest.raises( + ValueError, + match=re.escape("The feature 'state' of shape '()' does not have the expected shape '(1,)'.\n"), + ): + dataset.add_frame({"state": torch.tensor(1.0), "task": "Dummy task"}) + + +def test_add_frame_wrong_shape_numpy_ndim_0(tmp_path, empty_lerobot_dataset_factory): + features = {"state": {"dtype": "float32", "shape": (1,), "names": None}} + dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) + with pytest.raises( + ValueError, + match=re.escape( + "The feature 'state' is not a 'np.ndarray'. 
+        ),
+    ):
+        dataset.add_frame({"state": np.float32(1.0), "task": "Dummy task"})
+
+
+def test_add_frame(tmp_path, empty_lerobot_dataset_factory):
+    features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
+    dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
+    dataset.add_frame({"state": torch.randn(1), "task": "dummy"})
+    dataset.save_episode(encode_videos=False)
+    dataset.consolidate(run_compute_stats=False)
+
+    assert len(dataset) == 1
+    assert dataset[0]["task"] == "dummy"
+    assert dataset[0]["task_index"] == 0
+    assert dataset[0]["state"].ndim == 0
+
+
+def test_add_frame_state_1d(tmp_path, empty_lerobot_dataset_factory):
+    features = {"state": {"dtype": "float32", "shape": (2,), "names": None}}
+    dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
+    dataset.add_frame({"state": torch.randn(2), "task": "dummy"})
+    dataset.save_episode(encode_videos=False)
+    dataset.consolidate(run_compute_stats=False)
+
+    assert dataset[0]["state"].shape == torch.Size([2])
+
+
+def test_add_frame_state_2d(tmp_path, empty_lerobot_dataset_factory):
+    features = {"state": {"dtype": "float32", "shape": (2, 4), "names": None}}
+    dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
+    dataset.add_frame({"state": torch.randn(2, 4), "task": "dummy"})
+    dataset.save_episode(encode_videos=False)
+    dataset.consolidate(run_compute_stats=False)
+
+    assert dataset[0]["state"].shape == torch.Size([2, 4])
+
+
+def test_add_frame_state_3d(tmp_path, empty_lerobot_dataset_factory):
+    features = {"state": {"dtype": "float32", "shape": (2, 4, 3), "names": None}}
+    dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
+    dataset.add_frame({"state": torch.randn(2, 4, 3), "task": "dummy"})
+    dataset.save_episode(encode_videos=False)
+    dataset.consolidate(run_compute_stats=False)
+
+    assert dataset[0]["state"].shape == torch.Size([2, 4, 3])
+
+
+def test_add_frame_state_4d(tmp_path, empty_lerobot_dataset_factory):
+    features = {"state": {"dtype": "float32", "shape": (2, 4, 3, 5), "names": None}}
+    dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
+    dataset.add_frame({"state": torch.randn(2, 4, 3, 5), "task": "dummy"})
+    dataset.save_episode(encode_videos=False)
+    dataset.consolidate(run_compute_stats=False)
+
+    assert dataset[0]["state"].shape == torch.Size([2, 4, 3, 5])
+
+
+def test_add_frame_state_5d(tmp_path, empty_lerobot_dataset_factory):
+    features = {"state": {"dtype": "float32", "shape": (2, 4, 3, 5, 1), "names": None}}
+    dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
+    dataset.add_frame({"state": torch.randn(2, 4, 3, 5, 1), "task": "dummy"})
+    dataset.save_episode(encode_videos=False)
+    dataset.consolidate(run_compute_stats=False)
+
+    assert dataset[0]["state"].shape == torch.Size([2, 4, 3, 5, 1])
+
+
+def test_add_frame_state_numpy(tmp_path, empty_lerobot_dataset_factory):
+    features = {"state": {"dtype": "float32", "shape": (1,), "names": None}}
+    dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features)
+    dataset.add_frame({"state": np.array([1], dtype=np.float32), "task": "Dummy task"})
+    dataset.save_episode(encode_videos=False)
+    dataset.consolidate(run_compute_stats=False)
+
+    assert dataset[0]["state"].ndim == 0
+
+
+def test_add_frame_string(tmp_path, empty_lerobot_dataset_factory):
+    features = {"caption": {"dtype": "string",
"shape": (1,), "names": None}} + dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) + dataset.add_frame({"caption": "Dummy caption", "task": "Dummy task"}) + dataset.save_episode(encode_videos=False) + dataset.consolidate(run_compute_stats=False) + + assert dataset[0]["caption"] == "Dummy caption" + + +def test_add_frame_image_wrong_shape(image_dataset): + dataset = image_dataset + with pytest.raises( + ValueError, + match=re.escape( + "The feature 'image' of shape '(3, 128, 96)' does not have the expected shape '(3, 96, 128)' or '(96, 128, 3)'.\n" + ), + ): + c, h, w = DUMMY_CHW + dataset.add_frame({"image": torch.randn(c, w, h), "task": "Dummy task"}) + + +def test_add_frame_image_wrong_range(image_dataset): + """This test will display the following error message from a thread: + ``` + Error writing image ...test_add_frame_image_wrong_ran0/test/images/image/episode_000000/frame_000000.png: + The image data type is float, which requires values in the range [0.0, 1.0]. However, the provided range is [0.009678772038470007, 254.9776492089887]. + Please adjust the range or provide a uint8 image with values in the range [0, 255] + ``` + Hence the image won't be saved on disk and save_episode will raise `FileNotFoundError`. + """ + dataset = image_dataset + dataset.add_frame({"image": np.random.rand(*DUMMY_CHW) * 255, "task": "Dummy task"}) + with pytest.raises(FileNotFoundError): + dataset.save_episode(encode_videos=False) + + +def test_add_frame_image(image_dataset): + dataset = image_dataset + dataset.add_frame({"image": np.random.rand(*DUMMY_CHW), "task": "Dummy task"}) + dataset.save_episode(encode_videos=False) + dataset.consolidate(run_compute_stats=False) + + assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW) + + +def test_add_frame_image_h_w_c(image_dataset): + dataset = image_dataset + dataset.add_frame({"image": np.random.rand(*DUMMY_HWC), "task": "Dummy task"}) + dataset.save_episode(encode_videos=False) + dataset.consolidate(run_compute_stats=False) + + assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW) + + +def test_add_frame_image_uint8(image_dataset): + dataset = image_dataset + image = np.random.randint(0, 256, DUMMY_HWC, dtype=np.uint8) + dataset.add_frame({"image": image, "task": "Dummy task"}) + dataset.save_episode(encode_videos=False) + dataset.consolidate(run_compute_stats=False) + + assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW) + + +def test_add_frame_image_pil(image_dataset): + dataset = image_dataset + image = np.random.randint(0, 256, DUMMY_HWC, dtype=np.uint8) + dataset.add_frame({"image": Image.fromarray(image), "task": "Dummy task"}) + dataset.save_episode(encode_videos=False) + dataset.consolidate(run_compute_stats=False) + + assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW) + + +def test_image_array_to_pil_image_wrong_range_float_0_255(): + image = np.random.rand(*DUMMY_HWC) * 255 + with pytest.raises(ValueError): + image_array_to_pil_image(image) # TODO(aliberts): # - [ ] test various attributes & state from init and create # - [ ] test init with episodes and check num_frames -# - [ ] test add_frame # - [ ] test add_episode # - [ ] test consolidate # - [ ] test push_to_hub diff --git a/tests/test_image_writer.py b/tests/test_image_writer.py index f51e86b4..c7fc11f2 100644 --- a/tests/test_image_writer.py +++ b/tests/test_image_writer.py @@ -9,10 +9,11 @@ from PIL import Image from lerobot.common.datasets.image_writer import ( AsyncImageWriter, - image_array_to_image, + image_array_to_pil_image, 
     safe_stop_image_writer,
     write_image,
 )
+from tests.fixtures.constants import DUMMY_HWC
 
 DUMMY_IMAGE = "test_image.png"
 
@@ -48,49 +49,62 @@ def test_zero_threads():
         AsyncImageWriter(num_processes=0, num_threads=0)
 
 
-def test_image_array_to_image_rgb(img_array_factory):
+def test_image_array_to_pil_image_float_array_wrong_range_0_255():
+    image = np.random.rand(*DUMMY_HWC) * 255
+    with pytest.raises(ValueError):
+        image_array_to_pil_image(image)
+
+
+def test_image_array_to_pil_image_float_array_wrong_range_neg_1_1():
+    image = np.random.rand(*DUMMY_HWC) * 2 - 1
+    with pytest.raises(ValueError):
+        image_array_to_pil_image(image)
+
+
+def test_image_array_to_pil_image_rgb(img_array_factory):
     img_array = img_array_factory(100, 100)
-    result_image = image_array_to_image(img_array)
+    result_image = image_array_to_pil_image(img_array)
     assert isinstance(result_image, Image.Image)
     assert result_image.size == (100, 100)
     assert result_image.mode == "RGB"
 
 
-def test_image_array_to_image_pytorch_format(img_array_factory):
+def test_image_array_to_pil_image_pytorch_format(img_array_factory):
     img_array = img_array_factory(100, 100).transpose(2, 0, 1)
-    result_image = image_array_to_image(img_array)
+    result_image = image_array_to_pil_image(img_array)
     assert isinstance(result_image, Image.Image)
     assert result_image.size == (100, 100)
     assert result_image.mode == "RGB"
 
 
-@pytest.mark.skip("TODO: implement")
-def test_image_array_to_image_single_channel(img_array_factory):
+def test_image_array_to_pil_image_single_channel(img_array_factory):
     img_array = img_array_factory(channels=1)
-    result_image = image_array_to_image(img_array)
-    assert isinstance(result_image, Image.Image)
-    assert result_image.size == (100, 100)
-    assert result_image.mode == "L"
+    with pytest.raises(NotImplementedError):
+        image_array_to_pil_image(img_array)
 
 
-def test_image_array_to_image_float_array(img_array_factory):
+def test_image_array_to_pil_image_4_channels(img_array_factory):
+    img_array = img_array_factory(channels=4)
+    with pytest.raises(NotImplementedError):
+        image_array_to_pil_image(img_array)
+
+
+def test_image_array_to_pil_image_float_array(img_array_factory):
     img_array = img_array_factory(dtype=np.float32)
-    result_image = image_array_to_image(img_array)
+    result_image = image_array_to_pil_image(img_array)
     assert isinstance(result_image, Image.Image)
     assert result_image.size == (100, 100)
     assert result_image.mode == "RGB"
     assert np.array(result_image).dtype == np.uint8
 
 
-def test_image_array_to_image_out_of_bounds_float():
-    # Float array with values out of [0, 1]
-    img_array = np.random.uniform(-1, 2, size=(100, 100, 3)).astype(np.float32)
-    result_image = image_array_to_image(img_array)
+def test_image_array_to_pil_image_uint8_array(img_array_factory):
+    img_array = img_array_factory(dtype=np.uint8)
+    result_image = image_array_to_pil_image(img_array)
     assert isinstance(result_image, Image.Image)
     assert result_image.size == (100, 100)
     assert result_image.mode == "RGB"
     assert np.array(result_image).dtype == np.uint8
-    assert np.array(result_image).min() >= 0 and np.array(result_image).max() <= 255
 
 
 def test_write_image_numpy(tmp_path, img_array_factory):
From 8426c64f4229a76929e03269e5ef7827abefc9c2 Mon Sep 17 00:00:00 2001
From: Simon Alibert <75076266+aliberts@users.noreply.github.com>
Date: Sat, 15 Feb 2025 15:47:16 +0100
Subject: [PATCH 04/28] Per-episode stats (#521)

Co-authored-by: Remi Cadene
Co-authored-by: Remi
---
 examples/port_datasets/pusht_zarr.py          |   4 +
 lerobot/common/datasets/compute_stats.py      | 300 ++++++--------
 lerobot/common/datasets/lerobot_dataset.py    | 108 +++--
 lerobot/common/datasets/utils.py              |  75 +++-
 .../v21/convert_dataset_v20_to_v21.py         |  87 ++++
 lerobot/common/datasets/v21/convert_stats.py  |  85 ++++
 lerobot/common/datasets/video_utils.py        |   4 +-
 lerobot/common/policies/normalize.py          |  34 +-
 .../common/robot_devices/control_configs.py   |   2 -
 lerobot/scripts/control_robot.py              |   5 +-
 tests/fixtures/dataset_factories.py           |  66 +++-
 tests/fixtures/files.py                       |  26 +-
 tests/fixtures/hub.py                         |  21 +-
 tests/test_cameras.py                         |   4 +-
 tests/test_compute_stats.py                   | 311 +++++++++++++++
 tests/test_control_robot.py                   |  58 ++-
 tests/test_datasets.py                        | 137 +------
 tests/test_push_dataset_to_hub.py             | 370 ------------------
 tests/test_robots.py                          |   7 +-
 19 files changed, 906 insertions(+), 798 deletions(-)
 create mode 100644 lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py
 create mode 100644 lerobot/common/datasets/v21/convert_stats.py
 create mode 100644 tests/test_compute_stats.py
 delete mode 100644 tests/test_push_dataset_to_hub.py

diff --git a/examples/port_datasets/pusht_zarr.py b/examples/port_datasets/pusht_zarr.py
index e9015d2c..2eaf1c1c 100644
--- a/examples/port_datasets/pusht_zarr.py
+++ b/examples/port_datasets/pusht_zarr.py
@@ -148,6 +148,10 @@ def main(raw_dir: Path, repo_id: str, mode: str = "video", push_to_hub: bool = T
     action = zarr_data["action"][:]
     image = zarr_data["img"]  # (b, h, w, c)
 
+    if image.dtype == np.float32 and image.max() == np.float32(255):
+        # HACK: images are loaded as float32 but they actually encode uint8 data
+        image = image.astype(np.uint8)
+
     episode_data_index = {
         "from": np.concatenate(([0], zarr_data.meta["episode_ends"][:-1])),
         "to": zarr_data.meta["episode_ends"],
diff --git a/lerobot/common/datasets/compute_stats.py b/lerobot/common/datasets/compute_stats.py
index c6211699..7519c743 100644
--- a/lerobot/common/datasets/compute_stats.py
+++ b/lerobot/common/datasets/compute_stats.py
@@ -13,202 +13,148 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from copy import deepcopy
-from math import ceil
+import numpy as np
 
-import einops
-import torch
-import tqdm
+from lerobot.common.datasets.utils import load_image_as_numpy
 
 
-def get_stats_einops_patterns(dataset, num_workers=0):
-    """These einops patterns will be used to aggregate batches and compute statistics.
+def estimate_num_samples(
+    dataset_len: int, min_num_samples: int = 100, max_num_samples: int = 10_000, power: float = 0.75
+) -> int:
+    """Heuristic to estimate the number of samples based on dataset size.
+    The power controls the sample growth relative to dataset size.
+    Lower the power for fewer samples.
 
-    Note: We assume the images are in channel first format
+    For default arguments, we have:
+    - from 1 to ~500, num_samples=100
+    - at 1000, num_samples=177
+    - at 2000, num_samples=299
+    - at 5000, num_samples=594
+    - at 10000, num_samples=1000
+    - at 20000, num_samples=1681
     """
+    if dataset_len < min_num_samples:
+        min_num_samples = dataset_len
+    return max(min_num_samples, min(int(dataset_len**power), max_num_samples))
 
-    dataloader = torch.utils.data.DataLoader(
-        dataset,
-        num_workers=num_workers,
-        batch_size=2,
-        shuffle=False,
-    )
-    batch = next(iter(dataloader))
 
-    stats_patterns = {}
+def sample_indices(data_len: int) -> list[int]:
+    num_samples = estimate_num_samples(data_len)
+    return np.round(np.linspace(0, data_len - 1, num_samples)).astype(int).tolist()
 
-    for key in dataset.features:
-        # sanity check that tensors are not float64
-        assert batch[key].dtype != torch.float64
 
-        # if isinstance(feats_type, (VideoFrame, Image)):
-        if key in dataset.meta.camera_keys:
-            # sanity check that images are channel first
-            _, c, h, w = batch[key].shape
-            assert c < h and c < w, f"expect channel first images, but instead {batch[key].shape}"
+def sample_images(image_paths: list[str]) -> np.ndarray:
+    sampled_indices = sample_indices(len(image_paths))
+    images = []
+    for idx in sampled_indices:
+        path = image_paths[idx]
+        # we load as uint8 to reduce memory usage
+        img = load_image_as_numpy(path, dtype=np.uint8, channel_first=True)
+        images.append(img)
 
-            # sanity check that images are float32 in range [0,1]
-            assert batch[key].dtype == torch.float32, f"expect torch.float32, but instead {batch[key].dtype=}"
-            assert batch[key].max() <= 1, f"expect pixels lower than 1, but instead {batch[key].max()=}"
-            assert batch[key].min() >= 0, f"expect pixels greater than 1, but instead {batch[key].min()=}"
+    images = np.stack(images)
+    return images
 
-            stats_patterns[key] = "b c h w -> c 1 1"
-        elif batch[key].ndim == 2:
-            stats_patterns[key] = "b c -> c "
-        elif batch[key].ndim == 1:
-            stats_patterns[key] = "b -> 1"
+
+def get_feature_stats(array: np.ndarray, axis: tuple, keepdims: bool) -> dict[str, np.ndarray]:
+    return {
+        "min": np.min(array, axis=axis, keepdims=keepdims),
+        "max": np.max(array, axis=axis, keepdims=keepdims),
+        "mean": np.mean(array, axis=axis, keepdims=keepdims),
+        "std": np.std(array, axis=axis, keepdims=keepdims),
+        "count": np.array([len(array)]),
+    }
+
+
+def compute_episode_stats(episode_data: dict[str, list[str] | np.ndarray], features: dict) -> dict:
+    ep_stats = {}
+    for key, data in episode_data.items():
+        if features[key]["dtype"] == "string":
+            continue  # HACK: we should receive np.arrays of strings
+        elif features[key]["dtype"] in ["image", "video"]:
+            ep_ft_array = sample_images(data)  # data is a list of image paths
+            axes_to_reduce = (0, 2, 3)  # keep channel dim
+            keepdims = True
         else:
-            raise ValueError(f"{key}, {batch[key].shape}")
+            ep_ft_array = data  # data is already a np.ndarray
+            axes_to_reduce = 0  # compute stats over the first axis
+            keepdims = data.ndim == 1  # keep as np.array
 
-    return stats_patterns
+        ep_stats[key] = get_feature_stats(ep_ft_array, axis=axes_to_reduce, keepdims=keepdims)
+
+        # finally, we normalize and remove batch dim for images
+        if features[key]["dtype"] in ["image", "video"]:
+            ep_stats[key] = {
+                k: v if k == "count" else np.squeeze(v / 255.0, axis=0) for k, v in ep_stats[key].items()
+            }
+
+    return ep_stats
 
 
-def compute_stats(dataset, batch_size=8, num_workers=8, max_num_samples=None):
-    """Compute mean/std and min/max statistics of all
data keys in a LeRobotDataset.""" - if max_num_samples is None: - max_num_samples = len(dataset) - - # for more info on why we need to set the same number of workers, see `load_from_videos` - stats_patterns = get_stats_einops_patterns(dataset, num_workers) - - # mean and std will be computed incrementally while max and min will track the running value. - mean, std, max, min = {}, {}, {}, {} - for key in stats_patterns: - mean[key] = torch.tensor(0.0).float() - std[key] = torch.tensor(0.0).float() - max[key] = torch.tensor(-float("inf")).float() - min[key] = torch.tensor(float("inf")).float() - - def create_seeded_dataloader(dataset, batch_size, seed): - generator = torch.Generator() - generator.manual_seed(seed) - dataloader = torch.utils.data.DataLoader( - dataset, - num_workers=num_workers, - batch_size=batch_size, - shuffle=True, - drop_last=False, - generator=generator, - ) - return dataloader - - # Note: Due to be refactored soon. The point of storing `first_batch` is to make sure we don't get - # surprises when rerunning the sampler. - first_batch = None - running_item_count = 0 # for online mean computation - dataloader = create_seeded_dataloader(dataset, batch_size, seed=1337) - for i, batch in enumerate( - tqdm.tqdm(dataloader, total=ceil(max_num_samples / batch_size), desc="Compute mean, min, max") - ): - this_batch_size = len(batch["index"]) - running_item_count += this_batch_size - if first_batch is None: - first_batch = deepcopy(batch) - for key, pattern in stats_patterns.items(): - batch[key] = batch[key].float() - # Numerically stable update step for mean computation. - batch_mean = einops.reduce(batch[key], pattern, "mean") - # Hint: to update the mean we need x̄ₙ = (Nₙ₋₁x̄ₙ₋₁ + Bₙxₙ) / Nₙ, where the subscript represents - # the update step, N is the running item count, B is this batch size, x̄ is the running mean, - # and x is the current batch mean. Some rearrangement is then required to avoid risking - # numerical overflow. Another hint: Nₙ₋₁ = Nₙ - Bₙ. Rearrangement yields - # x̄ₙ = x̄ₙ₋₁ + Bₙ * (xₙ - x̄ₙ₋₁) / Nₙ - mean[key] = mean[key] + this_batch_size * (batch_mean - mean[key]) / running_item_count - max[key] = torch.maximum(max[key], einops.reduce(batch[key], pattern, "max")) - min[key] = torch.minimum(min[key], einops.reduce(batch[key], pattern, "min")) - - if i == ceil(max_num_samples / batch_size) - 1: - break - - first_batch_ = None - running_item_count = 0 # for online std computation - dataloader = create_seeded_dataloader(dataset, batch_size, seed=1337) - for i, batch in enumerate( - tqdm.tqdm(dataloader, total=ceil(max_num_samples / batch_size), desc="Compute std") - ): - this_batch_size = len(batch["index"]) - running_item_count += this_batch_size - # Sanity check to make sure the batches are still in the same order as before. - if first_batch_ is None: - first_batch_ = deepcopy(batch) - for key in stats_patterns: - assert torch.equal(first_batch_[key], first_batch[key]) - for key, pattern in stats_patterns.items(): - batch[key] = batch[key].float() - # Numerically stable update step for mean computation (where the mean is over squared - # residuals).See notes in the mean computation loop above. 
- batch_std = einops.reduce((batch[key] - mean[key]) ** 2, pattern, "mean") - std[key] = std[key] + this_batch_size * (batch_std - std[key]) / running_item_count - - if i == ceil(max_num_samples / batch_size) - 1: - break - - for key in stats_patterns: - std[key] = torch.sqrt(std[key]) - - stats = {} - for key in stats_patterns: - stats[key] = { - "mean": mean[key], - "std": std[key], - "max": max[key], - "min": min[key], - } - return stats +def _assert_type_and_shape(stats_list: list[dict[str, dict]]): + for i in range(len(stats_list)): + for fkey in stats_list[i]: + for k, v in stats_list[i][fkey].items(): + if not isinstance(v, np.ndarray): + raise ValueError( + f"Stats must be composed of numpy array, but key '{k}' of feature '{fkey}' is of type '{type(v)}' instead." + ) + if v.ndim == 0: + raise ValueError("Number of dimensions must be at least 1, and is 0 instead.") + if k == "count" and v.shape != (1,): + raise ValueError(f"Shape of 'count' must be (1), but is {v.shape} instead.") + if "image" in fkey and k != "count" and v.shape != (3, 1, 1): + raise ValueError(f"Shape of '{k}' must be (3,1,1), but is {v.shape} instead.") -def aggregate_stats(ls_datasets) -> dict[str, torch.Tensor]: - """Aggregate stats of multiple LeRobot datasets into one set of stats without recomputing from scratch. +def aggregate_feature_stats(stats_ft_list: list[dict[str, dict]]) -> dict[str, dict[str, np.ndarray]]: + """Aggregates stats for a single feature.""" + means = np.stack([s["mean"] for s in stats_ft_list]) + variances = np.stack([s["std"] ** 2 for s in stats_ft_list]) + counts = np.stack([s["count"] for s in stats_ft_list]) + total_count = counts.sum(axis=0) - The final stats will have the union of all data keys from each of the datasets. + # Prepare weighted mean by matching number of dimensions + while counts.ndim < means.ndim: + counts = np.expand_dims(counts, axis=-1) - The final stats will have the union of all data keys from each of the datasets. For instance: - - new_max = max(max_dataset_0, max_dataset_1, ...) + # Compute the weighted mean + weighted_means = means * counts + total_mean = weighted_means.sum(axis=0) / total_count + + # Compute the variance using the parallel algorithm + delta_means = means - total_mean + weighted_variances = (variances + delta_means**2) * counts + total_variance = weighted_variances.sum(axis=0) / total_count + + return { + "min": np.min(np.stack([s["min"] for s in stats_ft_list]), axis=0), + "max": np.max(np.stack([s["max"] for s in stats_ft_list]), axis=0), + "mean": total_mean, + "std": np.sqrt(total_variance), + "count": total_count, + } + + +def aggregate_stats(stats_list: list[dict[str, dict]]) -> dict[str, dict[str, np.ndarray]]: + """Aggregate stats from multiple compute_stats outputs into a single set of stats. + + The final stats will have the union of all data keys from each of the stats dicts. + + For instance: - new_min = min(min_dataset_0, min_dataset_1, ...) - - new_mean = (mean of all data) + - new_max = max(max_dataset_0, max_dataset_1, ...) 
+ - new_mean = (mean of all data, weighted by counts) - new_std = (std of all data) """ - data_keys = set() - for dataset in ls_datasets: - data_keys.update(dataset.meta.stats.keys()) - stats = {k: {} for k in data_keys} - for data_key in data_keys: - for stat_key in ["min", "max"]: - # compute `max(dataset_0["max"], dataset_1["max"], ...)` - stats[data_key][stat_key] = einops.reduce( - torch.stack( - [ds.meta.stats[data_key][stat_key] for ds in ls_datasets if data_key in ds.meta.stats], - dim=0, - ), - "n ... -> ...", - stat_key, - ) - total_samples = sum(d.num_frames for d in ls_datasets if data_key in d.meta.stats) - # Compute the "sum" statistic by multiplying each mean by the number of samples in the respective - # dataset, then divide by total_samples to get the overall "mean". - # NOTE: the brackets around (d.num_frames / total_samples) are needed tor minimize the risk of - # numerical overflow! - stats[data_key]["mean"] = sum( - d.meta.stats[data_key]["mean"] * (d.num_frames / total_samples) - for d in ls_datasets - if data_key in d.meta.stats - ) - # The derivation for standard deviation is a little more involved but is much in the same spirit as - # the computation of the mean. - # Given two sets of data where the statistics are known: - # σ_combined = sqrt[ (n1 * (σ1^2 + d1^2) + n2 * (σ2^2 + d2^2)) / (n1 + n2) ] - # where d1 = μ1 - μ_combined, d2 = μ2 - μ_combined - # NOTE: the brackets around (d.num_frames / total_samples) are needed tor minimize the risk of - # numerical overflow! - stats[data_key]["std"] = torch.sqrt( - sum( - ( - d.meta.stats[data_key]["std"] ** 2 - + (d.meta.stats[data_key]["mean"] - stats[data_key]["mean"]) ** 2 - ) - * (d.num_frames / total_samples) - for d in ls_datasets - if data_key in d.meta.stats - ) - ) - return stats + + _assert_type_and_shape(stats_list) + + data_keys = {key for stats in stats_list for key in stats} + aggregated_stats = {key: {} for key in data_keys} + + for key in data_keys: + stats_with_key = [stats[key] for stats in stats_list if key in stats] + aggregated_stats[key] = aggregate_feature_stats(stats_with_key) + + return aggregated_stats diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 5c4ae68e..c7f0b2b3 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -26,18 +26,17 @@ import PIL.Image import torch import torch.utils from datasets import load_dataset -from huggingface_hub import create_repo, snapshot_download, upload_folder +from huggingface_hub import HfApi, snapshot_download -from lerobot.common.datasets.compute_stats import aggregate_stats, compute_stats +from lerobot.common.datasets.compute_stats import aggregate_stats, compute_episode_stats from lerobot.common.datasets.image_writer import AsyncImageWriter, write_image from lerobot.common.datasets.utils import ( DEFAULT_FEATURES, DEFAULT_IMAGE_PATH, - EPISODES_PATH, INFO_PATH, - STATS_PATH, TASKS_PATH, append_jsonlines, + backward_compatible_episodes_stats, check_delta_timestamps, check_frame_features, check_timestamps_sync, @@ -52,10 +51,13 @@ from lerobot.common.datasets.utils import ( get_hub_safe_version, hf_transform_to_torch, load_episodes, + load_episodes_stats, load_info, load_stats, load_tasks, - serialize_dict, + write_episode, + write_episode_stats, + write_info, write_json, write_parquet, ) @@ -90,6 +92,17 @@ class LeRobotDatasetMetadata: self.stats = load_stats(self.root) self.tasks, self.task_to_task_index = load_tasks(self.root) self.episodes = 
load_episodes(self.root) + try: + self.episodes_stats = load_episodes_stats(self.root) + self.stats = aggregate_stats(list(self.episodes_stats.values())) + except FileNotFoundError: + logging.warning( + f"""'episodes_stats.jsonl' not found. Using global dataset stats for each episode instead. + Convert your dataset stats to the new format using this command: + python lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py --repo-id={self.repo_id} """ + ) + self.stats = load_stats(self.root) + self.episodes_stats = backward_compatible_episodes_stats(self.stats, self.episodes) def pull_from_repo( self, @@ -228,7 +241,13 @@ class LeRobotDatasetMetadata: } append_jsonlines(task_dict, self.root / TASKS_PATH) - def save_episode(self, episode_index: int, episode_length: int, episode_tasks: list[str]) -> None: + def save_episode( + self, + episode_index: int, + episode_length: int, + episode_tasks: list[str], + episode_stats: dict[str, dict], + ) -> None: self.info["total_episodes"] += 1 self.info["total_frames"] += episode_length @@ -238,21 +257,19 @@ class LeRobotDatasetMetadata: self.info["splits"] = {"train": f"0:{self.info['total_episodes']}"} self.info["total_videos"] += len(self.video_keys) - write_json(self.info, self.root / INFO_PATH) + write_info(self.info, self.root) episode_dict = { "episode_index": episode_index, "tasks": episode_tasks, "length": episode_length, } - self.episodes.append(episode_dict) - append_jsonlines(episode_dict, self.root / EPISODES_PATH) + self.episodes[episode_index] = episode_dict + write_episode(episode_dict, self.root) - # TODO(aliberts): refactor stats in save_episodes - # image_sampling = int(self.fps / 2) # sample 2 img/s for the stats - # ep_stats = compute_episode_stats(episode_buffer, self.features, episode_length, image_sampling=image_sampling) - # ep_stats = serialize_dict(ep_stats) - # append_jsonlines(ep_stats, self.root / STATS_PATH) + self.episodes_stats[episode_index] = episode_stats + self.stats = aggregate_stats([self.stats, episode_stats]) if self.stats else episode_stats + write_episode_stats(episode_index, episode_stats, self.root) def write_video_info(self) -> None: """ @@ -309,6 +326,7 @@ class LeRobotDatasetMetadata: ) else: # TODO(aliberts, rcadene): implement sanity check for features + features = {**features, **DEFAULT_FEATURES} # check if none of the features contains a "/" in their names, # as this would break the dict flattening in the stats computation, which uses '/' as separator @@ -319,7 +337,7 @@ class LeRobotDatasetMetadata: features = {**features, **DEFAULT_FEATURES} obj.tasks, obj.task_to_task_index = {}, {} - obj.stats, obj.episodes = {}, [] + obj.episodes_stats, obj.stats, obj.episodes = {}, {}, {} obj.info = create_empty_dataset_info(CODEBASE_VERSION, fps, robot_type, features, use_videos) if len(obj.video_keys) > 0 and not use_videos: raise ValueError() @@ -457,6 +475,9 @@ class LeRobotDataset(torch.utils.data.Dataset): # Load metadata self.meta = LeRobotDatasetMetadata(self.repo_id, self.root, self.local_files_only) + if self.episodes is not None and self.meta._version == CODEBASE_VERSION: + episodes_stats = [self.meta.episodes_stats[ep_idx] for ep_idx in self.episodes] + self.stats = aggregate_stats(episodes_stats) # Check version check_version_compatibility(self.repo_id, self.meta._version, CODEBASE_VERSION) @@ -479,10 +500,13 @@ class LeRobotDataset(torch.utils.data.Dataset): def push_to_hub( self, + branch: str | None = None, + create_card: bool = True, tags: list | None = None, license: str | None = "apache-2.0", 
push_videos: bool = True, private: bool = False, + allow_patterns: list[str] | str | None = None, **card_kwargs, ) -> None: if not self.consolidated: @@ -496,24 +520,32 @@ class LeRobotDataset(torch.utils.data.Dataset): if not push_videos: ignore_patterns.append("videos/") - create_repo( + hub_api = HfApi() + hub_api.create_repo( repo_id=self.repo_id, private=private, repo_type="dataset", exist_ok=True, ) + if branch: + create_branch(repo_id=self.repo_id, branch=branch, repo_type="dataset") - upload_folder( + hub_api.upload_folder( repo_id=self.repo_id, folder_path=self.root, repo_type="dataset", + revision=branch, + allow_patterns=allow_patterns, ignore_patterns=ignore_patterns, ) - card = create_lerobot_dataset_card( - tags=tags, dataset_info=self.meta.info, license=license, **card_kwargs - ) - card.push_to_hub(repo_id=self.repo_id, repo_type="dataset") - create_branch(repo_id=self.repo_id, branch=CODEBASE_VERSION, repo_type="dataset") + if create_card: + card = create_lerobot_dataset_card( + tags=tags, dataset_info=self.meta.info, license=license, **card_kwargs + ) + card.push_to_hub(repo_id=self.repo_id, repo_type="dataset", revision=branch) + + if not branch: + create_branch(repo_id=self.repo_id, branch=CODEBASE_VERSION, repo_type="dataset") def pull_from_repo( self, @@ -630,7 +662,7 @@ class LeRobotDataset(torch.utils.data.Dataset): if key not in self.meta.video_keys } - def _query_videos(self, query_timestamps: dict[str, list[float]], ep_idx: int) -> dict: + def _query_videos(self, query_timestamps: dict[str, list[float]], ep_idx: int) -> dict[str, torch.Tensor]: """Note: When using data workers (e.g. DataLoader with num_workers>0), do not call this function in the main process (e.g. by using a second Dataloader with num_workers=0). It will result in a Segmentation Fault. This probably happens because a memory reference to the video loader is created in @@ -660,8 +692,7 @@ class LeRobotDataset(torch.utils.data.Dataset): query_indices = None if self.delta_indices is not None: - current_ep_idx = self.episodes.index(ep_idx) if self.episodes is not None else ep_idx - query_indices, padding = self._get_query_indices(idx, current_ep_idx) + query_indices, padding = self._get_query_indices(idx, ep_idx) query_result = self._query_hf_dataset(query_indices) item = {**item, **padding} for key, val in query_result.items(): @@ -735,11 +766,13 @@ class LeRobotDataset(torch.utils.data.Dataset): if self.episode_buffer is None: self.episode_buffer = self.create_episode_buffer() + # Automatically add frame_index and timestamp to episode buffer frame_index = self.episode_buffer["size"] timestamp = frame.pop("timestamp") if "timestamp" in frame else frame_index / self.fps self.episode_buffer["frame_index"].append(frame_index) self.episode_buffer["timestamp"].append(timestamp) + # Add frame features to episode_buffer for key in frame: if key == "task": # Note: we associate the task in natural language to its task index during `save_episode` @@ -787,7 +820,7 @@ class LeRobotDataset(torch.utils.data.Dataset): # TODO(aliberts): Add option to use existing episode_index raise NotImplementedError( "You might have manually provided the episode_buffer with an episode_index that doesn't " - "match the total number of episodes in the dataset. This is not supported for now." + "match the total number of episodes already in the dataset. This is not supported for now." 
) if episode_length == 0: @@ -821,8 +854,8 @@ class LeRobotDataset(torch.utils.data.Dataset): self._wait_image_writer() self._save_episode_table(episode_buffer, episode_index) - - self.meta.save_episode(episode_index, episode_length, episode_tasks) + ep_stats = compute_episode_stats(episode_buffer, self.features) + self.meta.save_episode(episode_index, episode_length, episode_tasks, ep_stats) if encode_videos and len(self.meta.video_keys) > 0: video_paths = self.encode_episode_videos(episode_index) @@ -908,7 +941,7 @@ class LeRobotDataset(torch.utils.data.Dataset): return video_paths - def consolidate(self, run_compute_stats: bool = True, keep_image_files: bool = False) -> None: + def consolidate(self, keep_image_files: bool = False) -> None: self.hf_dataset = self.load_hf_dataset() self.episode_data_index = get_episode_data_index(self.meta.episodes, self.episodes) check_timestamps_sync(self.hf_dataset, self.episode_data_index, self.fps, self.tolerance_s) @@ -928,17 +961,7 @@ class LeRobotDataset(torch.utils.data.Dataset): parquet_files = list(self.root.rglob("*.parquet")) assert len(parquet_files) == self.num_episodes - if run_compute_stats: - self.stop_image_writer() - # TODO(aliberts): refactor stats in save_episodes - self.meta.stats = compute_stats(self) - serialized_stats = serialize_dict(self.meta.stats) - write_json(serialized_stats, self.root / STATS_PATH) - self.consolidated = True - else: - logging.warning( - "Skipping computation of the dataset statistics, dataset is not fully consolidated." - ) + self.consolidated = True @classmethod def create( @@ -1056,7 +1079,10 @@ class MultiLeRobotDataset(torch.utils.data.Dataset): self.image_transforms = image_transforms self.delta_timestamps = delta_timestamps - self.stats = aggregate_stats(self._datasets) + # TODO(rcadene, aliberts): We should not perform this aggregation for datasets + # with multiple robots of different ranges. Instead we should have one normalization + # per robot. 
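+        # Note: aggregate_stats weights each dataset by the "count" entries stored in its stats.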
+        self.stats = aggregate_stats([dataset.meta.stats for dataset in self._datasets])
 
     @property
     def repo_id_to_index(self):
diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py
index 505a5492..8b734042 100644
--- a/lerobot/common/datasets/utils.py
+++ b/lerobot/common/datasets/utils.py
@@ -43,6 +43,7 @@ DEFAULT_CHUNK_SIZE = 1000  # Max number of episodes per chunk
 INFO_PATH = "meta/info.json"
 EPISODES_PATH = "meta/episodes.jsonl"
 STATS_PATH = "meta/stats.json"
+EPISODES_STATS_PATH = "meta/episodes_stats.jsonl"
 TASKS_PATH = "meta/tasks.jsonl"
 
 DEFAULT_VIDEO_PATH = "videos/chunk-{episode_chunk:03d}/{video_key}/episode_{episode_index:06d}.mp4"
@@ -113,7 +114,16 @@ def get_nested_item(obj: DictLike, flattened_key: str, sep: str = "/") -> Any:
 
 
 def serialize_dict(stats: dict[str, torch.Tensor | np.ndarray | dict]) -> dict:
-    serialized_dict = {key: value.tolist() for key, value in flatten_dict(stats).items()}
+    serialized_dict = {}
+    for key, value in flatten_dict(stats).items():
+        if isinstance(value, (torch.Tensor, np.ndarray)):
+            serialized_dict[key] = value.tolist()
+        elif isinstance(value, np.generic):
+            serialized_dict[key] = value.item()
+        elif isinstance(value, (int, float)):
+            serialized_dict[key] = value
+        else:
+            raise NotImplementedError(f"The value '{value}' of type '{type(value)}' is not supported.")
     return unflatten_dict(serialized_dict)
 
 
@@ -154,6 +164,10 @@ def append_jsonlines(data: dict, fpath: Path) -> None:
         writer.write(data)
 
 
+def write_info(info: dict, local_dir: Path):
+    write_json(info, local_dir / INFO_PATH)
+
+
 def load_info(local_dir: Path) -> dict:
     info = load_json(local_dir / INFO_PATH)
     for ft in info["features"].values():
@@ -161,12 +175,29 @@ def load_info(local_dir: Path) -> dict:
     return info
 
 
-def load_stats(local_dir: Path) -> dict:
+def write_stats(stats: dict, local_dir: Path):
+    serialized_stats = serialize_dict(stats)
+    write_json(serialized_stats, local_dir / STATS_PATH)
+
+
+def cast_stats_to_numpy(stats: dict) -> dict[str, dict[str, np.ndarray]]:
+    stats = {key: np.array(value) for key, value in flatten_dict(stats).items()}
+    return unflatten_dict(stats)
+
+
+def load_stats(local_dir: Path) -> dict[str, dict[str, np.ndarray]]:
     if not (local_dir / STATS_PATH).exists():
         return None
     stats = load_json(local_dir / STATS_PATH)
-    stats = {key: torch.tensor(value) for key, value in flatten_dict(stats).items()}
-    return unflatten_dict(stats)
+    return cast_stats_to_numpy(stats)
+
+
+def write_task(task_index: int, task: str, local_dir: Path):
+    task_dict = {
+        "task_index": task_index,
+        "task": task,
+    }
+    append_jsonlines(task_dict, local_dir / TASKS_PATH)
 
 
 def load_tasks(local_dir: Path) -> dict:
@@ -176,16 +207,42 @@ def load_tasks(local_dir: Path) -> dict:
     return tasks, task_to_task_index
 
 
+def write_episode(episode: dict, local_dir: Path):
+    append_jsonlines(episode, local_dir / EPISODES_PATH)
+
+
 def load_episodes(local_dir: Path) -> dict:
-    return load_jsonlines(local_dir / EPISODES_PATH)
+    episodes = load_jsonlines(local_dir / EPISODES_PATH)
+    return {item["episode_index"]: item for item in sorted(episodes, key=lambda x: x["episode_index"])}
 
 
-def load_image_as_numpy(fpath: str | Path, dtype="float32", channel_first: bool = True) -> np.ndarray:
+def write_episode_stats(episode_index: int, episode_stats: dict, local_dir: Path):
+    # Wrap episode_stats in a dict with the episode_index so that each line of the
+    # jsonl file records which episode the serialized stats belong to.
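+    # An illustrative line of episodes_stats.jsonl (values elided):
+    # {"episode_index": 0, "stats": {"observation.state": {"min": [...], "max": [...], "mean": [...], "std": [...], "count": [...]}}}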
+ episode_stats = {"episode_index": episode_index, "stats": serialize_dict(episode_stats)} + append_jsonlines(episode_stats, local_dir / EPISODES_STATS_PATH) + + +def load_episodes_stats(local_dir: Path) -> dict: + episodes_stats = load_jsonlines(local_dir / EPISODES_STATS_PATH) + return { + item["episode_index"]: cast_stats_to_numpy(item["stats"]) + for item in sorted(episodes_stats, key=lambda x: x["episode_index"]) + } + + +def backward_compatible_episodes_stats(stats, episodes: list[int]) -> dict[str, dict[str, np.ndarray]]: + return {ep_idx: stats for ep_idx in episodes} + + +def load_image_as_numpy( + fpath: str | Path, dtype: np.dtype = np.float32, channel_first: bool = True +) -> np.ndarray: img = PILImage.open(fpath).convert("RGB") img_array = np.array(img, dtype=dtype) if channel_first: # (H, W, C) -> (C, H, W) img_array = np.transpose(img_array, (2, 0, 1)) - if "float" in dtype: + if np.issubdtype(dtype, np.floating): img_array /= 255.0 return img_array @@ -370,9 +427,9 @@ def create_empty_dataset_info( def get_episode_data_index( - episode_dicts: list[dict], episodes: list[int] | None = None + episode_dicts: dict[dict], episodes: list[int] | None = None ) -> dict[str, torch.Tensor]: - episode_lengths = {ep_idx: ep_dict["length"] for ep_idx, ep_dict in enumerate(episode_dicts)} + episode_lengths = {ep_idx: ep_dict["length"] for ep_idx, ep_dict in episode_dicts.items()} if episodes is not None: episode_lengths = {ep_idx: episode_lengths[ep_idx] for ep_idx in episodes} diff --git a/lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py b/lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py new file mode 100644 index 00000000..0c5d2688 --- /dev/null +++ b/lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py @@ -0,0 +1,87 @@ +""" +This script will help you convert any LeRobot dataset already pushed to the hub from codebase version 2.0 to +2.1. 
It performs the following:
+
+- Generates per-episode stats and writes them in `episodes_stats.jsonl`
+- Optionally removes the deprecated `stats.json` (pass `--delete-old-stats`)
+- Updates codebase_version in `info.json`
+
+Usage:
+
+```bash
+python lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py \
+    --repo-id=aliberts/koch_tutorial
+```
+
+"""
+# TODO(rcadene, aliberts): ensure this script works for any other changes for the final v2.1
+
+import argparse
+
+from huggingface_hub import HfApi
+
+from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset
+from lerobot.common.datasets.utils import EPISODES_STATS_PATH, STATS_PATH, load_stats, write_info
+from lerobot.common.datasets.v21.convert_stats import check_aggregate_stats, convert_stats
+
+
+def main(
+    repo_id: str,
+    test_branch: str | None = None,
+    delete_old_stats: bool = False,
+    num_workers: int = 4,
+):
+    dataset = LeRobotDataset(repo_id)
+    if (dataset.root / EPISODES_STATS_PATH).is_file():
+        raise FileExistsError("episodes_stats.jsonl already exists.")
+
+    convert_stats(dataset, num_workers=num_workers)
+    ref_stats = load_stats(dataset.root)
+    check_aggregate_stats(dataset, ref_stats)
+
+    dataset.meta.info["codebase_version"] = CODEBASE_VERSION
+    write_info(dataset.meta.info, dataset.root)
+
+    dataset.push_to_hub(branch=test_branch, create_card=False, allow_patterns="meta/")
+
+    if delete_old_stats:
+        if (dataset.root / STATS_PATH).is_file():
+            (dataset.root / STATS_PATH).unlink()
+        hub_api = HfApi()
+        if hub_api.file_exists(
+            STATS_PATH, repo_id=dataset.repo_id, revision=test_branch, repo_type="dataset"
+        ):
+            hub_api.delete_file(
+                STATS_PATH, repo_id=dataset.repo_id, revision=test_branch, repo_type="dataset"
+            )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--repo-id",
+        type=str,
+        required=True,
+        help="Repository identifier on Hugging Face: a community or a user name `/` the name of the dataset (e.g. `lerobot/pusht`, `cadene/aloha_sim_insertion_human`).",
+    )
+    parser.add_argument(
+        "--test-branch",
+        type=str,
+        default=None,
+        help="Repo branch to test your conversion first (e.g. 
'v2.0.test')", + ) + parser.add_argument( + "--delete-old-stats", + type=bool, + default=False, + help="Delete the deprecated `stats.json`", + ) + parser.add_argument( + "--num-workers", + type=int, + default=4, + help="Number of workers for parallelizing compute", + ) + + args = parser.parse_args() + main(**vars(args)) diff --git a/lerobot/common/datasets/v21/convert_stats.py b/lerobot/common/datasets/v21/convert_stats.py new file mode 100644 index 00000000..b13e0e19 --- /dev/null +++ b/lerobot/common/datasets/v21/convert_stats.py @@ -0,0 +1,85 @@ +from concurrent.futures import ThreadPoolExecutor, as_completed + +import numpy as np +from tqdm import tqdm + +from lerobot.common.datasets.compute_stats import aggregate_stats, get_feature_stats, sample_indices +from lerobot.common.datasets.lerobot_dataset import LeRobotDataset +from lerobot.common.datasets.utils import write_episode_stats + + +def sample_episode_video_frames(dataset: LeRobotDataset, episode_index: int, ft_key: str) -> np.ndarray: + ep_len = dataset.meta.episodes[episode_index]["length"] + sampled_indices = sample_indices(ep_len) + query_timestamps = dataset._get_query_timestamps(0.0, {ft_key: sampled_indices}) + video_frames = dataset._query_videos(query_timestamps, episode_index) + return video_frames[ft_key].numpy() + + +def convert_episode_stats(dataset: LeRobotDataset, ep_idx: int): + ep_start_idx = dataset.episode_data_index["from"][ep_idx] + ep_end_idx = dataset.episode_data_index["to"][ep_idx] + ep_data = dataset.hf_dataset.select(range(ep_start_idx, ep_end_idx)) + + ep_stats = {} + for key, ft in dataset.features.items(): + if ft["dtype"] == "video": + # We sample only for videos + ep_ft_data = sample_episode_video_frames(dataset, ep_idx, key) + else: + ep_ft_data = np.array(ep_data[key]) + + axes_to_reduce = (0, 2, 3) if ft["dtype"] in ["image", "video"] else 0 + keepdims = True if ft["dtype"] in ["image", "video"] else ep_ft_data.ndim == 1 + ep_stats[key] = get_feature_stats(ep_ft_data, axis=axes_to_reduce, keepdims=keepdims) + + if ft["dtype"] in ["image", "video"]: # remove batch dim + ep_stats[key] = { + k: v if k == "count" else np.squeeze(v, axis=0) for k, v in ep_stats[key].items() + } + + dataset.meta.episodes_stats[ep_idx] = ep_stats + + +def convert_stats(dataset: LeRobotDataset, num_workers: int = 0): + assert dataset.episodes is None + print("Computing episodes stats") + total_episodes = dataset.meta.total_episodes + if num_workers > 0: + with ThreadPoolExecutor(max_workers=num_workers) as executor: + futures = { + executor.submit(convert_episode_stats, dataset, ep_idx): ep_idx + for ep_idx in range(total_episodes) + } + for future in tqdm(as_completed(futures), total=total_episodes): + future.result() + else: + for ep_idx in tqdm(range(total_episodes)): + convert_episode_stats(dataset, ep_idx) + + for ep_idx in tqdm(range(total_episodes)): + write_episode_stats(ep_idx, dataset.meta.episodes_stats[ep_idx], dataset.root) + + +def check_aggregate_stats( + dataset: LeRobotDataset, + reference_stats: dict[str, dict[str, np.ndarray]], + video_rtol_atol: tuple[float] = (1e-2, 1e-2), + default_rtol_atol: tuple[float] = (5e-6, 0.0), +): + """Verifies that the aggregated stats from episodes_stats are close to reference stats.""" + agg_stats = aggregate_stats(list(dataset.meta.episodes_stats.values())) + for key, ft in dataset.features.items(): + # These values might need some fine-tuning + if ft["dtype"] == "video": + # to account for image sub-sampling + rtol, atol = video_rtol_atol + else: + rtol, atol = 
default_rtol_atol + + for stat, val in agg_stats[key].items(): + if key in reference_stats and stat in reference_stats[key]: + err_msg = f"feature='{key}' stats='{stat}'" + np.testing.assert_allclose( + val, reference_stats[key][stat], rtol=rtol, atol=atol, err_msg=err_msg + ) diff --git a/lerobot/common/datasets/video_utils.py b/lerobot/common/datasets/video_utils.py index 8ed3318d..8be53483 100644 --- a/lerobot/common/datasets/video_utils.py +++ b/lerobot/common/datasets/video_utils.py @@ -69,8 +69,8 @@ def decode_video_frames_torchvision( # set the first and last requested timestamps # Note: previous timestamps are usually loaded, since we need to access the previous key frame - first_ts = timestamps[0] - last_ts = timestamps[-1] + first_ts = min(timestamps) + last_ts = max(timestamps) # access closest key frame of the first requested frame # Note: closest key frame timestamp is usally smaller than `first_ts` (e.g. key frame can be the first frame of the video) diff --git a/lerobot/common/policies/normalize.py b/lerobot/common/policies/normalize.py index 95219273..b3255ec1 100644 --- a/lerobot/common/policies/normalize.py +++ b/lerobot/common/policies/normalize.py @@ -13,6 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import numpy as np import torch from torch import Tensor, nn @@ -77,17 +78,29 @@ def create_stats_buffers( } ) + # TODO(aliberts, rcadene): harmonize this to only use one framework (np or torch) if stats: - # Note: The clone is needed to make sure that the logic in save_pretrained doesn't see duplicated - # tensors anywhere (for example, when we use the same stats for normalization and - # unnormalization). See the logic here - # https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L97. - if norm_mode is NormalizationMode.MEAN_STD: - buffer["mean"].data = stats[key]["mean"].clone() - buffer["std"].data = stats[key]["std"].clone() - elif norm_mode is NormalizationMode.MIN_MAX: - buffer["min"].data = stats[key]["min"].clone() - buffer["max"].data = stats[key]["max"].clone() + if isinstance(stats[key]["mean"], np.ndarray): + if norm_mode is NormalizationMode.MEAN_STD: + buffer["mean"].data = torch.from_numpy(stats[key]["mean"]).to(dtype=torch.float32) + buffer["std"].data = torch.from_numpy(stats[key]["std"]).to(dtype=torch.float32) + elif norm_mode is NormalizationMode.MIN_MAX: + buffer["min"].data = torch.from_numpy(stats[key]["min"]).to(dtype=torch.float32) + buffer["max"].data = torch.from_numpy(stats[key]["max"]).to(dtype=torch.float32) + elif isinstance(stats[key]["mean"], torch.Tensor): + # Note: The clone is needed to make sure that the logic in save_pretrained doesn't see duplicated + # tensors anywhere (for example, when we use the same stats for normalization and + # unnormalization). See the logic here + # https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L97. 
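+                # Casting to float32 here mirrors the numpy branch above, so both paths yield identical buffers.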
+ if norm_mode is NormalizationMode.MEAN_STD: + buffer["mean"].data = stats[key]["mean"].clone().to(dtype=torch.float32) + buffer["std"].data = stats[key]["std"].clone().to(dtype=torch.float32) + elif norm_mode is NormalizationMode.MIN_MAX: + buffer["min"].data = stats[key]["min"].clone().to(dtype=torch.float32) + buffer["max"].data = stats[key]["max"].clone().to(dtype=torch.float32) + else: + type_ = type(stats[key]["mean"]) + raise ValueError(f"np.ndarray or torch.Tensor expected, but type is '{type_}' instead.") stats_buffers[key] = buffer return stats_buffers @@ -141,6 +154,7 @@ class Normalize(nn.Module): batch = dict(batch) # shallow copy avoids mutating the input batch for key, ft in self.features.items(): if key not in batch: + # FIXME(aliberts, rcadene): This might lead to silent fail! continue norm_mode = self.norm_map.get(ft.type, NormalizationMode.IDENTITY) diff --git a/lerobot/common/robot_devices/control_configs.py b/lerobot/common/robot_devices/control_configs.py index a2f3889c..c96a87f0 100644 --- a/lerobot/common/robot_devices/control_configs.py +++ b/lerobot/common/robot_devices/control_configs.py @@ -60,8 +60,6 @@ class RecordControlConfig(ControlConfig): num_episodes: int = 50 # Encode frames in the dataset into video video: bool = True - # By default, run the computation of the data statistics at the end of data collection. Compute intensive and not required to just replay an episode. - run_compute_stats: bool = True # Upload dataset to Hugging Face hub. push_to_hub: bool = True # Upload on private repository on the Hugging Face hub. diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index de67e331..5f51c81b 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -301,10 +301,7 @@ def record( log_say("Stop recording", cfg.play_sounds, blocking=True) stop_recording(robot, listener, cfg.display_cameras) - if cfg.run_compute_stats: - logging.info("Computing dataset statistics") - - dataset.consolidate(cfg.run_compute_stats) + dataset.consolidate() if cfg.push_to_hub: dataset.push_to_hub(tags=cfg.tags, private=cfg.private) diff --git a/tests/fixtures/dataset_factories.py b/tests/fixtures/dataset_factories.py index bdd2dc54..f57f945b 100644 --- a/tests/fixtures/dataset_factories.py +++ b/tests/fixtures/dataset_factories.py @@ -29,7 +29,7 @@ from tests.fixtures.constants import ( def get_task_index(task_dicts: dict, task: str) -> int: - tasks = {d["task_index"]: d["task"] for d in task_dicts} + tasks = {d["task_index"]: d["task"] for d in task_dicts.values()} task_to_task_index = {task: task_idx for task_idx, task in tasks.items()} return task_to_task_index[task] @@ -142,6 +142,7 @@ def stats_factory(): "mean": np.full((3, 1, 1), 0.5, dtype=np.float32).tolist(), "min": np.full((3, 1, 1), 0, dtype=np.float32).tolist(), "std": np.full((3, 1, 1), 0.25, dtype=np.float32).tolist(), + "count": [10], } else: stats[key] = { @@ -149,20 +150,38 @@ def stats_factory(): "mean": np.full(shape, 0.5, dtype=dtype).tolist(), "min": np.full(shape, 0, dtype=dtype).tolist(), "std": np.full(shape, 0.25, dtype=dtype).tolist(), + "count": [10], } return stats return _create_stats +@pytest.fixture(scope="session") +def episodes_stats_factory(stats_factory): + def _create_episodes_stats( + features: dict[str], + total_episodes: int = 3, + ) -> dict: + episodes_stats = {} + for episode_index in range(total_episodes): + episodes_stats[episode_index] = { + "episode_index": episode_index, + "stats": stats_factory(features), + } + return 
episodes_stats + + return _create_episodes_stats + + @pytest.fixture(scope="session") def tasks_factory(): def _create_tasks(total_tasks: int = 3) -> int: - tasks_list = [] - for i in range(total_tasks): - task_dict = {"task_index": i, "task": f"Perform action {i}."} - tasks_list.append(task_dict) - return tasks_list + tasks = {} + for task_index in range(total_tasks): + task_dict = {"task_index": task_index, "task": f"Perform action {task_index}."} + tasks[task_index] = task_dict + return tasks return _create_tasks @@ -191,10 +210,10 @@ def episodes_factory(tasks_factory): # Generate random lengths that sum up to total_length lengths = np.random.multinomial(total_frames, [1 / total_episodes] * total_episodes).tolist() - tasks_list = [task_dict["task"] for task_dict in tasks] + tasks_list = [task_dict["task"] for task_dict in tasks.values()] num_tasks_available = len(tasks_list) - episodes_list = [] + episodes = {} remaining_tasks = tasks_list.copy() for ep_idx in range(total_episodes): num_tasks_in_episode = random.randint(1, min(3, num_tasks_available)) if multi_task else 1 @@ -204,15 +223,13 @@ def episodes_factory(tasks_factory): for task in episode_tasks: remaining_tasks.remove(task) - episodes_list.append( - { - "episode_index": ep_idx, - "tasks": episode_tasks, - "length": lengths[ep_idx], - } - ) + episodes[ep_idx] = { + "episode_index": ep_idx, + "tasks": episode_tasks, + "length": lengths[ep_idx], + } - return episodes_list + return episodes return _create_episodes @@ -236,7 +253,7 @@ def hf_dataset_factory(features_factory, tasks_factory, episodes_factory, img_ar frame_index_col = np.array([], dtype=np.int64) episode_index_col = np.array([], dtype=np.int64) task_index = np.array([], dtype=np.int64) - for ep_dict in episodes: + for ep_dict in episodes.values(): timestamp_col = np.concatenate((timestamp_col, np.arange(ep_dict["length"]) / fps)) frame_index_col = np.concatenate((frame_index_col, np.arange(ep_dict["length"], dtype=int))) episode_index_col = np.concatenate( @@ -279,6 +296,7 @@ def hf_dataset_factory(features_factory, tasks_factory, episodes_factory, img_ar def lerobot_dataset_metadata_factory( info_factory, stats_factory, + episodes_stats_factory, tasks_factory, episodes_factory, mock_snapshot_download_factory, @@ -288,6 +306,7 @@ def lerobot_dataset_metadata_factory( repo_id: str = DUMMY_REPO_ID, info: dict | None = None, stats: dict | None = None, + episodes_stats: list[dict] | None = None, tasks: list[dict] | None = None, episodes: list[dict] | None = None, local_files_only: bool = False, @@ -296,6 +315,10 @@ def lerobot_dataset_metadata_factory( info = info_factory() if not stats: stats = stats_factory(features=info["features"]) + if not episodes_stats: + episodes_stats = episodes_stats_factory( + features=info["features"], total_episodes=info["total_episodes"] + ) if not tasks: tasks = tasks_factory(total_tasks=info["total_tasks"]) if not episodes: @@ -306,6 +329,7 @@ def lerobot_dataset_metadata_factory( mock_snapshot_download = mock_snapshot_download_factory( info=info, stats=stats, + episodes_stats=episodes_stats, tasks=tasks, episodes=episodes, ) @@ -329,6 +353,7 @@ def lerobot_dataset_metadata_factory( def lerobot_dataset_factory( info_factory, stats_factory, + episodes_stats_factory, tasks_factory, episodes_factory, hf_dataset_factory, @@ -344,6 +369,7 @@ def lerobot_dataset_factory( multi_task: bool = False, info: dict | None = None, stats: dict | None = None, + episodes_stats: list[dict] | None = None, tasks: list[dict] | None = None, episode_dicts: 
list[dict] | None = None, hf_dataset: datasets.Dataset | None = None, @@ -355,6 +381,8 @@ def lerobot_dataset_factory( ) if not stats: stats = stats_factory(features=info["features"]) + if not episodes_stats: + episodes_stats = episodes_stats_factory(features=info["features"], total_episodes=total_episodes) if not tasks: tasks = tasks_factory(total_tasks=info["total_tasks"]) if not episode_dicts: @@ -370,6 +398,7 @@ def lerobot_dataset_factory( mock_snapshot_download = mock_snapshot_download_factory( info=info, stats=stats, + episodes_stats=episodes_stats, tasks=tasks, episodes=episode_dicts, hf_dataset=hf_dataset, @@ -379,6 +408,7 @@ def lerobot_dataset_factory( repo_id=repo_id, info=info, stats=stats, + episodes_stats=episodes_stats, tasks=tasks, episodes=episode_dicts, local_files_only=kwargs.get("local_files_only", False), @@ -406,7 +436,7 @@ def empty_lerobot_dataset_factory(): robot: Robot | None = None, robot_type: str | None = None, features: dict | None = None, - ): + ) -> LeRobotDataset: return LeRobotDataset.create( repo_id=repo_id, fps=fps, root=root, robot=robot, robot_type=robot_type, features=features ) diff --git a/tests/fixtures/files.py b/tests/fixtures/files.py index 5fe8a314..4ef12e49 100644 --- a/tests/fixtures/files.py +++ b/tests/fixtures/files.py @@ -7,7 +7,13 @@ import pyarrow.compute as pc import pyarrow.parquet as pq import pytest -from lerobot.common.datasets.utils import EPISODES_PATH, INFO_PATH, STATS_PATH, TASKS_PATH +from lerobot.common.datasets.utils import ( + EPISODES_PATH, + EPISODES_STATS_PATH, + INFO_PATH, + STATS_PATH, + TASKS_PATH, +) @pytest.fixture(scope="session") @@ -38,6 +44,20 @@ def stats_path(stats_factory): return _create_stats_json_file +@pytest.fixture(scope="session") +def episodes_stats_path(episodes_stats_factory): + def _create_episodes_stats_jsonl_file(dir: Path, episodes_stats: list[dict] | None = None) -> Path: + if not episodes_stats: + episodes_stats = episodes_stats_factory() + fpath = dir / EPISODES_STATS_PATH + fpath.parent.mkdir(parents=True, exist_ok=True) + with jsonlines.open(fpath, "w") as writer: + writer.write_all(episodes_stats.values()) + return fpath + + return _create_episodes_stats_jsonl_file + + @pytest.fixture(scope="session") def tasks_path(tasks_factory): def _create_tasks_jsonl_file(dir: Path, tasks: list | None = None) -> Path: @@ -46,7 +66,7 @@ def tasks_path(tasks_factory): fpath = dir / TASKS_PATH fpath.parent.mkdir(parents=True, exist_ok=True) with jsonlines.open(fpath, "w") as writer: - writer.write_all(tasks) + writer.write_all(tasks.values()) return fpath return _create_tasks_jsonl_file @@ -60,7 +80,7 @@ def episode_path(episodes_factory): fpath = dir / EPISODES_PATH fpath.parent.mkdir(parents=True, exist_ok=True) with jsonlines.open(fpath, "w") as writer: - writer.write_all(episodes) + writer.write_all(episodes.values()) return fpath return _create_episodes_jsonl_file diff --git a/tests/fixtures/hub.py b/tests/fixtures/hub.py index 351768c0..ae309cb4 100644 --- a/tests/fixtures/hub.py +++ b/tests/fixtures/hub.py @@ -4,7 +4,13 @@ import datasets import pytest from huggingface_hub.utils import filter_repo_objects -from lerobot.common.datasets.utils import EPISODES_PATH, INFO_PATH, STATS_PATH, TASKS_PATH +from lerobot.common.datasets.utils import ( + EPISODES_PATH, + EPISODES_STATS_PATH, + INFO_PATH, + STATS_PATH, + TASKS_PATH, +) from tests.fixtures.constants import LEROBOT_TEST_DIR @@ -14,6 +20,8 @@ def mock_snapshot_download_factory( info_path, stats_factory, stats_path, + episodes_stats_factory, + 
episodes_stats_path, tasks_factory, tasks_path, episodes_factory, @@ -29,6 +37,7 @@ def mock_snapshot_download_factory( def _mock_snapshot_download_func( info: dict | None = None, stats: dict | None = None, + episodes_stats: list[dict] | None = None, tasks: list[dict] | None = None, episodes: list[dict] | None = None, hf_dataset: datasets.Dataset | None = None, @@ -37,6 +46,10 @@ def mock_snapshot_download_factory( info = info_factory() if not stats: stats = stats_factory(features=info["features"]) + if not episodes_stats: + episodes_stats = episodes_stats_factory( + features=info["features"], total_episodes=info["total_episodes"] + ) if not tasks: tasks = tasks_factory(total_tasks=info["total_tasks"]) if not episodes: @@ -67,11 +80,11 @@ def mock_snapshot_download_factory( # List all possible files all_files = [] - meta_files = [INFO_PATH, STATS_PATH, TASKS_PATH, EPISODES_PATH] + meta_files = [INFO_PATH, STATS_PATH, EPISODES_STATS_PATH, TASKS_PATH, EPISODES_PATH] all_files.extend(meta_files) data_files = [] - for episode_dict in episodes: + for episode_dict in episodes.values(): ep_idx = episode_dict["episode_index"] ep_chunk = ep_idx // info["chunks_size"] data_path = info["data_path"].format(episode_chunk=ep_chunk, episode_index=ep_idx) @@ -92,6 +105,8 @@ def mock_snapshot_download_factory( _ = info_path(local_dir, info) elif rel_path == STATS_PATH: _ = stats_path(local_dir, stats) + elif rel_path == EPISODES_STATS_PATH: + _ = episodes_stats_path(local_dir, episodes_stats) elif rel_path == TASKS_PATH: _ = tasks_path(local_dir, tasks) elif rel_path == EPISODES_PATH: diff --git a/tests/test_cameras.py b/tests/test_cameras.py index 1a1812f7..7c043c25 100644 --- a/tests/test_cameras.py +++ b/tests/test_cameras.py @@ -182,7 +182,7 @@ def test_camera(request, camera_type, mock): @pytest.mark.parametrize("camera_type, mock", TEST_CAMERA_TYPES) @require_camera -def test_save_images_from_cameras(tmpdir, request, camera_type, mock): +def test_save_images_from_cameras(tmp_path, request, camera_type, mock): # TODO(rcadene): refactor if camera_type == "opencv": from lerobot.common.robot_devices.cameras.opencv import save_images_from_cameras @@ -190,4 +190,4 @@ def test_save_images_from_cameras(tmpdir, request, camera_type, mock): from lerobot.common.robot_devices.cameras.intelrealsense import save_images_from_cameras # Small `record_time_s` to speedup unit tests - save_images_from_cameras(tmpdir, record_time_s=0.02, mock=mock) + save_images_from_cameras(tmp_path, record_time_s=0.02, mock=mock) diff --git a/tests/test_compute_stats.py b/tests/test_compute_stats.py new file mode 100644 index 00000000..d9032c8a --- /dev/null +++ b/tests/test_compute_stats.py @@ -0,0 +1,311 @@ +#!/usr/bin/env python + +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
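+
+"""Tests for the statistics helpers in lerobot.common.datasets.compute_stats.
+
+As a worked example of the aggregation being tested (see test_aggregate_feature_stats below):
+combining two equally weighted features with means 5.0 / 6.0 and stds 2.0 / 2.5 gives
+mean = 5.5 and std = sqrt(((4.0 + 0.25) + (6.25 + 0.25)) / 2) ≈ 2.3184.
+"""
+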
+from unittest.mock import patch + +import numpy as np +import pytest + +from lerobot.common.datasets.compute_stats import ( + _assert_type_and_shape, + aggregate_feature_stats, + aggregate_stats, + compute_episode_stats, + estimate_num_samples, + get_feature_stats, + sample_images, + sample_indices, +) + + +def mock_load_image_as_numpy(path, dtype, channel_first): + return np.ones((3, 32, 32), dtype=dtype) if channel_first else np.ones((32, 32, 3), dtype=dtype) + + +@pytest.fixture +def sample_array(): + return np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + + +def test_estimate_num_samples(): + assert estimate_num_samples(1) == 1 + assert estimate_num_samples(10) == 10 + assert estimate_num_samples(100) == 100 + assert estimate_num_samples(200) == 100 + assert estimate_num_samples(1000) == 177 + assert estimate_num_samples(2000) == 299 + assert estimate_num_samples(5000) == 594 + assert estimate_num_samples(10_000) == 1000 + assert estimate_num_samples(20_000) == 1681 + assert estimate_num_samples(50_000) == 3343 + assert estimate_num_samples(500_000) == 10_000 + + +def test_sample_indices(): + indices = sample_indices(10) + assert len(indices) > 0 + assert indices[0] == 0 + assert indices[-1] == 9 + assert len(indices) == estimate_num_samples(10) + + +@patch("lerobot.common.datasets.compute_stats.load_image_as_numpy", side_effect=mock_load_image_as_numpy) +def test_sample_images(mock_load): + image_paths = [f"image_{i}.jpg" for i in range(100)] + images = sample_images(image_paths) + assert isinstance(images, np.ndarray) + assert images.shape[1:] == (3, 32, 32) + assert images.dtype == np.uint8 + assert len(images) == estimate_num_samples(100) + + +def test_get_feature_stats_images(): + data = np.random.rand(100, 3, 32, 32) + stats = get_feature_stats(data, axis=(0, 2, 3), keepdims=True) + assert "min" in stats and "max" in stats and "mean" in stats and "std" in stats and "count" in stats + np.testing.assert_equal(stats["count"], np.array([100])) + assert stats["min"].shape == stats["max"].shape == stats["mean"].shape == stats["std"].shape + + +def test_get_feature_stats_axis_0_keepdims(sample_array): + expected = { + "min": np.array([[1, 2, 3]]), + "max": np.array([[7, 8, 9]]), + "mean": np.array([[4.0, 5.0, 6.0]]), + "std": np.array([[2.44948974, 2.44948974, 2.44948974]]), + "count": np.array([3]), + } + result = get_feature_stats(sample_array, axis=(0,), keepdims=True) + for key in expected: + np.testing.assert_allclose(result[key], expected[key]) + + +def test_get_feature_stats_axis_1(sample_array): + expected = { + "min": np.array([1, 4, 7]), + "max": np.array([3, 6, 9]), + "mean": np.array([2.0, 5.0, 8.0]), + "std": np.array([0.81649658, 0.81649658, 0.81649658]), + "count": np.array([3]), + } + result = get_feature_stats(sample_array, axis=(1,), keepdims=False) + for key in expected: + np.testing.assert_allclose(result[key], expected[key]) + + +def test_get_feature_stats_no_axis(sample_array): + expected = { + "min": np.array(1), + "max": np.array(9), + "mean": np.array(5.0), + "std": np.array(2.5819889), + "count": np.array([3]), + } + result = get_feature_stats(sample_array, axis=None, keepdims=False) + for key in expected: + np.testing.assert_allclose(result[key], expected[key]) + + +def test_get_feature_stats_empty_array(): + array = np.array([]) + with pytest.raises(ValueError): + get_feature_stats(array, axis=(0,), keepdims=True) + + +def test_get_feature_stats_single_value(): + array = np.array([[1337]]) + result = get_feature_stats(array, axis=None, keepdims=True) + 
np.testing.assert_equal(result["min"], np.array(1337)) + np.testing.assert_equal(result["max"], np.array(1337)) + np.testing.assert_equal(result["mean"], np.array(1337.0)) + np.testing.assert_equal(result["std"], np.array(0.0)) + np.testing.assert_equal(result["count"], np.array([1])) + + +def test_compute_episode_stats(): + episode_data = { + "observation.image": [f"image_{i}.jpg" for i in range(100)], + "observation.state": np.random.rand(100, 10), + } + features = { + "observation.image": {"dtype": "image"}, + "observation.state": {"dtype": "numeric"}, + } + + with patch( + "lerobot.common.datasets.compute_stats.load_image_as_numpy", side_effect=mock_load_image_as_numpy + ): + stats = compute_episode_stats(episode_data, features) + + assert "observation.image" in stats and "observation.state" in stats + assert stats["observation.image"]["count"].item() == 100 + assert stats["observation.state"]["count"].item() == 100 + assert stats["observation.image"]["mean"].shape == (3, 1, 1) + + +def test_assert_type_and_shape_valid(): + valid_stats = [ + { + "feature1": { + "min": np.array([1.0]), + "max": np.array([10.0]), + "mean": np.array([5.0]), + "std": np.array([2.0]), + "count": np.array([1]), + } + } + ] + _assert_type_and_shape(valid_stats) + + +def test_assert_type_and_shape_invalid_type(): + invalid_stats = [ + { + "feature1": { + "min": [1.0], # Not a numpy array + "max": np.array([10.0]), + "mean": np.array([5.0]), + "std": np.array([2.0]), + "count": np.array([1]), + } + } + ] + with pytest.raises(ValueError, match="Stats must be composed of numpy array"): + _assert_type_and_shape(invalid_stats) + + +def test_assert_type_and_shape_invalid_shape(): + invalid_stats = [ + { + "feature1": { + "count": np.array([1, 2]), # Wrong shape + } + } + ] + with pytest.raises(ValueError, match=r"Shape of 'count' must be \(1\)"): + _assert_type_and_shape(invalid_stats) + + +def test_aggregate_feature_stats(): + stats_ft_list = [ + { + "min": np.array([1.0]), + "max": np.array([10.0]), + "mean": np.array([5.0]), + "std": np.array([2.0]), + "count": np.array([1]), + }, + { + "min": np.array([2.0]), + "max": np.array([12.0]), + "mean": np.array([6.0]), + "std": np.array([2.5]), + "count": np.array([1]), + }, + ] + result = aggregate_feature_stats(stats_ft_list) + np.testing.assert_allclose(result["min"], np.array([1.0])) + np.testing.assert_allclose(result["max"], np.array([12.0])) + np.testing.assert_allclose(result["mean"], np.array([5.5])) + np.testing.assert_allclose(result["std"], np.array([2.318405]), atol=1e-6) + np.testing.assert_allclose(result["count"], np.array([2])) + + +def test_aggregate_stats(): + all_stats = [ + { + "observation.image": { + "min": [1, 2, 3], + "max": [10, 20, 30], + "mean": [5.5, 10.5, 15.5], + "std": [2.87, 5.87, 8.87], + "count": 10, + }, + "observation.state": {"min": 1, "max": 10, "mean": 5.5, "std": 2.87, "count": 10}, + "extra_key_0": {"min": 5, "max": 25, "mean": 15, "std": 6, "count": 6}, + }, + { + "observation.image": { + "min": [2, 1, 0], + "max": [15, 10, 5], + "mean": [8.5, 5.5, 2.5], + "std": [3.42, 2.42, 1.42], + "count": 15, + }, + "observation.state": {"min": 2, "max": 15, "mean": 8.5, "std": 3.42, "count": 15}, + "extra_key_1": {"min": 0, "max": 20, "mean": 10, "std": 5, "count": 5}, + }, + ] + + expected_agg_stats = { + "observation.image": { + "min": [1, 1, 0], + "max": [15, 20, 30], + "mean": [7.3, 7.5, 7.7], + "std": [3.5317, 4.8267, 8.5581], + "count": 25, + }, + "observation.state": { + "min": 1, + "max": 15, + "mean": 7.3, + "std": 3.5317, + 
"count": 25, + }, + "extra_key_0": { + "min": 5, + "max": 25, + "mean": 15.0, + "std": 6.0, + "count": 6, + }, + "extra_key_1": { + "min": 0, + "max": 20, + "mean": 10.0, + "std": 5.0, + "count": 5, + }, + } + + # cast to numpy + for ep_stats in all_stats: + for fkey, stats in ep_stats.items(): + for k in stats: + stats[k] = np.array(stats[k], dtype=np.int64 if k == "count" else np.float32) + if fkey == "observation.image" and k != "count": + stats[k] = stats[k].reshape(3, 1, 1) # for normalization on image channels + else: + stats[k] = stats[k].reshape(1) + + # cast to numpy + for fkey, stats in expected_agg_stats.items(): + for k in stats: + stats[k] = np.array(stats[k], dtype=np.int64 if k == "count" else np.float32) + if fkey == "observation.image" and k != "count": + stats[k] = stats[k].reshape(3, 1, 1) # for normalization on image channels + else: + stats[k] = stats[k].reshape(1) + + results = aggregate_stats(all_stats) + + for fkey in expected_agg_stats: + np.testing.assert_allclose(results[fkey]["min"], expected_agg_stats[fkey]["min"]) + np.testing.assert_allclose(results[fkey]["max"], expected_agg_stats[fkey]["max"]) + np.testing.assert_allclose(results[fkey]["mean"], expected_agg_stats[fkey]["mean"]) + np.testing.assert_allclose( + results[fkey]["std"], expected_agg_stats[fkey]["std"], atol=1e-04, rtol=1e-04 + ) + np.testing.assert_allclose(results[fkey]["count"], expected_agg_stats[fkey]["count"]) diff --git a/tests/test_control_robot.py b/tests/test_control_robot.py index 36ee096f..a4f538a6 100644 --- a/tests/test_control_robot.py +++ b/tests/test_control_robot.py @@ -24,7 +24,6 @@ pytest -sx 'tests/test_control_robot.py::test_teleoperate[aloha-True]' """ import multiprocessing -from pathlib import Path from unittest.mock import patch import pytest @@ -45,7 +44,7 @@ from tests.utils import DEVICE, TEST_ROBOT_TYPES, mock_calibration_dir, require_ @pytest.mark.parametrize("robot_type, mock", TEST_ROBOT_TYPES) @require_robot -def test_teleoperate(tmpdir, request, robot_type, mock): +def test_teleoperate(tmp_path, request, robot_type, mock): robot_kwargs = {"robot_type": robot_type, "mock": mock} if mock and robot_type != "aloha": @@ -53,8 +52,7 @@ def test_teleoperate(tmpdir, request, robot_type, mock): # Create an empty calibration directory to trigger manual calibration # and avoid writing calibration files in user .cache/calibration folder - tmpdir = Path(tmpdir) - calibration_dir = tmpdir / robot_type + calibration_dir = tmp_path / robot_type mock_calibration_dir(calibration_dir) robot_kwargs["calibration_dir"] = calibration_dir else: @@ -70,15 +68,14 @@ def test_teleoperate(tmpdir, request, robot_type, mock): @pytest.mark.parametrize("robot_type, mock", TEST_ROBOT_TYPES) @require_robot -def test_calibrate(tmpdir, request, robot_type, mock): +def test_calibrate(tmp_path, request, robot_type, mock): robot_kwargs = {"robot_type": robot_type, "mock": mock} if mock: request.getfixturevalue("patch_builtins_input") # Create an empty calibration directory to trigger manual calibration - tmpdir = Path(tmpdir) - calibration_dir = tmpdir / robot_type + calibration_dir = tmp_path / robot_type robot_kwargs["calibration_dir"] = calibration_dir robot = make_robot(**robot_kwargs) @@ -89,7 +86,7 @@ def test_calibrate(tmpdir, request, robot_type, mock): @pytest.mark.parametrize("robot_type, mock", TEST_ROBOT_TYPES) @require_robot -def test_record_without_cameras(tmpdir, request, robot_type, mock): +def test_record_without_cameras(tmp_path, request, robot_type, mock): robot_kwargs = 
{"robot_type": robot_type, "mock": mock} # Avoid using cameras @@ -100,7 +97,7 @@ def test_record_without_cameras(tmpdir, request, robot_type, mock): # Create an empty calibration directory to trigger manual calibration # and avoid writing calibration files in user .cache/calibration folder - calibration_dir = Path(tmpdir) / robot_type + calibration_dir = tmp_path / robot_type mock_calibration_dir(calibration_dir) robot_kwargs["calibration_dir"] = calibration_dir else: @@ -108,7 +105,7 @@ def test_record_without_cameras(tmpdir, request, robot_type, mock): pass repo_id = "lerobot/debug" - root = Path(tmpdir) / "data" / repo_id + root = tmp_path / "data" / repo_id single_task = "Do something." robot = make_robot(**robot_kwargs) @@ -121,7 +118,6 @@ def test_record_without_cameras(tmpdir, request, robot_type, mock): episode_time_s=1, reset_time_s=0.1, num_episodes=2, - run_compute_stats=False, push_to_hub=False, video=False, play_sounds=False, @@ -131,8 +127,7 @@ def test_record_without_cameras(tmpdir, request, robot_type, mock): @pytest.mark.parametrize("robot_type, mock", TEST_ROBOT_TYPES) @require_robot -def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock): - tmpdir = Path(tmpdir) +def test_record_and_replay_and_policy(tmp_path, request, robot_type, mock): robot_kwargs = {"robot_type": robot_type, "mock": mock} if mock and robot_type != "aloha": @@ -140,7 +135,7 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock): # Create an empty calibration directory to trigger manual calibration # and avoid writing calibration files in user .cache/calibration folder - calibration_dir = tmpdir / robot_type + calibration_dir = tmp_path / robot_type mock_calibration_dir(calibration_dir) robot_kwargs["calibration_dir"] = calibration_dir else: @@ -148,7 +143,7 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock): pass repo_id = "lerobot_test/debug" - root = tmpdir / "data" / repo_id + root = tmp_path / "data" / repo_id single_task = "Do something." 
robot = make_robot(**robot_kwargs) @@ -180,7 +175,7 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock): policy_cfg = ACTConfig() policy = make_policy(policy_cfg, ds_meta=dataset.meta, device=DEVICE) - out_dir = tmpdir / "logger" + out_dir = tmp_path / "logger" pretrained_policy_path = out_dir / "checkpoints/last/pretrained_model" policy.save_pretrained(pretrained_policy_path) @@ -207,7 +202,7 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock): num_image_writer_processes = 0 eval_repo_id = "lerobot/eval_debug" - eval_root = tmpdir / "data" / eval_repo_id + eval_root = tmp_path / "data" / eval_repo_id rec_eval_cfg = RecordControlConfig( repo_id=eval_repo_id, @@ -218,7 +213,6 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock): episode_time_s=1, reset_time_s=0.1, num_episodes=2, - run_compute_stats=False, push_to_hub=False, video=False, display_cameras=False, @@ -240,7 +234,7 @@ def test_record_and_replay_and_policy(tmpdir, request, robot_type, mock): @pytest.mark.parametrize("robot_type, mock", [("koch", True)]) @require_robot -def test_resume_record(tmpdir, request, robot_type, mock): +def test_resume_record(tmp_path, request, robot_type, mock): robot_kwargs = {"robot_type": robot_type, "mock": mock} if mock and robot_type != "aloha": @@ -248,7 +242,7 @@ def test_resume_record(tmpdir, request, robot_type, mock): # Create an empty calibration directory to trigger manual calibration # and avoid writing calibration files in user .cache/calibration folder - calibration_dir = tmpdir / robot_type + calibration_dir = tmp_path / robot_type mock_calibration_dir(calibration_dir) robot_kwargs["calibration_dir"] = calibration_dir else: @@ -258,7 +252,7 @@ def test_resume_record(tmpdir, request, robot_type, mock): robot = make_robot(**robot_kwargs) repo_id = "lerobot/debug" - root = Path(tmpdir) / "data" / repo_id + root = tmp_path / "data" / repo_id single_task = "Do something." rec_cfg = RecordControlConfig( @@ -272,7 +266,6 @@ def test_resume_record(tmpdir, request, robot_type, mock): video=False, display_cameras=False, play_sounds=False, - run_compute_stats=False, local_files_only=True, num_episodes=1, ) @@ -291,7 +284,7 @@ def test_resume_record(tmpdir, request, robot_type, mock): @pytest.mark.parametrize("robot_type, mock", [("koch", True)]) @require_robot -def test_record_with_event_rerecord_episode(tmpdir, request, robot_type, mock): +def test_record_with_event_rerecord_episode(tmp_path, request, robot_type, mock): robot_kwargs = {"robot_type": robot_type, "mock": mock} if mock and robot_type != "aloha": @@ -299,7 +292,7 @@ def test_record_with_event_rerecord_episode(tmpdir, request, robot_type, mock): # Create an empty calibration directory to trigger manual calibration # and avoid writing calibration files in user .cache/calibration folder - calibration_dir = tmpdir / robot_type + calibration_dir = tmp_path / robot_type mock_calibration_dir(calibration_dir) robot_kwargs["calibration_dir"] = calibration_dir else: @@ -316,7 +309,7 @@ def test_record_with_event_rerecord_episode(tmpdir, request, robot_type, mock): mock_listener.return_value = (None, mock_events) repo_id = "lerobot/debug" - root = Path(tmpdir) / "data" / repo_id + root = tmp_path / "data" / repo_id single_task = "Do something." 
rec_cfg = RecordControlConfig( @@ -331,7 +324,6 @@ def test_record_with_event_rerecord_episode(tmpdir, request, robot_type, mock): video=False, display_cameras=False, play_sounds=False, - run_compute_stats=False, ) dataset = record(robot, rec_cfg) @@ -342,7 +334,7 @@ def test_record_with_event_rerecord_episode(tmpdir, request, robot_type, mock): @pytest.mark.parametrize("robot_type, mock", [("koch", True)]) @require_robot -def test_record_with_event_exit_early(tmpdir, request, robot_type, mock): +def test_record_with_event_exit_early(tmp_path, request, robot_type, mock): robot_kwargs = {"robot_type": robot_type, "mock": mock} if mock: @@ -350,7 +342,7 @@ def test_record_with_event_exit_early(tmpdir, request, robot_type, mock): # Create an empty calibration directory to trigger manual calibration # and avoid writing calibration files in user .cache/calibration folder - calibration_dir = tmpdir / robot_type + calibration_dir = tmp_path / robot_type mock_calibration_dir(calibration_dir) robot_kwargs["calibration_dir"] = calibration_dir else: @@ -367,7 +359,7 @@ def test_record_with_event_exit_early(tmpdir, request, robot_type, mock): mock_listener.return_value = (None, mock_events) repo_id = "lerobot/debug" - root = Path(tmpdir) / "data" / repo_id + root = tmp_path / "data" / repo_id single_task = "Do something." rec_cfg = RecordControlConfig( @@ -382,7 +374,6 @@ def test_record_with_event_exit_early(tmpdir, request, robot_type, mock): video=False, display_cameras=False, play_sounds=False, - run_compute_stats=False, ) dataset = record(robot, rec_cfg) @@ -395,7 +386,7 @@ def test_record_with_event_exit_early(tmpdir, request, robot_type, mock): "robot_type, mock, num_image_writer_processes", [("koch", True, 0), ("koch", True, 1)] ) @require_robot -def test_record_with_event_stop_recording(tmpdir, request, robot_type, mock, num_image_writer_processes): +def test_record_with_event_stop_recording(tmp_path, request, robot_type, mock, num_image_writer_processes): robot_kwargs = {"robot_type": robot_type, "mock": mock} if mock: @@ -403,7 +394,7 @@ def test_record_with_event_stop_recording(tmpdir, request, robot_type, mock, num # Create an empty calibration directory to trigger manual calibration # and avoid writing calibration files in user .cache/calibration folder - calibration_dir = tmpdir / robot_type + calibration_dir = tmp_path / robot_type mock_calibration_dir(calibration_dir) robot_kwargs["calibration_dir"] = calibration_dir else: @@ -420,7 +411,7 @@ def test_record_with_event_stop_recording(tmpdir, request, robot_type, mock, num mock_listener.return_value = (None, mock_events) repo_id = "lerobot/debug" - root = Path(tmpdir) / "data" / repo_id + root = tmp_path / "data" / repo_id single_task = "Do something." 
rec_cfg = RecordControlConfig( @@ -436,7 +427,6 @@ def test_record_with_event_stop_recording(tmpdir, request, robot_type, mock, num video=False, display_cameras=False, play_sounds=False, - run_compute_stats=False, num_image_writer_processes=num_image_writer_processes, ) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 54d92125..b1df9b46 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -20,21 +20,14 @@ from copy import deepcopy from itertools import chain from pathlib import Path -import einops import numpy as np import pytest import torch -from datasets import Dataset from huggingface_hub import HfApi from PIL import Image from safetensors.torch import load_file import lerobot -from lerobot.common.datasets.compute_stats import ( - aggregate_stats, - compute_stats, - get_stats_einops_patterns, -) from lerobot.common.datasets.factory import make_dataset from lerobot.common.datasets.image_writer import image_array_to_pil_image from lerobot.common.datasets.lerobot_dataset import ( @@ -44,13 +37,11 @@ from lerobot.common.datasets.lerobot_dataset import ( from lerobot.common.datasets.utils import ( create_branch, flatten_dict, - hf_transform_to_torch, unflatten_dict, ) from lerobot.common.envs.factory import make_env_config from lerobot.common.policies.factory import make_policy_config from lerobot.common.robot_devices.robots.utils import make_robot -from lerobot.common.utils.random_utils import seeded_context from lerobot.configs.default import DatasetConfig from lerobot.configs.train import TrainPipelineConfig from tests.fixtures.constants import DUMMY_CHW, DUMMY_HWC, DUMMY_REPO_ID @@ -196,12 +187,12 @@ def test_add_frame_wrong_shape_numpy_ndim_0(tmp_path, empty_lerobot_dataset_fact def test_add_frame(tmp_path, empty_lerobot_dataset_factory): features = {"state": {"dtype": "float32", "shape": (1,), "names": None}} dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) - dataset.add_frame({"state": torch.randn(1), "task": "dummy"}) + dataset.add_frame({"state": torch.randn(1), "task": "Dummy task"}) dataset.save_episode(encode_videos=False) - dataset.consolidate(run_compute_stats=False) + dataset.consolidate() assert len(dataset) == 1 - assert dataset[0]["task"] == "dummy" + assert dataset[0]["task"] == "Dummy task" assert dataset[0]["task_index"] == 0 assert dataset[0]["state"].ndim == 0 @@ -209,9 +200,9 @@ def test_add_frame(tmp_path, empty_lerobot_dataset_factory): def test_add_frame_state_1d(tmp_path, empty_lerobot_dataset_factory): features = {"state": {"dtype": "float32", "shape": (2,), "names": None}} dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) - dataset.add_frame({"state": torch.randn(2), "task": "dummy"}) + dataset.add_frame({"state": torch.randn(2), "task": "Dummy task"}) dataset.save_episode(encode_videos=False) - dataset.consolidate(run_compute_stats=False) + dataset.consolidate() assert dataset[0]["state"].shape == torch.Size([2]) @@ -219,9 +210,9 @@ def test_add_frame_state_1d(tmp_path, empty_lerobot_dataset_factory): def test_add_frame_state_2d(tmp_path, empty_lerobot_dataset_factory): features = {"state": {"dtype": "float32", "shape": (2, 4), "names": None}} dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) - dataset.add_frame({"state": torch.randn(2, 4), "task": "dummy"}) + dataset.add_frame({"state": torch.randn(2, 4), "task": "Dummy task"}) dataset.save_episode(encode_videos=False) - dataset.consolidate(run_compute_stats=False) + 
dataset.consolidate() assert dataset[0]["state"].shape == torch.Size([2, 4]) @@ -229,9 +220,9 @@ def test_add_frame_state_2d(tmp_path, empty_lerobot_dataset_factory): def test_add_frame_state_3d(tmp_path, empty_lerobot_dataset_factory): features = {"state": {"dtype": "float32", "shape": (2, 4, 3), "names": None}} dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) - dataset.add_frame({"state": torch.randn(2, 4, 3), "task": "dummy"}) + dataset.add_frame({"state": torch.randn(2, 4, 3), "task": "Dummy task"}) dataset.save_episode(encode_videos=False) - dataset.consolidate(run_compute_stats=False) + dataset.consolidate() assert dataset[0]["state"].shape == torch.Size([2, 4, 3]) @@ -239,9 +230,9 @@ def test_add_frame_state_3d(tmp_path, empty_lerobot_dataset_factory): def test_add_frame_state_4d(tmp_path, empty_lerobot_dataset_factory): features = {"state": {"dtype": "float32", "shape": (2, 4, 3, 5), "names": None}} dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) - dataset.add_frame({"state": torch.randn(2, 4, 3, 5), "task": "dummy"}) + dataset.add_frame({"state": torch.randn(2, 4, 3, 5), "task": "Dummy task"}) dataset.save_episode(encode_videos=False) - dataset.consolidate(run_compute_stats=False) + dataset.consolidate() assert dataset[0]["state"].shape == torch.Size([2, 4, 3, 5]) @@ -249,9 +240,9 @@ def test_add_frame_state_4d(tmp_path, empty_lerobot_dataset_factory): def test_add_frame_state_5d(tmp_path, empty_lerobot_dataset_factory): features = {"state": {"dtype": "float32", "shape": (2, 4, 3, 5, 1), "names": None}} dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) - dataset.add_frame({"state": torch.randn(2, 4, 3, 5, 1), "task": "dummy"}) + dataset.add_frame({"state": torch.randn(2, 4, 3, 5, 1), "task": "Dummy task"}) dataset.save_episode(encode_videos=False) - dataset.consolidate(run_compute_stats=False) + dataset.consolidate() assert dataset[0]["state"].shape == torch.Size([2, 4, 3, 5, 1]) @@ -261,7 +252,7 @@ def test_add_frame_state_numpy(tmp_path, empty_lerobot_dataset_factory): dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) dataset.add_frame({"state": np.array([1], dtype=np.float32), "task": "Dummy task"}) dataset.save_episode(encode_videos=False) - dataset.consolidate(run_compute_stats=False) + dataset.consolidate() assert dataset[0]["state"].ndim == 0 @@ -271,7 +262,7 @@ def test_add_frame_string(tmp_path, empty_lerobot_dataset_factory): dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) dataset.add_frame({"caption": "Dummy caption", "task": "Dummy task"}) dataset.save_episode(encode_videos=False) - dataset.consolidate(run_compute_stats=False) + dataset.consolidate() assert dataset[0]["caption"] == "Dummy caption" @@ -307,7 +298,7 @@ def test_add_frame_image(image_dataset): dataset = image_dataset dataset.add_frame({"image": np.random.rand(*DUMMY_CHW), "task": "Dummy task"}) dataset.save_episode(encode_videos=False) - dataset.consolidate(run_compute_stats=False) + dataset.consolidate() assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW) @@ -316,7 +307,7 @@ def test_add_frame_image_h_w_c(image_dataset): dataset = image_dataset dataset.add_frame({"image": np.random.rand(*DUMMY_HWC), "task": "Dummy task"}) dataset.save_episode(encode_videos=False) - dataset.consolidate(run_compute_stats=False) + dataset.consolidate() assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW) @@ -326,7 +317,7 @@ def 
test_add_frame_image_uint8(image_dataset): image = np.random.randint(0, 256, DUMMY_HWC, dtype=np.uint8) dataset.add_frame({"image": image, "task": "Dummy task"}) dataset.save_episode(encode_videos=False) - dataset.consolidate(run_compute_stats=False) + dataset.consolidate() assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW) @@ -336,7 +327,7 @@ def test_add_frame_image_pil(image_dataset): image = np.random.randint(0, 256, DUMMY_HWC, dtype=np.uint8) dataset.add_frame({"image": Image.fromarray(image), "task": "Dummy task"}) dataset.save_episode(encode_videos=False) - dataset.consolidate(run_compute_stats=False) + dataset.consolidate() assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW) @@ -463,67 +454,6 @@ def test_multidataset_frames(): assert torch.equal(sub_dataset_item[k], dataset_item[k]) -# TODO(aliberts, rcadene): Refactor and move this to a tests/test_compute_stats.py -def test_compute_stats_on_xarm(): - """Check that the statistics are computed correctly according to the stats_patterns property. - - We compare with taking a straight min, mean, max, std of all the data in one pass (which we can do - because we are working with a small dataset). - """ - # TODO(rcadene, aliberts): remove dataset download - dataset = LeRobotDataset("lerobot/xarm_lift_medium", episodes=[0]) - - # reduce size of dataset sample on which stats compute is tested to 10 frames - dataset.hf_dataset = dataset.hf_dataset.select(range(10)) - - # Note: we set the batch size to be smaller than the whole dataset to make sure we are testing batched - # computation of the statistics. While doing this, we also make sure it works when we don't divide the - # dataset into even batches. - computed_stats = compute_stats(dataset, batch_size=int(len(dataset) * 0.25), num_workers=0) - - # get einops patterns to aggregate batches and compute statistics - stats_patterns = get_stats_einops_patterns(dataset) - - # get all frames from the dataset in the same dtype and range as during compute_stats - dataloader = torch.utils.data.DataLoader( - dataset, - num_workers=0, - batch_size=len(dataset), - shuffle=False, - ) - full_batch = next(iter(dataloader)) - - # compute stats based on all frames from the dataset without any batching - expected_stats = {} - for k, pattern in stats_patterns.items(): - full_batch[k] = full_batch[k].float() - expected_stats[k] = {} - expected_stats[k]["mean"] = einops.reduce(full_batch[k], pattern, "mean") - expected_stats[k]["std"] = torch.sqrt( - einops.reduce((full_batch[k] - expected_stats[k]["mean"]) ** 2, pattern, "mean") - ) - expected_stats[k]["min"] = einops.reduce(full_batch[k], pattern, "min") - expected_stats[k]["max"] = einops.reduce(full_batch[k], pattern, "max") - - # test computed stats match expected stats - for k in stats_patterns: - assert torch.allclose(computed_stats[k]["mean"], expected_stats[k]["mean"]) - assert torch.allclose(computed_stats[k]["std"], expected_stats[k]["std"]) - assert torch.allclose(computed_stats[k]["min"], expected_stats[k]["min"]) - assert torch.allclose(computed_stats[k]["max"], expected_stats[k]["max"]) - - # load stats used during training which are expected to match the ones returned by computed_stats - loaded_stats = dataset.meta.stats # noqa: F841 - - # TODO(rcadene): we can't test this because expected_stats is computed on a subset - # # test loaded stats match expected stats - # for k in stats_patterns: - # assert torch.allclose(loaded_stats[k]["mean"], expected_stats[k]["mean"]) - # assert torch.allclose(loaded_stats[k]["std"], 
expected_stats[k]["std"]) - # assert torch.allclose(loaded_stats[k]["min"], expected_stats[k]["min"]) - # assert torch.allclose(loaded_stats[k]["max"], expected_stats[k]["max"]) - - # TODO(aliberts): Move to more appropriate location def test_flatten_unflatten_dict(): d = { @@ -627,35 +557,6 @@ def test_backward_compatibility(repo_id): # load_and_compare(i - 1) -@pytest.mark.skip("TODO after fix multidataset") -def test_multidataset_aggregate_stats(): - """Makes 3 basic datasets and checks that aggregate stats are computed correctly.""" - with seeded_context(0): - data_a = torch.rand(30, dtype=torch.float32) - data_b = torch.rand(20, dtype=torch.float32) - data_c = torch.rand(20, dtype=torch.float32) - - hf_dataset_1 = Dataset.from_dict( - {"a": data_a[:10], "b": data_b[:10], "c": data_c[:10], "index": torch.arange(10)} - ) - hf_dataset_1.set_transform(hf_transform_to_torch) - hf_dataset_2 = Dataset.from_dict({"a": data_a[10:20], "b": data_b[10:], "index": torch.arange(10)}) - hf_dataset_2.set_transform(hf_transform_to_torch) - hf_dataset_3 = Dataset.from_dict({"a": data_a[20:], "c": data_c[10:], "index": torch.arange(10)}) - hf_dataset_3.set_transform(hf_transform_to_torch) - dataset_1 = LeRobotDataset.from_preloaded("d1", hf_dataset=hf_dataset_1) - dataset_1.stats = compute_stats(dataset_1, batch_size=len(hf_dataset_1), num_workers=0) - dataset_2 = LeRobotDataset.from_preloaded("d2", hf_dataset=hf_dataset_2) - dataset_2.stats = compute_stats(dataset_2, batch_size=len(hf_dataset_2), num_workers=0) - dataset_3 = LeRobotDataset.from_preloaded("d3", hf_dataset=hf_dataset_3) - dataset_3.stats = compute_stats(dataset_3, batch_size=len(hf_dataset_3), num_workers=0) - stats = aggregate_stats([dataset_1, dataset_2, dataset_3]) - for data_key, data in zip(["a", "b", "c"], [data_a, data_b, data_c], strict=True): - for agg_fn in ["mean", "min", "max"]: - assert torch.allclose(stats[data_key][agg_fn], einops.reduce(data, "n -> 1", agg_fn)) - assert torch.allclose(stats[data_key]["std"], torch.std(data, correction=0)) - - @pytest.mark.skip("Requires internet access") def test_create_branch(): api = HfApi() diff --git a/tests/test_push_dataset_to_hub.py b/tests/test_push_dataset_to_hub.py deleted file mode 100644 index a0c8d908..00000000 --- a/tests/test_push_dataset_to_hub.py +++ /dev/null @@ -1,370 +0,0 @@ -""" -This file contains generic tests to ensure that nothing breaks if we modify the push_dataset_to_hub API. -Also, this file contains backward compatibility tests. Because they are slow and require to download the raw datasets, -we skip them for now in our CI. 
- -Example to run backward compatiblity tests locally: -``` -python -m pytest --run-skipped tests/test_push_dataset_to_hub.py::test_push_dataset_to_hub_pusht_backward_compatibility -``` -""" - -from pathlib import Path - -import numpy as np -import pytest -import torch - -from lerobot.common.datasets.lerobot_dataset import LeRobotDataset -from lerobot.common.datasets.push_dataset_to_hub.utils import save_images_concurrently -from lerobot.common.datasets.video_utils import encode_video_frames -from lerobot.scripts.push_dataset_to_hub import push_dataset_to_hub -from tests.utils import require_package_arg - - -def _mock_download_raw_pusht(raw_dir, num_frames=4, num_episodes=3): - import zarr - - raw_dir.mkdir(parents=True, exist_ok=True) - zarr_path = raw_dir / "pusht_cchi_v7_replay.zarr" - store = zarr.DirectoryStore(zarr_path) - zarr_data = zarr.group(store=store) - - zarr_data.create_dataset( - "data/action", shape=(num_frames, 1), chunks=(num_frames, 1), dtype=np.float32, overwrite=True - ) - zarr_data.create_dataset( - "data/img", - shape=(num_frames, 96, 96, 3), - chunks=(num_frames, 96, 96, 3), - dtype=np.uint8, - overwrite=True, - ) - zarr_data.create_dataset( - "data/n_contacts", shape=(num_frames, 2), chunks=(num_frames, 2), dtype=np.float32, overwrite=True - ) - zarr_data.create_dataset( - "data/state", shape=(num_frames, 5), chunks=(num_frames, 5), dtype=np.float32, overwrite=True - ) - zarr_data.create_dataset( - "data/keypoint", shape=(num_frames, 9, 2), chunks=(num_frames, 9, 2), dtype=np.float32, overwrite=True - ) - zarr_data.create_dataset( - "meta/episode_ends", shape=(num_episodes,), chunks=(num_episodes,), dtype=np.int32, overwrite=True - ) - - zarr_data["data/action"][:] = np.random.randn(num_frames, 1) - zarr_data["data/img"][:] = np.random.randint(0, 255, size=(num_frames, 96, 96, 3), dtype=np.uint8) - zarr_data["data/n_contacts"][:] = np.random.randn(num_frames, 2) - zarr_data["data/state"][:] = np.random.randn(num_frames, 5) - zarr_data["data/keypoint"][:] = np.random.randn(num_frames, 9, 2) - zarr_data["meta/episode_ends"][:] = np.array([1, 3, 4]) - - store.close() - - -def _mock_download_raw_umi(raw_dir, num_frames=4, num_episodes=3): - import zarr - - raw_dir.mkdir(parents=True, exist_ok=True) - zarr_path = raw_dir / "cup_in_the_wild.zarr" - store = zarr.DirectoryStore(zarr_path) - zarr_data = zarr.group(store=store) - - zarr_data.create_dataset( - "data/camera0_rgb", - shape=(num_frames, 96, 96, 3), - chunks=(num_frames, 96, 96, 3), - dtype=np.uint8, - overwrite=True, - ) - zarr_data.create_dataset( - "data/robot0_demo_end_pose", - shape=(num_frames, 5), - chunks=(num_frames, 5), - dtype=np.float32, - overwrite=True, - ) - zarr_data.create_dataset( - "data/robot0_demo_start_pose", - shape=(num_frames, 5), - chunks=(num_frames, 5), - dtype=np.float32, - overwrite=True, - ) - zarr_data.create_dataset( - "data/robot0_eef_pos", shape=(num_frames, 5), chunks=(num_frames, 5), dtype=np.float32, overwrite=True - ) - zarr_data.create_dataset( - "data/robot0_eef_rot_axis_angle", - shape=(num_frames, 5), - chunks=(num_frames, 5), - dtype=np.float32, - overwrite=True, - ) - zarr_data.create_dataset( - "data/robot0_gripper_width", - shape=(num_frames, 5), - chunks=(num_frames, 5), - dtype=np.float32, - overwrite=True, - ) - zarr_data.create_dataset( - "meta/episode_ends", shape=(num_episodes,), chunks=(num_episodes,), dtype=np.int32, overwrite=True - ) - - zarr_data["data/camera0_rgb"][:] = np.random.randint(0, 255, size=(num_frames, 96, 96, 3), dtype=np.uint8) - 
zarr_data["data/robot0_demo_end_pose"][:] = np.random.randn(num_frames, 5) - zarr_data["data/robot0_demo_start_pose"][:] = np.random.randn(num_frames, 5) - zarr_data["data/robot0_eef_pos"][:] = np.random.randn(num_frames, 5) - zarr_data["data/robot0_eef_rot_axis_angle"][:] = np.random.randn(num_frames, 5) - zarr_data["data/robot0_gripper_width"][:] = np.random.randn(num_frames, 5) - zarr_data["meta/episode_ends"][:] = np.array([1, 3, 4]) - - store.close() - - -def _mock_download_raw_xarm(raw_dir, num_frames=4): - import pickle - - dataset_dict = { - "observations": { - "rgb": np.random.randint(0, 255, size=(num_frames, 3, 84, 84), dtype=np.uint8), - "state": np.random.randn(num_frames, 4), - }, - "actions": np.random.randn(num_frames, 3), - "rewards": np.random.randn(num_frames), - "masks": np.random.randn(num_frames), - "dones": np.array([False, True, True, True]), - } - - raw_dir.mkdir(parents=True, exist_ok=True) - pkl_path = raw_dir / "buffer.pkl" - with open(pkl_path, "wb") as f: - pickle.dump(dataset_dict, f) - - -def _mock_download_raw_aloha(raw_dir, num_frames=6, num_episodes=3): - import h5py - - for ep_idx in range(num_episodes): - raw_dir.mkdir(parents=True, exist_ok=True) - path_h5 = raw_dir / f"episode_{ep_idx}.hdf5" - with h5py.File(str(path_h5), "w") as f: - f.create_dataset("action", data=np.random.randn(num_frames // num_episodes, 14)) - f.create_dataset("observations/qpos", data=np.random.randn(num_frames // num_episodes, 14)) - f.create_dataset("observations/qvel", data=np.random.randn(num_frames // num_episodes, 14)) - f.create_dataset( - "observations/images/top", - data=np.random.randint( - 0, 255, size=(num_frames // num_episodes, 480, 640, 3), dtype=np.uint8 - ), - ) - - -def _mock_download_raw_dora(raw_dir, num_frames=6, num_episodes=3, fps=30): - from datetime import datetime, timedelta, timezone - - import pandas - - def write_parquet(key, timestamps, values): - data = { - "timestamp_utc": timestamps, - key: values, - } - df = pandas.DataFrame(data) - raw_dir.mkdir(parents=True, exist_ok=True) - df.to_parquet(raw_dir / f"{key}.parquet", engine="pyarrow") - - episode_indices = [None, None, -1, None, None, -1, None, None, -1] - episode_indices_mapping = [0, 0, 0, 1, 1, 1, 2, 2, 2] - frame_indices = [0, 1, -1, 0, 1, -1, 0, 1, -1] - - cam_key = "observation.images.cam_high" - timestamps = [] - actions = [] - states = [] - frames = [] - # `+ num_episodes`` for buffer frames associated to episode_index=-1 - for i, frame_idx in enumerate(frame_indices): - t_utc = datetime.now(timezone.utc) + timedelta(seconds=i / fps) - action = np.random.randn(21).tolist() - state = np.random.randn(21).tolist() - ep_idx = episode_indices_mapping[i] - frame = [{"path": f"videos/{cam_key}_episode_{ep_idx:06d}.mp4", "timestamp": frame_idx / fps}] - timestamps.append(t_utc) - actions.append(action) - states.append(state) - frames.append(frame) - - write_parquet(cam_key, timestamps, frames) - write_parquet("observation.state", timestamps, states) - write_parquet("action", timestamps, actions) - write_parquet("episode_index", timestamps, episode_indices) - - # write fake mp4 file for each episode - for ep_idx in range(num_episodes): - imgs_array = np.random.randint(0, 255, size=(num_frames // num_episodes, 480, 640, 3), dtype=np.uint8) - - tmp_imgs_dir = raw_dir / "tmp_images" - save_images_concurrently(imgs_array, tmp_imgs_dir) - - fname = f"{cam_key}_episode_{ep_idx:06d}.mp4" - video_path = raw_dir / "videos" / fname - encode_video_frames(tmp_imgs_dir, video_path, fps, vcodec="libx264") - 
- -def _mock_download_raw(raw_dir, repo_id): - if "wrist_gripper" in repo_id: - _mock_download_raw_dora(raw_dir) - elif "aloha" in repo_id: - _mock_download_raw_aloha(raw_dir) - elif "pusht" in repo_id: - _mock_download_raw_pusht(raw_dir) - elif "xarm" in repo_id: - _mock_download_raw_xarm(raw_dir) - elif "umi" in repo_id: - _mock_download_raw_umi(raw_dir) - else: - raise ValueError(repo_id) - - -@pytest.mark.skip("push_dataset_to_hub is deprecated") -def test_push_dataset_to_hub_invalid_repo_id(tmpdir): - with pytest.raises(ValueError): - push_dataset_to_hub(Path(tmpdir), "raw_format", "invalid_repo_id") - - -@pytest.mark.skip("push_dataset_to_hub is deprecated") -def test_push_dataset_to_hub_out_dir_force_override_false(tmpdir): - tmpdir = Path(tmpdir) - out_dir = tmpdir / "out" - raw_dir = tmpdir / "raw" - # mkdir to skip download - raw_dir.mkdir(parents=True, exist_ok=True) - with pytest.raises(ValueError): - push_dataset_to_hub( - raw_dir=raw_dir, - raw_format="some_format", - repo_id="user/dataset", - local_dir=out_dir, - force_override=False, - ) - - -@pytest.mark.skip("push_dataset_to_hub is deprecated") -@pytest.mark.parametrize( - "required_packages, raw_format, repo_id, make_test_data", - [ - (["gym_pusht"], "pusht_zarr", "lerobot/pusht", False), - (["gym_pusht"], "pusht_zarr", "lerobot/pusht", True), - (None, "xarm_pkl", "lerobot/xarm_lift_medium", False), - (None, "aloha_hdf5", "lerobot/aloha_sim_insertion_scripted", False), - (["imagecodecs"], "umi_zarr", "lerobot/umi_cup_in_the_wild", False), - (None, "dora_parquet", "cadene/wrist_gripper", False), - ], -) -@require_package_arg -def test_push_dataset_to_hub_format(required_packages, tmpdir, raw_format, repo_id, make_test_data): - num_episodes = 3 - tmpdir = Path(tmpdir) - - raw_dir = tmpdir / f"{repo_id}_raw" - _mock_download_raw(raw_dir, repo_id) - - local_dir = tmpdir / repo_id - - lerobot_dataset = push_dataset_to_hub( - raw_dir=raw_dir, - raw_format=raw_format, - repo_id=repo_id, - push_to_hub=False, - local_dir=local_dir, - force_override=False, - cache_dir=tmpdir / "cache", - tests_data_dir=tmpdir / "tests/data" if make_test_data else None, - encoding={"vcodec": "libx264"}, - ) - - # minimal generic tests on the local directory containing LeRobotDataset - assert (local_dir / "meta_data" / "info.json").exists() - assert (local_dir / "meta_data" / "stats.safetensors").exists() - assert (local_dir / "meta_data" / "episode_data_index.safetensors").exists() - for i in range(num_episodes): - for cam_key in lerobot_dataset.camera_keys: - assert (local_dir / "videos" / f"{cam_key}_episode_{i:06d}.mp4").exists() - assert (local_dir / "train" / "dataset_info.json").exists() - assert (local_dir / "train" / "state.json").exists() - assert len(list((local_dir / "train").glob("*.arrow"))) > 0 - - # minimal generic tests on the item - item = lerobot_dataset[0] - assert "index" in item - assert "episode_index" in item - assert "timestamp" in item - for cam_key in lerobot_dataset.camera_keys: - assert cam_key in item - - if make_test_data: - # Check that only the first episode is selected. 
- test_dataset = LeRobotDataset(repo_id=repo_id, root=tmpdir / "tests/data") - num_frames = sum( - i == lerobot_dataset.hf_dataset["episode_index"][0] - for i in lerobot_dataset.hf_dataset["episode_index"] - ).item() - assert ( - test_dataset.hf_dataset["episode_index"] - == lerobot_dataset.hf_dataset["episode_index"][:num_frames] - ) - for k in ["from", "to"]: - assert torch.equal(test_dataset.episode_data_index[k], lerobot_dataset.episode_data_index[k][:1]) - - -@pytest.mark.skip("push_dataset_to_hub is deprecated") -@pytest.mark.parametrize( - "raw_format, repo_id", - [ - # TODO(rcadene): add raw dataset test artifacts - ("pusht_zarr", "lerobot/pusht"), - ("xarm_pkl", "lerobot/xarm_lift_medium"), - ("aloha_hdf5", "lerobot/aloha_sim_insertion_scripted"), - ("umi_zarr", "lerobot/umi_cup_in_the_wild"), - ("dora_parquet", "cadene/wrist_gripper"), - ], -) -def test_push_dataset_to_hub_pusht_backward_compatibility(tmpdir, raw_format, repo_id): - _, dataset_id = repo_id.split("/") - - tmpdir = Path(tmpdir) - raw_dir = tmpdir / f"{dataset_id}_raw" - local_dir = tmpdir / repo_id - - push_dataset_to_hub( - raw_dir=raw_dir, - raw_format=raw_format, - repo_id=repo_id, - push_to_hub=False, - local_dir=local_dir, - force_override=False, - cache_dir=tmpdir / "cache", - episodes=[0], - ) - - ds_actual = LeRobotDataset(repo_id, root=tmpdir) - ds_reference = LeRobotDataset(repo_id) - - assert len(ds_reference.hf_dataset) == len(ds_actual.hf_dataset) - - def check_same_items(item1, item2): - assert item1.keys() == item2.keys(), "Keys mismatch" - - for key in item1: - if isinstance(item1[key], torch.Tensor) and isinstance(item2[key], torch.Tensor): - assert torch.equal(item1[key], item2[key]), f"Mismatch found in key: {key}" - else: - assert item1[key] == item2[key], f"Mismatch found in key: {key}" - - for i in range(len(ds_reference.hf_dataset)): - item_reference = ds_reference.hf_dataset[i] - item_actual = ds_actual.hf_dataset[i] - check_same_items(item_reference, item_actual) diff --git a/tests/test_robots.py b/tests/test_robots.py index e03b5f78..6c300b71 100644 --- a/tests/test_robots.py +++ b/tests/test_robots.py @@ -23,8 +23,6 @@ pytest -sx 'tests/test_robots.py::test_robot[aloha-True]' ``` """ -from pathlib import Path - import pytest import torch @@ -35,7 +33,7 @@ from tests.utils import TEST_ROBOT_TYPES, mock_calibration_dir, require_robot @pytest.mark.parametrize("robot_type, mock", TEST_ROBOT_TYPES) @require_robot -def test_robot(tmpdir, request, robot_type, mock): +def test_robot(tmp_path, request, robot_type, mock): # TODO(rcadene): measure fps in nightly? 
     # TODO(rcadene): test logs
     # TODO(rcadene): add compatibility with other robots

@@ -50,8 +48,7 @@ def test_robot(tmpdir, request, robot_type, mock):
         request.getfixturevalue("patch_builtins_input")

         # Create an empty calibration directory to trigger manual calibration
-        tmpdir = Path(tmpdir)
-        calibration_dir = tmpdir / robot_type
+        calibration_dir = tmp_path / robot_type
         mock_calibration_dir(calibration_dir)
         robot_kwargs["calibration_dir"] = calibration_dir


From 02bc4e03e0f07e535425ce0fd14ffa08a87dc276 Mon Sep 17 00:00:00 2001
From: Tavish
Date: Tue, 18 Feb 2025 22:25:58 +0800
Subject: [PATCH 05/28] Support porting OpenX/RLDS datasets to LeRobot

---
 examples/port_datasets/openx_rlds.py          | 308 ++++++
 examples/port_datasets/oxe_utils/configs.py   | 848 +++++++++++++++
 .../oxe_utils/transform_utils.py              |  76 ++
 .../port_datasets/oxe_utils/transforms.py     | 985 ++++++++++++++++++
 4 files changed, 2217 insertions(+)
 create mode 100644 examples/port_datasets/openx_rlds.py
 create mode 100644 examples/port_datasets/oxe_utils/configs.py
 create mode 100644 examples/port_datasets/oxe_utils/transform_utils.py
 create mode 100644 examples/port_datasets/oxe_utils/transforms.py

diff --git a/examples/port_datasets/openx_rlds.py b/examples/port_datasets/openx_rlds.py
new file mode 100644
index 00000000..4ac2791c
--- /dev/null
+++ b/examples/port_datasets/openx_rlds.py
@@ -0,0 +1,308 @@
+#!/usr/bin/env python
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+For all datasets in the RLDS format.
+For https://github.com/google-deepmind/open_x_embodiment (OPENX) datasets.
+
+NOTE: You need to install tensorflow and tensorflow_datasets before running this script.
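+For example, with pip (the packages are published on PyPI as `tensorflow` and `tensorflow-datasets`):
+    pip install tensorflow tensorflow-datasets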
+ +Example: + python openx_rlds.py \ + --raw-dir /path/to/bridge_orig/1.0.0 \ + --local-dir /path/to/local_dir \ + --repo-id your_id \ + --use-videos \ + --push-to-hub +""" + +import argparse +import os +import re +import shutil +import sys +from functools import partial +from pathlib import Path + +import numpy as np +import tensorflow as tf +import tensorflow_datasets as tfds + +from lerobot.common.datasets.lerobot_dataset import LEROBOT_HOME, LeRobotDataset + +current_dir = os.path.dirname(os.path.abspath(__file__)) +oxe_utils_dir = os.path.join(current_dir, "oxe_utils") +sys.path.append(oxe_utils_dir) + +from oxe_utils.configs import OXE_DATASET_CONFIGS, StateEncoding +from oxe_utils.transforms import OXE_STANDARDIZATION_TRANSFORMS + +np.set_printoptions(precision=2) + + +def transform_raw_dataset(episode, dataset_name): + traj = next(iter(episode["steps"].batch(episode["steps"].cardinality()))) + + if dataset_name in OXE_STANDARDIZATION_TRANSFORMS: + traj = OXE_STANDARDIZATION_TRANSFORMS[dataset_name](traj) + + if dataset_name in OXE_DATASET_CONFIGS: + state_obs_keys = OXE_DATASET_CONFIGS[dataset_name]["state_obs_keys"] + else: + state_obs_keys = [None for _ in range(8)] + + proprio = tf.concat( + [ + ( + tf.zeros((tf.shape(traj["action"])[0], 1), dtype=tf.float32) # padding + if key is None + else tf.cast(traj["observation"][key], tf.float32) + ) + for key in state_obs_keys + ], + axis=1, + ) + + traj.update( + { + "proprio": proprio, + "task": traj.pop("language_instruction"), + "action": tf.cast(traj["action"], tf.float32), + } + ) + + episode["steps"] = traj + return episode + + +def generate_features_from_raw(builder: tfds.core.DatasetBuilder, use_videos: bool = True): + dataset_name = builder.name + + state_names = [f"motor_{i}" for i in range(8)] + if dataset_name in OXE_DATASET_CONFIGS: + state_encoding = OXE_DATASET_CONFIGS[dataset_name]["state_encoding"] + if state_encoding == StateEncoding.POS_EULER: + state_names = ["x", "y", "z", "roll", "pitch", "yaw", "pad", "gripper"] + if "libero" in dataset_name: + state_names = ["x", "y", "z", "roll", "pitch", "yaw", "gripper", "gripper"] # 2D gripper state + elif state_encoding == StateEncoding.POS_QUAT: + state_names = ["x", "y", "z", "rx", "ry", "rz", "rw", "gripper"] + + DEFAULT_FEATURES = { + "observation.state": { + "dtype": "float32", + "shape": (8,), + "names": {"motors": state_names}, + }, + "action": { + "dtype": "float32", + "shape": (7,), + "names": {"motors": ["x", "y", "z", "roll", "pitch", "yaw", "gripper"]}, + }, + } + + obs = builder.info.features["steps"]["observation"] + features = { + f"observation.images.{key}": { + "dtype": "video" if use_videos else "image", + "shape": value.shape, + "names": ["height", "width", "rgb"], + } + for key, value in obs.items() + if "depth" not in key and any(x in key for x in ["image", "rgb"]) + } + return {**features, **DEFAULT_FEATURES} + + +def save_as_lerobot_dataset(lerobot_dataset: LeRobotDataset, raw_dataset: tf.data.Dataset, **kwargs): + for episode in raw_dataset.as_numpy_iterator(): + traj = episode["steps"] + for i in range(traj["action"].shape[0]): + image_dict = { + f"observation.images.{key}": value[i] + for key, value in traj["observation"].items() + if "depth" not in key and any(x in key for x in ["image", "rgb"]) + } + lerobot_dataset.add_frame( + { + **image_dict, + "observation.state": traj["proprio"][i], + "action": traj["action"][i], + } + ) + lerobot_dataset.save_episode(task=traj["task"][0].decode()) + + lerobot_dataset.consolidate( + run_compute_stats=True, 
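+        # Consolidation optionally computes dataset statistics over the ported episodes;
+        # keep_image_files and stat_kwargs below control image cleanup and the stats DataLoader settings.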
+        keep_image_files=kwargs["keep_images"],
+        stat_kwargs={"batch_size": kwargs["batch_size"], "num_workers": kwargs["num_workers"]},
+    )
+
+
+def create_lerobot_dataset(
+    raw_dir: Path,
+    repo_id: str = None,
+    local_dir: Path = None,
+    push_to_hub: bool = False,
+    fps: int = None,
+    robot_type: str = None,
+    use_videos: bool = True,
+    batch_size: int = 32,
+    num_workers: int = 8,
+    image_writer_process: int = 5,
+    image_writer_threads: int = 10,
+    keep_images: bool = True,
+):
+    last_part = raw_dir.name
+    if re.match(r"^\d+\.\d+\.\d+$", last_part):
+        version = last_part
+        dataset_name = raw_dir.parent.name
+        data_dir = raw_dir.parent.parent
+    else:
+        version = ""
+        dataset_name = last_part
+        data_dir = raw_dir.parent
+
+    if local_dir is None:
+        local_dir = Path(LEROBOT_HOME)
+    local_dir /= f"{dataset_name}_{version}_lerobot"
+    if local_dir.exists():
+        shutil.rmtree(local_dir)
+
+    builder = tfds.builder(dataset_name, data_dir=data_dir, version=version)
+    features = generate_features_from_raw(builder, use_videos)
+    raw_dataset = builder.as_dataset(split="train").map(partial(transform_raw_dataset, dataset_name=dataset_name))
+
+    if fps is None:
+        if dataset_name in OXE_DATASET_CONFIGS:
+            fps = OXE_DATASET_CONFIGS[dataset_name]["control_frequency"]
+        else:
+            fps = 10
+
+    if robot_type is None:
+        if dataset_name in OXE_DATASET_CONFIGS:
+            robot_type = OXE_DATASET_CONFIGS[dataset_name]["robot_type"]
+            robot_type = robot_type.lower().replace(" ", "_").replace("-", "_")
+        else:
+            robot_type = "unknown"
+
+    lerobot_dataset = LeRobotDataset.create(
+        repo_id=repo_id,
+        robot_type=robot_type,
+        root=local_dir,
+        fps=fps,
+        use_videos=use_videos,
+        features=features,
+        image_writer_threads=image_writer_threads,
+        image_writer_processes=image_writer_process,
+    )
+
+    save_as_lerobot_dataset(
+        lerobot_dataset, raw_dataset, keep_images=keep_images, batch_size=batch_size, num_workers=num_workers
+    )
+
+    if push_to_hub:
+        assert repo_id is not None
+        tags = ["LeRobot", dataset_name, "rlds"]
+        if dataset_name in OXE_DATASET_CONFIGS:
+            tags.append("openx")
+        if robot_type != "unknown":
+            tags.append(robot_type)
+        lerobot_dataset.push_to_hub(
+            tags=tags,
+            private=False,
+            push_videos=True,
+            license="apache-2.0",
+        )
+
+
+def main():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "--raw-dir",
+        type=Path,
+        required=True,
+        help="Directory containing the input raw dataset (e.g. `path/to/dataset` or `path/to/dataset/version`).",
+    )
+    parser.add_argument(
+        "--local-dir",
+        type=Path,
+        required=True,
+        help="Directory in which to write the dataset converted to LeRobotDataset format (e.g. `data/lerobot/aloha_mobile_chair`).",
+    )
+    parser.add_argument(
+        "--repo-id",
+        type=str,
+        help="Repository identifier on Hugging Face: a community or a user name `/` the name of the dataset. Required when --push-to-hub is set.",
+    )
+    parser.add_argument(
+        "--push-to-hub",
+        action="store_true",
+        help="Upload to hub.",
+    )
+    parser.add_argument(
+        "--robot-type",
+        type=str,
+        default=None,
+        help="Robot type of this dataset.",
+    )
+    parser.add_argument(
+        "--fps",
+        type=int,
+        default=None,
+        help="Frame rate used to collect videos. Defaults to the control frequency of the robot.",
+    )
+    parser.add_argument(
+        "--use-videos",
+        action="store_true",
+        help="Convert each episode of the raw dataset to an mp4 video.
This option allows 60 times lower disk space consumption and 25 times faster loading time during training.",
+    )
+    parser.add_argument(
+        "--batch-size",
+        type=int,
+        default=32,
+        help="Batch size loaded by DataLoader for computing the dataset statistics.",
+    )
+    parser.add_argument(
+        "--num-workers",
+        type=int,
+        default=8,
+        help="Number of DataLoader worker processes for computing the dataset statistics.",
+    )
+    parser.add_argument(
+        "--image-writer-process",
+        type=int,
+        default=5,
+        help="Number of image-writer processes for saving images.",
+    )
+    parser.add_argument(
+        "--image-writer-threads",
+        type=int,
+        default=10,
+        help="Number of threads per image-writer process for saving images.",
+    )
+    parser.add_argument(
+        "--keep-images",
+        action="store_true",
+        help="Whether to keep the cached images.",
+    )
+
+    args = parser.parse_args()
+    create_lerobot_dataset(**vars(args))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/port_datasets/oxe_utils/configs.py b/examples/port_datasets/oxe_utils/configs.py
new file mode 100644
index 00000000..02522f81
--- /dev/null
+++ b/examples/port_datasets/oxe_utils/configs.py
@@ -0,0 +1,848 @@
+"""
+Adapted from https://github.com/openvla/openvla/blob/main/prismatic/vla/datasets/rlds/oxe/configs.py
+configs.py
+
+Defines per-dataset configuration (kwargs) for each dataset in Open-X Embodiment.
+
+Configuration adopts the following structure:
+    image_obs_keys:
+        primary: primary external RGB
+        secondary: secondary external RGB
+        wrist: wrist RGB
+
+    depth_obs_keys:
+        primary: primary external depth
+        secondary: secondary external depth
+        wrist: wrist depth
+
+    # Always 8-dim =>> changes based on `StateEncoding`
+    state_obs_keys:
+        StateEncoding.POS_EULER: EEF XYZ (3) + Roll-Pitch-Yaw (3) + <PAD> (1) + Gripper Open/Close (1)
+        StateEncoding.POS_QUAT: EEF XYZ (3) + Quaternion (4) + Gripper Open/Close (1)
+        StateEncoding.JOINT: Joint Angles (7, <PAD> if fewer) + Gripper Open/Close (1)
+
+    state_encoding: Type of `StateEncoding`
+    action_encoding: Type of action encoding (e.g., EEF Position vs. Joint Position)
+"""
+
+from enum import IntEnum
+from typing import Dict
+
+import tensorflow as tf
+
+
+def zero_action_filter(traj: Dict) -> bool:
+    """
+    Filters transitions whose actions are all-0 (only relative actions, no gripper action).
+    Note: this filter is applied *after* action normalization, so we need to compare to "normalized 0".
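+
+    Concretely, a raw action x is normalized with DROID's 1st/99th percentile statistics as
+    2 * (x - q01) / (q99 - q01) - 1, so an all-zero raw action maps per dimension to
+    2 * (0 - q01) / (q99 - q01) - 1: the DROID_NORM_0_ACT tensor built below.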
+    """
+    DROID_Q01 = tf.convert_to_tensor(
+        [
+            -0.7776297926902771,
+            -0.5803514122962952,
+            -0.5795090794563293,
+            -0.6464047729969025,
+            -0.7041108310222626,
+            -0.8895104378461838,
+        ]
+    )
+    DROID_Q99 = tf.convert_to_tensor(
+        [
+            0.7597932070493698,
+            0.5726242214441299,
+            0.7351000607013702,
+            0.6705610305070877,
+            0.6464948207139969,
+            0.8897542208433151,
+        ]
+    )
+    DROID_NORM_0_ACT = 2 * (tf.zeros_like(traj["action"][:, :6]) - DROID_Q01) / (DROID_Q99 - DROID_Q01 + 1e-8) - 1
+
+    return tf.reduce_any(tf.math.abs(traj["action"][:, :6] - DROID_NORM_0_ACT) > 1e-5)
+
+
+# Defines Proprioceptive State Encoding Schemes
+class StateEncoding(IntEnum):
+    # fmt: off
+    NONE = -1               # No Proprioceptive State
+    POS_EULER = 1           # EEF XYZ (3) + Roll-Pitch-Yaw (3) + <PAD> (1) + Gripper Open/Close (1)
+    POS_QUAT = 2            # EEF XYZ (3) + Quaternion (4) + Gripper Open/Close (1)
+    JOINT = 3               # Joint Angles (7, <PAD> if fewer) + Gripper Open/Close (1)
+    JOINT_BIMANUAL = 4      # Joint Angles (2 x [ Joint Angles (6) + Gripper Open/Close (1) ])
+    # fmt: on
+
+
+# Defines Action Encoding Schemes
+class ActionEncoding(IntEnum):
+    # fmt: off
+    EEF_POS = 1             # EEF Delta XYZ (3) + Roll-Pitch-Yaw (3) + Gripper Open/Close (1)
+    JOINT_POS = 2           # Joint Delta Position (7) + Gripper Open/Close (1)
+    JOINT_POS_BIMANUAL = 3  # Joint Delta Position (2 x [ Joint Delta Position (6) + Gripper Open/Close (1) ])
+    EEF_R6 = 4              # EEF Delta XYZ (3) + R6 (6) + Gripper Open/Close (1)
+    # fmt: on
+
+
+# === Individual Dataset Configs ===
+OXE_DATASET_CONFIGS = {
+    "fractal20220817_data": {
+        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
+        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
+        "state_obs_keys": ["base_pose_tool_reached", "gripper_closed"],
+        "state_encoding": StateEncoding.POS_QUAT,
+        "action_encoding": ActionEncoding.EEF_POS,
+        "control_frequency": 3,
+        "robot_type": "Google Robot",
+    },
+    "kuka": {
+        "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
+        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
+        "state_obs_keys": [
+            "clip_function_input/base_pose_tool_reached",
+            "gripper_closed",
+        ],
+        "state_encoding": StateEncoding.POS_QUAT,
+        "action_encoding": ActionEncoding.EEF_POS,
+        "control_frequency": 10,
+        "robot_type": "Kuka iiwa",
+    },
+    "bridge_oxe": {  # Version of Bridge V2 in Open X-Embodiment mixture
+        "image_obs_keys": {"primary": "image", "secondary": "image_1", "wrist": None},
+        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
+        "state_obs_keys": ["EEF_state", None, "gripper_state"],
+        "state_encoding": StateEncoding.POS_EULER,
+        "action_encoding": ActionEncoding.EEF_POS,
+        "control_frequency": 5,
+        "robot_type": "WidowX",
+    },
+    "bridge_orig": {  # Original version of Bridge V2 from project website
+        "image_obs_keys": {"primary": "image_0", "secondary": "image_1", "wrist": None},
+        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
+        "state_obs_keys": ["EEF_state", None, "gripper_state"],
+        "state_encoding": StateEncoding.POS_EULER,
+        "action_encoding": ActionEncoding.EEF_POS,
+        "control_frequency": 5,
+        "robot_type": "WidowX",
+    },
+    "bridge_dataset": {  # Original version of Bridge V2 from project website
+        "image_obs_keys": {"primary": "image_0", "secondary": "image_1", "wrist": None},
+        "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
+        "state_obs_keys": ["EEF_state", None, "gripper_state"],
+        "state_encoding": StateEncoding.POS_EULER,
+        "action_encoding": ActionEncoding.EEF_POS,
+ "control_frequency": 5, + "robot_type": "WidowX", + }, + "taco_play": { + "image_obs_keys": { + "primary": "rgb_static", + "secondary": None, + "wrist": "rgb_gripper", + }, + "depth_obs_keys": { + "primary": "depth_static", + "secondary": None, + "wrist": "depth_gripper", + }, + "state_obs_keys": ["state_eef", None, "state_gripper"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 15, + "robot_type": "Franka", + }, + "jaco_play": { + "image_obs_keys": { + "primary": "image", + "secondary": None, + "wrist": "image_wrist", + }, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["state_eef", None, "state_gripper"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "Jaco 2", + }, + "berkeley_cable_routing": { + "image_obs_keys": { + "primary": "image", + "secondary": "top_image", + "wrist": "wrist45_image", + }, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["robot_state", None], + "state_encoding": StateEncoding.JOINT, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "Franka", + }, + "roboturk": { + "image_obs_keys": {"primary": "front_rgb", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": [None, None, None, None, None, None, None, None], + "state_encoding": StateEncoding.NONE, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "Sawyer", + }, + "nyu_door_opening_surprising_effectiveness": { + "image_obs_keys": {"primary": None, "secondary": None, "wrist": "image"}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": [None, None, None, None, None, None, None, None], + "state_encoding": StateEncoding.NONE, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 3, + "robot_type": "Hello Stretch", + }, + "viola": { + "image_obs_keys": { + "primary": "agentview_rgb", + "secondary": None, + "wrist": "eye_in_hand_rgb", + }, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["joint_states", "gripper_states"], + "state_encoding": StateEncoding.JOINT, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 20, + "robot_type": "Franka", + }, + "berkeley_autolab_ur5": { + "image_obs_keys": { + "primary": "image", + "secondary": None, + "wrist": "hand_image", + }, + "depth_obs_keys": {"primary": "depth", "secondary": None, "wrist": None}, + "state_obs_keys": ["state"], + "state_encoding": StateEncoding.POS_QUAT, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 5, + "robot_type": "UR5", + }, + "toto": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["state", None], + "state_encoding": StateEncoding.JOINT, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 30, + "robot_type": "Franka", + }, + "language_table": { + "image_obs_keys": {"primary": "rgb", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["effector_translation", None, None, None, None, None, None], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + 
"robot_type": "xArm", + }, + "columbia_cairlab_pusht_real": { + "image_obs_keys": { + "primary": "image", + "secondary": None, + "wrist": "wrist_image", + }, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["robot_state", None, None, None, None, None, None], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "UR5", + }, + "stanford_kuka_multimodal_dataset_converted_externally_to_rlds": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": "depth_image", "secondary": None, "wrist": None}, + "state_obs_keys": ["ee_position", "ee_orientation", None], + "state_encoding": StateEncoding.POS_QUAT, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 20, + "robot_type": "Kuka iiwa", + }, + "nyu_rot_dataset_converted_externally_to_rlds": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["eef_state", None, "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 3, + "robot_type": "xArm", + }, + "stanford_hydra_dataset_converted_externally_to_rlds": { + "image_obs_keys": { + "primary": "image", + "secondary": None, + "wrist": "wrist_image", + }, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["eef_state", None, "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "Franka", + }, + "austin_buds_dataset_converted_externally_to_rlds": { + "image_obs_keys": { + "primary": "image", + "secondary": None, + "wrist": "wrist_image", + }, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["state"], + "state_encoding": StateEncoding.JOINT, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 20, + "robot_type": "Franka", + }, + "nyu_franka_play_dataset_converted_externally_to_rlds": { + "image_obs_keys": { + "primary": "image", + "secondary": "image_additional_view", + "wrist": None, + }, + "depth_obs_keys": { + "primary": "depth", + "secondary": "depth_additional_view", + "wrist": None, + }, + "state_obs_keys": ["eef_state", None, None], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 3, + "robot_type": "Franka", + }, + "maniskill_dataset_converted_externally_to_rlds": { + "image_obs_keys": { + "primary": "image", + "secondary": None, + "wrist": "wrist_image", + }, + "depth_obs_keys": { + "primary": "depth", + "secondary": None, + "wrist": "wrist_depth", + }, + "state_obs_keys": ["tcp_pose", "gripper_state"], + "state_encoding": StateEncoding.POS_QUAT, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 20, + "robot_type": "Franka", + }, + "furniture_bench_dataset_converted_externally_to_rlds": { + "image_obs_keys": { + "primary": "image", + "secondary": None, + "wrist": "wrist_image", + }, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["state"], + "state_encoding": StateEncoding.POS_QUAT, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "Franka", + }, + "cmu_franka_exploration_dataset_converted_externally_to_rlds": { + "image_obs_keys": { + "primary": 
"highres_image", + "secondary": None, + "wrist": None, + }, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": [None, None, None, None, None, None, None, None], + "state_encoding": StateEncoding.NONE, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "Franka", + }, + "ucsd_kitchen_dataset_converted_externally_to_rlds": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["joint_state", None], + "state_encoding": StateEncoding.JOINT, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 2, + "robot_type": "xArm", + }, + "ucsd_pick_and_place_dataset_converted_externally_to_rlds": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["eef_state", None, "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 3, + "robot_type": "xArm", + }, + "austin_sailor_dataset_converted_externally_to_rlds": { + "image_obs_keys": { + "primary": "image", + "secondary": None, + "wrist": "wrist_image", + }, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["state"], + "state_encoding": StateEncoding.POS_QUAT, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 20, + "robot_type": "Franka", + }, + "austin_sirius_dataset_converted_externally_to_rlds": { + "image_obs_keys": { + "primary": "image", + "secondary": None, + "wrist": "wrist_image", + }, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["state"], + "state_encoding": StateEncoding.POS_QUAT, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 20, + "robot_type": "Franka", + }, + "bc_z": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": [ + "present/xyz", + "present/axis_angle", + None, + "present/sensed_close", + ], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "Google Robot", + }, + "utokyo_pr2_opening_fridge_converted_externally_to_rlds": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["eef_state", None, "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "PR2", + }, + "utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["eef_state", None, "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "PR2", + }, + "utokyo_xarm_pick_and_place_converted_externally_to_rlds": { + "image_obs_keys": { + "primary": "image", + "secondary": "image2", + "wrist": "hand_image", + }, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["end_effector_pose", None, None], + "state_encoding": StateEncoding.POS_EULER, + 
"action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "xArm", + }, + "utokyo_xarm_bimanual_converted_externally_to_rlds": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["pose_r", None, None], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "xArm Bimanual", + }, + "robo_net": { + "image_obs_keys": {"primary": "image", "secondary": "image1", "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["eef_state", None, "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 1, + "robot_type": "Multi-Robot", + }, + "berkeley_mvp_converted_externally_to_rlds": { + "image_obs_keys": {"primary": None, "secondary": None, "wrist": "hand_image"}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["pose", "gripper"], + "state_encoding": StateEncoding.POS_QUAT, + "action_encoding": ActionEncoding.JOINT_POS, + "control_frequency": 5, + "robot_type": "xArm", + }, + "berkeley_rpt_converted_externally_to_rlds": { + "image_obs_keys": {"primary": None, "secondary": None, "wrist": "hand_image"}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["joint_pos", "gripper"], + "state_encoding": StateEncoding.JOINT, + "action_encoding": ActionEncoding.JOINT_POS, + "control_frequency": 30, + "robot_type": "Franka", + }, + "kaist_nonprehensile_converted_externally_to_rlds": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["state", None], + "state_encoding": StateEncoding.POS_QUAT, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "Franka", + }, + "stanford_mask_vit_converted_externally_to_rlds": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["eef_state", None, "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": None, + "robot_type": "Sawyer", + }, + "tokyo_u_lsmo_converted_externally_to_rlds": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["eef_state", None, "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "Cobotta", + }, + "dlr_sara_pour_converted_externally_to_rlds": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["state", None, None], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "DLR SARA", + }, + "dlr_sara_grid_clamp_converted_externally_to_rlds": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["state", None, None], + "state_encoding": 
StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "DLR SARA", + }, + "dlr_edan_shared_control_converted_externally_to_rlds": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["state", None], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 5, + "robot_type": "DLR EDAN", + }, + "asu_table_top_converted_externally_to_rlds": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["eef_state", None, "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 12.5, + "robot_type": "UR5", + }, + "stanford_robocook_converted_externally_to_rlds": { + "image_obs_keys": {"primary": "image_1", "secondary": "image_2", "wrist": None}, + "depth_obs_keys": {"primary": "depth_1", "secondary": "depth_2", "wrist": None}, + "state_obs_keys": ["eef_state", None, "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 5, + "robot_type": "Franka", + }, + "imperialcollege_sawyer_wrist_cam": { + "image_obs_keys": { + "primary": "image", + "secondary": None, + "wrist": "wrist_image", + }, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": [None, None, None, None, None, None, None, "state"], + "state_encoding": StateEncoding.NONE, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "Sawyer", + }, + "iamlab_cmu_pickup_insert_converted_externally_to_rlds": { + "image_obs_keys": { + "primary": "image", + "secondary": None, + "wrist": "wrist_image", + }, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["joint_state", "gripper_state"], + "state_encoding": StateEncoding.JOINT, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 20, + "robot_type": "Franka", + }, + "uiuc_d3field": { + "image_obs_keys": {"primary": "image_1", "secondary": "image_2", "wrist": None}, + "depth_obs_keys": {"primary": "depth_1", "secondary": "depth_2", "wrist": None}, + "state_obs_keys": [None, None, None, None, None, None, None, None], + "state_encoding": StateEncoding.NONE, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 1, + "robot_type": "Kinova Gen3", + }, + "utaustin_mutex": { + "image_obs_keys": { + "primary": "image", + "secondary": None, + "wrist": "wrist_image", + }, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["state"], + "state_encoding": StateEncoding.JOINT, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 20, + "robot_type": "Franka", + }, + "berkeley_fanuc_manipulation": { + "image_obs_keys": { + "primary": "image", + "secondary": None, + "wrist": "wrist_image", + }, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["joint_state", None, "gripper_state"], + "state_encoding": StateEncoding.JOINT, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "Fanuc Mate", + }, + "cmu_playing_with_food": { + "image_obs_keys": { + "primary": "image", + "secondary": None, + "wrist": "finger_vision_1", + }, + "depth_obs_keys": {"primary": None, 
"secondary": None, "wrist": None}, + "state_obs_keys": ["state", None, None], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "Franka", + }, + "cmu_play_fusion": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["state"], + "state_encoding": StateEncoding.JOINT, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 5, + "robot_type": "Franka", + }, + "cmu_stretch": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["eef_state", None, "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "Hello Stretch", + }, + "berkeley_gnm_recon": { + "image_obs_keys": {"primary": None, "secondary": None, "wrist": "image"}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["state", None, None], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 3, + "robot_type": "Jackal", + }, + "berkeley_gnm_cory_hall": { + "image_obs_keys": {"primary": None, "secondary": None, "wrist": "image"}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["state", None, None], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 5, + "robot_type": "RC Car", + }, + "berkeley_gnm_sac_son": { + "image_obs_keys": {"primary": None, "secondary": None, "wrist": "image"}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["state", None, None], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "TurtleBot 2", + }, + # NOTE: modified + "droid": { + "image_obs_keys": { + "primary": "exterior_image_1_left", + "secondary": "exterior_image_2_left", + "wrist": "wrist_image_left", + }, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["EEF_state", None, "gripper_state"], + "state_encoding": StateEncoding.POS_QUAT, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 15, + "robot_type": "Franka", + "aux_kwargs": { + "dataset_frame_transform_kwargs": { + "chunk_filter_fn": zero_action_filter, + }, + }, + }, + "fmb_dataset": { + "image_obs_keys": { + "primary": "image_side_1", + "secondary": "image_side_2", + "wrist": "image_wrist_1", + }, + "depth_obs_keys": { + "primary": "image_side_1_depth", + "secondary": "image_side_2_depth", + "wrist": "image_wrist_1_depth", + }, + "state_obs_keys": ["proprio"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "Franka", + }, + # NOTE: modified + "dobbe": { + "image_obs_keys": {"primary": "wrist_image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["EEF_state", None, "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 3.75, + "robot_type": "Hello Stretch", + }, + "roboset": { + "image_obs_keys": { + "primary": "image_left", + "secondary": 
"image_right", + "wrist": "image_wrist", + }, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["proprio"], + "state_encoding": StateEncoding.JOINT, + "action_encoding": ActionEncoding.JOINT_POS, + "control_frequency": 5, + "robot_type": "Franka", + }, + "rh20t": { + "image_obs_keys": { + "primary": "image_front", + "secondary": "image_side_right", + "wrist": "image_wrist", + }, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["proprio"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 10, + "robot_type": "Flexiv", + }, + ### T-DROID datasets + "tdroid_carrot_in_bowl": { # "put carrot in bowl" task, 50 demos @ 5 Hz control + "image_obs_keys": {"primary": "static_image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": "static_depth_image", "secondary": None, "wrist": None}, + "state_obs_keys": ["EEF_state", None, "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 5, + "robot_type": "Franka", + }, + "tdroid_pour_corn_in_pot": { # "pour corn from red bonawl into steel pot" task, 50 demos @ 5 Hz control + "image_obs_keys": {"primary": "static_image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": "static_depth_image", "secondary": None, "wrist": None}, + "state_obs_keys": ["EEF_state", None, "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 5, + "robot_type": "Franka", + }, + "tdroid_flip_pot_upright": { # "flip pot upright" task, 10 demos @ 5 Hz control + "image_obs_keys": {"primary": "static_image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": "static_depth_image", "secondary": None, "wrist": None}, + "state_obs_keys": ["EEF_state", None, "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 5, + "robot_type": "Franka", + }, + "tdroid_move_object_onto_plate": { # "move onto plate" task, 150 demos @ 5 Hz control + "image_obs_keys": {"primary": "static_image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": "static_depth_image", "secondary": None, "wrist": None}, + "state_obs_keys": ["EEF_state", None, "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 5, + "robot_type": "Franka", + }, + "tdroid_knock_object_over": { # "knock over" task, 70 demos @ 5 Hz control + "image_obs_keys": {"primary": "static_image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": "static_depth_image", "secondary": None, "wrist": None}, + "state_obs_keys": ["EEF_state", None, "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 5, + "robot_type": "Franka", + }, + "tdroid_cover_object_with_towel": { # "cover with towel" task, 45 demos @ 5 Hz control + "image_obs_keys": {"primary": "static_image", "secondary": None, "wrist": None}, + "depth_obs_keys": {"primary": "static_depth_image", "secondary": None, "wrist": None}, + "state_obs_keys": ["EEF_state", None, "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 5, + "robot_type": "Franka", + }, + ### DROID Finetuning datasets + "droid_wipe": { + 
"image_obs_keys": {"primary": "exterior_image_2_left", "secondary": None, "wrist": "wrist_image_left"}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["proprio"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 15, + "robot_type": "Franka", + }, + # NOTE: modified + ### LIBERO datasets (modified versions) + "libero_spatial_no_noops": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": "wrist_image"}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["EEF_state", "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 20, + "robot_type": "Franka", + }, + "libero_object_no_noops": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": "wrist_image"}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["EEF_state", "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 20, + "robot_type": "Franka", + }, + "libero_goal_no_noops": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": "wrist_image"}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["EEF_state", "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 20, + "robot_type": "Franka", + }, + "libero_10_no_noops": { + "image_obs_keys": {"primary": "image", "secondary": None, "wrist": "wrist_image"}, + "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, + "state_obs_keys": ["EEF_state", "gripper_state"], + "state_encoding": StateEncoding.POS_EULER, + "action_encoding": ActionEncoding.EEF_POS, + "control_frequency": 20, + "robot_type": "Franka", + }, +} diff --git a/examples/port_datasets/oxe_utils/transform_utils.py b/examples/port_datasets/oxe_utils/transform_utils.py new file mode 100644 index 00000000..ca250ca6 --- /dev/null +++ b/examples/port_datasets/oxe_utils/transform_utils.py @@ -0,0 +1,76 @@ +""" +Copied from https://github.com/openvla/openvla/blob/main/prismatic/vla/datasets/rlds/utils/data_utils.py +""" + + +from typing import Any, Dict + +import tensorflow as tf + + +def binarize_gripper_actions(actions: tf.Tensor) -> tf.Tensor: + """ + Converts gripper actions from continuous to binary values (0 and 1). + + We exploit that fact that most of the time, the gripper is fully open (near 1.0) or fully closed (near 0.0). As it + transitions between the two, it sometimes passes through a few intermediate values. We relabel those intermediate + values based on the state that is reached _after_ those intermediate values. + + In the edge case that the trajectory ends with an intermediate value, we give up on binarizing and relabel that + chunk of intermediate values as the last action in the trajectory. 
+ + The `scan_fn` implements the following logic: + new_actions = np.empty_like(actions) + carry = actions[-1] + for i in reversed(range(actions.shape[0])): + if in_between_mask[i]: + carry = carry + else: + carry = float(open_mask[i]) + new_actions[i] = carry + """ + open_mask, closed_mask = actions > 0.95, actions < 0.05 + in_between_mask = tf.logical_not(tf.logical_or(open_mask, closed_mask)) + is_open_float = tf.cast(open_mask, tf.float32) + + def scan_fn(carry, i): + return tf.cond(in_between_mask[i], lambda: tf.cast(carry, tf.float32), lambda: is_open_float[i]) + + return tf.scan(scan_fn, tf.range(tf.shape(actions)[0]), actions[-1], reverse=True) + + +def invert_gripper_actions(actions: tf.Tensor) -> tf.Tensor: + return 1 - actions + + +def rel2abs_gripper_actions(actions: tf.Tensor) -> tf.Tensor: + """ + Converts relative gripper actions (+1 for closing, -1 for opening) to absolute actions (0 = closed; 1 = open). + + Assumes that the first relative gripper action is not redundant (i.e. close when already closed)! + """ + # Note =>> -1 for closing, 1 for opening, 0 for no change + opening_mask, closing_mask = actions < -0.1, actions > 0.1 + thresholded_actions = tf.where(opening_mask, 1, tf.where(closing_mask, -1, 0)) + + def scan_fn(carry, i): + return tf.cond(thresholded_actions[i] == 0, lambda: carry, lambda: thresholded_actions[i]) + + # If no relative grasp, assumes open for whole trajectory + start = -1 * thresholded_actions[tf.argmax(thresholded_actions != 0, axis=0)] + start = tf.cond(start == 0, lambda: 1, lambda: start) + + # Note =>> -1 for closed, 1 for open + new_actions = tf.scan(scan_fn, tf.range(tf.shape(actions)[0]), start) + new_actions = tf.cast(new_actions, tf.float32) / 2 + 0.5 + + return new_actions + +# === Bridge-V2 =>> Dataset-Specific Transform === +def relabel_bridge_actions(traj: Dict[str, Any]) -> Dict[str, Any]: + """Relabels actions to use reached proprioceptive state; discards last timestep (no-action).""" + movement_actions = traj["observation"]["state"][1:, :6] - traj["observation"]["state"][:-1, :6] + traj_truncated = tf.nest.map_structure(lambda x: x[:-1], traj) + traj_truncated["action"] = tf.concat([movement_actions, traj["action"][:-1, -1:]], axis=1) + + return traj_truncated diff --git a/examples/port_datasets/oxe_utils/transforms.py b/examples/port_datasets/oxe_utils/transforms.py new file mode 100644 index 00000000..6eaa496c --- /dev/null +++ b/examples/port_datasets/oxe_utils/transforms.py @@ -0,0 +1,985 @@ +""" +Adapted from https://github.com/openvla/openvla/blob/main/prismatic/vla/datasets/rlds/oxe/transforms.py +transforms.py + +Defines a registry of per-dataset standardization transforms for each dataset in Open-X Embodiment. + +Transforms adopt the following structure: + Input: Dictionary of *batched* features (i.e., has leading time dimension) + Output: Dictionary `step` =>> { + "observation": { + <image_keys, depth_image_keys> + State (in chosen state representation) + }, + "action": Action (in chosen action representation), + "language_instruction": str + } +""" + +from typing import Any, Dict + +import tensorflow as tf +from oxe_utils.transform_utils import ( + binarize_gripper_actions, + invert_gripper_actions, + rel2abs_gripper_actions, + relabel_bridge_actions, +) + + +def droid_baseact_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + """ + DROID dataset transformation for actions expressed in *base* frame of the robot. + """ + def rand_swap_exterior_images(img1, img2): + """ + Randomly swaps the two exterior images (for training with single exterior input).
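+ + A single uniform draw decides the ordering for the whole trajectory, so both exterior streams stay + temporally consistent after the swap.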
+ """ + return tf.cond(tf.random.uniform(shape=[]) > 0.5, lambda: (img1, img2), lambda: (img2, img1)) + + dt = trajectory["action_dict"]["cartesian_velocity"][:, :3] + dR = trajectory["action_dict"]["cartesian_velocity"][:, 3:6] + + trajectory["action"] = tf.concat( + ( + dt, + dR, + 1 - trajectory["action_dict"]["gripper_position"], + ), + axis=-1, + ) + trajectory["observation"]["exterior_image_1_left"], trajectory["observation"]["exterior_image_2_left"] = ( + rand_swap_exterior_images( + trajectory["observation"]["exterior_image_1_left"], + trajectory["observation"]["exterior_image_2_left"], + ) + ) + # trajectory["observation"]["proprio"] = tf.concat( + # ( + # trajectory["observation"]["cartesian_position"], + # trajectory["observation"]["gripper_position"], + # ), + # axis=-1, + # ) + trajectory["observation"]["EEF_state"] = trajectory["observation"]["cartesian_position"] + trajectory["observation"]["gripper_state"] = trajectory["observation"]["gripper_position"] + return trajectory + + +def droid_finetuning_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + """ + DROID dataset transformation for actions expressed in *base* frame of the robot. + """ + dt = trajectory["action_dict"]["cartesian_velocity"][:, :3] + dR = trajectory["action_dict"]["cartesian_velocity"][:, 3:6] + trajectory["action"] = tf.concat( + ( + dt, + dR, + 1 - trajectory["action_dict"]["gripper_position"], + ), + axis=-1, + ) + trajectory["observation"]["proprio"] = tf.concat( + ( + trajectory["observation"]["cartesian_position"], + trajectory["observation"]["gripper_position"], + ), + axis=-1, + ) + return trajectory + + +def bridge_oxe_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + """ + Applies to version of Bridge V2 in Open X-Embodiment mixture. + + Note =>> In original Bridge V2 dataset, the first timestep has an all-zero action, so we remove it! + """ + for key in trajectory.keys(): + if key == "traj_metadata": + continue + elif key in ["observation", "action"]: + for key2 in trajectory[key]: + trajectory[key][key2] = trajectory[key][key2][1:] + else: + trajectory[key] = trajectory[key][1:] + + trajectory["action"] = tf.concat( + ( + trajectory["action"]["world_vector"], + trajectory["action"]["rotation_delta"], + tf.cast(trajectory["action"]["open_gripper"][:, None], tf.float32), + ), + axis=-1, + ) + trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] + trajectory = relabel_bridge_actions(trajectory) + trajectory["observation"]["EEF_state"] = trajectory["observation"]["state"][:, :6] + trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:] + return trajectory + + +def bridge_orig_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + """ + Applies to original version of Bridge V2 from the official project website. + + Note =>> In original Bridge V2 dataset, the first timestep has an all-zero action, so we remove it! 
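+ + The action assembled below is 6 movement dimensions followed by one gripper dimension binarized to + {0, 1} via binarize_gripper_actions, after which relabel_bridge_actions recomputes the movement part + from the reached proprioceptive states.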
+ """ + for key in trajectory.keys(): + if key == "traj_metadata": + continue + elif key == "observation": + for key2 in trajectory[key]: + trajectory[key][key2] = trajectory[key][key2][1:] + else: + trajectory[key] = trajectory[key][1:] + + trajectory["action"] = tf.concat( + [ + trajectory["action"][:, :6], + binarize_gripper_actions(trajectory["action"][:, -1])[:, None], + ], + axis=1, + ) + trajectory = relabel_bridge_actions(trajectory) + trajectory["observation"]["EEF_state"] = trajectory["observation"]["state"][:, :6] + trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:] + return trajectory + + +def ppgm_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["action"] = tf.concat( + [ + trajectory["action"][:, :6], + binarize_gripper_actions(trajectory["action"][:, -1])[:, None], + ], + axis=1, + ) + trajectory["observation"]["EEF_state"] = trajectory["observation"]["cartesian_position"][:, :6] + trajectory["observation"]["gripper_state"] = trajectory["observation"]["gripper_position"][:, -1:] + return trajectory + + +def rt1_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + # make gripper action absolute action, +1 = open, 0 = close + gripper_action = trajectory["action"]["gripper_closedness_action"][:, 0] + gripper_action = rel2abs_gripper_actions(gripper_action) + + trajectory["action"] = tf.concat( + ( + trajectory["action"]["world_vector"], + trajectory["action"]["rotation_delta"], + gripper_action[:, None], + ), + axis=-1, + ) + trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] + return trajectory + + +def kuka_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + # make gripper action absolute action, +1 = open, 0 = close + gripper_action = trajectory["action"]["gripper_closedness_action"][:, 0] + gripper_action = rel2abs_gripper_actions(gripper_action) + + trajectory["action"] = tf.concat( + ( + trajectory["action"]["world_vector"], + trajectory["action"]["rotation_delta"], + gripper_action[:, None], + ), + axis=-1, + ) + # decode compressed state + eef_value = tf.io.decode_compressed( + trajectory["observation"]["clip_function_input/base_pose_tool_reached"], + compression_type="ZLIB", + ) + eef_value = tf.io.decode_raw(eef_value, tf.float32) + trajectory["observation"]["clip_function_input/base_pose_tool_reached"] = tf.reshape(eef_value, (-1, 7)) + gripper_value = tf.io.decode_compressed(trajectory["observation"]["gripper_closed"], compression_type="ZLIB") + gripper_value = tf.io.decode_raw(gripper_value, tf.float32) + trajectory["observation"]["gripper_closed"] = tf.reshape(gripper_value, (-1, 1)) + # trajectory["language_instruction"] = tf.fill( + # tf.shape(trajectory["observation"]["natural_language_instruction"]), "" + # ) # delete uninformative language instruction + trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] + return trajectory + + +def taco_play_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["state_eef"] = trajectory["observation"]["robot_obs"][:, :6] + trajectory["observation"]["state_gripper"] = trajectory["observation"]["robot_obs"][:, 7:8] + trajectory["action"] = trajectory["action"]["rel_actions_world"] + + # invert gripper action + clip, +1 = open, 0 = close + trajectory["action"] = tf.concat( + ( + trajectory["action"][:, :6], + tf.clip_by_value(trajectory["action"][:, -1:], 0, 1), + ), + axis=-1, + ) + + 
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] + return trajectory + + +def jaco_play_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["state_eef"] = trajectory["observation"]["end_effector_cartesian_pos"][:, :6] + trajectory["observation"]["state_gripper"] = trajectory["observation"]["end_effector_cartesian_pos"][:, -1:] + + # make gripper action absolute action, +1 = open, 0 = close + gripper_action = trajectory["action"]["gripper_closedness_action"][:, 0] + gripper_action = rel2abs_gripper_actions(gripper_action) + + trajectory["action"] = tf.concat( + ( + trajectory["action"]["world_vector"], + tf.zeros_like(trajectory["action"]["world_vector"]), + gripper_action[:, None], + ), + axis=-1, + ) + trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] + return trajectory + + +def berkeley_cable_routing_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["action"] = tf.concat( + ( + trajectory["action"]["world_vector"], + trajectory["action"]["rotation_delta"], + tf.zeros_like(trajectory["action"]["world_vector"][:, :1]), + ), + axis=-1, + ) + # trajectory["language_instruction"] = tf.fill( + # tf.shape(trajectory["observation"]["natural_language_instruction"]), "" + # ) # delete uninformative language instruction + trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] + return trajectory + + +def roboturk_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + # invert absolute gripper action, +1 = open, 0 = close + gripper_action = invert_gripper_actions(tf.clip_by_value(trajectory["action"]["gripper_closedness_action"], 0, 1)) + + trajectory["action"] = tf.concat( + ( + trajectory["action"]["world_vector"], + trajectory["action"]["rotation_delta"], + gripper_action, + ), + axis=-1, + ) + # trajectory["language_instruction"] = tf.fill( + # tf.shape(trajectory["observation"]["natural_language_instruction"]), "" + # ) # delete uninformative language instruction + trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] + return trajectory + + +def nyu_door_opening_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + # make gripper action absolute action, +1 = open, 0 = close + gripper_action = trajectory["action"]["gripper_closedness_action"][:, 0] + gripper_action = rel2abs_gripper_actions(gripper_action) + + trajectory["action"] = tf.concat( + ( + trajectory["action"]["world_vector"], + trajectory["action"]["rotation_delta"], + gripper_action[:, None], + ), + axis=-1, + ) + # trajectory["language_instruction"] = tf.fill( + # tf.shape(trajectory["observation"]["natural_language_instruction"]), "" + # ) # delete uninformative language instruction + trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] + return trajectory + + +def viola_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + # make gripper action, +1 = open, 0 = close + gripper_action = trajectory["action"]["gripper_closedness_action"][:, None] + gripper_action = tf.clip_by_value(gripper_action, 0, 1) + gripper_action = invert_gripper_actions(gripper_action) + + trajectory["action"] = tf.concat( + ( + trajectory["action"]["world_vector"], + trajectory["action"]["rotation_delta"], + gripper_action, + ), + axis=-1, + ) + # trajectory["language_instruction"] = tf.fill( + # 
tf.shape(trajectory["observation"]["natural_language_instruction"]), "" + # ) # delete uninformative language instruction + trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] + return trajectory + + +def berkeley_autolab_ur5_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["state"] = trajectory["observation"]["robot_state"][:, 6:14] + trajectory["observation"]["depth"] = trajectory["observation"].pop("image_with_depth") + + # make gripper action absolute action, +1 = open, 0 = close + gripper_action = trajectory["action"]["gripper_closedness_action"] + gripper_action = rel2abs_gripper_actions(gripper_action) + + trajectory["action"] = tf.concat( + ( + trajectory["action"]["world_vector"], + trajectory["action"]["rotation_delta"], + gripper_action[:, None], + ), + axis=-1, + ) + trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] + return trajectory + + +def toto_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["action"] = tf.concat( + ( + trajectory["action"]["world_vector"], + trajectory["action"]["rotation_delta"], + tf.cast(trajectory["action"]["open_gripper"][:, None], tf.float32), + ), + axis=-1, + ) + # trajectory["language_instruction"] = tf.fill( + # tf.shape(trajectory["observation"]["natural_language_instruction"]), "" + # ) # delete uninformative language instruction + trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] + return trajectory + + +def language_table_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + # default to "open" gripper + trajectory["action"] = tf.concat( + ( + trajectory["action"], + tf.zeros_like(trajectory["action"]), + tf.zeros_like(trajectory["action"]), + tf.ones_like(trajectory["action"][:, :1]), + ), + axis=-1, + ) + + # decode language instruction + instruction_bytes = trajectory["observation"]["instruction"] + instruction_encoded = tf.strings.unicode_encode(instruction_bytes, output_encoding="UTF-8") + # Remove trailing padding --> convert RaggedTensor to regular Tensor. 
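+ # Splitting on "\x00" yields one ragged row of chunks per timestep; [:, :1] keeps only the chunk before the + # first padding byte, and .to_tensor()[:, 0] flattens it back to a 1-D string tensor (one instruction per timestep).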
+ trajectory["language_instruction"] = tf.strings.split(instruction_encoded, "\x00")[:, :1].to_tensor()[:, 0] + return trajectory + + +def pusht_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["action"] = tf.concat( + ( + trajectory["action"]["world_vector"], + trajectory["action"]["rotation_delta"], + trajectory["action"]["gripper_closedness_action"][:, None], + ), + axis=-1, + ) + trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] + return trajectory + + +def stanford_kuka_multimodal_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["depth_image"] = trajectory["observation"]["depth_image"][..., 0] + trajectory["action"] = tf.concat( + ( + trajectory["action"][:, :3], + tf.zeros_like(trajectory["action"][:, :3]), + trajectory["action"][:, -1:], + ), + axis=-1, + ) + return trajectory + + +def nyu_rot_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][..., :6] + trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][..., -1:] + trajectory["action"] = trajectory["action"][..., :7] + return trajectory + + +def stanford_hydra_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + # invert gripper action, +1 = open, 0 = close + trajectory["action"] = tf.concat( + ( + trajectory["action"][:, :6], + invert_gripper_actions(trajectory["action"][:, -1:]), + ), + axis=-1, + ) + + trajectory["observation"]["eef_state"] = tf.concat( + ( + trajectory["observation"]["state"][:, :3], + trajectory["observation"]["state"][:, 7:10], + ), + axis=-1, + ) + trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -3:-2] + # trajectory["language_instruction"] = tf.fill( + # tf.shape(trajectory["language_instruction"]), "" + # ) # delete uninformative language instruction + return trajectory + + +def austin_buds_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + # invert gripper action + clip, +1 = open, 0 = close + trajectory["action"] = tf.concat( + ( + trajectory["action"][:, :6], + invert_gripper_actions(tf.clip_by_value(trajectory["action"][:, -1:], 0, 1)), + ), + axis=-1, + ) + + trajectory["observation"]["state"] = trajectory["observation"]["state"][:, :8] + # trajectory["language_instruction"] = tf.fill( + # tf.shape(trajectory["language_instruction"]), "" + # ) # delete uninformative language instruction + return trajectory + + +def nyu_franka_play_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["depth"] = tf.cast(trajectory["observation"]["depth"][..., 0], tf.float32) + trajectory["observation"]["depth_additional_view"] = tf.cast( + trajectory["observation"]["depth_additional_view"][..., 0], tf.float32 + ) + trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, -6:] + + # clip gripper action, +1 = open, 0 = close + trajectory["action"] = tf.concat( + ( + trajectory["action"][:, -8:-2], + tf.clip_by_value(trajectory["action"][:, -2:-1], 0, 1), + ), + axis=-1, + ) + + # trajectory["language_instruction"] = tf.fill( + # tf.shape(trajectory["language_instruction"]), "" + # ) # delete uninformative language instruction + return trajectory + + +def maniskill_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][..., 7:8] + return trajectory + + +def 
furniture_bench_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + import tensorflow_graphics.geometry.transformation as tft + + trajectory["observation"]["state"] = tf.concat( + ( + trajectory["observation"]["state"][:, :7], + trajectory["observation"]["state"][:, -1:], + ), + axis=-1, + ) + + # invert gripper action + clip, +1 = open, 0 = close + trajectory["action"] = tf.concat( + ( + trajectory["action"][:, :3], + tft.euler.from_quaternion(trajectory["action"][:, 3:7]), + invert_gripper_actions(tf.clip_by_value(trajectory["action"][:, -1:], 0, 1)), + ), + axis=-1, + ) + return trajectory + + +def cmu_franka_exploration_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["action"] = trajectory["action"][..., :-1] + return trajectory + + +def ucsd_kitchen_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["joint_state"] = trajectory["observation"]["state"][:, :7] + trajectory["action"] = trajectory["action"][..., :-1] + return trajectory + + +def ucsd_pick_place_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, :6] + trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:] + trajectory["action"] = tf.concat( + ( + trajectory["action"][:, :3], + tf.zeros_like(trajectory["action"][:, :3]), + trajectory["action"][:, -1:], + ), + axis=-1, + ) + return trajectory + + +def austin_sailor_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + # invert gripper action + clip, +1 = open, 0 = close + trajectory["action"] = tf.concat( + ( + trajectory["action"][:, :6], + invert_gripper_actions(tf.clip_by_value(trajectory["action"][:, -1:], 0, 1)), + ), + axis=-1, + ) + + # trajectory["language_instruction"] = tf.fill( + # tf.shape(trajectory["language_instruction"]), "" + # ) # delete uninformative language instruction + return trajectory + + +def austin_sirius_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + # invert gripper action + clip, +1 = open, 0 = close + trajectory["action"] = tf.concat( + ( + trajectory["action"][:, :6], + invert_gripper_actions(tf.clip_by_value(trajectory["action"][:, -1:], 0, 1)), + ), + axis=-1, + ) + + # trajectory["language_instruction"] = tf.fill( + # tf.shape(trajectory["language_instruction"]), "" + # ) # delete uninformative language instruction + return trajectory + + +def bc_z_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["action"] = tf.concat( + ( + trajectory["action"]["future/xyz_residual"][:, :3], + trajectory["action"]["future/axis_angle_residual"][:, :3], + invert_gripper_actions(tf.cast(trajectory["action"]["future/target_close"][:, :1], tf.float32)), + ), + axis=-1, + ) + trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] + return trajectory + + +def tokyo_pr2_opening_fridge_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, :6] + trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:] + trajectory["action"] = trajectory["action"][..., :-1] + return trajectory + + +def tokyo_pr2_tabletop_manipulation_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, :6] + trajectory["observation"]["gripper_state"] = 
trajectory["observation"]["state"][:, -1:] + trajectory["action"] = trajectory["action"][..., :-1] + return trajectory + + +def utokyo_xarm_pick_place_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + return trajectory + + +def utokyo_xarm_bimanual_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["action"] = trajectory["action"][..., -7:] + return trajectory + + +def robo_net_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["eef_state"] = tf.concat( + ( + trajectory["observation"]["state"][:, :4], + tf.zeros_like(trajectory["observation"]["state"][:, :2]), + ), + axis=-1, + ) + trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:] + trajectory["action"] = tf.concat( + ( + trajectory["action"][:, :4], + tf.zeros_like(trajectory["action"][:, :2]), + trajectory["action"][:, -1:], + ), + axis=-1, + ) + return trajectory + + +def berkeley_mvp_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + return trajectory + + +def berkeley_rpt_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + return trajectory + + +def kaist_nonprehensible_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["state"] = trajectory["observation"]["state"][:, -7:] + trajectory["action"] = tf.concat( + ( + trajectory["action"][:, :6], + tf.zeros_like(trajectory["action"][:, :1]), + ), + axis=-1, + ) + return trajectory + + +def stanford_mask_vit_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["eef_state"] = tf.concat( + ( + trajectory["observation"]["end_effector_pose"][:, :4], + tf.zeros_like(trajectory["observation"]["end_effector_pose"][:, :2]), + ), + axis=-1, + ) + trajectory["observation"]["gripper_state"] = trajectory["observation"]["end_effector_pose"][:, -1:] + trajectory["action"] = tf.concat( + ( + trajectory["action"][:, :4], + tf.zeros_like(trajectory["action"][:, :2]), + trajectory["action"][:, -1:], + ), + axis=-1, + ) + return trajectory + + +def tokyo_lsmo_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, :6] + trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:] + return trajectory + + +def dlr_sara_pour_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + return trajectory + + +def dlr_sara_grid_clamp_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["state"] = trajectory["observation"]["state"][:, :6] + return trajectory + + +def dlr_edan_shared_control_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + # invert gripper action, +1 = open, 0 = close + trajectory["action"] = tf.concat( + ( + trajectory["action"][:, :6], + invert_gripper_actions(trajectory["action"][:, -1:]), + ), + axis=-1, + ) + return trajectory + + +def asu_table_top_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["eef_state"] = trajectory["ground_truth_states"]["EE"] + trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:] + return trajectory + + +def robocook_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, :6] + trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:] + return trajectory + + +def 
imperial_wristcam_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["action"] = trajectory["action"][..., :-1] + return trajectory + + +def iamlab_pick_insert_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + import tensorflow_graphics.geometry.transformation as tft + + trajectory["observation"]["joint_state"] = trajectory["observation"]["state"][:, :7] + trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, 7:8] + trajectory["action"] = tf.concat( + ( + trajectory["action"][:, :3], + tft.euler.from_quaternion(trajectory["action"][:, 3:7]), + trajectory["action"][:, 7:8], + ), + axis=-1, + ) + return trajectory + + +def uiuc_d3field_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["action"] = tf.concat( + ( + trajectory["action"], + tf.zeros_like(trajectory["action"]), + tf.zeros_like(trajectory["action"][:, :1]), + ), + axis=-1, + ) + return trajectory + + +def utaustin_mutex_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["state"] = trajectory["observation"]["state"][:, :8] + + # invert gripper action + clip, +1 = open, 0 = close + trajectory["action"] = tf.concat( + ( + trajectory["action"][:, :6], + invert_gripper_actions(tf.clip_by_value(trajectory["action"][:, -1:], 0, 1)), + ), + axis=-1, + ) + + # trajectory["language_instruction"] = tf.fill( + # tf.shape(trajectory["language_instruction"]), "" + # ) # delete uninformative language instruction + return trajectory + + +def berkeley_fanuc_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["joint_state"] = trajectory["observation"]["state"][:, :6] + trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, 6:7] + + # dataset does not store gripper actions, so use gripper state info, invert so +1 = open, 0 = close + trajectory["action"] = tf.concat( + ( + trajectory["action"], + invert_gripper_actions(trajectory["observation"]["gripper_state"]), + ), + axis=-1, + ) + return trajectory + + +def cmu_playing_with_food_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + import tensorflow_graphics.geometry.transformation as tft + + trajectory["action"] = tf.concat( + ( + trajectory["action"][:, :3], + tft.euler.from_quaternion(trajectory["action"][:, 3:7]), + trajectory["action"][:, -1:], + ), + axis=-1, + ) + return trajectory + + +def playfusion_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["action"] = tf.concat( + ( + trajectory["action"][:, :3], + trajectory["action"][:, -4:], + ), + axis=-1, + ) + return trajectory + + +def cmu_stretch_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["eef_state"] = tf.concat( + ( + trajectory["observation"]["state"][:, :3], + tf.zeros_like(trajectory["observation"]["state"][:, :3]), + ), + axis=-1, + ) + trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:] + trajectory["action"] = trajectory["action"][..., :-1] + return trajectory + + +def gnm_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["observation"]["state"] = tf.concat( + ( + trajectory["observation"]["position"], + tf.zeros_like(trajectory["observation"]["state"][:, :3]), + trajectory["observation"]["yaw"], + ), + axis=-1, + ) + trajectory["action"] = tf.concat( + ( + trajectory["action"], + tf.zeros_like(trajectory["action"]), + tf.zeros_like(trajectory["action"]), + 
tf.zeros_like(trajectory["action"][:, :1]), + ), + axis=-1, + ) + return trajectory + + +def fmb_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + # every input feature is batched, ie has leading batch dimension + trajectory["observation"]["proprio"] = tf.concat( + ( + trajectory["observation"]["eef_pose"], + trajectory["observation"]["state_gripper_pose"][..., None], + ), + axis=-1, + ) + return trajectory + + +def dobbe_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + # every input feature is batched, ie has leading batch dimension + trajectory["observation"]["EEF_state"] = trajectory["observation"]["state"][:, :6] + trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:] + return trajectory + + +def roboset_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + # every input feature is batched, ie has leading batch dimension + trajectory["observation"]["proprio"] = trajectory["observation"]["state"] + + # gripper action is in -1...1 --> clip to 0...1, flip + gripper_action = trajectory["action"][:, -1:] + gripper_action = invert_gripper_actions(tf.clip_by_value(gripper_action, 0, 1)) + + trajectory["action"] = tf.concat( + ( + trajectory["action"][:, :7], + gripper_action, + ), + axis=-1, + ) + return trajectory + + +def rh20t_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["action"] = tf.concat( + ( + trajectory["action"]["tcp_base"], + tf.cast(trajectory["action"]["gripper"][:, None], tf.float32), + ), + axis=-1, + ) + trajectory["observation"]["proprio"] = tf.concat( + ( + trajectory["observation"]["tcp_base"], + trajectory["observation"]["gripper_width"][..., None], + ), + axis=-1, + ) + return trajectory + + +def tdroid_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + trajectory["action"] = tf.concat( + [ + trajectory["action"][:, :6], + binarize_gripper_actions(trajectory["action"][:, -1])[:, None], + ], + axis=1, + ) + trajectory["observation"]["EEF_state"] = trajectory["observation"]["cartesian_position"][:, :6] + trajectory["observation"]["gripper_state"] = trajectory["observation"]["gripper_position"][:, -1:] + return trajectory + + +def libero_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: + # gripper action is in -1 (open)...1 (close) --> clip to 0...1, flip --> +1 = open, 0 = close + gripper_action = trajectory["action"][:, -1:] + gripper_action = invert_gripper_actions(tf.clip_by_value(gripper_action, 0, 1)) + + trajectory["action"] = tf.concat( + [ + trajectory["action"][:, :6], + gripper_action, + ], + axis=1, + ) + trajectory["observation"]["EEF_state"] = trajectory["observation"]["state"][:, :6] + trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -2:] # 2D gripper state + return trajectory + + +# === Registry === +OXE_STANDARDIZATION_TRANSFORMS = { + "bridge_oxe": bridge_oxe_dataset_transform, + "bridge_orig": bridge_orig_dataset_transform, + "bridge_dataset": bridge_orig_dataset_transform, + "ppgm": ppgm_dataset_transform, + "ppgm_static": ppgm_dataset_transform, + "ppgm_wrist": ppgm_dataset_transform, + "fractal20220817_data": rt1_dataset_transform, + "kuka": kuka_dataset_transform, + "taco_play": taco_play_dataset_transform, + "jaco_play": jaco_play_dataset_transform, + "berkeley_cable_routing": berkeley_cable_routing_dataset_transform, + "roboturk": roboturk_dataset_transform, + "nyu_door_opening_surprising_effectiveness": nyu_door_opening_dataset_transform, + "viola": 
viola_dataset_transform, + "berkeley_autolab_ur5": berkeley_autolab_ur5_dataset_transform, + "toto": toto_dataset_transform, + "language_table": language_table_dataset_transform, + "columbia_cairlab_pusht_real": pusht_dataset_transform, + "stanford_kuka_multimodal_dataset_converted_externally_to_rlds": stanford_kuka_multimodal_dataset_transform, + "nyu_rot_dataset_converted_externally_to_rlds": nyu_rot_dataset_transform, + "stanford_hydra_dataset_converted_externally_to_rlds": stanford_hydra_dataset_transform, + "austin_buds_dataset_converted_externally_to_rlds": austin_buds_dataset_transform, + "nyu_franka_play_dataset_converted_externally_to_rlds": nyu_franka_play_dataset_transform, + "maniskill_dataset_converted_externally_to_rlds": maniskill_dataset_transform, + "furniture_bench_dataset_converted_externally_to_rlds": furniture_bench_dataset_transform, + "cmu_franka_exploration_dataset_converted_externally_to_rlds": cmu_franka_exploration_dataset_transform, + "ucsd_kitchen_dataset_converted_externally_to_rlds": ucsd_kitchen_dataset_transform, + "ucsd_pick_and_place_dataset_converted_externally_to_rlds": ucsd_pick_place_dataset_transform, + "austin_sailor_dataset_converted_externally_to_rlds": austin_sailor_dataset_transform, + "austin_sirius_dataset_converted_externally_to_rlds": austin_sirius_dataset_transform, + "bc_z": bc_z_dataset_transform, + "utokyo_pr2_opening_fridge_converted_externally_to_rlds": tokyo_pr2_opening_fridge_dataset_transform, + "utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds": tokyo_pr2_tabletop_manipulation_dataset_transform, + "utokyo_xarm_pick_and_place_converted_externally_to_rlds": utokyo_xarm_pick_place_dataset_transform, + "utokyo_xarm_bimanual_converted_externally_to_rlds": utokyo_xarm_bimanual_dataset_transform, + "robo_net": robo_net_dataset_transform, + "berkeley_mvp_converted_externally_to_rlds": berkeley_mvp_dataset_transform, + "berkeley_rpt_converted_externally_to_rlds": berkeley_rpt_dataset_transform, + "kaist_nonprehensile_converted_externally_to_rlds": kaist_nonprehensible_dataset_transform, + "stanford_mask_vit_converted_externally_to_rlds": stanford_mask_vit_dataset_transform, + "tokyo_u_lsmo_converted_externally_to_rlds": tokyo_lsmo_dataset_transform, + "dlr_sara_pour_converted_externally_to_rlds": dlr_sara_pour_dataset_transform, + "dlr_sara_grid_clamp_converted_externally_to_rlds": dlr_sara_grid_clamp_dataset_transform, + "dlr_edan_shared_control_converted_externally_to_rlds": dlr_edan_shared_control_dataset_transform, + "asu_table_top_converted_externally_to_rlds": asu_table_top_dataset_transform, + "stanford_robocook_converted_externally_to_rlds": robocook_dataset_transform, + "imperialcollege_sawyer_wrist_cam": imperial_wristcam_dataset_transform, + "iamlab_cmu_pickup_insert_converted_externally_to_rlds": iamlab_pick_insert_dataset_transform, + "uiuc_d3field": uiuc_d3field_dataset_transform, + "utaustin_mutex": utaustin_mutex_dataset_transform, + "berkeley_fanuc_manipulation": berkeley_fanuc_dataset_transform, + "cmu_playing_with_food": cmu_playing_with_food_dataset_transform, + "cmu_play_fusion": playfusion_dataset_transform, + "cmu_stretch": cmu_stretch_dataset_transform, + "berkeley_gnm_recon": gnm_dataset_transform, + "berkeley_gnm_cory_hall": gnm_dataset_transform, + "berkeley_gnm_sac_son": gnm_dataset_transform, + "droid": droid_baseact_transform, + "fmb_dataset": fmb_dataset_transform, + "dobbe": dobbe_dataset_transform, + "roboset": roboset_dataset_transform, + "rh20t_rlds": rh20t_dataset_transform, + ### T-DROID 
datasets + "tdroid_carrot_in_bowl": tdroid_dataset_transform, + "tdroid_pour_corn_in_pot": tdroid_dataset_transform, + "tdroid_flip_pot_upright": tdroid_dataset_transform, + "tdroid_move_object_onto_plate": tdroid_dataset_transform, + "tdroid_knock_object_over": tdroid_dataset_transform, + "tdroid_cover_object_with_towel": tdroid_dataset_transform, + ### DROID Finetuning datasets + "droid_wipe": droid_finetuning_transform, + ### LIBERO datasets (modified versions) + "libero_spatial_no_noops": libero_dataset_transform, + "libero_object_no_noops": libero_dataset_transform, + "libero_goal_no_noops": libero_dataset_transform, + "libero_10_no_noops": libero_dataset_transform, +} From fbf2f2222a65cbfda352298eebef164b636b8a53 Mon Sep 17 00:00:00 2001 From: Simon Alibert <75076266+aliberts@users.noreply.github.com> Date: Wed, 19 Feb 2025 08:36:32 +0100 Subject: [PATCH 06/28] Remove `local_files_only` and use `codebase_version` instead of branches (#734) --- examples/port_datasets/pusht_zarr.py | 2 +- .../common/datasets/backward_compatibility.py | 40 ++++++ lerobot/common/datasets/factory.py | 4 +- lerobot/common/datasets/lerobot_dataset.py | 129 ++++++++++-------- lerobot/common/datasets/utils.py | 95 +++++-------- .../datasets/v2/convert_dataset_v1_to_v2.py | 4 +- .../v21/batch_convert_dataset_v20_to_v21.py | 49 +++++++ .../v21/convert_dataset_v20_to_v21.py | 64 +++++---- .../common/robot_devices/control_configs.py | 6 - lerobot/configs/default.py | 2 +- lerobot/scripts/control_robot.py | 6 +- lerobot/scripts/visualize_dataset.py | 9 +- lerobot/scripts/visualize_dataset_html.py | 13 +- lerobot/scripts/visualize_image_transforms.py | 2 +- pyproject.toml | 3 +- tests/fixtures/dataset_factories.py | 14 +- tests/test_control_robot.py | 5 +- tests/test_datasets.py | 4 - 18 files changed, 253 insertions(+), 198 deletions(-) create mode 100644 lerobot/common/datasets/backward_compatibility.py create mode 100644 lerobot/common/datasets/v21/batch_convert_dataset_v20_to_v21.py diff --git a/examples/port_datasets/pusht_zarr.py b/examples/port_datasets/pusht_zarr.py index 2eaf1c1c..622fbd14 100644 --- a/examples/port_datasets/pusht_zarr.py +++ b/examples/port_datasets/pusht_zarr.py @@ -223,5 +223,5 @@ if __name__ == "__main__": main(raw_dir, repo_id=repo_id, mode=mode) # Uncomment if you want to load the local dataset and explore it - # dataset = LeRobotDataset(repo_id=repo_id, local_files_only=True) + # dataset = LeRobotDataset(repo_id=repo_id) # breakpoint() diff --git a/lerobot/common/datasets/backward_compatibility.py b/lerobot/common/datasets/backward_compatibility.py new file mode 100644 index 00000000..aa814549 --- /dev/null +++ b/lerobot/common/datasets/backward_compatibility.py @@ -0,0 +1,40 @@ +import packaging.version + +V2_MESSAGE = """ +The dataset you requested ({repo_id}) is in {version} format. + +We introduced a new format since v2.0 which is not backward compatible with v1.x. +Please, use our conversion script. Modify the following command with your own task description: +``` +python lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py \\ + --repo-id {repo_id} \\ + --single-task "TASK DESCRIPTION." 
# <---- /!\\ Replace TASK DESCRIPTION /!\\ +``` + +A few examples to replace TASK DESCRIPTION: "Pick up the blue cube and place it into the bin.", "Insert the +peg into the socket.", "Slide open the ziploc bag.", "Take the elevator to the 1st floor.", "Open the top +cabinet, store the pot inside it then close the cabinet.", "Push the T-shaped block onto the T-shaped +target.", "Grab the spray paint on the shelf and place it in the bin on top of the robot dog.", "Fold the +sweatshirt.", ... + +If you encounter a problem, contact LeRobot maintainers on [Discord](https://discord.com/invite/s3KuuzsPFb) +or open an [issue on GitHub](https://github.com/huggingface/lerobot/issues/new/choose). +""" + +V21_MESSAGE = """ +The dataset you requested ({repo_id}) is in {version} format. +While current version of LeRobot is backward-compatible with it, the version of your dataset still uses global +stats instead of per-episode stats. Update your dataset stats to the new format using this command: +``` +python lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py --repo-id={repo_id} +``` + +If you encounter a problem, contact LeRobot maintainers on [Discord](https://discord.com/invite/s3KuuzsPFb) +or open an [issue on GitHub](https://github.com/huggingface/lerobot/issues/new/choose). +""" + + +class BackwardCompatibilityError(Exception): + def __init__(self, repo_id: str, version: packaging.version.Version): + message = V2_MESSAGE.format(repo_id=repo_id, version=version) + super().__init__(message) diff --git a/lerobot/common/datasets/factory.py b/lerobot/common/datasets/factory.py index 95ba76b8..fb1fe6d6 100644 --- a/lerobot/common/datasets/factory.py +++ b/lerobot/common/datasets/factory.py @@ -83,15 +83,15 @@ def make_dataset(cfg: TrainPipelineConfig) -> LeRobotDataset | MultiLeRobotDatas ) if isinstance(cfg.dataset.repo_id, str): - ds_meta = LeRobotDatasetMetadata(cfg.dataset.repo_id, local_files_only=cfg.dataset.local_files_only) + ds_meta = LeRobotDatasetMetadata(cfg.dataset.repo_id, revision=cfg.dataset.revision) delta_timestamps = resolve_delta_timestamps(cfg.policy, ds_meta) dataset = LeRobotDataset( cfg.dataset.repo_id, episodes=cfg.dataset.episodes, delta_timestamps=delta_timestamps, image_transforms=image_transforms, + revision=cfg.dataset.revision, video_backend=cfg.dataset.video_backend, - local_files_only=cfg.dataset.local_files_only, ) else: raise NotImplementedError("The MultiLeRobotDataset isn't supported for now.") diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index c7f0b2b3..dfdb3618 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -16,7 +16,6 @@ import logging import os import shutil -from functools import cached_property from pathlib import Path from typing import Callable @@ -27,6 +26,8 @@ import torch import torch.utils from datasets import load_dataset from huggingface_hub import HfApi, snapshot_download +from huggingface_hub.constants import REPOCARD_NAME +from packaging import version from lerobot.common.datasets.compute_stats import aggregate_stats, compute_episode_stats from lerobot.common.datasets.image_writer import AsyncImageWriter, write_image @@ -41,14 +42,13 @@ from lerobot.common.datasets.utils import ( check_frame_features, check_timestamps_sync, check_version_compatibility, - create_branch, create_empty_dataset_info, create_lerobot_dataset_card, get_delta_indices, get_episode_data_index, get_features_from_robot, get_hf_features_from_features, - 
get_hub_safe_version, + get_safe_revision, hf_transform_to_torch, load_episodes, load_episodes_stats, @@ -79,30 +79,35 @@ class LeRobotDatasetMetadata: self, repo_id: str, root: str | Path | None = None, - local_files_only: bool = False, + revision: str | None = None, + force_cache_sync: bool = False, ): self.repo_id = repo_id + self.revision = revision if revision else CODEBASE_VERSION self.root = Path(root) if root is not None else LEROBOT_HOME / repo_id - self.local_files_only = local_files_only - # Load metadata - (self.root / "meta").mkdir(exist_ok=True, parents=True) - self.pull_from_repo(allow_patterns="meta/") + try: + if force_cache_sync: + raise FileNotFoundError + self.load_metadata() + except (FileNotFoundError, NotADirectoryError): + (self.root / "meta").mkdir(exist_ok=True, parents=True) + self.revision = get_safe_revision(self.repo_id, self.revision) + self.pull_from_repo(allow_patterns="meta/") + self.load_metadata() + + check_version_compatibility(self.repo_id, self._version, CODEBASE_VERSION) + + def load_metadata(self): self.info = load_info(self.root) - self.stats = load_stats(self.root) self.tasks, self.task_to_task_index = load_tasks(self.root) self.episodes = load_episodes(self.root) - try: - self.episodes_stats = load_episodes_stats(self.root) - self.stats = aggregate_stats(list(self.episodes_stats.values())) - except FileNotFoundError: - logging.warning( - f"""'episodes_stats.jsonl' not found. Using global dataset stats for each episode instead. - Convert your dataset stats to the new format using this command: - python lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py --repo-id={self.repo_id} """ - ) + if version.parse(self._version) < version.parse("v2.1"): self.stats = load_stats(self.root) self.episodes_stats = backward_compatible_episodes_stats(self.stats, self.episodes) + else: + self.episodes_stats = load_episodes_stats(self.root) + self.stats = aggregate_stats(list(self.episodes_stats.values())) def pull_from_repo( self, @@ -112,17 +117,12 @@ class LeRobotDatasetMetadata: snapshot_download( self.repo_id, repo_type="dataset", - revision=self._hub_version, + revision=self.revision, local_dir=self.root, allow_patterns=allow_patterns, ignore_patterns=ignore_patterns, - local_files_only=self.local_files_only, ) - @cached_property - def _hub_version(self) -> str | None: - return None if self.local_files_only else get_hub_safe_version(self.repo_id, CODEBASE_VERSION) - @property def _version(self) -> str: """Codebase version used to create this dataset.""" @@ -342,7 +342,7 @@ class LeRobotDatasetMetadata: if len(obj.video_keys) > 0 and not use_videos: raise ValueError() write_json(obj.info, obj.root / INFO_PATH) - obj.local_files_only = True + obj.revision = None return obj @@ -355,8 +355,9 @@ class LeRobotDataset(torch.utils.data.Dataset): image_transforms: Callable | None = None, delta_timestamps: dict[list[float]] | None = None, tolerance_s: float = 1e-4, + revision: str | None = None, + force_cache_sync: bool = False, download_videos: bool = True, - local_files_only: bool = False, video_backend: str | None = None, ): """ @@ -366,7 +367,7 @@ class LeRobotDataset(torch.utils.data.Dataset): - On your local disk in the 'root' folder. This is typically the case when you recorded your dataset locally and you may or may not have pushed it to the hub yet. Instantiating this class with 'root' will load your dataset directly from disk. This can happen while you're offline (no - internet connection), in that case, use local_files_only=True. + internet connection). 
- On the Hugging Face Hub at the address https://huggingface.co/datasets/{repo_id} and not on
          your local disk in the 'root' folder. Instantiating this class with this 'repo_id' will download
@@ -448,11 +449,15 @@ class LeRobotDataset(torch.utils.data.Dataset):
                 timestamps is separated to the next by 1/fps +/- tolerance_s. This also applies to frames
                 decoded from video files. It is also used to check that `delta_timestamps` (when provided) are
                 multiples of 1/fps. Defaults to 1e-4.
+            revision (str, optional): An optional Git revision id which can be a branch name, a tag, or a
+                commit hash. Defaults to current codebase version tag.
+            force_cache_sync (bool, optional): Flag to sync and refresh local files first. If False and
+                files are already present in the local cache, loading is faster, but they might not be in
+                sync with the version on the hub, especially if you specified 'revision'. Defaults to
+                False.
             download_videos (bool, optional): Flag to download the videos. Note that when set to True but the
                 video files are already present on local disk, they won't be downloaded again. Defaults to
                 True.
-            local_files_only (bool, optional): Flag to use local files only. If True, no requests to the hub
-                will be made. Defaults to False.
             video_backend (str | None, optional): Video backend to use for decoding videos. There is currently
                 a single option which is the pyav decoder used by Torchvision. Defaults to pyav.
         """
@@ -463,9 +468,9 @@ class LeRobotDataset(torch.utils.data.Dataset):
         self.delta_timestamps = delta_timestamps
         self.episodes = episodes
         self.tolerance_s = tolerance_s
+        self.revision = revision if revision else CODEBASE_VERSION
         self.video_backend = video_backend if video_backend else "pyav"
         self.delta_indices = None
-        self.local_files_only = local_files_only

         # Unused attributes
         self.image_writer = None
@@ -474,17 +479,24 @@ class LeRobotDataset(torch.utils.data.Dataset):
         self.root.mkdir(exist_ok=True, parents=True)

         # Load metadata
-        self.meta = LeRobotDatasetMetadata(self.repo_id, self.root, self.local_files_only)
-        if self.episodes is not None and self.meta._version == CODEBASE_VERSION:
+        self.meta = LeRobotDatasetMetadata(
+            self.repo_id, self.root, self.revision, force_cache_sync=force_cache_sync
+        )
+        if self.episodes is not None and version.parse(self.meta._version) >= version.parse("v2.1"):
             episodes_stats = [self.meta.episodes_stats[ep_idx] for ep_idx in self.episodes]
             self.stats = aggregate_stats(episodes_stats)

-        # Check version
-        check_version_compatibility(self.repo_id, self.meta._version, CODEBASE_VERSION)
-
         # Load actual data
-        self.download_episodes(download_videos)
-        self.hf_dataset = self.load_hf_dataset()
+        try:
+            if force_cache_sync:
+                raise FileNotFoundError
+            assert all((self.root / fpath).is_file() for fpath in self.get_episodes_file_paths())
+            self.hf_dataset = self.load_hf_dataset()
+        except (AssertionError, FileNotFoundError, NotADirectoryError):
+            self.revision = get_safe_revision(self.repo_id, self.revision)
+            self.download_episodes(download_videos)
+            self.hf_dataset = self.load_hf_dataset()
+
         self.episode_data_index = get_episode_data_index(self.meta.episodes, self.episodes)

         # Check timestamps
@@ -501,7 +513,6 @@ class LeRobotDataset(torch.utils.data.Dataset):
     def push_to_hub(
         self,
         branch: str | None = None,
-        create_card: bool = True,
         tags: list | None = None,
         license: str | None = "apache-2.0",
         push_videos: bool = True,
@@ -528,7 +539,13 @@ class LeRobotDataset(torch.utils.data.Dataset):
             exist_ok=True,
         )
         if branch:
-
create_branch(repo_id=self.repo_id, branch=branch, repo_type="dataset") + hub_api.create_branch( + repo_id=self.repo_id, + branch=branch, + revision=self.revision, + repo_type="dataset", + exist_ok=True, + ) hub_api.upload_folder( repo_id=self.repo_id, @@ -538,15 +555,12 @@ class LeRobotDataset(torch.utils.data.Dataset): allow_patterns=allow_patterns, ignore_patterns=ignore_patterns, ) - if create_card: + if not hub_api.file_exists(self.repo_id, REPOCARD_NAME, repo_type="dataset", revision=branch): card = create_lerobot_dataset_card( tags=tags, dataset_info=self.meta.info, license=license, **card_kwargs ) card.push_to_hub(repo_id=self.repo_id, repo_type="dataset", revision=branch) - if not branch: - create_branch(repo_id=self.repo_id, branch=CODEBASE_VERSION, repo_type="dataset") - def pull_from_repo( self, allow_patterns: list[str] | str | None = None, @@ -555,11 +569,10 @@ class LeRobotDataset(torch.utils.data.Dataset): snapshot_download( self.repo_id, repo_type="dataset", - revision=self.meta._hub_version, + revision=self.revision, local_dir=self.root, allow_patterns=allow_patterns, ignore_patterns=ignore_patterns, - local_files_only=self.local_files_only, ) def download_episodes(self, download_videos: bool = True) -> None: @@ -573,17 +586,23 @@ class LeRobotDataset(torch.utils.data.Dataset): files = None ignore_patterns = None if download_videos else "videos/" if self.episodes is not None: - files = [str(self.meta.get_data_file_path(ep_idx)) for ep_idx in self.episodes] - if len(self.meta.video_keys) > 0 and download_videos: - video_files = [ - str(self.meta.get_video_file_path(ep_idx, vid_key)) - for vid_key in self.meta.video_keys - for ep_idx in self.episodes - ] - files += video_files + files = self.get_episodes_file_paths() self.pull_from_repo(allow_patterns=files, ignore_patterns=ignore_patterns) + def get_episodes_file_paths(self) -> list[Path]: + episodes = self.episodes if self.episodes is not None else list(range(self.meta.total_episodes)) + fpaths = [str(self.meta.get_data_file_path(ep_idx)) for ep_idx in episodes] + if len(self.meta.video_keys) > 0: + video_files = [ + str(self.meta.get_video_file_path(ep_idx, vid_key)) + for vid_key in self.meta.video_keys + for ep_idx in episodes + ] + fpaths += video_files + + return fpaths + def load_hf_dataset(self) -> datasets.Dataset: """hf_dataset contains all the observations, states, actions, rewards, etc.""" if self.episodes is None: @@ -991,7 +1010,7 @@ class LeRobotDataset(torch.utils.data.Dataset): ) obj.repo_id = obj.meta.repo_id obj.root = obj.meta.root - obj.local_files_only = obj.meta.local_files_only + obj.revision = None obj.tolerance_s = tolerance_s obj.image_writer = None @@ -1033,7 +1052,6 @@ class MultiLeRobotDataset(torch.utils.data.Dataset): delta_timestamps: dict[list[float]] | None = None, tolerances_s: dict | None = None, download_videos: bool = True, - local_files_only: bool = False, video_backend: str | None = None, ): super().__init__() @@ -1051,7 +1069,6 @@ class MultiLeRobotDataset(torch.utils.data.Dataset): delta_timestamps=delta_timestamps, tolerance_s=self.tolerances_s[repo_id], download_videos=download_videos, - local_files_only=local_files_only, video_backend=video_backend, ) for repo_id in repo_ids diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py index 8b734042..c9b0c345 100644 --- a/lerobot/common/datasets/utils.py +++ b/lerobot/common/datasets/utils.py @@ -13,10 +13,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. +import contextlib import importlib.resources import json import logging -import textwrap from collections.abc import Iterator from itertools import accumulate from pathlib import Path @@ -31,9 +31,11 @@ import pyarrow.compute as pc import torch from datasets.table import embed_table_storage from huggingface_hub import DatasetCard, DatasetCardData, HfApi +from packaging import version from PIL import Image as PILImage from torchvision import transforms +from lerobot.common.datasets.backward_compatibility import V21_MESSAGE, BackwardCompatibilityError from lerobot.common.robot_devices.robots.utils import Robot from lerobot.common.utils.utils import is_valid_numpy_dtype_string from lerobot.configs.types import DictLike, FeatureType, PolicyFeature @@ -200,7 +202,7 @@ def write_task(task_index: int, task: dict, local_dir: Path): append_jsonlines(task_dict, local_dir / TASKS_PATH) -def load_tasks(local_dir: Path) -> dict: +def load_tasks(local_dir: Path) -> tuple[dict, dict]: tasks = load_jsonlines(local_dir / TASKS_PATH) tasks = {item["task_index"]: item["task"] for item in sorted(tasks, key=lambda x: x["task_index"])} task_to_task_index = {task: task_index for task_index, task in tasks.items()} @@ -231,7 +233,9 @@ def load_episodes_stats(local_dir: Path) -> dict: } -def backward_compatible_episodes_stats(stats, episodes: list[int]) -> dict[str, dict[str, np.ndarray]]: +def backward_compatible_episodes_stats( + stats: dict[str, dict[str, np.ndarray]], episodes: list[int] +) -> dict[str, dict[str, np.ndarray]]: return {ep_idx: stats for ep_idx in episodes} @@ -265,73 +269,38 @@ def hf_transform_to_torch(items_dict: dict[torch.Tensor | None]): return items_dict -def _get_major_minor(version: str) -> tuple[int]: - split = version.strip("v").split(".") - return int(split[0]), int(split[1]) - - -class BackwardCompatibilityError(Exception): - def __init__(self, repo_id, version): - message = textwrap.dedent(f""" - BackwardCompatibilityError: The dataset you requested ({repo_id}) is in {version} format. - - We introduced a new format since v2.0 which is not backward compatible with v1.x. - Please, use our conversion script. Modify the following command with your own task description: - ``` - python lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py \\ - --repo-id {repo_id} \\ - --single-task "TASK DESCRIPTION." # <---- /!\\ Replace TASK DESCRIPTION /!\\ - ``` - - A few examples to replace TASK DESCRIPTION: "Pick up the blue cube and place it into the bin.", - "Insert the peg into the socket.", "Slide open the ziploc bag.", "Take the elevator to the 1st floor.", - "Open the top cabinet, store the pot inside it then close the cabinet.", "Push the T-shaped block onto the T-shaped target.", - "Grab the spray paint on the shelf and place it in the bin on top of the robot dog.", "Fold the sweatshirt.", ... - - If you encounter a problem, contact LeRobot maintainers on [Discord](https://discord.com/invite/s3KuuzsPFb) - or open an [issue on GitHub](https://github.com/huggingface/lerobot/issues/new/choose). 
- """) - super().__init__(message) - - def check_version_compatibility( repo_id: str, version_to_check: str, current_version: str, enforce_breaking_major: bool = True ) -> None: - current_major, _ = _get_major_minor(current_version) - major_to_check, _ = _get_major_minor(version_to_check) - if major_to_check < current_major and enforce_breaking_major: - raise BackwardCompatibilityError(repo_id, version_to_check) - elif float(version_to_check.strip("v")) < float(current_version.strip("v")): - logging.warning( - f"""The dataset you requested ({repo_id}) was created with a previous version ({version_to_check}) of the - codebase. The current codebase version is {current_version}. You should be fine since - backward compatibility is maintained. If you encounter a problem, contact LeRobot maintainers on - Discord ('https://discord.com/invite/s3KuuzsPFb') or open an issue on github.""", - ) + v_check = version.parse(version_to_check) + v_current = version.parse(current_version) + if v_check.major < v_current.major and enforce_breaking_major: + raise BackwardCompatibilityError(repo_id, v_check) + elif v_check.minor < v_current.minor: + logging.warning(V21_MESSAGE.format(repo_id=repo_id, version=version_to_check)) -def get_hub_safe_version(repo_id: str, version: str) -> str: +def get_repo_versions(repo_id: str) -> list[version.Version]: + """Returns available valid versions (branches and tags) on given repo.""" api = HfApi() - dataset_info = api.list_repo_refs(repo_id, repo_type="dataset") - branches = [b.name for b in dataset_info.branches] - if version not in branches: - num_version = float(version.strip("v")) - hub_num_versions = [float(v.strip("v")) for v in branches if v.startswith("v")] - if num_version >= 2.0 and all(v < 2.0 for v in hub_num_versions): - raise BackwardCompatibilityError(repo_id, version) + repo_refs = api.list_repo_refs(repo_id, repo_type="dataset") + repo_refs = [b.name for b in repo_refs.branches + repo_refs.tags] + repo_versions = [] + for ref in repo_refs: + with contextlib.suppress(version.InvalidVersion): + repo_versions.append(version.parse(ref)) - logging.warning( - f"""You are trying to load a dataset from {repo_id} created with a previous version of the - codebase. The following versions are available: {branches}. - The requested version ('{version}') is not found. You should be fine since - backward compatibility is maintained. 
If you encounter a problem, contact LeRobot maintainers on - Discord ('https://discord.com/invite/s3KuuzsPFb') or open an issue on github.""", - ) - if "main" not in branches: - raise ValueError(f"Version 'main' not found on {repo_id}") - return "main" - else: - return version + return repo_versions + + +def get_safe_revision(repo_id: str, revision: str) -> str: + """Returns the version if available on repo, otherwise return the latest available.""" + api = HfApi() + if api.revision_exists(repo_id, revision, repo_type="dataset"): + return revision + + hub_versions = get_repo_versions(repo_id) + return f"v{max(hub_versions)}" def get_hf_features_from_features(features: dict) -> datasets.Features: diff --git a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py index 62ca9932..943e94f0 100644 --- a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py @@ -130,7 +130,7 @@ from lerobot.common.datasets.utils import ( create_branch, create_lerobot_dataset_card, flatten_dict, - get_hub_safe_version, + get_safe_revision, load_json, unflatten_dict, write_json, @@ -443,7 +443,7 @@ def convert_dataset( test_branch: str | None = None, **card_kwargs, ): - v1 = get_hub_safe_version(repo_id, V16) + v1 = get_safe_revision(repo_id, V16) v1x_dir = local_dir / V16 / repo_id v20_dir = local_dir / V20 / repo_id v1x_dir.mkdir(parents=True, exist_ok=True) diff --git a/lerobot/common/datasets/v21/batch_convert_dataset_v20_to_v21.py b/lerobot/common/datasets/v21/batch_convert_dataset_v20_to_v21.py new file mode 100644 index 00000000..624827bd --- /dev/null +++ b/lerobot/common/datasets/v21/batch_convert_dataset_v20_to_v21.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python + +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This script is for internal use to convert all datasets under the 'lerobot' hub user account to v2.1. +""" + +import traceback +from pathlib import Path + +from lerobot import available_datasets +from lerobot.common.datasets.v21.convert_dataset_v20_to_v21 import convert_dataset + +LOCAL_DIR = Path("data/") + + +def batch_convert(): + status = {} + logfile = LOCAL_DIR / "conversion_log_v21.txt" + for num, repo_id in available_datasets: + print(f"\nConverting {repo_id} ({num}/{len(available_datasets)})") + print("---------------------------------------------------------") + try: + convert_dataset(repo_id) + status = f"{repo_id}: success." 
+ with open(logfile, "a") as file: + file.write(status + "\n") + except Exception: + status = f"{repo_id}: failed\n {traceback.format_exc()}" + with open(logfile, "a") as file: + file.write(status + "\n") + continue + + +if __name__ == "__main__": + batch_convert() diff --git a/lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py b/lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py index 0c5d2688..d52a0a10 100644 --- a/lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py +++ b/lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py @@ -1,10 +1,12 @@ """ This script will help you convert any LeRobot dataset already pushed to the hub from codebase version 2.0 to -2.1. It performs the following: +2.1. It will: - Generates per-episodes stats and writes them in `episodes_stats.jsonl` +- Check consistency between these new stats and the old ones. - Removes the deprecated `stats.json` (by default) - Updates codebase_version in `info.json` +- Push this new version to the hub on the 'main' branch and tags it with "v2.1". Usage: @@ -14,9 +16,9 @@ python lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py \ ``` """ -# TODO(rcadene, aliberts): ensure this script works for any other changes for the final v2.1 import argparse +import logging from huggingface_hub import HfApi @@ -24,14 +26,27 @@ from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDat from lerobot.common.datasets.utils import EPISODES_STATS_PATH, STATS_PATH, load_stats, write_info from lerobot.common.datasets.v21.convert_stats import check_aggregate_stats, convert_stats +V20 = "v2.0" +V21 = "v2.1" -def main( + +class SuppressWarnings: + def __enter__(self): + self.previous_level = logging.getLogger().getEffectiveLevel() + logging.getLogger().setLevel(logging.ERROR) + + def __exit__(self, exc_type, exc_val, exc_tb): + logging.getLogger().setLevel(self.previous_level) + + +def convert_dataset( repo_id: str, - test_branch: str | None = None, - delete_old_stats: bool = False, + branch: str | None = None, num_workers: int = 4, ): - dataset = LeRobotDataset(repo_id) + with SuppressWarnings(): + dataset = LeRobotDataset(repo_id, revision=V20, force_cache_sync=True) + if (dataset.root / EPISODES_STATS_PATH).is_file(): raise FileExistsError("episodes_stats.jsonl already exists.") @@ -42,18 +57,21 @@ def main( dataset.meta.info["codebase_version"] = CODEBASE_VERSION write_info(dataset.meta.info, dataset.root) - dataset.push_to_hub(branch=test_branch, create_card=False, allow_patterns="meta/") + dataset.push_to_hub(branch=branch, allow_patterns="meta/") - if delete_old_stats: - if (dataset.root / STATS_PATH).is_file: - (dataset.root / STATS_PATH).unlink() - hub_api = HfApi() - if hub_api.file_exists( - STATS_PATH, repo_id=dataset.repo_id, revision=test_branch, repo_type="dataset" - ): - hub_api.delete_file( - STATS_PATH, repo_id=dataset.repo_id, revision=test_branch, repo_type="dataset" - ) + # delete old stats.json file + if (dataset.root / STATS_PATH).is_file: + (dataset.root / STATS_PATH).unlink() + + hub_api = HfApi() + if hub_api.file_exists( + repo_id=dataset.repo_id, filename=STATS_PATH, revision=branch, repo_type="dataset" + ): + hub_api.delete_file( + path_in_repo=STATS_PATH, repo_id=dataset.repo_id, revision=branch, repo_type="dataset" + ) + + hub_api.create_tag(repo_id, tag=CODEBASE_VERSION, revision=branch, repo_type="dataset") if __name__ == "__main__": @@ -65,16 +83,10 @@ if __name__ == "__main__": help="Repository identifier on Hugging Face: a community or a user name `/` the name of 
the dataset (e.g. `lerobot/pusht`, `cadene/aloha_sim_insertion_human`).", ) parser.add_argument( - "--test-branch", + "--branch", type=str, default=None, - help="Repo branch to test your conversion first (e.g. 'v2.0.test')", - ) - parser.add_argument( - "--delete-old-stats", - type=bool, - default=False, - help="Delete the deprecated `stats.json`", + help="Repo branch to push your dataset (defaults to the main branch)", ) parser.add_argument( "--num-workers", @@ -84,4 +96,4 @@ if __name__ == "__main__": ) args = parser.parse_args() - main(**vars(args)) + convert_dataset(**vars(args)) diff --git a/lerobot/common/robot_devices/control_configs.py b/lerobot/common/robot_devices/control_configs.py index c96a87f0..d7d03ac0 100644 --- a/lerobot/common/robot_devices/control_configs.py +++ b/lerobot/common/robot_devices/control_configs.py @@ -81,9 +81,6 @@ class RecordControlConfig(ControlConfig): play_sounds: bool = True # Resume recording on an existing dataset. resume: bool = False - # TODO(rcadene, aliberts): remove local_files_only when refactor with dataset as argument - # Use local files only. By default, this script will try to fetch the dataset from the hub if it exists. - local_files_only: bool = False def __post_init__(self): # HACK: We parse again the cli args here to get the pretrained path if there was one. @@ -128,9 +125,6 @@ class ReplayControlConfig(ControlConfig): fps: int | None = None # Use vocal synthesis to read events. play_sounds: bool = True - # TODO(rcadene, aliberts): remove local_files_only when refactor with dataset as argument - # Use local files only. By default, this script will try to fetch the dataset from the hub if it exists. - local_files_only: bool = False @dataclass diff --git a/lerobot/configs/default.py b/lerobot/configs/default.py index 5dd2f898..a5013431 100644 --- a/lerobot/configs/default.py +++ b/lerobot/configs/default.py @@ -31,7 +31,7 @@ class DatasetConfig: repo_id: str episodes: list[int] | None = None image_transforms: ImageTransformsConfig = field(default_factory=ImageTransformsConfig) - local_files_only: bool = False + revision: str | None = None use_imagenet_stats: bool = True video_backend: str = "pyav" diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index 5f51c81b..dee2792d 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -85,7 +85,6 @@ python lerobot/scripts/control_robot.py record \ This might require a sudo permission to allow your terminal to monitor keyboard events. **NOTE**: You can resume/continue data recording by running the same data recording command and adding `--control.resume=true`. -If the dataset you want to extend is not on the hub, you also need to add `--control.local_files_only=true`. 
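As an editorial aside on the migration above: with `local_files_only` removed from the control configs and from `DatasetConfig`, version pinning now goes through the new `revision` argument, and offline use simply relies on an intact local cache. A minimal sketch of the resulting call patterns (the repo id is illustrative):

```
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset

# Default: resolves to the current CODEBASE_VERSION tag (e.g. "v2.1") on the hub.
dataset = LeRobotDataset("lerobot/pusht")

# Pin to an explicit branch, tag, or commit hash instead.
dataset_v20 = LeRobotDataset("lerobot/pusht", revision="v2.0")

# Force a refresh of cached files from the hub before loading.
dataset_synced = LeRobotDataset("lerobot/pusht", force_cache_sync=True)
```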
- Train on this dataset with the ACT policy: ```bash @@ -216,7 +215,6 @@ def record( dataset = LeRobotDataset( cfg.repo_id, root=cfg.root, - local_files_only=cfg.local_files_only, ) if len(robot.cameras) > 0: dataset.start_image_writer( @@ -318,9 +316,7 @@ def replay( # TODO(rcadene, aliberts): refactor with control_loop, once `dataset` is an instance of LeRobotDataset # TODO(rcadene): Add option to record logs - dataset = LeRobotDataset( - cfg.repo_id, root=cfg.root, episodes=[cfg.episode], local_files_only=cfg.local_files_only - ) + dataset = LeRobotDataset(cfg.repo_id, root=cfg.root, episodes=[cfg.episode]) actions = dataset.hf_dataset.select_columns("action") if not robot.is_connected: diff --git a/lerobot/scripts/visualize_dataset.py b/lerobot/scripts/visualize_dataset.py index 626b0bde..11feb1af 100644 --- a/lerobot/scripts/visualize_dataset.py +++ b/lerobot/scripts/visualize_dataset.py @@ -207,12 +207,6 @@ def main(): required=True, help="Episode to visualize.", ) - parser.add_argument( - "--local-files-only", - type=int, - default=0, - help="Use local files only. By default, this script will try to fetch the dataset from the hub if it exists.", - ) parser.add_argument( "--root", type=Path, @@ -275,10 +269,9 @@ def main(): kwargs = vars(args) repo_id = kwargs.pop("repo_id") root = kwargs.pop("root") - local_files_only = kwargs.pop("local_files_only") logging.info("Loading dataset") - dataset = LeRobotDataset(repo_id, root=root, local_files_only=local_files_only) + dataset = LeRobotDataset(repo_id, root=root) visualize_dataset(dataset, **vars(args)) diff --git a/lerobot/scripts/visualize_dataset_html.py b/lerobot/scripts/visualize_dataset_html.py index cc3f3930..a022c91e 100644 --- a/lerobot/scripts/visualize_dataset_html.py +++ b/lerobot/scripts/visualize_dataset_html.py @@ -384,12 +384,6 @@ def main(): default=None, help="Name of hugging face repositery containing a LeRobotDataset dataset (e.g. `lerobot/pusht` for https://huggingface.co/datasets/lerobot/pusht).", ) - parser.add_argument( - "--local-files-only", - type=int, - default=0, - help="Use local files only. 
By default, this script will try to fetch the dataset from the hub if it exists.", - ) parser.add_argument( "--root", type=Path, @@ -445,15 +439,10 @@ def main(): repo_id = kwargs.pop("repo_id") load_from_hf_hub = kwargs.pop("load_from_hf_hub") root = kwargs.pop("root") - local_files_only = kwargs.pop("local_files_only") dataset = None if repo_id: - dataset = ( - LeRobotDataset(repo_id, root=root, local_files_only=local_files_only) - if not load_from_hf_hub - else get_dataset_info(repo_id) - ) + dataset = LeRobotDataset(repo_id, root=root) if not load_from_hf_hub else get_dataset_info(repo_id) visualize_dataset_html(dataset, **vars(args)) diff --git a/lerobot/scripts/visualize_image_transforms.py b/lerobot/scripts/visualize_image_transforms.py index 727fe178..80935d32 100644 --- a/lerobot/scripts/visualize_image_transforms.py +++ b/lerobot/scripts/visualize_image_transforms.py @@ -109,7 +109,7 @@ def visualize_image_transforms(cfg: DatasetConfig, output_dir: Path = OUTPUT_DIR dataset = LeRobotDataset( repo_id=cfg.repo_id, episodes=cfg.episodes, - local_files_only=cfg.local_files_only, + revision=cfg.revision, video_backend=cfg.video_backend, ) diff --git a/pyproject.toml b/pyproject.toml index 6e7e0575..4a3355e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,7 @@ dependencies = [ "numba>=0.59.0", "omegaconf>=2.3.0", "opencv-python>=4.9.0", + "packaging>=24.2", "pyav>=12.0.5", "pymunk>=6.6.0", "rerun-sdk>=0.21.0", @@ -54,7 +55,7 @@ dependencies = [ "torch>=2.2.1", "torchvision>=0.21.0", "wandb>=0.16.3", - "zarr>=2.17.0" + "zarr>=2.17.0", ] [project.optional-dependencies] diff --git a/tests/fixtures/dataset_factories.py b/tests/fixtures/dataset_factories.py index f57f945b..811e29b7 100644 --- a/tests/fixtures/dataset_factories.py +++ b/tests/fixtures/dataset_factories.py @@ -309,7 +309,6 @@ def lerobot_dataset_metadata_factory( episodes_stats: list[dict] | None = None, tasks: list[dict] | None = None, episodes: list[dict] | None = None, - local_files_only: bool = False, ) -> LeRobotDatasetMetadata: if not info: info = info_factory() @@ -335,16 +334,16 @@ def lerobot_dataset_metadata_factory( ) with ( patch( - "lerobot.common.datasets.lerobot_dataset.get_hub_safe_version" - ) as mock_get_hub_safe_version_patch, + "lerobot.common.datasets.lerobot_dataset.get_safe_revision" + ) as mock_get_safe_revision_patch, patch( "lerobot.common.datasets.lerobot_dataset.snapshot_download" ) as mock_snapshot_download_patch, ): - mock_get_hub_safe_version_patch.side_effect = lambda repo_id, version: version + mock_get_safe_revision_patch.side_effect = lambda repo_id, version: version mock_snapshot_download_patch.side_effect = mock_snapshot_download - return LeRobotDatasetMetadata(repo_id=repo_id, root=root, local_files_only=local_files_only) + return LeRobotDatasetMetadata(repo_id=repo_id, root=root) return _create_lerobot_dataset_metadata @@ -411,15 +410,18 @@ def lerobot_dataset_factory( episodes_stats=episodes_stats, tasks=tasks, episodes=episode_dicts, - local_files_only=kwargs.get("local_files_only", False), ) with ( patch("lerobot.common.datasets.lerobot_dataset.LeRobotDatasetMetadata") as mock_metadata_patch, + patch( + "lerobot.common.datasets.lerobot_dataset.get_safe_revision" + ) as mock_get_safe_revision_patch, patch( "lerobot.common.datasets.lerobot_dataset.snapshot_download" ) as mock_snapshot_download_patch, ): mock_metadata_patch.return_value = mock_metadata + mock_get_safe_revision_patch.side_effect = lambda repo_id, version: version mock_snapshot_download_patch.side_effect = 
mock_snapshot_download return LeRobotDataset(repo_id=repo_id, root=root, **kwargs) diff --git a/tests/test_control_robot.py b/tests/test_control_robot.py index a4f538a6..12b68641 100644 --- a/tests/test_control_robot.py +++ b/tests/test_control_robot.py @@ -167,9 +167,7 @@ def test_record_and_replay_and_policy(tmp_path, request, robot_type, mock): assert dataset.meta.total_episodes == 2 assert len(dataset) == 2 - replay_cfg = ReplayControlConfig( - episode=0, fps=1, root=root, repo_id=repo_id, play_sounds=False, local_files_only=True - ) + replay_cfg = ReplayControlConfig(episode=0, fps=1, root=root, repo_id=repo_id, play_sounds=False) replay(robot, replay_cfg) policy_cfg = ACTConfig() @@ -266,7 +264,6 @@ def test_resume_record(tmp_path, request, robot_type, mock): video=False, display_cameras=False, play_sounds=False, - local_files_only=True, num_episodes=1, ) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 72fa5b50..3e8b531d 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -77,10 +77,6 @@ def test_same_attributes_defined(tmp_path, lerobot_dataset_factory): root_init = tmp_path / "init" dataset_init = lerobot_dataset_factory(root=root_init) - # Access the '_hub_version' cached_property in both instances to force its creation - _ = dataset_init.meta._hub_version - _ = dataset_create.meta._hub_version - init_attr = set(vars(dataset_init).keys()) create_attr = set(vars(dataset_create).keys()) From 2487228ea73c210e40dbd02fd0f09ab50036a004 Mon Sep 17 00:00:00 2001 From: Simon Alibert <75076266+aliberts@users.noreply.github.com> Date: Wed, 19 Feb 2025 14:49:46 +0100 Subject: [PATCH 07/28] Use `HF_HOME` env variable (#753) --- examples/port_datasets/pusht_zarr.py | 7 ++++--- lerobot/common/constants.py | 15 +++++++++++++++ lerobot/common/datasets/lerobot_dataset.py | 11 +++++------ tests/fixtures/constants.py | 4 ++-- tests/test_datasets.py | 4 ++-- 5 files changed, 28 insertions(+), 13 deletions(-) diff --git a/examples/port_datasets/pusht_zarr.py b/examples/port_datasets/pusht_zarr.py index 622fbd14..e05c742e 100644 --- a/examples/port_datasets/pusht_zarr.py +++ b/examples/port_datasets/pusht_zarr.py @@ -4,7 +4,8 @@ from pathlib import Path import numpy as np import torch -from lerobot.common.datasets.lerobot_dataset import LEROBOT_HOME, LeRobotDataset +from lerobot.common.constants import HF_LEROBOT_HOME +from lerobot.common.datasets.lerobot_dataset import LeRobotDataset from lerobot.common.datasets.push_dataset_to_hub._download_raw import download_raw PUSHT_TASK = "Push the T-shaped blue block onto the T-shaped green target surface." 
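The `HF_LEROBOT_HOME` constant imported above replaces `LEROBOT_HOME`. Its definition (added to `lerobot/common/constants.py` in a later hunk of this patch) makes the cache root resolve roughly like the sketch below; the printed path is only the default on a standard setup:

```
import os
from pathlib import Path

from huggingface_hub.constants import HF_HOME  # typically ~/.cache/huggingface

# Defaults to $HF_HOME/lerobot, overridable with the HF_LEROBOT_HOME env variable.
cache_root = Path(os.getenv("HF_LEROBOT_HOME", Path(HF_HOME) / "lerobot")).expanduser()
print(cache_root)  # e.g. /home/user/.cache/huggingface/lerobot
```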
@@ -134,8 +135,8 @@ def main(raw_dir: Path, repo_id: str, mode: str = "video", push_to_hub: bool = T if mode not in ["video", "image", "keypoints"]: raise ValueError(mode) - if (LEROBOT_HOME / repo_id).exists(): - shutil.rmtree(LEROBOT_HOME / repo_id) + if (HF_LEROBOT_HOME / repo_id).exists(): + shutil.rmtree(HF_LEROBOT_HOME / repo_id) if not raw_dir.exists(): download_raw(raw_dir, repo_id="lerobot-raw/pusht_raw") diff --git a/lerobot/common/constants.py b/lerobot/common/constants.py index 34da4ac0..d0c9845a 100644 --- a/lerobot/common/constants.py +++ b/lerobot/common/constants.py @@ -1,4 +1,9 @@ # keys +import os +from pathlib import Path + +from huggingface_hub.constants import HF_HOME + OBS_ENV = "observation.environment_state" OBS_ROBOT = "observation.state" OBS_IMAGE = "observation.image" @@ -15,3 +20,13 @@ TRAINING_STEP = "training_step.json" OPTIMIZER_STATE = "optimizer_state.safetensors" OPTIMIZER_PARAM_GROUPS = "optimizer_param_groups.json" SCHEDULER_STATE = "scheduler_state.json" + +# cache dir +default_cache_path = Path(HF_HOME) / "lerobot" +HF_LEROBOT_HOME = Path(os.getenv("HF_LEROBOT_HOME", default_cache_path)).expanduser() + +if "LEROBOT_HOME" in os.environ: + raise ValueError( + f"You have a 'LEROBOT_HOME' environment variable set to '{os.getenv('LEROBOT_HOME')}'.\n" + "'LEROBOT_HOME' is deprecated, please use 'HF_LEROBOT_HOME' instead." + ) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index dfdb3618..d4224b7e 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -import os import shutil from pathlib import Path from typing import Callable @@ -29,6 +28,7 @@ from huggingface_hub import HfApi, snapshot_download from huggingface_hub.constants import REPOCARD_NAME from packaging import version +from lerobot.common.constants import HF_LEROBOT_HOME from lerobot.common.datasets.compute_stats import aggregate_stats, compute_episode_stats from lerobot.common.datasets.image_writer import AsyncImageWriter, write_image from lerobot.common.datasets.utils import ( @@ -71,7 +71,6 @@ from lerobot.common.robot_devices.robots.utils import Robot # For maintainers, see lerobot/common/datasets/push_dataset_to_hub/CODEBASE_VERSION.md CODEBASE_VERSION = "v2.1" -LEROBOT_HOME = Path(os.getenv("LEROBOT_HOME", "~/.cache/huggingface/lerobot")).expanduser() class LeRobotDatasetMetadata: @@ -84,7 +83,7 @@ class LeRobotDatasetMetadata: ): self.repo_id = repo_id self.revision = revision if revision else CODEBASE_VERSION - self.root = Path(root) if root is not None else LEROBOT_HOME / repo_id + self.root = Path(root) if root is not None else HF_LEROBOT_HOME / repo_id try: if force_cache_sync: @@ -308,7 +307,7 @@ class LeRobotDatasetMetadata: """Creates metadata for a LeRobotDataset.""" obj = cls.__new__(cls) obj.repo_id = repo_id - obj.root = Path(root) if root is not None else LEROBOT_HOME / repo_id + obj.root = Path(root) if root is not None else HF_LEROBOT_HOME / repo_id obj.root.mkdir(parents=True, exist_ok=False) @@ -463,7 +462,7 @@ class LeRobotDataset(torch.utils.data.Dataset): """ super().__init__() self.repo_id = repo_id - self.root = Path(root) if root else LEROBOT_HOME / repo_id + self.root = Path(root) if root else HF_LEROBOT_HOME / repo_id self.image_transforms = image_transforms self.delta_timestamps = delta_timestamps self.episodes = episodes @@ -1056,7 
+1055,7 @@ class MultiLeRobotDataset(torch.utils.data.Dataset): ): super().__init__() self.repo_ids = repo_ids - self.root = Path(root) if root else LEROBOT_HOME + self.root = Path(root) if root else HF_LEROBOT_HOME self.tolerances_s = tolerances_s if tolerances_s else {repo_id: 1e-4 for repo_id in repo_ids} # Construct the underlying datasets passing everything but `transform` and `delta_timestamps` which # are handled by this class. diff --git a/tests/fixtures/constants.py b/tests/fixtures/constants.py index 7d80d2b7..3201dcf2 100644 --- a/tests/fixtures/constants.py +++ b/tests/fixtures/constants.py @@ -1,6 +1,6 @@ -from lerobot.common.datasets.lerobot_dataset import LEROBOT_HOME +from lerobot.common.constants import HF_LEROBOT_HOME -LEROBOT_TEST_DIR = LEROBOT_HOME / "_testing" +LEROBOT_TEST_DIR = HF_LEROBOT_HOME / "_testing" DUMMY_REPO_ID = "dummy/repo" DUMMY_ROBOT_TYPE = "dummy_robot" DUMMY_MOTOR_FEATURES = { diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 3e8b531d..6d358eea 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -581,9 +581,9 @@ def test_create_branch(): def test_dataset_feature_with_forward_slash_raises_error(): # make sure dir does not exist - from lerobot.common.datasets.lerobot_dataset import LEROBOT_HOME + from lerobot.common.constants import HF_LEROBOT_HOME - dataset_dir = LEROBOT_HOME / "lerobot/test/with/slash" + dataset_dir = HF_LEROBOT_HOME / "lerobot/test/with/slash" # make sure does not exist if dataset_dir.exists(): dataset_dir.rmdir() From 6fe42a72dbb6f4fc2d6481acbd962def900ec8dc Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Wed, 19 Feb 2025 15:01:44 +0100 Subject: [PATCH 08/28] Add tag --- examples/port_datasets/pusht_zarr.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/port_datasets/pusht_zarr.py b/examples/port_datasets/pusht_zarr.py index e05c742e..4ff361cb 100644 --- a/examples/port_datasets/pusht_zarr.py +++ b/examples/port_datasets/pusht_zarr.py @@ -3,9 +3,10 @@ from pathlib import Path import numpy as np import torch +from huggingface_hub import HfApi from lerobot.common.constants import HF_LEROBOT_HOME -from lerobot.common.datasets.lerobot_dataset import LeRobotDataset +from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset from lerobot.common.datasets.push_dataset_to_hub._download_raw import download_raw PUSHT_TASK = "Push the T-shaped blue block onto the T-shaped green target surface." 
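The hunk below tags the freshly pushed dataset with the codebase version. For reference, the equivalent standalone call (assuming the dataset repo already exists on the hub) looks like this:

```
from huggingface_hub import HfApi

hub_api = HfApi()
# Tag the dataset repo's current main revision with the codebase version tag.
hub_api.create_tag("lerobot/pusht", tag="v2.1", repo_type="dataset")
```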
@@ -203,6 +204,8 @@ def main(raw_dir: Path, repo_id: str, mode: str = "video", push_to_hub: bool = T if push_to_hub: dataset.push_to_hub() + hub_api = HfApi() + hub_api.create_tag(repo_id, tag=CODEBASE_VERSION, repo_type="dataset") if __name__ == "__main__": From 969ef745a2e44156f00e8cbb319a7fddff5c0589 Mon Sep 17 00:00:00 2001 From: Simon Alibert <75076266+aliberts@users.noreply.github.com> Date: Wed, 19 Feb 2025 16:02:54 +0100 Subject: [PATCH 09/28] Remove dataset `consolidate` (#752) --- examples/port_datasets/pusht_zarr.py | 2 - lerobot/common/datasets/lerobot_dataset.py | 101 +++++++-------------- lerobot/common/datasets/utils.py | 52 ++++++++--- lerobot/scripts/control_robot.py | 2 - tests/fixtures/dataset_factories.py | 25 ++--- tests/test_datasets.py | 39 +++----- 6 files changed, 93 insertions(+), 128 deletions(-) diff --git a/examples/port_datasets/pusht_zarr.py b/examples/port_datasets/pusht_zarr.py index 4ff361cb..53855ec8 100644 --- a/examples/port_datasets/pusht_zarr.py +++ b/examples/port_datasets/pusht_zarr.py @@ -200,8 +200,6 @@ def main(raw_dir: Path, repo_id: str, mode: str = "video", push_to_hub: bool = T dataset.save_episode() - dataset.consolidate() - if push_to_hub: dataset.push_to_hub() hub_api = HfApi() diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index d4224b7e..81280e68 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -39,7 +39,6 @@ from lerobot.common.datasets.utils import ( append_jsonlines, backward_compatible_episodes_stats, check_delta_timestamps, - check_frame_features, check_timestamps_sync, check_version_compatibility, create_empty_dataset_info, @@ -55,6 +54,8 @@ from lerobot.common.datasets.utils import ( load_info, load_stats, load_tasks, + validate_episode_buffer, + validate_frame, write_episode, write_episode_stats, write_info, @@ -256,6 +257,9 @@ class LeRobotDatasetMetadata: self.info["splits"] = {"train": f"0:{self.info['total_episodes']}"} self.info["total_videos"] += len(self.video_keys) + if len(self.video_keys) > 0: + self.update_video_info() + write_info(self.info, self.root) episode_dict = { @@ -270,7 +274,7 @@ class LeRobotDatasetMetadata: self.stats = aggregate_stats([self.stats, episode_stats]) if self.stats else episode_stats write_episode_stats(episode_index, episode_stats, self.root) - def write_video_info(self) -> None: + def update_video_info(self) -> None: """ Warning: this function writes info from first episode videos, implicitly assuming that all videos have been encoded the same way. Also, this means it assumes the first episode exists. 
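Since `save_episode` above now folds each new episode's statistics into the global ones via `aggregate_stats`, it may help to picture that fold on a toy example. The sketch below only illustrates the min/max part with made-up numbers and is not the library's actual implementation:

```
import numpy as np

# Hypothetical per-episode stats for one feature; real entries also track mean, std and count.
episode_stats = [
    {"min": np.array([0.1]), "max": np.array([0.9])},
    {"min": np.array([0.0]), "max": np.array([0.7])},
]
aggregated = {
    "min": np.min(np.stack([s["min"] for s in episode_stats]), axis=0),  # -> array([0.])
    "max": np.max(np.stack([s["max"] for s in episode_stats]), axis=0),  # -> array([0.9])
}
```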
@@ -280,8 +284,6 @@ class LeRobotDatasetMetadata: video_path = self.root / self.get_video_file_path(ep_index=0, vid_key=key) self.info["features"][key]["info"] = get_video_info(video_path) - write_json(self.info, self.root / INFO_PATH) - def __repr__(self): feature_keys = list(self.features) return ( @@ -506,9 +508,6 @@ class LeRobotDataset(torch.utils.data.Dataset): check_delta_timestamps(self.delta_timestamps, self.fps, self.tolerance_s) self.delta_indices = get_delta_indices(self.delta_timestamps, self.fps) - # Available stats implies all videos have been encoded and dataset is iterable - self.consolidated = self.meta.stats is not None - def push_to_hub( self, branch: str | None = None, @@ -519,13 +518,6 @@ class LeRobotDataset(torch.utils.data.Dataset): allow_patterns: list[str] | str | None = None, **card_kwargs, ) -> None: - if not self.consolidated: - logging.warning( - "You are trying to upload to the hub a LeRobotDataset that has not been consolidated yet. " - "Consolidating first." - ) - self.consolidate() - ignore_patterns = ["images/"] if not push_videos: ignore_patterns.append("videos/") @@ -779,7 +771,7 @@ class LeRobotDataset(torch.utils.data.Dataset): if isinstance(frame[name], torch.Tensor): frame[name] = frame[name].numpy() - check_frame_features(frame, self.features) + validate_frame(frame, self.features) if self.episode_buffer is None: self.episode_buffer = self.create_episode_buffer() @@ -815,41 +807,25 @@ class LeRobotDataset(torch.utils.data.Dataset): self.episode_buffer["size"] += 1 - def save_episode(self, encode_videos: bool = True, episode_data: dict | None = None) -> None: + def save_episode(self, episode_data: dict | None = None) -> None: """ - This will save to disk the current episode in self.episode_buffer. Note that since it affects files on - disk, it sets self.consolidated to False to ensure proper consolidation later on before uploading to - the hub. + This will save to disk the current episode in self.episode_buffer. - Use 'encode_videos' if you want to encode videos during the saving of this episode. Otherwise, - you can do it later with dataset.consolidate(). This is to give more flexibility on when to spend - time for video encoding. + Args: + episode_data (dict | None, optional): Dict containing the episode data to save. If None, this will + save the current episode in self.episode_buffer, which is filled with 'add_frame'. Defaults to + None. """ if not episode_data: episode_buffer = self.episode_buffer + validate_episode_buffer(episode_buffer, self.meta.total_episodes, self.features) + # size and task are special cases that won't be added to hf_dataset episode_length = episode_buffer.pop("size") tasks = episode_buffer.pop("task") episode_tasks = list(set(tasks)) - episode_index = episode_buffer["episode_index"] - if episode_index != self.meta.total_episodes: - # TODO(aliberts): Add option to use existing episode_index - raise NotImplementedError( - "You might have manually provided the episode_buffer with an episode_index that doesn't " - "match the total number of episodes already in the dataset. This is not supported for now." - ) - - if episode_length == 0: - raise ValueError( - "You must add one or several frames with `add_frame` before calling `add_episode`." 
- ) - - if not set(episode_buffer.keys()) == set(self.features): - raise ValueError( - f"Features from `episode_buffer` don't match the ones in `self.features`: '{set(episode_buffer.keys())}' vs '{set(self.features)}'" - ) episode_buffer["index"] = np.arange(self.meta.total_frames, self.meta.total_frames + episode_length) episode_buffer["episode_index"] = np.full((episode_length,), episode_index) @@ -875,16 +851,29 @@ class LeRobotDataset(torch.utils.data.Dataset): ep_stats = compute_episode_stats(episode_buffer, self.features) self.meta.save_episode(episode_index, episode_length, episode_tasks, ep_stats) - if encode_videos and len(self.meta.video_keys) > 0: + if len(self.meta.video_keys) > 0: video_paths = self.encode_episode_videos(episode_index) for key in self.meta.video_keys: episode_buffer[key] = video_paths[key] + self.hf_dataset = self.load_hf_dataset() + self.episode_data_index = get_episode_data_index(self.meta.episodes, self.episodes) + check_timestamps_sync(self.hf_dataset, self.episode_data_index, self.fps, self.tolerance_s) + + video_files = list(self.root.rglob("*.mp4")) + assert len(video_files) == self.num_episodes * len(self.meta.video_keys) + + parquet_files = list(self.root.rglob("*.parquet")) + assert len(parquet_files) == self.num_episodes + + # delete images + img_dir = self.root / "images" + if img_dir.is_dir(): + shutil.rmtree(self.root / "images") + if not episode_data: # Reset the buffer self.episode_buffer = self.create_episode_buffer() - self.consolidated = False - def _save_episode_table(self, episode_buffer: dict, episode_index: int) -> None: episode_dict = {key: episode_buffer[key] for key in self.hf_features} ep_dataset = datasets.Dataset.from_dict(episode_dict, features=self.hf_features, split="train") @@ -959,28 +948,6 @@ class LeRobotDataset(torch.utils.data.Dataset): return video_paths - def consolidate(self, keep_image_files: bool = False) -> None: - self.hf_dataset = self.load_hf_dataset() - self.episode_data_index = get_episode_data_index(self.meta.episodes, self.episodes) - check_timestamps_sync(self.hf_dataset, self.episode_data_index, self.fps, self.tolerance_s) - - if len(self.meta.video_keys) > 0: - self.encode_videos() - self.meta.write_video_info() - - if not keep_image_files: - img_dir = self.root / "images" - if img_dir.is_dir(): - shutil.rmtree(self.root / "images") - - video_files = list(self.root.rglob("*.mp4")) - assert len(video_files) == self.num_episodes * len(self.meta.video_keys) - - parquet_files = list(self.root.rglob("*.parquet")) - assert len(parquet_files) == self.num_episodes - - self.consolidated = True - @classmethod def create( cls, @@ -1019,12 +986,6 @@ class LeRobotDataset(torch.utils.data.Dataset): # TODO(aliberts, rcadene, alexander-soare): Merge this with OnlineBuffer/DataBuffer obj.episode_buffer = obj.create_episode_buffer() - # This bool indicates that the current LeRobotDataset instance is in sync with the files on disk. It - # is used to know when certain operations are need (for instance, computing dataset statistics). In - # order to be able to push the dataset to the hub, it needs to be consolidated first by calling - # self.consolidate(). 
- obj.consolidated = True - obj.episodes = None obj.hf_dataset = None obj.image_transforms = None diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py index c9b0c345..6125480c 100644 --- a/lerobot/common/datasets/utils.py +++ b/lerobot/common/datasets/utils.py @@ -644,25 +644,25 @@ class IterableNamespace(SimpleNamespace): return vars(self).keys() -def check_frame_features(frame: dict, features: dict): +def validate_frame(frame: dict, features: dict): optional_features = {"timestamp"} expected_features = (set(features) - set(DEFAULT_FEATURES.keys())) | {"task"} actual_features = set(frame.keys()) - error_message = check_features_presence(actual_features, expected_features, optional_features) + error_message = validate_features_presence(actual_features, expected_features, optional_features) if "task" in frame: - error_message += check_feature_string("task", frame["task"]) + error_message += validate_feature_string("task", frame["task"]) common_features = actual_features & (expected_features | optional_features) for name in common_features - {"task"}: - error_message += check_feature_dtype_and_shape(name, features[name], frame[name]) + error_message += validate_feature_dtype_and_shape(name, features[name], frame[name]) if error_message: raise ValueError(error_message) -def check_features_presence( +def validate_features_presence( actual_features: set[str], expected_features: set[str], optional_features: set[str] ): error_message = "" @@ -679,20 +679,22 @@ def check_features_presence( return error_message -def check_feature_dtype_and_shape(name: str, feature: dict, value: np.ndarray | PILImage.Image | str): +def validate_feature_dtype_and_shape(name: str, feature: dict, value: np.ndarray | PILImage.Image | str): expected_dtype = feature["dtype"] expected_shape = feature["shape"] if is_valid_numpy_dtype_string(expected_dtype): - return check_feature_numpy_array(name, expected_dtype, expected_shape, value) + return validate_feature_numpy_array(name, expected_dtype, expected_shape, value) elif expected_dtype in ["image", "video"]: - return check_feature_image_or_video(name, expected_shape, value) + return validate_feature_image_or_video(name, expected_shape, value) elif expected_dtype == "string": - return check_feature_string(name, value) + return validate_feature_string(name, value) else: raise NotImplementedError(f"The feature dtype '{expected_dtype}' is not implemented yet.") -def check_feature_numpy_array(name: str, expected_dtype: str, expected_shape: list[int], value: np.ndarray): +def validate_feature_numpy_array( + name: str, expected_dtype: str, expected_shape: list[int], value: np.ndarray +): error_message = "" if isinstance(value, np.ndarray): actual_dtype = value.dtype @@ -709,7 +711,7 @@ def check_feature_numpy_array(name: str, expected_dtype: str, expected_shape: li return error_message -def check_feature_image_or_video(name: str, expected_shape: list[str], value: np.ndarray | PILImage.Image): +def validate_feature_image_or_video(name: str, expected_shape: list[str], value: np.ndarray | PILImage.Image): # Note: The check of pixels range ([0,1] for float and [0,255] for uint8) is done by the image writer threads. 
error_message = "" if isinstance(value, np.ndarray): @@ -725,7 +727,33 @@ def check_feature_image_or_video(name: str, expected_shape: list[str], value: np return error_message -def check_feature_string(name: str, value: str): +def validate_feature_string(name: str, value: str): if not isinstance(value, str): return f"The feature '{name}' is expected to be of type 'str', but type '{type(value)}' provided instead.\n" return "" + + +def validate_episode_buffer(episode_buffer: dict, total_episodes: int, features: dict): + if "size" not in episode_buffer: + raise ValueError("size key not found in episode_buffer") + + if "task" not in episode_buffer: + raise ValueError("task key not found in episode_buffer") + + if episode_buffer["episode_index"] != total_episodes: + # TODO(aliberts): Add option to use existing episode_index + raise NotImplementedError( + "You might have manually provided the episode_buffer with an episode_index that doesn't " + "match the total number of episodes already in the dataset. This is not supported for now." + ) + + if episode_buffer["size"] == 0: + raise ValueError("You must add one or several frames with `add_frame` before calling `add_episode`.") + + buffer_keys = set(episode_buffer.keys()) - {"task", "size"} + if not buffer_keys == set(features): + raise ValueError( + f"Features from `episode_buffer` don't match the ones in `features`." + f"In episode_buffer not in features: {buffer_keys - set(features)}" + f"In features not in episode_buffer: {set(features) - buffer_keys}" + ) diff --git a/lerobot/scripts/control_robot.py b/lerobot/scripts/control_robot.py index dee2792d..e6103271 100644 --- a/lerobot/scripts/control_robot.py +++ b/lerobot/scripts/control_robot.py @@ -299,8 +299,6 @@ def record( log_say("Stop recording", cfg.play_sounds, blocking=True) stop_recording(robot, listener, cfg.display_cameras) - dataset.consolidate() - if cfg.push_to_hub: dataset.push_to_hub(tags=cfg.tags, private=cfg.private) diff --git a/tests/fixtures/dataset_factories.py b/tests/fixtures/dataset_factories.py index 811e29b7..e3604591 100644 --- a/tests/fixtures/dataset_factories.py +++ b/tests/fixtures/dataset_factories.py @@ -1,5 +1,7 @@ import random +from functools import partial from pathlib import Path +from typing import Protocol from unittest.mock import patch import datasets @@ -17,7 +19,6 @@ from lerobot.common.datasets.utils import ( get_hf_features_from_features, hf_transform_to_torch, ) -from lerobot.common.robot_devices.robots.utils import Robot from tests.fixtures.constants import ( DEFAULT_FPS, DUMMY_CAMERA_FEATURES, @@ -28,6 +29,10 @@ from tests.fixtures.constants import ( ) +class LeRobotDatasetFactory(Protocol): + def __call__(self, *args, **kwargs) -> LeRobotDataset: ... 
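With `consolidate()` gone from the recording path (see the `control_robot.py` hunk above) and input checks handled by `validate_frame`/`validate_episode_buffer`, an end-to-end recording loop reduces to roughly the sketch below. The repo id, feature spec, and values are placeholders modeled on the test patterns, not a prescribed setup:

```
import numpy as np

from lerobot.common.datasets.lerobot_dataset import LeRobotDataset

features = {"state": {"dtype": "float32", "shape": (2,), "names": None}}
dataset = LeRobotDataset.create(repo_id="user/demo", fps=30, features=features)

for _ in range(10):
    dataset.add_frame({"state": np.zeros(2, dtype=np.float32), "task": "Dummy task"})
dataset.save_episode()  # writes data, updates metadata and stats; no consolidate() needed

dataset.push_to_hub()
```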
+ + def get_task_index(task_dicts: dict, task: str) -> int: tasks = {d["task_index"]: d["task"] for d in task_dicts.values()} task_to_task_index = {task: task_idx for task_idx, task in tasks.items()} @@ -358,7 +363,7 @@ def lerobot_dataset_factory( hf_dataset_factory, mock_snapshot_download_factory, lerobot_dataset_metadata_factory, -): +) -> LeRobotDatasetFactory: def _create_lerobot_dataset( root: Path, repo_id: str = DUMMY_REPO_ID, @@ -430,17 +435,5 @@ def lerobot_dataset_factory( @pytest.fixture(scope="session") -def empty_lerobot_dataset_factory(): - def _create_empty_lerobot_dataset( - root: Path, - repo_id: str = DUMMY_REPO_ID, - fps: int = DEFAULT_FPS, - robot: Robot | None = None, - robot_type: str | None = None, - features: dict | None = None, - ) -> LeRobotDataset: - return LeRobotDataset.create( - repo_id=repo_id, fps=fps, root=root, robot=robot, robot_type=robot_type, features=features - ) - - return _create_empty_lerobot_dataset +def empty_lerobot_dataset_factory() -> LeRobotDatasetFactory: + return partial(LeRobotDataset.create, repo_id=DUMMY_REPO_ID, fps=DEFAULT_FPS) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 6d358eea..61b68aa8 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -184,8 +184,7 @@ def test_add_frame(tmp_path, empty_lerobot_dataset_factory): features = {"state": {"dtype": "float32", "shape": (1,), "names": None}} dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) dataset.add_frame({"state": torch.randn(1), "task": "Dummy task"}) - dataset.save_episode(encode_videos=False) - dataset.consolidate() + dataset.save_episode() assert len(dataset) == 1 assert dataset[0]["task"] == "Dummy task" @@ -197,8 +196,7 @@ def test_add_frame_state_1d(tmp_path, empty_lerobot_dataset_factory): features = {"state": {"dtype": "float32", "shape": (2,), "names": None}} dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) dataset.add_frame({"state": torch.randn(2), "task": "Dummy task"}) - dataset.save_episode(encode_videos=False) - dataset.consolidate() + dataset.save_episode() assert dataset[0]["state"].shape == torch.Size([2]) @@ -207,8 +205,7 @@ def test_add_frame_state_2d(tmp_path, empty_lerobot_dataset_factory): features = {"state": {"dtype": "float32", "shape": (2, 4), "names": None}} dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) dataset.add_frame({"state": torch.randn(2, 4), "task": "Dummy task"}) - dataset.save_episode(encode_videos=False) - dataset.consolidate() + dataset.save_episode() assert dataset[0]["state"].shape == torch.Size([2, 4]) @@ -217,8 +214,7 @@ def test_add_frame_state_3d(tmp_path, empty_lerobot_dataset_factory): features = {"state": {"dtype": "float32", "shape": (2, 4, 3), "names": None}} dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) dataset.add_frame({"state": torch.randn(2, 4, 3), "task": "Dummy task"}) - dataset.save_episode(encode_videos=False) - dataset.consolidate() + dataset.save_episode() assert dataset[0]["state"].shape == torch.Size([2, 4, 3]) @@ -227,8 +223,7 @@ def test_add_frame_state_4d(tmp_path, empty_lerobot_dataset_factory): features = {"state": {"dtype": "float32", "shape": (2, 4, 3, 5), "names": None}} dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) dataset.add_frame({"state": torch.randn(2, 4, 3, 5), "task": "Dummy task"}) - dataset.save_episode(encode_videos=False) - dataset.consolidate() + dataset.save_episode() assert 
dataset[0]["state"].shape == torch.Size([2, 4, 3, 5]) @@ -237,8 +232,7 @@ def test_add_frame_state_5d(tmp_path, empty_lerobot_dataset_factory): features = {"state": {"dtype": "float32", "shape": (2, 4, 3, 5, 1), "names": None}} dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) dataset.add_frame({"state": torch.randn(2, 4, 3, 5, 1), "task": "Dummy task"}) - dataset.save_episode(encode_videos=False) - dataset.consolidate() + dataset.save_episode() assert dataset[0]["state"].shape == torch.Size([2, 4, 3, 5, 1]) @@ -247,8 +241,7 @@ def test_add_frame_state_numpy(tmp_path, empty_lerobot_dataset_factory): features = {"state": {"dtype": "float32", "shape": (1,), "names": None}} dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) dataset.add_frame({"state": np.array([1], dtype=np.float32), "task": "Dummy task"}) - dataset.save_episode(encode_videos=False) - dataset.consolidate() + dataset.save_episode() assert dataset[0]["state"].ndim == 0 @@ -257,8 +250,7 @@ def test_add_frame_string(tmp_path, empty_lerobot_dataset_factory): features = {"caption": {"dtype": "string", "shape": (1,), "names": None}} dataset = empty_lerobot_dataset_factory(root=tmp_path / "test", features=features) dataset.add_frame({"caption": "Dummy caption", "task": "Dummy task"}) - dataset.save_episode(encode_videos=False) - dataset.consolidate() + dataset.save_episode() assert dataset[0]["caption"] == "Dummy caption" @@ -287,14 +279,13 @@ def test_add_frame_image_wrong_range(image_dataset): dataset = image_dataset dataset.add_frame({"image": np.random.rand(*DUMMY_CHW) * 255, "task": "Dummy task"}) with pytest.raises(FileNotFoundError): - dataset.save_episode(encode_videos=False) + dataset.save_episode() def test_add_frame_image(image_dataset): dataset = image_dataset dataset.add_frame({"image": np.random.rand(*DUMMY_CHW), "task": "Dummy task"}) - dataset.save_episode(encode_videos=False) - dataset.consolidate() + dataset.save_episode() assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW) @@ -302,8 +293,7 @@ def test_add_frame_image(image_dataset): def test_add_frame_image_h_w_c(image_dataset): dataset = image_dataset dataset.add_frame({"image": np.random.rand(*DUMMY_HWC), "task": "Dummy task"}) - dataset.save_episode(encode_videos=False) - dataset.consolidate() + dataset.save_episode() assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW) @@ -312,8 +302,7 @@ def test_add_frame_image_uint8(image_dataset): dataset = image_dataset image = np.random.randint(0, 256, DUMMY_HWC, dtype=np.uint8) dataset.add_frame({"image": image, "task": "Dummy task"}) - dataset.save_episode(encode_videos=False) - dataset.consolidate() + dataset.save_episode() assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW) @@ -322,8 +311,7 @@ def test_add_frame_image_pil(image_dataset): dataset = image_dataset image = np.random.randint(0, 256, DUMMY_HWC, dtype=np.uint8) dataset.add_frame({"image": Image.fromarray(image), "task": "Dummy task"}) - dataset.save_episode(encode_videos=False) - dataset.consolidate() + dataset.save_episode() assert dataset[0]["image"].shape == torch.Size(DUMMY_CHW) @@ -338,7 +326,6 @@ def test_image_array_to_pil_image_wrong_range_float_0_255(): # - [ ] test various attributes & state from init and create # - [ ] test init with episodes and check num_frames # - [ ] test add_episode -# - [ ] test consolidate # - [ ] test push_to_hub # - [ ] test smaller methods From 392a8c32a70c95661073225c2547c3b6e8b7bffb Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Thu, 
20 Feb 2025 08:24:41 +0100
Subject: [PATCH 10/28] Improve doc

---
 .../datasets/v21/convert_dataset_v20_to_v21.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py b/lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py
index d52a0a10..f55c13c1 100644
--- a/lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py
+++ b/lerobot/common/datasets/v21/convert_dataset_v20_to_v21.py
@@ -2,10 +2,10 @@ This script will help you convert any LeRobot dataset already pushed to the hub
 from codebase version 2.0 to 2.1. It will:
-- Generates per-episodes stats and writes them in `episodes_stats.jsonl`
+- Generate per-episode stats and write them in `episodes_stats.jsonl`
 - Check consistency between these new stats and the old ones.
-- Removes the deprecated `stats.json` (by default)
-- Updates codebase_version in `info.json`
+- Remove the deprecated `stats.json`.
+- Update codebase_version in `info.json`.
 - Push this new version to the hub on the 'main' branch and tags it with "v2.1".

 Usage:

@@ -80,19 +80,20 @@ if __name__ == "__main__":
         "--repo-id",
         type=str,
         required=True,
-        help="Repository identifier on Hugging Face: a community or a user name `/` the name of the dataset (e.g. `lerobot/pusht`, `cadene/aloha_sim_insertion_human`).",
+        help="Repository identifier on Hugging Face: a community or a user name `/` the name of the dataset "
+        "(e.g. `lerobot/pusht`, `cadene/aloha_sim_insertion_human`).",
     )
     parser.add_argument(
         "--branch",
         type=str,
         default=None,
-        help="Repo branch to push your dataset (defaults to the main branch)",
+        help="Repo branch to push your dataset. Defaults to the main branch.",
     )
     parser.add_argument(
         "--num-workers",
         type=int,
         default=4,
-        help="Number of workers for parallelizing compute",
+        help="Number of workers for parallelizing the stats computation. 
Defaults to 4.", ) args = parser.parse_args() From 64ed5258e67f00ae42144cbd58aaf8f85f0c345f Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Thu, 20 Feb 2025 09:00:14 +0100 Subject: [PATCH 11/28] Fix batch convert --- .../common/datasets/v21/batch_convert_dataset_v20_to_v21.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lerobot/common/datasets/v21/batch_convert_dataset_v20_to_v21.py b/lerobot/common/datasets/v21/batch_convert_dataset_v20_to_v21.py index 624827bd..cee9da16 100644 --- a/lerobot/common/datasets/v21/batch_convert_dataset_v20_to_v21.py +++ b/lerobot/common/datasets/v21/batch_convert_dataset_v20_to_v21.py @@ -29,8 +29,9 @@ LOCAL_DIR = Path("data/") def batch_convert(): status = {} + LOCAL_DIR.mkdir(parents=True, exist_ok=True) logfile = LOCAL_DIR / "conversion_log_v21.txt" - for num, repo_id in available_datasets: + for num, repo_id in enumerate(available_datasets): print(f"\nConverting {repo_id} ({num}/{len(available_datasets)})") print("---------------------------------------------------------") try: From 71d1f5e2c92973dcf7e569130381f4e36b4255ef Mon Sep 17 00:00:00 2001 From: Remi Cadene Date: Thu, 20 Feb 2025 23:04:31 +0000 Subject: [PATCH 12/28] WIP --- examples/port_datasets/openx_rlds.py | 158 +++++++++--------- .../port_datasets/openx_rlds_datatrove.py | 106 ++++++++++++ .../{oxe_utils => openx_utils}/configs.py | 10 +- .../openx_utils/slurm_submit.bash | 30 ++++ examples/port_datasets/openx_utils/test.py | 54 ++++++ .../transform_utils.py | 2 +- .../{oxe_utils => openx_utils}/transforms.py | 38 +++-- 7 files changed, 306 insertions(+), 92 deletions(-) create mode 100644 examples/port_datasets/openx_rlds_datatrove.py rename examples/port_datasets/{oxe_utils => openx_utils}/configs.py (99%) create mode 100755 examples/port_datasets/openx_utils/slurm_submit.bash create mode 100644 examples/port_datasets/openx_utils/test.py rename examples/port_datasets/{oxe_utils => openx_utils}/transform_utils.py (100%) rename examples/port_datasets/{oxe_utils => openx_utils}/transforms.py (98%) diff --git a/examples/port_datasets/openx_rlds.py b/examples/port_datasets/openx_rlds.py index 4ac2791c..d582d947 100644 --- a/examples/port_datasets/openx_rlds.py +++ b/examples/port_datasets/openx_rlds.py @@ -17,37 +17,35 @@ For all datasets in the RLDS format. For https://github.com/google-deepmind/open_x_embodiment (OPENX) datasets. -NOTE: You need to install tensorflow and tensorflow_datsets before running this script. +NOTE: Install `tensorflow` and `tensorflow_datasets` before running this script. 
+```bash +pip install tensorflow +pip install tensorflow_datasets +``` Example: - python openx_rlds.py \ - --raw-dir /path/to/bridge_orig/1.0.0 \ - --local-dir /path/to/local_dir \ - --repo-id your_id \ - --use-videos \ - --push-to-hub +```bash +python examples/port_datasets/openx_rlds.py \ + --raw-dir /fsx/mustafa_shukor/droid \ + --repo-id cadene/droid \ + --use-videos \ + --push-to-hub +``` """ import argparse -import os import re import shutil -import sys -from functools import partial from pathlib import Path import numpy as np import tensorflow as tf import tensorflow_datasets as tfds +import tqdm -from lerobot.common.datasets.lerobot_dataset import LEROBOT_HOME, LeRobotDataset - -current_dir = os.path.dirname(os.path.abspath(__file__)) -oxe_utils_dir = os.path.join(current_dir, "oxe_utils") -sys.path.append(oxe_utils_dir) - -from oxe_utils.configs import OXE_DATASET_CONFIGS, StateEncoding -from oxe_utils.transforms import OXE_STANDARDIZATION_TRANSFORMS +from examples.port_datasets.openx_utils.configs import OXE_DATASET_CONFIGS, StateEncoding +from examples.port_datasets.openx_utils.transforms import OXE_STANDARDIZATION_TRANSFORMS +from lerobot.common.datasets.lerobot_dataset import LeRobotDataset np.set_printoptions(precision=2) @@ -87,16 +85,23 @@ def transform_raw_dataset(episode, dataset_name): return episode -def generate_features_from_raw(builder: tfds.core.DatasetBuilder, use_videos: bool = True): - dataset_name = builder.name - +def generate_features_from_raw(dataset_name: str, builder: tfds.core.DatasetBuilder, use_videos: bool = True): state_names = [f"motor_{i}" for i in range(8)] if dataset_name in OXE_DATASET_CONFIGS: state_encoding = OXE_DATASET_CONFIGS[dataset_name]["state_encoding"] if state_encoding == StateEncoding.POS_EULER: state_names = ["x", "y", "z", "roll", "pitch", "yaw", "pad", "gripper"] if "libero" in dataset_name: - state_names = ["x", "y", "z", "roll", "pitch", "yaw", "gripper", "gripper"] # 2D gripper state + state_names = [ + "x", + "y", + "z", + "roll", + "pitch", + "yaw", + "gripper", + "gripper", + ] # 2D gripper state elif state_encoding == StateEncoding.POS_QUAT: state_names = ["x", "y", "z", "rx", "ry", "rz", "rw", "gripper"] @@ -126,44 +131,68 @@ def generate_features_from_raw(builder: tfds.core.DatasetBuilder, use_videos: bo return {**features, **DEFAULT_FEATURES} -def save_as_lerobot_dataset(lerobot_dataset: LeRobotDataset, raw_dataset: tf.data.Dataset, **kwargs): - for episode in raw_dataset.as_numpy_iterator(): +def save_as_lerobot_dataset( + dataset_name: str, + lerobot_dataset: LeRobotDataset, + raw_dataset: tf.data.Dataset, + num_shards: int | None = None, + shard_index: int | None = None, +): + total_num_episodes = raw_dataset.cardinality().numpy().item() + print(f"Total number of episodes {total_num_episodes}") + + if num_shards is not None: + num_shards = 10000 + shard_index = 9999 + sharded_dataset = raw_dataset.shard(num_shards=num_shards, index=shard_index) + sharded_num_episodes = sharded_dataset.cardinality().numpy().item() + print(f"{sharded_num_episodes=}") + num_episodes = sharded_num_episodes + iter_ = iter(sharded_dataset) + else: + num_episodes = total_num_episodes + iter_ = iter(raw_dataset) + + for episode_index in range(num_episodes): + print(f"{episode_index} / {num_episodes}") + episode = next(iter_) + print("\nnext\n") + episode = transform_raw_dataset(episode, dataset_name) + traj = episode["steps"] - for i in range(traj["action"].shape[0]): + for i in tqdm.tqdm(range(traj["action"].shape[0])): image_dict = { - 
f"observation.images.{key}": value[i] + f"observation.images.{key}": value[i].numpy() for key, value in traj["observation"].items() if "depth" not in key and any(x in key for x in ["image", "rgb"]) } lerobot_dataset.add_frame( { **image_dict, - "observation.state": traj["proprio"][i], - "action": traj["action"][i], + "observation.state": traj["proprio"][i].numpy(), + "action": traj["action"][i].numpy(), + "task": traj["task"][i].numpy().decode(), } ) - lerobot_dataset.save_episode(task=traj["task"][0].decode()) - lerobot_dataset.consolidate( - run_compute_stats=True, - keep_image_files=kwargs["keep_images"], - stat_kwargs={"batch_size": kwargs["batch_size"], "num_workers": kwargs["num_workers"]}, - ) + print() + lerobot_dataset.save_episode() + print("\nsave_episode\n") + + break def create_lerobot_dataset( raw_dir: Path, repo_id: str = None, - local_dir: Path = None, push_to_hub: bool = False, fps: int = None, robot_type: str = None, use_videos: bool = True, - batch_size: int = 32, - num_workers: int = 8, image_writer_process: int = 5, image_writer_threads: int = 10, - keep_images: bool = True, + num_shards: int | None = None, + shard_index: int | None = None, ): last_part = raw_dir.name if re.match(r"^\d+\.\d+\.\d+$", last_part): @@ -175,15 +204,9 @@ def create_lerobot_dataset( dataset_name = last_part data_dir = raw_dir.parent - if local_dir is None: - local_dir = Path(LEROBOT_HOME) - local_dir /= f"{dataset_name}_{version}_lerobot" - if local_dir.exists(): - shutil.rmtree(local_dir) - builder = tfds.builder(dataset_name, data_dir=data_dir, version=version) - features = generate_features_from_raw(builder, use_videos) - raw_dataset = builder.as_dataset(split="train").map(partial(transform_raw_dataset, dataset_name=dataset_name)) + features = generate_features_from_raw(dataset_name, builder, use_videos) + raw_dataset = builder.as_dataset(split="train") if fps is None: if dataset_name in OXE_DATASET_CONFIGS: @@ -201,7 +224,6 @@ def create_lerobot_dataset( lerobot_dataset = LeRobotDataset.create( repo_id=repo_id, robot_type=robot_type, - root=local_dir, fps=fps, use_videos=use_videos, features=features, @@ -210,16 +232,18 @@ def create_lerobot_dataset( ) save_as_lerobot_dataset( - lerobot_dataset, raw_dataset, keep_images=keep_images, batch_size=batch_size, num_workers=num_workers + dataset_name, + lerobot_dataset, + raw_dataset, + num_shards=num_shards, + shard_index=shard_index, ) if push_to_hub: assert repo_id is not None - tags = ["LeRobot", dataset_name, "rlds"] + tags = [] if dataset_name in OXE_DATASET_CONFIGS: tags.append("openx") - if robot_type != "unknown": - tags.append(robot_type) lerobot_dataset.push_to_hub( tags=tags, private=False, @@ -237,12 +261,6 @@ def main(): required=True, help="Directory containing input raw datasets (e.g. `path/to/dataset` or `path/to/dataset/version).", ) - parser.add_argument( - "--local-dir", - type=Path, - required=True, - help="When provided, writes the dataset converted to LeRobotDataset format in this directory (e.g. `data/lerobot/aloha_mobile_chair`).", - ) parser.add_argument( "--repo-id", type=str, @@ -270,37 +288,25 @@ def main(): action="store_true", help="Convert each episode of the raw dataset to an mp4 video. 
This option allows 60 times lower disk space consumption and 25 faster loading time during training.", ) - parser.add_argument( - "--batch-size", - type=int, - default=32, - help="Batch size loaded by DataLoader for computing the dataset statistics.", - ) - parser.add_argument( - "--num-workers", - type=int, - default=8, - help="Number of processes of Dataloader for computing the dataset statistics.", - ) parser.add_argument( "--image-writer-process", type=int, - default=5, + default=0, help="Number of processes of image writer for saving images.", ) parser.add_argument( "--image-writer-threads", type=int, - default=10, + default=8, help="Number of threads per process of image writer for saving images.", ) - parser.add_argument( - "--keep-images", - action="store_true", - help="Whether to keep the cached images.", - ) args = parser.parse_args() + + droid_dir = Path("/fsx/remi_cadene/.cache/huggingface/lerobot/cadene/droid") + if droid_dir.exists(): + shutil.rmtree(droid_dir) + create_lerobot_dataset(**vars(args)) diff --git a/examples/port_datasets/openx_rlds_datatrove.py b/examples/port_datasets/openx_rlds_datatrove.py new file mode 100644 index 00000000..842b378d --- /dev/null +++ b/examples/port_datasets/openx_rlds_datatrove.py @@ -0,0 +1,106 @@ +import datetime as dt +import shutil +from pathlib import Path + +from datatrove.executor import LocalPipelineExecutor +from datatrove.executor.slurm import SlurmPipelineExecutor +from datatrove.pipeline.base import PipelineStep + + +class PortOpenXDataset(PipelineStep): + def __init__( + self, + raw_dir: Path, + repo_id: str = None, + image_writer_process: int = 0, + image_writer_threads: int = 8, + ): + super().__init__() + self.raw_dir = raw_dir + self.repo_id = repo_id + self.image_writer_process = image_writer_process + self.image_writer_threads = image_writer_threads + + def run(self, data=None, rank: int = 0, world_size: int = 1): + from examples.port_datasets.openx_rlds import create_lerobot_dataset + from examples.port_datasets.openx_utils.test import display_slurm_info, display_system_info + + display_system_info() + display_slurm_info() + + create_lerobot_dataset( + self.raw_dir, + f"{self.repo_id}_world_{world_size}_rank_{rank}", + image_writer_process=self.image_writer_process, + image_writer_threads=self.image_writer_threads, + push_to_hub=False, + num_shards=world_size, + shard_index=rank, + ) + + +class AggregateDatasets(PipelineStep): + def run(self, data=None, rank: int = 0, world_size: int = 1): + print("aggregation") + + +def main(slurm=True): + for dir_ in Path("/fsx/remi_cadene/.cache/huggingface/lerobot/cadene").glob("droid_world*"): + shutil.rmtree(dir_) + + now = dt.datetime.now() + port_job_name = "port_openx_droid" + logs_dir = Path("/fsx/remi_cadene/logs") + port_log_dir = logs_dir / f"{now:%Y-%m-%d}_{now:%H-%M-%S}_{port_job_name}" + + if slurm: + executor_class = SlurmPipelineExecutor + dist_extra_kwargs = { + "job_name": port_job_name, + "tasks": 10000, + "workers": 24, + "time": "00:30:00", + "partition": "hopper-cpu", + "cpus_per_task": 12, + "mem_per_cpu_gb": 4, + } + else: + executor_class = LocalPipelineExecutor + dist_extra_kwargs = { + "tasks": 1, + "workers": 1, + } + + port_executor = executor_class( + pipeline=[ + PortOpenXDataset(raw_dir=Path("/fsx/mustafa_shukor/droid"), repo_id="cadene/droid"), + ], + logging_dir=str(port_log_dir), + **dist_extra_kwargs, + ) + port_executor.run() + + # if slurm: + # merge_extra_kwargs = {} + # else: + # merge_extra_kwargs = { + # "job_name": "aggregate", + # "time": 
"00:01:00", + # "partition": "hopper-cpu", + # } + + # merge_executor = executor_class( + # depends=dist_executor, + # pipeline=[ + # Aggregate(), + # ], + # logging_dir=f"/fsx/remi_cadene/logs/openx_rlds_merge", + # tasks=1, + # workers=1, + # **merge_extra_kwargs, + # ) + # merge_executor.run() + + +if __name__ == "__main__": + main() diff --git a/examples/port_datasets/oxe_utils/configs.py b/examples/port_datasets/openx_utils/configs.py similarity index 99% rename from examples/port_datasets/oxe_utils/configs.py rename to examples/port_datasets/openx_utils/configs.py index 02522f81..04468110 100644 --- a/examples/port_datasets/oxe_utils/configs.py +++ b/examples/port_datasets/openx_utils/configs.py @@ -56,7 +56,9 @@ def zero_action_filter(traj: Dict) -> bool: 0.8897542208433151, ] ) - DROID_NORM_0_ACT = 2 * (tf.zeros_like(traj["action"][:, :6]) - DROID_Q01) / (DROID_Q99 - DROID_Q01 + 1e-8) - 1 + DROID_NORM_0_ACT = ( + 2 * (tf.zeros_like(traj["action"][:, :6]) - DROID_Q01) / (DROID_Q99 - DROID_Q01 + 1e-8) - 1 + ) return tf.reduce_any(tf.math.abs(traj["action"][:, :6] - DROID_NORM_0_ACT) > 1e-5) @@ -799,7 +801,11 @@ OXE_DATASET_CONFIGS = { }, ### DROID Finetuning datasets "droid_wipe": { - "image_obs_keys": {"primary": "exterior_image_2_left", "secondary": None, "wrist": "wrist_image_left"}, + "image_obs_keys": { + "primary": "exterior_image_2_left", + "secondary": None, + "wrist": "wrist_image_left", + }, "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, "state_obs_keys": ["proprio"], "state_encoding": StateEncoding.POS_EULER, diff --git a/examples/port_datasets/openx_utils/slurm_submit.bash b/examples/port_datasets/openx_utils/slurm_submit.bash new file mode 100755 index 00000000..3aee950c --- /dev/null +++ b/examples/port_datasets/openx_utils/slurm_submit.bash @@ -0,0 +1,30 @@ +#!/bin/bash +#SBATCH --job-name=openx_rlds +#SBATCH --partition=hopper-cpu +#SBATCH --requeue +#SBATCH --time=00:01:00 +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=8 +#SBATCH --output=/fsx/%u/slurm/%j-%x.out + +OUTPUT_DIR="/fsx/${USER}/slurm/${SLURM_JOB_NAME}-${SLURM_JOB_ID}-tasks" +mkdir -p $OUTPUT_DIR + +# Function to run a task and redirect output to a separate file +run_task() { + local task_id=$1 + local output_file="${OUTPUT_DIR}/task-${task_id}-${SLURM_JOB_ID}.out" + + # Run the task and redirect output + python examples/port_datasets/openx_utils/test.py > $output_file 2>&1 +} + +echo $SBATCH_OUTPUT + +# node has 380850M and 96 cpus +trap 'scontrol requeue ${SLURM_JOB_ID}; exit 15' SIGUSR1 +echo "Starting job" +# note the "&" to start srun as a background thread +srun python examples/port_datasets/openx_utils/test.py & +# wait for signals... 
+wait diff --git a/examples/port_datasets/openx_utils/test.py b/examples/port_datasets/openx_utils/test.py new file mode 100644 index 00000000..09d14983 --- /dev/null +++ b/examples/port_datasets/openx_utils/test.py @@ -0,0 +1,54 @@ +import os + +import psutil + + +def display_system_info(): + # Get the number of CPUs + num_cpus = psutil.cpu_count(logical=True) + print(f"Number of CPUs: {num_cpus}") + + # Get memory information + memory_info = psutil.virtual_memory() + total_memory = memory_info.total / (1024**3) # Convert bytes to GB + available_memory = memory_info.available / (1024**3) # Convert bytes to GB + used_memory = memory_info.used / (1024**3) # Convert bytes to GB + + print(f"Total Memory: {total_memory:.2f} GB") + print(f"Available Memory: {available_memory:.2f} GB") + print(f"Used Memory: {used_memory:.2f} GB") + + +def display_slurm_info(): + # Get SLURM job ID + job_id = os.getenv("SLURM_JOB_ID") + print(f"SLURM Job ID: {job_id}") + + # Get SLURM job name + job_name = os.getenv("SLURM_JOB_NAME") + print(f"SLURM Job Name: {job_name}") + + # Get the number of tasks + num_tasks = os.getenv("SLURM_NTASKS") + print(f"Number of Tasks: {num_tasks}") + + # Get the number of nodes + num_nodes = os.getenv("SLURM_NNODES") + print(f"Number of Nodes: {num_nodes}") + + # Get the number of CPUs per task + cpus_per_task = os.getenv("SLURM_CPUS_PER_TASK") + print(f"CPUs per Task: {cpus_per_task}") + + # Get the node list + node_list = os.getenv("SLURM_NODELIST") + print(f"Node List: {node_list}") + + # Get the task ID (only available within an srun task) + task_id = os.getenv("SLURM_PROCID") + print(f"Task ID: {task_id}") + + +if __name__ == "__main__": + display_system_info() + display_slurm_info() diff --git a/examples/port_datasets/oxe_utils/transform_utils.py b/examples/port_datasets/openx_utils/transform_utils.py similarity index 100% rename from examples/port_datasets/oxe_utils/transform_utils.py rename to examples/port_datasets/openx_utils/transform_utils.py index ca250ca6..3d7e8846 100644 --- a/examples/port_datasets/oxe_utils/transform_utils.py +++ b/examples/port_datasets/openx_utils/transform_utils.py @@ -2,7 +2,6 @@ Copied from https://github.com/openvla/openvla/blob/main/prismatic/vla/datasets/rlds/utils/data_utils.py """ - from typing import Any, Dict import tensorflow as tf @@ -66,6 +65,7 @@ def rel2abs_gripper_actions(actions: tf.Tensor) -> tf.Tensor: return new_actions + # === Bridge-V2 =>> Dataset-Specific Transform === def relabel_bridge_actions(traj: Dict[str, Any]) -> Dict[str, Any]: """Relabels actions to use reached proprioceptive state; discards last timestep (no-action).""" diff --git a/examples/port_datasets/oxe_utils/transforms.py b/examples/port_datasets/openx_utils/transforms.py similarity index 98% rename from examples/port_datasets/oxe_utils/transforms.py rename to examples/port_datasets/openx_utils/transforms.py index 6eaa496c..83152c24 100644 --- a/examples/port_datasets/oxe_utils/transforms.py +++ b/examples/port_datasets/openx_utils/transforms.py @@ -19,7 +19,8 @@ Transforms adopt the following structure: from typing import Any, Dict import tensorflow as tf -from oxe_utils.transform_utils import ( + +from examples.port_datasets.openx_utils.transform_utils import ( binarize_gripper_actions, invert_gripper_actions, rel2abs_gripper_actions, @@ -31,6 +32,7 @@ def droid_baseact_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: """ DROID dataset transformation for actions expressed in *base* frame of the robot. 
""" + def rand_swap_exterior_images(img1, img2): """ Randomly swaps the two exterior images (for training with single exterior input). @@ -55,11 +57,11 @@ def droid_baseact_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: ) ) # trajectory["observation"]["proprio"] = tf.concat( - # ( - # trajectory["observation"]["cartesian_position"], - # trajectory["observation"]["gripper_position"], - # ), - # axis=-1, + # ( + # trajectory["observation"]["cartesian_position"], + # trajectory["observation"]["gripper_position"], + # ), + # axis=-1, # ) trajectory["observation"]["EEF_state"] = trajectory["observation"]["cartesian_position"] trajectory["observation"]["gripper_state"] = trajectory["observation"]["gripper_position"] @@ -96,7 +98,7 @@ def bridge_oxe_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: Note =>> In original Bridge V2 dataset, the first timestep has an all-zero action, so we remove it! """ - for key in trajectory.keys(): + for key in trajectory: if key == "traj_metadata": continue elif key in ["observation", "action"]: @@ -126,7 +128,7 @@ def bridge_orig_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: Note =>> In original Bridge V2 dataset, the first timestep has an all-zero action, so we remove it! """ - for key in trajectory.keys(): + for key in trajectory: if key == "traj_metadata": continue elif key == "observation": @@ -198,7 +200,9 @@ def kuka_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: ) eef_value = tf.io.decode_raw(eef_value, tf.float32) trajectory["observation"]["clip_function_input/base_pose_tool_reached"] = tf.reshape(eef_value, (-1, 7)) - gripper_value = tf.io.decode_compressed(trajectory["observation"]["gripper_closed"], compression_type="ZLIB") + gripper_value = tf.io.decode_compressed( + trajectory["observation"]["gripper_closed"], compression_type="ZLIB" + ) gripper_value = tf.io.decode_raw(gripper_value, tf.float32) trajectory["observation"]["gripper_closed"] = tf.reshape(gripper_value, (-1, 1)) # trajectory["language_instruction"] = tf.fill( @@ -228,7 +232,9 @@ def taco_play_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: def jaco_play_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: trajectory["observation"]["state_eef"] = trajectory["observation"]["end_effector_cartesian_pos"][:, :6] - trajectory["observation"]["state_gripper"] = trajectory["observation"]["end_effector_cartesian_pos"][:, -1:] + trajectory["observation"]["state_gripper"] = trajectory["observation"]["end_effector_cartesian_pos"][ + :, -1: + ] # make gripper action absolute action, +1 = open, 0 = close gripper_action = trajectory["action"]["gripper_closedness_action"][:, 0] @@ -264,7 +270,9 @@ def berkeley_cable_routing_dataset_transform(trajectory: Dict[str, Any]) -> Dict def roboturk_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: # invert absolute gripper action, +1 = open, 0 = close - gripper_action = invert_gripper_actions(tf.clip_by_value(trajectory["action"]["gripper_closedness_action"], 0, 1)) + gripper_action = invert_gripper_actions( + tf.clip_by_value(trajectory["action"]["gripper_closedness_action"], 0, 1) + ) trajectory["action"] = tf.concat( ( @@ -374,7 +382,9 @@ def language_table_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, An instruction_bytes = trajectory["observation"]["instruction"] instruction_encoded = tf.strings.unicode_encode(instruction_bytes, output_encoding="UTF-8") # Remove trailing padding --> convert RaggedTensor to regular Tensor. 
- trajectory["language_instruction"] = tf.strings.split(instruction_encoded, "\x00")[:, :1].to_tensor()[:, 0] + trajectory["language_instruction"] = tf.strings.split(instruction_encoded, "\x00")[:, :1].to_tensor()[ + :, 0 + ] return trajectory @@ -900,7 +910,9 @@ def libero_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: axis=1, ) trajectory["observation"]["EEF_state"] = trajectory["observation"]["state"][:, :6] - trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -2:] # 2D gripper state + trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][ + :, -2: + ] # 2D gripper state return trajectory From 5fbbaa1bc0336a4580014d389ab8077db5873efd Mon Sep 17 00:00:00 2001 From: Remi Cadene Date: Thu, 20 Feb 2025 23:04:58 +0000 Subject: [PATCH 13/28] fix No such file or directory error --- lerobot/common/datasets/lerobot_dataset.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 81280e68..1d79edc1 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -849,13 +849,15 @@ class LeRobotDataset(torch.utils.data.Dataset): self._wait_image_writer() self._save_episode_table(episode_buffer, episode_index) ep_stats = compute_episode_stats(episode_buffer, self.features) - self.meta.save_episode(episode_index, episode_length, episode_tasks, ep_stats) if len(self.meta.video_keys) > 0: video_paths = self.encode_episode_videos(episode_index) for key in self.meta.video_keys: episode_buffer[key] = video_paths[key] + # `meta.save_episode` be executed after encoding the videos + self.meta.save_episode(episode_index, episode_length, episode_tasks, ep_stats) + self.hf_dataset = self.load_hf_dataset() self.episode_data_index = get_episode_data_index(self.meta.episodes, self.episodes) check_timestamps_sync(self.hf_dataset, self.episode_data_index, self.fps, self.tolerance_s) From 93c80b2cb10f1d50014658329e7d439217b6f59f Mon Sep 17 00:00:00 2001 From: Remi Cadene Date: Thu, 20 Feb 2025 23:24:03 +0000 Subject: [PATCH 14/28] rm brake --- examples/port_datasets/openx_rlds.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/port_datasets/openx_rlds.py b/examples/port_datasets/openx_rlds.py index d582d947..38e21d71 100644 --- a/examples/port_datasets/openx_rlds.py +++ b/examples/port_datasets/openx_rlds.py @@ -179,8 +179,6 @@ def save_as_lerobot_dataset( lerobot_dataset.save_episode() print("\nsave_episode\n") - break - def create_lerobot_dataset( raw_dir: Path, From 52fb4143b52208005f6fc96284a7a4091648fa15 Mon Sep 17 00:00:00 2001 From: Remi Cadene Date: Fri, 21 Feb 2025 13:08:21 +0000 Subject: [PATCH 15/28] workers --- examples/port_datasets/openx_rlds_datatrove.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/port_datasets/openx_rlds_datatrove.py b/examples/port_datasets/openx_rlds_datatrove.py index 842b378d..1163345f 100644 --- a/examples/port_datasets/openx_rlds_datatrove.py +++ b/examples/port_datasets/openx_rlds_datatrove.py @@ -58,8 +58,8 @@ def main(slurm=True): dist_extra_kwargs = { "job_name": port_job_name, "tasks": 10000, - "workers": 24, - "time": "00:30:00", + "workers": 8 * 16, + "time": "08:00:00", "partition": "hopper-cpu", "cpus_per_task": 12, "mem_per_cpu_gb": 4, From 15e7a9d541107f6ab181afed80dbcef4d9951a5a Mon Sep 17 00:00:00 2001 From: Remi Cadene Date: Fri, 21 Feb 2025 23:14:22 +0000 Subject: [PATCH 16/28] before new 
launch from scratch --- examples/port_datasets/openx_rlds.py | 5 +++-- examples/port_datasets/openx_rlds_datatrove.py | 16 +++++++++------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/examples/port_datasets/openx_rlds.py b/examples/port_datasets/openx_rlds.py index 38e21d71..99ff6d4d 100644 --- a/examples/port_datasets/openx_rlds.py +++ b/examples/port_datasets/openx_rlds.py @@ -142,8 +142,6 @@ def save_as_lerobot_dataset( print(f"Total number of episodes {total_num_episodes}") if num_shards is not None: - num_shards = 10000 - shard_index = 9999 sharded_dataset = raw_dataset.shard(num_shards=num_shards, index=shard_index) sharded_num_episodes = sharded_dataset.cardinality().numpy().item() print(f"{sharded_num_episodes=}") @@ -153,6 +151,9 @@ def save_as_lerobot_dataset( num_episodes = total_num_episodes iter_ = iter(raw_dataset) + if num_episodes <= 0: + raise ValueError(f"Number of episodes is {num_episodes}, but needs to be positive.") + for episode_index in range(num_episodes): print(f"{episode_index} / {num_episodes}") episode = next(iter_) diff --git a/examples/port_datasets/openx_rlds_datatrove.py b/examples/port_datasets/openx_rlds_datatrove.py index 1163345f..acb8e94e 100644 --- a/examples/port_datasets/openx_rlds_datatrove.py +++ b/examples/port_datasets/openx_rlds_datatrove.py @@ -1,5 +1,4 @@ import datetime as dt -import shutil from pathlib import Path from datatrove.executor import LocalPipelineExecutor @@ -45,24 +44,27 @@ class AggregateDatasets(PipelineStep): def main(slurm=True): - for dir_ in Path("/fsx/remi_cadene/.cache/huggingface/lerobot/cadene").glob("droid_world*"): - shutil.rmtree(dir_) + # breakpoint() + # for dir_ in Path("/fsx/remi_cadene/.cache/huggingface/lerobot/cadene").glob("droid_world*"): + # shutil.rmtree(dir_) now = dt.datetime.now() port_job_name = "port_openx_droid" logs_dir = Path("/fsx/remi_cadene/logs") - port_log_dir = logs_dir / f"{now:%Y-%m-%d}_{now:%H-%M-%S}_{port_job_name}" + # port_log_dir = logs_dir / f"{now:%Y-%m-%d}_{now:%H-%M-%S}_{port_job_name}" + port_log_dir = Path("/fsx/remi_cadene/logs/2025-02-20_23-24-12_port_openx_droid") if slurm: executor_class = SlurmPipelineExecutor dist_extra_kwargs = { "job_name": port_job_name, "tasks": 10000, - "workers": 8 * 16, + "workers": 20, # 8 * 16, "time": "08:00:00", "partition": "hopper-cpu", - "cpus_per_task": 12, - "mem_per_cpu_gb": 4, + "cpus_per_task": 24, + "mem_per_cpu_gb": 2, + "max_array_launch_parallel": True, } else: executor_class = LocalPipelineExecutor From eda0b996cdd0b3b0875f9855bfb9ed64adffde46 Mon Sep 17 00:00:00 2001 From: Remi Cadene Date: Fri, 21 Feb 2025 23:56:44 +0000 Subject: [PATCH 17/28] new dir --- examples/port_datasets/openx_rlds_datatrove.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/port_datasets/openx_rlds_datatrove.py b/examples/port_datasets/openx_rlds_datatrove.py index acb8e94e..9ecbec59 100644 --- a/examples/port_datasets/openx_rlds_datatrove.py +++ b/examples/port_datasets/openx_rlds_datatrove.py @@ -29,7 +29,7 @@ class PortOpenXDataset(PipelineStep): create_lerobot_dataset( self.raw_dir, - f"{self.repo_id}_world_{world_size}_rank_{rank}", + f"{self.repo_id}_2025-02-22_00-12-00_world_{world_size}_rank_{rank}", image_writer_process=self.image_writer_process, image_writer_threads=self.image_writer_threads, push_to_hub=False, @@ -52,7 +52,7 @@ def main(slurm=True): port_job_name = "port_openx_droid" logs_dir = Path("/fsx/remi_cadene/logs") # port_log_dir = logs_dir / 
f"{now:%Y-%m-%d}_{now:%H-%M-%S}_{port_job_name}" - port_log_dir = Path("/fsx/remi_cadene/logs/2025-02-20_23-24-12_port_openx_droid") + port_log_dir = Path("/fsx/remi_cadene/logs/2025-02-22_00-12-00_port_openx_droid") if slurm: executor_class = SlurmPipelineExecutor From 689c5efc727e73da03dc0790c64ecc71d3653b6d Mon Sep 17 00:00:00 2001 From: Remi Cadene Date: Sat, 22 Feb 2025 10:13:09 +0000 Subject: [PATCH 18/28] optimize shard --- examples/port_datasets/openx_rlds.py | 13 ++++++++++--- examples/port_datasets/openx_rlds_datatrove.py | 8 +++++--- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/examples/port_datasets/openx_rlds.py b/examples/port_datasets/openx_rlds.py index 99ff6d4d..c57a0c52 100644 --- a/examples/port_datasets/openx_rlds.py +++ b/examples/port_datasets/openx_rlds.py @@ -205,7 +205,16 @@ def create_lerobot_dataset( builder = tfds.builder(dataset_name, data_dir=data_dir, version=version) features = generate_features_from_raw(dataset_name, builder, use_videos) - raw_dataset = builder.as_dataset(split="train") + + if num_shards is not None: + if num_shards != builder.info.splits["train"].num_shards: + raise ValueError() + if shard_index >= builder.info.splits["train"].num_shards: + raise ValueError() + + raw_dataset = builder.as_dataset(split=f"train[{shard_index}shard]") + else: + raw_dataset = builder.as_dataset(split="train") if fps is None: if dataset_name in OXE_DATASET_CONFIGS: @@ -234,8 +243,6 @@ def create_lerobot_dataset( dataset_name, lerobot_dataset, raw_dataset, - num_shards=num_shards, - shard_index=shard_index, ) if push_to_hub: diff --git a/examples/port_datasets/openx_rlds_datatrove.py b/examples/port_datasets/openx_rlds_datatrove.py index 9ecbec59..32ccaa1a 100644 --- a/examples/port_datasets/openx_rlds_datatrove.py +++ b/examples/port_datasets/openx_rlds_datatrove.py @@ -29,7 +29,7 @@ class PortOpenXDataset(PipelineStep): create_lerobot_dataset( self.raw_dir, - f"{self.repo_id}_2025-02-22_00-12-00_world_{world_size}_rank_{rank}", + f"{self.repo_id}_world_{world_size}_rank_{rank}", image_writer_process=self.image_writer_process, image_writer_threads=self.image_writer_threads, push_to_hub=False, @@ -64,7 +64,7 @@ def main(slurm=True): "partition": "hopper-cpu", "cpus_per_task": 24, "mem_per_cpu_gb": 2, - "max_array_launch_parallel": True, + # "max_array_launch_parallel": True, } else: executor_class = LocalPipelineExecutor @@ -75,7 +75,9 @@ def main(slurm=True): port_executor = executor_class( pipeline=[ - PortOpenXDataset(raw_dir=Path("/fsx/mustafa_shukor/droid"), repo_id="cadene/droid"), + PortOpenXDataset( + raw_dir=Path("/fsx/mustafa_shukor/droid"), repo_id="cadene/droid_2025-02-22_00-12-00" + ), ], logging_dir=str(port_log_dir), **dist_extra_kwargs, From 39ad2d16d424ab879c1c4b8f8d0ff7c14da460d8 Mon Sep 17 00:00:00 2001 From: Remi Cadene Date: Sat, 22 Feb 2025 11:12:39 +0000 Subject: [PATCH 19/28] let's go --- examples/port_datasets/openx_rlds.py | 23 +++++++++++------- .../port_datasets/openx_rlds_datatrove.py | 24 ++++++++++++------- lerobot/common/datasets/video_utils.py | 2 +- lerobot/common/utils/utils.py | 10 ++++++++ 4 files changed, 42 insertions(+), 17 deletions(-) diff --git a/examples/port_datasets/openx_rlds.py b/examples/port_datasets/openx_rlds.py index c57a0c52..51a92773 100644 --- a/examples/port_datasets/openx_rlds.py +++ b/examples/port_datasets/openx_rlds.py @@ -34,18 +34,20 @@ python examples/port_datasets/openx_rlds.py \ """ import argparse +import logging import re import shutil +import time from pathlib import Path 
import numpy as np import tensorflow as tf import tensorflow_datasets as tfds -import tqdm from examples.port_datasets.openx_utils.configs import OXE_DATASET_CONFIGS, StateEncoding from examples.port_datasets.openx_utils.transforms import OXE_STANDARDIZATION_TRANSFORMS from lerobot.common.datasets.lerobot_dataset import LeRobotDataset +from lerobot.common.utils.utils import get_elapsed_time_in_days_hours_minutes_seconds np.set_printoptions(precision=2) @@ -138,13 +140,14 @@ def save_as_lerobot_dataset( num_shards: int | None = None, shard_index: int | None = None, ): + start_time = time.time() total_num_episodes = raw_dataset.cardinality().numpy().item() - print(f"Total number of episodes {total_num_episodes}") + logging.info(f"Total number of episodes {total_num_episodes}") if num_shards is not None: sharded_dataset = raw_dataset.shard(num_shards=num_shards, index=shard_index) sharded_num_episodes = sharded_dataset.cardinality().numpy().item() - print(f"{sharded_num_episodes=}") + logging.info(f"{sharded_num_episodes=}") num_episodes = sharded_num_episodes iter_ = iter(sharded_dataset) else: @@ -155,13 +158,18 @@ def save_as_lerobot_dataset( raise ValueError(f"Number of episodes is {num_episodes}, but needs to be positive.") for episode_index in range(num_episodes): - print(f"{episode_index} / {num_episodes}") + logging.info(f"{episode_index} / {num_episodes} episodes processed") + + elapsed_time = time.time() - start_time + d, h, m, s = get_elapsed_time_in_days_hours_minutes_seconds(elapsed_time) + logging.info(f"It has been {d} days, {h} hours, {m} minutes, {s:.3f} seconds") + episode = next(iter_) - print("\nnext\n") + logging.info("next") episode = transform_raw_dataset(episode, dataset_name) traj = episode["steps"] - for i in tqdm.tqdm(range(traj["action"].shape[0])): + for i in range(traj["action"].shape[0]): image_dict = { f"observation.images.{key}": value[i].numpy() for key, value in traj["observation"].items() @@ -176,9 +184,8 @@ def save_as_lerobot_dataset( } ) - print() lerobot_dataset.save_episode() - print("\nsave_episode\n") + logging.info("save_episode") def create_lerobot_dataset( diff --git a/examples/port_datasets/openx_rlds_datatrove.py b/examples/port_datasets/openx_rlds_datatrove.py index 32ccaa1a..82d6cb3f 100644 --- a/examples/port_datasets/openx_rlds_datatrove.py +++ b/examples/port_datasets/openx_rlds_datatrove.py @@ -21,8 +21,14 @@ class PortOpenXDataset(PipelineStep): self.image_writer_threads = image_writer_threads def run(self, data=None, rank: int = 0, world_size: int = 1): + from datasets.utils.tqdm import disable_progress_bars + from examples.port_datasets.openx_rlds import create_lerobot_dataset from examples.port_datasets.openx_utils.test import display_slurm_info, display_system_info + from lerobot.common.utils.utils import init_logging + + init_logging() + disable_progress_bars() display_system_info() display_slurm_info() @@ -48,18 +54,22 @@ def main(slurm=True): # for dir_ in Path("/fsx/remi_cadene/.cache/huggingface/lerobot/cadene").glob("droid_world*"): # shutil.rmtree(dir_) - now = dt.datetime.now() port_job_name = "port_openx_droid" logs_dir = Path("/fsx/remi_cadene/logs") - # port_log_dir = logs_dir / f"{now:%Y-%m-%d}_{now:%H-%M-%S}_{port_job_name}" - port_log_dir = Path("/fsx/remi_cadene/logs/2025-02-22_00-12-00_port_openx_droid") + + now = dt.datetime.now() + datetime = f"{now:%Y-%m-%d}_{now:%H-%M-%S}" + # datetime = "2025-02-22_11-17-00" + + port_log_dir = logs_dir / f"{datetime}_{port_job_name}" if slurm: executor_class = 
SlurmPipelineExecutor dist_extra_kwargs = { "job_name": port_job_name, - "tasks": 10000, - "workers": 20, # 8 * 16, + "tasks": 2048, + # "workers": 20, # 8 * 16, + "workers": 1, # 8 * 16, "time": "08:00:00", "partition": "hopper-cpu", "cpus_per_task": 24, @@ -75,9 +85,7 @@ def main(slurm=True): port_executor = executor_class( pipeline=[ - PortOpenXDataset( - raw_dir=Path("/fsx/mustafa_shukor/droid"), repo_id="cadene/droid_2025-02-22_00-12-00" - ), + PortOpenXDataset(raw_dir=Path("/fsx/mustafa_shukor/droid"), repo_id=f"cadene/droid_{datetime}"), ], logging_dir=str(port_log_dir), **dist_extra_kwargs, diff --git a/lerobot/common/datasets/video_utils.py b/lerobot/common/datasets/video_utils.py index 8be53483..3d9269a2 100644 --- a/lerobot/common/datasets/video_utils.py +++ b/lerobot/common/datasets/video_utils.py @@ -136,7 +136,7 @@ def encode_video_frames( g: int | None = 2, crf: int | None = 30, fast_decode: int = 0, - log_level: str | None = "error", + log_level: str | None = "quiet", overwrite: bool = False, ) -> None: """More info on ffmpeg arguments tuning on `benchmark/video/README.md`""" diff --git a/lerobot/common/utils/utils.py b/lerobot/common/utils/utils.py index d0c12b30..0d9631b0 100644 --- a/lerobot/common/utils/utils.py +++ b/lerobot/common/utils/utils.py @@ -216,3 +216,13 @@ def is_valid_numpy_dtype_string(dtype_str: str) -> bool: except TypeError: # If a TypeError is raised, the string is not a valid dtype return False + + +def get_elapsed_time_in_days_hours_minutes_seconds(elapsed_time_s: float): + days = int(elapsed_time_s // (24 * 3600)) + elapsed_time_s %= 24 * 3600 + hours = int(elapsed_time_s // 3600) + elapsed_time_s %= 3600 + minutes = int(elapsed_time_s // 60) + seconds = elapsed_time_s % 60 + return days, hours, minutes, seconds From ff0029f84bf1f3311e0b61c5b41eeae1c588330d Mon Sep 17 00:00:00 2001 From: Remi Cadene Date: Sat, 22 Feb 2025 15:33:47 +0000 Subject: [PATCH 20/28] aggregate works --- .../port_datasets/openx_rlds_datatrove.py | 4 +- lerobot/common/datasets/aggregate.py | 162 ++++++++++++++++++ 2 files changed, 164 insertions(+), 2 deletions(-) create mode 100644 lerobot/common/datasets/aggregate.py diff --git a/examples/port_datasets/openx_rlds_datatrove.py b/examples/port_datasets/openx_rlds_datatrove.py index 82d6cb3f..b4910544 100644 --- a/examples/port_datasets/openx_rlds_datatrove.py +++ b/examples/port_datasets/openx_rlds_datatrove.py @@ -69,12 +69,12 @@ def main(slurm=True): "job_name": port_job_name, "tasks": 2048, # "workers": 20, # 8 * 16, - "workers": 1, # 8 * 16, + "workers": 20, # 8 * 16, "time": "08:00:00", "partition": "hopper-cpu", "cpus_per_task": 24, "mem_per_cpu_gb": 2, - # "max_array_launch_parallel": True, + "max_array_launch_parallel": True, } else: executor_class = LocalPipelineExecutor diff --git a/lerobot/common/datasets/aggregate.py b/lerobot/common/datasets/aggregate.py new file mode 100644 index 00000000..1f02e07e --- /dev/null +++ b/lerobot/common/datasets/aggregate.py @@ -0,0 +1,162 @@ +import shutil +from pathlib import Path + +import pandas as pd + +from lerobot.common.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata +from lerobot.common.datasets.utils import write_episode, write_episode_stats, write_info, write_task + + +def validate_all_metadata(all_metadata: list[LeRobotDatasetMetadata]): + # validate same fps, robot_type, features + + fps = all_metadata[0].fps + robot_type = all_metadata[0].robot_type + features = all_metadata[0].features + + for meta in all_metadata: + if fps != meta.fps: + raise 
ValueError(f"Same fps is expected, but got fps={meta.fps} instead of {fps}.") + if robot_type != meta.robot_type: + raise ValueError( + f"Same robot_type is expected, but got robot_type={meta.robot_type} instead of {robot_type}." + ) + if features != meta.features: + raise ValueError( + f"Same features is expected, but got features={meta.features} instead of {features}." + ) + + return fps, robot_type, features + + +def get_update_episode_and_task_func(episode_index_to_add, task_index_to_global_task_index): + def _update(row): + row["episode_index"] = row["episode_index"] + episode_index_to_add + row["task_index"] = task_index_to_global_task_index[row["task_index"]] + return row + + return _update + + +def aggregate_datasets(all_metadata: list[LeRobotDatasetMetadata], repo_id: str, root=None): + fps, robot_type, features = validate_all_metadata(all_metadata) + + # Create resulting dataset folder + aggr_meta = LeRobotDatasetMetadata.create( + repo_id=repo_id, + fps=fps, + robot_type=robot_type, + features=features, + root=root, + ) + + # find all tasks, deduplicate them, create new task indices for each dataset + # indexed by dataset index + datasets_task_index_to_aggr_task_index = {} + aggr_task_index = 0 + for dataset_index, meta in enumerate(all_metadata): + task_index_to_aggr_task_index = {} + + for task_index, task in meta.tasks.items(): + if task not in aggr_meta.task_to_task_index: + # add the task to aggr tasks mappings + aggr_meta.tasks[aggr_task_index] = task + aggr_meta.task_to_task_index[task] = aggr_task_index + aggr_task_index += 1 + + # add task_index anyway + task_index_to_aggr_task_index[task_index] = aggr_meta.task_to_task_index[task] + + datasets_task_index_to_aggr_task_index[dataset_index] = task_index_to_aggr_task_index + + aggr_episode_index_shift = 0 + for dataset_index, meta in enumerate(all_metadata): + # cp data + for episode_index in range(meta.total_episodes): + aggr_episode_index = episode_index + aggr_episode_index_shift + data_path = meta.root / meta.get_data_file_path(episode_index) + aggr_data_path = aggr_meta.root / aggr_meta.get_data_file_path(aggr_episode_index) + + # update episode_index and task_index + df = pd.read_parquet(data_path) + update_row_func = get_update_episode_and_task_func( + aggr_episode_index_shift, datasets_task_index_to_aggr_task_index[dataset_index] + ) + df = df.apply(update_row_func, axis=1) + + aggr_data_path.parent.mkdir(parents=True, exist_ok=True) + df.to_parquet(aggr_data_path) + + # cp videos + for episode_index in range(meta.total_episodes): + aggr_episode_index = episode_index + aggr_episode_index_shift + for vid_key in meta.video_keys: + video_path = meta.root / meta.get_video_file_path(episode_index, vid_key) + aggr_video_path = aggr_meta.root / aggr_meta.get_video_file_path(aggr_episode_index, vid_key) + aggr_video_path.parent.mkdir(parents=True, exist_ok=True) + shutil.copy(video_path, aggr_video_path) + + # populate episodes + for episode_index, episode_dict in meta.episodes.items(): + aggr_episode_index = episode_index + aggr_episode_index_shift + episode_dict["episode_index"] = aggr_episode_index + aggr_meta.episodes[aggr_episode_index] = episode_dict + + # populate episodes_stats + for episode_index, episode_stats in meta.episodes_stats.items(): + aggr_episode_index = episode_index + aggr_episode_index_shift + aggr_meta.episodes_stats[aggr_episode_index] = episode_stats + + # populate info + aggr_meta.info["total_episodes"] += meta.total_episodes + aggr_meta.info["total_frames"] += meta.total_episodes + 
aggr_meta.info["total_videos"] += len(aggr_meta.video_keys) * meta.total_episodes + + aggr_episode_index_shift += meta.total_episodes + + aggr_meta.info["total_chunks"] = aggr_meta.get_episode_chunk(aggr_episode_index_shift - 1) + aggr_meta.info["splits"] = {"train": f"0:{aggr_meta.info['total_episodes']}"} + + # create a new episodes jsonl with updated episode_index using write_episode + for episode_dict in aggr_meta.episodes.values(): + write_episode(episode_dict, aggr_meta.root) + + # create a new episode_stats jsonl with updated episode_index using write_episode_stats + for episode_index, episode_stats in aggr_meta.episodes_stats.items(): + write_episode_stats(episode_index, episode_stats, aggr_meta.root) + + # create a new task jsonl with updated episode_index using write_task + for task_index, task in aggr_meta.tasks.items(): + write_task(task_index, task, aggr_meta.root) + + write_info(aggr_meta.info, aggr_meta.root) + + +if __name__ == "__main__": + repo_id = "cadene/droid" + datetime = "2025-02-22_11-23-54" + + root = Path(f"/tmp/{repo_id}") + if root.exists(): + shutil.rmtree(root) + + all_metadata = [ + LeRobotDatasetMetadata(f"{repo_id}_{datetime}_world_2048_rank_0"), + LeRobotDatasetMetadata(f"{repo_id}_{datetime}_world_2048_rank_1"), + ] + + aggregate_datasets( + all_metadata, + repo_id, + root=root, + ) + + aggr_dataset = LeRobotDataset( + repo_id=repo_id, + root=root, + ) + aggr_dataset.push_to_hub() + + # for meta in all_metadata: + # dataset = LeRobotDataset(repo_id=meta.repo_id, root=meta.root) + # dataset.push_to_hub() From e2e6f6e666dc3d85bde12b642643259b08cc19d1 Mon Sep 17 00:00:00 2001 From: Remi Cadene Date: Sun, 23 Feb 2025 18:15:39 +0000 Subject: [PATCH 21/28] Add auto_downsample_height_width --- lerobot/common/datasets/compute_stats.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/lerobot/common/datasets/compute_stats.py b/lerobot/common/datasets/compute_stats.py index 7519c743..a029f892 100644 --- a/lerobot/common/datasets/compute_stats.py +++ b/lerobot/common/datasets/compute_stats.py @@ -43,16 +43,32 @@ def sample_indices(data_len: int) -> list[int]: return np.round(np.linspace(0, data_len - 1, num_samples)).astype(int).tolist() +def auto_downsample_height_width(img: np.ndarray, target_size: int = 150, max_size_threshold: int = 300): + _, height, width = img.shape + + if max(width, height) < max_size_threshold: + # no downsampling needed + return img + + downsample_factor = int(width / target_size) if width > height else int(height / target_size) + return img[:, ::downsample_factor, ::downsample_factor] + + def sample_images(image_paths: list[str]) -> np.ndarray: sampled_indices = sample_indices(len(image_paths)) - images = [] - for idx in sampled_indices: + + images = None + for i, idx in enumerate(sampled_indices): path = image_paths[idx] # we load as uint8 to reduce memory usage img = load_image_as_numpy(path, dtype=np.uint8, channel_first=True) - images.append(img) + img = auto_downsample_height_width(img) + + if images is None: + images = np.empty((len(sampled_indices), *img.shape), dtype=np.uint8) + + images[i] = img - images = np.stack(images) return images From c36d2253d0b48f6a66a0111fd0018ab5f7a236b1 Mon Sep 17 00:00:00 2001 From: Remi Cadene Date: Sun, 23 Feb 2025 18:18:46 +0000 Subject: [PATCH 22/28] Aggregate works --- examples/port_datasets/openx_rlds.py | 7 ++- .../port_datasets/openx_rlds_completed.py | 52 ++++++++++++++++++ lerobot/common/datasets/aggregate.py | 54 +++++++++++-------- 
tests/test_aggregate_datasets.py | 19 +++++++ 4 files changed, 106 insertions(+), 26 deletions(-) create mode 100644 examples/port_datasets/openx_rlds_completed.py create mode 100644 tests/test_aggregate_datasets.py diff --git a/examples/port_datasets/openx_rlds.py b/examples/port_datasets/openx_rlds.py index 51a92773..db051b5f 100644 --- a/examples/port_datasets/openx_rlds.py +++ b/examples/port_datasets/openx_rlds.py @@ -36,7 +36,6 @@ python examples/port_datasets/openx_rlds.py \ import argparse import logging import re -import shutil import time from pathlib import Path @@ -316,9 +315,9 @@ def main(): args = parser.parse_args() - droid_dir = Path("/fsx/remi_cadene/.cache/huggingface/lerobot/cadene/droid") - if droid_dir.exists(): - shutil.rmtree(droid_dir) + # droid_dir = Path("/fsx/remi_cadene/.cache/huggingface/lerobot/cadene/droid") + # if droid_dir.exists(): + # shutil.rmtree(droid_dir) create_lerobot_dataset(**vars(args)) diff --git a/examples/port_datasets/openx_rlds_completed.py b/examples/port_datasets/openx_rlds_completed.py new file mode 100644 index 00000000..849d65cf --- /dev/null +++ b/examples/port_datasets/openx_rlds_completed.py @@ -0,0 +1,52 @@ +from pathlib import Path + +import tqdm + +from lerobot.common.datasets.lerobot_dataset import LeRobotDatasetMetadata + + +def main(): + repo_id = "cadene/droid" + datetime = "2025-02-22_11-23-54" + port_log_dir = Path(f"/fsx/remi_cadene/logs/{datetime}_port_openx_droid") + + compl_dir = port_log_dir / "completions" + + paths = list(compl_dir.glob("*")) + total_items = len(paths) + + # Use tqdm with the total parameter + wrong_completions = [] + error_messages = [] + for i, path in tqdm.tqdm(enumerate(paths), total=total_items): + try: + rank = path.name.lstrip("0") + if rank == "": + rank = 0 + meta = LeRobotDatasetMetadata(f"{repo_id}_{datetime}_world_2048_rank_{rank}") + last_episode_index = meta.total_episodes - 1 + last_ep_data_path = meta.root / meta.get_data_file_path(last_episode_index) + + if not last_ep_data_path.exists(): + raise ValueError(path) + + for vid_key in meta.video_keys: + last_ep_vid_path = meta.root / meta.get_video_file_path(last_episode_index, vid_key) + if not last_ep_vid_path.exists(): + raise ValueError(path) + + except Exception as e: + error_messages.append(str(e)) + wrong_completions.append(path) + + for path, error_msg in zip(wrong_completions, error_messages, strict=False): + print(path) + print(error_msg) + print() + # path.unlink() + + print(f"Error {len(wrong_completions)} / {total_items}") + + +if __name__ == "__main__": + main() diff --git a/lerobot/common/datasets/aggregate.py b/lerobot/common/datasets/aggregate.py index 1f02e07e..d891e008 100644 --- a/lerobot/common/datasets/aggregate.py +++ b/lerobot/common/datasets/aggregate.py @@ -1,10 +1,12 @@ -import shutil -from pathlib import Path +import logging +import subprocess import pandas as pd +import tqdm from lerobot.common.datasets.lerobot_dataset import LeRobotDataset, LeRobotDatasetMetadata from lerobot.common.datasets.utils import write_episode, write_episode_stats, write_info, write_task +from lerobot.common.utils.utils import init_logging def validate_all_metadata(all_metadata: list[LeRobotDatasetMetadata]): @@ -14,7 +16,7 @@ def validate_all_metadata(all_metadata: list[LeRobotDatasetMetadata]): robot_type = all_metadata[0].robot_type features = all_metadata[0].features - for meta in all_metadata: + for meta in tqdm.tqdm(all_metadata): if fps != meta.fps: raise ValueError(f"Same fps is expected, but got fps={meta.fps} instead of 
{fps}.") if robot_type != meta.robot_type: @@ -39,6 +41,7 @@ def get_update_episode_and_task_func(episode_index_to_add, task_index_to_global_ def aggregate_datasets(all_metadata: list[LeRobotDatasetMetadata], repo_id: str, root=None): + logging.info("start aggregate_datasets") fps, robot_type, features = validate_all_metadata(all_metadata) # Create resulting dataset folder @@ -50,11 +53,12 @@ def aggregate_datasets(all_metadata: list[LeRobotDatasetMetadata], repo_id: str, root=root, ) + logging.info("find all tasks") # find all tasks, deduplicate them, create new task indices for each dataset # indexed by dataset index datasets_task_index_to_aggr_task_index = {} aggr_task_index = 0 - for dataset_index, meta in enumerate(all_metadata): + for dataset_index, meta in enumerate(tqdm.tqdm(all_metadata)): task_index_to_aggr_task_index = {} for task_index, task in meta.tasks.items(): @@ -69,8 +73,9 @@ def aggregate_datasets(all_metadata: list[LeRobotDatasetMetadata], repo_id: str, datasets_task_index_to_aggr_task_index[dataset_index] = task_index_to_aggr_task_index + logging.info("cp data and videos") aggr_episode_index_shift = 0 - for dataset_index, meta in enumerate(all_metadata): + for dataset_index, meta in enumerate(tqdm.tqdm(all_metadata)): # cp data for episode_index in range(meta.total_episodes): aggr_episode_index = episode_index + aggr_episode_index_shift @@ -94,7 +99,10 @@ def aggregate_datasets(all_metadata: list[LeRobotDatasetMetadata], repo_id: str, video_path = meta.root / meta.get_video_file_path(episode_index, vid_key) aggr_video_path = aggr_meta.root / aggr_meta.get_video_file_path(aggr_episode_index, vid_key) aggr_video_path.parent.mkdir(parents=True, exist_ok=True) - shutil.copy(video_path, aggr_video_path) + # shutil.copy(video_path, aggr_video_path) + + copy_command = f"cp {video_path} {aggr_video_path} &" + subprocess.Popen(copy_command, shell=True) # populate episodes for episode_index, episode_dict in meta.episodes.items(): @@ -109,11 +117,13 @@ def aggregate_datasets(all_metadata: list[LeRobotDatasetMetadata], repo_id: str, # populate info aggr_meta.info["total_episodes"] += meta.total_episodes - aggr_meta.info["total_frames"] += meta.total_episodes + aggr_meta.info["total_frames"] += meta.total_frames aggr_meta.info["total_videos"] += len(aggr_meta.video_keys) * meta.total_episodes aggr_episode_index_shift += meta.total_episodes + logging.info("write meta data") + aggr_meta.info["total_chunks"] = aggr_meta.get_episode_chunk(aggr_episode_index_shift - 1) aggr_meta.info["splits"] = {"train": f"0:{aggr_meta.info['total_episodes']}"} @@ -133,30 +143,30 @@ def aggregate_datasets(all_metadata: list[LeRobotDatasetMetadata], repo_id: str, if __name__ == "__main__": + init_logging() repo_id = "cadene/droid" + aggr_repo_id = "cadene/droid" datetime = "2025-02-22_11-23-54" - root = Path(f"/tmp/{repo_id}") - if root.exists(): - shutil.rmtree(root) + # root = Path(f"/tmp/{repo_id}") + # if root.exists(): + # shutil.rmtree(root) + root = None - all_metadata = [ - LeRobotDatasetMetadata(f"{repo_id}_{datetime}_world_2048_rank_0"), - LeRobotDatasetMetadata(f"{repo_id}_{datetime}_world_2048_rank_1"), - ] + # all_metadata = [LeRobotDatasetMetadata(f"{repo_id}_{datetime}_world_2048_rank_{rank}") for rank in range(2048)] - aggregate_datasets( - all_metadata, - repo_id, - root=root, - ) + # aggregate_datasets( + # all_metadata, + # aggr_repo_id, + # root=root, + # ) aggr_dataset = LeRobotDataset( - repo_id=repo_id, + repo_id=aggr_repo_id, root=root, ) - aggr_dataset.push_to_hub() + 
aggr_dataset.push_to_hub(tags=["openx"]) # for meta in all_metadata: # dataset = LeRobotDataset(repo_id=meta.repo_id, root=meta.root) - # dataset.push_to_hub() + # dataset.push_to_hub(tags=["openx"]) diff --git a/tests/test_aggregate_datasets.py b/tests/test_aggregate_datasets.py new file mode 100644 index 00000000..ad5c2022 --- /dev/null +++ b/tests/test_aggregate_datasets.py @@ -0,0 +1,19 @@ +from lerobot.common.datasets.aggregate import aggregate_datasets +from tests.fixtures.constants import DUMMY_REPO_ID + + +def test_aggregate_datasets(tmp_path, lerobot_dataset_factory): + dataset_0 = lerobot_dataset_factory( + root=tmp_path / "test_0", + repo_id=DUMMY_REPO_ID + "_0", + total_episodes=10, + total_frames=400, + ) + dataset_1 = lerobot_dataset_factory( + root=tmp_path / "test_1", + repo_id=DUMMY_REPO_ID + "_1", + total_episodes=10, + total_frames=400, + ) + + dataset_2 = aggregate_datasets([dataset_0, dataset_1]) From 3daab2acbb941ab6862658167299fb93521f5fed Mon Sep 17 00:00:00 2001 From: Remi Cadene Date: Sun, 23 Feb 2025 18:19:12 +0000 Subject: [PATCH 23/28] Add upload_large_folder --- lerobot/common/datasets/lerobot_dataset.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 1d79edc1..a5ebc475 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -516,6 +516,7 @@ class LeRobotDataset(torch.utils.data.Dataset): push_videos: bool = True, private: bool = False, allow_patterns: list[str] | str | None = None, + upload_large_folder: bool = False, **card_kwargs, ) -> None: ignore_patterns = ["images/"] @@ -538,14 +539,19 @@ class LeRobotDataset(torch.utils.data.Dataset): exist_ok=True, ) - hub_api.upload_folder( - repo_id=self.repo_id, - folder_path=self.root, - repo_type="dataset", - revision=branch, - allow_patterns=allow_patterns, - ignore_patterns=ignore_patterns, - ) + upload_kwargs = { + "repo_id": self.repo_id, + "folder_path": self.root, + "repo_type": "dataset", + "revision": branch, + "allow_patterns": allow_patterns, + "ignore_patterns": ignore_patterns, + } + if upload_large_folder: + hub_api.upload_large_folder(**upload_kwargs) + else: + hub_api.upload_folder(**upload_kwargs) + if not hub_api.file_exists(self.repo_id, REPOCARD_NAME, repo_type="dataset", revision=branch): card = create_lerobot_dataset_card( tags=tags, dataset_info=self.meta.info, license=license, **card_kwargs From 3666ac9346afd3a11732555159cfec552d944ad9 Mon Sep 17 00:00:00 2001 From: Remi Cadene Date: Sat, 1 Mar 2025 19:07:22 +0000 Subject: [PATCH 24/28] WIP UploadDataset --- .../port_datasets/openx_rlds_datatrove.py | 290 +++++++++++++++--- lerobot/common/datasets/aggregate.py | 9 +- 2 files changed, 247 insertions(+), 52 deletions(-) diff --git a/examples/port_datasets/openx_rlds_datatrove.py b/examples/port_datasets/openx_rlds_datatrove.py index b4910544..1fb200d9 100644 --- a/examples/port_datasets/openx_rlds_datatrove.py +++ b/examples/port_datasets/openx_rlds_datatrove.py @@ -1,21 +1,35 @@ import datetime as dt +import logging +import os +import random +import time from pathlib import Path from datatrove.executor import LocalPipelineExecutor from datatrove.executor.slurm import SlurmPipelineExecutor from datatrove.pipeline.base import PipelineStep +from huggingface_hub import CommitOperationAdd, HfApi, create_commit, preupload_lfs_files +from huggingface_hub.constants import REPOCARD_NAME +from huggingface_hub.utils 
import HfHubHTTPError + +from lerobot.common.datasets.aggregate import aggregate_datasets +from lerobot.common.datasets.lerobot_dataset import LeRobotDatasetMetadata +from lerobot.common.datasets.utils import create_lerobot_dataset_card + +BASE_DELAY = 0.1 +MAX_RETRIES = 12 class PortOpenXDataset(PipelineStep): def __init__( self, - raw_dir: Path, + raw_dir: Path | str, repo_id: str = None, image_writer_process: int = 0, image_writer_threads: int = 8, ): super().__init__() - self.raw_dir = raw_dir + self.raw_dir = Path(raw_dir) self.repo_id = repo_id self.image_writer_process = image_writer_process self.image_writer_threads = image_writer_threads @@ -45,8 +59,215 @@ class PortOpenXDataset(PipelineStep): class AggregateDatasets(PipelineStep): + def __init__( + self, + repo_ids: list[str], + aggregated_repo_id: str, + ): + super().__init__() + self.repo_ids = repo_ids + self.aggregated_repo_id = aggregated_repo_id + def run(self, data=None, rank: int = 0, world_size: int = 1): - print("aggregation") + aggregate_datasets(self.repo_ids, self.aggregated_repo_id) + + +class UploadDataset(PipelineStep): + def __init__( + self, + repo_id: str, + branch: str | None = None, + tags: list | None = None, + license: str | None = "apache-2.0", + private: bool = False, + **card_kwargs, + ): + super().__init__() + self.repo_id = repo_id + self.branch = branch + self.tags = tags + self.license = license + self.private = private + self.card_kwargs = card_kwargs + + if os.environ.get("HF_HUB_ENABLE_HF_TRANSFER", "0") != "1": + logging.warning( + 'HF_HUB_ENABLE_HF_TRANSFER is not set to "1". Install hf_transfer and set the env ' + "variable for faster uploads:\npip install hf-transfer\nexport HF_HUB_ENABLE_HF_TRANSFER=1" + ) + + self._repo_init = False + + def _create_repo(self, hub_api): + hub_api.create_repo( + repo_id=self.repo_id, + private=self.private, + repo_type="dataset", + exist_ok=True, + ) + if self.branch: + hub_api.create_branch( + repo_id=self.repo_id, + branch=self.branch, + revision=self.revision, + repo_type="dataset", + exist_ok=True, + ) + + if not hub_api.file_exists(self.repo_id, REPOCARD_NAME, repo_type="dataset", revision=self.branch): + card = create_lerobot_dataset_card( + tags=self.tags, dataset_info=self.meta.info, license=license, **self.card_kwargs + ) + card.push_to_hub(repo_id=self.repo_id, repo_type="dataset", revision=self.branch) + + def run(self, data=None, rank: int = 0, world_size: int = 1): + from lerobot.common.utils.utils import init_logging + + init_logging() + + meta = LeRobotDatasetMetadata(self.repo_id) + + # TODO: list files, shard files, upload meta data for rank=0 + filenames = [] + + raise NotImplementedError() + + hub_api = HfApi() + if not self._repo_init: + self._create_repo(hub_api) + self._repo_init = True + + additions = [ + CommitOperationAdd(path_in_repo=filename, path_or_fileobj=meta.root / filename) + for filename in filenames + ] + logging.info(f"Uploading {','.join(filenames)} to the hub...") + preupload_lfs_files( + repo_id=self.repo_id, repo_type="dataset", additions=additions, revision=self.revision + ) + logging.info(f"Upload of {','.join(filenames)} to the hub complete!") + # if self.cleanup: + # for filename in filenames: + # self.local_working_dir.rm(filename) + self.operations.extend(additions) + + def close(self, rank: int = 0): + filelist = list(self.output_mg.get_open_files().keys()) + super().close() + if filelist: + logging.info(f"Starting upload of {len(filelist)} files to {self.dataset}") + self.upload_files(*filelist) + retries = 0 + 
while True: + try: + create_commit( + self.repo_id, + repo_type="dataset", + operations=self.operations, + commit_message=f"DataTrove upload ({len(self.operations)} files)", + revision=self.revision, + ) + break + except HfHubHTTPError as e: + if "A commit has happened since" in e.server_message: + if retries >= MAX_RETRIES: + logging.error(f"Failed to create commit after {MAX_RETRIES=}. Giving up.") + raise e + logging.info("Commit creation race condition issue. Waiting...") + time.sleep(BASE_DELAY * 2**retries + random.uniform(0, 2)) + retries += 1 + else: + raise e + + +def make_port_executor(raw_dir, repo_id, port_job_name, port_log_dir, slurm=True): + kwargs = { + "pipeline": [ + PortOpenXDataset(raw_dir, repo_id), + ], + "logging_dir": str(port_log_dir), + } + + if slurm: + kwargs.update( + { + "job_name": port_job_name, + "tasks": 2048, + "workers": 20, + "time": "08:00:00", + "partition": "hopper-cpu", + "cpus_per_task": 24, + "mem_per_cpu_gb": 2, + "max_array_launch_parallel": True, + } + ) + executor = SlurmPipelineExecutor(**kwargs) + else: + kwargs.update( + { + "tasks": 1, + "workers": 1, + } + ) + executor = LocalPipelineExecutor(**kwargs) + + return executor + + +def make_aggregate_executor( + repo_ids, aggr_repo_id, port_job_name, aggregate_log_dir, depends=None, slurm=True +): + kwargs = { + "pipeline": [ + AggregateDatasets(repo_ids, aggr_repo_id), + ], + "logging_dir": str(aggregate_log_dir), + "tasks": 1, + "workers": 1, + } + if depends: + kwargs["depends"] = depends + + if slurm: + kwargs.update( + { + "job_name": port_job_name, + "time": "08:00:00", + "partition": "hopper-cpu", + } + ) + executor = SlurmPipelineExecutor(**kwargs) + else: + executor = LocalPipelineExecutor(**kwargs) + + return executor + + +def make_upload_executor(repo_id, upload_job_name, upload_log_dir, depends=None, slurm=True): + kwargs = { + "pipeline": [ + UploadDataset(repo_id), + ], + "logging_dir": str(upload_log_dir), + "tasks": 1, + "workers": 1, + } + if depends: + kwargs["depends"] = depends + + if slurm: + kwargs.update( + { + "job_name": upload_job_name, + "time": "08:00:00", + "partition": "hopper-cpu", + } + ) + executor = SlurmPipelineExecutor(**kwargs) + else: + executor = LocalPipelineExecutor(**kwargs) + + return executor def main(slurm=True): @@ -54,64 +275,35 @@ def main(slurm=True): # for dir_ in Path("/fsx/remi_cadene/.cache/huggingface/lerobot/cadene").glob("droid_world*"): # shutil.rmtree(dir_) + world = 2048 + raw_dir = "/fsx/mustafa_shukor/droid" port_job_name = "port_openx_droid" + aggregate_job_name = "aggregate_openx_droid" + upload_job_name = "upload_openx_droid" logs_dir = Path("/fsx/remi_cadene/logs") + repo_id = "cadene/droid" now = dt.datetime.now() datetime = f"{now:%Y-%m-%d}_{now:%H-%M-%S}" # datetime = "2025-02-22_11-17-00" port_log_dir = logs_dir / f"{datetime}_{port_job_name}" + aggregate_log_dir = logs_dir / f"{datetime}_{aggregate_job_name}" + upload_log_dir = logs_dir / f"{datetime}_{upload_job_name}" - if slurm: - executor_class = SlurmPipelineExecutor - dist_extra_kwargs = { - "job_name": port_job_name, - "tasks": 2048, - # "workers": 20, # 8 * 16, - "workers": 20, # 8 * 16, - "time": "08:00:00", - "partition": "hopper-cpu", - "cpus_per_task": 24, - "mem_per_cpu_gb": 2, - "max_array_launch_parallel": True, - } - else: - executor_class = LocalPipelineExecutor - dist_extra_kwargs = { - "tasks": 1, - "workers": 1, - } - - port_executor = executor_class( - pipeline=[ - PortOpenXDataset(raw_dir=Path("/fsx/mustafa_shukor/droid"), 
repo_id=f"cadene/droid_{datetime}"), - ], - logging_dir=str(port_log_dir), - **dist_extra_kwargs, - ) + port_executor = make_port_executor(raw_dir, repo_id, port_job_name, port_log_dir, slurm) port_executor.run() - # if slurm: - # merge_extra_kwargs = {} - # else: - # merge_extra_kwargs = { - # "job_name": "aggregate", - # "time": "00:01:00", - # "partition": "hopper-cpu", - # } + repo_ids = [f"{repo_id}_{datetime}_world_{world}_rank_{rank}" for rank in range(world)] + aggregate_executor = make_aggregate_executor( + repo_ids, repo_id, aggregate_job_name, aggregate_log_dir, port_executor, slurm + ) + aggregate_executor.run() - # merge_executor = executor_class( - # depends=dist_executor, - # pipeline=[ - # Aggregate(), - # ], - # logging_dir=f"/fsx/remi_cadene/logs/openx_rlds_merge", - # tasks=1, - # workers=1, - # **merge_extra_kwargs, - # ) - # merge_executor.run() + upload_executor = make_upload_executor( + repo_id, upload_job_name, upload_log_dir, aggregate_executor, slurm + ) + upload_executor.run() if __name__ == "__main__": diff --git a/lerobot/common/datasets/aggregate.py b/lerobot/common/datasets/aggregate.py index d891e008..c927f6b3 100644 --- a/lerobot/common/datasets/aggregate.py +++ b/lerobot/common/datasets/aggregate.py @@ -40,17 +40,20 @@ def get_update_episode_and_task_func(episode_index_to_add, task_index_to_global_ return _update -def aggregate_datasets(all_metadata: list[LeRobotDatasetMetadata], repo_id: str, root=None): +def aggregate_datasets(repo_ids: list[str], aggr_repo_id: str, aggr_root=None): logging.info("start aggregate_datasets") + + all_metadata = [LeRobotDatasetMetadata(repo_id) for repo_id in repo_ids] + fps, robot_type, features = validate_all_metadata(all_metadata) # Create resulting dataset folder aggr_meta = LeRobotDatasetMetadata.create( - repo_id=repo_id, + repo_id=aggr_repo_id, fps=fps, robot_type=robot_type, features=features, - root=root, + root=aggr_root, ) logging.info("find all tasks") From 1a5c1ef9c7931c5fbe26e368263023da57e62973 Mon Sep 17 00:00:00 2001 From: Remi Cadene Date: Tue, 18 Mar 2025 16:28:09 +0000 Subject: [PATCH 25/28] Rename openx to droid + Improve all (not tested) --- examples/port_datasets/droid_rlds/README.md | 143 +++ .../port_datasets/droid_rlds/port_droid.py | 410 +++++++ .../droid_rlds/slurm_aggregate_shards.py | 287 +++++ .../droid_rlds/slurm_port_shards.py | 161 +++ .../port_datasets/droid_rlds/slurm_upload.py | 230 ++++ examples/port_datasets/openx_rlds.py | 326 ------ .../port_datasets/openx_rlds_completed.py | 52 - .../port_datasets/openx_rlds_datatrove.py | 310 ------ examples/port_datasets/openx_utils/configs.py | 854 --------------- .../openx_utils/slurm_submit.bash | 30 - examples/port_datasets/openx_utils/test.py | 54 - .../openx_utils/transform_utils.py | 76 -- .../port_datasets/openx_utils/transforms.py | 997 ------------------ lerobot/common/datasets/aggregate.py | 20 +- 14 files changed, 1241 insertions(+), 2709 deletions(-) create mode 100644 examples/port_datasets/droid_rlds/README.md create mode 100644 examples/port_datasets/droid_rlds/port_droid.py create mode 100644 examples/port_datasets/droid_rlds/slurm_aggregate_shards.py create mode 100644 examples/port_datasets/droid_rlds/slurm_port_shards.py create mode 100644 examples/port_datasets/droid_rlds/slurm_upload.py delete mode 100644 examples/port_datasets/openx_rlds.py delete mode 100644 examples/port_datasets/openx_rlds_completed.py delete mode 100644 examples/port_datasets/openx_rlds_datatrove.py delete mode 100644 
examples/port_datasets/openx_utils/configs.py
 delete mode 100755 examples/port_datasets/openx_utils/slurm_submit.bash
 delete mode 100644 examples/port_datasets/openx_utils/test.py
 delete mode 100644 examples/port_datasets/openx_utils/transform_utils.py
 delete mode 100644 examples/port_datasets/openx_utils/transforms.py

diff --git a/examples/port_datasets/droid_rlds/README.md b/examples/port_datasets/droid_rlds/README.md
new file mode 100644
index 00000000..6d426dcc
--- /dev/null
+++ b/examples/port_datasets/droid_rlds/README.md
@@ -0,0 +1,143 @@
+# Port DROID 1.0.1 dataset to LeRobotDataset
+
+## Download
+
+TODO
+
+It will take 2 TB on your local disk.
+
+## Port on a single computer
+
+First, install the tensorflow dataset utilities to read from the raw files:
+```bash
+pip install tensorflow
+pip install tensorflow_datasets
+```
+
+Then run this script to start porting the dataset:
+```bash
+python examples/port_datasets/droid_rlds/port_droid.py \
+    --raw-dir /your/data/droid/1.0.1 \
+    --repo-id your_id/droid_1.0.1 \
+    --push-to-hub
+```
+
+It will take 400 GB on your local disk.
+
+As usual, your LeRobotDataset will be stored in your huggingface/lerobot cache folder.
+
+WARNING: porting the dataset locally takes about 7 days and uploading it another 3, so we will need to parallelize over multiple nodes on a slurm cluster.
+
+NOTE: For development, run this script to port a single shard:
+```bash
+python examples/port_datasets/droid_rlds/port_droid.py \
+    --raw-dir /your/data/droid/1.0.1 \
+    --repo-id your_id/droid_1.0.1 \
+    --num-shards 2048 \
+    --shard-index 0
+```
+
+## Port over SLURM
+
+### 1. Port one shard per job
+
+First, install slurm utilities from Hugging Face:
+```bash
+pip install datatrove
+```
+
+Then run this script to start porting shards of the dataset:
+```bash
+python examples/port_datasets/droid_rlds/slurm_port_shards.py \
+    --raw-dir /your/data/droid/1.0.1 \
+    --repo-id your_id/droid_1.0.1 \
+    --logs-dir /your/logs \
+    --job-name port_droid \
+    --partition your_partition \
+    --workers 2048 \
+    --cpus-per-task 8 \
+    --mem-per-cpu 1950M
+```
+
+**Note on how to set your command line arguments**
+
+Regarding `--partition`, find yours by running:
+```bash
+sinfo --format="%R"
+```
+and select a CPU partition if you have one. No GPU needed.
+
+Regarding `--workers`, it is the number of slurm jobs you will launch in parallel. 2048 is the maximum, since there are 2048 shards in DROID. Launching that many jobs will certainly max out your cluster.
+
+Regarding `--cpus-per-task` and `--mem-per-cpu`, the defaults allocate ~16GB of RAM (8*1950M), which is recommended for loading the raw frames, and 8 CPUs, which helps parallelize the encoding of the frames.
+
+Find the number of CPUs and the memory of the nodes of your partition by running:
+```bash
+sinfo -N -p your_partition -h -o "%N cpus=%c mem=%m"
+```
+
+**Useful commands to check progress and debug**
+
+Check if your jobs are running:
+```bash
+squeue -u $USER
+```
+
+You should see a list with job ids like `15125385_155`, where `15125385` is the job index and `155` is the worker index. The output of each worker is written in real time to `/your/logs/job_name/slurm_jobs/15125385_155.out`. For instance, you can inspect the content of this file by running `less /your/logs/job_name/slurm_jobs/15125385_155.out`.
+
+Check the progression of your jobs by running:
+```bash
+jobs_status /your/logs
+```
+
+If it's not at 100% and no slurm jobs are still running, it means that some of them failed.
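+You can also check the ported shards programmatically by loading their metadata, mirroring the `validate_shard` check in `slurm_port_shards.py`. Below is a minimal sketch, assuming the `_world_2048_rank_{rank}` shard naming used by the port script and `your_id/droid_1.0.1` as the repo id:
+```python
+from lerobot.common.datasets.lerobot_dataset import LeRobotDatasetMetadata
+
+for rank in range(2048):
+    shard_repo_id = f"your_id/droid_1.0.1_world_2048_rank_{rank}"
+    try:
+        # Loading metadata fails if the shard was never ported
+        meta = LeRobotDatasetMetadata(shard_repo_id)
+    except Exception:
+        print(f"missing shard: {rank}")
+        continue
+    # A complete shard has one parquet file per episode on disk
+    for ep_idx in range(meta.total_episodes):
+        if not (meta.root / meta.get_data_file_path(ep_idx)).exists():
+            print(f"incomplete shard: {rank}")
+            break
+```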
+Inspect the logs by running:
+```bash
+failed_logs /your/logs/job_name
+```
+
+If there is an issue in the code, you can fix it in debug mode with `--slurm 0`, which runs sequentially and allows you to set breakpoints:
+```bash
+python examples/port_datasets/droid_rlds/slurm_port_shards.py --slurm 0 ...
+```
+
+You can then relaunch the same command, which will skip the completed jobs:
+```bash
+python examples/port_datasets/droid_rlds/slurm_port_shards.py --slurm 1 ...
+```
+
+Once all jobs are completed, you will have one dataset per shard (e.g. `droid_1.0.1_world_2048_rank_1594`) saved on disk in your `/lerobot/home/dir/your_id` directory. You can find your `/lerobot/home/dir` by running:
+```bash
+python -c "from lerobot.common.constants import HF_LEROBOT_HOME;print(HF_LEROBOT_HOME)"
+```
+
+
+### 2. Aggregate all shards
+
+Run this script to start aggregation:
+```bash
+python examples/port_datasets/droid_rlds/slurm_aggregate_shards.py \
+    --repo-id your_id/droid_1.0.1 \
+    --logs-dir /your/logs \
+    --job-name aggr_droid \
+    --partition your_partition \
+    --workers 2048 \
+    --cpus-per-task 8 \
+    --mem-per-cpu 1950M
+```
+
+Once all jobs are completed, you will have one dataset in your `/lerobot/home/dir/your_id/droid_1.0.1` directory.
+
+
+### 3. Upload dataset
+
+Run this script to start uploading:
+```bash
+python examples/port_datasets/droid_rlds/slurm_upload.py \
+    --repo-id your_id/droid_1.0.1 \
+    --logs-dir /your/logs \
+    --job-name upload_droid \
+    --partition your_partition \
+    --workers 50 \
+    --cpus-per-task 4 \
+    --mem-per-cpu 1950M
+```
diff --git a/examples/port_datasets/droid_rlds/port_droid.py b/examples/port_datasets/droid_rlds/port_droid.py
new file mode 100644
index 00000000..ca6b9fce
--- /dev/null
+++ b/examples/port_datasets/droid_rlds/port_droid.py
@@ -0,0 +1,410 @@
+#!/usr/bin/env python
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
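+
+# A typical invocation, mirroring the README above (paths and repo ids are
+# placeholders to adapt):
+#
+#   python examples/port_datasets/droid_rlds/port_droid.py \
+#       --raw-dir /your/data/droid/1.0.1 \
+#       --repo-id your_id/droid_1.0.1 \
+#       --push-to-hub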
+ +import argparse +import logging +import time +from pathlib import Path + +import numpy as np +import tensorflow_datasets as tfds + +from lerobot.common.datasets.lerobot_dataset import LeRobotDataset +from lerobot.common.utils.utils import get_elapsed_time_in_days_hours_minutes_seconds + +DROID_SHARDS = 2048 +DROID_FPS = 15 +DROID_ROBOT_TYPE = "Franka" + +# Dataset schema slightly adapted from: https://droid-dataset.github.io/droid/the-droid-dataset.html#-dataset-schema +DROID_FEATURES = { + # true on first step of the episode + "is_first": { + "dtype": "bool", + "shape": (1,), + "names": None, + }, + # true on last step of the episode + "is_last": { + "dtype": "bool", + "shape": (1,), + "names": None, + }, + # true on last step of the episode if it is a terminal step, True for demos + "is_terminal": { + "dtype": "bool", + "shape": (1,), + "names": None, + }, + # language_instruction is also stored as "task" to follow LeRobot standard + "language_instruction": { + "dtype": "string", + "shape": (1,), + "names": None, + }, + "language_instruction_2": { + "dtype": "string", + "shape": (1,), + "names": None, + }, + "language_instruction_3": { + "dtype": "string", + "shape": (1,), + "names": None, + }, + "observation.state.gripper_position": { + "dtype": "float32", + "shape": (1,), + "names": { + "axes": ["gripper"], + }, + }, + "observation.state.cartesian_position": { + "dtype": "float32", + "shape": (6,), + "names": { + "axes": ["x", "y", "z", "roll", "pitch", "yaw"], + }, + }, + "observation.state.joint_position": { + "dtype": "float32", + "shape": (7,), + "names": { + "axes": ["x", "y", "z", "roll", "pitch", "yaw"], + }, + }, + # Add this new feature to follow LeRobot standard of using joint position + gripper + "observation.state": { + "dtype": "float32", + "shape": (8,), + "names": { + "axes": ["joint_0", "joint_1", "joint_2", "joint_3", "joint_4", "joint_5", "joint_6", "gripper"], + }, + }, + # Initially called wrist_image_left + "observation.images.wrist_left": { + "dtype": "video", + "shape": (180, 320, 3), + "names": [ + "height", + "width", + "channels", + ], + }, + # Initially called exterior_image_1_left + "observation.images.exterior_1_left": { + "dtype": "video", + "shape": (180, 320, 3), + "names": [ + "height", + "width", + "channels", + ], + }, + # Initially called exterior_image_2_left + "observation.images.exterior_2_left": { + "dtype": "video", + "shape": (180, 320, 3), + "names": [ + "height", + "width", + "channels", + ], + }, + "action.gripper_position": { + "dtype": "float32", + "shape": (1,), + "names": { + "axes": ["gripper"], + }, + }, + "action.gripper_velocity": { + "dtype": "float32", + "shape": (1,), + "names": { + "axes": ["gripper"], + }, + }, + "action.cartesian_position": { + "dtype": "float32", + "shape": (6,), + "names": { + "axes": ["x", "y", "z", "roll", "pitch", "yaw"], + }, + }, + "action.cartesian_velocity": { + "dtype": "float32", + "shape": (6,), + "names": { + "axes": ["x", "y", "z", "roll", "pitch", "yaw"], + }, + }, + "action.joint_position": { + "dtype": "float32", + "shape": (7,), + "names": { + "axes": ["joint_0", "joint_1", "joint_2", "joint_3", "joint_4", "joint_5", "joint_6"], + }, + }, + "action.joint_velocity": { + "dtype": "float32", + "shape": (7,), + "names": { + "axes": ["joint_0", "joint_1", "joint_2", "joint_3", "joint_4", "joint_5", "joint_6"], + }, + }, + # This feature was called "action" in RLDS dataset and consists of [6x joint velocities, 1x gripper position] + "action.original": { + "dtype": "float32", + "shape": (7,), + 
"names": { + "axes": ["x", "y", "z", "roll", "pitch", "yaw", "gripper"], + }, + }, + # Add this new feature to follow LeRobot standard of using joint position + gripper + "action": { + "dtype": "float32", + "shape": (8,), + "names": { + "axes": ["joint_0", "joint_1", "joint_2", "joint_3", "joint_4", "joint_5", "joint_6", "gripper"], + }, + }, + "discount": { + "dtype": "float32", + "shape": (1,), + "names": None, + }, + "reward": { + "dtype": "float32", + "shape": (1,), + "names": None, + }, + # Meta data that are the same for all frames in the episode + "task_category": { + "dtype": "string", + "shape": (1,), + "names": None, + }, + "building": { + "dtype": "string", + "shape": (1,), + "names": None, + }, + "collector_id": { + "dtype": "string", + "shape": (1,), + "names": None, + }, + "date": { + "dtype": "string", + "shape": (1,), + "names": None, + }, + "camera_extrinsics.wrist_left": { + "dtype": "float32", + "shape": (6,), + "names": { + "axes": ["x", "y", "z", "roll", "pitch", "yaw"], + }, + }, + "camera_extrinsics.exterior_1_left": { + "dtype": "float32", + "shape": (6,), + "names": { + "axes": ["x", "y", "z", "roll", "pitch", "yaw"], + }, + }, + "camera_extrinsics.exterior_2_left": { + "dtype": "float32", + "shape": (6,), + "names": { + "axes": ["x", "y", "z", "roll", "pitch", "yaw"], + }, + }, + "is_episode_successful": { + "dtype": "bool", + "shape": (1,), + "names": None, + }, +} + + +def is_episode_successful(tf_episode_metadata): + # Adapted from: https://github.com/droid-dataset/droid_policy_learning/blob/dd1020eb20d981f90b5ff07dc80d80d5c0cb108b/robomimic/utils/rlds_utils.py#L8 + return "/success/" in tf_episode_metadata["file_path"].numpy().decode() + + +def generate_lerobot_frames(tf_episode): + m = tf_episode["episode_metadata"] + frame_meta = { + "task_category": m["building"].numpy().decode(), + "building": m["building"].numpy().decode(), + "collector_id": m["collector_id"].numpy().decode(), + "date": m["date"].numpy().decode(), + "camera_extrinsics.wrist_left": m["extrinsics_wrist_cam"].numpy(), + "camera_extrinsics.exterior_1_left": m["extrinsics_exterior_cam_1"].numpy(), + "camera_extrinsics.exterior_2_left": m["extrinsics_exterior_cam_2"].numpy(), + "is_episode_successful": np.array([is_episode_successful(m)]), + } + for f in tf_episode["steps"]: + # Dataset schema slightly adapted from: https://droid-dataset.github.io/droid/the-droid-dataset.html#-dataset-schema + frame = { + "is_first": np.array([f["is_first"].numpy()]), + "is_last": np.array([f["is_last"].numpy()]), + "is_terminal": np.array([f["is_terminal"].numpy()]), + "language_instruction": f["language_instruction"].numpy().decode(), + "language_instruction_2": f["language_instruction_2"].numpy().decode(), + "language_instruction_3": f["language_instruction_3"].numpy().decode(), + "observation.state.gripper_position": f["observation"]["gripper_position"].numpy(), + "observation.state.cartesian_position": f["observation"]["cartesian_position"].numpy(), + "observation.state.joint_position": f["observation"]["joint_position"].numpy(), + "observation.images.wrist_left": f["observation"]["wrist_image_left"].numpy(), + "observation.images.exterior_1_left": f["observation"]["exterior_image_1_left"].numpy(), + "observation.images.exterior_2_left": f["observation"]["exterior_image_2_left"].numpy(), + "action.gripper_position": f["action_dict"]["gripper_position"].numpy(), + "action.gripper_velocity": f["action_dict"]["gripper_velocity"].numpy(), + "action.cartesian_position": 
f["action_dict"]["cartesian_position"].numpy(), + "action.cartesian_velocity": f["action_dict"]["cartesian_velocity"].numpy(), + "action.joint_position": f["action_dict"]["joint_position"].numpy(), + "action.joint_velocity": f["action_dict"]["joint_velocity"].numpy(), + "discount": np.array([f["discount"].numpy()]), + "reward": np.array([f["reward"].numpy()]), + "action.original": f["action"].numpy(), + } + + # language_instruction is also stored as "task" to follow LeRobot standard + frame["task"] = frame["language_instruction"] + + # Add this new feature to follow LeRobot standard of using joint position + gripper + frame["observation.state"] = np.concatenate( + [frame["observation.state.joint_position"], frame["observation.state.gripper_position"]] + ) + frame["action"] = np.concatenate([frame["action.joint_position"], frame["action.gripper_position"]]) + + # Meta data that are the same for all frames in the episode + frame.update(frame_meta) + + # Cast fp64 to fp32 + for key in frame: + if isinstance(frame[key], np.ndarray) and frame[key].dtype == np.float64: + frame[key] = frame[key].astype(np.float32) + + yield frame + + +def port_droid( + raw_dir: Path, + repo_id: str = None, + push_to_hub: bool = False, + num_shards: int | None = None, + shard_index: int | None = None, +): + dataset_name = raw_dir.parent.name + version = raw_dir.name + data_dir = raw_dir.parent.parent + + builder = tfds.builder(f"{dataset_name}/{version}", data_dir=data_dir, version="") + + if num_shards is not None: + tfds_num_shards = builder.info.splits["train"].num_shards + if tfds_num_shards != DROID_SHARDS: + raise ValueError( + f"Number of shards of Droid dataset is expected to be {DROID_SHARDS} but is {tfds_num_shards}." + ) + if num_shards != tfds_num_shards: + raise ValueError( + f"We only shard over the fixed number of shards provided by tensorflow dataset ({tfds_num_shards}), but {num_shards} shards provided instead." + ) + if shard_index >= tfds_num_shards: + raise ValueError( + f"Shard index is greater than the num of shards ({shard_index} >= {num_shards})." + ) + + raw_dataset = builder.as_dataset(split=f"train[{shard_index}shard]") + else: + raw_dataset = builder.as_dataset(split="train") + + lerobot_dataset = LeRobotDataset.create( + repo_id=repo_id, + robot_type=DROID_ROBOT_TYPE, + fps=DROID_FPS, + features=DROID_FEATURES, + ) + + start_time = time.time() + num_episodes = raw_dataset.cardinality().numpy().item() + logging.info(f"Number of episodes {num_episodes}") + + for episode_index, episode in enumerate(raw_dataset): + logging.info(f"{episode_index} / {num_episodes} episodes processed") + + elapsed_time = time.time() - start_time + d, h, m, s = get_elapsed_time_in_days_hours_minutes_seconds(elapsed_time) + logging.info(f"It has been {d} days, {h} hours, {m} minutes, {s:.3f} seconds") + + for frame in generate_lerobot_frames(episode): + lerobot_dataset.add_frame(frame) + + lerobot_dataset.save_episode() + logging.info("Save_episode") + + if push_to_hub: + lerobot_dataset.push_to_hub( + # Add openx tag, since it belongs to the openx collection of datasets + tags=["openx"], + private=False, + ) + + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument( + "--raw-dir", + type=Path, + required=True, + help="Directory containing input raw datasets (e.g. 
`path/to/dataset` or `path/to/dataset/version).", + ) + parser.add_argument( + "--repo-id", + type=str, + help="Repositery identifier on Hugging Face: a community or a user name `/` the name of the dataset, required when push-to-hub is True", + ) + parser.add_argument( + "--push-to-hub", + action="store_true", + help="Upload to hub.", + ) + parser.add_argument( + "--num-shards", + type=int, + default=None, + help="Number of shards. Can be either None to load the full dataset, or 2048 to load one of the 2048 tensorflow dataset files.", + ) + parser.add_argument( + "--shard-index", + type=int, + default=None, + help="Index of the shard. Can be either None to load the full dataset, or in [0,2047] to load one of the 2048 tensorflow dataset files.", + ) + + args = parser.parse_args() + + port_droid(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/examples/port_datasets/droid_rlds/slurm_aggregate_shards.py b/examples/port_datasets/droid_rlds/slurm_aggregate_shards.py new file mode 100644 index 00000000..ad8593a7 --- /dev/null +++ b/examples/port_datasets/droid_rlds/slurm_aggregate_shards.py @@ -0,0 +1,287 @@ +#!/usr/bin/env python + +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
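+
+# How the aggregation below is organized (see AggregateDatasets): all metadata
+# (tasks, episodes, episode stats, info) is merged once on the launching
+# process when the pipeline step is instantiated, while the heavy per-episode
+# parquet and video copies are sharded across slurm ranks in `run()`, one
+# source dataset per rank. Episode indices are remapped with a per-dataset
+# shift:
+#
+#   aggr_episode_index = episode_index + aggr_episode_index_shift
+#
+# so the episodes of each shard land right after those of the previous one.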
+ +import argparse +import logging + +import tqdm +from datatrove.executor import LocalPipelineExecutor +from datatrove.executor.slurm import SlurmPipelineExecutor +from datatrove.pipeline.base import PipelineStep + +from examples.port_datasets.droid_rlds.port_droid import DROID_SHARDS +from lerobot.common.datasets.aggregate import validate_all_metadata +from lerobot.common.datasets.lerobot_dataset import LeRobotDatasetMetadata +from lerobot.common.datasets.utils import write_episode, write_episode_stats, write_info, write_task +from lerobot.common.utils.utils import init_logging + + +class AggregateDatasets(PipelineStep): + def __init__( + self, + repo_ids: list[str], + aggregated_repo_id: str, + ): + super().__init__() + self.repo_ids = repo_ids + self.aggr_repo_id = aggregated_repo_id + + self.create_aggr_dataset() + + def create_aggr_dataset(self): + init_logging() + + logging.info("Start aggregate_datasets") + + all_metadata = [LeRobotDatasetMetadata(repo_id) for repo_id in self.repo_ids] + + fps, robot_type, features = validate_all_metadata(all_metadata) + + # Create resulting dataset folder + aggr_meta = LeRobotDatasetMetadata.create( + repo_id=self.aggr_repo_id, + fps=fps, + robot_type=robot_type, + features=features, + ) + + logging.info("Find all tasks") + # find all tasks, deduplicate them, create new task indices for each dataset + # indexed by dataset index + datasets_task_index_to_aggr_task_index = {} + aggr_task_index = 0 + for dataset_index, meta in enumerate(tqdm.tqdm(all_metadata, desc="Find all tasks")): + task_index_to_aggr_task_index = {} + + for task_index, task in meta.tasks.items(): + if task not in aggr_meta.task_to_task_index: + # add the task to aggr tasks mappings + aggr_meta.tasks[aggr_task_index] = task + aggr_meta.task_to_task_index[task] = aggr_task_index + aggr_task_index += 1 + + # add task_index anyway + task_index_to_aggr_task_index[task_index] = aggr_meta.task_to_task_index[task] + + datasets_task_index_to_aggr_task_index[dataset_index] = task_index_to_aggr_task_index + + logging.info("Prepare copy data and videos") + datasets_ep_idx_to_aggr_ep_idx = {} + datasets_aggr_episode_index_shift = {} + aggr_episode_index_shift = 0 + for dataset_index, meta in enumerate(tqdm.tqdm(all_metadata, desc="Prepare copy data and videos")): + ep_idx_to_aggr_ep_idx = {} + + for episode_index in range(meta.total_episodes): + aggr_episode_index = episode_index + aggr_episode_index_shift + ep_idx_to_aggr_ep_idx[episode_index] = aggr_episode_index + + datasets_ep_idx_to_aggr_ep_idx[dataset_index] = ep_idx_to_aggr_ep_idx + datasets_aggr_episode_index_shift[dataset_index] = aggr_episode_index_shift + + # populate episodes + for episode_index, episode_dict in meta.episodes.items(): + aggr_episode_index = episode_index + aggr_episode_index_shift + episode_dict["episode_index"] = aggr_episode_index + aggr_meta.episodes[aggr_episode_index] = episode_dict + + # populate episodes_stats + for episode_index, episode_stats in meta.episodes_stats.items(): + aggr_episode_index = episode_index + aggr_episode_index_shift + aggr_meta.episodes_stats[aggr_episode_index] = episode_stats + + # populate info + aggr_meta.info["total_episodes"] += meta.total_episodes + aggr_meta.info["total_frames"] += meta.total_frames + aggr_meta.info["total_videos"] += len(aggr_meta.video_keys) * meta.total_episodes + + aggr_episode_index_shift += meta.total_episodes + + logging.info("Write meta data") + aggr_meta.info["total_tasks"] = len(aggr_meta.tasks) + aggr_meta.info["total_chunks"] = 
aggr_meta.get_episode_chunk(aggr_episode_index_shift - 1) + aggr_meta.info["splits"] = {"train": f"0:{aggr_meta.info['total_episodes']}"} + + # create a new episodes jsonl with updated episode_index using write_episode + for episode_dict in tqdm.tqdm(aggr_meta.episodes.values(), desc="Write episodes"): + write_episode(episode_dict, aggr_meta.root) + + # create a new episode_stats jsonl with updated episode_index using write_episode_stats + for episode_index, episode_stats in tqdm.tqdm( + aggr_meta.episodes_stats.items(), desc="Write episodes stats" + ): + write_episode_stats(episode_index, episode_stats, aggr_meta.root) + + # create a new task jsonl with updated episode_index using write_task + for task_index, task in tqdm.tqdm(aggr_meta.tasks.items(), desc="Write tasks"): + write_task(task_index, task, aggr_meta.root) + + write_info(aggr_meta.info, aggr_meta.root) + + self.datasets_task_index_to_aggr_task_index = datasets_task_index_to_aggr_task_index + self.datasets_ep_idx_to_aggr_ep_idx = datasets_ep_idx_to_aggr_ep_idx + self.datasets_aggr_episode_index_shift = datasets_aggr_episode_index_shift + + logging.info("Meta data done writing!") + + def run(self, data=None, rank: int = 0, world_size: int = 1): + import logging + import shutil + + import pandas as pd + + from lerobot.common.datasets.aggregate import get_update_episode_and_task_func + from lerobot.common.datasets.lerobot_dataset import LeRobotDatasetMetadata + from lerobot.common.utils.utils import init_logging + + init_logging() + + aggr_meta = LeRobotDatasetMetadata(self.aggr_repo_id) + all_metadata = [LeRobotDatasetMetadata(repo_id) for repo_id in self.repo_ids] + + if world_size != len(all_metadata): + raise ValueError() + + dataset_index = rank + meta = all_metadata[dataset_index] + aggr_episode_index_shift = self.datasets_aggr_episode_index_shift[dataset_index] + + logging.info("Copy data") + for episode_index in range(meta.total_episodes): + aggr_episode_index = self.datasets_ep_idx_to_aggr_ep_idx[dataset_index][episode_index] + data_path = meta.root / meta.get_data_file_path(episode_index) + aggr_data_path = aggr_meta.root / aggr_meta.get_data_file_path(aggr_episode_index) + + # update episode_index and task_index + df = pd.read_parquet(data_path) + update_row_func = get_update_episode_and_task_func( + aggr_episode_index_shift, self.datasets_task_index_to_aggr_task_index[dataset_index] + ) + df = df.apply(update_row_func, axis=1) + + aggr_data_path.parent.mkdir(parents=True, exist_ok=True) + df.to_parquet(aggr_data_path) + + logging.info("Copy videos") + for episode_index in range(meta.total_episodes): + aggr_episode_index = episode_index + aggr_episode_index_shift + for vid_key in meta.video_keys: + video_path = meta.root / meta.get_video_file_path(episode_index, vid_key) + aggr_video_path = aggr_meta.root / aggr_meta.get_video_file_path(aggr_episode_index, vid_key) + aggr_video_path.parent.mkdir(parents=True, exist_ok=True) + shutil.copy(video_path, aggr_video_path) + + # copy_command = f"cp {video_path} {aggr_video_path} &" + # subprocess.Popen(copy_command, shell=True) + + logging.info("Done!") + + +def make_aggregate_executor( + repo_ids, repo_id, job_name, logs_dir, workers, partition, cpus_per_task, mem_per_cpu, slurm=True +): + kwargs = { + "pipeline": [ + AggregateDatasets(repo_ids, repo_id), + ], + "logging_dir": str(logs_dir), + } + + if slurm: + kwargs.update( + { + "job_name": job_name, + "tasks": DROID_SHARDS, + "workers": workers, + "time": "08:00:00", + "partition": partition, + "cpus_per_task": 
cpus_per_task, + "sbatch_args": {"mem-per-cpu": mem_per_cpu}, + } + ) + executor = SlurmPipelineExecutor(**kwargs) + else: + kwargs.update( + { + "tasks": DROID_SHARDS, + "workers": 1, + } + ) + executor = LocalPipelineExecutor(**kwargs) + + return executor + + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument( + "--repo-id", + type=str, + help="Repositery identifier on Hugging Face: a community or a user name `/` the name of the dataset, required when push-to-hub is True.", + ) + parser.add_argument( + "--logs-dir", + type=str, + help="Path to logs directory for `datatrove`.", + ) + parser.add_argument( + "--job-name", + type=str, + default="port_droid", + help="Job name used in slurm, and name of the directory created inside the provided logs directory.", + ) + parser.add_argument( + "--slurm", + type=int, + default=1, + help="Launch over slurm. Use `--slurm 0` to launch sequentially (useful to debug).", + ) + parser.add_argument( + "--workers", + type=int, + default=2048, + help="Number of slurm workers. It should be less than the maximum number of shards.", + ) + parser.add_argument( + "--partition", + type=str, + help="Slurm partition. Ideally a CPU partition. No need for GPU partition.", + ) + parser.add_argument( + "--cpus-per-task", + type=int, + default=8, + help="Number of cpus that each slurm worker will use.", + ) + parser.add_argument( + "--mem-per-cpu", + type=str, + default="1950M", + help="Memory per cpu that each worker will use.", + ) + + args = parser.parse_args() + kwargs = vars(args) + kwargs["slurm"] = kwargs.pop("slurm") == 1 + + repo_ids = [f"{args.repo_id}_world_{DROID_SHARDS}_rank_{rank}" for rank in range(DROID_SHARDS)] + aggregate_executor = make_aggregate_executor(repo_ids, **kwargs) + aggregate_executor.run() + + +if __name__ == "__main__": + main() diff --git a/examples/port_datasets/droid_rlds/slurm_port_shards.py b/examples/port_datasets/droid_rlds/slurm_port_shards.py new file mode 100644 index 00000000..89d6bd85 --- /dev/null +++ b/examples/port_datasets/droid_rlds/slurm_port_shards.py @@ -0,0 +1,161 @@ +import argparse +from pathlib import Path + +from datatrove.executor import LocalPipelineExecutor +from datatrove.executor.slurm import SlurmPipelineExecutor +from datatrove.pipeline.base import PipelineStep + +from examples.port_datasets.droid_rlds.port_droid import DROID_SHARDS +from lerobot.common.datasets.lerobot_dataset import LeRobotDatasetMetadata + + +def validate_shard(repo_id): + """Sanity check that ensure meta data can be loaded and all files are present.""" + meta = LeRobotDatasetMetadata(repo_id) + + if meta.total_episodes == 0: + raise ValueError("Number of episodes is 0.") + + for ep_idx in range(meta.total_episodes): + data_path = meta.root / meta.get_data_file_path(ep_idx) + + if not data_path.exists(): + raise ValueError(f"Parquet file is missing in: {data_path}") + + for vid_key in meta.video_keys: + vid_path = meta.root / meta.get_video_file_path(ep_idx, vid_key) + if not vid_path.exists(): + raise ValueError(f"Video file is missing in: {vid_path}") + + +class PortDroidShards(PipelineStep): + def __init__( + self, + raw_dir: Path | str, + repo_id: str = None, + ): + super().__init__() + self.raw_dir = Path(raw_dir) + self.repo_id = repo_id + + def run(self, data=None, rank: int = 0, world_size: int = 1): + from datasets.utils.tqdm import disable_progress_bars + + from examples.port_datasets.droid_rlds.port_droid import port_droid + from lerobot.common.utils.utils import init_logging + + init_logging() + 
disable_progress_bars() + + shard_repo_id = f"{self.repo_id}_world_{world_size}_rank_{rank}" + + port_droid( + self.raw_dir, + shard_repo_id, + push_to_hub=False, + num_shards=world_size, + shard_index=rank, + ) + + validate_shard(shard_repo_id) + + +def make_port_executor( + raw_dir, repo_id, job_name, logs_dir, workers, partition, cpus_per_task, mem_per_cpu, slurm=True +): + kwargs = { + "pipeline": [ + PortDroidShards(raw_dir, repo_id), + ], + "logging_dir": str(logs_dir), + } + + if slurm: + kwargs.update( + { + "job_name": job_name, + "tasks": DROID_SHARDS, + "workers": workers, + "time": "08:00:00", + "partition": partition, + "cpus_per_task": cpus_per_task, + "sbatch_args": {"mem-per-cpu": mem_per_cpu}, + } + ) + executor = SlurmPipelineExecutor(**kwargs) + else: + kwargs.update( + { + "tasks": 1, + "workers": 1, + } + ) + executor = LocalPipelineExecutor(**kwargs) + + return executor + + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument( + "--raw-dir", + type=Path, + required=True, + help="Directory containing input raw datasets (e.g. `path/to/dataset` or `path/to/dataset/version).", + ) + parser.add_argument( + "--repo-id", + type=str, + help="Repositery identifier on Hugging Face: a community or a user name `/` the name of the dataset, required when push-to-hub is True.", + ) + parser.add_argument( + "--logs-dir", + type=str, + help="Path to logs directory for `datatrove`.", + ) + parser.add_argument( + "--job-name", + type=str, + default="port_droid", + help="Job name used in slurm, and name of the directory created inside the provided logs directory.", + ) + parser.add_argument( + "--slurm", + type=int, + default=1, + help="Launch over slurm. Use `--slurm 0` to launch sequentially (useful to debug).", + ) + parser.add_argument( + "--workers", + type=int, + default=2048, + help="Number of slurm workers. It should be less than the maximum number of shards.", + ) + parser.add_argument( + "--partition", + type=str, + help="Slurm partition. Ideally a CPU partition. 
No need for GPU partition.", + ) + parser.add_argument( + "--cpus-per-task", + type=int, + default=8, + help="Number of cpus that each slurm worker will use.", + ) + parser.add_argument( + "--mem-per-cpu", + type=str, + default="1950M", + help="Memory per cpu that each worker will use.", + ) + + args = parser.parse_args() + kwargs = vars(args) + kwargs["slurm"] = kwargs.pop("slurm") == 1 + port_executor = make_port_executor(**kwargs) + port_executor.run() + + +if __name__ == "__main__": + main() diff --git a/examples/port_datasets/droid_rlds/slurm_upload.py b/examples/port_datasets/droid_rlds/slurm_upload.py new file mode 100644 index 00000000..27f0c8dd --- /dev/null +++ b/examples/port_datasets/droid_rlds/slurm_upload.py @@ -0,0 +1,230 @@ +import argparse +import logging +import os +from pathlib import Path + +from datatrove.executor import LocalPipelineExecutor +from datatrove.executor.slurm import SlurmPipelineExecutor +from datatrove.pipeline.base import PipelineStep +from huggingface_hub import HfApi +from huggingface_hub.constants import REPOCARD_NAME + +from examples.port_datasets.droid_rlds.port_droid import DROID_SHARDS +from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDatasetMetadata +from lerobot.common.datasets.utils import create_lerobot_dataset_card + + +class UploadDataset(PipelineStep): + def __init__( + self, + repo_id: str, + branch: str | None = None, + revision: str | None = None, + tags: list | None = None, + license: str | None = "apache-2.0", + private: bool = False, + **card_kwargs, + ): + super().__init__() + self.repo_id = repo_id + self.branch = branch + self.tags = tags + self.license = license + self.private = private + self.card_kwargs = card_kwargs + self.revision = revision if revision else CODEBASE_VERSION + + if os.environ.get("HF_HUB_ENABLE_HF_TRANSFER", "0") != "1": + logging.warning( + 'HF_HUB_ENABLE_HF_TRANSFER is not set to "1". 
Install hf_transfer and set the env ' + "variable for faster uploads:\npip install hf-transfer\nexport HF_HUB_ENABLE_HF_TRANSFER=1" + ) + + self.create_repo() + + def create_repo(self): + hub_api = HfApi() + + meta = LeRobotDatasetMetadata(self.repo_id) + hub_api.create_repo( + repo_id=self.repo_id, + private=self.private, + repo_type="dataset", + exist_ok=True, + ) + if self.branch: + hub_api.create_branch( + repo_id=self.repo_id, + branch=self.branch, + revision=self.revision, + repo_type="dataset", + exist_ok=True, + ) + + if not hub_api.file_exists(self.repo_id, REPOCARD_NAME, repo_type="dataset", revision=self.branch): + card = create_lerobot_dataset_card( + tags=self.tags, dataset_info=meta.info, license=self.license, **self.card_kwargs + ) + card.push_to_hub(repo_id=self.repo_id, repo_type="dataset", revision=self.branch) + + def list_files_recursively(directory): + base_path = Path(directory) + return [str(file.relative_to(base_path)) for file in base_path.rglob("*") if file.is_file()] + + meta = LeRobotDatasetMetadata(self.repo_id) + self.file_paths = list_files_recursively(meta.root) + self.file_paths = sorted(self.file_paths) + + def run(self, data=None, rank: int = 0, world_size: int = 1): + import logging + import random + import time + from itertools import islice + + from huggingface_hub import CommitOperationAdd, create_commit, preupload_lfs_files + from huggingface_hub.utils import HfHubHTTPError + + from lerobot.common.datasets.lerobot_dataset import LeRobotDatasetMetadata + from lerobot.common.utils.utils import init_logging + + BASE_DELAY = 1.0 # noqa: N806 + MAX_RETRIES = 24 # noqa: N806 + + init_logging() + + def chunked(lst, n): + it = iter(lst) + return [list(islice(it, size)) for size in [len(lst) // n + (i < len(lst) % n) for i in range(n)]] + + chunks = chunked(self.file_paths, world_size) + file_paths = chunks[rank] + + if len(file_paths) == 0: + raise ValueError(file_paths) + + meta = LeRobotDatasetMetadata(self.repo_id) + additions = [ + CommitOperationAdd(path_in_repo=path, path_or_fileobj=meta.root / path) for path in file_paths + ] + logging.info(f"Uploading {','.join(file_paths)} to the hub...") + preupload_lfs_files( + repo_id=self.repo_id, repo_type="dataset", additions=additions, revision=self.branch + ) + logging.info(f"Upload of {','.join(file_paths)} to the hub complete!") + + retries = 0 + while True: + try: + create_commit( + self.repo_id, + repo_type="dataset", + operations=additions, + commit_message=f"DataTrove upload ({len(additions)} files)", + revision=self.branch, + ) + break + except HfHubHTTPError as e: + if "A commit has happened since" in e.server_message: + if retries >= MAX_RETRIES: + logging.error(f"Failed to create commit after {MAX_RETRIES=}. Giving up.") + raise e + logging.info("Commit creation race condition issue. 
Waiting...") + time.sleep(BASE_DELAY * 2**retries + random.uniform(0, 2)) + retries += 1 + else: + raise e + + +def make_upload_executor( + repo_id, job_name, logs_dir, workers, partition, cpus_per_task, mem_per_cpu, slurm=True +): + kwargs = { + "pipeline": [ + UploadDataset(repo_id), + ], + "logging_dir": str(logs_dir), + } + + if slurm: + kwargs.update( + { + "job_name": job_name, + "tasks": DROID_SHARDS, + "workers": workers, + "time": "08:00:00", + "partition": partition, + "cpus_per_task": cpus_per_task, + "sbatch_args": {"mem-per-cpu": mem_per_cpu}, + } + ) + executor = SlurmPipelineExecutor(**kwargs) + else: + kwargs.update( + { + "tasks": DROID_SHARDS, + "workers": 1, + } + ) + executor = LocalPipelineExecutor(**kwargs) + + return executor + + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument( + "--repo-id", + type=str, + help="Repositery identifier on Hugging Face: a community or a user name `/` the name of the dataset, required when push-to-hub is True.", + ) + parser.add_argument( + "--logs-dir", + type=str, + help="Path to logs directory for `datatrove`.", + ) + parser.add_argument( + "--job-name", + type=str, + default="port_droid", + help="Job name used in slurm, and name of the directory created inside the provided logs directory.", + ) + parser.add_argument( + "--slurm", + type=int, + default=1, + help="Launch over slurm. Use `--slurm 0` to launch sequentially (useful to debug).", + ) + parser.add_argument( + "--workers", + type=int, + default=2048, + help="Number of slurm workers. It should be less than the maximum number of shards.", + ) + parser.add_argument( + "--partition", + type=str, + help="Slurm partition. Ideally a CPU partition. No need for GPU partition.", + ) + parser.add_argument( + "--cpus-per-task", + type=int, + default=8, + help="Number of cpus that each slurm worker will use.", + ) + parser.add_argument( + "--mem-per-cpu", + type=str, + default="1950M", + help="Memory per cpu that each worker will use.", + ) + + args = parser.parse_args() + kwargs = vars(args) + kwargs["slurm"] = kwargs.pop("slurm") == 1 + upload_executor = make_upload_executor(**kwargs) + upload_executor.run() + + +if __name__ == "__main__": + main() diff --git a/examples/port_datasets/openx_rlds.py b/examples/port_datasets/openx_rlds.py deleted file mode 100644 index db051b5f..00000000 --- a/examples/port_datasets/openx_rlds.py +++ /dev/null @@ -1,326 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -For all datasets in the RLDS format. -For https://github.com/google-deepmind/open_x_embodiment (OPENX) datasets. - -NOTE: Install `tensorflow` and `tensorflow_datasets` before running this script. 
-```bash -pip install tensorflow -pip install tensorflow_datasets -``` - -Example: -```bash -python examples/port_datasets/openx_rlds.py \ - --raw-dir /fsx/mustafa_shukor/droid \ - --repo-id cadene/droid \ - --use-videos \ - --push-to-hub -``` -""" - -import argparse -import logging -import re -import time -from pathlib import Path - -import numpy as np -import tensorflow as tf -import tensorflow_datasets as tfds - -from examples.port_datasets.openx_utils.configs import OXE_DATASET_CONFIGS, StateEncoding -from examples.port_datasets.openx_utils.transforms import OXE_STANDARDIZATION_TRANSFORMS -from lerobot.common.datasets.lerobot_dataset import LeRobotDataset -from lerobot.common.utils.utils import get_elapsed_time_in_days_hours_minutes_seconds - -np.set_printoptions(precision=2) - - -def transform_raw_dataset(episode, dataset_name): - traj = next(iter(episode["steps"].batch(episode["steps"].cardinality()))) - - if dataset_name in OXE_STANDARDIZATION_TRANSFORMS: - traj = OXE_STANDARDIZATION_TRANSFORMS[dataset_name](traj) - - if dataset_name in OXE_DATASET_CONFIGS: - state_obs_keys = OXE_DATASET_CONFIGS[dataset_name]["state_obs_keys"] - else: - state_obs_keys = [None for _ in range(8)] - - proprio = tf.concat( - [ - ( - tf.zeros((tf.shape(traj["action"])[0], 1), dtype=tf.float32) # padding - if key is None - else tf.cast(traj["observation"][key], tf.float32) - ) - for key in state_obs_keys - ], - axis=1, - ) - - traj.update( - { - "proprio": proprio, - "task": traj.pop("language_instruction"), - "action": tf.cast(traj["action"], tf.float32), - } - ) - - episode["steps"] = traj - return episode - - -def generate_features_from_raw(dataset_name: str, builder: tfds.core.DatasetBuilder, use_videos: bool = True): - state_names = [f"motor_{i}" for i in range(8)] - if dataset_name in OXE_DATASET_CONFIGS: - state_encoding = OXE_DATASET_CONFIGS[dataset_name]["state_encoding"] - if state_encoding == StateEncoding.POS_EULER: - state_names = ["x", "y", "z", "roll", "pitch", "yaw", "pad", "gripper"] - if "libero" in dataset_name: - state_names = [ - "x", - "y", - "z", - "roll", - "pitch", - "yaw", - "gripper", - "gripper", - ] # 2D gripper state - elif state_encoding == StateEncoding.POS_QUAT: - state_names = ["x", "y", "z", "rx", "ry", "rz", "rw", "gripper"] - - DEFAULT_FEATURES = { - "observation.state": { - "dtype": "float32", - "shape": (8,), - "names": {"motors": state_names}, - }, - "action": { - "dtype": "float32", - "shape": (7,), - "names": {"motors": ["x", "y", "z", "roll", "pitch", "yaw", "gripper"]}, - }, - } - - obs = builder.info.features["steps"]["observation"] - features = { - f"observation.images.{key}": { - "dtype": "video" if use_videos else "image", - "shape": value.shape, - "names": ["height", "width", "rgb"], - } - for key, value in obs.items() - if "depth" not in key and any(x in key for x in ["image", "rgb"]) - } - return {**features, **DEFAULT_FEATURES} - - -def save_as_lerobot_dataset( - dataset_name: str, - lerobot_dataset: LeRobotDataset, - raw_dataset: tf.data.Dataset, - num_shards: int | None = None, - shard_index: int | None = None, -): - start_time = time.time() - total_num_episodes = raw_dataset.cardinality().numpy().item() - logging.info(f"Total number of episodes {total_num_episodes}") - - if num_shards is not None: - sharded_dataset = raw_dataset.shard(num_shards=num_shards, index=shard_index) - sharded_num_episodes = sharded_dataset.cardinality().numpy().item() - logging.info(f"{sharded_num_episodes=}") - num_episodes = sharded_num_episodes - iter_ = 
iter(sharded_dataset) - else: - num_episodes = total_num_episodes - iter_ = iter(raw_dataset) - - if num_episodes <= 0: - raise ValueError(f"Number of episodes is {num_episodes}, but needs to be positive.") - - for episode_index in range(num_episodes): - logging.info(f"{episode_index} / {num_episodes} episodes processed") - - elapsed_time = time.time() - start_time - d, h, m, s = get_elapsed_time_in_days_hours_minutes_seconds(elapsed_time) - logging.info(f"It has been {d} days, {h} hours, {m} minutes, {s:.3f} seconds") - - episode = next(iter_) - logging.info("next") - episode = transform_raw_dataset(episode, dataset_name) - - traj = episode["steps"] - for i in range(traj["action"].shape[0]): - image_dict = { - f"observation.images.{key}": value[i].numpy() - for key, value in traj["observation"].items() - if "depth" not in key and any(x in key for x in ["image", "rgb"]) - } - lerobot_dataset.add_frame( - { - **image_dict, - "observation.state": traj["proprio"][i].numpy(), - "action": traj["action"][i].numpy(), - "task": traj["task"][i].numpy().decode(), - } - ) - - lerobot_dataset.save_episode() - logging.info("save_episode") - - -def create_lerobot_dataset( - raw_dir: Path, - repo_id: str = None, - push_to_hub: bool = False, - fps: int = None, - robot_type: str = None, - use_videos: bool = True, - image_writer_process: int = 5, - image_writer_threads: int = 10, - num_shards: int | None = None, - shard_index: int | None = None, -): - last_part = raw_dir.name - if re.match(r"^\d+\.\d+\.\d+$", last_part): - version = last_part - dataset_name = raw_dir.parent.name - data_dir = raw_dir.parent.parent - else: - version = "" - dataset_name = last_part - data_dir = raw_dir.parent - - builder = tfds.builder(dataset_name, data_dir=data_dir, version=version) - features = generate_features_from_raw(dataset_name, builder, use_videos) - - if num_shards is not None: - if num_shards != builder.info.splits["train"].num_shards: - raise ValueError() - if shard_index >= builder.info.splits["train"].num_shards: - raise ValueError() - - raw_dataset = builder.as_dataset(split=f"train[{shard_index}shard]") - else: - raw_dataset = builder.as_dataset(split="train") - - if fps is None: - if dataset_name in OXE_DATASET_CONFIGS: - fps = OXE_DATASET_CONFIGS[dataset_name]["control_frequency"] - else: - fps = 10 - - if robot_type is None: - if dataset_name in OXE_DATASET_CONFIGS: - robot_type = OXE_DATASET_CONFIGS[dataset_name]["robot_type"] - robot_type = robot_type.lower().replace(" ", "_").replace("-", "_") - else: - robot_type = "unknown" - - lerobot_dataset = LeRobotDataset.create( - repo_id=repo_id, - robot_type=robot_type, - fps=fps, - use_videos=use_videos, - features=features, - image_writer_threads=image_writer_threads, - image_writer_processes=image_writer_process, - ) - - save_as_lerobot_dataset( - dataset_name, - lerobot_dataset, - raw_dataset, - ) - - if push_to_hub: - assert repo_id is not None - tags = [] - if dataset_name in OXE_DATASET_CONFIGS: - tags.append("openx") - lerobot_dataset.push_to_hub( - tags=tags, - private=False, - push_videos=True, - license="apache-2.0", - ) - - -def main(): - parser = argparse.ArgumentParser() - - parser.add_argument( - "--raw-dir", - type=Path, - required=True, - help="Directory containing input raw datasets (e.g. 
`path/to/dataset` or `path/to/dataset/version).", - ) - parser.add_argument( - "--repo-id", - type=str, - help="Repositery identifier on Hugging Face: a community or a user name `/` the name of the dataset, required when push-to-hub is True", - ) - parser.add_argument( - "--push-to-hub", - action="store_true", - help="Upload to hub.", - ) - parser.add_argument( - "--robot-type", - type=str, - default=None, - help="Robot type of this dataset.", - ) - parser.add_argument( - "--fps", - type=int, - default=None, - help="Frame rate used to collect videos. Default fps equals to the control frequency of the robot.", - ) - parser.add_argument( - "--use-videos", - action="store_true", - help="Convert each episode of the raw dataset to an mp4 video. This option allows 60 times lower disk space consumption and 25 faster loading time during training.", - ) - parser.add_argument( - "--image-writer-process", - type=int, - default=0, - help="Number of processes of image writer for saving images.", - ) - parser.add_argument( - "--image-writer-threads", - type=int, - default=8, - help="Number of threads per process of image writer for saving images.", - ) - - args = parser.parse_args() - - # droid_dir = Path("/fsx/remi_cadene/.cache/huggingface/lerobot/cadene/droid") - # if droid_dir.exists(): - # shutil.rmtree(droid_dir) - - create_lerobot_dataset(**vars(args)) - - -if __name__ == "__main__": - main() diff --git a/examples/port_datasets/openx_rlds_completed.py b/examples/port_datasets/openx_rlds_completed.py deleted file mode 100644 index 849d65cf..00000000 --- a/examples/port_datasets/openx_rlds_completed.py +++ /dev/null @@ -1,52 +0,0 @@ -from pathlib import Path - -import tqdm - -from lerobot.common.datasets.lerobot_dataset import LeRobotDatasetMetadata - - -def main(): - repo_id = "cadene/droid" - datetime = "2025-02-22_11-23-54" - port_log_dir = Path(f"/fsx/remi_cadene/logs/{datetime}_port_openx_droid") - - compl_dir = port_log_dir / "completions" - - paths = list(compl_dir.glob("*")) - total_items = len(paths) - - # Use tqdm with the total parameter - wrong_completions = [] - error_messages = [] - for i, path in tqdm.tqdm(enumerate(paths), total=total_items): - try: - rank = path.name.lstrip("0") - if rank == "": - rank = 0 - meta = LeRobotDatasetMetadata(f"{repo_id}_{datetime}_world_2048_rank_{rank}") - last_episode_index = meta.total_episodes - 1 - last_ep_data_path = meta.root / meta.get_data_file_path(last_episode_index) - - if not last_ep_data_path.exists(): - raise ValueError(path) - - for vid_key in meta.video_keys: - last_ep_vid_path = meta.root / meta.get_video_file_path(last_episode_index, vid_key) - if not last_ep_vid_path.exists(): - raise ValueError(path) - - except Exception as e: - error_messages.append(str(e)) - wrong_completions.append(path) - - for path, error_msg in zip(wrong_completions, error_messages, strict=False): - print(path) - print(error_msg) - print() - # path.unlink() - - print(f"Error {len(wrong_completions)} / {total_items}") - - -if __name__ == "__main__": - main() diff --git a/examples/port_datasets/openx_rlds_datatrove.py b/examples/port_datasets/openx_rlds_datatrove.py deleted file mode 100644 index 1fb200d9..00000000 --- a/examples/port_datasets/openx_rlds_datatrove.py +++ /dev/null @@ -1,310 +0,0 @@ -import datetime as dt -import logging -import os -import random -import time -from pathlib import Path - -from datatrove.executor import LocalPipelineExecutor -from datatrove.executor.slurm import SlurmPipelineExecutor -from datatrove.pipeline.base import 
PipelineStep -from huggingface_hub import CommitOperationAdd, HfApi, create_commit, preupload_lfs_files -from huggingface_hub.constants import REPOCARD_NAME -from huggingface_hub.utils import HfHubHTTPError - -from lerobot.common.datasets.aggregate import aggregate_datasets -from lerobot.common.datasets.lerobot_dataset import LeRobotDatasetMetadata -from lerobot.common.datasets.utils import create_lerobot_dataset_card - -BASE_DELAY = 0.1 -MAX_RETRIES = 12 - - -class PortOpenXDataset(PipelineStep): - def __init__( - self, - raw_dir: Path | str, - repo_id: str = None, - image_writer_process: int = 0, - image_writer_threads: int = 8, - ): - super().__init__() - self.raw_dir = Path(raw_dir) - self.repo_id = repo_id - self.image_writer_process = image_writer_process - self.image_writer_threads = image_writer_threads - - def run(self, data=None, rank: int = 0, world_size: int = 1): - from datasets.utils.tqdm import disable_progress_bars - - from examples.port_datasets.openx_rlds import create_lerobot_dataset - from examples.port_datasets.openx_utils.test import display_slurm_info, display_system_info - from lerobot.common.utils.utils import init_logging - - init_logging() - disable_progress_bars() - - display_system_info() - display_slurm_info() - - create_lerobot_dataset( - self.raw_dir, - f"{self.repo_id}_world_{world_size}_rank_{rank}", - image_writer_process=self.image_writer_process, - image_writer_threads=self.image_writer_threads, - push_to_hub=False, - num_shards=world_size, - shard_index=rank, - ) - - -class AggregateDatasets(PipelineStep): - def __init__( - self, - repo_ids: list[str], - aggregated_repo_id: str, - ): - super().__init__() - self.repo_ids = repo_ids - self.aggregated_repo_id = aggregated_repo_id - - def run(self, data=None, rank: int = 0, world_size: int = 1): - aggregate_datasets(self.repo_ids, self.aggregated_repo_id) - - -class UploadDataset(PipelineStep): - def __init__( - self, - repo_id: str, - branch: str | None = None, - tags: list | None = None, - license: str | None = "apache-2.0", - private: bool = False, - **card_kwargs, - ): - super().__init__() - self.repo_id = repo_id - self.branch = branch - self.tags = tags - self.license = license - self.private = private - self.card_kwargs = card_kwargs - - if os.environ.get("HF_HUB_ENABLE_HF_TRANSFER", "0") != "1": - logging.warning( - 'HF_HUB_ENABLE_HF_TRANSFER is not set to "1". 
Install hf_transfer and set the env '
- "variable for faster uploads:\npip install hf-transfer\nexport HF_HUB_ENABLE_HF_TRANSFER=1"
- )
-
- self._repo_init = False
- # Accumulate commit operations across run() calls and upload to the target branch.
- self.operations = []
- self.revision = branch
-
- def _create_repo(self, hub_api, meta):
- hub_api.create_repo(
- repo_id=self.repo_id,
- private=self.private,
- repo_type="dataset",
- exist_ok=True,
- )
- if self.branch:
- hub_api.create_branch(
- repo_id=self.repo_id,
- branch=self.branch,
- repo_type="dataset",
- exist_ok=True,
- )
-
- if not hub_api.file_exists(self.repo_id, REPOCARD_NAME, repo_type="dataset", revision=self.branch):
- card = create_lerobot_dataset_card(
- tags=self.tags, dataset_info=meta.info, license=self.license, **self.card_kwargs
- )
- card.push_to_hub(repo_id=self.repo_id, repo_type="dataset", revision=self.branch)
-
- def run(self, data=None, rank: int = 0, world_size: int = 1):
- from lerobot.common.utils.utils import init_logging
-
- init_logging()
-
- meta = LeRobotDatasetMetadata(self.repo_id)
-
- # TODO: list files, shard files, upload meta data for rank=0
- filenames = []
-
- raise NotImplementedError()
-
- hub_api = HfApi()
- if not self._repo_init:
- self._create_repo(hub_api, meta)
- self._repo_init = True
-
- additions = [
- CommitOperationAdd(path_in_repo=filename, path_or_fileobj=meta.root / filename)
- for filename in filenames
- ]
- logging.info(f"Uploading {','.join(filenames)} to the hub...")
- preupload_lfs_files(
- repo_id=self.repo_id, repo_type="dataset", additions=additions, revision=self.revision
- )
- logging.info(f"Upload of {','.join(filenames)} to the hub complete!")
- # if self.cleanup:
- # for filename in filenames:
- # self.local_working_dir.rm(filename)
- self.operations.extend(additions)
-
- def close(self, rank: int = 0):
- # NOTE: output_mg, dataset and upload_files come from the datatrove writer interface this
- # step appears to be adapted from; they are not defined on this class itself.
- filelist = list(self.output_mg.get_open_files().keys())
- super().close()
- if filelist:
- logging.info(f"Starting upload of {len(filelist)} files to {self.dataset}")
- self.upload_files(*filelist)
- retries = 0
- while True:
- try:
- create_commit(
- self.repo_id,
- repo_type="dataset",
- operations=self.operations,
- commit_message=f"DataTrove upload ({len(self.operations)} files)",
- revision=self.revision,
- )
- break
- except HfHubHTTPError as e:
- if "A commit has happened since" in e.server_message:
- if retries >= MAX_RETRIES:
- logging.error(f"Failed to create commit after {MAX_RETRIES=}. Giving up.")
- raise e
- logging.info("Commit creation race condition issue. 
Waiting...") - time.sleep(BASE_DELAY * 2**retries + random.uniform(0, 2)) - retries += 1 - else: - raise e - - -def make_port_executor(raw_dir, repo_id, port_job_name, port_log_dir, slurm=True): - kwargs = { - "pipeline": [ - PortOpenXDataset(raw_dir, repo_id), - ], - "logging_dir": str(port_log_dir), - } - - if slurm: - kwargs.update( - { - "job_name": port_job_name, - "tasks": 2048, - "workers": 20, - "time": "08:00:00", - "partition": "hopper-cpu", - "cpus_per_task": 24, - "mem_per_cpu_gb": 2, - "max_array_launch_parallel": True, - } - ) - executor = SlurmPipelineExecutor(**kwargs) - else: - kwargs.update( - { - "tasks": 1, - "workers": 1, - } - ) - executor = LocalPipelineExecutor(**kwargs) - - return executor - - -def make_aggregate_executor( - repo_ids, aggr_repo_id, port_job_name, aggregate_log_dir, depends=None, slurm=True -): - kwargs = { - "pipeline": [ - AggregateDatasets(repo_ids, aggr_repo_id), - ], - "logging_dir": str(aggregate_log_dir), - "tasks": 1, - "workers": 1, - } - if depends: - kwargs["depends"] = depends - - if slurm: - kwargs.update( - { - "job_name": port_job_name, - "time": "08:00:00", - "partition": "hopper-cpu", - } - ) - executor = SlurmPipelineExecutor(**kwargs) - else: - executor = LocalPipelineExecutor(**kwargs) - - return executor - - -def make_upload_executor(repo_id, upload_job_name, upload_log_dir, depends=None, slurm=True): - kwargs = { - "pipeline": [ - UploadDataset(repo_id), - ], - "logging_dir": str(upload_log_dir), - "tasks": 1, - "workers": 1, - } - if depends: - kwargs["depends"] = depends - - if slurm: - kwargs.update( - { - "job_name": upload_job_name, - "time": "08:00:00", - "partition": "hopper-cpu", - } - ) - executor = SlurmPipelineExecutor(**kwargs) - else: - executor = LocalPipelineExecutor(**kwargs) - - return executor - - -def main(slurm=True): - # breakpoint() - # for dir_ in Path("/fsx/remi_cadene/.cache/huggingface/lerobot/cadene").glob("droid_world*"): - # shutil.rmtree(dir_) - - world = 2048 - raw_dir = "/fsx/mustafa_shukor/droid" - port_job_name = "port_openx_droid" - aggregate_job_name = "aggregate_openx_droid" - upload_job_name = "upload_openx_droid" - logs_dir = Path("/fsx/remi_cadene/logs") - repo_id = "cadene/droid" - - now = dt.datetime.now() - datetime = f"{now:%Y-%m-%d}_{now:%H-%M-%S}" - # datetime = "2025-02-22_11-17-00" - - port_log_dir = logs_dir / f"{datetime}_{port_job_name}" - aggregate_log_dir = logs_dir / f"{datetime}_{aggregate_job_name}" - upload_log_dir = logs_dir / f"{datetime}_{upload_job_name}" - - port_executor = make_port_executor(raw_dir, repo_id, port_job_name, port_log_dir, slurm) - port_executor.run() - - repo_ids = [f"{repo_id}_{datetime}_world_{world}_rank_{rank}" for rank in range(world)] - aggregate_executor = make_aggregate_executor( - repo_ids, repo_id, aggregate_job_name, aggregate_log_dir, port_executor, slurm - ) - aggregate_executor.run() - - upload_executor = make_upload_executor( - repo_id, upload_job_name, upload_log_dir, aggregate_executor, slurm - ) - upload_executor.run() - - -if __name__ == "__main__": - main() diff --git a/examples/port_datasets/openx_utils/configs.py b/examples/port_datasets/openx_utils/configs.py deleted file mode 100644 index 04468110..00000000 --- a/examples/port_datasets/openx_utils/configs.py +++ /dev/null @@ -1,854 +0,0 @@ -""" -Adapt from https://github.com/openvla/openvla/blob/main/prismatic/vla/datasets/rlds/oxe/configs.py -configs.py - -Defines per-dataset configuration (kwargs) for each dataset in Open-X Embodiment. 
-
-Configuration adopts the following structure:
- image_obs_keys:
- primary: primary external RGB
- secondary: secondary external RGB
- wrist: wrist RGB
-
- depth_obs_keys:
- primary: primary external depth
- secondary: secondary external depth
- wrist: wrist depth
-
- # Always 8-dim =>> changes based on `StateEncoding`
- state_obs_keys:
- StateEncoding.POS_EULER: EEF XYZ (3) + Roll-Pitch-Yaw (3) + <PAD> (1) + Gripper Open/Close (1)
- StateEncoding.POS_QUAT: EEF XYZ (3) + Quaternion (4) + Gripper Open/Close (1)
- StateEncoding.JOINT: Joint Angles (7, <PAD> if fewer) + Gripper Open/Close (1)
-
- state_encoding: Type of `StateEncoding`
- action_encoding: Type of action encoding (e.g., EEF Position vs. Joint Position)
-"""
-
-from enum import IntEnum
-from typing import Dict
-
-import tensorflow as tf
-
-
-def zero_action_filter(traj: Dict) -> bool:
- """
- Filters transitions whose actions are all-0 (only relative actions, no gripper action).
- Note: this filter is applied *after* action normalization, so we need to compare to "normalized 0".
- """
- DROID_Q01 = tf.convert_to_tensor(
- [
- -0.7776297926902771,
- -0.5803514122962952,
- -0.5795090794563293,
- -0.6464047729969025,
- -0.7041108310222626,
- -0.8895104378461838,
- ]
- )
- DROID_Q99 = tf.convert_to_tensor(
- [
- 0.7597932070493698,
- 0.5726242214441299,
- 0.7351000607013702,
- 0.6705610305070877,
- 0.6464948207139969,
- 0.8897542208433151,
- ]
- )
- DROID_NORM_0_ACT = (
- 2 * (tf.zeros_like(traj["action"][:, :6]) - DROID_Q01) / (DROID_Q99 - DROID_Q01 + 1e-8) - 1
- )
-
- return tf.reduce_any(tf.math.abs(traj["action"][:, :6] - DROID_NORM_0_ACT) > 1e-5)
-
-
-# Defines Proprioceptive State Encoding Schemes
-class StateEncoding(IntEnum):
- # fmt: off
- NONE = -1 # No Proprioceptive State
- POS_EULER = 1 # EEF XYZ (3) + Roll-Pitch-Yaw (3) + <PAD> (1) + Gripper Open/Close (1)
- POS_QUAT = 2 # EEF XYZ (3) + Quaternion (4) + Gripper Open/Close (1)
- JOINT = 3 # Joint Angles (7, <PAD> if fewer) + Gripper Open/Close (1)
- JOINT_BIMANUAL = 4 # Joint Angles (2 x [ Joint Angles (6) + Gripper Open/Close (1) ])
- # fmt: on
-
-
-# Defines Action Encoding Schemes
-class ActionEncoding(IntEnum):
- # fmt: off
- EEF_POS = 1 # EEF Delta XYZ (3) + Roll-Pitch-Yaw (3) + Gripper Open/Close (1)
- JOINT_POS = 2 # Joint Delta Position (7) + Gripper Open/Close (1)
- JOINT_POS_BIMANUAL = 3 # Joint Delta Position (2 x [ Joint Delta Position (6) + Gripper Open/Close (1) ])
- EEF_R6 = 4 # EEF Delta XYZ (3) + R6 (6) + Gripper Open/Close (1)
- # fmt: on
-
-
-# === Individual Dataset Configs ===
-OXE_DATASET_CONFIGS = {
- "fractal20220817_data": {
- "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
- "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
- "state_obs_keys": ["base_pose_tool_reached", "gripper_closed"],
- "state_encoding": StateEncoding.POS_QUAT,
- "action_encoding": ActionEncoding.EEF_POS,
- "control_frequency": 3,
- "robot_type": "Google Robot",
- },
- "kuka": {
- "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None},
- "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
- "state_obs_keys": [
- "clip_function_input/base_pose_tool_reached",
- "gripper_closed",
- ],
- "state_encoding": StateEncoding.POS_QUAT,
- "action_encoding": ActionEncoding.EEF_POS,
- "control_frequency": 10,
- "robot_type": "Kuka iiwa",
- },
- "bridge_oxe": { # Version of Bridge V2 in Open X-Embodiment mixture
- "image_obs_keys": {"primary": "image", "secondary": "image_1", "wrist": None},
- "depth_obs_keys": {"primary": None, 
"secondary": None, "wrist": None}, - "state_obs_keys": ["EEF_state", None, "gripper_state"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 5, - "robot_type": "WidowX", - }, - "bridge_orig": { # Original version of Bridge V2 from project website - "image_obs_keys": {"primary": "image_0", "secondary": "image_1", "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["EEF_state", None, "gripper_state"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 5, - "robot_type": "WidowX", - }, - "bridge_dataset": { # Original version of Bridge V2 from project website - "image_obs_keys": {"primary": "image_0", "secondary": "image_1", "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["EEF_state", None, "gripper_state"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 5, - "robot_type": "WidowX", - }, - "taco_play": { - "image_obs_keys": { - "primary": "rgb_static", - "secondary": None, - "wrist": "rgb_gripper", - }, - "depth_obs_keys": { - "primary": "depth_static", - "secondary": None, - "wrist": "depth_gripper", - }, - "state_obs_keys": ["state_eef", None, "state_gripper"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 15, - "robot_type": "Franka", - }, - "jaco_play": { - "image_obs_keys": { - "primary": "image", - "secondary": None, - "wrist": "image_wrist", - }, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["state_eef", None, "state_gripper"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "Jaco 2", - }, - "berkeley_cable_routing": { - "image_obs_keys": { - "primary": "image", - "secondary": "top_image", - "wrist": "wrist45_image", - }, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["robot_state", None], - "state_encoding": StateEncoding.JOINT, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "Franka", - }, - "roboturk": { - "image_obs_keys": {"primary": "front_rgb", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": [None, None, None, None, None, None, None, None], - "state_encoding": StateEncoding.NONE, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "Sawyer", - }, - "nyu_door_opening_surprising_effectiveness": { - "image_obs_keys": {"primary": None, "secondary": None, "wrist": "image"}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": [None, None, None, None, None, None, None, None], - "state_encoding": StateEncoding.NONE, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 3, - "robot_type": "Hello Stretch", - }, - "viola": { - "image_obs_keys": { - "primary": "agentview_rgb", - "secondary": None, - "wrist": "eye_in_hand_rgb", - }, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["joint_states", "gripper_states"], - "state_encoding": StateEncoding.JOINT, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 20, - "robot_type": "Franka", - }, - "berkeley_autolab_ur5": { - 
"image_obs_keys": { - "primary": "image", - "secondary": None, - "wrist": "hand_image", - }, - "depth_obs_keys": {"primary": "depth", "secondary": None, "wrist": None}, - "state_obs_keys": ["state"], - "state_encoding": StateEncoding.POS_QUAT, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 5, - "robot_type": "UR5", - }, - "toto": { - "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["state", None], - "state_encoding": StateEncoding.JOINT, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 30, - "robot_type": "Franka", - }, - "language_table": { - "image_obs_keys": {"primary": "rgb", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["effector_translation", None, None, None, None, None, None], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "xArm", - }, - "columbia_cairlab_pusht_real": { - "image_obs_keys": { - "primary": "image", - "secondary": None, - "wrist": "wrist_image", - }, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["robot_state", None, None, None, None, None, None], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "UR5", - }, - "stanford_kuka_multimodal_dataset_converted_externally_to_rlds": { - "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": "depth_image", "secondary": None, "wrist": None}, - "state_obs_keys": ["ee_position", "ee_orientation", None], - "state_encoding": StateEncoding.POS_QUAT, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 20, - "robot_type": "Kuka iiwa", - }, - "nyu_rot_dataset_converted_externally_to_rlds": { - "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["eef_state", None, "gripper_state"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 3, - "robot_type": "xArm", - }, - "stanford_hydra_dataset_converted_externally_to_rlds": { - "image_obs_keys": { - "primary": "image", - "secondary": None, - "wrist": "wrist_image", - }, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["eef_state", None, "gripper_state"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "Franka", - }, - "austin_buds_dataset_converted_externally_to_rlds": { - "image_obs_keys": { - "primary": "image", - "secondary": None, - "wrist": "wrist_image", - }, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["state"], - "state_encoding": StateEncoding.JOINT, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 20, - "robot_type": "Franka", - }, - "nyu_franka_play_dataset_converted_externally_to_rlds": { - "image_obs_keys": { - "primary": "image", - "secondary": "image_additional_view", - "wrist": None, - }, - "depth_obs_keys": { - "primary": "depth", - "secondary": "depth_additional_view", - "wrist": None, - }, - "state_obs_keys": ["eef_state", None, None], - "state_encoding": StateEncoding.POS_EULER, - 
"action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 3, - "robot_type": "Franka", - }, - "maniskill_dataset_converted_externally_to_rlds": { - "image_obs_keys": { - "primary": "image", - "secondary": None, - "wrist": "wrist_image", - }, - "depth_obs_keys": { - "primary": "depth", - "secondary": None, - "wrist": "wrist_depth", - }, - "state_obs_keys": ["tcp_pose", "gripper_state"], - "state_encoding": StateEncoding.POS_QUAT, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 20, - "robot_type": "Franka", - }, - "furniture_bench_dataset_converted_externally_to_rlds": { - "image_obs_keys": { - "primary": "image", - "secondary": None, - "wrist": "wrist_image", - }, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["state"], - "state_encoding": StateEncoding.POS_QUAT, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "Franka", - }, - "cmu_franka_exploration_dataset_converted_externally_to_rlds": { - "image_obs_keys": { - "primary": "highres_image", - "secondary": None, - "wrist": None, - }, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": [None, None, None, None, None, None, None, None], - "state_encoding": StateEncoding.NONE, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "Franka", - }, - "ucsd_kitchen_dataset_converted_externally_to_rlds": { - "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["joint_state", None], - "state_encoding": StateEncoding.JOINT, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 2, - "robot_type": "xArm", - }, - "ucsd_pick_and_place_dataset_converted_externally_to_rlds": { - "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["eef_state", None, "gripper_state"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 3, - "robot_type": "xArm", - }, - "austin_sailor_dataset_converted_externally_to_rlds": { - "image_obs_keys": { - "primary": "image", - "secondary": None, - "wrist": "wrist_image", - }, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["state"], - "state_encoding": StateEncoding.POS_QUAT, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 20, - "robot_type": "Franka", - }, - "austin_sirius_dataset_converted_externally_to_rlds": { - "image_obs_keys": { - "primary": "image", - "secondary": None, - "wrist": "wrist_image", - }, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["state"], - "state_encoding": StateEncoding.POS_QUAT, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 20, - "robot_type": "Franka", - }, - "bc_z": { - "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": [ - "present/xyz", - "present/axis_angle", - None, - "present/sensed_close", - ], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "Google Robot", - }, - "utokyo_pr2_opening_fridge_converted_externally_to_rlds": { - "image_obs_keys": {"primary": "image", "secondary": None, 
"wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["eef_state", None, "gripper_state"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "PR2", - }, - "utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds": { - "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["eef_state", None, "gripper_state"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "PR2", - }, - "utokyo_xarm_pick_and_place_converted_externally_to_rlds": { - "image_obs_keys": { - "primary": "image", - "secondary": "image2", - "wrist": "hand_image", - }, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["end_effector_pose", None, None], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "xArm", - }, - "utokyo_xarm_bimanual_converted_externally_to_rlds": { - "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["pose_r", None, None], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "xArm Bimanual", - }, - "robo_net": { - "image_obs_keys": {"primary": "image", "secondary": "image1", "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["eef_state", None, "gripper_state"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 1, - "robot_type": "Multi-Robot", - }, - "berkeley_mvp_converted_externally_to_rlds": { - "image_obs_keys": {"primary": None, "secondary": None, "wrist": "hand_image"}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["pose", "gripper"], - "state_encoding": StateEncoding.POS_QUAT, - "action_encoding": ActionEncoding.JOINT_POS, - "control_frequency": 5, - "robot_type": "xArm", - }, - "berkeley_rpt_converted_externally_to_rlds": { - "image_obs_keys": {"primary": None, "secondary": None, "wrist": "hand_image"}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["joint_pos", "gripper"], - "state_encoding": StateEncoding.JOINT, - "action_encoding": ActionEncoding.JOINT_POS, - "control_frequency": 30, - "robot_type": "Franka", - }, - "kaist_nonprehensile_converted_externally_to_rlds": { - "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["state", None], - "state_encoding": StateEncoding.POS_QUAT, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "Franka", - }, - "stanford_mask_vit_converted_externally_to_rlds": { - "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["eef_state", None, "gripper_state"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": None, - "robot_type": "Sawyer", - }, - 
"tokyo_u_lsmo_converted_externally_to_rlds": { - "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["eef_state", None, "gripper_state"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "Cobotta", - }, - "dlr_sara_pour_converted_externally_to_rlds": { - "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["state", None, None], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "DLR SARA", - }, - "dlr_sara_grid_clamp_converted_externally_to_rlds": { - "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["state", None, None], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "DLR SARA", - }, - "dlr_edan_shared_control_converted_externally_to_rlds": { - "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["state", None], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 5, - "robot_type": "DLR EDAN", - }, - "asu_table_top_converted_externally_to_rlds": { - "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["eef_state", None, "gripper_state"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 12.5, - "robot_type": "UR5", - }, - "stanford_robocook_converted_externally_to_rlds": { - "image_obs_keys": {"primary": "image_1", "secondary": "image_2", "wrist": None}, - "depth_obs_keys": {"primary": "depth_1", "secondary": "depth_2", "wrist": None}, - "state_obs_keys": ["eef_state", None, "gripper_state"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 5, - "robot_type": "Franka", - }, - "imperialcollege_sawyer_wrist_cam": { - "image_obs_keys": { - "primary": "image", - "secondary": None, - "wrist": "wrist_image", - }, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": [None, None, None, None, None, None, None, "state"], - "state_encoding": StateEncoding.NONE, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "Sawyer", - }, - "iamlab_cmu_pickup_insert_converted_externally_to_rlds": { - "image_obs_keys": { - "primary": "image", - "secondary": None, - "wrist": "wrist_image", - }, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["joint_state", "gripper_state"], - "state_encoding": StateEncoding.JOINT, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 20, - "robot_type": "Franka", - }, - "uiuc_d3field": { - "image_obs_keys": {"primary": "image_1", "secondary": "image_2", "wrist": None}, - "depth_obs_keys": {"primary": "depth_1", "secondary": "depth_2", "wrist": None}, - "state_obs_keys": [None, None, None, None, None, None, None, None], - "state_encoding": 
StateEncoding.NONE, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 1, - "robot_type": "Kinova Gen3", - }, - "utaustin_mutex": { - "image_obs_keys": { - "primary": "image", - "secondary": None, - "wrist": "wrist_image", - }, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["state"], - "state_encoding": StateEncoding.JOINT, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 20, - "robot_type": "Franka", - }, - "berkeley_fanuc_manipulation": { - "image_obs_keys": { - "primary": "image", - "secondary": None, - "wrist": "wrist_image", - }, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["joint_state", None, "gripper_state"], - "state_encoding": StateEncoding.JOINT, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "Fanuc Mate", - }, - "cmu_playing_with_food": { - "image_obs_keys": { - "primary": "image", - "secondary": None, - "wrist": "finger_vision_1", - }, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["state", None, None], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "Franka", - }, - "cmu_play_fusion": { - "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["state"], - "state_encoding": StateEncoding.JOINT, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 5, - "robot_type": "Franka", - }, - "cmu_stretch": { - "image_obs_keys": {"primary": "image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["eef_state", None, "gripper_state"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "Hello Stretch", - }, - "berkeley_gnm_recon": { - "image_obs_keys": {"primary": None, "secondary": None, "wrist": "image"}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["state", None, None], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 3, - "robot_type": "Jackal", - }, - "berkeley_gnm_cory_hall": { - "image_obs_keys": {"primary": None, "secondary": None, "wrist": "image"}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["state", None, None], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 5, - "robot_type": "RC Car", - }, - "berkeley_gnm_sac_son": { - "image_obs_keys": {"primary": None, "secondary": None, "wrist": "image"}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["state", None, None], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "TurtleBot 2", - }, - # NOTE: modified - "droid": { - "image_obs_keys": { - "primary": "exterior_image_1_left", - "secondary": "exterior_image_2_left", - "wrist": "wrist_image_left", - }, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["EEF_state", None, "gripper_state"], - "state_encoding": StateEncoding.POS_QUAT, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 15, - 
"robot_type": "Franka", - "aux_kwargs": { - "dataset_frame_transform_kwargs": { - "chunk_filter_fn": zero_action_filter, - }, - }, - }, - "fmb_dataset": { - "image_obs_keys": { - "primary": "image_side_1", - "secondary": "image_side_2", - "wrist": "image_wrist_1", - }, - "depth_obs_keys": { - "primary": "image_side_1_depth", - "secondary": "image_side_2_depth", - "wrist": "image_wrist_1_depth", - }, - "state_obs_keys": ["proprio"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "Franka", - }, - # NOTE: modified - "dobbe": { - "image_obs_keys": {"primary": "wrist_image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["EEF_state", None, "gripper_state"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 3.75, - "robot_type": "Hello Stretch", - }, - "roboset": { - "image_obs_keys": { - "primary": "image_left", - "secondary": "image_right", - "wrist": "image_wrist", - }, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["proprio"], - "state_encoding": StateEncoding.JOINT, - "action_encoding": ActionEncoding.JOINT_POS, - "control_frequency": 5, - "robot_type": "Franka", - }, - "rh20t": { - "image_obs_keys": { - "primary": "image_front", - "secondary": "image_side_right", - "wrist": "image_wrist", - }, - "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None}, - "state_obs_keys": ["proprio"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 10, - "robot_type": "Flexiv", - }, - ### T-DROID datasets - "tdroid_carrot_in_bowl": { # "put carrot in bowl" task, 50 demos @ 5 Hz control - "image_obs_keys": {"primary": "static_image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": "static_depth_image", "secondary": None, "wrist": None}, - "state_obs_keys": ["EEF_state", None, "gripper_state"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 5, - "robot_type": "Franka", - }, - "tdroid_pour_corn_in_pot": { # "pour corn from red bonawl into steel pot" task, 50 demos @ 5 Hz control - "image_obs_keys": {"primary": "static_image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": "static_depth_image", "secondary": None, "wrist": None}, - "state_obs_keys": ["EEF_state", None, "gripper_state"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 5, - "robot_type": "Franka", - }, - "tdroid_flip_pot_upright": { # "flip pot upright" task, 10 demos @ 5 Hz control - "image_obs_keys": {"primary": "static_image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": "static_depth_image", "secondary": None, "wrist": None}, - "state_obs_keys": ["EEF_state", None, "gripper_state"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": ActionEncoding.EEF_POS, - "control_frequency": 5, - "robot_type": "Franka", - }, - "tdroid_move_object_onto_plate": { # "move onto plate" task, 150 demos @ 5 Hz control - "image_obs_keys": {"primary": "static_image", "secondary": None, "wrist": None}, - "depth_obs_keys": {"primary": "static_depth_image", "secondary": None, "wrist": None}, - "state_obs_keys": ["EEF_state", None, "gripper_state"], - "state_encoding": StateEncoding.POS_EULER, - "action_encoding": 
ActionEncoding.EEF_POS,
- "control_frequency": 5,
- "robot_type": "Franka",
- },
- "tdroid_knock_object_over": { # "knock <object> over" task, 70 demos @ 5 Hz control
- "image_obs_keys": {"primary": "static_image", "secondary": None, "wrist": None},
- "depth_obs_keys": {"primary": "static_depth_image", "secondary": None, "wrist": None},
- "state_obs_keys": ["EEF_state", None, "gripper_state"],
- "state_encoding": StateEncoding.POS_EULER,
- "action_encoding": ActionEncoding.EEF_POS,
- "control_frequency": 5,
- "robot_type": "Franka",
- },
- "tdroid_cover_object_with_towel": { # "cover <object> with towel" task, 45 demos @ 5 Hz control
- "image_obs_keys": {"primary": "static_image", "secondary": None, "wrist": None},
- "depth_obs_keys": {"primary": "static_depth_image", "secondary": None, "wrist": None},
- "state_obs_keys": ["EEF_state", None, "gripper_state"],
- "state_encoding": StateEncoding.POS_EULER,
- "action_encoding": ActionEncoding.EEF_POS,
- "control_frequency": 5,
- "robot_type": "Franka",
- },
- ### DROID Finetuning datasets
- "droid_wipe": {
- "image_obs_keys": {
- "primary": "exterior_image_2_left",
- "secondary": None,
- "wrist": "wrist_image_left",
- },
- "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
- "state_obs_keys": ["proprio"],
- "state_encoding": StateEncoding.POS_EULER,
- "action_encoding": ActionEncoding.EEF_POS,
- "control_frequency": 15,
- "robot_type": "Franka",
- },
- # NOTE: modified
- ### LIBERO datasets (modified versions)
- "libero_spatial_no_noops": {
- "image_obs_keys": {"primary": "image", "secondary": None, "wrist": "wrist_image"},
- "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
- "state_obs_keys": ["EEF_state", "gripper_state"],
- "state_encoding": StateEncoding.POS_EULER,
- "action_encoding": ActionEncoding.EEF_POS,
- "control_frequency": 20,
- "robot_type": "Franka",
- },
- "libero_object_no_noops": {
- "image_obs_keys": {"primary": "image", "secondary": None, "wrist": "wrist_image"},
- "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
- "state_obs_keys": ["EEF_state", "gripper_state"],
- "state_encoding": StateEncoding.POS_EULER,
- "action_encoding": ActionEncoding.EEF_POS,
- "control_frequency": 20,
- "robot_type": "Franka",
- },
- "libero_goal_no_noops": {
- "image_obs_keys": {"primary": "image", "secondary": None, "wrist": "wrist_image"},
- "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
- "state_obs_keys": ["EEF_state", "gripper_state"],
- "state_encoding": StateEncoding.POS_EULER,
- "action_encoding": ActionEncoding.EEF_POS,
- "control_frequency": 20,
- "robot_type": "Franka",
- },
- "libero_10_no_noops": {
- "image_obs_keys": {"primary": "image", "secondary": None, "wrist": "wrist_image"},
- "depth_obs_keys": {"primary": None, "secondary": None, "wrist": None},
- "state_obs_keys": ["EEF_state", "gripper_state"],
- "state_encoding": StateEncoding.POS_EULER,
- "action_encoding": ActionEncoding.EEF_POS,
- "control_frequency": 20,
- "robot_type": "Franka",
- },
-}
diff --git a/examples/port_datasets/openx_utils/slurm_submit.bash b/examples/port_datasets/openx_utils/slurm_submit.bash
deleted file mode 100755
index 3aee950c..00000000
--- a/examples/port_datasets/openx_utils/slurm_submit.bash
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=openx_rlds
-#SBATCH --partition=hopper-cpu
-#SBATCH --requeue
-#SBATCH --time=00:01:00
-#SBATCH --nodes=1
-#SBATCH --ntasks-per-node=8
-#SBATCH --output=/fsx/%u/slurm/%j-%x.out
- 
-OUTPUT_DIR="/fsx/${USER}/slurm/${SLURM_JOB_NAME}-${SLURM_JOB_ID}-tasks"
-mkdir -p $OUTPUT_DIR
-
-# Function to run a task and redirect output to a separate file
-run_task() {
- local task_id=$1
- local output_file="${OUTPUT_DIR}/task-${task_id}-${SLURM_JOB_ID}.out"
-
- # Run the task and redirect output
- python examples/port_datasets/openx_utils/test.py > $output_file 2>&1
-}
-
-echo $SBATCH_OUTPUT
-
-# node has 380850M and 96 cpus
-trap 'scontrol requeue ${SLURM_JOB_ID}; exit 15' SIGUSR1
-echo "Starting job"
-# note the "&" to start srun as a background thread
-srun python examples/port_datasets/openx_utils/test.py &
-# wait for signals...
-wait
diff --git a/examples/port_datasets/openx_utils/test.py b/examples/port_datasets/openx_utils/test.py
deleted file mode 100644
index 09d14983..00000000
--- a/examples/port_datasets/openx_utils/test.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import os
-
-import psutil
-
-
-def display_system_info():
- # Get the number of CPUs
- num_cpus = psutil.cpu_count(logical=True)
- print(f"Number of CPUs: {num_cpus}")
-
- # Get memory information
- memory_info = psutil.virtual_memory()
- total_memory = memory_info.total / (1024**3) # Convert bytes to GB
- available_memory = memory_info.available / (1024**3) # Convert bytes to GB
- used_memory = memory_info.used / (1024**3) # Convert bytes to GB
-
- print(f"Total Memory: {total_memory:.2f} GB")
- print(f"Available Memory: {available_memory:.2f} GB")
- print(f"Used Memory: {used_memory:.2f} GB")
-
-
-def display_slurm_info():
- # Get SLURM job ID
- job_id = os.getenv("SLURM_JOB_ID")
- print(f"SLURM Job ID: {job_id}")
-
- # Get SLURM job name
- job_name = os.getenv("SLURM_JOB_NAME")
- print(f"SLURM Job Name: {job_name}")
-
- # Get the number of tasks
- num_tasks = os.getenv("SLURM_NTASKS")
- print(f"Number of Tasks: {num_tasks}")
-
- # Get the number of nodes
- num_nodes = os.getenv("SLURM_NNODES")
- print(f"Number of Nodes: {num_nodes}")
-
- # Get the number of CPUs per task
- cpus_per_task = os.getenv("SLURM_CPUS_PER_TASK")
- print(f"CPUs per Task: {cpus_per_task}")
-
- # Get the node list
- node_list = os.getenv("SLURM_NODELIST")
- print(f"Node List: {node_list}")
-
- # Get the task ID (only available within an srun task)
- task_id = os.getenv("SLURM_PROCID")
- print(f"Task ID: {task_id}")
-
-
-if __name__ == "__main__":
- display_system_info()
- display_slurm_info()
diff --git a/examples/port_datasets/openx_utils/transform_utils.py b/examples/port_datasets/openx_utils/transform_utils.py
deleted file mode 100644
index 3d7e8846..00000000
--- a/examples/port_datasets/openx_utils/transform_utils.py
+++ /dev/null
@@ -1,76 +0,0 @@
-"""
-Copied from https://github.com/openvla/openvla/blob/main/prismatic/vla/datasets/rlds/utils/data_utils.py
-"""
-
-from typing import Any, Dict
-
-import tensorflow as tf
-
-
-def binarize_gripper_actions(actions: tf.Tensor) -> tf.Tensor:
- """
- Converts gripper actions from continuous to binary values (0 and 1).
-
- We exploit the fact that most of the time, the gripper is fully open (near 1.0) or fully closed (near 0.0). As it
- transitions between the two, it sometimes passes through a few intermediate values. We relabel those intermediate
- values based on the state that is reached _after_ those intermediate values.
-
- In the edge case that the trajectory ends with an intermediate value, we give up on binarizing and relabel that
- chunk of intermediate values as the last action in the trajectory. 
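-
- For example (illustrative): continuous actions [1.0, 0.6, 0.0, 0.4, 1.0] binarize to
- [1, 0, 0, 1, 1]. The 0.6 maps to 0 because the gripper next reaches fully closed (0.0),
- and the 0.4 maps to 1 because the next resolved state is fully open (1.0).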
-
- The `scan_fn` implements the following logic:
- new_actions = np.empty_like(actions)
- carry = actions[-1]
- for i in reversed(range(actions.shape[0])):
- if in_between_mask[i]:
- carry = carry
- else:
- carry = float(open_mask[i])
- new_actions[i] = carry
- """
- open_mask, closed_mask = actions > 0.95, actions < 0.05
- in_between_mask = tf.logical_not(tf.logical_or(open_mask, closed_mask))
- is_open_float = tf.cast(open_mask, tf.float32)
-
- def scan_fn(carry, i):
- return tf.cond(in_between_mask[i], lambda: tf.cast(carry, tf.float32), lambda: is_open_float[i])
-
- return tf.scan(scan_fn, tf.range(tf.shape(actions)[0]), actions[-1], reverse=True)
-
-
-def invert_gripper_actions(actions: tf.Tensor) -> tf.Tensor:
- return 1 - actions
-
-
-def rel2abs_gripper_actions(actions: tf.Tensor) -> tf.Tensor:
- """
- Converts relative gripper actions (+1 for closing, -1 for opening) to absolute actions (0 = closed; 1 = open).
-
- Assumes that the first relative gripper action is not redundant (i.e. close when already closed)!
- """
- # Note =>> -1 for closing, 1 for opening, 0 for no change
- opening_mask, closing_mask = actions < -0.1, actions > 0.1
- thresholded_actions = tf.where(opening_mask, 1, tf.where(closing_mask, -1, 0))
-
- def scan_fn(carry, i):
- return tf.cond(thresholded_actions[i] == 0, lambda: carry, lambda: thresholded_actions[i])
-
- # If no relative grasp, assumes open for whole trajectory
- start = -1 * thresholded_actions[tf.argmax(thresholded_actions != 0, axis=0)]
- start = tf.cond(start == 0, lambda: 1, lambda: start)
-
- # Note =>> -1 for closed, 1 for open
- new_actions = tf.scan(scan_fn, tf.range(tf.shape(actions)[0]), start)
- new_actions = tf.cast(new_actions, tf.float32) / 2 + 0.5
-
- return new_actions
-
-
-# === Bridge-V2 =>> Dataset-Specific Transform ===
-def relabel_bridge_actions(traj: Dict[str, Any]) -> Dict[str, Any]:
- """Relabels actions to use reached proprioceptive state; discards last timestep (no-action)."""
- movement_actions = traj["observation"]["state"][1:, :6] - traj["observation"]["state"][:-1, :6]
- traj_truncated = tf.nest.map_structure(lambda x: x[:-1], traj)
- traj_truncated["action"] = tf.concat([movement_actions, traj["action"][:-1, -1:]], axis=1)
-
- return traj_truncated
diff --git a/examples/port_datasets/openx_utils/transforms.py b/examples/port_datasets/openx_utils/transforms.py
deleted file mode 100644
index 83152c24..00000000
--- a/examples/port_datasets/openx_utils/transforms.py
+++ /dev/null
@@ -1,997 +0,0 @@
-"""
-Adapted from https://github.com/openvla/openvla/blob/main/prismatic/vla/datasets/rlds/oxe/transforms.py
-transforms.py
-
-Defines a registry of per-dataset standardization transforms for each dataset in Open-X Embodiment.
-
-Transforms adopt the following structure:
- Input: Dictionary of *batched* features (i.e., has leading time dimension)
- Output: Dictionary `step` =>> {
- "observation": {
- <image_keys, depth_image_keys>
- State (in chosen state representation)
- },
- "action": Action (in chosen action representation),
- "language_instruction": str
- }
-"""
-
-from typing import Any, Dict
-
-import tensorflow as tf
-
-from examples.port_datasets.openx_utils.transform_utils import (
- binarize_gripper_actions,
- invert_gripper_actions,
- rel2abs_gripper_actions,
- relabel_bridge_actions,
-)
-
-
-def droid_baseact_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]:
- """
- DROID dataset transformation for actions expressed in *base* frame of the robot. 
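-
- Concretely (a sketch of the code below): the action is assembled as
- [cartesian_velocity (6), 1 - gripper_position (1)], inverting the gripper channel
- (the other transforms in this file use the +1 = open, 0 = close convention), and the
- two exterior views are randomly swapped as a train-time augmentation.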
- """ - - def rand_swap_exterior_images(img1, img2): - """ - Randomly swaps the two exterior images (for training with single exterior input). - """ - return tf.cond(tf.random.uniform(shape=[]) > 0.5, lambda: (img1, img2), lambda: (img2, img1)) - - dt = trajectory["action_dict"]["cartesian_velocity"][:, :3] - dR = trajectory["action_dict"]["cartesian_velocity"][:, 3:6] - - trajectory["action"] = tf.concat( - ( - dt, - dR, - 1 - trajectory["action_dict"]["gripper_position"], - ), - axis=-1, - ) - trajectory["observation"]["exterior_image_1_left"], trajectory["observation"]["exterior_image_2_left"] = ( - rand_swap_exterior_images( - trajectory["observation"]["exterior_image_1_left"], - trajectory["observation"]["exterior_image_2_left"], - ) - ) - # trajectory["observation"]["proprio"] = tf.concat( - # ( - # trajectory["observation"]["cartesian_position"], - # trajectory["observation"]["gripper_position"], - # ), - # axis=-1, - # ) - trajectory["observation"]["EEF_state"] = trajectory["observation"]["cartesian_position"] - trajectory["observation"]["gripper_state"] = trajectory["observation"]["gripper_position"] - return trajectory - - -def droid_finetuning_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - """ - DROID dataset transformation for actions expressed in *base* frame of the robot. - """ - dt = trajectory["action_dict"]["cartesian_velocity"][:, :3] - dR = trajectory["action_dict"]["cartesian_velocity"][:, 3:6] - trajectory["action"] = tf.concat( - ( - dt, - dR, - 1 - trajectory["action_dict"]["gripper_position"], - ), - axis=-1, - ) - trajectory["observation"]["proprio"] = tf.concat( - ( - trajectory["observation"]["cartesian_position"], - trajectory["observation"]["gripper_position"], - ), - axis=-1, - ) - return trajectory - - -def bridge_oxe_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - """ - Applies to version of Bridge V2 in Open X-Embodiment mixture. - - Note =>> In original Bridge V2 dataset, the first timestep has an all-zero action, so we remove it! - """ - for key in trajectory: - if key == "traj_metadata": - continue - elif key in ["observation", "action"]: - for key2 in trajectory[key]: - trajectory[key][key2] = trajectory[key][key2][1:] - else: - trajectory[key] = trajectory[key][1:] - - trajectory["action"] = tf.concat( - ( - trajectory["action"]["world_vector"], - trajectory["action"]["rotation_delta"], - tf.cast(trajectory["action"]["open_gripper"][:, None], tf.float32), - ), - axis=-1, - ) - trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] - trajectory = relabel_bridge_actions(trajectory) - trajectory["observation"]["EEF_state"] = trajectory["observation"]["state"][:, :6] - trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:] - return trajectory - - -def bridge_orig_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - """ - Applies to original version of Bridge V2 from the official project website. - - Note =>> In original Bridge V2 dataset, the first timestep has an all-zero action, so we remove it! 
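-
- Sketch of the steps below: every feature is shifted by one timestep to drop that all-zero
- action, the continuous gripper channel is binarized with binarize_gripper_actions, actions
- are relabeled from reached states via relabel_bridge_actions, and the state is split into
- EEF_state (first 6 dims) and gripper_state (last dim).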
- """ - for key in trajectory: - if key == "traj_metadata": - continue - elif key == "observation": - for key2 in trajectory[key]: - trajectory[key][key2] = trajectory[key][key2][1:] - else: - trajectory[key] = trajectory[key][1:] - - trajectory["action"] = tf.concat( - [ - trajectory["action"][:, :6], - binarize_gripper_actions(trajectory["action"][:, -1])[:, None], - ], - axis=1, - ) - trajectory = relabel_bridge_actions(trajectory) - trajectory["observation"]["EEF_state"] = trajectory["observation"]["state"][:, :6] - trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:] - return trajectory - - -def ppgm_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["action"] = tf.concat( - [ - trajectory["action"][:, :6], - binarize_gripper_actions(trajectory["action"][:, -1])[:, None], - ], - axis=1, - ) - trajectory["observation"]["EEF_state"] = trajectory["observation"]["cartesian_position"][:, :6] - trajectory["observation"]["gripper_state"] = trajectory["observation"]["gripper_position"][:, -1:] - return trajectory - - -def rt1_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - # make gripper action absolute action, +1 = open, 0 = close - gripper_action = trajectory["action"]["gripper_closedness_action"][:, 0] - gripper_action = rel2abs_gripper_actions(gripper_action) - - trajectory["action"] = tf.concat( - ( - trajectory["action"]["world_vector"], - trajectory["action"]["rotation_delta"], - gripper_action[:, None], - ), - axis=-1, - ) - trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] - return trajectory - - -def kuka_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - # make gripper action absolute action, +1 = open, 0 = close - gripper_action = trajectory["action"]["gripper_closedness_action"][:, 0] - gripper_action = rel2abs_gripper_actions(gripper_action) - - trajectory["action"] = tf.concat( - ( - trajectory["action"]["world_vector"], - trajectory["action"]["rotation_delta"], - gripper_action[:, None], - ), - axis=-1, - ) - # decode compressed state - eef_value = tf.io.decode_compressed( - trajectory["observation"]["clip_function_input/base_pose_tool_reached"], - compression_type="ZLIB", - ) - eef_value = tf.io.decode_raw(eef_value, tf.float32) - trajectory["observation"]["clip_function_input/base_pose_tool_reached"] = tf.reshape(eef_value, (-1, 7)) - gripper_value = tf.io.decode_compressed( - trajectory["observation"]["gripper_closed"], compression_type="ZLIB" - ) - gripper_value = tf.io.decode_raw(gripper_value, tf.float32) - trajectory["observation"]["gripper_closed"] = tf.reshape(gripper_value, (-1, 1)) - # trajectory["language_instruction"] = tf.fill( - # tf.shape(trajectory["observation"]["natural_language_instruction"]), "" - # ) # delete uninformative language instruction - trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] - return trajectory - - -def taco_play_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["state_eef"] = trajectory["observation"]["robot_obs"][:, :6] - trajectory["observation"]["state_gripper"] = trajectory["observation"]["robot_obs"][:, 7:8] - trajectory["action"] = trajectory["action"]["rel_actions_world"] - - # invert gripper action + clip, +1 = open, 0 = close - trajectory["action"] = tf.concat( - ( - trajectory["action"][:, :6], - tf.clip_by_value(trajectory["action"][:, -1:], 0, 1), - ), - axis=-1, - ) - - 
trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] - return trajectory - - -def jaco_play_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["state_eef"] = trajectory["observation"]["end_effector_cartesian_pos"][:, :6] - trajectory["observation"]["state_gripper"] = trajectory["observation"]["end_effector_cartesian_pos"][ - :, -1: - ] - - # make gripper action absolute action, +1 = open, 0 = close - gripper_action = trajectory["action"]["gripper_closedness_action"][:, 0] - gripper_action = rel2abs_gripper_actions(gripper_action) - - trajectory["action"] = tf.concat( - ( - trajectory["action"]["world_vector"], - tf.zeros_like(trajectory["action"]["world_vector"]), - gripper_action[:, None], - ), - axis=-1, - ) - trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] - return trajectory - - -def berkeley_cable_routing_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["action"] = tf.concat( - ( - trajectory["action"]["world_vector"], - trajectory["action"]["rotation_delta"], - tf.zeros_like(trajectory["action"]["world_vector"][:, :1]), - ), - axis=-1, - ) - # trajectory["language_instruction"] = tf.fill( - # tf.shape(trajectory["observation"]["natural_language_instruction"]), "" - # ) # delete uninformative language instruction - trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] - return trajectory - - -def roboturk_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - # invert absolute gripper action, +1 = open, 0 = close - gripper_action = invert_gripper_actions( - tf.clip_by_value(trajectory["action"]["gripper_closedness_action"], 0, 1) - ) - - trajectory["action"] = tf.concat( - ( - trajectory["action"]["world_vector"], - trajectory["action"]["rotation_delta"], - gripper_action, - ), - axis=-1, - ) - # trajectory["language_instruction"] = tf.fill( - # tf.shape(trajectory["observation"]["natural_language_instruction"]), "" - # ) # delete uninformative language instruction - trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] - return trajectory - - -def nyu_door_opening_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - # make gripper action absolute action, +1 = open, 0 = close - gripper_action = trajectory["action"]["gripper_closedness_action"][:, 0] - gripper_action = rel2abs_gripper_actions(gripper_action) - - trajectory["action"] = tf.concat( - ( - trajectory["action"]["world_vector"], - trajectory["action"]["rotation_delta"], - gripper_action[:, None], - ), - axis=-1, - ) - # trajectory["language_instruction"] = tf.fill( - # tf.shape(trajectory["observation"]["natural_language_instruction"]), "" - # ) # delete uninformative language instruction - trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] - return trajectory - - -def viola_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - # make gripper action, +1 = open, 0 = close - gripper_action = trajectory["action"]["gripper_closedness_action"][:, None] - gripper_action = tf.clip_by_value(gripper_action, 0, 1) - gripper_action = invert_gripper_actions(gripper_action) - - trajectory["action"] = tf.concat( - ( - trajectory["action"]["world_vector"], - trajectory["action"]["rotation_delta"], - gripper_action, - ), - axis=-1, - ) - # trajectory["language_instruction"] = tf.fill( - # 
tf.shape(trajectory["observation"]["natural_language_instruction"]), "" - # ) # delete uninformative language instruction - trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] - return trajectory - - -def berkeley_autolab_ur5_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["state"] = trajectory["observation"]["robot_state"][:, 6:14] - trajectory["observation"]["depth"] = trajectory["observation"].pop("image_with_depth") - - # make gripper action absolute action, +1 = open, 0 = close - gripper_action = trajectory["action"]["gripper_closedness_action"] - gripper_action = rel2abs_gripper_actions(gripper_action) - - trajectory["action"] = tf.concat( - ( - trajectory["action"]["world_vector"], - trajectory["action"]["rotation_delta"], - gripper_action[:, None], - ), - axis=-1, - ) - trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] - return trajectory - - -def toto_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["action"] = tf.concat( - ( - trajectory["action"]["world_vector"], - trajectory["action"]["rotation_delta"], - tf.cast(trajectory["action"]["open_gripper"][:, None], tf.float32), - ), - axis=-1, - ) - # trajectory["language_instruction"] = tf.fill( - # tf.shape(trajectory["observation"]["natural_language_instruction"]), "" - # ) # delete uninformative language instruction - trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] - return trajectory - - -def language_table_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - # default to "open" gripper - trajectory["action"] = tf.concat( - ( - trajectory["action"], - tf.zeros_like(trajectory["action"]), - tf.zeros_like(trajectory["action"]), - tf.ones_like(trajectory["action"][:, :1]), - ), - axis=-1, - ) - - # decode language instruction - instruction_bytes = trajectory["observation"]["instruction"] - instruction_encoded = tf.strings.unicode_encode(instruction_bytes, output_encoding="UTF-8") - # Remove trailing padding --> convert RaggedTensor to regular Tensor. 
- trajectory["language_instruction"] = tf.strings.split(instruction_encoded, "\x00")[:, :1].to_tensor()[ - :, 0 - ] - return trajectory - - -def pusht_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["action"] = tf.concat( - ( - trajectory["action"]["world_vector"], - trajectory["action"]["rotation_delta"], - trajectory["action"]["gripper_closedness_action"][:, None], - ), - axis=-1, - ) - trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] - return trajectory - - -def stanford_kuka_multimodal_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["depth_image"] = trajectory["observation"]["depth_image"][..., 0] - trajectory["action"] = tf.concat( - ( - trajectory["action"][:, :3], - tf.zeros_like(trajectory["action"][:, :3]), - trajectory["action"][:, -1:], - ), - axis=-1, - ) - return trajectory - - -def nyu_rot_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][..., :6] - trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][..., -1:] - trajectory["action"] = trajectory["action"][..., :7] - return trajectory - - -def stanford_hydra_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - # invert gripper action, +1 = open, 0 = close - trajectory["action"] = tf.concat( - ( - trajectory["action"][:, :6], - invert_gripper_actions(trajectory["action"][:, -1:]), - ), - axis=-1, - ) - - trajectory["observation"]["eef_state"] = tf.concat( - ( - trajectory["observation"]["state"][:, :3], - trajectory["observation"]["state"][:, 7:10], - ), - axis=-1, - ) - trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -3:-2] - # trajectory["language_instruction"] = tf.fill( - # tf.shape(trajectory["language_instruction"]), "" - # ) # delete uninformative language instruction - return trajectory - - -def austin_buds_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - # invert gripper action + clip, +1 = open, 0 = close - trajectory["action"] = tf.concat( - ( - trajectory["action"][:, :6], - invert_gripper_actions(tf.clip_by_value(trajectory["action"][:, -1:], 0, 1)), - ), - axis=-1, - ) - - trajectory["observation"]["state"] = trajectory["observation"]["state"][:, :8] - # trajectory["language_instruction"] = tf.fill( - # tf.shape(trajectory["language_instruction"]), "" - # ) # delete uninformative language instruction - return trajectory - - -def nyu_franka_play_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["depth"] = tf.cast(trajectory["observation"]["depth"][..., 0], tf.float32) - trajectory["observation"]["depth_additional_view"] = tf.cast( - trajectory["observation"]["depth_additional_view"][..., 0], tf.float32 - ) - trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, -6:] - - # clip gripper action, +1 = open, 0 = close - trajectory["action"] = tf.concat( - ( - trajectory["action"][:, -8:-2], - tf.clip_by_value(trajectory["action"][:, -2:-1], 0, 1), - ), - axis=-1, - ) - - # trajectory["language_instruction"] = tf.fill( - # tf.shape(trajectory["language_instruction"]), "" - # ) # delete uninformative language instruction - return trajectory - - -def maniskill_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][..., 7:8] - return trajectory - - -def 
furniture_bench_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - import tensorflow_graphics.geometry.transformation as tft - - trajectory["observation"]["state"] = tf.concat( - ( - trajectory["observation"]["state"][:, :7], - trajectory["observation"]["state"][:, -1:], - ), - axis=-1, - ) - - # invert gripper action + clip, +1 = open, 0 = close - trajectory["action"] = tf.concat( - ( - trajectory["action"][:, :3], - tft.euler.from_quaternion(trajectory["action"][:, 3:7]), - invert_gripper_actions(tf.clip_by_value(trajectory["action"][:, -1:], 0, 1)), - ), - axis=-1, - ) - return trajectory - - -def cmu_franka_exploration_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["action"] = trajectory["action"][..., :-1] - return trajectory - - -def ucsd_kitchen_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["joint_state"] = trajectory["observation"]["state"][:, :7] - trajectory["action"] = trajectory["action"][..., :-1] - return trajectory - - -def ucsd_pick_place_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, :6] - trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:] - trajectory["action"] = tf.concat( - ( - trajectory["action"][:, :3], - tf.zeros_like(trajectory["action"][:, :3]), - trajectory["action"][:, -1:], - ), - axis=-1, - ) - return trajectory - - -def austin_sailor_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - # invert gripper action + clip, +1 = open, 0 = close - trajectory["action"] = tf.concat( - ( - trajectory["action"][:, :6], - invert_gripper_actions(tf.clip_by_value(trajectory["action"][:, -1:], 0, 1)), - ), - axis=-1, - ) - - # trajectory["language_instruction"] = tf.fill( - # tf.shape(trajectory["language_instruction"]), "" - # ) # delete uninformative language instruction - return trajectory - - -def austin_sirius_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - # invert gripper action + clip, +1 = open, 0 = close - trajectory["action"] = tf.concat( - ( - trajectory["action"][:, :6], - invert_gripper_actions(tf.clip_by_value(trajectory["action"][:, -1:], 0, 1)), - ), - axis=-1, - ) - - # trajectory["language_instruction"] = tf.fill( - # tf.shape(trajectory["language_instruction"]), "" - # ) # delete uninformative language instruction - return trajectory - - -def bc_z_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["action"] = tf.concat( - ( - trajectory["action"]["future/xyz_residual"][:, :3], - trajectory["action"]["future/axis_angle_residual"][:, :3], - invert_gripper_actions(tf.cast(trajectory["action"]["future/target_close"][:, :1], tf.float32)), - ), - axis=-1, - ) - trajectory["language_instruction"] = trajectory["observation"]["natural_language_instruction"] - return trajectory - - -def tokyo_pr2_opening_fridge_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, :6] - trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:] - trajectory["action"] = trajectory["action"][..., :-1] - return trajectory - - -def tokyo_pr2_tabletop_manipulation_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, :6] - trajectory["observation"]["gripper_state"] = 
trajectory["observation"]["state"][:, -1:] - trajectory["action"] = trajectory["action"][..., :-1] - return trajectory - - -def utokyo_xarm_pick_place_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - return trajectory - - -def utokyo_xarm_bimanual_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["action"] = trajectory["action"][..., -7:] - return trajectory - - -def robo_net_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["eef_state"] = tf.concat( - ( - trajectory["observation"]["state"][:, :4], - tf.zeros_like(trajectory["observation"]["state"][:, :2]), - ), - axis=-1, - ) - trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:] - trajectory["action"] = tf.concat( - ( - trajectory["action"][:, :4], - tf.zeros_like(trajectory["action"][:, :2]), - trajectory["action"][:, -1:], - ), - axis=-1, - ) - return trajectory - - -def berkeley_mvp_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - return trajectory - - -def berkeley_rpt_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - return trajectory - - -def kaist_nonprehensible_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["state"] = trajectory["observation"]["state"][:, -7:] - trajectory["action"] = tf.concat( - ( - trajectory["action"][:, :6], - tf.zeros_like(trajectory["action"][:, :1]), - ), - axis=-1, - ) - return trajectory - - -def stanford_mask_vit_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["eef_state"] = tf.concat( - ( - trajectory["observation"]["end_effector_pose"][:, :4], - tf.zeros_like(trajectory["observation"]["end_effector_pose"][:, :2]), - ), - axis=-1, - ) - trajectory["observation"]["gripper_state"] = trajectory["observation"]["end_effector_pose"][:, -1:] - trajectory["action"] = tf.concat( - ( - trajectory["action"][:, :4], - tf.zeros_like(trajectory["action"][:, :2]), - trajectory["action"][:, -1:], - ), - axis=-1, - ) - return trajectory - - -def tokyo_lsmo_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, :6] - trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:] - return trajectory - - -def dlr_sara_pour_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - return trajectory - - -def dlr_sara_grid_clamp_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["state"] = trajectory["observation"]["state"][:, :6] - return trajectory - - -def dlr_edan_shared_control_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - # invert gripper action, +1 = open, 0 = close - trajectory["action"] = tf.concat( - ( - trajectory["action"][:, :6], - invert_gripper_actions(trajectory["action"][:, -1:]), - ), - axis=-1, - ) - return trajectory - - -def asu_table_top_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["eef_state"] = trajectory["ground_truth_states"]["EE"] - trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:] - return trajectory - - -def robocook_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["eef_state"] = trajectory["observation"]["state"][:, :6] - trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:] - return trajectory - - -def 
imperial_wristcam_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["action"] = trajectory["action"][..., :-1] - return trajectory - - -def iamlab_pick_insert_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - import tensorflow_graphics.geometry.transformation as tft - - trajectory["observation"]["joint_state"] = trajectory["observation"]["state"][:, :7] - trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, 7:8] - trajectory["action"] = tf.concat( - ( - trajectory["action"][:, :3], - tft.euler.from_quaternion(trajectory["action"][:, 3:7]), - trajectory["action"][:, 7:8], - ), - axis=-1, - ) - return trajectory - - -def uiuc_d3field_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["action"] = tf.concat( - ( - trajectory["action"], - tf.zeros_like(trajectory["action"]), - tf.zeros_like(trajectory["action"][:, :1]), - ), - axis=-1, - ) - return trajectory - - -def utaustin_mutex_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["state"] = trajectory["observation"]["state"][:, :8] - - # invert gripper action + clip, +1 = open, 0 = close - trajectory["action"] = tf.concat( - ( - trajectory["action"][:, :6], - invert_gripper_actions(tf.clip_by_value(trajectory["action"][:, -1:], 0, 1)), - ), - axis=-1, - ) - - # trajectory["language_instruction"] = tf.fill( - # tf.shape(trajectory["language_instruction"]), "" - # ) # delete uninformative language instruction - return trajectory - - -def berkeley_fanuc_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["joint_state"] = trajectory["observation"]["state"][:, :6] - trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, 6:7] - - # dataset does not store gripper actions, so use gripper state info, invert so +1 = open, 0 = close - trajectory["action"] = tf.concat( - ( - trajectory["action"], - invert_gripper_actions(trajectory["observation"]["gripper_state"]), - ), - axis=-1, - ) - return trajectory - - -def cmu_playing_with_food_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - import tensorflow_graphics.geometry.transformation as tft - - trajectory["action"] = tf.concat( - ( - trajectory["action"][:, :3], - tft.euler.from_quaternion(trajectory["action"][:, 3:7]), - trajectory["action"][:, -1:], - ), - axis=-1, - ) - return trajectory - - -def playfusion_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["action"] = tf.concat( - ( - trajectory["action"][:, :3], - trajectory["action"][:, -4:], - ), - axis=-1, - ) - return trajectory - - -def cmu_stretch_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["eef_state"] = tf.concat( - ( - trajectory["observation"]["state"][:, :3], - tf.zeros_like(trajectory["observation"]["state"][:, :3]), - ), - axis=-1, - ) - trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:] - trajectory["action"] = trajectory["action"][..., :-1] - return trajectory - - -def gnm_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["observation"]["state"] = tf.concat( - ( - trajectory["observation"]["position"], - tf.zeros_like(trajectory["observation"]["state"][:, :3]), - trajectory["observation"]["yaw"], - ), - axis=-1, - ) - trajectory["action"] = tf.concat( - ( - trajectory["action"], - tf.zeros_like(trajectory["action"]), - tf.zeros_like(trajectory["action"]), - 
tf.zeros_like(trajectory["action"][:, :1]), - ), - axis=-1, - ) - return trajectory - - -def fmb_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - # every input feature is batched, ie has leading batch dimension - trajectory["observation"]["proprio"] = tf.concat( - ( - trajectory["observation"]["eef_pose"], - trajectory["observation"]["state_gripper_pose"][..., None], - ), - axis=-1, - ) - return trajectory - - -def dobbe_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - # every input feature is batched, ie has leading batch dimension - trajectory["observation"]["EEF_state"] = trajectory["observation"]["state"][:, :6] - trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][:, -1:] - return trajectory - - -def roboset_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - # every input feature is batched, ie has leading batch dimension - trajectory["observation"]["proprio"] = trajectory["observation"]["state"] - - # gripper action is in -1...1 --> clip to 0...1, flip - gripper_action = trajectory["action"][:, -1:] - gripper_action = invert_gripper_actions(tf.clip_by_value(gripper_action, 0, 1)) - - trajectory["action"] = tf.concat( - ( - trajectory["action"][:, :7], - gripper_action, - ), - axis=-1, - ) - return trajectory - - -def rh20t_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["action"] = tf.concat( - ( - trajectory["action"]["tcp_base"], - tf.cast(trajectory["action"]["gripper"][:, None], tf.float32), - ), - axis=-1, - ) - trajectory["observation"]["proprio"] = tf.concat( - ( - trajectory["observation"]["tcp_base"], - trajectory["observation"]["gripper_width"][..., None], - ), - axis=-1, - ) - return trajectory - - -def tdroid_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - trajectory["action"] = tf.concat( - [ - trajectory["action"][:, :6], - binarize_gripper_actions(trajectory["action"][:, -1])[:, None], - ], - axis=1, - ) - trajectory["observation"]["EEF_state"] = trajectory["observation"]["cartesian_position"][:, :6] - trajectory["observation"]["gripper_state"] = trajectory["observation"]["gripper_position"][:, -1:] - return trajectory - - -def libero_dataset_transform(trajectory: Dict[str, Any]) -> Dict[str, Any]: - # gripper action is in -1 (open)...1 (close) --> clip to 0...1, flip --> +1 = open, 0 = close - gripper_action = trajectory["action"][:, -1:] - gripper_action = invert_gripper_actions(tf.clip_by_value(gripper_action, 0, 1)) - - trajectory["action"] = tf.concat( - [ - trajectory["action"][:, :6], - gripper_action, - ], - axis=1, - ) - trajectory["observation"]["EEF_state"] = trajectory["observation"]["state"][:, :6] - trajectory["observation"]["gripper_state"] = trajectory["observation"]["state"][ - :, -2: - ] # 2D gripper state - return trajectory - - -# === Registry === -OXE_STANDARDIZATION_TRANSFORMS = { - "bridge_oxe": bridge_oxe_dataset_transform, - "bridge_orig": bridge_orig_dataset_transform, - "bridge_dataset": bridge_orig_dataset_transform, - "ppgm": ppgm_dataset_transform, - "ppgm_static": ppgm_dataset_transform, - "ppgm_wrist": ppgm_dataset_transform, - "fractal20220817_data": rt1_dataset_transform, - "kuka": kuka_dataset_transform, - "taco_play": taco_play_dataset_transform, - "jaco_play": jaco_play_dataset_transform, - "berkeley_cable_routing": berkeley_cable_routing_dataset_transform, - "roboturk": roboturk_dataset_transform, - "nyu_door_opening_surprising_effectiveness": nyu_door_opening_dataset_transform, - "viola": 
viola_dataset_transform, - "berkeley_autolab_ur5": berkeley_autolab_ur5_dataset_transform, - "toto": toto_dataset_transform, - "language_table": language_table_dataset_transform, - "columbia_cairlab_pusht_real": pusht_dataset_transform, - "stanford_kuka_multimodal_dataset_converted_externally_to_rlds": stanford_kuka_multimodal_dataset_transform, - "nyu_rot_dataset_converted_externally_to_rlds": nyu_rot_dataset_transform, - "stanford_hydra_dataset_converted_externally_to_rlds": stanford_hydra_dataset_transform, - "austin_buds_dataset_converted_externally_to_rlds": austin_buds_dataset_transform, - "nyu_franka_play_dataset_converted_externally_to_rlds": nyu_franka_play_dataset_transform, - "maniskill_dataset_converted_externally_to_rlds": maniskill_dataset_transform, - "furniture_bench_dataset_converted_externally_to_rlds": furniture_bench_dataset_transform, - "cmu_franka_exploration_dataset_converted_externally_to_rlds": cmu_franka_exploration_dataset_transform, - "ucsd_kitchen_dataset_converted_externally_to_rlds": ucsd_kitchen_dataset_transform, - "ucsd_pick_and_place_dataset_converted_externally_to_rlds": ucsd_pick_place_dataset_transform, - "austin_sailor_dataset_converted_externally_to_rlds": austin_sailor_dataset_transform, - "austin_sirius_dataset_converted_externally_to_rlds": austin_sirius_dataset_transform, - "bc_z": bc_z_dataset_transform, - "utokyo_pr2_opening_fridge_converted_externally_to_rlds": tokyo_pr2_opening_fridge_dataset_transform, - "utokyo_pr2_tabletop_manipulation_converted_externally_to_rlds": tokyo_pr2_tabletop_manipulation_dataset_transform, - "utokyo_xarm_pick_and_place_converted_externally_to_rlds": utokyo_xarm_pick_place_dataset_transform, - "utokyo_xarm_bimanual_converted_externally_to_rlds": utokyo_xarm_bimanual_dataset_transform, - "robo_net": robo_net_dataset_transform, - "berkeley_mvp_converted_externally_to_rlds": berkeley_mvp_dataset_transform, - "berkeley_rpt_converted_externally_to_rlds": berkeley_rpt_dataset_transform, - "kaist_nonprehensile_converted_externally_to_rlds": kaist_nonprehensible_dataset_transform, - "stanford_mask_vit_converted_externally_to_rlds": stanford_mask_vit_dataset_transform, - "tokyo_u_lsmo_converted_externally_to_rlds": tokyo_lsmo_dataset_transform, - "dlr_sara_pour_converted_externally_to_rlds": dlr_sara_pour_dataset_transform, - "dlr_sara_grid_clamp_converted_externally_to_rlds": dlr_sara_grid_clamp_dataset_transform, - "dlr_edan_shared_control_converted_externally_to_rlds": dlr_edan_shared_control_dataset_transform, - "asu_table_top_converted_externally_to_rlds": asu_table_top_dataset_transform, - "stanford_robocook_converted_externally_to_rlds": robocook_dataset_transform, - "imperialcollege_sawyer_wrist_cam": imperial_wristcam_dataset_transform, - "iamlab_cmu_pickup_insert_converted_externally_to_rlds": iamlab_pick_insert_dataset_transform, - "uiuc_d3field": uiuc_d3field_dataset_transform, - "utaustin_mutex": utaustin_mutex_dataset_transform, - "berkeley_fanuc_manipulation": berkeley_fanuc_dataset_transform, - "cmu_playing_with_food": cmu_playing_with_food_dataset_transform, - "cmu_play_fusion": playfusion_dataset_transform, - "cmu_stretch": cmu_stretch_dataset_transform, - "berkeley_gnm_recon": gnm_dataset_transform, - "berkeley_gnm_cory_hall": gnm_dataset_transform, - "berkeley_gnm_sac_son": gnm_dataset_transform, - "droid": droid_baseact_transform, - "fmb_dataset": fmb_dataset_transform, - "dobbe": dobbe_dataset_transform, - "roboset": roboset_dataset_transform, - "rh20t_rlds": rh20t_dataset_transform, - ### T-DROID 
datasets - "tdroid_carrot_in_bowl": tdroid_dataset_transform, - "tdroid_pour_corn_in_pot": tdroid_dataset_transform, - "tdroid_flip_pot_upright": tdroid_dataset_transform, - "tdroid_move_object_onto_plate": tdroid_dataset_transform, - "tdroid_knock_object_over": tdroid_dataset_transform, - "tdroid_cover_object_with_towel": tdroid_dataset_transform, - ### DROID Finetuning datasets - "droid_wipe": droid_finetuning_transform, - ### LIBERO datasets (modified versions) - "libero_spatial_no_noops": libero_dataset_transform, - "libero_object_no_noops": libero_dataset_transform, - "libero_goal_no_noops": libero_dataset_transform, - "libero_10_no_noops": libero_dataset_transform, -} diff --git a/lerobot/common/datasets/aggregate.py b/lerobot/common/datasets/aggregate.py index c927f6b3..8761def9 100644 --- a/lerobot/common/datasets/aggregate.py +++ b/lerobot/common/datasets/aggregate.py @@ -1,5 +1,5 @@ import logging -import subprocess +import shutil import pandas as pd import tqdm @@ -16,7 +16,7 @@ def validate_all_metadata(all_metadata: list[LeRobotDatasetMetadata]): robot_type = all_metadata[0].robot_type features = all_metadata[0].features - for meta in tqdm.tqdm(all_metadata): + for meta in tqdm.tqdm(all_metadata, desc="Validate all meta data"): if fps != meta.fps: raise ValueError(f"Same fps is expected, but got fps={meta.fps} instead of {fps}.") if robot_type != meta.robot_type: @@ -41,7 +41,7 @@ def get_update_episode_and_task_func(episode_index_to_add, task_index_to_global_ def aggregate_datasets(repo_ids: list[str], aggr_repo_id: str, aggr_root=None): - logging.info("start aggregate_datasets") + logging.info("Start aggregate_datasets") all_metadata = [LeRobotDatasetMetadata(repo_id) for repo_id in repo_ids] @@ -56,12 +56,12 @@ def aggregate_datasets(repo_ids: list[str], aggr_repo_id: str, aggr_root=None): root=aggr_root, ) - logging.info("find all tasks") + logging.info("Find all tasks") # find all tasks, deduplicate them, create new task indices for each dataset # indexed by dataset index datasets_task_index_to_aggr_task_index = {} aggr_task_index = 0 - for dataset_index, meta in enumerate(tqdm.tqdm(all_metadata)): + for dataset_index, meta in enumerate(tqdm.tqdm(all_metadata, desc="Find all tasks")): task_index_to_aggr_task_index = {} for task_index, task in meta.tasks.items(): @@ -76,9 +76,9 @@ def aggregate_datasets(repo_ids: list[str], aggr_repo_id: str, aggr_root=None): datasets_task_index_to_aggr_task_index[dataset_index] = task_index_to_aggr_task_index - logging.info("cp data and videos") + logging.info("Copy data and videos") aggr_episode_index_shift = 0 - for dataset_index, meta in enumerate(tqdm.tqdm(all_metadata)): + for dataset_index, meta in enumerate(tqdm.tqdm(all_metadata, desc="Copy data and videos")): # cp data for episode_index in range(meta.total_episodes): aggr_episode_index = episode_index + aggr_episode_index_shift @@ -102,10 +102,10 @@ def aggregate_datasets(repo_ids: list[str], aggr_repo_id: str, aggr_root=None): video_path = meta.root / meta.get_video_file_path(episode_index, vid_key) aggr_video_path = aggr_meta.root / aggr_meta.get_video_file_path(aggr_episode_index, vid_key) aggr_video_path.parent.mkdir(parents=True, exist_ok=True) - # shutil.copy(video_path, aggr_video_path) + shutil.copy(video_path, aggr_video_path) - copy_command = f"cp {video_path} {aggr_video_path} &" - subprocess.Popen(copy_command, shell=True) + # copy_command = f"cp {video_path} {aggr_video_path} &" + # subprocess.Popen(copy_command, shell=True) # populate episodes for episode_index, 
episode_dict in meta.episodes.items(): From 5d184a78115f1c5c77ade1767d67beb72f4a988e Mon Sep 17 00:00:00 2001 From: Remi Cadene Date: Tue, 18 Mar 2025 16:55:08 +0000 Subject: [PATCH 26/28] NIT --- examples/port_datasets/droid_rlds/README.md | 2 +- examples/port_datasets/droid_rlds/slurm_aggregate_shards.py | 2 +- examples/port_datasets/droid_rlds/slurm_upload.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/port_datasets/droid_rlds/README.md b/examples/port_datasets/droid_rlds/README.md index 6d426dcc..11020d09 100644 --- a/examples/port_datasets/droid_rlds/README.md +++ b/examples/port_datasets/droid_rlds/README.md @@ -135,7 +135,7 @@ Run this script to start uploading: python examples/port_datasets/droid_rlds/slurm_upload.py \ --repo-id your_id/droid_1.0.1 \ --logs-dir /your/logs \ - --job-name aggr_droid \ + --job-name upload_droid \ --partition your_partition \ --workers 50 \ --cpus-per-task 4 \ diff --git a/examples/port_datasets/droid_rlds/slurm_aggregate_shards.py b/examples/port_datasets/droid_rlds/slurm_aggregate_shards.py index ad8593a7..621b3600 100644 --- a/examples/port_datasets/droid_rlds/slurm_aggregate_shards.py +++ b/examples/port_datasets/droid_rlds/slurm_aggregate_shards.py @@ -241,7 +241,7 @@ def main(): parser.add_argument( "--job-name", type=str, - default="port_droid", + default="aggr_droid", help="Job name used in slurm, and name of the directory created inside the provided logs directory.", ) parser.add_argument( diff --git a/examples/port_datasets/droid_rlds/slurm_upload.py b/examples/port_datasets/droid_rlds/slurm_upload.py index 27f0c8dd..f7f03716 100644 --- a/examples/port_datasets/droid_rlds/slurm_upload.py +++ b/examples/port_datasets/droid_rlds/slurm_upload.py @@ -186,7 +186,7 @@ def main(): parser.add_argument( "--job-name", type=str, - default="port_droid", + default="upload_droid", help="Job name used in slurm, and name of the directory created inside the provided logs directory.", ) parser.add_argument( @@ -198,7 +198,7 @@ def main(): parser.add_argument( "--workers", type=int, - default=2048, + default=50, help="Number of slurm workers. It should be less than the maximum number of shards.", ) parser.add_argument( @@ -209,7 +209,7 @@ def main(): parser.add_argument( "--cpus-per-task", type=int, - default=8, + default=4, help="Number of cpus that each slurm worker will use.", ) parser.add_argument( From 65738f0a80a06210f3a87f65d5b042f90e9faf40 Mon Sep 17 00:00:00 2001 From: Remi Cadene Date: Thu, 20 Mar 2025 14:12:46 +0000 Subject: [PATCH 27/28] Improve slurm droid --- examples/port_datasets/droid_rlds/README.md | 11 +- .../port_datasets/droid_rlds/port_droid.py | 9 +- .../droid_rlds/slurm_aggregate_shards.py | 5 +- .../droid_rlds/slurm_port_shards.py | 4 +- .../port_datasets/droid_rlds/slurm_upload.py | 126 +++++++++++------- 5 files changed, 95 insertions(+), 60 deletions(-) diff --git a/examples/port_datasets/droid_rlds/README.md b/examples/port_datasets/droid_rlds/README.md index 11020d09..b4da4986 100644 --- a/examples/port_datasets/droid_rlds/README.md +++ b/examples/port_datasets/droid_rlds/README.md @@ -39,14 +39,15 @@ python examples/port_datasets/droid_rlds/port.py \ ## Port over SLURM -### 1. Port one shard per job - -First, install slurm utilities from Hugging Face: +Install slurm utilities from Hugging Face: ```bash pip install datatrove ``` -Then run this script to start porting shards of the dataset: + +### 1. 
Port one shard per job + +Run this script to start porting shards of the dataset: ```bash python examples/port_datasets/droid_rlds/slurm_port_shards.py \ --raw-dir /your/data/droid/1.0.1 \ @@ -83,7 +84,7 @@ Check if your jobs are running: squeue -u $USER` ``` -You should see a list with job indices like `15125385_155` where `15125385` is the job index and `155` is the worker index. The output/print of this worker is written in real time in `/your/logs/job_name/slurm_jobs/15125385_155.out`. For instance, you can inspect the content of this file by running `less /your/logs/job_name/slurm_jobs/15125385_155.out`. +You should see a list with job indices like `15125385_155` where `15125385` is the index of the run and `155` is the worker index. The output/print of this worker is written in real time in `/your/logs/job_name/slurm_jobs/15125385_155.out`. For instance, you can inspect the content of this file by running `less /your/logs/job_name/slurm_jobs/15125385_155.out`. Check the progression of your jobs by running: ```bash diff --git a/examples/port_datasets/droid_rlds/port_droid.py b/examples/port_datasets/droid_rlds/port_droid.py index ca6b9fce..b92cd1f7 100644 --- a/examples/port_datasets/droid_rlds/port_droid.py +++ b/examples/port_datasets/droid_rlds/port_droid.py @@ -307,7 +307,7 @@ def generate_lerobot_frames(tf_episode): def port_droid( raw_dir: Path, - repo_id: str = None, + repo_id: str, push_to_hub: bool = False, num_shards: int | None = None, shard_index: int | None = None, @@ -349,11 +349,12 @@ def port_droid( logging.info(f"Number of episodes {num_episodes}") for episode_index, episode in enumerate(raw_dataset): - logging.info(f"{episode_index} / {num_episodes} episodes processed") - elapsed_time = time.time() - start_time d, h, m, s = get_elapsed_time_in_days_hours_minutes_seconds(elapsed_time) - logging.info(f"It has been {d} days, {h} hours, {m} minutes, {s:.3f} seconds") + + logging.info( + f"{episode_index} / {num_episodes} episodes processed (after {d} days, {h} hours, {m} minutes, {s:.3f} seconds)" + ) for frame in generate_lerobot_frames(episode): lerobot_dataset.add_frame(frame) diff --git a/examples/port_datasets/droid_rlds/slurm_aggregate_shards.py b/examples/port_datasets/droid_rlds/slurm_aggregate_shards.py index 621b3600..d2f2dfdf 100644 --- a/examples/port_datasets/droid_rlds/slurm_aggregate_shards.py +++ b/examples/port_datasets/droid_rlds/slurm_aggregate_shards.py @@ -16,6 +16,7 @@ import argparse import logging +from pathlib import Path import tqdm from datatrove.executor import LocalPipelineExecutor @@ -197,7 +198,7 @@ def make_aggregate_executor( "pipeline": [ AggregateDatasets(repo_ids, repo_id), ], - "logging_dir": str(logs_dir), + "logging_dir": str(logs_dir / job_name), } if slurm: @@ -235,7 +236,7 @@ def main(): ) parser.add_argument( "--logs-dir", - type=str, + type=Path, help="Path to logs directory for `datatrove`.", ) parser.add_argument( diff --git a/examples/port_datasets/droid_rlds/slurm_port_shards.py b/examples/port_datasets/droid_rlds/slurm_port_shards.py index 89d6bd85..08e36bc3 100644 --- a/examples/port_datasets/droid_rlds/slurm_port_shards.py +++ b/examples/port_datasets/droid_rlds/slurm_port_shards.py @@ -67,7 +67,7 @@ def make_port_executor( "pipeline": [ PortDroidShards(raw_dir, repo_id), ], - "logging_dir": str(logs_dir), + "logging_dir": str(logs_dir / job_name), } if slurm: @@ -111,7 +111,7 @@ def main(): ) parser.add_argument( "--logs-dir", - type=str, + type=Path, help="Path to logs directory for `datatrove`.", ) 
    parser.add_argument(
diff --git a/examples/port_datasets/droid_rlds/slurm_upload.py b/examples/port_datasets/droid_rlds/slurm_upload.py
index f7f03716..43849468 100644
--- a/examples/port_datasets/droid_rlds/slurm_upload.py
+++ b/examples/port_datasets/droid_rlds/slurm_upload.py
@@ -12,6 +12,7 @@ from huggingface_hub.constants import REPOCARD_NAME
 from examples.port_datasets.droid_rlds.port_droid import DROID_SHARDS
 from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDatasetMetadata
 from lerobot.common.datasets.utils import create_lerobot_dataset_card
+from lerobot.common.utils.utils import init_logging
 
 
 class UploadDataset(PipelineStep):
@@ -23,10 +24,12 @@ class UploadDataset(PipelineStep):
         tags: list | None = None,
         license: str | None = "apache-2.0",
         private: bool = False,
+        distant_repo_id: str | None = None,
         **card_kwargs,
     ):
         super().__init__()
         self.repo_id = repo_id
+        self.distant_repo_id = self.repo_id if distant_repo_id is None else distant_repo_id
         self.branch = branch
         self.tags = tags
         self.license = license
@@ -43,96 +46,123 @@ class UploadDataset(PipelineStep):
         self.create_repo()
 
     def create_repo(self):
-        hub_api = HfApi()
-
+        logging.info(f"Loading metadata from {self.repo_id}...")
         meta = LeRobotDatasetMetadata(self.repo_id)
+
+        logging.info(f"Creating repo {self.distant_repo_id}...")
+        hub_api = HfApi()
         hub_api.create_repo(
-            repo_id=self.repo_id,
+            repo_id=self.distant_repo_id,
             private=self.private,
             repo_type="dataset",
             exist_ok=True,
         )
         if self.branch:
             hub_api.create_branch(
-                repo_id=self.repo_id,
+                repo_id=self.distant_repo_id,
                 branch=self.branch,
                 revision=self.revision,
                 repo_type="dataset",
                 exist_ok=True,
             )
 
-        if not hub_api.file_exists(self.repo_id, REPOCARD_NAME, repo_type="dataset", revision=self.branch):
+        if not hub_api.file_exists(
+            self.distant_repo_id, REPOCARD_NAME, repo_type="dataset", revision=self.branch
+        ):
             card = create_lerobot_dataset_card(
                 tags=self.tags, dataset_info=meta.info, license=self.license, **self.card_kwargs
             )
-            card.push_to_hub(repo_id=self.repo_id, repo_type="dataset", revision=self.branch)
+            card.push_to_hub(repo_id=self.distant_repo_id, repo_type="dataset", revision=self.branch)
 
         def list_files_recursively(directory):
             base_path = Path(directory)
             return [str(file.relative_to(base_path)) for file in base_path.rglob("*") if file.is_file()]
 
-        meta = LeRobotDatasetMetadata(self.repo_id)
+        logging.info(f"Listing all local files from {self.repo_id}...")
         self.file_paths = list_files_recursively(meta.root)
         self.file_paths = sorted(self.file_paths)
 
-    def run(self, data=None, rank: int = 0, world_size: int = 1):
-        import logging
-        import random
-        import time
+    def create_chunks(self, lst, n):
         from itertools import islice
 
-        from huggingface_hub import CommitOperationAdd, create_commit, preupload_lfs_files
+        it = iter(lst)
+        return [list(islice(it, size)) for size in [len(lst) // n + (i < len(lst) % n) for i in range(n)]]
+
+    def create_commits(self, additions):
+        import logging
+        import math
+        import random
+        import time
+
+        from huggingface_hub import create_commit
         from huggingface_hub.utils import HfHubHTTPError
 
+        FILES_BETWEEN_COMMITS = 10  # noqa: N806
+        BASE_DELAY = 0.1  # noqa: N806
+        MAX_RETRIES = 12  # noqa: N806
+
+        # Split the files into smaller chunks for faster commits
+        # and to avoid the "A commit has happened since" error
+        num_chunks = math.ceil(len(additions) / FILES_BETWEEN_COMMITS)
+        chunks = self.create_chunks(additions, num_chunks)
+
+        for chunk in chunks:
+            retries = 0
+            while True:
+                try:
+                    create_commit(
self.distant_repo_id, + repo_type="dataset", + operations=chunk, + commit_message=f"DataTrove upload ({len(chunk)} files)", + revision=self.branch, + ) + logging.info("create_commit completed!") + break + except HfHubHTTPError as e: + if "A commit has happened since" in e.server_message: + if retries >= MAX_RETRIES: + logging.error(f"Failed to create commit after {MAX_RETRIES=}. Giving up.") + raise e + logging.info("Commit creation race condition issue. Waiting...") + time.sleep(BASE_DELAY * 2**retries + random.uniform(0, 2)) + retries += 1 + else: + raise e + + def run(self, data=None, rank: int = 0, world_size: int = 1): + import logging + + from datasets.utils.tqdm import disable_progress_bars + from huggingface_hub import CommitOperationAdd, preupload_lfs_files + from lerobot.common.datasets.lerobot_dataset import LeRobotDatasetMetadata from lerobot.common.utils.utils import init_logging - BASE_DELAY = 1.0 # noqa: N806 - MAX_RETRIES = 24 # noqa: N806 - init_logging() + disable_progress_bars() - def chunked(lst, n): - it = iter(lst) - return [list(islice(it, size)) for size in [len(lst) // n + (i < len(lst) % n) for i in range(n)]] - - chunks = chunked(self.file_paths, world_size) + chunks = self.create_chunks(self.file_paths, world_size) file_paths = chunks[rank] if len(file_paths) == 0: raise ValueError(file_paths) + logging.info("Pre-uploading LFS files...") + for i, path in enumerate(file_paths): + logging.info(f"{i}: {path}") + meta = LeRobotDatasetMetadata(self.repo_id) additions = [ CommitOperationAdd(path_in_repo=path, path_or_fileobj=meta.root / path) for path in file_paths ] - logging.info(f"Uploading {','.join(file_paths)} to the hub...") preupload_lfs_files( - repo_id=self.repo_id, repo_type="dataset", additions=additions, revision=self.branch + repo_id=self.distant_repo_id, repo_type="dataset", additions=additions, revision=self.branch ) - logging.info(f"Upload of {','.join(file_paths)} to the hub complete!") - retries = 0 - while True: - try: - create_commit( - self.repo_id, - repo_type="dataset", - operations=additions, - commit_message=f"DataTrove upload ({len(additions)} files)", - revision=self.branch, - ) - break - except HfHubHTTPError as e: - if "A commit has happened since" in e.server_message: - if retries >= MAX_RETRIES: - logging.error(f"Failed to create commit after {MAX_RETRIES=}. Giving up.") - raise e - logging.info("Commit creation race condition issue. 
Waiting...") - time.sleep(BASE_DELAY * 2**retries + random.uniform(0, 2)) - retries += 1 - else: - raise e + logging.info("Creating commits...") + self.create_commits(additions) + logging.info("Done!") def make_upload_executor( @@ -142,7 +172,7 @@ def make_upload_executor( "pipeline": [ UploadDataset(repo_id), ], - "logging_dir": str(logs_dir), + "logging_dir": str(logs_dir / job_name), } if slurm: @@ -180,7 +210,7 @@ def main(): ) parser.add_argument( "--logs-dir", - type=str, + type=Path, help="Path to logs directory for `datatrove`.", ) parser.add_argument( @@ -209,7 +239,7 @@ def main(): parser.add_argument( "--cpus-per-task", type=int, - default=4, + default=8, help="Number of cpus that each slurm worker will use.", ) parser.add_argument( @@ -219,6 +249,8 @@ def main(): help="Memory per cpu that each worker will use.", ) + init_logging() + args = parser.parse_args() kwargs = vars(args) kwargs["slurm"] = kwargs.pop("slurm") == 1 From 53ecec5fb216a09270677e4fe068b2b66ca59c32 Mon Sep 17 00:00:00 2001 From: Remi Cadene Date: Mon, 31 Mar 2025 07:38:01 +0000 Subject: [PATCH 28/28] WIP v21 to v30 --- .../port_datasets/droid_rlds/slurm_upload.py | 1 + lerobot/common/datasets/lerobot_dataset.py | 4 +- .../v30/convert_dataset_v21_to_v30.py | 137 ++++++++++++++++++ 3 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 lerobot/common/datasets/v30/convert_dataset_v21_to_v30.py diff --git a/examples/port_datasets/droid_rlds/slurm_upload.py b/examples/port_datasets/droid_rlds/slurm_upload.py index 43849468..8dec7c20 100644 --- a/examples/port_datasets/droid_rlds/slurm_upload.py +++ b/examples/port_datasets/droid_rlds/slurm_upload.py @@ -117,6 +117,7 @@ class UploadDataset(PipelineStep): commit_message=f"DataTrove upload ({len(chunk)} files)", revision=self.branch, ) + # TODO: every 100 chunks super_squach_commits() logging.info("create_commit completed!") break except HfHubHTTPError as e: diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 5414c76d..4fb71b3b 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -73,7 +73,7 @@ from lerobot.common.datasets.video_utils import ( ) from lerobot.common.robot_devices.robots.utils import Robot -CODEBASE_VERSION = "v2.1" +CODEBASE_VERSION = "v3.0" class LeRobotDatasetMetadata: @@ -616,6 +616,8 @@ class LeRobotDataset(torch.utils.data.Dataset): """hf_dataset contains all the observations, states, actions, rewards, etc.""" if self.episodes is None: path = str(self.root / "data") + # TODO(rcadene): load_dataset convert parquet to arrow. + # set num_proc to accelerate this conversion hf_dataset = load_dataset("parquet", data_dir=path, split="train") else: files = [str(self.root / self.meta.get_data_file_path(ep_idx)) for ep_idx in self.episodes] diff --git a/lerobot/common/datasets/v30/convert_dataset_v21_to_v30.py b/lerobot/common/datasets/v30/convert_dataset_v21_to_v30.py new file mode 100644 index 00000000..f1efae35 --- /dev/null +++ b/lerobot/common/datasets/v30/convert_dataset_v21_to_v30.py @@ -0,0 +1,137 @@ +""" +This script will help you convert any LeRobot dataset already pushed to the hub from codebase version 2.1 to +3.0. It will: + +- Generate per-episodes stats and writes them in `episodes_stats.jsonl` +- Check consistency between these new stats and the old ones. +- Remove the deprecated `stats.json`. +- Update codebase_version in `info.json`. +- Push this new version to the hub on the 'main' branch and tags it with "v2.1". 
+ +Usage: + +```bash +python lerobot/common/datasets/v30/convert_dataset_v21_to_v30.py \ + --repo-id=lerobot/pusht +``` + +""" + +import argparse +import logging + +from datasets import Dataset +from huggingface_hub import snapshot_download + +from lerobot.common.constants import HF_LEROBOT_HOME +from lerobot.common.datasets.utils import ( + load_episodes_stats, +) + +V21 = "v2.1" + + +class SuppressWarnings: + def __enter__(self): + self.previous_level = logging.getLogger().getEffectiveLevel() + logging.getLogger().setLevel(logging.ERROR) + + def __exit__(self, exc_type, exc_val, exc_tb): + logging.getLogger().setLevel(self.previous_level) + + +def convert_dataset( + repo_id: str, + branch: str | None = None, + num_workers: int = 4, +): + root = HF_LEROBOT_HOME / repo_id + snapshot_download( + repo_id, + repo_type="dataset", + revision=V21, + local_dir=root, + ) + + # Concatenate videos + + # Create + + """ + ------------------------- + OLD + data/chunk-000/episode_000000.parquet + + NEW + data/chunk-000/file_000.parquet + ------------------------- + OLD + videos/chunk-000/CAMERA/episode_000000.mp4 + + NEW + videos/chunk-000/file_000.mp4 + ------------------------- + OLD + episodes.jsonl + {"episode_index": 1, "tasks": ["Put the blue block in the green bowl"], "length": 266} + + NEW + meta/episodes/chunk-000/episodes_000.parquet + episode_index | video_chunk_index | video_file_index | data_chunk_index | data_file_index | tasks | length + ------------------------- + OLD + tasks.jsonl + {"task_index": 1, "task": "Put the blue block in the green bowl"} + + NEW + meta/tasks/chunk-000/file_000.parquet + task_index | task + ------------------------- + OLD + episodes_stats.jsonl + + NEW + meta/episodes_stats/chunk-000/file_000.parquet + episode_index | mean | std | min | max + ------------------------- + UPDATE + meta/info.json + ------------------------- + """ + + new_root = HF_LEROBOT_HOME / f"{repo_id}_v30" + new_root.mkdir(parents=True, exist_ok=True) + + episodes_stats = load_episodes_stats(root) + hf_dataset = Dataset.from_dict(episodes_stats) # noqa: F841 + + meta_ep_st_ch = new_root / "meta/episodes_stats/chunk-000" + meta_ep_st_ch.mkdir(parents=True, exist_ok=True) + + # hf_dataset.to_parquet(meta_ep_st_ch / 'file_000.parquet') + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--repo-id", + type=str, + required=True, + help="Repository identifier on Hugging Face: a community or a user name `/` the name of the dataset " + "(e.g. `lerobot/pusht`, `cadene/aloha_sim_insertion_human`).", + ) + parser.add_argument( + "--branch", + type=str, + default=None, + help="Repo branch to push your dataset. Defaults to the main branch.", + ) + parser.add_argument( + "--num-workers", + type=int, + default=4, + help="Number of workers for parallelizing stats compute. Defaults to 4.", + ) + + args = parser.parse_args() + convert_dataset(**vars(args))
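
The conversion script in PATCH 28 is explicitly a work in progress: the parquet write for episode stats is left commented out, and the new layout only exists as the OLD/NEW mapping in its comment block. Below is a minimal sketch of that missing step, under two assumptions that the patch does not confirm: that `load_episodes_stats(root)` returns a mapping from `episode_index` to a nested `{feature: {"mean" | "std" | "min" | "max": value}}` stats dict (the shape of v2.1 `episodes_stats.jsonl` entries), and that flattened `feature/stat` column names are an acceptable schema; both the helper `write_episodes_stats_parquet` and its column naming are hypothetical, not the final v3.0 format.

```python
from pathlib import Path

import pandas as pd


def write_episodes_stats_parquet(episodes_stats: dict, new_root: Path) -> Path:
    """Flatten per-episode stats into one row per episode and write them to the
    meta/episodes_stats/chunk-000/file_000.parquet path named in the script's
    layout comment (episode_index | mean | std | min | max)."""
    rows = []
    for episode_index, ep_stats in episodes_stats.items():
        row = {"episode_index": episode_index}
        for feature, feature_stats in ep_stats.items():
            # Flatten nested stats into columns like "observation.state/mean",
            # "action/max", etc. (a hypothetical naming choice).
            for stat_name, value in feature_stats.items():
                row[f"{feature}/{stat_name}"] = value
        rows.append(row)

    out_path = new_root / "meta" / "episodes_stats" / "chunk-000" / "file_000.parquet"
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # Requires pyarrow (or fastparquet) for the parquet writer.
    pd.DataFrame(rows).to_parquet(out_path, index=False)
    return out_path
```

In `convert_dataset`, a step like this would slot in where `# hf_dataset.to_parquet(meta_ep_st_ch / 'file_000.parquet')` is commented out; keeping one row per episode means the result can be read back with `pd.read_parquet` or `datasets.Dataset.from_parquet` without any JSONL parsing.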