Commit 7bde337b49: merge e5d3ed4de9 into 768e36660d
@@ -0,0 +1,492 @@
import argparse
import logging
import os
import shutil
import traceback
from pathlib import Path

import cv2
import h5py
import torch

from lerobot.common.datasets.lerobot_dataset import LEROBOT_HOME, LeRobotDataset


class AlohaHD5Extractor:
    TAGS = ["aloha", "robotics", "hdf5"]
    aloha_stationary = "aloha-stationary"
    aloha_mobile = "aloha-mobile"

    @staticmethod
    def get_cameras(hdf5_data: h5py.File):
        """
        Extract the list of RGB camera keys from the given HDF5 data.

        Parameters
        ----------
        hdf5_data : h5py.File
            The HDF5 file object containing the dataset.

        Returns
        -------
        list of str
            A list of keys corresponding to RGB cameras in the dataset.
        """
        rgb_cameras = [key for key in hdf5_data["/observations/images"] if "depth" not in key]
        return rgb_cameras

    @staticmethod
    def check_format(episode_list: list[str] | list[Path], image_compressed: bool = True):
        """
        Check the format of the given list of HDF5 files.

        Parameters
        ----------
        episode_list : list of str or list of Path
            List of paths to the HDF5 files to be checked.
        image_compressed : bool, optional
            Flag indicating whether the images are compressed (default is True).

        Raises
        ------
        ValueError
            If episode_list is empty.
            If any HDF5 file is missing the required keys '/action' or '/observations/qpos'.
            If the '/action' or '/observations/qpos' keys do not have 2 dimensions.
            If the number of frames in '/action' and '/observations/qpos' does not match.
            If the number of frames in '/observations/images/{camera}' does not match the number of frames in '/action' and '/observations/qpos'.
            If the dimensions of the images do not match the expected dimensions given the image_compressed flag.
            If uncompressed images do not have the expected (h, w, c) format.
        """
        if not episode_list:
            raise ValueError(
                "No hdf5 files found in the raw directory. Make sure they are named 'episode_*.hdf5'"
            )
        for episode_path in episode_list:
            with h5py.File(episode_path, "r") as data:
                if not all(key in data for key in ["/action", "/observations/qpos"]):
                    raise ValueError(
                        "Missing required keys in the hdf5 file. Make sure the keys '/action' and '/observations/qpos' are present."
                    )

                if not data["/action"].ndim == data["/observations/qpos"].ndim == 2:
                    raise ValueError(
                        "The '/action' and '/observations/qpos' keys should both have 2 dimensions."
                    )

                if (num_frames := data["/action"].shape[0]) != data["/observations/qpos"].shape[0]:
                    raise ValueError(
                        "The '/action' and '/observations/qpos' keys should have the same number of frames."
                    )

                for camera in AlohaHD5Extractor.get_cameras(data):
                    if num_frames != data[f"/observations/images/{camera}"].shape[0]:
                        raise ValueError(
                            f"The number of frames in '/observations/images/{camera}' should be the same as in the '/action' and '/observations/qpos' keys."
                        )

                    expected_dims = 2 if image_compressed else 4
                    if data[f"/observations/images/{camera}"].ndim != expected_dims:
                        raise ValueError(
                            f"Expected {expected_dims} dimensions for {'compressed' if image_compressed else 'uncompressed'} images but {data[f'/observations/images/{camera}'].ndim} were provided."
                        )
                    if not image_compressed:
                        _, h, w, c = data[f"/observations/images/{camera}"].shape
                        # Channels must be the smallest dimension for an (h, w, c) layout.
                        if not (c < h and c < w):
                            raise ValueError(f"Expected (h,w,c) image format but ({h=},{w=},{c=}) provided.")
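
    # Per-episode HDF5 layout assumed by check_format (camera names vary per setup;
    # the shapes below are illustrative, not values taken from this repository):
    #   /action                        -> (num_frames, action_dim)
    #   /observations/qpos             -> (num_frames, state_dim)
    #   /observations/images/<camera>  -> (num_frames, pad_len) of encoded bytes when compressed,
    #                                     (num_frames, h, w, c) arrays when uncompressed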

    @staticmethod
    def extract_episode_frames(
        episode_path: str | Path, features: dict[str, dict], image_compressed: bool
    ) -> list[dict[str, torch.Tensor]]:
        """
        Extract frames from an episode stored in an HDF5 file.

        Parameters
        ----------
        episode_path : str or Path
            Path to the HDF5 file containing the episode data.
        features : dict of str to dict
            Dictionary where keys are feature identifiers and values are dictionaries with feature details.
        image_compressed : bool
            Flag indicating whether the images are stored in a compressed format.

        Returns
        -------
        list of dict of str to torch.Tensor
            List of frames, where each frame is a dictionary mapping feature identifiers to tensors.
        """
        frames = []
        with h5py.File(episode_path, "r") as file:
            for frame_idx in range(file["/action"].shape[0]):
                frame = {}
                for feature_id in features:
                    feature_name_hd5 = (
                        feature_id.replace(".", "/")
                        .replace("observation", "observations")
                        .replace("state", "qpos")
                    )
                    if "images" in feature_id.split("."):
                        image = (
                            file[feature_name_hd5][frame_idx]
                            if not image_compressed
                            else cv2.imdecode(file[feature_name_hd5][frame_idx], 1)
                        )
                        frame[feature_id] = torch.from_numpy(image.transpose(2, 0, 1))
                    else:
                        frame[feature_id] = torch.from_numpy(file[feature_name_hd5][frame_idx])
                frames.append(frame)
        return frames
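
    # Feature-id naming: HDF5 topics map to LeRobot feature ids by replacing "/" with ".",
    # "observations" with "observation", and "qpos" with "state". For a camera named
    # "cam_high" (an example name, as used in the prototype script below), this gives:
    #   /action                        -> "action"
    #   /observations/qpos             -> "observation.state"
    #   /observations/images/cam_high  -> "observation.images.cam_high"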

    @staticmethod
    def define_features(
        hdf5_file_path: Path, image_compressed: bool = True, encode_as_video: bool = True
    ) -> dict[str, dict]:
        """
        Define features from an HDF5 file.

        Parameters
        ----------
        hdf5_file_path : Path
            The path to the HDF5 file.
        image_compressed : bool, optional
            Whether the images are compressed, by default True.
        encode_as_video : bool, optional
            Whether to encode images as video rather than as individual images, by default True.

        Returns
        -------
        dict[str, dict]
            A dictionary where keys are topic names and values are dictionaries
            containing feature information such as dtype, shape, and names.
        """
        # Initialize containers for topics and features
        topics: list[str] = []
        features: dict[str, dict] = {}

        # Open the HDF5 file
        with h5py.File(hdf5_file_path, "r") as hdf5_file:
            # Collect all dataset names in the HDF5 file
            hdf5_file.visititems(
                lambda name, obj: topics.append(name) if isinstance(obj, h5py.Dataset) else None
            )

            # Iterate over each topic to define its features
            for topic in topics:
                # Map the HDF5 topic name to the LeRobot feature id
                destination_topic = (
                    topic.replace("/", ".").replace("observations", "observation").replace("qpos", "state")
                )
                # If the topic is an image, define it as a video (or image) feature
                if "images" in topic.split("/"):
                    sample = hdf5_file[topic][0]
                    features[destination_topic] = {
                        "dtype": "video" if encode_as_video else "image",
                        # Report the channel-first shape in both cases, matching extract_episode_frames
                        "shape": cv2.imdecode(sample, 1).transpose(2, 0, 1).shape
                        if image_compressed
                        else sample.transpose(2, 0, 1).shape,
                        "names": [
                            "channel",
                            "height",
                            "width",
                        ],
                    }
                # Skip compressed-length bookkeeping topics
                elif "compress_len" in topic.split("/"):
                    continue
                # Otherwise, define it as a regular feature
                else:
                    features[destination_topic] = {
                        "dtype": str(hdf5_file[topic][0].dtype),
                        "shape": (topic_shape := hdf5_file[topic][0].shape),
                        "names": [f"{topic.split('/')[-1]}_{k}" for k in range(topic_shape[0])],
                    }
        # Return the defined features
        return features
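
    # Illustrative return value for a 14-DoF stationary Aloha setup with one 480x640 camera
    # (the dimensions and dtypes are assumptions for the example, not values read from this repo):
    #   {
    #       "action":            {"dtype": "float64", "shape": (14,), "names": ["action_0", ..., "action_13"]},
    #       "observation.state": {"dtype": "float64", "shape": (14,), "names": ["qpos_0", ..., "qpos_13"]},
    #       "observation.images.cam_high": {
    #           "dtype": "video", "shape": (3, 480, 640), "names": ["channel", "height", "width"],
    #       },
    #   }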


class DatasetConverter:
    """
    A class to convert Aloha HDF5 datasets to the LeRobot format.

    Parameters
    ----------
    raw_path : Path or str
        The path to the raw dataset.
    dataset_repo_id : str
        The repository ID where the dataset will be stored.
    fps : int
        Frames per second for the dataset.
    robot_type : str, optional
        The type of robot, by default "".
    encode_as_videos : bool, optional
        Whether to encode images as videos, by default True.
    image_compressed : bool, optional
        Whether the images are compressed, by default True.
    image_writer_processes : int, optional
        Number of processes for writing images, by default 0.
    image_writer_threads : int, optional
        Number of threads for writing images, by default 0.

    Methods
    -------
    extract_episode(episode_path, task_description='')
        Extracts frames from a single episode and saves it with a description.
    extract_episodes(episode_description='')
        Extracts frames from all episodes and saves them with a description.
    push_dataset_to_hub(dataset_tags=None, private=False, push_videos=True, license="apache-2.0")
        Pushes the dataset to the Hugging Face Hub.
    init_lerobot_dataset()
        Initializes the LeRobot dataset.
    """

    def __init__(
        self,
        raw_path: Path | str,
        dataset_repo_id: str,
        fps: int,
        robot_type: str = "",
        encode_as_videos: bool = True,
        image_compressed: bool = True,
        image_writer_processes: int = 0,
        image_writer_threads: int = 0,
    ):
        self.raw_path = raw_path if isinstance(raw_path, Path) else Path(raw_path)
        self.dataset_repo_id = dataset_repo_id
        self.fps = fps
        self.robot_type = robot_type
        self.image_compressed = image_compressed
        self.image_writer_threads = image_writer_threads
        self.image_writer_processes = image_writer_processes
        self.encode_as_videos = encode_as_videos

        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.setLevel(logging.INFO)

        # Add console handler
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.INFO)
        formatter = logging.Formatter("%(asctime)s - [%(name)s] - %(message)s")
        console_handler.setFormatter(formatter)
        self.logger.addHandler(console_handler)

        self.logger.info(f"{'-' * 10} Aloha HD5 -> LeRobot Converter {'-' * 10}")
        self.logger.info(f"Processing Aloha HD5 dataset from {self.raw_path}")
        self.logger.info(f"Dataset will be stored in {self.dataset_repo_id}")
        self.logger.info(f"FPS: {self.fps}")
        self.logger.info(f"Robot type: {self.robot_type}")
        self.logger.info(f"Image compressed: {self.image_compressed}")
        self.logger.info(f"Encoding images as videos: {self.encode_as_videos}")
        self.logger.info(f"#writer processes: {self.image_writer_processes}")
        self.logger.info(f"#writer threads: {self.image_writer_threads}")

        self.episode_list = list(self.raw_path.glob("episode_*.hdf5"))
        AlohaHD5Extractor.check_format(self.episode_list, image_compressed=self.image_compressed)
        self.features = AlohaHD5Extractor.define_features(
            self.episode_list[0],
            image_compressed=self.image_compressed,
            encode_as_video=self.encode_as_videos,
        )

    def extract_episode(self, episode_path, task_description: str = ""):
        """
        Extract frames from an episode and save them to the dataset.

        Parameters
        ----------
        episode_path : str
            The path to the episode file.
        task_description : str, optional
            A description of the task associated with the episode (default is an empty string).

        Returns
        -------
        None
        """
        for frame in AlohaHD5Extractor.extract_episode_frames(
            episode_path, self.features, self.image_compressed
        ):
            self.dataset.add_frame(frame)
        self.logger.info(f"Saving episode with description: {task_description} ...")
        self.dataset.save_episode(task=task_description)

    def extract_episodes(self, episode_description: str = ""):
        """
        Extract all episodes from the episode list and process them.

        Parameters
        ----------
        episode_description : str, optional
            A description of the task to be passed to the extract_episode method (default is '').

        Raises
        ------
        Exception
            If an error occurs while processing an episode, it is caught and logged, and processing continues.

        Notes
        -----
        After processing all episodes, the dataset is consolidated.
        """
        for episode_path in self.episode_list:
            try:
                self.extract_episode(episode_path, task_description=episode_description)
            except Exception as e:
                self.logger.error(f"Error processing episode {episode_path}: {e}")
                traceback.print_exc()
                continue
        self.dataset.consolidate()

    def push_dataset_to_hub(
        self,
        dataset_tags: list[str] | None = None,
        private: bool = False,
        push_videos: bool = True,
        license: str | None = "apache-2.0",
    ):
        """
        Push the dataset to the Hugging Face Hub.

        Parameters
        ----------
        dataset_tags : list of str, optional
            A list of tags to associate with the dataset on the Hub. Default is None.
        private : bool, optional
            If True, the dataset will be private. Default is False.
        push_videos : bool, optional
            If True, videos will be pushed along with the dataset. Default is True.
        license : str, optional
            The license under which the dataset is released. Default is "apache-2.0".

        Returns
        -------
        None
        """
        self.logger.info(f"Pushing dataset to the Hugging Face Hub. ID: {self.dataset_repo_id} ...")
        self.dataset.push_to_hub(
            tags=dataset_tags,
            license=license,
            push_videos=push_videos,
            private=private,
        )

    def init_lerobot_dataset(self):
        """
        Initialize the LeRobot dataset.

        This method cleans the cache if the dataset already exists and then creates a new LeRobot dataset.

        Returns
        -------
        LeRobotDataset
            The initialized LeRobot dataset.
        """
        # Clean the cache if the dataset already exists
        if os.path.exists(LEROBOT_HOME / self.dataset_repo_id):
            shutil.rmtree(LEROBOT_HOME / self.dataset_repo_id)
        self.dataset = LeRobotDataset.create(
            repo_id=self.dataset_repo_id,
            fps=self.fps,
            robot_type=self.robot_type,
            features=self.features,
            image_writer_threads=self.image_writer_threads,
            image_writer_processes=self.image_writer_processes,
        )

        return self.dataset
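

# Programmatic usage (a minimal sketch mirroring main() below; the path and repo id are
# placeholders, not values taken from this repository):
#   converter = DatasetConverter(
#       raw_path="/path/to/raw_episodes",
#       dataset_repo_id="user/aloha_dataset",
#       fps=50,
#       robot_type="aloha-stationary",
#   )
#   converter.init_lerobot_dataset()
#   converter.extract_episodes(episode_description="Aloha recorded dataset.")
#   converter.push_dataset_to_hub(dataset_tags=AlohaHD5Extractor.TAGS)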


def str2bool(value):
    if isinstance(value, bool):
        return value
    value = value.lower()
    if value in ("yes", "true", "t", "y", "1"):
        return True
    elif value in ("no", "false", "f", "n", "0"):
        return False
    else:
        raise argparse.ArgumentTypeError("Boolean value expected.")


def main():
    """
    Convert an Aloha HD5 dataset and push it to the Hugging Face hub.

    This script processes raw HDF5 files from the Aloha dataset, converts them into the LeRobot format,
    and optionally uploads the dataset to the Hugging Face hub.

    Parameters
    ----------
    --raw-path : Path
        Directory containing the raw HDF5 files.
    --dataset-repo-id : str
        Repository ID where the dataset will be stored.
    --fps : int
        Frames per second for the dataset.
    --description : str, optional
        Description of the dataset. Default is "Aloha recorded dataset.".
    --robot-type : str, optional
        Type of robot, either "aloha-stationary" or "aloha-mobile". Default is "aloha-stationary".
    --private : bool, optional
        Set to True to make the dataset private. Default is False.
    --push : bool, optional
        Set to True to push the dataset to the hub. Default is True.
    --license : str, optional
        License for the dataset. Default is "apache-2.0".
    --image-compressed : bool, optional
        Set to True if the images are compressed. Default is True.
    --video-encoding : bool, optional
        Set to True to encode images as videos. Default is True.
    --nproc : int, optional
        Number of image writer processes. Default is 10.
    --nthreads : int, optional
        Number of image writer threads. Default is 5.
    """
    parser = argparse.ArgumentParser(description="Convert Aloha HD5 dataset and push to Hugging Face hub.")
    parser.add_argument(
        "--raw-path", type=Path, required=True, help="Directory containing the raw hdf5 files."
    )
    parser.add_argument(
        "--dataset-repo-id", type=str, required=True, help="Repository ID where the dataset will be stored."
    )
    parser.add_argument("--fps", type=int, required=True, help="Frames per second for the dataset.")
    parser.add_argument(
        "--description", type=str, help="Description of the dataset.", default="Aloha recorded dataset."
    )

    parser.add_argument(
        "--robot-type",
        type=str,
        choices=["aloha-stationary", "aloha-mobile"],
        default="aloha-stationary",
        help="Type of robot.",
    )
    parser.add_argument(
        "--private", type=str2bool, default=False, help="Set to True to make the dataset private."
    )
    parser.add_argument(
        "--push", type=str2bool, default=True, help="Set to True to push the dataset to the hub."
    )
    parser.add_argument("--license", type=str, default="apache-2.0", help="License for the dataset.")
    parser.add_argument(
        "--image-compressed", type=str2bool, default=True, help="Set to True if the images are compressed."
    )
    parser.add_argument(
        "--video-encoding", type=str2bool, default=True, help="Set to True to encode images as videos."
    )

    parser.add_argument("--nproc", type=int, default=10, help="Number of image writer processes.")
    parser.add_argument("--nthreads", type=int, default=5, help="Number of image writer threads.")

    args = parser.parse_args()

    converter = DatasetConverter(
        raw_path=args.raw_path,
        dataset_repo_id=args.dataset_repo_id,
        fps=args.fps,
        robot_type=args.robot_type,
        image_compressed=args.image_compressed,
        encode_as_videos=args.video_encoding,
        image_writer_processes=args.nproc,
        image_writer_threads=args.nthreads,
    )
    converter.init_lerobot_dataset()
    converter.extract_episodes(episode_description=args.description)

    if args.push:
        converter.push_dataset_to_hub(
            dataset_tags=AlohaHD5Extractor.TAGS, private=args.private, push_videos=True, license=args.license
        )
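

# Example invocation (a sketch; the script name, path, and repo id are placeholders,
# while the flag names match the argparse definitions above):
#   python convert_aloha_hd5_to_lerobot.py \
#       --raw-path /path/to/raw_episodes \
#       --dataset-repo-id user/aloha_dataset \
#       --fps 50 \
#       --robot-type aloha-stationary \
#       --image-compressed true \
#       --video-encoding true \
#       --push true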


if __name__ == "__main__":
    main()
@@ -0,0 +1,82 @@
from pathlib import Path

import cv2
import h5py
import torch

from lerobot.common.datasets.lerobot_dataset import LeRobotDataset

data_path = Path("/home/ccop/code/aloha_data")


def get_features(hdf5_file):
    topics = []
    features = {}
    # Collect all dataset names in the HDF5 file
    hdf5_file.visititems(lambda name, obj: topics.append(name) if isinstance(obj, h5py.Dataset) else None)
    for topic in topics:
        if "images" in topic.split("/"):
            features[topic.replace("/", ".")] = {
                "dtype": "image",
                "shape": cv2.imdecode(hdf5_file[topic][0], 1).transpose(2, 0, 1).shape,
                "names": None,
            }
        elif "compress_len" in topic.split("/"):
            continue
        else:
            features[topic.replace("/", ".")] = {
                "dtype": str(hdf5_file[topic][0].dtype),
                "shape": hdf5_file[topic][0].shape,
                "names": None,
            }

    return features


def extract_episode(episode_path, features, n_frames, dataset):
    with h5py.File(episode_path, "r") as file:
        # Decode every feature of every frame and add it to the dataset
        for frame_idx in range(n_frames):
            frame = {}
            for feature in features:
                if "images" in feature.split("."):
                    frame[feature] = torch.from_numpy(
                        cv2.imdecode(file[feature.replace(".", "/")][frame_idx], 1).transpose(2, 0, 1)
                    )
                else:
                    frame[feature] = torch.from_numpy(file[feature.replace(".", "/")][frame_idx])

            dataset.add_frame(frame)


def get_dataset_properties(raw_folder):
    from os import listdir

    episode_list = listdir(raw_folder)
    with h5py.File(raw_folder / episode_list[0], "r") as file:
        features = get_features(file)
        n_frames = file["observations/images/cam_high"][:].shape[0]
    return features, n_frames


if __name__ == "__main__":
    raw_folder = data_path.absolute() / "aloha_stationary_replay_test"
    episode_file = "episode_0.hdf5"

    features, n_frames = get_dataset_properties(raw_folder)

    dataset = LeRobotDataset.create(
        repo_id="ccop/aloha_stationary_replay_test_v3",
        fps=50,
        robot_type="aloha-stationary",
        features=features,
        image_writer_threads=4,
    )

    extract_episode(raw_folder / episode_file, features, n_frames, dataset)
    print("save episode!")
    dataset.save_episode(
        task="move_cube",
    )
    dataset.consolidate()
    dataset.push_to_hub()