Enhance dataset cards

2024-11-20 11:57:27 +01:00 · 2024-11-20 11:57:27 +01:00 · c6ad495176
parent f43e5d07f5
commit c6ad495176
6 changed files with 265 additions and 110 deletions
--- a/lerobot/common/datasets/card_template.md
+++ b/lerobot/common/datasets/card_template.md
@ -0,0 +1,27 @@
+---
+# For reference on dataset card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/datasetcard.md?plain=1
+# Doc / guide: https://huggingface.co/docs/hub/datasets-cards
+{{ card_data }}
+---
+
+This dataset was created using [LeRobot](https://github.com/huggingface/lerobot).
+
+## Dataset Description
+
+{{ dataset_description | default("", true) }}
+
+- **Homepage:** {{ url | default("[More Information Needed]", true)}}
+- **Paper [optional]:** {{ paper | default("[More Information Needed]", true)}}
+- **License:** {{ license | default("[More Information Needed]", true)}}
+
+## Dataset Structure
+
+{{ dataset_structure | default("[More Information Needed]", true)}}
+
+## Citation [optional]
+
+**BibTeX:**
+
+```bibtex
+{{ citation_bibtex | default("[More Information Needed]", true)}}
+```
--- a/lerobot/common/datasets/lerobot_dataset.py
+++ b/lerobot/common/datasets/lerobot_dataset.py
@ -467,10 +467,10 @@ class LeRobotDataset(torch.utils.data.Dataset):
    def push_to_hub(
        self,
        tags: list | None = None,
-        text: str | None = None,
        license: str | None = "apache-2.0",
        push_videos: bool = True,
        private: bool = False,
+        **card_kwargs,
    ) -> None:
        if not self.consolidated:
            raise RuntimeError(
@ -495,7 +495,9 @@ class LeRobotDataset(torch.utils.data.Dataset):
            repo_type="dataset",
            ignore_patterns=ignore_patterns,
        )
-        card = create_lerobot_dataset_card(tags=tags, text=text, info=self.meta.info, license=license)
+        card = create_lerobot_dataset_card(
+            tags=tags, dataset_info=self.meta.info, license=license, **card_kwargs
+        )
        card.push_to_hub(repo_id=self.repo_id, repo_type="dataset")
        create_branch(repo_id=self.repo_id, branch=CODEBASE_VERSION, repo_type="dataset")

--- a/lerobot/common/datasets/utils.py
+++ b/lerobot/common/datasets/utils.py
@ -27,7 +27,7 @@ import numpy as np
 import pyarrow.compute as pc
 import torch
 from datasets.table import embed_table_storage
-from huggingface_hub import DatasetCard, HfApi
+from huggingface_hub import DatasetCard, DatasetCardData, HfApi
 from PIL import Image as PILImage
 from torchvision import transforms

@ -50,6 +50,8 @@ DATASET_CARD_TEMPLATE = """
 ---
 This dataset was created using [LeRobot](https://github.com/huggingface/lerobot).

+## {}
+
 """

 DEFAULT_FEATURES = {
@ -468,41 +470,33 @@ def create_branch(repo_id, *, branch: str, repo_type: str | None = None) -> None

 def create_lerobot_dataset_card(
    tags: list | None = None,
-    text: str | None = None,
-    info: dict | None = None,
-    license: str | None = None,
-    url: str | None = None,
-    citation: str | None = None,
-    arxiv: str | None = None,
+    dataset_info: dict | None = None,
+    **kwargs,
 ) -> DatasetCard:
    """
-    If specified, license must be one of https://huggingface.co/docs/hub/repositories-licenses.
+    Build the dataset card from the `card_template.md` file located next to this module.
+    Keyword arguments will be used to replace values in that template; `dataset_info`, if given,
+    is rendered as a JSON code block into its `dataset_structure` placeholder.
+    Note: If specified, license must be one of https://huggingface.co/docs/hub/repositories-licenses.
    """
-    card = DatasetCard(DATASET_CARD_TEMPLATE)
-    card.data.configs = [
-        {
-            "config_name": "default",
-            "data_files": "data/*/*.parquet",
-        }
-    ]
-    card.data.task_categories = ["robotics"]
-    card.data.license = license
-    card.data.tags = ["LeRobot"]
-    if license:
-        card.data.license = license
+    from pathlib import Path  # function-scope import so this block stands on its own
+
+    card_tags = ["LeRobot"]
    if tags:
-        card.data.tags += tags
-    if url:
-        card.text += f"## Homepage:\n{url}\n"
-    if text:
-        card.text += f"{text}\n"
-    if info:
-        card.text += "## Info\n"
-        card.text += "[meta/info.json](meta/info.json)\n"
-        card.text += f"```json\n{json.dumps(info, indent=4)}\n```"
-    if citation:
-        card.text += "## Citation\n"
-        card.text += f"```\n{citation}\n```\n"
-    if arxiv:
-        card.data.arxiv = arxiv
-    return card
+        card_tags += tags
+    if dataset_info:
+        dataset_structure = "[meta/info.json](meta/info.json):\n"
+        dataset_structure += f"```json\n{json.dumps(dataset_info, indent=4)}\n```\n"
+        kwargs = {**kwargs, "dataset_structure": dataset_structure}
+    card_data = DatasetCardData(
+        license=kwargs.get("license"),
+        tags=card_tags,
+        task_categories=["robotics"],
+        configs=[{"config_name": "default", "data_files": "data/*/*.parquet"}],
+    )
+    # Resolve the template next to this module: a "./lerobot/..." path only works from the repo root.
+    return DatasetCard.from_template(
+        card_data=card_data,
+        template_path=str(Path(__file__).parent / "card_template.md"),
+        **kwargs,
+    )
--- a/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py
+++ b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py
@ -14,84 +14,220 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+"""
+Note: Since the original Aloha datasets don't use shadow motors, you need to comment those out in
+lerobot/configs/robot/aloha.yaml before running this script.
+"""
+
 import traceback
 from pathlib import Path
+from textwrap import dedent

-from lerobot import available_datasets
 from lerobot.common.datasets.v2.convert_dataset_v1_to_v2 import convert_dataset, parse_robot_config

 LOCAL_DIR = Path("data/")
-ALOHA_SINGLE_TASKS_REAL = {
-    "aloha_mobile_cabinet": "Open the top cabinet, store the pot inside it then close the cabinet.",
-    "aloha_mobile_chair": "Push the chairs in front of the desk to place them against it.",
-    "aloha_mobile_elevator": "Take the elevator to the 1st floor.",
-    "aloha_mobile_shrimp": "Sauté the raw shrimp on both sides, then serve it in the bowl.",
-    "aloha_mobile_wash_pan": "Pick up the pan, rinse it in the sink and then place it in the drying rack.",
-    "aloha_mobile_wipe_wine": "Pick up the wet cloth on the faucet and use it to clean the spilled wine on the table and underneath the glass.",
-    "aloha_static_battery": "Place the battery into the slot of the remote controller.",
-    "aloha_static_candy": "Pick up the candy and unwrap it.",
-    "aloha_static_coffee": "Place the coffee capsule inside the capsule container, then place the cup onto the center of the cup tray, then push the 'Hot Water' and 'Travel Mug' buttons.",
-    "aloha_static_coffee_new": "Place the coffee capsule inside the capsule container, then place the cup onto the center of the cup tray.",
-    "aloha_static_cups_open": "Pick up the plastic cup and open its lid.",
-    "aloha_static_fork_pick_up": "Pick up the fork and place it on the plate.",
-    "aloha_static_pingpong_test": "Transfer one of the two balls in the right glass into the left glass, then transfer it back to the right glass.",
-    "aloha_static_pro_pencil": "Pick up the pencil with the right arm, hand it over to the left arm then place it back onto the table.",
-    "aloha_static_screw_driver": "Pick up the screwdriver with the right arm, hand it over to the left arm then place it into the cup.",
-    "aloha_static_tape": "Cut a small piece of tape from the tape dispenser then place it on the cardboard box's edge.",
-    "aloha_static_thread_velcro": "Pick up the velcro cable tie with the left arm, then insert the end of the velcro tie into the other end's loop with the right arm.",
-    "aloha_static_towel": "Pick up a piece of paper towel and place it on the spilled liquid.",
-    "aloha_static_vinh_cup": "Pick up the platic cup with the right arm, then pop its lid open with the left arm.",
-    "aloha_static_vinh_cup_left": "Pick up the platic cup with the left arm, then pop its lid open with the right arm.",
-    "aloha_static_ziploc_slide": "Slide open the ziploc bag.",
-}
+
 ALOHA_CONFIG = Path("lerobot/configs/robot/aloha.yaml")
+ALOHA_MOBILE_INFO = {
+    "robot_config": parse_robot_config(ALOHA_CONFIG),
+    "license": "mit",
+    "url": "https://mobile-aloha.github.io/",
+    "paper": "https://arxiv.org/abs/2401.02117",
+    "citation_bibtex": dedent("""
+        @inproceedings{fu2024mobile,
+            author    = {Fu, Zipeng and Zhao, Tony Z. and Finn, Chelsea},
+            title     = {Mobile ALOHA: Learning Bimanual Mobile Manipulation with Low-Cost Whole-Body Teleoperation},
+            booktitle = {arXiv},
+            year      = {2024},
+        }""").lstrip(),
+}
+ALOHA_STATIC_INFO = {
+    "robot_config": parse_robot_config(ALOHA_CONFIG),
+    "license": "mit",
+    "url": "https://tonyzhaozh.github.io/aloha/",
+    "paper": "https://arxiv.org/abs/2304.13705",
+    "citation_bibtex": dedent("""
+        @article{Zhao2023LearningFB,
+            title={Learning Fine-Grained Bimanual Manipulation with Low-Cost Hardware},
+            author={Tony Zhao and Vikash Kumar and Sergey Levine and Chelsea Finn},
+            journal={RSS},
+            year={2023},
+            volume={abs/2304.13705},
+            url={https://arxiv.org/abs/2304.13705}
+        }""").lstrip(),
+}
+PUSHT_INFO = {
+    "license": "mit",
+    "url": "https://diffusion-policy.cs.columbia.edu/",
+    "paper": "https://arxiv.org/abs/2303.04137v5",
+    "citation_bibtex": dedent("""
+        @article{chi2024diffusionpolicy,
+            author = {Cheng Chi and Zhenjia Xu and Siyuan Feng and Eric Cousineau and Yilun Du and Benjamin Burchfiel and Russ Tedrake and Shuran Song},
+            title ={Diffusion Policy: Visuomotor Policy Learning via Action Diffusion},
+            journal = {The International Journal of Robotics Research},
+            year = {2024},
+        }""").lstrip(),
+}
+XARM_INFO = {
+    "license": "mit",
+    "url": "https://www.nicklashansen.com/td-mpc/",
+    "paper": "https://arxiv.org/abs/2203.04955",
+    "citation_bibtex": dedent("""
+        @inproceedings{Hansen2022tdmpc,
+            title={Temporal Difference Learning for Model Predictive Control},
+            author={Nicklas Hansen and Xiaolong Wang and Hao Su},
+            booktitle={ICML},
+            year={2022}
+        }
+    """).strip(),  # drop the blank first/last lines so the BibTeX matches the other *_INFO citations
+}
+UNITREEH_INFO = {
+    "license": "apache-2.0",
+}
+
+
+DATASETS = {
+    "aloha_mobile_cabinet": {
+        "single_task": "Open the top cabinet, store the pot inside it then close the cabinet.",
+        **ALOHA_MOBILE_INFO,
+    },
+    "aloha_mobile_chair": {
+        "single_task": "Push the chairs in front of the desk to place them against it.",
+        **ALOHA_MOBILE_INFO,
+    },
+    "aloha_mobile_elevator": {
+        "single_task": "Take the elevator to the 1st floor.",
+        **ALOHA_MOBILE_INFO,
+    },
+    "aloha_mobile_shrimp": {
+        "single_task": "Sauté the raw shrimp on both sides, then serve it in the bowl.",
+        **ALOHA_MOBILE_INFO,
+    },
+    "aloha_mobile_wash_pan": {
+        "single_task": "Pick up the pan, rinse it in the sink and then place it in the drying rack.",
+        **ALOHA_MOBILE_INFO,
+    },
+    "aloha_mobile_wipe_wine": {
+        "single_task": "Pick up the wet cloth on the faucet and use it to clean the spilled wine on the table and underneath the glass.",
+        **ALOHA_MOBILE_INFO,
+    },
+    "aloha_static_battery": {
+        "single_task": "Place the battery into the slot of the remote controller.",
+        **ALOHA_STATIC_INFO,
+    },
+    "aloha_static_candy": {"single_task": "Pick up the candy and unwrap it.", **ALOHA_STATIC_INFO},
+    "aloha_static_coffee": {
+        "single_task": "Place the coffee capsule inside the capsule container, then place the cup onto the center of the cup tray, then push the 'Hot Water' and 'Travel Mug' buttons.",
+        **ALOHA_STATIC_INFO,
+    },
+    "aloha_static_coffee_new": {
+        "single_task": "Place the coffee capsule inside the capsule container, then place the cup onto the center of the cup tray.",
+        **ALOHA_STATIC_INFO,
+    },
+    "aloha_static_cups_open": {
+        "single_task": "Pick up the plastic cup and open its lid.",
+        **ALOHA_STATIC_INFO,
+    },
+    "aloha_static_fork_pick_up": {
+        "single_task": "Pick up the fork and place it on the plate.",
+        **ALOHA_STATIC_INFO,
+    },
+    "aloha_static_pingpong_test": {
+        "single_task": "Transfer one of the two balls in the right glass into the left glass, then transfer it back to the right glass.",
+        **ALOHA_STATIC_INFO,
+    },
+    "aloha_static_pro_pencil": {
+        "single_task": "Pick up the pencil with the right arm, hand it over to the left arm then place it back onto the table.",
+        **ALOHA_STATIC_INFO,
+    },
+    "aloha_static_screw_driver": {
+        "single_task": "Pick up the screwdriver with the right arm, hand it over to the left arm then place it into the cup.",
+        **ALOHA_STATIC_INFO,
+    },
+    "aloha_static_tape": {
+        "single_task": "Cut a small piece of tape from the tape dispenser then place it on the cardboard box's edge.",
+        **ALOHA_STATIC_INFO,
+    },
+    "aloha_static_thread_velcro": {
+        "single_task": "Pick up the velcro cable tie with the left arm, then insert the end of the velcro tie into the other end's loop with the right arm.",
+        **ALOHA_STATIC_INFO,
+    },
+    "aloha_static_towel": {
+        "single_task": "Pick up a piece of paper towel and place it on the spilled liquid.",
+        **ALOHA_STATIC_INFO,
+    },
+    "aloha_static_vinh_cup": {
+        "single_task": "Pick up the plastic cup with the right arm, then pop its lid open with the left arm.",
+        **ALOHA_STATIC_INFO,
+    },
+    "aloha_static_vinh_cup_left": {
+        "single_task": "Pick up the plastic cup with the left arm, then pop its lid open with the right arm.",
+        **ALOHA_STATIC_INFO,
+    },
+    "aloha_static_ziploc_slide": {"single_task": "Slide open the ziploc bag.", **ALOHA_STATIC_INFO},
+    "aloha_sim_insertion_scripted": {"single_task": "Insert the peg into the socket.", **ALOHA_STATIC_INFO},
+    "aloha_sim_insertion_scripted_image": {
+        "single_task": "Insert the peg into the socket.",
+        **ALOHA_STATIC_INFO,
+    },
+    "aloha_sim_insertion_human": {"single_task": "Insert the peg into the socket.", **ALOHA_STATIC_INFO},
+    "aloha_sim_insertion_human_image": {
+        "single_task": "Insert the peg into the socket.",
+        **ALOHA_STATIC_INFO,
+    },
+    "aloha_sim_transfer_cube_scripted": {
+        "single_task": "Pick up the cube with the right arm and transfer it to the left arm.",
+        **ALOHA_STATIC_INFO,
+    },
+    "aloha_sim_transfer_cube_scripted_image": {
+        "single_task": "Pick up the cube with the right arm and transfer it to the left arm.",
+        **ALOHA_STATIC_INFO,
+    },
+    "aloha_sim_transfer_cube_human": {
+        "single_task": "Pick up the cube with the right arm and transfer it to the left arm.",
+        **ALOHA_STATIC_INFO,
+    },
+    "aloha_sim_transfer_cube_human_image": {
+        "single_task": "Pick up the cube with the right arm and transfer it to the left arm.",
+        **ALOHA_STATIC_INFO,
+    },
+    "pusht": {"single_task": "Push the T-shaped block onto the T-shaped target.", **PUSHT_INFO},
+    "pusht_image": {"single_task": "Push the T-shaped block onto the T-shaped target.", **PUSHT_INFO},
+    "unitreeh1_fold_clothes": {"single_task": "Fold the sweatshirt.", **UNITREEH_INFO},
+    "unitreeh1_rearrange_objects": {"single_task": "Put the object into the bin.", **UNITREEH_INFO},
+    "unitreeh1_two_robot_greeting": {
+        "single_task": "Greet the other robot with a high five.",
+        **UNITREEH_INFO,
+    },
+    "unitreeh1_warehouse": {
+        "single_task": "Grab the spray paint on the shelf and place it in the bin on top of the robot dog.",
+        **UNITREEH_INFO,
+    },
+    "xarm_lift_medium": {"single_task": "Pick up the cube and lift it.", **XARM_INFO},
+    "xarm_lift_medium_image": {"single_task": "Pick up the cube and lift it.", **XARM_INFO},
+    "xarm_lift_medium_replay": {"single_task": "Pick up the cube and lift it.", **XARM_INFO},
+    "xarm_lift_medium_replay_image": {"single_task": "Pick up the cube and lift it.", **XARM_INFO},
+    "xarm_push_medium": {"single_task": "Push the cube onto the target.", **XARM_INFO},
+    "xarm_push_medium_image": {"single_task": "Push the cube onto the target.", **XARM_INFO},
+    "xarm_push_medium_replay": {"single_task": "Push the cube onto the target.", **XARM_INFO},
+    "xarm_push_medium_replay_image": {"single_task": "Push the cube onto the target.", **XARM_INFO},
+    "umi_cup_in_the_wild": {
+        "single_task": "Put the cup on the plate.",
+        "license": "apache-2.0",
+    },
+}


 def batch_convert():
    status = {}
    logfile = LOCAL_DIR / "conversion_log.txt"
-    for num, repo_id in enumerate(available_datasets):
-        print(f"\nConverting {repo_id} ({num}/{len(available_datasets)})")
+    # assert set(DATASETS) == set(id_.split("/")[1] for id_ in available_datasets)
+    for num, (name, kwargs) in enumerate(DATASETS.items()):
+        repo_id = f"lerobot/{name}"
+        print(f"\nConverting {repo_id} ({num}/{len(DATASETS)})")
        print("---------------------------------------------------------")
-        name = repo_id.split("/")[1]
-        single_task, tasks_col, robot_config = None, None, None
-
-        if "aloha" in name:
-            robot_config = parse_robot_config(ALOHA_CONFIG)
-            if "sim_insertion" in name:
-                single_task = "Insert the peg into the socket."
-            elif "sim_transfer" in name:
-                single_task = "Pick up the cube with the right arm and transfer it to the left arm."
-            else:
-                single_task = ALOHA_SINGLE_TASKS_REAL[name]
-        elif "unitreeh1" in name:
-            if "fold_clothes" in name:
-                single_task = "Fold the sweatshirt."
-            elif "rearrange_objects" in name or "rearrange_objects" in name:
-                single_task = "Put the object into the bin."
-            elif "two_robot_greeting" in name:
-                single_task = "Greet the other robot with a high five."
-            elif "warehouse" in name:
-                single_task = (
-                    "Grab the spray paint on the shelf and place it in the bin on top of the robot dog."
-                )
-        elif name != "columbia_cairlab_pusht_real" and "pusht" in name:
-            single_task = "Push the T-shaped block onto the T-shaped target."
-        elif "xarm_lift" in name or "xarm_push" in name:
-            single_task = "Pick up the cube and lift it."
-        elif name == "umi_cup_in_the_wild":
-            single_task = "Put the cup on the plate."
-        else:
-            tasks_col = "language_instruction"
-
        try:
-            convert_dataset(
-                repo_id=repo_id,
-                local_dir=LOCAL_DIR,
-                single_task=single_task,
-                tasks_col=tasks_col,
-                robot_config=robot_config,
-            )
+            convert_dataset(repo_id, LOCAL_DIR, **kwargs)
            status = f"{repo_id}: success."
            with open(logfile, "a") as file:
                file.write(status + "\n")
--- a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py
+++ b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py
@ -176,6 +176,7 @@ def parse_robot_config(config_path: Path, config_overrides: list[str] | None = N
        "robot_type": robot_cfg["robot_type"],
        "names": {
            "observation.state": state_names,
+            "observation.effort": state_names,
            "action": action_names,
        },
    }
@ -436,11 +437,8 @@ def convert_dataset(
    tasks_path: Path | None = None,
    tasks_col: Path | None = None,
    robot_config: dict | None = None,
-    license: str | None = None,
-    url: str | None = None,
-    arxiv: str | None = None,
-    citation: str | None = None,
    test_branch: str | None = None,
+    **card_kwargs,
 ):
    v1 = get_hub_safe_version(repo_id, V16)
    v1x_dir = local_dir / V16 / repo_id
@ -566,9 +564,7 @@ def convert_dataset(
    }
    write_json(metadata_v2_0, v20_dir / INFO_PATH)
    convert_stats_to_json(v1x_dir, v20_dir)
-    card = create_lerobot_dataset_card(
-        tags=repo_tags, info=metadata_v2_0, license=license, url=url, citation=citation, arxiv=arxiv
-    )
+    card = create_lerobot_dataset_card(tags=repo_tags, dataset_info=metadata_v2_0, **card_kwargs)

    with contextlib.suppress(EntryNotFoundError):
        hub_api.delete_folder(repo_id=repo_id, path_in_repo="data", repo_type="dataset", revision=branch)
--- a/lerobot/scripts/push_dataset_to_hub.py
+++ b/lerobot/scripts/push_dataset_to_hub.py
@ -120,11 +120,11 @@ def push_dataset_card_to_hub(
    repo_id: str,
    revision: str | None,
    tags: list | None = None,
-    text: str | None = None,
    license: str = "apache-2.0",
+    **card_kwargs,
 ):
    """Creates and pushes a LeRobotDataset Card with appropriate tags to easily find it on the hub."""
-    card = create_lerobot_dataset_card(tags=tags, text=text, license=license)
+    card = create_lerobot_dataset_card(tags=tags, license=license, **card_kwargs)
    card.push_to_hub(repo_id=repo_id, repo_type="dataset", revision=revision)