Enhance dataset cards

This commit is contained in:
Simon Alibert 2024-11-20 11:57:27 +01:00
parent f43e5d07f5
commit c6ad495176
6 changed files with 265 additions and 110 deletions

View File

@ -0,0 +1,27 @@
---
# For reference on dataset card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/datasetcard.md?plain=1
# Doc / guide: https://huggingface.co/docs/hub/datasets-cards
{{ card_data }}
---
This dataset was created using [LeRobot](https://github.com/huggingface/lerobot).
## Dataset Description
{{ dataset_description | default("", true) }}
- **Homepage:** {{ url | default("[More Information Needed]", true)}}
- **Paper [optional]:** {{ paper | default("[More Information Needed]", true)}}
- **License:** {{ license | default("[More Information Needed]", true)}}
## Dataset Structure
{{ dataset_structure | default("[More Information Needed]", true)}}
## Citation [optional]
**BibTeX:**
```bibtex
{{ citation_bibtex | default("[More Information Needed]", true)}}
```

View File

@ -467,10 +467,10 @@ class LeRobotDataset(torch.utils.data.Dataset):
def push_to_hub(
self,
tags: list | None = None,
text: str | None = None,
license: str | None = "apache-2.0",
push_videos: bool = True,
private: bool = False,
**card_kwargs,
) -> None:
if not self.consolidated:
raise RuntimeError(
@ -495,7 +495,9 @@ class LeRobotDataset(torch.utils.data.Dataset):
repo_type="dataset",
ignore_patterns=ignore_patterns,
)
card = create_lerobot_dataset_card(tags=tags, text=text, info=self.meta.info, license=license)
card = create_lerobot_dataset_card(
tags=tags, dataset_info=self.meta.info, license=license, **card_kwargs
)
card.push_to_hub(repo_id=self.repo_id, repo_type="dataset")
create_branch(repo_id=self.repo_id, branch=CODEBASE_VERSION, repo_type="dataset")

View File

@ -27,7 +27,7 @@ import numpy as np
import pyarrow.compute as pc
import torch
from datasets.table import embed_table_storage
from huggingface_hub import DatasetCard, HfApi
from huggingface_hub import DatasetCard, DatasetCardData, HfApi
from PIL import Image as PILImage
from torchvision import transforms
@ -50,6 +50,8 @@ DATASET_CARD_TEMPLATE = """
---
This dataset was created using [LeRobot](https://github.com/huggingface/lerobot).
## {}
"""
DEFAULT_FEATURES = {
@ -468,41 +470,33 @@ def create_branch(repo_id, *, branch: str, repo_type: str | None = None) -> None
def create_lerobot_dataset_card(
tags: list | None = None,
text: str | None = None,
info: dict | None = None,
license: str | None = None,
url: str | None = None,
citation: str | None = None,
arxiv: str | None = None,
dataset_info: dict | None = None,
**kwargs,
) -> DatasetCard:
"""
If specified, license must be one of https://huggingface.co/docs/hub/repositories-licenses.
Keyword arguments will be used to replace values in ./lerobot/common/datasets/card_template.md.
Note: If specified, license must be one of https://huggingface.co/docs/hub/repositories-licenses.
"""
card = DatasetCard(DATASET_CARD_TEMPLATE)
card.data.configs = [
{
"config_name": "default",
"data_files": "data/*/*.parquet",
}
]
card.data.task_categories = ["robotics"]
card.data.license = license
card.data.tags = ["LeRobot"]
if license:
card.data.license = license
card_tags = ["LeRobot"]
if tags:
card.data.tags += tags
if url:
card.text += f"## Homepage:\n{url}\n"
if text:
card.text += f"{text}\n"
if info:
card.text += "## Info\n"
card.text += "[meta/info.json](meta/info.json)\n"
card.text += f"```json\n{json.dumps(info, indent=4)}\n```"
if citation:
card.text += "## Citation\n"
card.text += f"```\n{citation}\n```\n"
if arxiv:
card.data.arxiv = arxiv
return card
card_tags += tags
if dataset_info:
dataset_structure = "[meta/info.json](meta/info.json):\n"
dataset_structure += f"```json\n{json.dumps(dataset_info, indent=4)}\n```\n"
kwargs = {**kwargs, "dataset_structure": dataset_structure}
card_data = DatasetCardData(
license=kwargs.get("license"),
tags=card_tags,
task_categories=["robotics"],
configs=[
{
"config_name": "default",
"data_files": "data/*/*.parquet",
}
],
)
return DatasetCard.from_template(
card_data=card_data,
template_path="./lerobot/common/datasets/card_template.md",
**kwargs,
)

View File

@ -14,84 +14,220 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Note: Since the original Aloha datasets don't use shadow motors, you need to comment those out in
lerobot/configs/robot/aloha.yaml before running this script.
"""
import traceback
from pathlib import Path
from textwrap import dedent
from lerobot import available_datasets
from lerobot.common.datasets.v2.convert_dataset_v1_to_v2 import convert_dataset, parse_robot_config
LOCAL_DIR = Path("data/")
ALOHA_SINGLE_TASKS_REAL = {
"aloha_mobile_cabinet": "Open the top cabinet, store the pot inside it then close the cabinet.",
"aloha_mobile_chair": "Push the chairs in front of the desk to place them against it.",
"aloha_mobile_elevator": "Take the elevator to the 1st floor.",
"aloha_mobile_shrimp": "Sauté the raw shrimp on both sides, then serve it in the bowl.",
"aloha_mobile_wash_pan": "Pick up the pan, rinse it in the sink and then place it in the drying rack.",
"aloha_mobile_wipe_wine": "Pick up the wet cloth on the faucet and use it to clean the spilled wine on the table and underneath the glass.",
"aloha_static_battery": "Place the battery into the slot of the remote controller.",
"aloha_static_candy": "Pick up the candy and unwrap it.",
"aloha_static_coffee": "Place the coffee capsule inside the capsule container, then place the cup onto the center of the cup tray, then push the 'Hot Water' and 'Travel Mug' buttons.",
"aloha_static_coffee_new": "Place the coffee capsule inside the capsule container, then place the cup onto the center of the cup tray.",
"aloha_static_cups_open": "Pick up the plastic cup and open its lid.",
"aloha_static_fork_pick_up": "Pick up the fork and place it on the plate.",
"aloha_static_pingpong_test": "Transfer one of the two balls in the right glass into the left glass, then transfer it back to the right glass.",
"aloha_static_pro_pencil": "Pick up the pencil with the right arm, hand it over to the left arm then place it back onto the table.",
"aloha_static_screw_driver": "Pick up the screwdriver with the right arm, hand it over to the left arm then place it into the cup.",
"aloha_static_tape": "Cut a small piece of tape from the tape dispenser then place it on the cardboard box's edge.",
"aloha_static_thread_velcro": "Pick up the velcro cable tie with the left arm, then insert the end of the velcro tie into the other end's loop with the right arm.",
"aloha_static_towel": "Pick up a piece of paper towel and place it on the spilled liquid.",
"aloha_static_vinh_cup": "Pick up the platic cup with the right arm, then pop its lid open with the left arm.",
"aloha_static_vinh_cup_left": "Pick up the platic cup with the left arm, then pop its lid open with the right arm.",
"aloha_static_ziploc_slide": "Slide open the ziploc bag.",
}
ALOHA_CONFIG = Path("lerobot/configs/robot/aloha.yaml")
ALOHA_MOBILE_INFO = {
"robot_config": parse_robot_config(ALOHA_CONFIG),
"license": "mit",
"url": "https://mobile-aloha.github.io/",
"paper": "https://arxiv.org/abs/2401.02117",
"citation_bibtex": dedent("""
@inproceedings{fu2024mobile,
author = {Fu, Zipeng and Zhao, Tony Z. and Finn, Chelsea},
title = {Mobile ALOHA: Learning Bimanual Mobile Manipulation with Low-Cost Whole-Body Teleoperation},
booktitle = {arXiv},
year = {2024},
}""").lstrip(),
}
ALOHA_STATIC_INFO = {
"robot_config": parse_robot_config(ALOHA_CONFIG),
"license": "mit",
"url": "https://tonyzhaozh.github.io/aloha/",
"paper": "https://arxiv.org/abs/2304.13705",
"citation_bibtex": dedent("""
@article{Zhao2023LearningFB,
title={Learning Fine-Grained Bimanual Manipulation with Low-Cost Hardware},
author={Tony Zhao and Vikash Kumar and Sergey Levine and Chelsea Finn},
journal={RSS},
year={2023},
volume={abs/2304.13705},
url={https://arxiv.org/abs/2304.13705}
}""").lstrip(),
}
PUSHT_INFO = {
"license": "mit",
"url": "https://diffusion-policy.cs.columbia.edu/",
"paper": "https://arxiv.org/abs/2303.04137v5",
"citation_bibtex": dedent("""
@article{chi2024diffusionpolicy,
author = {Cheng Chi and Zhenjia Xu and Siyuan Feng and Eric Cousineau and Yilun Du and Benjamin Burchfiel and Russ Tedrake and Shuran Song},
title ={Diffusion Policy: Visuomotor Policy Learning via Action Diffusion},
journal = {The International Journal of Robotics Research},
year = {2024},
}""").lstrip(),
}
XARM_INFO = {
"license": "mit",
"url": "https://www.nicklashansen.com/td-mpc/",
"paper": "https://arxiv.org/abs/2203.04955",
"citation_bibtex": dedent("""
@inproceedings{Hansen2022tdmpc,
title={Temporal Difference Learning for Model Predictive Control},
author={Nicklas Hansen and Xiaolong Wang and Hao Su},
booktitle={ICML},
year={2022}
}
"""),
}
UNITREEH_INFO = {
"license": "apache-2.0",
}
DATASETS = {
"aloha_mobile_cabinet": {
"single_task": "Open the top cabinet, store the pot inside it then close the cabinet.",
**ALOHA_MOBILE_INFO,
},
"aloha_mobile_chair": {
"single_task": "Push the chairs in front of the desk to place them against it.",
**ALOHA_MOBILE_INFO,
},
"aloha_mobile_elevator": {
"single_task": "Take the elevator to the 1st floor.",
**ALOHA_MOBILE_INFO,
},
"aloha_mobile_shrimp": {
"single_task": "Sauté the raw shrimp on both sides, then serve it in the bowl.",
**ALOHA_MOBILE_INFO,
},
"aloha_mobile_wash_pan": {
"single_task": "Pick up the pan, rinse it in the sink and then place it in the drying rack.",
**ALOHA_MOBILE_INFO,
},
"aloha_mobile_wipe_wine": {
"single_task": "Pick up the wet cloth on the faucet and use it to clean the spilled wine on the table and underneath the glass.",
**ALOHA_MOBILE_INFO,
},
"aloha_static_battery": {
"single_task": "Place the battery into the slot of the remote controller.",
**ALOHA_STATIC_INFO,
},
"aloha_static_candy": {"single_task": "Pick up the candy and unwrap it.", **ALOHA_STATIC_INFO},
"aloha_static_coffee": {
"single_task": "Place the coffee capsule inside the capsule container, then place the cup onto the center of the cup tray, then push the 'Hot Water' and 'Travel Mug' buttons.",
**ALOHA_STATIC_INFO,
},
"aloha_static_coffee_new": {
"single_task": "Place the coffee capsule inside the capsule container, then place the cup onto the center of the cup tray.",
**ALOHA_STATIC_INFO,
},
"aloha_static_cups_open": {
"single_task": "Pick up the plastic cup and open its lid.",
**ALOHA_STATIC_INFO,
},
"aloha_static_fork_pick_up": {
"single_task": "Pick up the fork and place it on the plate.",
**ALOHA_STATIC_INFO,
},
"aloha_static_pingpong_test": {
"single_task": "Transfer one of the two balls in the right glass into the left glass, then transfer it back to the right glass.",
**ALOHA_STATIC_INFO,
},
"aloha_static_pro_pencil": {
"single_task": "Pick up the pencil with the right arm, hand it over to the left arm then place it back onto the table.",
**ALOHA_STATIC_INFO,
},
"aloha_static_screw_driver": {
"single_task": "Pick up the screwdriver with the right arm, hand it over to the left arm then place it into the cup.",
**ALOHA_STATIC_INFO,
},
"aloha_static_tape": {
"single_task": "Cut a small piece of tape from the tape dispenser then place it on the cardboard box's edge.",
**ALOHA_STATIC_INFO,
},
"aloha_static_thread_velcro": {
"single_task": "Pick up the velcro cable tie with the left arm, then insert the end of the velcro tie into the other end's loop with the right arm.",
**ALOHA_STATIC_INFO,
},
"aloha_static_towel": {
"single_task": "Pick up a piece of paper towel and place it on the spilled liquid.",
**ALOHA_STATIC_INFO,
},
"aloha_static_vinh_cup": {
"single_task": "Pick up the platic cup with the right arm, then pop its lid open with the left arm.",
**ALOHA_STATIC_INFO,
},
"aloha_static_vinh_cup_left": {
"single_task": "Pick up the platic cup with the left arm, then pop its lid open with the right arm.",
**ALOHA_STATIC_INFO,
},
"aloha_static_ziploc_slide": {"single_task": "Slide open the ziploc bag.", **ALOHA_STATIC_INFO},
"aloha_sim_insertion_scripted": {"single_task": "Insert the peg into the socket.", **ALOHA_STATIC_INFO},
"aloha_sim_insertion_scripted_image": {
"single_task": "Insert the peg into the socket.",
**ALOHA_STATIC_INFO,
},
"aloha_sim_insertion_human": {"single_task": "Insert the peg into the socket.", **ALOHA_STATIC_INFO},
"aloha_sim_insertion_human_image": {
"single_task": "Insert the peg into the socket.",
**ALOHA_STATIC_INFO,
},
"aloha_sim_transfer_cube_scripted": {
"single_task": "Pick up the cube with the right arm and transfer it to the left arm.",
**ALOHA_STATIC_INFO,
},
"aloha_sim_transfer_cube_scripted_image": {
"single_task": "Pick up the cube with the right arm and transfer it to the left arm.",
**ALOHA_STATIC_INFO,
},
"aloha_sim_transfer_cube_human": {
"single_task": "Pick up the cube with the right arm and transfer it to the left arm.",
**ALOHA_STATIC_INFO,
},
"aloha_sim_transfer_cube_human_image": {
"single_task": "Pick up the cube with the right arm and transfer it to the left arm.",
**ALOHA_STATIC_INFO,
},
"pusht": {"single_task": "Push the T-shaped block onto the T-shaped target.", **PUSHT_INFO},
"pusht_image": {"single_task": "Push the T-shaped block onto the T-shaped target.", **PUSHT_INFO},
"unitreeh1_fold_clothes": {"single_task": "Fold the sweatshirt.", **UNITREEH_INFO},
"unitreeh1_rearrange_objects": {"single_task": "Put the object into the bin.", **UNITREEH_INFO},
"unitreeh1_two_robot_greeting": {
"single_task": "Greet the other robot with a high five.",
**UNITREEH_INFO,
},
"unitreeh1_warehouse": {
"single_task": "Grab the spray paint on the shelf and place it in the bin on top of the robot dog.",
**UNITREEH_INFO,
},
"xarm_lift_medium": {"single_task": "Pick up the cube and lift it.", **XARM_INFO},
"xarm_lift_medium_image": {"single_task": "Pick up the cube and lift it.", **XARM_INFO},
"xarm_lift_medium_replay": {"single_task": "Pick up the cube and lift it.", **XARM_INFO},
"xarm_lift_medium_replay_image": {"single_task": "Pick up the cube and lift it.", **XARM_INFO},
"xarm_push_medium": {"single_task": "Push the cube onto the target.", **XARM_INFO},
"xarm_push_medium_image": {"single_task": "Push the cube onto the target.", **XARM_INFO},
"xarm_push_medium_replay": {"single_task": "Push the cube onto the target.", **XARM_INFO},
"xarm_push_medium_replay_image": {"single_task": "Push the cube onto the target.", **XARM_INFO},
"umi_cup_in_the_wild": {
"single_task": "Put the cup on the plate.",
"license": "apache-2.0",
},
}
def batch_convert():
status = {}
logfile = LOCAL_DIR / "conversion_log.txt"
for num, repo_id in enumerate(available_datasets):
print(f"\nConverting {repo_id} ({num}/{len(available_datasets)})")
# assert set(DATASETS) == set(id_.split("/")[1] for id_ in available_datasets)
for num, (name, kwargs) in enumerate(DATASETS.items()):
repo_id = f"lerobot/{name}"
print(f"\nConverting {repo_id} ({num}/{len(DATASETS)})")
print("---------------------------------------------------------")
name = repo_id.split("/")[1]
single_task, tasks_col, robot_config = None, None, None
if "aloha" in name:
robot_config = parse_robot_config(ALOHA_CONFIG)
if "sim_insertion" in name:
single_task = "Insert the peg into the socket."
elif "sim_transfer" in name:
single_task = "Pick up the cube with the right arm and transfer it to the left arm."
else:
single_task = ALOHA_SINGLE_TASKS_REAL[name]
elif "unitreeh1" in name:
if "fold_clothes" in name:
single_task = "Fold the sweatshirt."
elif "rearrange_objects" in name or "rearrange_objects" in name:
single_task = "Put the object into the bin."
elif "two_robot_greeting" in name:
single_task = "Greet the other robot with a high five."
elif "warehouse" in name:
single_task = (
"Grab the spray paint on the shelf and place it in the bin on top of the robot dog."
)
elif name != "columbia_cairlab_pusht_real" and "pusht" in name:
single_task = "Push the T-shaped block onto the T-shaped target."
elif "xarm_lift" in name or "xarm_push" in name:
single_task = "Pick up the cube and lift it."
elif name == "umi_cup_in_the_wild":
single_task = "Put the cup on the plate."
else:
tasks_col = "language_instruction"
try:
convert_dataset(
repo_id=repo_id,
local_dir=LOCAL_DIR,
single_task=single_task,
tasks_col=tasks_col,
robot_config=robot_config,
)
convert_dataset(repo_id, LOCAL_DIR, **kwargs)
status = f"{repo_id}: success."
with open(logfile, "a") as file:
file.write(status + "\n")

View File

@ -176,6 +176,7 @@ def parse_robot_config(config_path: Path, config_overrides: list[str] | None = N
"robot_type": robot_cfg["robot_type"],
"names": {
"observation.state": state_names,
"observation.effort": state_names,
"action": action_names,
},
}
@ -436,11 +437,8 @@ def convert_dataset(
tasks_path: Path | None = None,
tasks_col: Path | None = None,
robot_config: dict | None = None,
license: str | None = None,
url: str | None = None,
arxiv: str | None = None,
citation: str | None = None,
test_branch: str | None = None,
**card_kwargs,
):
v1 = get_hub_safe_version(repo_id, V16)
v1x_dir = local_dir / V16 / repo_id
@ -566,9 +564,7 @@ def convert_dataset(
}
write_json(metadata_v2_0, v20_dir / INFO_PATH)
convert_stats_to_json(v1x_dir, v20_dir)
card = create_lerobot_dataset_card(
tags=repo_tags, info=metadata_v2_0, license=license, url=url, citation=citation, arxiv=arxiv
)
card = create_lerobot_dataset_card(tags=repo_tags, dataset_info=metadata_v2_0, **card_kwargs)
with contextlib.suppress(EntryNotFoundError):
hub_api.delete_folder(repo_id=repo_id, path_in_repo="data", repo_type="dataset", revision=branch)

View File

@ -120,11 +120,11 @@ def push_dataset_card_to_hub(
repo_id: str,
revision: str | None,
tags: list | None = None,
text: str | None = None,
license: str = "apache-2.0",
**card_kwargs,
):
"""Creates and pushes a LeRobotDataset Card with appropriate tags to easily find it on the hub."""
card = create_lerobot_dataset_card(tags=tags, text=text, license=license)
card = create_lerobot_dataset_card(tags=tags, license=license, **card_kwargs)
card.push_to_hub(repo_id=repo_id, repo_type="dataset", revision=revision)