From a6015a55f930cdc51fdb035d68533d1434b1cf43 Mon Sep 17 00:00:00 2001 From: Steven Palma Date: Sun, 23 Mar 2025 01:16:50 +0100 Subject: [PATCH] chore(scripts): remove deprecated script (#887) --- examples/port_datasets/pusht_zarr.py | 243 ------- .../_aloha_raw_urls/mobile_cabinet.txt | 85 --- .../_aloha_raw_urls/mobile_chair.txt | 55 -- .../_aloha_raw_urls/mobile_elevator.txt | 20 - .../_aloha_raw_urls/mobile_shrimp.txt | 18 - .../_aloha_raw_urls/mobile_wash_pan.txt | 1 - .../_aloha_raw_urls/mobile_wipe_wine.txt | 4 - .../_aloha_raw_urls/sim_insertion_human.txt | 3 - .../sim_insertion_scripted.txt | 3 - .../sim_transfer_cube_human.txt | 3 - .../sim_transfer_cube_scripted.txt | 3 - .../_aloha_raw_urls/static_battery.txt | 2 - .../_aloha_raw_urls/static_candy.txt | 2 - .../_aloha_raw_urls/static_coffee.txt | 2 - .../_aloha_raw_urls/static_coffee_new.txt | 2 - .../_aloha_raw_urls/static_cups_open.txt | 2 - .../_aloha_raw_urls/static_fork_pick_up.txt | 53 -- .../_aloha_raw_urls/static_pingpong_test.txt | 1 - .../_aloha_raw_urls/static_pro_pencil.txt | 1 - .../_aloha_raw_urls/static_screw_driver.txt | 2 - .../_aloha_raw_urls/static_tape.txt | 2 - .../_aloha_raw_urls/static_thread_velcro.txt | 2 - .../_aloha_raw_urls/static_towel.txt | 2 - .../_aloha_raw_urls/static_vinh_cup.txt | 53 -- .../_aloha_raw_urls/static_vinh_cup_left.txt | 52 -- .../_aloha_raw_urls/static_ziploc_slide.txt | 8 - .../_diffusion_policy_replay_buffer.py | 634 ------------------ .../push_dataset_to_hub/_download_raw.py | 202 ------ .../push_dataset_to_hub/_encode_datasets.py | 184 ----- .../_umi_imagecodecs_numcodecs.py | 326 --------- .../push_dataset_to_hub/aloha_hdf5_format.py | 233 ------- .../push_dataset_to_hub/cam_png_format.py | 107 --- .../dora_parquet_format.py | 233 ------- .../push_dataset_to_hub/openx_rlds_format.py | 312 --------- .../push_dataset_to_hub/pusht_zarr_format.py | 275 -------- .../push_dataset_to_hub/umi_zarr_format.py | 234 ------- .../push_dataset_to_hub/xarm_pkl_format.py | 200 ------ lerobot/scripts/push_dataset_to_hub.py | 364 ---------- 38 files changed, 3928 deletions(-) delete mode 100644 examples/port_datasets/pusht_zarr.py delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_cabinet.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_chair.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_elevator.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_shrimp.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_wash_pan.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_wipe_wine.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_insertion_human.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_insertion_scripted.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_transfer_cube_human.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_transfer_cube_scripted.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_battery.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_candy.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_coffee.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_coffee_new.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_cups_open.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_fork_pick_up.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_pingpong_test.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_pro_pencil.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_screw_driver.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_tape.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_thread_velcro.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_towel.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_vinh_cup.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_vinh_cup_left.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_ziploc_slide.txt delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_diffusion_policy_replay_buffer.py delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_download_raw.py delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_encode_datasets.py delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/_umi_imagecodecs_numcodecs.py delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/aloha_hdf5_format.py delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/cam_png_format.py delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/dora_parquet_format.py delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/openx_rlds_format.py delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/pusht_zarr_format.py delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/umi_zarr_format.py delete mode 100644 lerobot/common/datasets/push_dataset_to_hub/xarm_pkl_format.py delete mode 100644 lerobot/scripts/push_dataset_to_hub.py diff --git a/examples/port_datasets/pusht_zarr.py b/examples/port_datasets/pusht_zarr.py deleted file mode 100644 index ea2e8b60..00000000 --- a/examples/port_datasets/pusht_zarr.py +++ /dev/null @@ -1,243 +0,0 @@ -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import shutil -from pathlib import Path - -import numpy as np -from huggingface_hub import HfApi - -from lerobot.common.constants import HF_LEROBOT_HOME -from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset -from lerobot.common.datasets.push_dataset_to_hub._download_raw import download_raw - -PUSHT_TASK = "Push the T-shaped blue block onto the T-shaped green target surface." -PUSHT_FEATURES = { - "observation.state": { - "dtype": "float32", - "shape": (2,), - "names": { - "axes": ["x", "y"], - }, - }, - "action": { - "dtype": "float32", - "shape": (2,), - "names": { - "axes": ["x", "y"], - }, - }, - "next.reward": { - "dtype": "float32", - "shape": (1,), - "names": None, - }, - "next.success": { - "dtype": "bool", - "shape": (1,), - "names": None, - }, - "observation.environment_state": { - "dtype": "float32", - "shape": (16,), - "names": [ - "keypoints", - ], - }, - "observation.image": { - "dtype": None, - "shape": (3, 96, 96), - "names": [ - "channels", - "height", - "width", - ], - }, -} - - -def build_features(mode: str) -> dict: - features = PUSHT_FEATURES - if mode == "keypoints": - features.pop("observation.image") - else: - features.pop("observation.environment_state") - features["observation.image"]["dtype"] = mode - - return features - - -def load_raw_dataset(zarr_path: Path): - try: - from lerobot.common.datasets.push_dataset_to_hub._diffusion_policy_replay_buffer import ( - ReplayBuffer as DiffusionPolicyReplayBuffer, - ) - except ModuleNotFoundError as e: - print("`gym_pusht` is not installed. Please install it with `pip install 'lerobot[gym_pusht]'`") - raise e - - zarr_data = DiffusionPolicyReplayBuffer.copy_from_path(zarr_path) - return zarr_data - - -def calculate_coverage(zarr_data): - try: - import pymunk - from gym_pusht.envs.pusht import PushTEnv, pymunk_to_shapely - except ModuleNotFoundError as e: - print("`gym_pusht` is not installed. Please install it with `pip install 'lerobot[gym_pusht]'`") - raise e - - block_pos = zarr_data["state"][:, 2:4] - block_angle = zarr_data["state"][:, 4] - - num_frames = len(block_pos) - - coverage = np.zeros((num_frames,), dtype=np.float32) - # 8 keypoints with 2 coords each - keypoints = np.zeros((num_frames, 16), dtype=np.float32) - - # Set x, y, theta (in radians) - goal_pos_angle = np.array([256, 256, np.pi / 4]) - goal_body = PushTEnv.get_goal_pose_body(goal_pos_angle) - - for i in range(num_frames): - space = pymunk.Space() - space.gravity = 0, 0 - space.damping = 0 - - # Add walls. - walls = [ - PushTEnv.add_segment(space, (5, 506), (5, 5), 2), - PushTEnv.add_segment(space, (5, 5), (506, 5), 2), - PushTEnv.add_segment(space, (506, 5), (506, 506), 2), - PushTEnv.add_segment(space, (5, 506), (506, 506), 2), - ] - space.add(*walls) - - block_body, block_shapes = PushTEnv.add_tee(space, block_pos[i].tolist(), block_angle[i].item()) - goal_geom = pymunk_to_shapely(goal_body, block_body.shapes) - block_geom = pymunk_to_shapely(block_body, block_body.shapes) - intersection_area = goal_geom.intersection(block_geom).area - goal_area = goal_geom.area - coverage[i] = intersection_area / goal_area - keypoints[i] = PushTEnv.get_keypoints(block_shapes).flatten() - - return coverage, keypoints - - -def calculate_success(coverage: float, success_threshold: float): - return coverage > success_threshold - - -def calculate_reward(coverage: float, success_threshold: float): - return np.clip(coverage / success_threshold, 0, 1) - - -def main(raw_dir: Path, repo_id: str, mode: str = "video", push_to_hub: bool = True): - if mode not in ["video", "image", "keypoints"]: - raise ValueError(mode) - - if (HF_LEROBOT_HOME / repo_id).exists(): - shutil.rmtree(HF_LEROBOT_HOME / repo_id) - - if not raw_dir.exists(): - download_raw(raw_dir, repo_id="lerobot-raw/pusht_raw") - - zarr_data = load_raw_dataset(zarr_path=raw_dir / "pusht_cchi_v7_replay.zarr") - - env_state = zarr_data["state"][:] - agent_pos = env_state[:, :2] - - action = zarr_data["action"][:] - image = zarr_data["img"] # (b, h, w, c) - - if image.dtype == np.float32 and image.max() == np.float32(255): - # HACK: images are loaded as float32 but they actually encode uint8 data - image = image.astype(np.uint8) - - episode_data_index = { - "from": np.concatenate(([0], zarr_data.meta["episode_ends"][:-1])), - "to": zarr_data.meta["episode_ends"], - } - - # Calculate success and reward based on the overlapping area - # of the T-object and the T-area. - coverage, keypoints = calculate_coverage(zarr_data) - success = calculate_success(coverage, success_threshold=0.95) - reward = calculate_reward(coverage, success_threshold=0.95) - - features = build_features(mode) - dataset = LeRobotDataset.create( - repo_id=repo_id, - fps=10, - robot_type="2d pointer", - features=features, - image_writer_threads=4, - ) - episodes = range(len(episode_data_index["from"])) - for ep_idx in episodes: - from_idx = episode_data_index["from"][ep_idx] - to_idx = episode_data_index["to"][ep_idx] - num_frames = to_idx - from_idx - - for frame_idx in range(num_frames): - i = from_idx + frame_idx - idx = i + (frame_idx < num_frames - 1) - frame = { - "action": action[i], - # Shift reward and success by +1 until the last item of the episode - "next.reward": reward[idx : idx + 1], - "next.success": success[idx : idx + 1], - "task": PUSHT_TASK, - } - - frame["observation.state"] = agent_pos[i] - - if mode == "keypoints": - frame["observation.environment_state"] = keypoints[i] - else: - frame["observation.image"] = image[i] - - dataset.add_frame(frame) - - dataset.save_episode() - - if push_to_hub: - dataset.push_to_hub() - hub_api = HfApi() - hub_api.create_tag(repo_id, tag=CODEBASE_VERSION, repo_type="dataset") - - -if __name__ == "__main__": - # To try this script, modify the repo id with your own HuggingFace user (e.g cadene/pusht) - repo_id = "lerobot/pusht" - - modes = ["video", "image", "keypoints"] - # Uncomment if you want to try with a specific mode - # modes = ["video"] - # modes = ["image"] - # modes = ["keypoints"] - - raw_dir = Path("data/lerobot-raw/pusht_raw") - for mode in modes: - if mode in ["image", "keypoints"]: - repo_id += f"_{mode}" - - # download and load raw dataset, create LeRobotDataset, populate it, push to hub - main(raw_dir, repo_id=repo_id, mode=mode) - - # Uncomment if you want to load the local dataset and explore it - # dataset = LeRobotDataset(repo_id=repo_id) - # breakpoint() diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_cabinet.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_cabinet.txt deleted file mode 100644 index 8e821d29..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_cabinet.txt +++ /dev/null @@ -1,85 +0,0 @@ -https://drive.google.com/file/d/1_SOJkgfP5yZyVjMhTt3nwhvyUjcnlI51/view?usp=drive_link -https://drive.google.com/file/d/1rmgN8UUzph1qwJnzG1d-uOafodn-gLvb/view?usp=drive_link -https://drive.google.com/file/d/1NYQ-XxsBVinB6dUoZmVWweT83367P3i2/view?usp=drive_link -https://drive.google.com/file/d/1oAv_j74zxxCJieMG7r5Vl2BeHK1__3s3/view?usp=drive_link -https://drive.google.com/file/d/1wFUJQROsrTJt64YRuIeExhFjr2wnK5uu/view?usp=drive_link -https://drive.google.com/file/d/1KzL3Tt0Le7jVl58XVRUcmigmXjyiuhbK/view?usp=drive_link -https://drive.google.com/file/d/1qy_YBladeHtianSSGtgAPSHtMin7msvf/view?usp=drive_link -https://drive.google.com/file/d/1rA_F0V_qL_nyuC_0aBKCisF4-0TIkF2Y/view?usp=drive_link -https://drive.google.com/file/d/1hw-8qMpz9VgSt62XoASqNRuPECpCwJQP/view?usp=drive_link -https://drive.google.com/file/d/1BpHOl9rKMzdvNGka6js7C0s40hH6vnDA/view?usp=drive_link -https://drive.google.com/file/d/1PazhkhiDnJ-OUMyDVDFxEZNKQQqHiNWS/view?usp=drive_link -https://drive.google.com/file/d/1lZ665R6ATl57dypxH4dGJ2NSt6XYnbuz/view?usp=drive_link -https://drive.google.com/file/d/1V9HzLaf-tlG15wUzT7KrTDCS_z1vi5NV/view?usp=drive_link -https://drive.google.com/file/d/1aKauWiXoKqbNwn_2xs4MrmLlaNYlVNmO/view?usp=drive_link -https://drive.google.com/file/d/1WVD5DFhriO1YmmOgiVHhacR6HWoTPxav/view?usp=drive_link -https://drive.google.com/file/d/1_X43WgeBAsfkhH9EmpyPki8U9joMeAGC/view?usp=drive_link -https://drive.google.com/file/d/1t8x0GqWoNKWtnBsB7_D40Z34nL9ak4kf/view?usp=drive_link -https://drive.google.com/file/d/15V_f26WaKOXjKnq2T3HRWAmtQUi4lbu2/view?usp=drive_link -https://drive.google.com/file/d/11VFIAsiSDsMOBANgrOcZBpKB9AFWnLy7/view?usp=drive_link -https://drive.google.com/file/d/1M0NS7vVaxJv3FHnuRYtdwTFYF7We4LxP/view?usp=drive_link -https://drive.google.com/file/d/1mR0OItTNqFnVLoczcyKYlm6drAy778lO/view?usp=drive_link -https://drive.google.com/file/d/1NbVFWDQAh-z4JJ4D-Zw6Lps9kdvpqh2j/view?usp=drive_link -https://drive.google.com/file/d/1JQoZGBzl4W3QG26-n39tefcGN0fDRMbB/view?usp=drive_link -https://drive.google.com/file/d/1VBjHl-TvZpncopvasIP5G9gecbB2a5f6/view?usp=drive_link -https://drive.google.com/file/d/1VzSf6zaB21nahm7MsPwroXbJ84NIwq0b/view?usp=drive_link -https://drive.google.com/file/d/1OtNnfMEydNtZOcivs4k6E_uJSpf8PkGy/view?usp=drive_link -https://drive.google.com/file/d/14nVvpvsrFr_03Pa_N7MKzwnRwibOUYM6/view?usp=drive_link -https://drive.google.com/file/d/1M8li6duiO2r3lv_9HhF_XJn0oZUIEK5F/view?usp=drive_link -https://drive.google.com/file/d/1Cpzea6fO14lxAaNfSBifqoa4ekhCiLD1/view?usp=drive_link -https://drive.google.com/file/d/1mbxRTm5vlbsY9UJ0jfjM6j9D7kPJjBpG/view?usp=drive_link -https://drive.google.com/file/d/1RXD1i6IfWsHRlCxVmG04h2h5Ycm_WwZN/view?usp=drive_link -https://drive.google.com/file/d/1QFqFSwDGOk1BkgGmqgCcc2BRWnJ6R3MA/view?usp=drive_link -https://drive.google.com/file/d/1bFqWR8DQM0ZUxxtS2bl-RANQvukeFLzp/view?usp=drive_link -https://drive.google.com/file/d/1pR-rH3yNGoyPdD4hJ6-3lXQ-PstBx9du/view?usp=drive_link -https://drive.google.com/file/d/107OAwLY-hva9HeQLIK7VCh-ytdDabVjr/view?usp=drive_link -https://drive.google.com/file/d/1Tpl08QOaSZ37GTO4awFWSdD8wBR9xdlT/view?usp=drive_link -https://drive.google.com/file/d/1MR164AOM-0S1T6RX8xKTV2IHyaCvpqAW/view?usp=drive_link -https://drive.google.com/file/d/1_wknJfVnStIhJ82lU_QtcrwahsqYIsr8/view?usp=drive_link -https://drive.google.com/file/d/1ZuEktWrbYkTx0l5pj3WiZ2CJrfbDOHNo/view?usp=drive_link -https://drive.google.com/file/d/15G_10hkkkq6yxvyI5NGZirlF-RzduR2F/view?usp=drive_link -https://drive.google.com/file/d/1DBKxg3ONqh7dhLuX6oh1Yyo2x383V1Hp/view?usp=drive_link -https://drive.google.com/file/d/1B5iDBkTUr5vopDddV_fHud18SqAHhauS/view?usp=drive_link -https://drive.google.com/file/d/1acwFV0eenRkki1QcjSKH5xqOtys-P3Pr/view?usp=drive_link -https://drive.google.com/file/d/1S47BI83xyrh-FKXsvAQqer98Biu_p8XK/view?usp=drive_link -https://drive.google.com/file/d/1JL6DmBZl3uyq9dyLfgSqtGF06e7E9JwM/view?usp=drive_link -https://drive.google.com/file/d/16WvRS4Kjog8Pxgr0E3sGGnI01YwL9Uql/view?usp=drive_link -https://drive.google.com/file/d/12ttGqL33IPWg0-s1SD44rr22M6LiSQBr/view?usp=drive_link -https://drive.google.com/file/d/1OyZqqnldTU_DliRbr6x0C4a_iWPwIN7j/view?usp=drive_link -https://drive.google.com/file/d/1oYk00IpLnR9fesLfD15Ebe7nVBffEbcS/view?usp=drive_link -https://drive.google.com/file/d/1eyE2-MQduCEqCd-5_kl5zsoOEERAzpZD/view?usp=drive_link -https://drive.google.com/file/d/1ir1Ya-vO0d97pfvbePlUeuKTTRc0qIMU/view?usp=drive_link -https://drive.google.com/file/d/1hOi-JnqlMt47gVnLZHMTqeojyYVErohl/view?usp=drive_link -https://drive.google.com/file/d/1NFFw5_PqigQ7xGqsL-MNq2B1r5yAscCf/view?usp=drive_link -https://drive.google.com/file/d/1uftq1-Zlh8d2sNLWrlVcKYQUwZTD7o24/view?usp=drive_link -https://drive.google.com/file/d/1-ax19dSLPacVgk000T-m3l4flPcg07pM/view?usp=drive_link -https://drive.google.com/file/d/126y-lgn86-ZmCz8hooF1THKJGGObw3OB/view?usp=drive_link -https://drive.google.com/file/d/1JiDniK0VmDIkk92AbBILb8J2Ba59PWML/view?usp=drive_link -https://drive.google.com/file/d/1kr8nPIRljiU0R4J9SMgj80o1FPQxzu9z/view?usp=drive_link -https://drive.google.com/file/d/1bbThWRij1pKBh_kFgV8FwK0sXtTHBoLX/view?usp=drive_link -https://drive.google.com/file/d/1WenzDW6lxk1xkOFm-OiGFfc0ROskAuKU/view?usp=drive_link -https://drive.google.com/file/d/1MiKRzuzUn1yN-k_6kPJJzIGy7dT-nnsD/view?usp=drive_link -https://drive.google.com/file/d/17rRg2tcmB-gNhQ0KoZJQmNfyFeoij1jH/view?usp=drive_link -https://drive.google.com/file/d/11mokBpvrY3ld6sY5WztREtJ1jgqfQV70/view?usp=drive_link -https://drive.google.com/file/d/1Il_6IOx9NDp1bX_KHizJfBwzTufTmn86/view?usp=drive_link -https://drive.google.com/file/d/1KswtJGsxJ7eeBDAmNA_aeLjOxcH6MIxa/view?usp=drive_link -https://drive.google.com/file/d/1gzMhi5uWu4C3Y6WbQ3L-08V96GxTZrRR/view?usp=drive_link -https://drive.google.com/file/d/1nRQFtaBxfUCYc2W90Qibh0kHCt6YQCfc/view?usp=drive_link -https://drive.google.com/file/d/1vs-gyW-KheqHbUATwAhA2mmR9GOGw7f_/view?usp=drive_link -https://drive.google.com/file/d/1MuxzGOA2fgLaHryq82KkQumtuRJGcUOC/view?usp=drive_link -https://drive.google.com/file/d/1IIwxZnGlqrXLUXqG6yMO0r7uhCvhpk9e/view?usp=drive_link -https://drive.google.com/file/d/1vE7XPyaFcXP4DtTY5Y9WKIt7zWgmX-Cr/view?usp=drive_link -https://drive.google.com/file/d/1j-bIV09gr21RC3-x1N_pK4RPLV3fmWKz/view?usp=drive_link -https://drive.google.com/file/d/1t3nW1rD3S-EL0Oymb5U7ZAj5UMkydkln/view?usp=drive_link -https://drive.google.com/file/d/14hbfHCdMKtJZ41F9CQReMec2jeRFTOqR/view?usp=drive_link -https://drive.google.com/file/d/1x-hUyOSne5BW0AzQ3W6_Pf4g5yXQWi9M/view?usp=drive_link -https://drive.google.com/file/d/1sw9JqRg6E-3P84I3ZhzTrJMu0vuiaMmP/view?usp=drive_link -https://drive.google.com/file/d/1LuqhQlL4MGZhB_6THmkovRxrlP26BbdC/view?usp=drive_link -https://drive.google.com/file/d/15C5K6v_lkjnMSmUvVyqHQKwh2N166e7K/view?usp=drive_link -https://drive.google.com/file/d/1ns_9eSsQeeoZ10nlbkLy8tu0GmJFSnkt/view?usp=drive_link -https://drive.google.com/file/d/1NpzWJeK6CqjxzjIMYe6aYdX8xGsQwD4o/view?usp=drive_link -https://drive.google.com/file/d/1NMLezwufKJ9_8xTc9KQThSzVVD71B9Ui/view?usp=drive_link -https://drive.google.com/file/d/1aa71DCUqs6oXlIxX35jgsmsgm-NlDxPV/view?usp=drive_link -https://drive.google.com/file/d/1UJzkIZzAL0j-D5YQBnoq7mHvttASy12O/view?usp=drive_link -https://drive.google.com/file/d/1nPgx36HIJFb7oI94VbRzWjpPP2GANxzG/view?usp=drive_link -https://drive.google.com/file/d/1NovAP-KVJjqcuvWy3d6G4ptGGAIDqcCx/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_chair.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_chair.txt deleted file mode 100644 index 497f8d04..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_chair.txt +++ /dev/null @@ -1,55 +0,0 @@ -https://drive.google.com/file/d/11M3Ye0r5agMaaicPbVGD0q2Hb3rGklbb/view?usp=drive_link -https://drive.google.com/file/d/1-tx7SvYYgSvXCvnf_EI2OVdwK-CkFY6S/view?usp=drive_link -https://drive.google.com/file/d/1EWJunmOpMHaU1hE106wwpbkGYcjQXYAF/view?usp=drive_link -https://drive.google.com/file/d/1IDn95Z7FSiCckrSENtGV4u3RyFHNQSDY/view?usp=drive_link -https://drive.google.com/file/d/1CwzvWj1i7QOtqrZvsCZ6BdZaKNDfpN32/view?usp=drive_link -https://drive.google.com/file/d/1HvAvlhm77nAD3Td24QPSeq8lw-Rl_aOh/view?usp=drive_link -https://drive.google.com/file/d/1t-suKYOPhXH666RpAYNRp2QU_DOy3AeM/view?usp=drive_link -https://drive.google.com/file/d/18xpKgWh7RWyjMN5PkLTOo-AxsAadAuRw/view?usp=drive_link -https://drive.google.com/file/d/1oci5Eto-ztv-AQNz8EnwZveBIhxvk-xJ/view?usp=drive_link -https://drive.google.com/file/d/1Y-t_4vxdE6NpHO0DLJR8f3mD0Q-Wj5-c/view?usp=drive_link -https://drive.google.com/file/d/1lylRqbbbB8bgtpsBWMPACmHJreuKmllv/view?usp=drive_link -https://drive.google.com/file/d/1yliSyMig_NXShWfQx6qyW7Ijf2Y5lFK6/view?usp=drive_link -https://drive.google.com/file/d/1XXhwJsJbeb7KXAooGvJapnm9bjnGUmxS/view?usp=drive_link -https://drive.google.com/file/d/1_xs1f3hW2JArKyvfF7UWubWjyROGTLs6/view?usp=drive_link -https://drive.google.com/file/d/1WVEHpr6EqKCZbkHapQSTXJq4xE4SWFT-/view?usp=drive_link -https://drive.google.com/file/d/1RqOHv9pEQGvW8NUA7ynffFmG999TL_Az/view?usp=drive_link -https://drive.google.com/file/d/1cu5AgD2gh-uA3PFJmzxxzNaF3qOSlYY1/view?usp=drive_link -https://drive.google.com/file/d/1SsrXqiPclNrnYToPZ9Uq-k3y0C4qdHT1/view?usp=drive_link -https://drive.google.com/file/d/1-J7EXf0vjkLIfSqT8ICEsP6CTjzSLBop/view?usp=drive_link -https://drive.google.com/file/d/11O7ewUmoZXfyyKjy_6B5RW4DpjICxqBT/view?usp=drive_link -https://drive.google.com/file/d/1iic44kZoCsjNsfAz2cMstZ9-WQvAhblF/view?usp=drive_link -https://drive.google.com/file/d/1yLV1lVX-2WnWQldGlnQZ0x7QBuDiVkL3/view?usp=drive_link -https://drive.google.com/file/d/1Tybp9ru98TTbGn4eyROpUQwDFuALWXmk/view?usp=drive_link -https://drive.google.com/file/d/13E9OTMiipVJByDs5-J19oWwAz7l94LTN/view?usp=drive_link -https://drive.google.com/file/d/1EeTpJQdMSliw4JzSMtJ6CyTvVdexjM4M/view?usp=drive_link -https://drive.google.com/file/d/1NHyNwoFqzeAu-1_PSpq5JfxaiD_xbpn9/view?usp=drive_link -https://drive.google.com/file/d/1fJcS0phDp4xm_FyGaJ5wr9Pe4KqtHaxD/view?usp=drive_link -https://drive.google.com/file/d/12AqrLUaewDPEcFRqPZeZFb_TQ0Lfi3At/view?usp=drive_link -https://drive.google.com/file/d/1x_hd4Qsq1oJS-aj2t3qM7WbbV7KZj05b/view?usp=drive_link -https://drive.google.com/file/d/14OUSUArmsB068hs6BuEIXQhI1Cyz8Sf0/view?usp=drive_link -https://drive.google.com/file/d/16zlzh1T5zeUJQnFf382NXkFEKEnDub4O/view?usp=drive_link -https://drive.google.com/file/d/1IbDltmN-NEFCNtr1TO4ILxEgQ94rtjWv/view?usp=drive_link -https://drive.google.com/file/d/15gmlf8Gx9455pZ1AlqcCSwh3nDPxMzSr/view?usp=drive_link -https://drive.google.com/file/d/1qHpRL1oZfIMo_vxnm8qfwQ-7l0BZIVva/view?usp=drive_link -https://drive.google.com/file/d/1H1xskIgiFZivkYn23rMzH3xePGOh3VTC/view?usp=drive_link -https://drive.google.com/file/d/1avls6Pv0kYiCMNVknbc1zQsgy64MUDMM/view?usp=drive_link -https://drive.google.com/file/d/1MmWVgCj5khc8KMIifmt3EzF1o-CtPyyn/view?usp=drive_link -https://drive.google.com/file/d/1U0kCc_xqW0WNppf4sbnK14euWKdPZtzB/view?usp=drive_link -https://drive.google.com/file/d/16CaEyQscOuhLj23PEGDTL9DeyNkohkMn/view?usp=drive_link -https://drive.google.com/file/d/1Iu8uM6UUJ0zW8tvN-9UiOe_4oSNzEutg/view?usp=drive_link -https://drive.google.com/file/d/1UImqiBaIxCR-1DNJaZhHqeHhaySOtVIr/view?usp=drive_link -https://drive.google.com/file/d/1VpU2V_leIoRIyv_lAvE7eLHBG8DxCTnp/view?usp=drive_link -https://drive.google.com/file/d/1_Q8J27OT3Xby7QY6yHvIJauFRWEMxkRm/view?usp=drive_link -https://drive.google.com/file/d/1bantmVo1L9Xz4tbiNw_a1UC2Z_HPO1wT/view?usp=drive_link -https://drive.google.com/file/d/1IRIXMJMCBDkBjbaHvAlEiBogSvZ1jK_3/view?usp=drive_link -https://drive.google.com/file/d/1mAHXKjiFbjwydypW2t5Lv8_H5x6nHegl/view?usp=drive_link -https://drive.google.com/file/d/1SfyY796fLrBCMY39OcyuxZafqSCRZPZk/view?usp=drive_link -https://drive.google.com/file/d/1X-44sZ8CcfzIskc0dvSx882o1yFhHaZB/view?usp=drive_link -https://drive.google.com/file/d/1BOIWCCCk6DLD4Bmvc75ZbbLi9AQm-1ao/view?usp=drive_link -https://drive.google.com/file/d/1RuyDtRE1kk76sw-wP8vx5SgLoPF3PA_H/view?usp=drive_link -https://drive.google.com/file/d/1c4eoQiBbGuy3CTAQDUSkd84Ponh1roAQ/view?usp=drive_link -https://drive.google.com/file/d/19PXB9z4Ljq6dsbf9TqcOrrP5SRbw2Tc_/view?usp=drive_link -https://drive.google.com/file/d/1nn1VVZVoIXWdYDozR7XHXE4mPLQG80PQ/view?usp=drive_link -https://drive.google.com/file/d/1MBdFGOKPV8GUhwoSsJ_Ky3qAMLM2Bv3K/view?usp=drive_link -https://drive.google.com/file/d/1of3k_M-7Nh3I1TndcWedxK4ca9dn8Sc5/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_elevator.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_elevator.txt deleted file mode 100644 index abb42b55..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_elevator.txt +++ /dev/null @@ -1,20 +0,0 @@ -https://drive.google.com/file/d/12ctkOAdkCNGN1JLbZb5ww3XTBn2LFpGI/view?usp=drive_link -https://drive.google.com/file/d/1G_Vd46_4fq6O64gHHjUbJX5Ld44ZZx0y/view?usp=drive_link -https://drive.google.com/file/d/1uKgUy73B3xBogQAOUhfZjO0X5qZGsi2c/view?usp=drive_link -https://drive.google.com/file/d/1fu9cIrfI-fE2LhdGUxbx7-8Ci_PF8Ypm/view?usp=drive_link -https://drive.google.com/file/d/1Ygk9ZPJzx8xw2A9JF3NHbJ44TqnvSTQR/view?usp=drive_link -https://drive.google.com/file/d/18m5xPuccNsEB20WPshm3zhxmXc6k63ED/view?usp=drive_link -https://drive.google.com/file/d/1DiqqxC44rriviRQpqogcv0-EB-Y6nr9g/view?usp=drive_link -https://drive.google.com/file/d/1qPdaoTVDizJXkfXLioWU7iJ8hqCXSyOQ/view?usp=drive_link -https://drive.google.com/file/d/1Fj9kIA_mG7f67WFfACJEaZ7izcHG7vUm/view?usp=drive_link -https://drive.google.com/file/d/1WpYehZnI2P7dUdJPfkE-ij1rqCnjZEbB/view?usp=drive_link -https://drive.google.com/file/d/1_zwWkT4jPyzB38STWb6whlzsPzXmfA9r/view?usp=drive_link -https://drive.google.com/file/d/1U6-J4I_fPlSFFGfhZPxS5_YzKXwXIZYp/view?usp=drive_link -https://drive.google.com/file/d/1pRhxxcTfZp5tQo_EScvJUwfc3amiS6Vk/view?usp=drive_link -https://drive.google.com/file/d/1lWLntqra83RlYU_gN7Vostnfydf6gutd/view?usp=drive_link -https://drive.google.com/file/d/1vIBKo0x-NYEHV1FvRpco1lQMpRdAWAIL/view?usp=drive_link -https://drive.google.com/file/d/1pdrLV3JTQou_XH0Aap61Ssf60iVKm1jJ/view?usp=drive_link -https://drive.google.com/file/d/1QTsLoQ7SwmKdQHjBGVDaR2uTwfFwtrOf/view?usp=drive_link -https://drive.google.com/file/d/1Gytai8M_12J36GY6L_TulEcOC-035jwS/view?usp=drive_link -https://drive.google.com/file/d/14LJudNc629NT-i8xreXtzl27ce_DxOFJ/view?usp=drive_link -https://drive.google.com/file/d/1sBvPCODbzxGAI0S3lgN5cSG9Go3lRi00/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_shrimp.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_shrimp.txt deleted file mode 100644 index a6d76bd7..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_shrimp.txt +++ /dev/null @@ -1,18 +0,0 @@ -https://drive.google.com/file/d/1MJn9GbC8p9lN4gC9KDMLEkTkP_gGpXj0/view?usp=drive_link -https://drive.google.com/file/d/1-4LXgjl7ZCOgp-8GCJmFRD8OeqN5Jf7-/view?usp=drive_link -https://drive.google.com/file/d/1Ho06Ce0SPbqU3juaMxNUwAt3zCRLGC8W/view?usp=drive_link -https://drive.google.com/file/d/1ivHoj7_7olBSxH-Y8kqXEW7ttITK-45j/view?usp=drive_link -https://drive.google.com/file/d/1qjY4hM_IvZ8cq2II_n9MeJbvyeuN4oBP/view?usp=drive_link -https://drive.google.com/file/d/1rKVhO_f92-7sw13T8hTVrza3B9oAVgoy/view?usp=drive_link -https://drive.google.com/file/d/1pcLPHO8fBkc1-CRa88tyQtEueE4xiXNi/view?usp=drive_link -https://drive.google.com/file/d/1Vev_chCsIeEdvQ8poEYNsOJFGy_QU8kZ/view?usp=drive_link -https://drive.google.com/file/d/1l5G4zpRkxSLCQjvGPYSN4zfCvVRQuzMz/view?usp=drive_link -https://drive.google.com/file/d/14vgthE1eoakXkr2-DRw50E6lAqYOiUuE/view?usp=drive_link -https://drive.google.com/file/d/17nPSmKKmgQ2B7zkzWrZYiLM3RBuFod82/view?usp=drive_link -https://drive.google.com/file/d/1QcDsxplVvb_ID9BVrihl5FvlC-j7waXi/view?usp=drive_link -https://drive.google.com/file/d/18pEejBpI-eEVaWAAjBCyC0vgbX3T1Esj/view?usp=drive_link -https://drive.google.com/file/d/1H8eH6_IRODtEFT6WoM77ltR5OoOrqXmI/view?usp=drive_link -https://drive.google.com/file/d/1IWlpFRZhoxyG4nS13CWK4leZVk5wbNx4/view?usp=drive_link -https://drive.google.com/file/d/1PbZA8_OCGmMLxNP9xbkLRSChniL4uGxl/view?usp=drive_link -https://drive.google.com/file/d/1p9XAdmG2f_WeflNO4DIJ_tr1rK6M9B4B/view?usp=drive_link -https://drive.google.com/file/d/1nS59Et1cNAvKo3Y4SeSGRuZD5TvBbCF3/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_wash_pan.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_wash_pan.txt deleted file mode 100644 index 5e3732bd..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_wash_pan.txt +++ /dev/null @@ -1 +0,0 @@ -https://drive.google.com/drive/folders/1S8eFg98IaGAIKVZ8QFWG1bx4mHa-O204 diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_wipe_wine.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_wipe_wine.txt deleted file mode 100644 index 17a13f1a..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/mobile_wipe_wine.txt +++ /dev/null @@ -1,4 +0,0 @@ -https://drive.google.com/drive/folders/1tC_g1AJ8lglBLY-fjsQrG6DMBa3Ucp-0 -https://drive.google.com/file/d/1fG_Yi2MJrFjiUVN3XoiWXLtTxHlwwaDv/view?usp=drive_link -https://drive.google.com/file/d/1WX32VWfzzX3Blmd06DRxLwFbMJfVe7P4/view?usp=drive_link -https://drive.google.com/file/d/18onsX3vXg3xkFwP5bVUCjdV4n9TRn0C9/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_insertion_human.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_insertion_human.txt deleted file mode 100644 index 19bb7114..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_insertion_human.txt +++ /dev/null @@ -1,3 +0,0 @@ -https://drive.google.com/drive/folders/1RgyD0JgTX30H4IM5XZn8I3zSV_mr8pyF -https://drive.google.com/file/d/18Cudl6nikDtgRolea7je8iF_gGKzynOP/view?usp=drive_link -https://drive.google.com/file/d/1C1kZYyROzs-PrLc0SkDgUgMi4-L3lauE/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_insertion_scripted.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_insertion_scripted.txt deleted file mode 100644 index fc80579b..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_insertion_scripted.txt +++ /dev/null @@ -1,3 +0,0 @@ -https://drive.google.com/drive/folders/1TsojQQSXtHEoGnqgJ3gmpPQR2DPLtS2N -https://drive.google.com/file/d/1wfMSZ24oOh5KR_0aaP3Cnu_c4ZCveduB/view?usp=drive_link -https://drive.google.com/file/d/17EuCUWS6uCCr6yyNzpXdcdE-_TTNCKtf/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_transfer_cube_human.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_transfer_cube_human.txt deleted file mode 100644 index f5161ea2..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_transfer_cube_human.txt +++ /dev/null @@ -1,3 +0,0 @@ -https://drive.google.com/drive/folders/1sc-E4QYW7A0o23m1u2VWNGVq5smAsfCo -https://drive.google.com/file/d/18smMymtr8tIxaNUQ61gW6dG50pt3MvGq/view?usp=drive_link -https://drive.google.com/file/d/1Nk7l53d9sJoGDBKAOnNrExX5nLacATc6/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_transfer_cube_scripted.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_transfer_cube_scripted.txt deleted file mode 100644 index d3a5b414..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/sim_transfer_cube_scripted.txt +++ /dev/null @@ -1,3 +0,0 @@ -https://drive.google.com/drive/folders/1aRyoOhQwxhyt1J8XgEig4s6kzaw__LXj -https://drive.google.com/file/d/1pnGIOd-E4-rhz2P3VxpknMKRZCoKt6eI/view?usp=drive_link -https://drive.google.com/file/d/1GKReZHrXU73NMiC5zKCq_UtqPVtYq8eo/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_battery.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_battery.txt deleted file mode 100644 index a3613eb7..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_battery.txt +++ /dev/null @@ -1,2 +0,0 @@ -https://drive.google.com/drive/folders/19qS_n7vKgDcPeTMnvDHQ5-n73xEbJz5D -https://drive.google.com/file/d/1oC31By0A2bsBeHyUwBdQw1z4ng6yi9Za/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_candy.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_candy.txt deleted file mode 100644 index a39bde56..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_candy.txt +++ /dev/null @@ -1,2 +0,0 @@ -https://drive.google.com/drive/folders/1m5rQ6UVH8Q9RQp_6c0CxkQ88-L-ScO7q -https://drive.google.com/file/d/1wHz2qcmwcVG0C0CZ9MjQDQcmj4OY9_a3/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_coffee.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_coffee.txt deleted file mode 100644 index 3f4acbd0..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_coffee.txt +++ /dev/null @@ -1,2 +0,0 @@ -https://drive.google.com/drive/folders/1seQGay470nGQ-knBI5TjsTr8iL9Qws5q -https://drive.google.com/file/d/1T89hSX5U99wLGvGTE7yUBaQPOpyj6Sai/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_coffee_new.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_coffee_new.txt deleted file mode 100644 index 06667fef..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_coffee_new.txt +++ /dev/null @@ -1,2 +0,0 @@ -https://drive.google.com/drive/folders/1t3eDc5Rg0DveyRe8oTm6Dia_FYU5mXyf -https://drive.google.com/file/d/1TXFaduTakvS0ZWJqKCX-HIvYglum_5CY/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_cups_open.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_cups_open.txt deleted file mode 100644 index 2cde5fa0..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_cups_open.txt +++ /dev/null @@ -1,2 +0,0 @@ -https://drive.google.com/drive/folders/1Z9X3DNzd6LS0FFjQemNUMoMA5yk5VQOh -https://drive.google.com/file/d/1Wlyc0vTkjXuWB6zbaVOWhEfD7BmPgUV_/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_fork_pick_up.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_fork_pick_up.txt deleted file mode 100644 index 92b0d474..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_fork_pick_up.txt +++ /dev/null @@ -1,53 +0,0 @@ -https://drive.google.com/drive/folders/1DYgB4ifX4uIid9m9jnC0Zdz8Nf7ZC0fc -https://drive.google.com/file/d/1Eb-NRNk_FmVleCbU_Ng5Y4dfcjTKN7Rv/view?usp=drive_link -https://drive.google.com/file/d/1dkhjEADakT-44l9jf-nK4x89kr4yG_qb/view?usp=drive_link -https://drive.google.com/file/d/14hDhgcZkVqNExGb4tIXpSjMshhqZETch/view?usp=drive_link -https://drive.google.com/file/d/1zVMEHpHbuNyP5A_lYU7RPSLB-4V0yfZw/view?usp=drive_link -https://drive.google.com/file/d/1JtgDjBvy7FnRpFzrx_foC3quorYQFAR-/view?usp=drive_link -https://drive.google.com/file/d/1EHdneB6F-PP0dQlX8qPaXbxmKoBy_YwO/view?usp=drive_link -https://drive.google.com/file/d/17Z0jjVBy1OPKREPu77_n_rQzorDiapji/view?usp=drive_link -https://drive.google.com/file/d/1F4i23qPJ_qTf5jWjfLo4ARGJChznYWt3/view?usp=drive_link -https://drive.google.com/file/d/1kZtXWM3uS0-rLblydBfJ0mMcVnMMXw9w/view?usp=drive_link -https://drive.google.com/file/d/1mNODox87xFfY5Z_o5mcLsr8SHb39jDik/view?usp=drive_link -https://drive.google.com/file/d/1Ob44VdmEUA93FKDECiRb5Ogz2xQg5IWp/view?usp=drive_link -https://drive.google.com/file/d/1fdQLdjj3Cwv33R1wZhfrLz9Del8mqgHb/view?usp=drive_link -https://drive.google.com/file/d/1Yu3L3ft21zP__XL8pCfhb788ZleuW1n5/view?usp=drive_link -https://drive.google.com/file/d/1ozBBWXVZ9hXDh9ooHUNroHdYm8UDqnhJ/view?usp=drive_link -https://drive.google.com/file/d/1o0TGqvfWw_Lunxb5ubKDS21Lr_WC0h75/view?usp=drive_link -https://drive.google.com/file/d/1jZnd5eP5L6BH5l98BPN6OnoQx3fu8e9n/view?usp=drive_link -https://drive.google.com/file/d/1S5sYbz8wcLYp0V67v13i4PRcBxodn4Hg/view?usp=drive_link -https://drive.google.com/file/d/1rFeg_x6ftJYwPtBv34D3h2L2cpDLeR4G/view?usp=drive_link -https://drive.google.com/file/d/1GvS3lcm4o6nm_scUk0XxKeVFNmzjucDZ/view?usp=drive_link -https://drive.google.com/file/d/1-9i0riphC7NhhDahcQfD1QoBXP5gF90A/view?usp=drive_link -https://drive.google.com/file/d/15p_IqGsMbKuvzMS872THAZr-3SBtb1Fr/view?usp=drive_link -https://drive.google.com/file/d/1ToyYcBfJL8gbQn0q_59zPLsFmm7dmMJo/view?usp=drive_link -https://drive.google.com/file/d/1e_7PNH7CYafE4pAebP7ZdI7XFbmEcy_i/view?usp=drive_link -https://drive.google.com/file/d/1JoabvGVsIQdug2xOhUIhetEIyDM91y_Y/view?usp=drive_link -https://drive.google.com/file/d/1kOMw1y0lmnVaCjwZICfzCsx6e0Z8MNGR/view?usp=drive_link -https://drive.google.com/file/d/16it_wd1JOevUQTK2_CvF_pBACTgpIPgM/view?usp=drive_link -https://drive.google.com/file/d/1IRcCj9HnJSfbyMgr5XEERGlEnWeZQwOc/view?usp=drive_link -https://drive.google.com/file/d/1Z2dIJfq_S3liGmPN9Rphvkmucnmw7tlb/view?usp=drive_link -https://drive.google.com/file/d/1J3NoAjzndGx9yNyaBOJHdNny1epzUoBt/view?usp=drive_link -https://drive.google.com/file/d/18nOvxV1k8FSmBrhT4TPo2sKKSZXougyx/view?usp=drive_link -https://drive.google.com/file/d/1CT8FxclafFMjSd7gCWVw3VSeryeiF04i/view?usp=drive_link -https://drive.google.com/file/d/16M9KVqQMFfSsXfypK0bocFft8Nz3j2Rt/view?usp=drive_link -https://drive.google.com/file/d/18QPVkw6bj6HW8LTPrQLWrrUX4R6RcF42/view?usp=drive_link -https://drive.google.com/file/d/1hQTVtA5hBTE_StXpJafTZJ3tgt2VQQ_t/view?usp=drive_link -https://drive.google.com/file/d/1Dn-d5g69H6EgAWgsFdrcbJKtz7ySsCQ8/view?usp=drive_link -https://drive.google.com/file/d/13hMr16483P7ALYv73yMRUN37fJdVQM62/view?usp=drive_link -https://drive.google.com/file/d/1848yN3XMN5zJMEgApt6KzrWgfRPfimtv/view?usp=drive_link -https://drive.google.com/file/d/1oAD9kSnS0fTgj-CjD4u9VdZ5X67IOIMa/view?usp=drive_link -https://drive.google.com/file/d/1ilzIWLCCG5b_KgF5s0wdN2I5-lFNpwC1/view?usp=drive_link -https://drive.google.com/file/d/1rjsT2YBjnidxod1s9s-myAYz8boHr-WB/view?usp=drive_link -https://drive.google.com/file/d/18Gg48HTub15bd8qzbhiCUufbVy0fbN5G/view?usp=drive_link -https://drive.google.com/file/d/1WsSnQSqmMTVSRwrhT1Y-v782My2zcjLm/view?usp=drive_link -https://drive.google.com/file/d/1ea9ZCvoyc-xqiFXgeDcA_mOWsw7VUuoi/view?usp=drive_link -https://drive.google.com/file/d/1wv1v3-XhPgbNzp62BXbJTDzMPu2tlDUc/view?usp=drive_link -https://drive.google.com/file/d/18-ikzt8LoZ83Gi3goKCELs4U4z8hrRoF/view?usp=drive_link -https://drive.google.com/file/d/16Bjhp7JNCXkGuLvyNcZowAx3W-Y-15DV/view?usp=drive_link -https://drive.google.com/file/d/1Gc-KRI-xwcp1fMR55ugbrLg_5y3SPde-/view?usp=drive_link -https://drive.google.com/file/d/1oP72Q386Z4Sy5MMm-t5yNogIe5Van_9k/view?usp=drive_link -https://drive.google.com/file/d/112T90eDUDVH-SyOV7UnZl5bscAH2hcfq/view?usp=drive_link -https://drive.google.com/file/d/1y-uKOesRRhjgDtFbG_j65f4SGg0v8XDg/view?usp=drive_link -https://drive.google.com/file/d/1LOP05OagoI3km-ZKQBrS204A85UVk7Ok/view?usp=drive_link -https://drive.google.com/file/d/1QkHQKgasVzWsmdPvkXgGhWyQ84d93_Az/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_pingpong_test.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_pingpong_test.txt deleted file mode 100644 index c622def6..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_pingpong_test.txt +++ /dev/null @@ -1 +0,0 @@ -https://drive.google.com/drive/folders/1Ut2cv6o6Pkfgg46DgwVUM7Z5PkNG8eJ- diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_pro_pencil.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_pro_pencil.txt deleted file mode 100644 index bdfc447f..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_pro_pencil.txt +++ /dev/null @@ -1 +0,0 @@ -https://drive.google.com/drive/folders/1FqxPV0PgvgIu8XFjtvZSPSExuNcxVVAY diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_screw_driver.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_screw_driver.txt deleted file mode 100644 index fe5548fd..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_screw_driver.txt +++ /dev/null @@ -1,2 +0,0 @@ -https://drive.google.com/drive/folders/1SKtG0ct9q0nVdYssJNMWSOjikcXliT58 -https://drive.google.com/file/d/1nchD21O30B3i3LDoqramo1zgW5YvpJIN/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_tape.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_tape.txt deleted file mode 100644 index 46d95479..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_tape.txt +++ /dev/null @@ -1,2 +0,0 @@ -https://drive.google.com/drive/folders/1_4DHf2cma0xsChLQFghwigX6Ukti5-zQ -https://drive.google.com/file/d/1_8vS4hDNDgUQY-SmekrNaa7dF67QJYU-/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_thread_velcro.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_thread_velcro.txt deleted file mode 100644 index 46d95479..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_thread_velcro.txt +++ /dev/null @@ -1,2 +0,0 @@ -https://drive.google.com/drive/folders/1_4DHf2cma0xsChLQFghwigX6Ukti5-zQ -https://drive.google.com/file/d/1_8vS4hDNDgUQY-SmekrNaa7dF67QJYU-/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_towel.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_towel.txt deleted file mode 100644 index 19288fa5..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_towel.txt +++ /dev/null @@ -1,2 +0,0 @@ -https://drive.google.com/drive/folders/1fAD7vkyTGTFB_nGXIKofCU1U05oE3MFv -https://drive.google.com/file/d/1XzyQ2B6LLvcurIonOpEu4nij2qwNWshH/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_vinh_cup.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_vinh_cup.txt deleted file mode 100644 index 65ec35c4..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_vinh_cup.txt +++ /dev/null @@ -1,53 +0,0 @@ -https://drive.google.com/drive/folders/13EQsVsnxT86K20QAoyE_YpsFbQ7fZQdu -https://drive.google.com/file/d/1-W_JHghZG65FNTVhw1SXhtQrazdLL3Ue/view?usp=drive_link -https://drive.google.com/file/d/1VwRJgdWUo-2nQaNM7Bs77-fsm8iwUxEo/view?usp=drive_link -https://drive.google.com/file/d/1wFzGRo5iYA13WLi6IV1ry64RyahQBFio/view?usp=drive_link -https://drive.google.com/file/d/1IKtQzQ-n-UTv64hYpReu2R4cqUvmNQqD/view?usp=drive_link -https://drive.google.com/file/d/1GicVci9OiuuZZH79i5Mg7AtWod94MzwT/view?usp=drive_link -https://drive.google.com/file/d/1JVnIoR7EIQp70T4eAf9RX65JcTrzsjQc/view?usp=drive_link -https://drive.google.com/file/d/1W2xr4h23ucjPrc-mBEeqnACsfaImpc0p/view?usp=drive_link -https://drive.google.com/file/d/10xj_0V7A07o3uCa7v5omUrTC0YlPW8H3/view?usp=drive_link -https://drive.google.com/file/d/1FOc3EMaCy8Mb0_a7PuXLAwKwvxkbKmwU/view?usp=drive_link -https://drive.google.com/file/d/143PgDXBcf2GQ0Q07ZPMVMfBgZDd5sLJG/view?usp=drive_link -https://drive.google.com/file/d/1pE5Tyj0LlGbGWvUzuhixp86Ibu55Ez3I/view?usp=drive_link -https://drive.google.com/file/d/141668b1VzX80ncrVJPzhkoAeIFB4MEK9/view?usp=drive_link -https://drive.google.com/file/d/1bw12lo37p1ZvRvErHsll7cEYi2OxscvZ/view?usp=drive_link -https://drive.google.com/file/d/1zfnMFvbgBjl6SzYhksbaOzfbwLrCN6tb/view?usp=drive_link -https://drive.google.com/file/d/1-GIszA6mUJMaNB-tdh9r9skc77SWA0VX/view?usp=drive_link -https://drive.google.com/file/d/1fTB0zWFYU6zh4IIUFT2zX_OkwYqmElwY/view?usp=drive_link -https://drive.google.com/file/d/1gPIPNKGmrO9c7gKF7SP0SuUYbIBBq8z1/view?usp=drive_link -https://drive.google.com/file/d/12JeJ-dQd5lYyn6PlDOGdE-ChVeiZ-Uv0/view?usp=drive_link -https://drive.google.com/file/d/100_20cgCqerU6qoh3TfTbwLy9mlDAFEG/view?usp=drive_link -https://drive.google.com/file/d/111oAGJ76ku_pYgbBoIdZAC1_XEQcPI__/view?usp=drive_link -https://drive.google.com/file/d/1UhC8L-354ZQ2gblPFGI35EMsVwfpuKa0/view?usp=drive_link -https://drive.google.com/file/d/1sIXQSgUR_xdrNtGrL6QGBnkLMKErsIp1/view?usp=drive_link -https://drive.google.com/file/d/16Ax77bDSIXnsn4GFL8XYKKT1P6bPpfMd/view?usp=drive_link -https://drive.google.com/file/d/1pgRVYwwVIsWq_qsWqZpe1UBzZfF5Fa9D/view?usp=drive_link -https://drive.google.com/file/d/1jtimaZkWsY1P5gC2bbS64H_WCUU7HXN2/view?usp=drive_link -https://drive.google.com/file/d/1N6Bh02P-RiTEgtx1YH1Db_X3TGpP-X_r/view?usp=drive_link -https://drive.google.com/file/d/14Fy8EwJ8d9Vh97Yt1VOvUChSCrfIjBij/view?usp=drive_link -https://drive.google.com/file/d/1IRuv42dvIMPuKhcMZmuXaBjJ-lPFOmQd/view?usp=drive_link -https://drive.google.com/file/d/16XWzNY2D8ucVVn5geBgsVdhm3ppO4que/view?usp=drive_link -https://drive.google.com/file/d/1xsVOoQgthK_L_SDrmq_JvQgUpAvPEAY8/view?usp=drive_link -https://drive.google.com/file/d/1bZbw66DyEMvnJnzkdUUNbKjvNKg8KFYM/view?usp=drive_link -https://drive.google.com/file/d/1CyTVkdrNGGpouCXr4CfhKbMzE6Ah3oo3/view?usp=drive_link -https://drive.google.com/file/d/1hDRyeM-XEDpHXpptbT8LvNnlQUR3PWOh/view?usp=drive_link -https://drive.google.com/file/d/1XhHWxbra8Iy5irQZ83IvxwaJqHq9x4s1/view?usp=drive_link -https://drive.google.com/file/d/1haZcn6aM1o4JlmP9tJj3x2enrxiPaDSD/view?usp=drive_link -https://drive.google.com/file/d/1ypDyuUTbljaBZ34f-t7lj3O_0bRmyX2n/view?usp=drive_link -https://drive.google.com/file/d/1ILEEZo_tA9_ChIAprr2mPaNVKZi5vXsO/view?usp=drive_link -https://drive.google.com/file/d/1U7nVYFaGE8vVTfLCW33D74xOjDcqfgyJ/view?usp=drive_link -https://drive.google.com/file/d/1rZ93_rmCov5SMDxPkfM3qthcRELZrQX6/view?usp=drive_link -https://drive.google.com/file/d/1mYO1b_csddtyE3qT6cwLiw-m2w2_1Lxh/view?usp=drive_link -https://drive.google.com/file/d/1xz7Q5x2jikY8wJQjMRQpRws6AnfWlHm5/view?usp=drive_link -https://drive.google.com/file/d/1OO8GaO-0FrSZRd1kxMYwBmubyiLOWnbl/view?usp=drive_link -https://drive.google.com/file/d/1EXn4NVDmf-4_HCy34mYwT-vwK2CFI9ev/view?usp=drive_link -https://drive.google.com/file/d/10hH70XhXRL9C5SnAG4toHtfHqfJUJo4H/view?usp=drive_link -https://drive.google.com/file/d/18tiBcxea0guUai4lwsXQvt0q2LZ8ZnnJ/view?usp=drive_link -https://drive.google.com/file/d/1Q8R8qv37vk5PQ5kQ2ibx6BFLOySD0VpX/view?usp=drive_link -https://drive.google.com/file/d/17aNriHzjhdibCyuUjQoMFZqjybJZtggG/view?usp=drive_link -https://drive.google.com/file/d/1LVjEYHSdeKm6CotU1QguIeNEPaIaFl_1/view?usp=drive_link -https://drive.google.com/file/d/1ufAhE_EkgJ85slg2EW8aW_grOzE_Lmxd/view?usp=drive_link -https://drive.google.com/file/d/1wtzLtXrkw9eXRGESTPIOlpl1tInu-b2m/view?usp=drive_link -https://drive.google.com/file/d/1Mk5qvVtD_QHwGOUApRq76TUw2T5THu6f/view?usp=drive_link -https://drive.google.com/file/d/1y1WQ3hboWVJ68KEYQQ3OhreGuaUpSgwc/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_vinh_cup_left.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_vinh_cup_left.txt deleted file mode 100644 index 8823a9b5..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_vinh_cup_left.txt +++ /dev/null @@ -1,52 +0,0 @@ -https://drive.google.com/drive/folders/1dxWh6YFZUDt6qXIoxgD9bla3CiFjZ11C -https://drive.google.com/file/d/1hNBJN00SCAlOl0ZEgm7RRGbAGDjyBs0p/view?usp=drive_link -https://drive.google.com/file/d/17He0CVwXGeoMmXg4SHKo-osNn7YPKVL7/view?usp=drive_link -https://drive.google.com/file/d/1laNKUVID1x2CV6a2O2WQjwFewKu4lidL/view?usp=drive_link -https://drive.google.com/file/d/1pNf36xbZJGRArYLmNAvRj5y6CoqdC6kB/view?usp=drive_link -https://drive.google.com/file/d/1_4E1-y3JXk5I0ebycLYM70YDPK9g52gZ/view?usp=drive_link -https://drive.google.com/file/d/1PHfzhGPdbolKyOpS3FnR2w7Q8zUlJXSk/view?usp=drive_link -https://drive.google.com/file/d/17ls2PPN-Pi3tEuK059cwV2_iDT8aGhOO/view?usp=drive_link -https://drive.google.com/file/d/1LWsg6PmCT00Kv_N_slrmcwKmQPGoBT3k/view?usp=drive_link -https://drive.google.com/file/d/12LckrchoHTUVH7rxi8J7zD9dA19GXvoW/view?usp=drive_link -https://drive.google.com/file/d/1VqrJKjAIkj5gtFXL69grdSeu9CyaqnSw/view?usp=drive_link -https://drive.google.com/file/d/1g5rQYDBZvW-kUtYPeyF3qmd53v6k7kXu/view?usp=drive_link -https://drive.google.com/file/d/10kUgaSJ0TS7teaG83G3Rf_DG4XGrBt6A/view?usp=drive_link -https://drive.google.com/file/d/1je9XmneZQZvTma5adMJICUPDovW3ppei/view?usp=drive_link -https://drive.google.com/file/d/1v28r6bedwZGbUPVVTVImXhK-42XdtGfj/view?usp=drive_link -https://drive.google.com/file/d/1-TEEx9sGVvzMMaNXYfQMtY2JJ6cvl0dT/view?usp=drive_link -https://drive.google.com/file/d/1YdBKdJFP9rJWBUX7qrOYL_gfUA8o6J9M/view?usp=drive_link -https://drive.google.com/file/d/1X9vffwQHNUSKLXr2RlYNtbWDIFCIDfdF/view?usp=drive_link -https://drive.google.com/file/d/11hqesqa5kvEe5FABUnZRcvmOhR373cYM/view?usp=drive_link -https://drive.google.com/file/d/1ltTTECjEcbQPgS3UPRgMzaE2x9n6H7dC/view?usp=drive_link -https://drive.google.com/file/d/1Zxqfa29JdwT-bfMpivi6IG2vz34d21dD/view?usp=drive_link -https://drive.google.com/file/d/11LQlVxS5hz494dYUJ_PNRPx2NHIJbQns/view?usp=drive_link -https://drive.google.com/file/d/1i1JhNtnZpO_E8rAv8gxBP3ZTZRvcvsZi/view?usp=drive_link -https://drive.google.com/file/d/11jOXAr2EULUO4Qkm748634lg4UUFho5U/view?usp=drive_link -https://drive.google.com/file/d/1rj67wur8DdB_Pipwx24bY43xu4X1eQ5e/view?usp=drive_link -https://drive.google.com/file/d/15ZTm6lO6f_JQy_4SNfrOu3iPYn1Ro8mh/view?usp=drive_link -https://drive.google.com/file/d/1q4gBtqWPJtCwXEvknGgN0WHGp7Vfn1b9/view?usp=drive_link -https://drive.google.com/file/d/1t17keyre47AYqm8GgXiQ7EcvcUkeSiDQ/view?usp=drive_link -https://drive.google.com/file/d/1OYUPGxtZgOF86Ng_BEOTXm_XOYpuQPsO/view?usp=drive_link -https://drive.google.com/file/d/1cBjbGHi3dwWHtx6r9EQJi0JT_CE3LuHt/view?usp=drive_link -https://drive.google.com/file/d/14qaMyF0mcbCB-fCYKNyo5_2NahSC6D5u/view?usp=drive_link -https://drive.google.com/file/d/12FgX86eA7Y5co9ULBVK80XMsiKQSs-Ri/view?usp=drive_link -https://drive.google.com/file/d/1yvoHWidf-jdBVw6qCCXOFfkVwKj_2hPk/view?usp=drive_link -https://drive.google.com/file/d/1a2SugsSDlC8UtUrFzp-_KAwyZckQOvdQ/view?usp=drive_link -https://drive.google.com/file/d/1l8pILBFSAosypWJMza2K09Vm7rug9axm/view?usp=drive_link -https://drive.google.com/file/d/1hfPQ8dBCk97PnOhq6_MIISm3IEzcOxJG/view?usp=drive_link -https://drive.google.com/file/d/1PPAUwlJCFKpms8cqF_k1v2_fCgDBOc3S/view?usp=drive_link -https://drive.google.com/file/d/1lVKQZeqFfK3amEmLuFhYLUFQ2eyE8rOW/view?usp=drive_link -https://drive.google.com/file/d/1K9iPMLfDowcIFoyzpvgn88dQ6x6kVwNG/view?usp=drive_link -https://drive.google.com/file/d/1PNvMqG9tL7QxeLaYBGHiWYR6SYb5iIct/view?usp=drive_link -https://drive.google.com/file/d/1xkRtzbvIkUsylx9hrFLGQsJn0h1EYu-5/view?usp=drive_link -https://drive.google.com/file/d/1nxMRrJlSayjDIfr5CmHO1NzAw3COhsLi/view?usp=drive_link -https://drive.google.com/file/d/1Qs3WEyMGrmagiHIkkFEueWNnJhkUeR1s/view?usp=drive_link -https://drive.google.com/file/d/1D-G2_Q0SS3M8zyJbg_XzkF2ANPw1HTuX/view?usp=drive_link -https://drive.google.com/file/d/1mdmJsDGO-YtJAOF_yPKl6lq4PJOIbQhT/view?usp=drive_link -https://drive.google.com/file/d/11m9bwfop_sPmnQr_8amB6EEsrbAeG_z5/view?usp=drive_link -https://drive.google.com/file/d/19tyYt5FMn5kru0g9o2nMJhKPnsDqkIZv/view?usp=drive_link -https://drive.google.com/file/d/1XvTpUdsVTZ-vydvdYYmynbma--HfUGSl/view?usp=drive_link -https://drive.google.com/file/d/1MO3hFu68J6NohTzr9aB_fY02VA6QSOqj/view?usp=drive_link -https://drive.google.com/file/d/1Lh-UjwAk__04YOTWINF_QGVU8SjetVaY/view?usp=drive_link -https://drive.google.com/file/d/1jkSOUwZV5GJ7rZlVeErjcu0DBQs8Np0d/view?usp=drive_link -https://drive.google.com/file/d/1VIN1eLI-93WrVQwCjsv6XQr353DqqBYA/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_ziploc_slide.txt b/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_ziploc_slide.txt deleted file mode 100644 index 5db6ed95..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_aloha_raw_urls/static_ziploc_slide.txt +++ /dev/null @@ -1,8 +0,0 @@ -https://drive.google.com/drive/folders/1EgKar7rWBmTIRmeJYZciSwjZx3uP2mHO -https://drive.google.com/file/d/12eYWQO15atK2hBjXhynPJd9MKAj_42pz/view?usp=drive_link -https://drive.google.com/file/d/1Ul4oEeICJDjgfYTl4H1uaisTzVYIM6wd/view?usp=drive_link -https://drive.google.com/file/d/1WSF-OG8lKSe2wVYCv5D1aJNipxpgddk-/view?usp=drive_link -https://drive.google.com/file/d/1_ppD5j5sFh26aWW0JmhLzJMeNB-lCArk/view?usp=drive_link -https://drive.google.com/file/d/1WUp846dgWXYhu4oJfhHxiU6YL_7N6s4W/view?usp=drive_link -https://drive.google.com/file/d/1HRZNAIoAQw_uYiPwnBvtBioQoqiqoXdA/view?usp=drive_link -https://drive.google.com/file/d/1hedGq-QDMnIn8GlXXBC3GiEJ_Y-LTxyt/view?usp=drive_link diff --git a/lerobot/common/datasets/push_dataset_to_hub/_diffusion_policy_replay_buffer.py b/lerobot/common/datasets/push_dataset_to_hub/_diffusion_policy_replay_buffer.py deleted file mode 100644 index 33b4c974..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_diffusion_policy_replay_buffer.py +++ /dev/null @@ -1,634 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Helper code for loading PushT dataset from Diffusion Policy (https://diffusion-policy.cs.columbia.edu/) - -Copied from the original Diffusion Policy repository and used in our `download_and_upload_dataset.py` script. -""" - -from __future__ import annotations - -import math -import numbers -import os -from functools import cached_property - -import numcodecs -import numpy as np -import zarr - - -def check_chunks_compatible(chunks: tuple, shape: tuple): - assert len(shape) == len(chunks) - for c in chunks: - assert isinstance(c, numbers.Integral) - assert c > 0 - - -def rechunk_recompress_array(group, name, chunks=None, chunk_length=None, compressor=None, tmp_key="_temp"): - old_arr = group[name] - if chunks is None: - chunks = (chunk_length,) + old_arr.chunks[1:] if chunk_length is not None else old_arr.chunks - check_chunks_compatible(chunks, old_arr.shape) - - if compressor is None: - compressor = old_arr.compressor - - if (chunks == old_arr.chunks) and (compressor == old_arr.compressor): - # no change - return old_arr - - # rechunk recompress - group.move(name, tmp_key) - old_arr = group[tmp_key] - n_copied, n_skipped, n_bytes_copied = zarr.copy( - source=old_arr, - dest=group, - name=name, - chunks=chunks, - compressor=compressor, - ) - del group[tmp_key] - arr = group[name] - return arr - - -def get_optimal_chunks(shape, dtype, target_chunk_bytes=2e6, max_chunk_length=None): - """ - Common shapes - T,D - T,N,D - T,H,W,C - T,N,H,W,C - """ - itemsize = np.dtype(dtype).itemsize - # reversed - rshape = list(shape[::-1]) - if max_chunk_length is not None: - rshape[-1] = int(max_chunk_length) - split_idx = len(shape) - 1 - for i in range(len(shape) - 1): - this_chunk_bytes = itemsize * np.prod(rshape[:i]) - next_chunk_bytes = itemsize * np.prod(rshape[: i + 1]) - if this_chunk_bytes <= target_chunk_bytes and next_chunk_bytes > target_chunk_bytes: - split_idx = i - - rchunks = rshape[:split_idx] - item_chunk_bytes = itemsize * np.prod(rshape[:split_idx]) - this_max_chunk_length = rshape[split_idx] - next_chunk_length = min(this_max_chunk_length, math.ceil(target_chunk_bytes / item_chunk_bytes)) - rchunks.append(next_chunk_length) - len_diff = len(shape) - len(rchunks) - rchunks.extend([1] * len_diff) - chunks = tuple(rchunks[::-1]) - # print(np.prod(chunks) * itemsize / target_chunk_bytes) - return chunks - - -class ReplayBuffer: - """ - Zarr-based temporal datastructure. - Assumes first dimension to be time. Only chunk in time dimension. - """ - - def __init__(self, root: zarr.Group | dict[str, dict]): - """ - Dummy constructor. Use copy_from* and create_from* class methods instead. - """ - assert "data" in root - assert "meta" in root - assert "episode_ends" in root["meta"] - for value in root["data"].values(): - assert value.shape[0] == root["meta"]["episode_ends"][-1] - self.root = root - - # ============= create constructors =============== - @classmethod - def create_empty_zarr(cls, storage=None, root=None): - if root is None: - if storage is None: - storage = zarr.MemoryStore() - root = zarr.group(store=storage) - root.require_group("data", overwrite=False) - meta = root.require_group("meta", overwrite=False) - if "episode_ends" not in meta: - meta.zeros("episode_ends", shape=(0,), dtype=np.int64, compressor=None, overwrite=False) - return cls(root=root) - - @classmethod - def create_empty_numpy(cls): - root = {"data": {}, "meta": {"episode_ends": np.zeros((0,), dtype=np.int64)}} - return cls(root=root) - - @classmethod - def create_from_group(cls, group, **kwargs): - if "data" not in group: - # create from stratch - buffer = cls.create_empty_zarr(root=group, **kwargs) - else: - # already exist - buffer = cls(root=group, **kwargs) - return buffer - - @classmethod - def create_from_path(cls, zarr_path, mode="r", **kwargs): - """ - Open a on-disk zarr directly (for dataset larger than memory). - Slower. - """ - group = zarr.open(os.path.expanduser(zarr_path), mode) - return cls.create_from_group(group, **kwargs) - - # ============= copy constructors =============== - @classmethod - def copy_from_store( - cls, - src_store, - store=None, - keys=None, - chunks: dict[str, tuple] | None = None, - compressors: dict | str | numcodecs.abc.Codec | None = None, - if_exists="replace", - **kwargs, - ): - """ - Load to memory. - """ - src_root = zarr.group(src_store) - if chunks is None: - chunks = {} - if compressors is None: - compressors = {} - root = None - if store is None: - # numpy backend - meta = {} - for key, value in src_root["meta"].items(): - if len(value.shape) == 0: - meta[key] = np.array(value) - else: - meta[key] = value[:] - - if keys is None: - keys = src_root["data"].keys() - data = {} - for key in keys: - arr = src_root["data"][key] - data[key] = arr[:] - - root = {"meta": meta, "data": data} - else: - root = zarr.group(store=store) - # copy without recompression - n_copied, n_skipped, n_bytes_copied = zarr.copy_store( - source=src_store, dest=store, source_path="/meta", dest_path="/meta", if_exists=if_exists - ) - data_group = root.create_group("data", overwrite=True) - if keys is None: - keys = src_root["data"].keys() - for key in keys: - value = src_root["data"][key] - cks = cls._resolve_array_chunks(chunks=chunks, key=key, array=value) - cpr = cls._resolve_array_compressor(compressors=compressors, key=key, array=value) - if cks == value.chunks and cpr == value.compressor: - # copy without recompression - this_path = "/data/" + key - n_copied, n_skipped, n_bytes_copied = zarr.copy_store( - source=src_store, - dest=store, - source_path=this_path, - dest_path=this_path, - if_exists=if_exists, - ) - else: - # copy with recompression - n_copied, n_skipped, n_bytes_copied = zarr.copy( - source=value, - dest=data_group, - name=key, - chunks=cks, - compressor=cpr, - if_exists=if_exists, - ) - buffer = cls(root=root) - return buffer - - @classmethod - def copy_from_path( - cls, - zarr_path, - backend=None, - store=None, - keys=None, - chunks: dict[str, tuple] | None = None, - compressors: dict | str | numcodecs.abc.Codec | None = None, - if_exists="replace", - **kwargs, - ): - """ - Copy a on-disk zarr to in-memory compressed. - Recommended - """ - if chunks is None: - chunks = {} - if compressors is None: - compressors = {} - if backend == "numpy": - print("backend argument is deprecated!") - store = None - group = zarr.open(os.path.expanduser(zarr_path), "r") - return cls.copy_from_store( - src_store=group.store, - store=store, - keys=keys, - chunks=chunks, - compressors=compressors, - if_exists=if_exists, - **kwargs, - ) - - # ============= save methods =============== - def save_to_store( - self, - store, - chunks: dict[str, tuple] | None = None, - compressors: str | numcodecs.abc.Codec | dict | None = None, - if_exists="replace", - **kwargs, - ): - root = zarr.group(store) - if chunks is None: - chunks = {} - if compressors is None: - compressors = {} - if self.backend == "zarr": - # recompression free copy - n_copied, n_skipped, n_bytes_copied = zarr.copy_store( - source=self.root.store, - dest=store, - source_path="/meta", - dest_path="/meta", - if_exists=if_exists, - ) - else: - meta_group = root.create_group("meta", overwrite=True) - # save meta, no chunking - for key, value in self.root["meta"].items(): - _ = meta_group.array(name=key, data=value, shape=value.shape, chunks=value.shape) - - # save data, chunk - data_group = root.create_group("data", overwrite=True) - for key, value in self.root["data"].items(): - cks = self._resolve_array_chunks(chunks=chunks, key=key, array=value) - cpr = self._resolve_array_compressor(compressors=compressors, key=key, array=value) - if isinstance(value, zarr.Array): - if cks == value.chunks and cpr == value.compressor: - # copy without recompression - this_path = "/data/" + key - n_copied, n_skipped, n_bytes_copied = zarr.copy_store( - source=self.root.store, - dest=store, - source_path=this_path, - dest_path=this_path, - if_exists=if_exists, - ) - else: - # copy with recompression - n_copied, n_skipped, n_bytes_copied = zarr.copy( - source=value, - dest=data_group, - name=key, - chunks=cks, - compressor=cpr, - if_exists=if_exists, - ) - else: - # numpy - _ = data_group.array(name=key, data=value, chunks=cks, compressor=cpr) - return store - - def save_to_path( - self, - zarr_path, - chunks: dict[str, tuple] | None = None, - compressors: str | numcodecs.abc.Codec | dict | None = None, - if_exists="replace", - **kwargs, - ): - if chunks is None: - chunks = {} - if compressors is None: - compressors = {} - store = zarr.DirectoryStore(os.path.expanduser(zarr_path)) - return self.save_to_store( - store, chunks=chunks, compressors=compressors, if_exists=if_exists, **kwargs - ) - - @staticmethod - def resolve_compressor(compressor="default"): - if compressor == "default": - compressor = numcodecs.Blosc(cname="lz4", clevel=5, shuffle=numcodecs.Blosc.NOSHUFFLE) - elif compressor == "disk": - compressor = numcodecs.Blosc("zstd", clevel=5, shuffle=numcodecs.Blosc.BITSHUFFLE) - return compressor - - @classmethod - def _resolve_array_compressor(cls, compressors: dict | str | numcodecs.abc.Codec, key, array): - # allows compressor to be explicitly set to None - cpr = "nil" - if isinstance(compressors, dict): - if key in compressors: - cpr = cls.resolve_compressor(compressors[key]) - elif isinstance(array, zarr.Array): - cpr = array.compressor - else: - cpr = cls.resolve_compressor(compressors) - # backup default - if cpr == "nil": - cpr = cls.resolve_compressor("default") - return cpr - - @classmethod - def _resolve_array_chunks(cls, chunks: dict | tuple, key, array): - cks = None - if isinstance(chunks, dict): - if key in chunks: - cks = chunks[key] - elif isinstance(array, zarr.Array): - cks = array.chunks - elif isinstance(chunks, tuple): - cks = chunks - else: - raise TypeError(f"Unsupported chunks type {type(chunks)}") - # backup default - if cks is None: - cks = get_optimal_chunks(shape=array.shape, dtype=array.dtype) - # check - check_chunks_compatible(chunks=cks, shape=array.shape) - return cks - - # ============= properties ================= - @cached_property - def data(self): - return self.root["data"] - - @cached_property - def meta(self): - return self.root["meta"] - - def update_meta(self, data): - # sanitize data - np_data = {} - for key, value in data.items(): - if isinstance(value, np.ndarray): - np_data[key] = value - else: - arr = np.array(value) - if arr.dtype == object: - raise TypeError(f"Invalid value type {type(value)}") - np_data[key] = arr - - meta_group = self.meta - if self.backend == "zarr": - for key, value in np_data.items(): - _ = meta_group.array( - name=key, data=value, shape=value.shape, chunks=value.shape, overwrite=True - ) - else: - meta_group.update(np_data) - - return meta_group - - @property - def episode_ends(self): - return self.meta["episode_ends"] - - def get_episode_idxs(self): - import numba - - numba.jit(nopython=True) - - def _get_episode_idxs(episode_ends): - result = np.zeros((episode_ends[-1],), dtype=np.int64) - for i in range(len(episode_ends)): - start = 0 - if i > 0: - start = episode_ends[i - 1] - end = episode_ends[i] - for idx in range(start, end): - result[idx] = i - return result - - return _get_episode_idxs(self.episode_ends) - - @property - def backend(self): - backend = "numpy" - if isinstance(self.root, zarr.Group): - backend = "zarr" - return backend - - # =========== dict-like API ============== - def __repr__(self) -> str: - if self.backend == "zarr": - return str(self.root.tree()) - else: - return super().__repr__() - - def keys(self): - return self.data.keys() - - def values(self): - return self.data.values() - - def items(self): - return self.data.items() - - def __getitem__(self, key): - return self.data[key] - - def __contains__(self, key): - return key in self.data - - # =========== our API ============== - @property - def n_steps(self): - if len(self.episode_ends) == 0: - return 0 - return self.episode_ends[-1] - - @property - def n_episodes(self): - return len(self.episode_ends) - - @property - def chunk_size(self): - if self.backend == "zarr": - return next(iter(self.data.arrays()))[-1].chunks[0] - return None - - @property - def episode_lengths(self): - ends = self.episode_ends[:] - ends = np.insert(ends, 0, 0) - lengths = np.diff(ends) - return lengths - - def add_episode( - self, - data: dict[str, np.ndarray], - chunks: dict[str, tuple] | None = None, - compressors: str | numcodecs.abc.Codec | dict | None = None, - ): - if chunks is None: - chunks = {} - if compressors is None: - compressors = {} - assert len(data) > 0 - is_zarr = self.backend == "zarr" - - curr_len = self.n_steps - episode_length = None - for value in data.values(): - assert len(value.shape) >= 1 - if episode_length is None: - episode_length = len(value) - else: - assert episode_length == len(value) - new_len = curr_len + episode_length - - for key, value in data.items(): - new_shape = (new_len,) + value.shape[1:] - # create array - if key not in self.data: - if is_zarr: - cks = self._resolve_array_chunks(chunks=chunks, key=key, array=value) - cpr = self._resolve_array_compressor(compressors=compressors, key=key, array=value) - arr = self.data.zeros( - name=key, shape=new_shape, chunks=cks, dtype=value.dtype, compressor=cpr - ) - else: - # copy data to prevent modify - arr = np.zeros(shape=new_shape, dtype=value.dtype) - self.data[key] = arr - else: - arr = self.data[key] - assert value.shape[1:] == arr.shape[1:] - # same method for both zarr and numpy - if is_zarr: - arr.resize(new_shape) - else: - arr.resize(new_shape, refcheck=False) - # copy data - arr[-value.shape[0] :] = value - - # append to episode ends - episode_ends = self.episode_ends - if is_zarr: - episode_ends.resize(episode_ends.shape[0] + 1) - else: - episode_ends.resize(episode_ends.shape[0] + 1, refcheck=False) - episode_ends[-1] = new_len - - # rechunk - if is_zarr and episode_ends.chunks[0] < episode_ends.shape[0]: - rechunk_recompress_array(self.meta, "episode_ends", chunk_length=int(episode_ends.shape[0] * 1.5)) - - def drop_episode(self): - is_zarr = self.backend == "zarr" - episode_ends = self.episode_ends[:].copy() - assert len(episode_ends) > 0 - start_idx = 0 - if len(episode_ends) > 1: - start_idx = episode_ends[-2] - for value in self.data.values(): - new_shape = (start_idx,) + value.shape[1:] - if is_zarr: - value.resize(new_shape) - else: - value.resize(new_shape, refcheck=False) - if is_zarr: - self.episode_ends.resize(len(episode_ends) - 1) - else: - self.episode_ends.resize(len(episode_ends) - 1, refcheck=False) - - def pop_episode(self): - assert self.n_episodes > 0 - episode = self.get_episode(self.n_episodes - 1, copy=True) - self.drop_episode() - return episode - - def extend(self, data): - self.add_episode(data) - - def get_episode(self, idx, copy=False): - idx = list(range(len(self.episode_ends)))[idx] - start_idx = 0 - if idx > 0: - start_idx = self.episode_ends[idx - 1] - end_idx = self.episode_ends[idx] - result = self.get_steps_slice(start_idx, end_idx, copy=copy) - return result - - def get_episode_slice(self, idx): - start_idx = 0 - if idx > 0: - start_idx = self.episode_ends[idx - 1] - end_idx = self.episode_ends[idx] - return slice(start_idx, end_idx) - - def get_steps_slice(self, start, stop, step=None, copy=False): - _slice = slice(start, stop, step) - - result = {} - for key, value in self.data.items(): - x = value[_slice] - if copy and isinstance(value, np.ndarray): - x = x.copy() - result[key] = x - return result - - # =========== chunking ============= - def get_chunks(self) -> dict: - assert self.backend == "zarr" - chunks = {} - for key, value in self.data.items(): - chunks[key] = value.chunks - return chunks - - def set_chunks(self, chunks: dict): - assert self.backend == "zarr" - for key, value in chunks.items(): - if key in self.data: - arr = self.data[key] - if value != arr.chunks: - check_chunks_compatible(chunks=value, shape=arr.shape) - rechunk_recompress_array(self.data, key, chunks=value) - - def get_compressors(self) -> dict: - assert self.backend == "zarr" - compressors = {} - for key, value in self.data.items(): - compressors[key] = value.compressor - return compressors - - def set_compressors(self, compressors: dict): - assert self.backend == "zarr" - for key, value in compressors.items(): - if key in self.data: - arr = self.data[key] - compressor = self.resolve_compressor(value) - if compressor != arr.compressor: - rechunk_recompress_array(self.data, key, compressor=compressor) diff --git a/lerobot/common/datasets/push_dataset_to_hub/_download_raw.py b/lerobot/common/datasets/push_dataset_to_hub/_download_raw.py deleted file mode 100644 index cc291cea..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_download_raw.py +++ /dev/null @@ -1,202 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -This file contains download scripts for raw datasets. - -Example of usage: -``` -python lerobot/common/datasets/push_dataset_to_hub/_download_raw.py \ ---raw-dir data/lerobot-raw/pusht_raw \ ---repo-id lerobot-raw/pusht_raw -``` -""" - -import argparse -import logging -import warnings -from pathlib import Path - -from huggingface_hub import snapshot_download - -from lerobot.common.datasets.push_dataset_to_hub.utils import check_repo_id - -# {raw_repo_id: raw_format} -AVAILABLE_RAW_REPO_IDS = { - "lerobot-raw/aloha_mobile_cabinet_raw": "aloha_hdf5", - "lerobot-raw/aloha_mobile_chair_raw": "aloha_hdf5", - "lerobot-raw/aloha_mobile_elevator_raw": "aloha_hdf5", - "lerobot-raw/aloha_mobile_shrimp_raw": "aloha_hdf5", - "lerobot-raw/aloha_mobile_wash_pan_raw": "aloha_hdf5", - "lerobot-raw/aloha_mobile_wipe_wine_raw": "aloha_hdf5", - "lerobot-raw/aloha_sim_insertion_human_raw": "aloha_hdf5", - "lerobot-raw/aloha_sim_insertion_scripted_raw": "aloha_hdf5", - "lerobot-raw/aloha_sim_transfer_cube_human_raw": "aloha_hdf5", - "lerobot-raw/aloha_sim_transfer_cube_scripted_raw": "aloha_hdf5", - "lerobot-raw/aloha_static_battery_raw": "aloha_hdf5", - "lerobot-raw/aloha_static_candy_raw": "aloha_hdf5", - "lerobot-raw/aloha_static_coffee_new_raw": "aloha_hdf5", - "lerobot-raw/aloha_static_coffee_raw": "aloha_hdf5", - "lerobot-raw/aloha_static_cups_open_raw": "aloha_hdf5", - "lerobot-raw/aloha_static_fork_pick_up_raw": "aloha_hdf5", - "lerobot-raw/aloha_static_pingpong_test_raw": "aloha_hdf5", - "lerobot-raw/aloha_static_pro_pencil_raw": "aloha_hdf5", - "lerobot-raw/aloha_static_screw_driver_raw": "aloha_hdf5", - "lerobot-raw/aloha_static_tape_raw": "aloha_hdf5", - "lerobot-raw/aloha_static_thread_velcro_raw": "aloha_hdf5", - "lerobot-raw/aloha_static_towel_raw": "aloha_hdf5", - "lerobot-raw/aloha_static_vinh_cup_left_raw": "aloha_hdf5", - "lerobot-raw/aloha_static_vinh_cup_raw": "aloha_hdf5", - "lerobot-raw/aloha_static_ziploc_slide_raw": "aloha_hdf5", - "lerobot-raw/umi_cup_in_the_wild_raw": "umi_zarr", - "lerobot-raw/pusht_raw": "pusht_zarr", - "lerobot-raw/unitreeh1_fold_clothes_raw": "aloha_hdf5", - "lerobot-raw/unitreeh1_rearrange_objects_raw": "aloha_hdf5", - "lerobot-raw/unitreeh1_two_robot_greeting_raw": "aloha_hdf5", - "lerobot-raw/unitreeh1_warehouse_raw": "aloha_hdf5", - "lerobot-raw/xarm_lift_medium_raw": "xarm_pkl", - "lerobot-raw/xarm_lift_medium_replay_raw": "xarm_pkl", - "lerobot-raw/xarm_push_medium_raw": "xarm_pkl", - "lerobot-raw/xarm_push_medium_replay_raw": "xarm_pkl", - "lerobot-raw/fractal20220817_data_raw": "openx_rlds.fractal20220817_data", - "lerobot-raw/kuka_raw": "openx_rlds.kuka", - "lerobot-raw/bridge_openx_raw": "openx_rlds.bridge_openx", - "lerobot-raw/taco_play_raw": "openx_rlds.taco_play", - "lerobot-raw/jaco_play_raw": "openx_rlds.jaco_play", - "lerobot-raw/berkeley_cable_routing_raw": "openx_rlds.berkeley_cable_routing", - "lerobot-raw/roboturk_raw": "openx_rlds.roboturk", - "lerobot-raw/nyu_door_opening_surprising_effectiveness_raw": "openx_rlds.nyu_door_opening_surprising_effectiveness", - "lerobot-raw/viola_raw": "openx_rlds.viola", - "lerobot-raw/berkeley_autolab_ur5_raw": "openx_rlds.berkeley_autolab_ur5", - "lerobot-raw/toto_raw": "openx_rlds.toto", - "lerobot-raw/language_table_raw": "openx_rlds.language_table", - "lerobot-raw/columbia_cairlab_pusht_real_raw": "openx_rlds.columbia_cairlab_pusht_real", - "lerobot-raw/stanford_kuka_multimodal_dataset_raw": "openx_rlds.stanford_kuka_multimodal_dataset", - "lerobot-raw/nyu_rot_dataset_raw": "openx_rlds.nyu_rot_dataset", - "lerobot-raw/io_ai_tech_raw": "openx_rlds.io_ai_tech", - "lerobot-raw/stanford_hydra_dataset_raw": "openx_rlds.stanford_hydra_dataset", - "lerobot-raw/austin_buds_dataset_raw": "openx_rlds.austin_buds_dataset", - "lerobot-raw/nyu_franka_play_dataset_raw": "openx_rlds.nyu_franka_play_dataset", - "lerobot-raw/maniskill_dataset_raw": "openx_rlds.maniskill_dataset", - "lerobot-raw/furniture_bench_dataset_raw": "openx_rlds.furniture_bench_dataset", - "lerobot-raw/cmu_franka_exploration_dataset_raw": "openx_rlds.cmu_franka_exploration_dataset", - "lerobot-raw/ucsd_kitchen_dataset_raw": "openx_rlds.ucsd_kitchen_dataset", - "lerobot-raw/ucsd_pick_and_place_dataset_raw": "openx_rlds.ucsd_pick_and_place_dataset", - "lerobot-raw/spoc_raw": "openx_rlds.spoc", - "lerobot-raw/austin_sailor_dataset_raw": "openx_rlds.austin_sailor_dataset", - "lerobot-raw/austin_sirius_dataset_raw": "openx_rlds.austin_sirius_dataset", - "lerobot-raw/bc_z_raw": "openx_rlds.bc_z", - "lerobot-raw/utokyo_pr2_opening_fridge_raw": "openx_rlds.utokyo_pr2_opening_fridge", - "lerobot-raw/utokyo_pr2_tabletop_manipulation_raw": "openx_rlds.utokyo_pr2_tabletop_manipulation", - "lerobot-raw/utokyo_xarm_pick_and_place_raw": "openx_rlds.utokyo_xarm_pick_and_place", - "lerobot-raw/utokyo_xarm_bimanual_raw": "openx_rlds.utokyo_xarm_bimanual", - "lerobot-raw/utokyo_saytap_raw": "openx_rlds.utokyo_saytap", - "lerobot-raw/robo_net_raw": "openx_rlds.robo_net", - "lerobot-raw/robo_set_raw": "openx_rlds.robo_set", - "lerobot-raw/berkeley_mvp_raw": "openx_rlds.berkeley_mvp", - "lerobot-raw/berkeley_rpt_raw": "openx_rlds.berkeley_rpt", - "lerobot-raw/kaist_nonprehensile_raw": "openx_rlds.kaist_nonprehensile", - "lerobot-raw/stanford_mask_vit_raw": "openx_rlds.stanford_mask_vit", - "lerobot-raw/tokyo_u_lsmo_raw": "openx_rlds.tokyo_u_lsmo", - "lerobot-raw/dlr_sara_pour_raw": "openx_rlds.dlr_sara_pour", - "lerobot-raw/dlr_sara_grid_clamp_raw": "openx_rlds.dlr_sara_grid_clamp", - "lerobot-raw/dlr_edan_shared_control_raw": "openx_rlds.dlr_edan_shared_control", - "lerobot-raw/asu_table_top_raw": "openx_rlds.asu_table_top", - "lerobot-raw/stanford_robocook_raw": "openx_rlds.stanford_robocook", - "lerobot-raw/imperialcollege_sawyer_wrist_cam_raw": "openx_rlds.imperialcollege_sawyer_wrist_cam", - "lerobot-raw/iamlab_cmu_pickup_insert_raw": "openx_rlds.iamlab_cmu_pickup_insert", - "lerobot-raw/uiuc_d3field_raw": "openx_rlds.uiuc_d3field", - "lerobot-raw/utaustin_mutex_raw": "openx_rlds.utaustin_mutex", - "lerobot-raw/berkeley_fanuc_manipulation_raw": "openx_rlds.berkeley_fanuc_manipulation", - "lerobot-raw/cmu_playing_with_food_raw": "openx_rlds.cmu_playing_with_food", - "lerobot-raw/cmu_play_fusion_raw": "openx_rlds.cmu_play_fusion", - "lerobot-raw/cmu_stretch_raw": "openx_rlds.cmu_stretch", - "lerobot-raw/berkeley_gnm_recon_raw": "openx_rlds.berkeley_gnm_recon", - "lerobot-raw/berkeley_gnm_cory_hall_raw": "openx_rlds.berkeley_gnm_cory_hall", - "lerobot-raw/berkeley_gnm_sac_son_raw": "openx_rlds.berkeley_gnm_sac_son", - "lerobot-raw/droid_raw": "openx_rlds.droid", - "lerobot-raw/droid_100_raw": "openx_rlds.droid100", - "lerobot-raw/fmb_raw": "openx_rlds.fmb", - "lerobot-raw/dobbe_raw": "openx_rlds.dobbe", - "lerobot-raw/usc_cloth_sim_raw": "openx_rlds.usc_cloth_sim", - "lerobot-raw/plex_robosuite_raw": "openx_rlds.plex_robosuite", - "lerobot-raw/conq_hose_manipulation_raw": "openx_rlds.conq_hose_manipulation", - "lerobot-raw/vima_raw": "openx_rlds.vima", - "lerobot-raw/robot_vqa_raw": "openx_rlds.robot_vqa", - "lerobot-raw/mimic_play_raw": "openx_rlds.mimic_play", - "lerobot-raw/tidybot_raw": "openx_rlds.tidybot", - "lerobot-raw/eth_agent_affordances_raw": "openx_rlds.eth_agent_affordances", -} - - -def download_raw(raw_dir: Path, repo_id: str): - check_repo_id(repo_id) - user_id, dataset_id = repo_id.split("/") - - if not dataset_id.endswith("_raw"): - warnings.warn( - f"""`dataset_id` ({dataset_id}) doesn't end with '_raw' (e.g. 'lerobot/pusht_raw'). Following this - naming convention by renaming your repository is advised, but not mandatory.""", - stacklevel=1, - ) - - # Send warning if raw_dir isn't well formatted - if raw_dir.parts[-2] != user_id or raw_dir.parts[-1] != dataset_id: - warnings.warn( - f"""`raw_dir` ({raw_dir}) doesn't contain a community or user id `/` the name of the dataset that - match the `repo_id` (e.g. 'data/lerobot/pusht_raw'). Following this naming convention is advised, - but not mandatory.""", - stacklevel=1, - ) - raw_dir.mkdir(parents=True, exist_ok=True) - - logging.info(f"Start downloading from huggingface.co/{user_id} for {dataset_id}") - snapshot_download(repo_id, repo_type="dataset", local_dir=raw_dir) - logging.info(f"Finish downloading from huggingface.co/{user_id} for {dataset_id}") - - -def download_all_raw_datasets(data_dir: Path | None = None): - if data_dir is None: - data_dir = Path("data") - for repo_id in AVAILABLE_RAW_REPO_IDS: - raw_dir = data_dir / repo_id - download_raw(raw_dir, repo_id) - - -def main(): - parser = argparse.ArgumentParser( - description=f"""A script to download raw datasets from Hugging Face hub to a local directory. Here is a - non exhaustive list of available repositories to use in `--repo-id`: {list(AVAILABLE_RAW_REPO_IDS.keys())}""", - ) - - parser.add_argument( - "--raw-dir", - type=Path, - required=True, - help="Directory containing input raw datasets (e.g. `data/aloha_mobile_chair_raw` or `data/pusht_raw).", - ) - parser.add_argument( - "--repo-id", - type=str, - required=True, - help="""Repositery identifier on Hugging Face: a community or a user name `/` the name of - the dataset (e.g. `lerobot/pusht_raw`, `cadene/aloha_sim_insertion_human_raw`).""", - ) - args = parser.parse_args() - download_raw(**vars(args)) - - -if __name__ == "__main__": - main() diff --git a/lerobot/common/datasets/push_dataset_to_hub/_encode_datasets.py b/lerobot/common/datasets/push_dataset_to_hub/_encode_datasets.py deleted file mode 100644 index 184d79fb..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_encode_datasets.py +++ /dev/null @@ -1,184 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Use this script to batch encode lerobot dataset from their raw format to LeRobotDataset and push their updated -version to the hub. Under the hood, this script reuses 'push_dataset_to_hub.py'. It assumes that you already -downloaded raw datasets, which you can do with the related '_download_raw.py' script. - -For instance, for codebase_version = 'v1.6', the following command was run, assuming raw datasets from -lerobot-raw were downloaded in 'raw/datasets/directory': -```bash -python lerobot/common/datasets/push_dataset_to_hub/_encode_datasets.py \ - --raw-dir raw/datasets/directory \ - --raw-repo-ids lerobot-raw \ - --local-dir push/datasets/directory \ - --tests-data-dir tests/data \ - --push-repo lerobot \ - --vcodec libsvtav1 \ - --pix-fmt yuv420p \ - --g 2 \ - --crf 30 -``` -""" - -import argparse -from pathlib import Path - -from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION -from lerobot.common.datasets.push_dataset_to_hub._download_raw import AVAILABLE_RAW_REPO_IDS -from lerobot.common.datasets.push_dataset_to_hub.utils import check_repo_id -from lerobot.scripts.push_dataset_to_hub import push_dataset_to_hub - - -def get_push_repo_id_from_raw(raw_repo_id: str, push_repo: str) -> str: - dataset_id_raw = raw_repo_id.split("/")[1] - dataset_id = dataset_id_raw.removesuffix("_raw") - return f"{push_repo}/{dataset_id}" - - -def encode_datasets( - raw_dir: Path, - raw_repo_ids: list[str], - push_repo: str, - vcodec: str, - pix_fmt: str, - g: int, - crf: int, - local_dir: Path | None = None, - tests_data_dir: Path | None = None, - raw_format: str | None = None, - dry_run: bool = False, -) -> None: - if len(raw_repo_ids) == 1 and raw_repo_ids[0].lower() == "lerobot-raw": - raw_repo_ids_format = AVAILABLE_RAW_REPO_IDS - else: - if raw_format is None: - raise ValueError(raw_format) - raw_repo_ids_format = {id_: raw_format for id_ in raw_repo_ids} - - for raw_repo_id, repo_raw_format in raw_repo_ids_format.items(): - check_repo_id(raw_repo_id) - dataset_repo_id_push = get_push_repo_id_from_raw(raw_repo_id, push_repo) - dataset_raw_dir = raw_dir / raw_repo_id - dataset_dir = local_dir / dataset_repo_id_push if local_dir is not None else None - encoding = { - "vcodec": vcodec, - "pix_fmt": pix_fmt, - "g": g, - "crf": crf, - } - - if not (dataset_raw_dir).is_dir(): - raise NotADirectoryError(dataset_raw_dir) - - if not dry_run: - push_dataset_to_hub( - dataset_raw_dir, - raw_format=repo_raw_format, - repo_id=dataset_repo_id_push, - local_dir=dataset_dir, - resume=True, - encoding=encoding, - tests_data_dir=tests_data_dir, - ) - else: - print( - f"DRY RUN: {dataset_raw_dir} --> {dataset_dir} --> {dataset_repo_id_push}@{CODEBASE_VERSION}" - ) - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument( - "--raw-dir", - type=Path, - default=Path("data"), - help="Directory where raw datasets are located.", - ) - parser.add_argument( - "--raw-repo-ids", - type=str, - nargs="*", - default=["lerobot-raw"], - help="""Raw dataset repo ids. if 'lerobot-raw', the keys from `AVAILABLE_RAW_REPO_IDS` will be - used and raw datasets will be fetched from the 'lerobot-raw/' repo and pushed with their - associated format. It is assumed that each dataset is located at `raw_dir / raw_repo_id` """, - ) - parser.add_argument( - "--raw-format", - type=str, - default=None, - help="""Raw format to use for the raw repo-ids. Must be specified if --raw-repo-ids is not - 'lerobot-raw'""", - ) - parser.add_argument( - "--local-dir", - type=Path, - default=None, - help="""When provided, writes the dataset converted to LeRobotDataset format in this directory - (e.g. `data/lerobot/aloha_mobile_chair`).""", - ) - parser.add_argument( - "--push-repo", - type=str, - default="lerobot", - help="Repo to upload datasets to", - ) - parser.add_argument( - "--vcodec", - type=str, - default="libsvtav1", - help="Codec to use for encoding videos", - ) - parser.add_argument( - "--pix-fmt", - type=str, - default="yuv420p", - help="Pixel formats (chroma subsampling) to be used for encoding", - ) - parser.add_argument( - "--g", - type=int, - default=2, - help="Group of pictures sizes to be used for encoding.", - ) - parser.add_argument( - "--crf", - type=int, - default=30, - help="Constant rate factors to be used for encoding.", - ) - parser.add_argument( - "--tests-data-dir", - type=Path, - default=None, - help=( - "When provided, save tests artifacts into the given directory " - "(e.g. `--tests-data-dir tests/data` will save to tests/data/{--repo-id})." - ), - ) - parser.add_argument( - "--dry-run", - type=int, - default=0, - help="If not set to 0, this script won't download or upload anything.", - ) - args = parser.parse_args() - encode_datasets(**vars(args)) - - -if __name__ == "__main__": - main() diff --git a/lerobot/common/datasets/push_dataset_to_hub/_umi_imagecodecs_numcodecs.py b/lerobot/common/datasets/push_dataset_to_hub/_umi_imagecodecs_numcodecs.py deleted file mode 100644 index a118b7e7..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/_umi_imagecodecs_numcodecs.py +++ /dev/null @@ -1,326 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# imagecodecs/numcodecs.py - -# Copyright (c) 2021-2022, Christoph Gohlke -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. - -# Copied from: https://github.com/real-stanford/universal_manipulation_interface/blob/298776ce251f33b6b3185a98d6e7d1f9ad49168b/diffusion_policy/codecs/imagecodecs_numcodecs.py#L1 -"""Additional numcodecs implemented using imagecodecs.""" - -__version__ = "2022.9.26" - -__all__ = ("register_codecs",) - -import imagecodecs -import numpy -from numcodecs.abc import Codec -from numcodecs.registry import get_codec, register_codec - -# TODO (azouitine): Remove useless codecs - - -def protective_squeeze(x: numpy.ndarray): - """ - Squeeze dim only if it's not the last dim. - Image dim expected to be *, H, W, C - """ - img_shape = x.shape[-3:] - if len(x.shape) > 3: - n_imgs = numpy.prod(x.shape[:-3]) - if n_imgs > 1: - img_shape = (-1,) + img_shape - return x.reshape(img_shape) - - -def get_default_image_compressor(**kwargs): - if imagecodecs.JPEGXL: - # has JPEGXL - this_kwargs = { - "effort": 3, - "distance": 0.3, - # bug in libjxl, invalid codestream for non-lossless - # when decoding speed > 1 - "decodingspeed": 1, - } - this_kwargs.update(kwargs) - return JpegXl(**this_kwargs) - else: - this_kwargs = {"level": 50} - this_kwargs.update(kwargs) - return Jpeg2k(**this_kwargs) - - -class Jpeg2k(Codec): - """JPEG 2000 codec for numcodecs.""" - - codec_id = "imagecodecs_jpeg2k" - - def __init__( - self, - level=None, - codecformat=None, - colorspace=None, - tile=None, - reversible=None, - bitspersample=None, - resolutions=None, - numthreads=None, - verbose=0, - ): - self.level = level - self.codecformat = codecformat - self.colorspace = colorspace - self.tile = None if tile is None else tuple(tile) - self.reversible = reversible - self.bitspersample = bitspersample - self.resolutions = resolutions - self.numthreads = numthreads - self.verbose = verbose - - def encode(self, buf): - buf = protective_squeeze(numpy.asarray(buf)) - return imagecodecs.jpeg2k_encode( - buf, - level=self.level, - codecformat=self.codecformat, - colorspace=self.colorspace, - tile=self.tile, - reversible=self.reversible, - bitspersample=self.bitspersample, - resolutions=self.resolutions, - numthreads=self.numthreads, - verbose=self.verbose, - ) - - def decode(self, buf, out=None): - return imagecodecs.jpeg2k_decode(buf, verbose=self.verbose, numthreads=self.numthreads, out=out) - - -class JpegXl(Codec): - """JPEG XL codec for numcodecs.""" - - codec_id = "imagecodecs_jpegxl" - - def __init__( - self, - # encode - level=None, - effort=None, - distance=None, - lossless=None, - decodingspeed=None, - photometric=None, - planar=None, - usecontainer=None, - # decode - index=None, - keeporientation=None, - # both - numthreads=None, - ): - """ - Return JPEG XL image from numpy array. - Float must be in nominal range 0..1. - - Currently L, LA, RGB, RGBA images are supported in contig mode. - Extra channels are only supported for grayscale images in planar mode. - - Parameters - ---------- - level : Default to None, i.e. not overwriting lossess and decodingspeed options. - When < 0: Use lossless compression - When in [0,1,2,3,4]: Sets the decoding speed tier for the provided options. - Minimum is 0 (slowest to decode, best quality/density), and maximum - is 4 (fastest to decode, at the cost of some quality/density). - effort : Default to 3. - Sets encoder effort/speed level without affecting decoding speed. - Valid values are, from faster to slower speed: 1:lightning 2:thunder - 3:falcon 4:cheetah 5:hare 6:wombat 7:squirrel 8:kitten 9:tortoise. - Speed: lightning, thunder, falcon, cheetah, hare, wombat, squirrel, kitten, tortoise - control the encoder effort in ascending order. - This also affects memory usage: using lower effort will typically reduce memory - consumption during encoding. - lightning and thunder are fast modes useful for lossless mode (modular). - falcon disables all of the following tools. - cheetah enables coefficient reordering, context clustering, and heuristics for selecting DCT sizes and quantization steps. - hare enables Gaborish filtering, chroma from luma, and an initial estimate of quantization steps. - wombat enables error diffusion quantization and full DCT size selection heuristics. - squirrel (default) enables dots, patches, and spline detection, and full context clustering. - kitten optimizes the adaptive quantization for a psychovisual metric. - tortoise enables a more thorough adaptive quantization search. - distance : Default to 1.0 - Sets the distance level for lossy compression: target max butteraugli distance, - lower = higher quality. Range: 0 .. 15. 0.0 = mathematically lossless - (however, use JxlEncoderSetFrameLossless instead to use true lossless, - as setting distance to 0 alone is not the only requirement). - 1.0 = visually lossless. Recommended range: 0.5 .. 3.0. - lossess : Default to False. - Use lossess encoding. - decodingspeed : Default to 0. - Duplicate to level. [0,4] - photometric : Return JxlColorSpace value. - Default logic is quite complicated but works most of the time. - Accepted value: - int: [-1,3] - str: ['RGB', - 'WHITEISZERO', 'MINISWHITE', - 'BLACKISZERO', 'MINISBLACK', 'GRAY', - 'XYB', 'KNOWN'] - planar : Enable multi-channel mode. - Default to false. - usecontainer : - Forces the encoder to use the box-based container format (BMFF) - even when not necessary. - When using JxlEncoderUseBoxes, JxlEncoderStoreJPEGMetadata or - JxlEncoderSetCodestreamLevel with level 10, the encoder will - automatically also use the container format, it is not necessary - to use JxlEncoderUseContainer for those use cases. - By default this setting is disabled. - index : Selectively decode frames for animation. - Default to 0, decode all frames. - When set to > 0, decode that frame index only. - keeporientation : - Enables or disables preserving of as-in-bitstream pixeldata orientation. - Some images are encoded with an Orientation tag indicating that the - decoder must perform a rotation and/or mirroring to the encoded image data. - - If skip_reorientation is JXL_FALSE (the default): the decoder will apply - the transformation from the orientation setting, hence rendering the image - according to its specified intent. When producing a JxlBasicInfo, the decoder - will always set the orientation field to JXL_ORIENT_IDENTITY (matching the - returned pixel data) and also align xsize and ysize so that they correspond - to the width and the height of the returned pixel data. - - If skip_reorientation is JXL_TRUE: the decoder will skip applying the - transformation from the orientation setting, returning the image in - the as-in-bitstream pixeldata orientation. This may be faster to decode - since the decoder doesnt have to apply the transformation, but can - cause wrong display of the image if the orientation tag is not correctly - taken into account by the user. - - By default, this option is disabled, and the returned pixel data is - re-oriented according to the images Orientation setting. - threads : Default to 1. - If <= 0, use all cores. - If > 32, clipped to 32. - """ - - self.level = level - self.effort = effort - self.distance = distance - self.lossless = bool(lossless) - self.decodingspeed = decodingspeed - self.photometric = photometric - self.planar = planar - self.usecontainer = usecontainer - self.index = index - self.keeporientation = keeporientation - self.numthreads = numthreads - - def encode(self, buf): - # TODO: only squeeze all but last dim - buf = protective_squeeze(numpy.asarray(buf)) - return imagecodecs.jpegxl_encode( - buf, - level=self.level, - effort=self.effort, - distance=self.distance, - lossless=self.lossless, - decodingspeed=self.decodingspeed, - photometric=self.photometric, - planar=self.planar, - usecontainer=self.usecontainer, - numthreads=self.numthreads, - ) - - def decode(self, buf, out=None): - return imagecodecs.jpegxl_decode( - buf, - index=self.index, - keeporientation=self.keeporientation, - numthreads=self.numthreads, - out=out, - ) - - -def _flat(out): - """Return numpy array as contiguous view of bytes if possible.""" - if out is None: - return None - view = memoryview(out) - if view.readonly or not view.contiguous: - return None - return view.cast("B") - - -def register_codecs(codecs=None, force=False, verbose=True): - """Register codecs in this module with numcodecs.""" - for name, cls in globals().items(): - if not hasattr(cls, "codec_id") or name == "Codec": - continue - if codecs is not None and cls.codec_id not in codecs: - continue - try: - try: # noqa: SIM105 - get_codec({"id": cls.codec_id}) - except TypeError: - # registered, but failed - pass - except ValueError: - # not registered yet - pass - else: - if not force: - if verbose: - log_warning(f"numcodec {cls.codec_id!r} already registered") - continue - if verbose: - log_warning(f"replacing registered numcodec {cls.codec_id!r}") - register_codec(cls) - - -def log_warning(msg, *args, **kwargs): - """Log message with level WARNING.""" - import logging - - logging.getLogger(__name__).warning(msg, *args, **kwargs) diff --git a/lerobot/common/datasets/push_dataset_to_hub/aloha_hdf5_format.py b/lerobot/common/datasets/push_dataset_to_hub/aloha_hdf5_format.py deleted file mode 100644 index e2973ef8..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/aloha_hdf5_format.py +++ /dev/null @@ -1,233 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Contains utilities to process raw data format of HDF5 files like in: https://github.com/tonyzhaozh/act -""" - -import gc -import shutil -from pathlib import Path - -import h5py -import numpy as np -import torch -import tqdm -from datasets import Dataset, Features, Image, Sequence, Value -from PIL import Image as PILImage - -from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION -from lerobot.common.datasets.push_dataset_to_hub.utils import ( - calculate_episode_data_index, - concatenate_episodes, - get_default_encoding, - save_images_concurrently, -) -from lerobot.common.datasets.utils import ( - hf_transform_to_torch, -) -from lerobot.common.datasets.video_utils import VideoFrame, encode_video_frames - - -def get_cameras(hdf5_data): - # ignore depth channel, not currently handled - # TODO(rcadene): add depth - rgb_cameras = [key for key in hdf5_data["/observations/images"].keys() if "depth" not in key] # noqa: SIM118 - return rgb_cameras - - -def check_format(raw_dir) -> bool: - # only frames from simulation are uncompressed - compressed_images = "sim" not in raw_dir.name - - hdf5_paths = list(raw_dir.glob("episode_*.hdf5")) - assert len(hdf5_paths) != 0 - for hdf5_path in hdf5_paths: - with h5py.File(hdf5_path, "r") as data: - assert "/action" in data - assert "/observations/qpos" in data - - assert data["/action"].ndim == 2 - assert data["/observations/qpos"].ndim == 2 - - num_frames = data["/action"].shape[0] - assert num_frames == data["/observations/qpos"].shape[0] - - for camera in get_cameras(data): - assert num_frames == data[f"/observations/images/{camera}"].shape[0] - - if compressed_images: - assert data[f"/observations/images/{camera}"].ndim == 2 - else: - assert data[f"/observations/images/{camera}"].ndim == 4 - b, h, w, c = data[f"/observations/images/{camera}"].shape - assert c < h and c < w, f"Expect (h,w,c) image format but ({h=},{w=},{c=}) provided." - - -def load_from_raw( - raw_dir: Path, - videos_dir: Path, - fps: int, - video: bool, - episodes: list[int] | None = None, - encoding: dict | None = None, -): - # only frames from simulation are uncompressed - compressed_images = "sim" not in raw_dir.name - - hdf5_files = sorted(raw_dir.glob("episode_*.hdf5")) - num_episodes = len(hdf5_files) - - ep_dicts = [] - ep_ids = episodes if episodes else range(num_episodes) - for ep_idx in tqdm.tqdm(ep_ids): - ep_path = hdf5_files[ep_idx] - with h5py.File(ep_path, "r") as ep: - num_frames = ep["/action"].shape[0] - - # last step of demonstration is considered done - done = torch.zeros(num_frames, dtype=torch.bool) - done[-1] = True - - state = torch.from_numpy(ep["/observations/qpos"][:]) - action = torch.from_numpy(ep["/action"][:]) - if "/observations/qvel" in ep: - velocity = torch.from_numpy(ep["/observations/qvel"][:]) - if "/observations/effort" in ep: - effort = torch.from_numpy(ep["/observations/effort"][:]) - - ep_dict = {} - - for camera in get_cameras(ep): - img_key = f"observation.images.{camera}" - - if compressed_images: - import cv2 - - # load one compressed image after the other in RAM and uncompress - imgs_array = [] - for data in ep[f"/observations/images/{camera}"]: - imgs_array.append(cv2.imdecode(data, 1)) - imgs_array = np.array(imgs_array) - - else: - # load all images in RAM - imgs_array = ep[f"/observations/images/{camera}"][:] - - if video: - # save png images in temporary directory - tmp_imgs_dir = videos_dir / "tmp_images" - save_images_concurrently(imgs_array, tmp_imgs_dir) - - # encode images to a mp4 video - fname = f"{img_key}_episode_{ep_idx:06d}.mp4" - video_path = videos_dir / fname - encode_video_frames(tmp_imgs_dir, video_path, fps, **(encoding or {})) - - # clean temporary images directory - shutil.rmtree(tmp_imgs_dir) - - # store the reference to the video frame - ep_dict[img_key] = [ - {"path": f"videos/{fname}", "timestamp": i / fps} for i in range(num_frames) - ] - else: - ep_dict[img_key] = [PILImage.fromarray(x) for x in imgs_array] - - ep_dict["observation.state"] = state - if "/observations/velocity" in ep: - ep_dict["observation.velocity"] = velocity - if "/observations/effort" in ep: - ep_dict["observation.effort"] = effort - ep_dict["action"] = action - ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames) - ep_dict["frame_index"] = torch.arange(0, num_frames, 1) - ep_dict["timestamp"] = torch.arange(0, num_frames, 1) / fps - ep_dict["next.done"] = done - # TODO(rcadene): add reward and success by computing them in sim - - assert isinstance(ep_idx, int) - ep_dicts.append(ep_dict) - - gc.collect() - - data_dict = concatenate_episodes(ep_dicts) - - total_frames = data_dict["frame_index"].shape[0] - data_dict["index"] = torch.arange(0, total_frames, 1) - return data_dict - - -def to_hf_dataset(data_dict, video) -> Dataset: - features = {} - - keys = [key for key in data_dict if "observation.images." in key] - for key in keys: - if video: - features[key] = VideoFrame() - else: - features[key] = Image() - - features["observation.state"] = Sequence( - length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None) - ) - if "observation.velocity" in data_dict: - features["observation.velocity"] = Sequence( - length=data_dict["observation.velocity"].shape[1], feature=Value(dtype="float32", id=None) - ) - if "observation.effort" in data_dict: - features["observation.effort"] = Sequence( - length=data_dict["observation.effort"].shape[1], feature=Value(dtype="float32", id=None) - ) - features["action"] = Sequence( - length=data_dict["action"].shape[1], feature=Value(dtype="float32", id=None) - ) - features["episode_index"] = Value(dtype="int64", id=None) - features["frame_index"] = Value(dtype="int64", id=None) - features["timestamp"] = Value(dtype="float32", id=None) - features["next.done"] = Value(dtype="bool", id=None) - features["index"] = Value(dtype="int64", id=None) - - hf_dataset = Dataset.from_dict(data_dict, features=Features(features)) - hf_dataset.set_transform(hf_transform_to_torch) - return hf_dataset - - -def from_raw_to_lerobot_format( - raw_dir: Path, - videos_dir: Path, - fps: int | None = None, - video: bool = True, - episodes: list[int] | None = None, - encoding: dict | None = None, -): - # sanity check - check_format(raw_dir) - - if fps is None: - fps = 50 - - data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes, encoding) - hf_dataset = to_hf_dataset(data_dict, video) - episode_data_index = calculate_episode_data_index(hf_dataset) - info = { - "codebase_version": CODEBASE_VERSION, - "fps": fps, - "video": video, - } - if video: - info["encoding"] = get_default_encoding() - - return hf_dataset, episode_data_index, info diff --git a/lerobot/common/datasets/push_dataset_to_hub/cam_png_format.py b/lerobot/common/datasets/push_dataset_to_hub/cam_png_format.py deleted file mode 100644 index 26492576..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/cam_png_format.py +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Contains utilities to process raw data format of png images files recorded with capture_camera_feed.py -""" - -from pathlib import Path - -import torch -from datasets import Dataset, Features, Image, Value -from PIL import Image as PILImage - -from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION -from lerobot.common.datasets.push_dataset_to_hub.utils import ( - calculate_episode_data_index, - concatenate_episodes, -) -from lerobot.common.datasets.utils import hf_transform_to_torch -from lerobot.common.datasets.video_utils import VideoFrame - - -def check_format(raw_dir: Path) -> bool: - image_paths = list(raw_dir.glob("frame_*.png")) - if len(image_paths) == 0: - raise ValueError - - -def load_from_raw(raw_dir: Path, fps: int, episodes: list[int] | None = None): - if episodes is not None: - # TODO(aliberts): add support for multi-episodes. - raise NotImplementedError() - - ep_dict = {} - ep_idx = 0 - - image_paths = sorted(raw_dir.glob("frame_*.png")) - num_frames = len(image_paths) - - ep_dict["observation.image"] = [PILImage.open(x) for x in image_paths] - ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames) - ep_dict["frame_index"] = torch.arange(0, num_frames, 1) - ep_dict["timestamp"] = torch.arange(0, num_frames, 1) / fps - - ep_dicts = [ep_dict] - data_dict = concatenate_episodes(ep_dicts) - total_frames = data_dict["frame_index"].shape[0] - data_dict["index"] = torch.arange(0, total_frames, 1) - return data_dict - - -def to_hf_dataset(data_dict, video) -> Dataset: - features = {} - if video: - features["observation.image"] = VideoFrame() - else: - features["observation.image"] = Image() - - features["episode_index"] = Value(dtype="int64", id=None) - features["frame_index"] = Value(dtype="int64", id=None) - features["timestamp"] = Value(dtype="float32", id=None) - features["index"] = Value(dtype="int64", id=None) - - hf_dataset = Dataset.from_dict(data_dict, features=Features(features)) - hf_dataset.set_transform(hf_transform_to_torch) - return hf_dataset - - -def from_raw_to_lerobot_format( - raw_dir: Path, - videos_dir: Path, - fps: int | None = None, - video: bool = True, - episodes: list[int] | None = None, - encoding: dict | None = None, -): - if video or episodes or encoding is not None: - # TODO(aliberts): support this - raise NotImplementedError - - # sanity check - check_format(raw_dir) - - if fps is None: - fps = 30 - - data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes) - hf_dataset = to_hf_dataset(data_dict, video) - episode_data_index = calculate_episode_data_index(hf_dataset) - info = { - "codebase_version": CODEBASE_VERSION, - "fps": fps, - "video": video, - } - return hf_dataset, episode_data_index, info diff --git a/lerobot/common/datasets/push_dataset_to_hub/dora_parquet_format.py b/lerobot/common/datasets/push_dataset_to_hub/dora_parquet_format.py deleted file mode 100644 index acf820bf..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/dora_parquet_format.py +++ /dev/null @@ -1,233 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Contains utilities to process raw data format from dora-record -""" - -import re -import warnings -from pathlib import Path - -import pandas as pd -import torch -from datasets import Dataset, Features, Image, Sequence, Value - -from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION -from lerobot.common.datasets.push_dataset_to_hub.utils import calculate_episode_data_index -from lerobot.common.datasets.utils import ( - hf_transform_to_torch, -) -from lerobot.common.datasets.video_utils import VideoFrame - - -def check_format(raw_dir) -> bool: - assert raw_dir.exists() - - leader_file = list(raw_dir.glob("*.parquet")) - if len(leader_file) == 0: - raise ValueError(f"Missing parquet files in '{raw_dir}'") - return True - - -def load_from_raw(raw_dir: Path, videos_dir: Path, fps: int, video: bool, episodes: list[int] | None = None): - # Load data stream that will be used as reference for the timestamps synchronization - reference_files = list(raw_dir.glob("observation.images.cam_*.parquet")) - if len(reference_files) == 0: - raise ValueError(f"Missing reference files for camera, starting with in '{raw_dir}'") - # select first camera in alphanumeric order - reference_key = sorted(reference_files)[0].stem - reference_df = pd.read_parquet(raw_dir / f"{reference_key}.parquet") - reference_df = reference_df[["timestamp_utc", reference_key]] - - # Merge all data stream using nearest backward strategy - df = reference_df - for path in raw_dir.glob("*.parquet"): - key = path.stem # action or observation.state or ... - if key == reference_key: - continue - if "failed_episode_index" in key: - # TODO(rcadene): add support for removing episodes that are tagged as "failed" - continue - modality_df = pd.read_parquet(path) - modality_df = modality_df[["timestamp_utc", key]] - df = pd.merge_asof( - df, - modality_df, - on="timestamp_utc", - # "nearest" is the best option over "backward", since the latter can desynchronizes camera timestamps by - # matching timestamps that are too far apart, in order to fit the backward constraints. It's not the case for "nearest". - # However, note that "nearest" might synchronize the reference camera with other cameras on slightly future timestamps. - # are too far apart. - direction="nearest", - tolerance=pd.Timedelta(f"{1 / fps} seconds"), - ) - # Remove rows with episode_index -1 which indicates data that correspond to in-between episodes - df = df[df["episode_index"] != -1] - - image_keys = [key for key in df if "observation.images." in key] - - def get_episode_index(row): - episode_index_per_cam = {} - for key in image_keys: - path = row[key][0]["path"] - match = re.search(r"_(\d{6}).mp4", path) - if not match: - raise ValueError(path) - episode_index = int(match.group(1)) - episode_index_per_cam[key] = episode_index - if len(set(episode_index_per_cam.values())) != 1: - raise ValueError( - f"All cameras are expected to belong to the same episode, but getting {episode_index_per_cam}" - ) - return episode_index - - df["episode_index"] = df.apply(get_episode_index, axis=1) - - # dora only use arrays, so single values are encapsulated into a list - df["frame_index"] = df.groupby("episode_index").cumcount() - df = df.reset_index() - df["index"] = df.index - - # set 'next.done' to True for the last frame of each episode - df["next.done"] = False - df.loc[df.groupby("episode_index").tail(1).index, "next.done"] = True - - df["timestamp"] = df["timestamp_utc"].map(lambda x: x.timestamp()) - # each episode starts with timestamp 0 to match the ones from the video - df["timestamp"] = df.groupby("episode_index")["timestamp"].transform(lambda x: x - x.iloc[0]) - - del df["timestamp_utc"] - - # sanity check - has_nan = df.isna().any().any() - if has_nan: - raise ValueError("Dataset contains Nan values.") - - # sanity check episode indices go from 0 to n-1 - ep_ids = [ep_idx for ep_idx, _ in df.groupby("episode_index")] - expected_ep_ids = list(range(df["episode_index"].max() + 1)) - if ep_ids != expected_ep_ids: - raise ValueError(f"Episodes indices go from {ep_ids} instead of {expected_ep_ids}") - - # Create symlink to raw videos directory (that needs to be absolute not relative) - videos_dir.parent.mkdir(parents=True, exist_ok=True) - videos_dir.symlink_to((raw_dir / "videos").absolute()) - - # sanity check the video paths are well formatted - for key in df: - if "observation.images." not in key: - continue - for ep_idx in ep_ids: - video_path = videos_dir / f"{key}_episode_{ep_idx:06d}.mp4" - if not video_path.exists(): - raise ValueError(f"Video file not found in {video_path}") - - data_dict = {} - for key in df: - # is video frame - if "observation.images." in key: - # we need `[0] because dora only use arrays, so single values are encapsulated into a list. - # it is the case for video_frame dictionary = [{"path": ..., "timestamp": ...}] - data_dict[key] = [video_frame[0] for video_frame in df[key].values] - - # sanity check the video path is well formatted - video_path = videos_dir.parent / data_dict[key][0]["path"] - if not video_path.exists(): - raise ValueError(f"Video file not found in {video_path}") - # is number - elif df[key].iloc[0].ndim == 0 or df[key].iloc[0].shape[0] == 1: - data_dict[key] = torch.from_numpy(df[key].values) - # is vector - elif df[key].iloc[0].shape[0] > 1: - data_dict[key] = torch.stack([torch.from_numpy(x.copy()) for x in df[key].values]) - else: - raise ValueError(key) - - return data_dict - - -def to_hf_dataset(data_dict, video) -> Dataset: - features = {} - - keys = [key for key in data_dict if "observation.images." in key] - for key in keys: - if video: - features[key] = VideoFrame() - else: - features[key] = Image() - - features["observation.state"] = Sequence( - length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None) - ) - if "observation.velocity" in data_dict: - features["observation.velocity"] = Sequence( - length=data_dict["observation.velocity"].shape[1], feature=Value(dtype="float32", id=None) - ) - if "observation.effort" in data_dict: - features["observation.effort"] = Sequence( - length=data_dict["observation.effort"].shape[1], feature=Value(dtype="float32", id=None) - ) - features["action"] = Sequence( - length=data_dict["action"].shape[1], feature=Value(dtype="float32", id=None) - ) - features["episode_index"] = Value(dtype="int64", id=None) - features["frame_index"] = Value(dtype="int64", id=None) - features["timestamp"] = Value(dtype="float32", id=None) - features["next.done"] = Value(dtype="bool", id=None) - features["index"] = Value(dtype="int64", id=None) - - hf_dataset = Dataset.from_dict(data_dict, features=Features(features)) - hf_dataset.set_transform(hf_transform_to_torch) - return hf_dataset - - -def from_raw_to_lerobot_format( - raw_dir: Path, - videos_dir: Path, - fps: int | None = None, - video: bool = True, - episodes: list[int] | None = None, - encoding: dict | None = None, -): - # sanity check - check_format(raw_dir) - - if fps is None: - fps = 30 - else: - raise NotImplementedError() - - if not video: - raise NotImplementedError() - - if encoding is not None: - warnings.warn( - "Video encoding is currently done outside of LeRobot for the dora_parquet format.", - stacklevel=1, - ) - - data_df = load_from_raw(raw_dir, videos_dir, fps, episodes) - hf_dataset = to_hf_dataset(data_df, video) - episode_data_index = calculate_episode_data_index(hf_dataset) - info = { - "codebase_version": CODEBASE_VERSION, - "fps": fps, - "video": video, - } - if video: - info["encoding"] = "unknown" - - return hf_dataset, episode_data_index, info diff --git a/lerobot/common/datasets/push_dataset_to_hub/openx_rlds_format.py b/lerobot/common/datasets/push_dataset_to_hub/openx_rlds_format.py deleted file mode 100644 index 2ffb8369..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/openx_rlds_format.py +++ /dev/null @@ -1,312 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -For all datasets in the RLDS format. -For https://github.com/google-deepmind/open_x_embodiment (OPENX) datasets. - -NOTE: You need to install tensorflow and tensorflow_datasets before running this script. - -Example: - python lerobot/scripts/push_dataset_to_hub.py \ - --raw-dir /path/to/data/bridge_dataset/1.0.0/ \ - --repo-id your_hub/sampled_bridge_data_v2 \ - --raw-format rlds \ - --episodes 3 4 5 8 9 - -Exact dataset fps defined in openx/config.py, obtained from: - https://docs.google.com/spreadsheets/d/1rPBD77tk60AEIGZrGSODwyyzs5FgCU9Uz3h-3_t2A9g/edit?gid=0#gid=0&range=R:R -""" - -import shutil -from pathlib import Path - -import numpy as np -import tensorflow as tf -import tensorflow_datasets as tfds -import torch -import tqdm -from datasets import Dataset, Features, Image, Sequence, Value -from PIL import Image as PILImage - -from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION -from lerobot.common.datasets.push_dataset_to_hub.utils import ( - calculate_episode_data_index, - concatenate_episodes, - get_default_encoding, - save_images_concurrently, -) -from lerobot.common.datasets.utils import ( - hf_transform_to_torch, -) -from lerobot.common.datasets.video_utils import VideoFrame, encode_video_frames - -np.set_printoptions(precision=2) - - -def tf_to_torch(data): - return torch.from_numpy(data.numpy()) - - -def tf_img_convert(img): - if img.dtype == tf.string: - img = tf.io.decode_image(img, expand_animations=False, dtype=tf.uint8) - elif img.dtype != tf.uint8: - raise ValueError(f"Unsupported image dtype: found with dtype {img.dtype}") - return img.numpy() - - -def _broadcast_metadata_rlds(i: tf.Tensor, traj: dict) -> dict: - """ - In the RLDS format, each trajectory has some top-level metadata that is explicitly separated out, and a "steps" - entry. This function moves the "steps" entry to the top level, broadcasting any metadata to the length of the - trajectory. This function also adds the extra metadata fields `_len`, `_traj_index`, and `_frame_index`. - - NOTE: adapted from DLimp library https://github.com/kvablack/dlimp/ - """ - steps = traj.pop("steps") - - traj_len = tf.shape(tf.nest.flatten(steps)[0])[0] - - # broadcast metadata to the length of the trajectory - metadata = tf.nest.map_structure(lambda x: tf.repeat(x, traj_len), traj) - - # put steps back in - assert "traj_metadata" not in steps - traj = {**steps, "traj_metadata": metadata} - - assert "_len" not in traj - assert "_traj_index" not in traj - assert "_frame_index" not in traj - traj["_len"] = tf.repeat(traj_len, traj_len) - traj["_traj_index"] = tf.repeat(i, traj_len) - traj["_frame_index"] = tf.range(traj_len) - - return traj - - -def load_from_raw( - raw_dir: Path, - videos_dir: Path, - fps: int, - video: bool, - episodes: list[int] | None = None, - encoding: dict | None = None, -): - """ - Args: - raw_dir (Path): _description_ - videos_dir (Path): _description_ - fps (int): _description_ - video (bool): _description_ - episodes (list[int] | None, optional): _description_. Defaults to None. - """ - ds_builder = tfds.builder_from_directory(str(raw_dir)) - dataset = ds_builder.as_dataset( - split="all", - decoders={"steps": tfds.decode.SkipDecoding()}, - ) - - dataset_info = ds_builder.info - print("dataset_info: ", dataset_info) - - ds_length = len(dataset) - dataset = dataset.take(ds_length) - # "flatten" the dataset as such we can apply trajectory level map() easily - # each [obs][key] has a shape of (frame_size, ...) - dataset = dataset.enumerate().map(_broadcast_metadata_rlds) - - # we will apply the standardization transform if the dataset_name is provided - # if the dataset name is not provided and the goal is to convert any rlds formatted dataset - # search for 'image' keys in the observations - image_keys = [] - state_keys = [] - observation_info = dataset_info.features["steps"]["observation"] - for key in observation_info: - # check whether the key is for an image or a vector observation - if len(observation_info[key].shape) == 3: - # only adding uint8 images discards depth images - if observation_info[key].dtype == tf.uint8: - image_keys.append(key) - else: - state_keys.append(key) - - lang_key = "language_instruction" if "language_instruction" in dataset.element_spec else None - - print(" - image_keys: ", image_keys) - print(" - lang_key: ", lang_key) - - it = iter(dataset) - - ep_dicts = [] - # Init temp path to save ep_dicts in case of crash - tmp_ep_dicts_dir = videos_dir.parent.joinpath("ep_dicts") - tmp_ep_dicts_dir.mkdir(parents=True, exist_ok=True) - - # check if ep_dicts have already been saved in /tmp - starting_ep_idx = 0 - saved_ep_dicts = [ep.__str__() for ep in tmp_ep_dicts_dir.iterdir()] - if len(saved_ep_dicts) > 0: - saved_ep_dicts.sort() - # get last ep_idx number - starting_ep_idx = int(saved_ep_dicts[-1][-13:-3]) + 1 - for i in range(starting_ep_idx): - episode = next(it) - ep_dicts.append(torch.load(saved_ep_dicts[i])) - - # if we user specified episodes, skip the ones not in the list - if episodes is not None: - if ds_length == 0: - raise ValueError("No episodes found.") - # convert episodes index to sorted list - episodes = sorted(episodes) - - for ep_idx in tqdm.tqdm(range(starting_ep_idx, ds_length)): - episode = next(it) - - # if user specified episodes, skip the ones not in the list - if episodes is not None: - if len(episodes) == 0: - break - if ep_idx == episodes[0]: - # process this episode - print(" selecting episode idx: ", ep_idx) - episodes.pop(0) - else: - continue # skip - - num_frames = episode["action"].shape[0] - - ep_dict = {} - for key in state_keys: - ep_dict[f"observation.{key}"] = tf_to_torch(episode["observation"][key]) - - ep_dict["action"] = tf_to_torch(episode["action"]) - ep_dict["next.reward"] = tf_to_torch(episode["reward"]).float() - ep_dict["next.done"] = tf_to_torch(episode["is_last"]) - ep_dict["is_terminal"] = tf_to_torch(episode["is_terminal"]) - ep_dict["is_first"] = tf_to_torch(episode["is_first"]) - ep_dict["discount"] = tf_to_torch(episode["discount"]) - - # If lang_key is present, convert the entire tensor at once - if lang_key is not None: - ep_dict["language_instruction"] = [x.numpy().decode("utf-8") for x in episode[lang_key]] - - ep_dict["timestamp"] = torch.arange(0, num_frames, 1) / fps - ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames) - ep_dict["frame_index"] = torch.arange(0, num_frames, 1) - - image_array_dict = {key: [] for key in image_keys} - - for im_key in image_keys: - imgs = episode["observation"][im_key] - image_array_dict[im_key] = [tf_img_convert(img) for img in imgs] - - # loop through all cameras - for im_key in image_keys: - img_key = f"observation.images.{im_key}" - imgs_array = image_array_dict[im_key] - imgs_array = np.array(imgs_array) - if video: - # save png images in temporary directory - tmp_imgs_dir = videos_dir / "tmp_images" - save_images_concurrently(imgs_array, tmp_imgs_dir) - - # encode images to a mp4 video - fname = f"{img_key}_episode_{ep_idx:06d}.mp4" - video_path = videos_dir / fname - encode_video_frames(tmp_imgs_dir, video_path, fps, **(encoding or {})) - - # clean temporary images directory - shutil.rmtree(tmp_imgs_dir) - - # store the reference to the video frame - ep_dict[img_key] = [ - {"path": f"videos/{fname}", "timestamp": i / fps} for i in range(num_frames) - ] - else: - ep_dict[img_key] = [PILImage.fromarray(x) for x in imgs_array] - - path_ep_dict = tmp_ep_dicts_dir.joinpath( - "ep_dict_" + "0" * (10 - len(str(ep_idx))) + str(ep_idx) + ".pt" - ) - torch.save(ep_dict, path_ep_dict) - - ep_dicts.append(ep_dict) - - data_dict = concatenate_episodes(ep_dicts) - - total_frames = data_dict["frame_index"].shape[0] - data_dict["index"] = torch.arange(0, total_frames, 1) - return data_dict - - -def to_hf_dataset(data_dict, video) -> Dataset: - features = {} - - for key in data_dict: - # check if vector state obs - if key.startswith("observation.") and "observation.images." not in key: - features[key] = Sequence(length=data_dict[key].shape[1], feature=Value(dtype="float32", id=None)) - # check if image obs - elif "observation.images." in key: - if video: - features[key] = VideoFrame() - else: - features[key] = Image() - - if "language_instruction" in data_dict: - features["language_instruction"] = Value(dtype="string", id=None) - - features["action"] = Sequence( - length=data_dict["action"].shape[1], feature=Value(dtype="float32", id=None) - ) - - features["is_terminal"] = Value(dtype="bool", id=None) - features["is_first"] = Value(dtype="bool", id=None) - features["discount"] = Value(dtype="float32", id=None) - - features["episode_index"] = Value(dtype="int64", id=None) - features["frame_index"] = Value(dtype="int64", id=None) - features["timestamp"] = Value(dtype="float32", id=None) - features["next.reward"] = Value(dtype="float32", id=None) - features["next.done"] = Value(dtype="bool", id=None) - features["index"] = Value(dtype="int64", id=None) - - hf_dataset = Dataset.from_dict(data_dict, features=Features(features)) - hf_dataset.set_transform(hf_transform_to_torch) - return hf_dataset - - -def from_raw_to_lerobot_format( - raw_dir: Path, - videos_dir: Path, - fps: int | None = None, - video: bool = True, - episodes: list[int] | None = None, - encoding: dict | None = None, -): - data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes, encoding) - hf_dataset = to_hf_dataset(data_dict, video) - episode_data_index = calculate_episode_data_index(hf_dataset) - info = { - "codebase_version": CODEBASE_VERSION, - "fps": fps, - "video": video, - } - if video: - info["encoding"] = get_default_encoding() - - return hf_dataset, episode_data_index, info diff --git a/lerobot/common/datasets/push_dataset_to_hub/pusht_zarr_format.py b/lerobot/common/datasets/push_dataset_to_hub/pusht_zarr_format.py deleted file mode 100644 index 27b31ba2..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/pusht_zarr_format.py +++ /dev/null @@ -1,275 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Process zarr files formatted like in: https://github.com/real-stanford/diffusion_policy""" - -import shutil -from pathlib import Path - -import numpy as np -import torch -import tqdm -import zarr -from datasets import Dataset, Features, Image, Sequence, Value -from PIL import Image as PILImage - -from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION -from lerobot.common.datasets.push_dataset_to_hub.utils import ( - calculate_episode_data_index, - concatenate_episodes, - get_default_encoding, - save_images_concurrently, -) -from lerobot.common.datasets.utils import ( - hf_transform_to_torch, -) -from lerobot.common.datasets.video_utils import VideoFrame, encode_video_frames - - -def check_format(raw_dir): - zarr_path = raw_dir / "pusht_cchi_v7_replay.zarr" - zarr_data = zarr.open(zarr_path, mode="r") - - required_datasets = { - "data/action", - "data/img", - "data/keypoint", - "data/n_contacts", - "data/state", - "meta/episode_ends", - } - for dataset in required_datasets: - assert dataset in zarr_data - nb_frames = zarr_data["data/img"].shape[0] - - required_datasets.remove("meta/episode_ends") - - assert all(nb_frames == zarr_data[dataset].shape[0] for dataset in required_datasets) - - -def load_from_raw( - raw_dir: Path, - videos_dir: Path, - fps: int, - video: bool, - episodes: list[int] | None = None, - keypoints_instead_of_image: bool = False, - encoding: dict | None = None, -): - try: - import pymunk - from gym_pusht.envs.pusht import PushTEnv, pymunk_to_shapely - - from lerobot.common.datasets.push_dataset_to_hub._diffusion_policy_replay_buffer import ( - ReplayBuffer as DiffusionPolicyReplayBuffer, - ) - except ModuleNotFoundError as e: - print("`gym_pusht` is not installed. Please install it with `pip install 'lerobot[gym_pusht]'`") - raise e - # as define in gmy-pusht env: https://github.com/huggingface/gym-pusht/blob/e0684ff988d223808c0a9dcfaba9dc4991791370/gym_pusht/envs/pusht.py#L174 - success_threshold = 0.95 # 95% coverage, - - zarr_path = raw_dir / "pusht_cchi_v7_replay.zarr" - zarr_data = DiffusionPolicyReplayBuffer.copy_from_path(zarr_path) - - episode_ids = torch.from_numpy(zarr_data.get_episode_idxs()) - assert len( - {zarr_data[key].shape[0] for key in zarr_data.keys()} # noqa: SIM118 - ), "Some data type dont have the same number of total frames." - - # TODO(rcadene): verify that goal pose is expected to be fixed - goal_pos_angle = np.array([256, 256, np.pi / 4]) # x, y, theta (in radians) - goal_body = PushTEnv.get_goal_pose_body(goal_pos_angle) - - imgs = torch.from_numpy(zarr_data["img"]) # b h w c - states = torch.from_numpy(zarr_data["state"]) - actions = torch.from_numpy(zarr_data["action"]) - - # load data indices from which each episode starts and ends - from_ids, to_ids = [], [] - from_idx = 0 - for to_idx in zarr_data.meta["episode_ends"]: - from_ids.append(from_idx) - to_ids.append(to_idx) - from_idx = to_idx - - num_episodes = len(from_ids) - - ep_dicts = [] - ep_ids = episodes if episodes else range(num_episodes) - for ep_idx, selected_ep_idx in tqdm.tqdm(enumerate(ep_ids)): - from_idx = from_ids[selected_ep_idx] - to_idx = to_ids[selected_ep_idx] - num_frames = to_idx - from_idx - - # sanity check - assert (episode_ids[from_idx:to_idx] == ep_idx).all() - - # get image - if not keypoints_instead_of_image: - image = imgs[from_idx:to_idx] - assert image.min() >= 0.0 - assert image.max() <= 255.0 - image = image.type(torch.uint8) - - # get state - state = states[from_idx:to_idx] - agent_pos = state[:, :2] - block_pos = state[:, 2:4] - block_angle = state[:, 4] - - # get reward, success, done, and (maybe) keypoints - reward = torch.zeros(num_frames) - success = torch.zeros(num_frames, dtype=torch.bool) - if keypoints_instead_of_image: - keypoints = torch.zeros(num_frames, 16) # 8 keypoints each with 2 coords - done = torch.zeros(num_frames, dtype=torch.bool) - for i in range(num_frames): - space = pymunk.Space() - space.gravity = 0, 0 - space.damping = 0 - - # Add walls. - walls = [ - PushTEnv.add_segment(space, (5, 506), (5, 5), 2), - PushTEnv.add_segment(space, (5, 5), (506, 5), 2), - PushTEnv.add_segment(space, (506, 5), (506, 506), 2), - PushTEnv.add_segment(space, (5, 506), (506, 506), 2), - ] - space.add(*walls) - - block_body, block_shapes = PushTEnv.add_tee(space, block_pos[i].tolist(), block_angle[i].item()) - goal_geom = pymunk_to_shapely(goal_body, block_body.shapes) - block_geom = pymunk_to_shapely(block_body, block_body.shapes) - intersection_area = goal_geom.intersection(block_geom).area - goal_area = goal_geom.area - coverage = intersection_area / goal_area - reward[i] = np.clip(coverage / success_threshold, 0, 1) - success[i] = coverage > success_threshold - if keypoints_instead_of_image: - keypoints[i] = torch.from_numpy(PushTEnv.get_keypoints(block_shapes).flatten()) - - # last step of demonstration is considered done - done[-1] = True - - ep_dict = {} - - if not keypoints_instead_of_image: - imgs_array = [x.numpy() for x in image] - img_key = "observation.image" - if video: - # save png images in temporary directory - tmp_imgs_dir = videos_dir / "tmp_images" - save_images_concurrently(imgs_array, tmp_imgs_dir) - - # encode images to a mp4 video - fname = f"{img_key}_episode_{ep_idx:06d}.mp4" - video_path = videos_dir / fname - encode_video_frames(tmp_imgs_dir, video_path, fps, **(encoding or {})) - - # clean temporary images directory - shutil.rmtree(tmp_imgs_dir) - - # store the reference to the video frame - ep_dict[img_key] = [ - {"path": f"videos/{fname}", "timestamp": i / fps} for i in range(num_frames) - ] - else: - ep_dict[img_key] = [PILImage.fromarray(x) for x in imgs_array] - - ep_dict["observation.state"] = agent_pos - if keypoints_instead_of_image: - ep_dict["observation.environment_state"] = keypoints - ep_dict["action"] = actions[from_idx:to_idx] - ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames, dtype=torch.int64) - ep_dict["frame_index"] = torch.arange(0, num_frames, 1) - ep_dict["timestamp"] = torch.arange(0, num_frames, 1) / fps - # ep_dict["next.observation.image"] = image[1:], - # ep_dict["next.observation.state"] = agent_pos[1:], - # TODO(rcadene)] = verify that reward and done are aligned with image and agent_pos - ep_dict["next.reward"] = torch.cat([reward[1:], reward[[-1]]]) - ep_dict["next.done"] = torch.cat([done[1:], done[[-1]]]) - ep_dict["next.success"] = torch.cat([success[1:], success[[-1]]]) - ep_dicts.append(ep_dict) - data_dict = concatenate_episodes(ep_dicts) - - total_frames = data_dict["frame_index"].shape[0] - data_dict["index"] = torch.arange(0, total_frames, 1) - return data_dict - - -def to_hf_dataset(data_dict, video, keypoints_instead_of_image: bool = False): - features = {} - - if not keypoints_instead_of_image: - if video: - features["observation.image"] = VideoFrame() - else: - features["observation.image"] = Image() - - features["observation.state"] = Sequence( - length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None) - ) - if keypoints_instead_of_image: - features["observation.environment_state"] = Sequence( - length=data_dict["observation.environment_state"].shape[1], - feature=Value(dtype="float32", id=None), - ) - features["action"] = Sequence( - length=data_dict["action"].shape[1], feature=Value(dtype="float32", id=None) - ) - features["episode_index"] = Value(dtype="int64", id=None) - features["frame_index"] = Value(dtype="int64", id=None) - features["timestamp"] = Value(dtype="float32", id=None) - features["next.reward"] = Value(dtype="float32", id=None) - features["next.done"] = Value(dtype="bool", id=None) - features["next.success"] = Value(dtype="bool", id=None) - features["index"] = Value(dtype="int64", id=None) - - hf_dataset = Dataset.from_dict(data_dict, features=Features(features)) - hf_dataset.set_transform(hf_transform_to_torch) - return hf_dataset - - -def from_raw_to_lerobot_format( - raw_dir: Path, - videos_dir: Path, - fps: int | None = None, - video: bool = True, - episodes: list[int] | None = None, - encoding: dict | None = None, -): - # Manually change this to True to use keypoints of the T instead of an image observation (but don't merge - # with True). Also make sure to use video = 0 in the `push_dataset_to_hub.py` script. - keypoints_instead_of_image = False - - # sanity check - check_format(raw_dir) - - if fps is None: - fps = 10 - - data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes, keypoints_instead_of_image, encoding) - hf_dataset = to_hf_dataset(data_dict, video, keypoints_instead_of_image) - episode_data_index = calculate_episode_data_index(hf_dataset) - info = { - "codebase_version": CODEBASE_VERSION, - "fps": fps, - "video": video if not keypoints_instead_of_image else 0, - } - if video: - info["encoding"] = get_default_encoding() - - return hf_dataset, episode_data_index, info diff --git a/lerobot/common/datasets/push_dataset_to_hub/umi_zarr_format.py b/lerobot/common/datasets/push_dataset_to_hub/umi_zarr_format.py deleted file mode 100644 index fec893a7..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/umi_zarr_format.py +++ /dev/null @@ -1,234 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Process UMI (Universal Manipulation Interface) data stored in Zarr format like in: https://github.com/real-stanford/universal_manipulation_interface""" - -import logging -import shutil -from pathlib import Path - -import torch -import tqdm -import zarr -from datasets import Dataset, Features, Image, Sequence, Value -from PIL import Image as PILImage - -from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION -from lerobot.common.datasets.push_dataset_to_hub._umi_imagecodecs_numcodecs import register_codecs -from lerobot.common.datasets.push_dataset_to_hub.utils import ( - calculate_episode_data_index, - concatenate_episodes, - get_default_encoding, - save_images_concurrently, -) -from lerobot.common.datasets.utils import ( - hf_transform_to_torch, -) -from lerobot.common.datasets.video_utils import VideoFrame, encode_video_frames - - -def check_format(raw_dir) -> bool: - zarr_path = raw_dir / "cup_in_the_wild.zarr" - zarr_data = zarr.open(zarr_path, mode="r") - - required_datasets = { - "data/robot0_demo_end_pose", - "data/robot0_demo_start_pose", - "data/robot0_eef_pos", - "data/robot0_eef_rot_axis_angle", - "data/robot0_gripper_width", - "meta/episode_ends", - "data/camera0_rgb", - } - for dataset in required_datasets: - if dataset not in zarr_data: - return False - - # mandatory to access zarr_data - register_codecs() - nb_frames = zarr_data["data/camera0_rgb"].shape[0] - - required_datasets.remove("meta/episode_ends") - assert all(nb_frames == zarr_data[dataset].shape[0] for dataset in required_datasets) - - -def load_from_raw( - raw_dir: Path, - videos_dir: Path, - fps: int, - video: bool, - episodes: list[int] | None = None, - encoding: dict | None = None, -): - zarr_path = raw_dir / "cup_in_the_wild.zarr" - zarr_data = zarr.open(zarr_path, mode="r") - - # We process the image data separately because it is too large to fit in memory - end_pose = torch.from_numpy(zarr_data["data/robot0_demo_end_pose"][:]) - start_pos = torch.from_numpy(zarr_data["data/robot0_demo_start_pose"][:]) - eff_pos = torch.from_numpy(zarr_data["data/robot0_eef_pos"][:]) - eff_rot_axis_angle = torch.from_numpy(zarr_data["data/robot0_eef_rot_axis_angle"][:]) - gripper_width = torch.from_numpy(zarr_data["data/robot0_gripper_width"][:]) - - states_pos = torch.cat([eff_pos, eff_rot_axis_angle], dim=1) - states = torch.cat([states_pos, gripper_width], dim=1) - - episode_ends = zarr_data["meta/episode_ends"][:] - num_episodes = episode_ends.shape[0] - - # We convert it in torch tensor later because the jit function does not support torch tensors - episode_ends = torch.from_numpy(episode_ends) - - # load data indices from which each episode starts and ends - from_ids, to_ids = [], [] - from_idx = 0 - for to_idx in episode_ends: - from_ids.append(from_idx) - to_ids.append(to_idx) - from_idx = to_idx - - ep_dicts_dir = videos_dir / "ep_dicts" - ep_dicts_dir.mkdir(exist_ok=True, parents=True) - ep_dicts = [] - - ep_ids = episodes if episodes else range(num_episodes) - for ep_idx, selected_ep_idx in tqdm.tqdm(enumerate(ep_ids)): - ep_dict_path = ep_dicts_dir / f"{ep_idx}" - if not ep_dict_path.is_file(): - from_idx = from_ids[selected_ep_idx] - to_idx = to_ids[selected_ep_idx] - num_frames = to_idx - from_idx - - # TODO(rcadene): save temporary images of the episode? - - state = states[from_idx:to_idx] - - ep_dict = {} - - # load 57MB of images in RAM (400x224x224x3 uint8) - imgs_array = zarr_data["data/camera0_rgb"][from_idx:to_idx] - img_key = "observation.image" - if video: - fname = f"{img_key}_episode_{ep_idx:06d}.mp4" - video_path = videos_dir / fname - if not video_path.is_file(): - # save png images in temporary directory - tmp_imgs_dir = videos_dir / "tmp_images" - save_images_concurrently(imgs_array, tmp_imgs_dir) - - # encode images to a mp4 video - encode_video_frames(tmp_imgs_dir, video_path, fps, **(encoding or {})) - - # clean temporary images directory - shutil.rmtree(tmp_imgs_dir) - - # store the reference to the video frame - ep_dict[img_key] = [ - {"path": f"videos/{fname}", "timestamp": i / fps} for i in range(num_frames) - ] - else: - ep_dict[img_key] = [PILImage.fromarray(x) for x in imgs_array] - - ep_dict["observation.state"] = state - ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames, dtype=torch.int64) - ep_dict["frame_index"] = torch.arange(0, num_frames, 1) - ep_dict["timestamp"] = torch.arange(0, num_frames, 1) / fps - ep_dict["episode_data_index_from"] = torch.tensor([from_idx] * num_frames) - ep_dict["episode_data_index_to"] = torch.tensor([from_idx + num_frames] * num_frames) - ep_dict["end_pose"] = end_pose[from_idx:to_idx] - ep_dict["start_pos"] = start_pos[from_idx:to_idx] - ep_dict["gripper_width"] = gripper_width[from_idx:to_idx] - torch.save(ep_dict, ep_dict_path) - else: - ep_dict = torch.load(ep_dict_path) - - ep_dicts.append(ep_dict) - - data_dict = concatenate_episodes(ep_dicts) - - total_frames = data_dict["frame_index"].shape[0] - data_dict["index"] = torch.arange(0, total_frames, 1) - return data_dict - - -def to_hf_dataset(data_dict, video): - features = {} - - if video: - features["observation.image"] = VideoFrame() - else: - features["observation.image"] = Image() - - features["observation.state"] = Sequence( - length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None) - ) - features["episode_index"] = Value(dtype="int64", id=None) - features["frame_index"] = Value(dtype="int64", id=None) - features["timestamp"] = Value(dtype="float32", id=None) - features["index"] = Value(dtype="int64", id=None) - features["episode_data_index_from"] = Value(dtype="int64", id=None) - features["episode_data_index_to"] = Value(dtype="int64", id=None) - # `start_pos` and `end_pos` respectively represent the positions of the end-effector - # at the beginning and the end of the episode. - # `gripper_width` indicates the distance between the grippers, and this value is included - # in the state vector, which comprises the concatenation of the end-effector position - # and gripper width. - features["end_pose"] = Sequence( - length=data_dict["end_pose"].shape[1], feature=Value(dtype="float32", id=None) - ) - features["start_pos"] = Sequence( - length=data_dict["start_pos"].shape[1], feature=Value(dtype="float32", id=None) - ) - features["gripper_width"] = Sequence( - length=data_dict["gripper_width"].shape[1], feature=Value(dtype="float32", id=None) - ) - - hf_dataset = Dataset.from_dict(data_dict, features=Features(features)) - hf_dataset.set_transform(hf_transform_to_torch) - return hf_dataset - - -def from_raw_to_lerobot_format( - raw_dir: Path, - videos_dir: Path, - fps: int | None = None, - video: bool = True, - episodes: list[int] | None = None, - encoding: dict | None = None, -): - # sanity check - check_format(raw_dir) - - if fps is None: - # For umi cup in the wild: https://arxiv.org/pdf/2402.10329#table.caption.16 - fps = 10 - - if not video: - logging.warning( - "Generating UMI dataset without `video=True` creates ~150GB on disk and requires ~80GB in RAM." - ) - - data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes, encoding) - hf_dataset = to_hf_dataset(data_dict, video) - episode_data_index = calculate_episode_data_index(hf_dataset) - info = { - "codebase_version": CODEBASE_VERSION, - "fps": fps, - "video": video, - } - if video: - info["encoding"] = get_default_encoding() - - return hf_dataset, episode_data_index, info diff --git a/lerobot/common/datasets/push_dataset_to_hub/xarm_pkl_format.py b/lerobot/common/datasets/push_dataset_to_hub/xarm_pkl_format.py deleted file mode 100644 index 0047e48c..00000000 --- a/lerobot/common/datasets/push_dataset_to_hub/xarm_pkl_format.py +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Process pickle files formatted like in: https://github.com/fyhMer/fowm""" - -import pickle -import shutil -from pathlib import Path - -import einops -import torch -import tqdm -from datasets import Dataset, Features, Image, Sequence, Value -from PIL import Image as PILImage - -from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION -from lerobot.common.datasets.push_dataset_to_hub.utils import ( - calculate_episode_data_index, - concatenate_episodes, - get_default_encoding, - save_images_concurrently, -) -from lerobot.common.datasets.utils import ( - hf_transform_to_torch, -) -from lerobot.common.datasets.video_utils import VideoFrame, encode_video_frames - - -def check_format(raw_dir): - keys = {"actions", "rewards", "dones"} - nested_keys = {"observations": {"rgb", "state"}, "next_observations": {"rgb", "state"}} - - xarm_files = list(raw_dir.glob("*.pkl")) - assert len(xarm_files) > 0 - - with open(xarm_files[0], "rb") as f: - dataset_dict = pickle.load(f) - - assert isinstance(dataset_dict, dict) - assert all(k in dataset_dict for k in keys) - - # Check for consistent lengths in nested keys - expected_len = len(dataset_dict["actions"]) - assert all(len(dataset_dict[key]) == expected_len for key in keys if key in dataset_dict) - - for key, subkeys in nested_keys.items(): - nested_dict = dataset_dict.get(key, {}) - assert all(len(nested_dict[subkey]) == expected_len for subkey in subkeys if subkey in nested_dict) - - -def load_from_raw( - raw_dir: Path, - videos_dir: Path, - fps: int, - video: bool, - episodes: list[int] | None = None, - encoding: dict | None = None, -): - pkl_path = raw_dir / "buffer.pkl" - - with open(pkl_path, "rb") as f: - pkl_data = pickle.load(f) - - # load data indices from which each episode starts and ends - from_ids, to_ids = [], [] - from_idx, to_idx = 0, 0 - for done in pkl_data["dones"]: - to_idx += 1 - if not done: - continue - from_ids.append(from_idx) - to_ids.append(to_idx) - from_idx = to_idx - - num_episodes = len(from_ids) - - ep_dicts = [] - ep_ids = episodes if episodes else range(num_episodes) - for ep_idx, selected_ep_idx in tqdm.tqdm(enumerate(ep_ids)): - from_idx = from_ids[selected_ep_idx] - to_idx = to_ids[selected_ep_idx] - num_frames = to_idx - from_idx - - image = torch.tensor(pkl_data["observations"]["rgb"][from_idx:to_idx]) - image = einops.rearrange(image, "b c h w -> b h w c") - state = torch.tensor(pkl_data["observations"]["state"][from_idx:to_idx]) - action = torch.tensor(pkl_data["actions"][from_idx:to_idx]) - # TODO(rcadene): we have a missing last frame which is the observation when the env is done - # it is critical to have this frame for tdmpc to predict a "done observation/state" - # next_image = torch.tensor(pkl_data["next_observations"]["rgb"][from_idx:to_idx]) - # next_state = torch.tensor(pkl_data["next_observations"]["state"][from_idx:to_idx]) - next_reward = torch.tensor(pkl_data["rewards"][from_idx:to_idx]) - next_done = torch.tensor(pkl_data["dones"][from_idx:to_idx]) - - ep_dict = {} - - imgs_array = [x.numpy() for x in image] - img_key = "observation.image" - if video: - # save png images in temporary directory - tmp_imgs_dir = videos_dir / "tmp_images" - save_images_concurrently(imgs_array, tmp_imgs_dir) - - # encode images to a mp4 video - fname = f"{img_key}_episode_{ep_idx:06d}.mp4" - video_path = videos_dir / fname - encode_video_frames(tmp_imgs_dir, video_path, fps, **(encoding or {})) - - # clean temporary images directory - shutil.rmtree(tmp_imgs_dir) - - # store the reference to the video frame - ep_dict[img_key] = [{"path": f"videos/{fname}", "timestamp": i / fps} for i in range(num_frames)] - else: - ep_dict[img_key] = [PILImage.fromarray(x) for x in imgs_array] - - ep_dict["observation.state"] = state - ep_dict["action"] = action - ep_dict["episode_index"] = torch.tensor([ep_idx] * num_frames, dtype=torch.int64) - ep_dict["frame_index"] = torch.arange(0, num_frames, 1) - ep_dict["timestamp"] = torch.arange(0, num_frames, 1) / fps - # ep_dict["next.observation.image"] = next_image - # ep_dict["next.observation.state"] = next_state - ep_dict["next.reward"] = next_reward - ep_dict["next.done"] = next_done - ep_dicts.append(ep_dict) - - data_dict = concatenate_episodes(ep_dicts) - - total_frames = data_dict["frame_index"].shape[0] - data_dict["index"] = torch.arange(0, total_frames, 1) - return data_dict - - -def to_hf_dataset(data_dict, video): - features = {} - - if video: - features["observation.image"] = VideoFrame() - else: - features["observation.image"] = Image() - - features["observation.state"] = Sequence( - length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None) - ) - features["action"] = Sequence( - length=data_dict["action"].shape[1], feature=Value(dtype="float32", id=None) - ) - features["episode_index"] = Value(dtype="int64", id=None) - features["frame_index"] = Value(dtype="int64", id=None) - features["timestamp"] = Value(dtype="float32", id=None) - features["next.reward"] = Value(dtype="float32", id=None) - features["next.done"] = Value(dtype="bool", id=None) - features["index"] = Value(dtype="int64", id=None) - # TODO(rcadene): add success - # features["next.success"] = Value(dtype='bool', id=None) - - hf_dataset = Dataset.from_dict(data_dict, features=Features(features)) - hf_dataset.set_transform(hf_transform_to_torch) - return hf_dataset - - -def from_raw_to_lerobot_format( - raw_dir: Path, - videos_dir: Path, - fps: int | None = None, - video: bool = True, - episodes: list[int] | None = None, - encoding: dict | None = None, -): - # sanity check - check_format(raw_dir) - - if fps is None: - fps = 15 - - data_dict = load_from_raw(raw_dir, videos_dir, fps, video, episodes, encoding) - hf_dataset = to_hf_dataset(data_dict, video) - episode_data_index = calculate_episode_data_index(hf_dataset) - info = { - "codebase_version": CODEBASE_VERSION, - "fps": fps, - "video": video, - } - if video: - info["encoding"] = get_default_encoding() - - return hf_dataset, episode_data_index, info diff --git a/lerobot/scripts/push_dataset_to_hub.py b/lerobot/scripts/push_dataset_to_hub.py deleted file mode 100644 index 3de2462b..00000000 --- a/lerobot/scripts/push_dataset_to_hub.py +++ /dev/null @@ -1,364 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Use this script to convert your dataset into LeRobot dataset format and upload it to the Hugging Face hub, -or store it locally. LeRobot dataset format is lightweight, fast to load from, and does not require any -installation of neural net specific packages like pytorch, tensorflow, jax. - -Example of how to download raw datasets, convert them into LeRobotDataset format, and push them to the hub: -``` -python lerobot/scripts/push_dataset_to_hub.py \ ---raw-dir data/pusht_raw \ ---raw-format pusht_zarr \ ---repo-id lerobot/pusht - -python lerobot/scripts/push_dataset_to_hub.py \ ---raw-dir data/xarm_lift_medium_raw \ ---raw-format xarm_pkl \ ---repo-id lerobot/xarm_lift_medium - -python lerobot/scripts/push_dataset_to_hub.py \ ---raw-dir data/aloha_sim_insertion_scripted_raw \ ---raw-format aloha_hdf5 \ ---repo-id lerobot/aloha_sim_insertion_scripted - -python lerobot/scripts/push_dataset_to_hub.py \ ---raw-dir data/umi_cup_in_the_wild_raw \ ---raw-format umi_zarr \ ---repo-id lerobot/umi_cup_in_the_wild -``` -""" - -import argparse -import json -import shutil -import warnings -from pathlib import Path -from typing import Any - -import torch -from huggingface_hub import HfApi -from safetensors.torch import save_file - -from lerobot.common.datasets.compute_stats import compute_stats -from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION, LeRobotDataset -from lerobot.common.datasets.push_dataset_to_hub.utils import check_repo_id -from lerobot.common.datasets.utils import create_branch, create_lerobot_dataset_card, flatten_dict - - -def get_from_raw_to_lerobot_format_fn(raw_format: str): - if raw_format == "pusht_zarr": - from lerobot.common.datasets.push_dataset_to_hub.pusht_zarr_format import from_raw_to_lerobot_format - elif raw_format == "umi_zarr": - from lerobot.common.datasets.push_dataset_to_hub.umi_zarr_format import from_raw_to_lerobot_format - elif raw_format == "aloha_hdf5": - from lerobot.common.datasets.push_dataset_to_hub.aloha_hdf5_format import from_raw_to_lerobot_format - elif raw_format in ["rlds", "openx"]: - from lerobot.common.datasets.push_dataset_to_hub.openx_rlds_format import from_raw_to_lerobot_format - elif raw_format == "dora_parquet": - from lerobot.common.datasets.push_dataset_to_hub.dora_parquet_format import from_raw_to_lerobot_format - elif raw_format == "xarm_pkl": - from lerobot.common.datasets.push_dataset_to_hub.xarm_pkl_format import from_raw_to_lerobot_format - elif raw_format == "cam_png": - from lerobot.common.datasets.push_dataset_to_hub.cam_png_format import from_raw_to_lerobot_format - else: - raise ValueError( - f"The selected {raw_format} can't be found. Did you add it to `lerobot/scripts/push_dataset_to_hub.py::get_from_raw_to_lerobot_format_fn`?" - ) - - return from_raw_to_lerobot_format - - -def save_meta_data( - info: dict[str, Any], stats: dict, episode_data_index: dict[str, list], meta_data_dir: Path -): - meta_data_dir.mkdir(parents=True, exist_ok=True) - - # save info - info_path = meta_data_dir / "info.json" - with open(str(info_path), "w") as f: - json.dump(info, f, indent=4) - - # save stats - stats_path = meta_data_dir / "stats.safetensors" - save_file(flatten_dict(stats), stats_path) - - # save episode_data_index - episode_data_index = {key: torch.tensor(episode_data_index[key]) for key in episode_data_index} - ep_data_idx_path = meta_data_dir / "episode_data_index.safetensors" - save_file(episode_data_index, ep_data_idx_path) - - -def push_meta_data_to_hub(repo_id: str, meta_data_dir: str | Path, revision: str | None): - """Expect all meta data files to be all stored in a single "meta_data" directory. - On the hugging face repositery, they will be uploaded in a "meta_data" directory at the root. - """ - api = HfApi() - api.upload_folder( - folder_path=meta_data_dir, - path_in_repo="meta_data", - repo_id=repo_id, - revision=revision, - repo_type="dataset", - ) - - -def push_dataset_card_to_hub( - repo_id: str, - revision: str | None, - tags: list | None = None, - license: str = "apache-2.0", - **card_kwargs, -): - """Creates and pushes a LeRobotDataset Card with appropriate tags to easily find it on the hub.""" - card = create_lerobot_dataset_card(tags=tags, license=license, **card_kwargs) - card.push_to_hub(repo_id=repo_id, repo_type="dataset", revision=revision) - - -def push_videos_to_hub(repo_id: str, videos_dir: str | Path, revision: str | None): - """Expect mp4 files to be all stored in a single "videos" directory. - On the hugging face repositery, they will be uploaded in a "videos" directory at the root. - """ - api = HfApi() - api.upload_folder( - folder_path=videos_dir, - path_in_repo="videos", - repo_id=repo_id, - revision=revision, - repo_type="dataset", - allow_patterns="*.mp4", - ) - - -def push_dataset_to_hub( - raw_dir: Path, - raw_format: str, - repo_id: str, - push_to_hub: bool = True, - local_dir: Path | None = None, - fps: int | None = None, - video: bool = True, - batch_size: int = 32, - num_workers: int = 8, - episodes: list[int] | None = None, - force_override: bool = False, - resume: bool = False, - cache_dir: Path = Path("/tmp"), - tests_data_dir: Path | None = None, - encoding: dict | None = None, -): - check_repo_id(repo_id) - user_id, dataset_id = repo_id.split("/") - - # Robustify when `raw_dir` is str instead of Path - raw_dir = Path(raw_dir) - if not raw_dir.exists(): - raise NotADirectoryError( - f"{raw_dir} does not exists. Check your paths or run this command to download an existing raw dataset on the hub: " - f"`python lerobot/common/datasets/push_dataset_to_hub/_download_raw.py --raw-dir your/raw/dir --repo-id your/repo/id_raw`" - ) - - if local_dir: - # Robustify when `local_dir` is str instead of Path - local_dir = Path(local_dir) - - # Send warning if local_dir isn't well formatted - if local_dir.parts[-2] != user_id or local_dir.parts[-1] != dataset_id: - warnings.warn( - f"`local_dir` ({local_dir}) doesn't contain a community or user id `/` the name of the dataset that match the `repo_id` (e.g. 'data/lerobot/pusht'). Following this naming convention is advised, but not mandatory.", - stacklevel=1, - ) - - # Check we don't override an existing `local_dir` by mistake - if local_dir.exists(): - if force_override: - shutil.rmtree(local_dir) - elif not resume: - raise ValueError(f"`local_dir` already exists ({local_dir}). Use `--force-override 1`.") - - meta_data_dir = local_dir / "meta_data" - videos_dir = local_dir / "videos" - else: - # Temporary directory used to store images, videos, meta_data - meta_data_dir = Path(cache_dir) / "meta_data" - videos_dir = Path(cache_dir) / "videos" - - if raw_format is None: - # TODO(rcadene, adilzouitine): implement auto_find_raw_format - raise NotImplementedError() - # raw_format = auto_find_raw_format(raw_dir) - - # convert dataset from original raw format to LeRobot format - from_raw_to_lerobot_format = get_from_raw_to_lerobot_format_fn(raw_format) - - hf_dataset, episode_data_index, info = from_raw_to_lerobot_format( - raw_dir, - videos_dir, - fps, - video, - episodes, - encoding, - ) - - lerobot_dataset = LeRobotDataset.from_preloaded( - repo_id=repo_id, - hf_dataset=hf_dataset, - episode_data_index=episode_data_index, - info=info, - videos_dir=videos_dir, - ) - stats = compute_stats(lerobot_dataset, batch_size, num_workers) - - if local_dir: - hf_dataset = hf_dataset.with_format(None) # to remove transforms that cant be saved - hf_dataset.save_to_disk(str(local_dir / "train")) - - if push_to_hub or local_dir: - # mandatory for upload - save_meta_data(info, stats, episode_data_index, meta_data_dir) - - if push_to_hub: - hf_dataset.push_to_hub(repo_id, revision="main") - push_meta_data_to_hub(repo_id, meta_data_dir, revision="main") - push_dataset_card_to_hub(repo_id, revision="main") - if video: - push_videos_to_hub(repo_id, videos_dir, revision="main") - create_branch(repo_id, repo_type="dataset", branch=CODEBASE_VERSION) - - if tests_data_dir: - # get the first episode - num_items_first_ep = episode_data_index["to"][0] - episode_data_index["from"][0] - test_hf_dataset = hf_dataset.select(range(num_items_first_ep)) - episode_data_index = {k: v[:1] for k, v in episode_data_index.items()} - - test_hf_dataset = test_hf_dataset.with_format(None) - test_hf_dataset.save_to_disk(str(tests_data_dir / repo_id / "train")) - - tests_meta_data = tests_data_dir / repo_id / "meta_data" - save_meta_data(info, stats, episode_data_index, tests_meta_data) - - # copy videos of first episode to tests directory - episode_index = 0 - tests_videos_dir = tests_data_dir / repo_id / "videos" - tests_videos_dir.mkdir(parents=True, exist_ok=True) - for key in lerobot_dataset.camera_keys: - fname = f"{key}_episode_{episode_index:06d}.mp4" - shutil.copy(videos_dir / fname, tests_videos_dir / fname) - - if local_dir is None: - # clear cache - shutil.rmtree(meta_data_dir) - shutil.rmtree(videos_dir) - - return lerobot_dataset - - -def main(): - parser = argparse.ArgumentParser() - - parser.add_argument( - "--raw-dir", - type=Path, - required=True, - help="Directory containing input raw datasets (e.g. `data/aloha_mobile_chair_raw` or `data/pusht_raw).", - ) - # TODO(rcadene): add automatic detection of the format - parser.add_argument( - "--raw-format", - type=str, - required=True, - help="Dataset type (e.g. `pusht_zarr`, `umi_zarr`, `aloha_hdf5`, `xarm_pkl`, `dora_parquet`, `rlds`, `openx`).", - ) - parser.add_argument( - "--repo-id", - type=str, - required=True, - help="Repositery identifier on Hugging Face: a community or a user name `/` the name of the dataset (e.g. `lerobot/pusht`, `cadene/aloha_sim_insertion_human`).", - ) - parser.add_argument( - "--local-dir", - type=Path, - help="When provided, writes the dataset converted to LeRobotDataset format in this directory (e.g. `data/lerobot/aloha_mobile_chair`).", - ) - parser.add_argument( - "--push-to-hub", - type=int, - default=1, - help="Upload to hub.", - ) - parser.add_argument( - "--fps", - type=int, - help="Frame rate used to collect videos. If not provided, use the default one specified in the code.", - ) - parser.add_argument( - "--video", - type=int, - default=1, - help="Convert each episode of the raw dataset to an mp4 video. This option allows 60 times lower disk space consumption and 25 faster loading time during training.", - ) - parser.add_argument( - "--batch-size", - type=int, - default=32, - help="Batch size loaded by DataLoader for computing the dataset statistics.", - ) - parser.add_argument( - "--num-workers", - type=int, - default=8, - help="Number of processes of Dataloader for computing the dataset statistics.", - ) - parser.add_argument( - "--episodes", - type=int, - nargs="*", - help="When provided, only converts the provided episodes (e.g `--episodes 2 3 4`). Useful to test the code on 1 episode.", - ) - parser.add_argument( - "--force-override", - type=int, - default=0, - help="When set to 1, removes provided output directory if it already exists. By default, raises a ValueError exception.", - ) - parser.add_argument( - "--resume", - type=int, - default=0, - help="When set to 1, resumes a previous run.", - ) - parser.add_argument( - "--cache-dir", - type=Path, - required=False, - default="/tmp", - help="Directory to store the temporary videos and images generated while creating the dataset.", - ) - parser.add_argument( - "--tests-data-dir", - type=Path, - help=( - "When provided, save tests artifacts into the given directory " - "(e.g. `--tests-data-dir tests/data` will save to tests/data/{--repo-id})." - ), - ) - - args = parser.parse_args() - push_dataset_to_hub(**vars(args)) - - -if __name__ == "__main__": - main()