diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 20b874b5..c5ae0354 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -13,7 +13,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import json import logging import os import shutil @@ -138,6 +137,11 @@ class LeRobotDatasetMetadata: """Formattable string for the video files.""" return self.info["video_path"] + @property + def robot_type(self) -> str | None: + """Robot type used in recording this dataset.""" + return self.info["robot_type"] + @property def fps(self) -> int: """Frames per second used during data collection.""" @@ -258,10 +262,14 @@ class LeRobotDatasetMetadata: write_json(self.info, self.root / INFO_PATH) def __repr__(self): + feature_keys = list(self.features) return ( - f"{self.__class__.__name__}\n" - f"Repository ID: '{self.repo_id}',\n" - f"{json.dumps(self.meta.info, indent=4)}\n" + f"{self.__class__.__name__}({{\n" + f" Repository ID: '{self.repo_id}',\n" + f" Total episodes: '{self.total_episodes}',\n" + f" Total frames: '{self.total_frames}',\n" + f" Features: '{feature_keys}',\n" + "})\n" ) @classmethod @@ -657,13 +665,14 @@ class LeRobotDataset(torch.utils.data.Dataset): return item def __repr__(self): + feature_keys = list(self.features) return ( - f"{self.__class__.__name__}\n" - f" Repository ID: '{self.repo_id}',\n" - f" Selected episodes: {self.episodes},\n" - f" Number of selected episodes: {self.num_episodes},\n" - f" Number of selected samples: {self.num_frames},\n" - f"\n{json.dumps(self.meta.info, indent=4)}\n" + f"{self.__class__.__name__}({{\n" + f" Repository ID: '{self.repo_id}',\n" + f" Number of selected episodes: '{self.num_episodes}',\n" + f" Number of selected samples: '{self.num_frames}',\n" + f" Features: '{feature_keys}',\n" 
+ "})\n" ) def _create_episode_buffer(self, episode_index: int | None = None) -> dict: diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py index 875d5169..dc43d112 100644 --- a/lerobot/common/datasets/utils.py +++ b/lerobot/common/datasets/utils.py @@ -468,6 +468,7 @@ def create_lerobot_dataset_card( text: str | None = None, info: dict | None = None, license: str | None = None, + url: str | None = None, citation: str | None = None, arxiv: str | None = None, ) -> DatasetCard: @@ -488,6 +489,8 @@ def create_lerobot_dataset_card( card.data.license = license if tags: card.data.tags += tags + if url: + card.text += f"## Homepage:\n{url}\n" if text: card.text += f"{text}\n" if info: diff --git a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py index 827cc1de..de8ff4c4 100644 --- a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py @@ -222,12 +222,12 @@ def get_features_from_hf_dataset(dataset: Dataset, robot_config: dict | None = N dtype = "image" image = dataset[0][key] # Assuming first row channels = get_image_pixel_channels(image) - shape = (image.width, image.height, channels) - names = ["width", "height", "channel"] + shape = (image.height, image.width, channels) + names = ["height", "width", "channel"] elif ft._type == "VideoFrame": dtype = "video" shape = None # Add shape later - names = ["width", "height", "channel"] + names = ["height", "width", "channel"] features[key] = { "dtype": dtype, @@ -437,8 +437,9 @@ def convert_dataset( tasks_col: Path | None = None, robot_config: dict | None = None, license: str | None = None, - citation: str | None = None, + url: str | None = None, arxiv: str | None = None, + citation: str | None = None, test_branch: str | None = None, ): v1 = get_hub_safe_version(repo_id, V16) @@ -518,8 +519,8 @@ def convert_dataset( videos_info = get_videos_info(repo_id, v1x_dir, 
video_keys=video_keys, branch=branch) for key in video_keys: features[key]["shape"] = ( - videos_info[key].pop("video.width"), videos_info[key].pop("video.height"), + videos_info[key].pop("video.width"), videos_info[key].pop("video.channels"), ) features[key]["video_info"] = videos_info[key] @@ -566,7 +567,7 @@ def convert_dataset( write_json(metadata_v2_0, v20_dir / INFO_PATH) convert_stats_to_json(v1x_dir, v20_dir) card = create_lerobot_dataset_card( - tags=repo_tags, info=metadata_v2_0, license=license, citation=citation, arxiv=arxiv + tags=repo_tags, info=metadata_v2_0, license=license, url=url, citation=citation, arxiv=arxiv ) with contextlib.suppress(EntryNotFoundError): diff --git a/lerobot/common/datasets/video_utils.py b/lerobot/common/datasets/video_utils.py index 80cc79cc..8ed3318d 100644 --- a/lerobot/common/datasets/video_utils.py +++ b/lerobot/common/datasets/video_utils.py @@ -279,8 +279,8 @@ def get_video_info(video_path: Path | str) -> dict: video_info = { "video.fps": fps, - "video.width": video_stream_info["width"], "video.height": video_stream_info["height"], + "video.width": video_stream_info["width"], "video.channels": pixel_channels, "video.codec": video_stream_info["codec_name"], "video.pix_fmt": video_stream_info["pix_fmt"], diff --git a/lerobot/common/robot_devices/robots/manipulator.py b/lerobot/common/robot_devices/robots/manipulator.py index 6bdad3e6..61810506 100644 --- a/lerobot/common/robot_devices/robots/manipulator.py +++ b/lerobot/common/robot_devices/robots/manipulator.py @@ -235,8 +235,8 @@ class ManipulatorRobot: for cam_key, cam in self.cameras.items(): key = f"observation.images.{cam_key}" cam_ft[key] = { - "shape": (cam.width, cam.height, cam.channels), - "names": ["width", "height", "channels"], + "shape": (cam.height, cam.width, cam.channels), + "names": ["height", "width", "channels"], "info": None, } return cam_ft diff --git a/tests/fixtures/dataset_factories.py b/tests/fixtures/dataset_factories.py index 
5d003e1f..6d442664 100644 --- a/tests/fixtures/dataset_factories.py +++ b/tests/fixtures/dataset_factories.py @@ -27,15 +27,6 @@ from tests.fixtures.defaults import ( ) -def make_dummy_shapes(keys: list[str] | None = None, camera_keys: list[str] | None = None) -> dict: - shapes = {} - if keys: - shapes.update({key: 10 for key in keys}) - if camera_keys: - shapes.update({key: {"width": 100, "height": 70, "channels": 3} for key in camera_keys}) - return shapes - - def get_task_index(task_dicts: dict, task: str) -> int: tasks = {d["task_index"]: d["task"] for d in task_dicts} task_to_task_index = {task: task_idx for task_idx, task in tasks.items()} @@ -44,7 +35,7 @@ def get_task_index(task_dicts: dict, task: str) -> int: @pytest.fixture(scope="session") def img_tensor_factory(): - def _create_img_tensor(width=100, height=100, channels=3, dtype=torch.float32) -> torch.Tensor: + def _create_img_tensor(height=100, width=100, channels=3, dtype=torch.float32) -> torch.Tensor: return torch.rand((channels, height, width), dtype=dtype) return _create_img_tensor @@ -52,7 +43,7 @@ def img_tensor_factory(): @pytest.fixture(scope="session") def img_array_factory(): - def _create_img_array(width=100, height=100, channels=3, dtype=np.uint8) -> np.ndarray: + def _create_img_array(height=100, width=100, channels=3, dtype=np.uint8) -> np.ndarray: if np.issubdtype(dtype, np.unsignedinteger): # Int array in [0, 255] range img_array = np.random.randint(0, 256, size=(height, width, channels), dtype=dtype) @@ -68,8 +59,8 @@ def img_array_factory(): @pytest.fixture(scope="session") def img_factory(img_array_factory): - def _create_img(width=100, height=100) -> PIL.Image.Image: - img_array = img_array_factory(width=width, height=height) + def _create_img(height=100, width=100) -> PIL.Image.Image: + img_array = img_array_factory(height=height, width=width) return PIL.Image.fromarray(img_array) return _create_img @@ -259,7 +250,7 @@ def hf_dataset_factory(features_factory, tasks_factory, 
episodes_factory, img_ar for key, ft in features.items(): if ft["dtype"] == "image": robot_cols[key] = [ - img_array_factory(width=ft["shapes"][0], height=ft["shapes"][1]) + img_array_factory(height=ft["shape"][0], width=ft["shape"][1]) for _ in range(len(index_col)) ] elif ft["shape"][0] > 1 and ft["dtype"] != "video": diff --git a/tests/fixtures/defaults.py b/tests/fixtures/defaults.py index a430ead8..bfe6c339 100644 --- a/tests/fixtures/defaults.py +++ b/tests/fixtures/defaults.py @@ -16,8 +16,8 @@ DUMMY_MOTOR_FEATURES = { }, } DUMMY_CAMERA_FEATURES = { - "laptop": {"shape": (640, 480, 3), "names": ["width", "height", "channels"], "info": None}, - "phone": {"shape": (640, 480, 3), "names": ["width", "height", "channels"], "info": None}, + "laptop": {"shape": (480, 640, 3), "names": ["height", "width", "channels"], "info": None}, + "phone": {"shape": (480, 640, 3), "names": ["height", "width", "channels"], "info": None}, } DEFAULT_FPS = 30 DUMMY_VIDEO_INFO = { diff --git a/tests/test_image_writer.py b/tests/test_image_writer.py index 2b0884a1..f51e86b4 100644 --- a/tests/test_image_writer.py +++ b/tests/test_image_writer.py @@ -265,7 +265,7 @@ def test_wait_until_done(tmp_path, img_array_factory): writer = AsyncImageWriter(num_processes=0, num_threads=4) try: num_images = 100 - image_arrays = [img_array_factory(width=500, height=500) for _ in range(num_images)] + image_arrays = [img_array_factory(height=500, width=500) for _ in range(num_images)] fpaths = [tmp_path / f"frame_{i:06d}.png" for i in range(num_images)] for image_array, fpath in zip(image_arrays, fpaths, strict=True): fpath.parent.mkdir(parents=True, exist_ok=True)