Add upload_large_folder

This commit is contained in:
Remi Cadene 2025-02-23 18:19:12 +00:00
parent c36d2253d0
commit 3daab2acbb
1 changed file with 14 additions and 8 deletions

View File

@@ -516,6 +516,7 @@ class LeRobotDataset(torch.utils.data.Dataset):
push_videos: bool = True, push_videos: bool = True,
private: bool = False, private: bool = False,
allow_patterns: list[str] | str | None = None, allow_patterns: list[str] | str | None = None,
upload_large_folder: bool = False,
**card_kwargs, **card_kwargs,
) -> None: ) -> None:
ignore_patterns = ["images/"] ignore_patterns = ["images/"]
@@ -538,14 +539,19 @@ class LeRobotDataset(torch.utils.data.Dataset):
exist_ok=True, exist_ok=True,
) )
hub_api.upload_folder( upload_kwargs = {
repo_id=self.repo_id, "repo_id": self.repo_id,
folder_path=self.root, "folder_path": self.root,
repo_type="dataset", "repo_type": "dataset",
revision=branch, "revision": branch,
allow_patterns=allow_patterns, "allow_patterns": allow_patterns,
ignore_patterns=ignore_patterns, "ignore_patterns": ignore_patterns,
) }
if upload_large_folder:
hub_api.upload_large_folder(**upload_kwargs)
else:
hub_api.upload_folder(**upload_kwargs)
if not hub_api.file_exists(self.repo_id, REPOCARD_NAME, repo_type="dataset", revision=branch): if not hub_api.file_exists(self.repo_id, REPOCARD_NAME, repo_type="dataset", revision=branch):
card = create_lerobot_dataset_card( card = create_lerobot_dataset_card(
tags=tags, dataset_info=self.meta.info, license=license, **card_kwargs tags=tags, dataset_info=self.meta.info, license=license, **card_kwargs