fix upload with upload_folder

This commit is contained in:
Cadene 2024-05-02 14:45:59 +00:00
parent 3522e98428
commit c1512fd2e8
1 changed file with 16 additions and 32 deletions

View File

@ -53,7 +53,6 @@ python lerobot/scripts/push_dataset_to_hub.py \
import argparse import argparse
import json import json
import logging
import shutil import shutil
from pathlib import Path from pathlib import Path
@ -105,43 +104,28 @@ def push_meta_data_to_hub(repo_id, meta_data_dir, revision):
On the Hugging Face repository, they will be uploaded in a "meta_data" directory at the root. On the Hugging Face repository, they will be uploaded in a "meta_data" directory at the root.
""" """
api = HfApi() api = HfApi()
api.upload_folder(
def upload_meta_data(filename, revision): folder_path=meta_data_dir,
api.upload_file( path_in_repo="meta_data",
path_or_fileobj=meta_data_dir / filename,
path_in_repo=f"meta_data/{filename}",
repo_id=repo_id, repo_id=repo_id,
revision=revision, revision=revision,
repo_type="dataset", repo_type="dataset",
allow_patterns=["*.json, *.safetensors"],
) )
upload_meta_data("info.json", revision)
upload_meta_data("stats.safetensors", revision)
upload_meta_data("episode_data_index.safetensors", revision)
def push_videos_to_hub(repo_id, videos_dir, revision): def push_videos_to_hub(repo_id, videos_dir, revision):
"""Expect mp4 files to be all stored in a single "videos" directory. """Expect mp4 files to be all stored in a single "videos" directory.
On the Hugging Face repository, they will be uploaded in a "videos" directory at the root. On the Hugging Face repository, they will be uploaded in a "videos" directory at the root.
""" """
api = HfApi() api = HfApi()
api.upload_folder(
def upload_video(filename, revision): folder_path=videos_dir,
api.upload_file( path_in_repo="videos",
path_or_fileobj=videos_dir / filename,
path_in_repo=f"videos/{filename}",
repo_id=repo_id, repo_id=repo_id,
revision=revision, revision=revision,
repo_type="dataset", repo_type="dataset",
) allow_patterns="*.mp4",
for i, path in enumerate(videos_dir.glob("*.mp4")):
upload_video(path.name, revision)
if i == 10000:
# TODO(rcadene): implement sharding
logging.warning(
"You are updating more than 10000 video files that will be stored inside a single directory. You might experience slower loading time during training. Consider sharding: dividing files across multiple smaller directories."
) )