fix upload with upload_folder
This commit is contained in:
parent 3522e98428
commit c1512fd2e8
@@ -53,7 +53,6 @@ python lerobot/scripts/push_dataset_to_hub.py \
 import argparse
 import json
-import logging
 import shutil
 from pathlib import Path
 
 
@@ -105,19 +104,14 @@ def push_meta_data_to_hub(repo_id, meta_data_dir, revision):
     On the Hugging Face repository, they will be uploaded in a "meta_data" directory at the root.
     """
     api = HfApi()
-
-    def upload_meta_data(filename, revision):
-        api.upload_file(
-            path_or_fileobj=meta_data_dir / filename,
-            path_in_repo=f"meta_data/{filename}",
-            repo_id=repo_id,
-            revision=revision,
-            repo_type="dataset",
-        )
-
-    upload_meta_data("info.json", revision)
-    upload_meta_data("stats.safetensors", revision)
-    upload_meta_data("episode_data_index.safetensors", revision)
+    api.upload_folder(
+        folder_path=meta_data_dir,
+        path_in_repo="meta_data",
+        repo_id=repo_id,
+        revision=revision,
+        repo_type="dataset",
+        allow_patterns=["*.json", "*.safetensors"],
+    )
 
 
 def push_videos_to_hub(repo_id, videos_dir, revision):
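The hunk above replaces three per-file upload_file calls with a single upload_folder call, which pushes all matching files to the Hub in one commit. A minimal standalone sketch of the same call, assuming a hypothetical local directory and dataset repo id (both names are placeholders, not taken from the commit):

from pathlib import Path

from huggingface_hub import HfApi

api = HfApi()
# Upload every JSON and safetensors file under meta_data/ in one commit;
# allow_patterns filters which files inside folder_path get uploaded.
api.upload_folder(
    folder_path=Path("data/pusht/meta_data"),  # hypothetical local path
    path_in_repo="meta_data",  # destination directory at the repo root
    repo_id="lerobot/pusht",  # hypothetical dataset repo
    revision="main",
    repo_type="dataset",
    allow_patterns=["*.json", "*.safetensors"],
)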
@@ -125,24 +119,14 @@ def push_videos_to_hub(repo_id, videos_dir, revision):
     On the Hugging Face repository, they will be uploaded in a "videos" directory at the root.
     """
     api = HfApi()
-
-    def upload_video(filename, revision):
-        api.upload_file(
-            path_or_fileobj=videos_dir / filename,
-            path_in_repo=f"videos/{filename}",
-            repo_id=repo_id,
-            revision=revision,
-            repo_type="dataset",
-        )
-
-    for i, path in enumerate(videos_dir.glob("*.mp4")):
-        upload_video(path.name, revision)
-
-        if i == 10000:
-            # TODO(rcadene): implement sharding
-            logging.warning(
-                "You are updating more than 10000 video files that will be stored inside a single directory. You might experience slower loading time during training. Consider sharding: dividing files across multiple smaller directories."
-            )
+    api.upload_folder(
+        folder_path=videos_dir,
+        path_in_repo="videos",
+        repo_id=repo_id,
+        revision=revision,
+        repo_type="dataset",
+        allow_patterns="*.mp4",
+    )
 
 
 def push_dataset_to_hub(
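For completeness, a usage sketch of the two updated helpers, assuming they keep the signatures shown in the hunks above; the module path matches the file being edited, while the repo id and local paths are hypothetical:

from pathlib import Path

from lerobot.scripts.push_dataset_to_hub import (
    push_meta_data_to_hub,
    push_videos_to_hub,
)

repo_id = "lerobot/pusht"  # hypothetical dataset repo
root = Path("data/pusht")  # hypothetical local dataset root

# Each helper now issues a single upload_folder call, so each directory
# lands on the Hub in one commit instead of one commit per file.
push_meta_data_to_hub(repo_id, root / "meta_data", revision="main")
push_videos_to_hub(repo_id, root / "videos", revision="main")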