WIP
This commit is contained in:
parent f8a6574698
commit 609531677b
@@ -0,0 +1,46 @@
import os
import time

import numpy as np
import torch

# Parameters
filename = "benchmark_data.dat"
shape = (10000, 10000)  # Large array
dtype = np.float32
torch_dtype = torch.float32

# Calculate file size
element_size = np.dtype(dtype).itemsize
file_size = shape[0] * shape[1] * element_size

# Create a large file and write random data to it
if not os.path.exists(filename) or os.path.getsize(filename) != file_size:
    data = np.random.rand(*shape).astype(dtype)
    with open(filename, "wb") as f:
        f.write(data.tobytes())

# Benchmark numpy.memmap
start_time = time.time()
data_np = np.memmap(filename, dtype=dtype, mode="r", shape=shape)
tensor_np = torch.from_numpy(data_np)
np_load_time = time.time() - start_time
print(f"np.memmap load time: {np_load_time:.4f} seconds")

# Benchmark torch.UntypedStorage
start_time = time.time()
storage = torch.UntypedStorage.from_file(filename, shared=True, nbytes=file_size)
tensor = torch.FloatTensor(storage).reshape(shape)
torch_load_time = time.time() - start_time
print(f"torch.UntypedStorage load time: {torch_load_time:.4f} seconds")
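# Note (added): torch.FloatTensor(storage) reinterprets the raw bytes of the
# untyped storage as float32. This assumes the file holds native-byte-order
# float32 data, which the writer above guarantees; a dtype mismatch would not
# raise an error and would silently produce garbage values.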

# Set NumPy print precision
# np.set_printoptions(precision=4)

# Print part of the arrays to compare precision
print("NumPy memmap array sample:\n", data_np[:5, :5])
print("PyTorch tensor sample:\n", tensor[:5, :5].numpy())

# Output the results
print(f"Numpy memmap load time: {np_load_time:.4f} seconds")
print(f"Torch UntypedStorage load time: {torch_load_time:.4f} seconds")
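# A minimal sanity check one could append here (hypothetical, not part of this
# commit): both load paths read the same file, so they should expose identical
# float32 values.
assert tensor_np.shape == tensor.shape
assert torch.equal(tensor_np, tensor), "memmap and UntypedStorage views differ"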
@@ -0,0 +1,84 @@
import json
from pathlib import Path

from huggingface_hub import HfApi

from lerobot import available_datasets
from lerobot.common.datasets.lerobot_dataset import CODEBASE_VERSION

api = HfApi()
LOCAL_DIR = Path("outputs/test_artifacts/")
# LOCAL_DIR = Path("tests/data/")

datasets_info = api.list_datasets(author="lerobot")
hub_available_datasets = [info.id for info in datasets_info if info.id in available_datasets]

for repo_id in hub_available_datasets:
    print(repo_id)

    dataset_info = api.list_repo_refs(repo_id, repo_type="dataset")
    branches = [b.name for b in dataset_info.branches]
    if CODEBASE_VERSION in branches:
        # if "_image" not in repo_id:
        #     print(f"{repo_id} already @{CODEBASE_VERSION}, skipping.")
        continue
    else:
        # Check if meta_data/info.json exists in the main branch
        files = api.list_repo_files(repo_id, repo_type="dataset", revision="main")
        info_file_path = "meta_data/info.json"

        if info_file_path in files:
            local_dir = LOCAL_DIR / repo_id
            local_dir.mkdir(exist_ok=True, parents=True)
            # Download the meta_data/info.json file from the main branch
            local_info_file_path = api.hf_hub_download(
                repo_id=repo_id,
                filename=info_file_path,
                revision="main",
                repo_type="dataset",
                local_dir=local_dir,
            )
        else:
            continue

        with open(local_info_file_path) as f:
            info_data = json.load(f)

        # Update the JSON data
        new_info_data = {}
        new_info_data["codebase_version"] = CODEBASE_VERSION
        for k, v in info_data.items():
            if k != "codebase_version":
                new_info_data[k] = v
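        # (Added note) Rebuilding the dict this way relies on insertion order,
        # guaranteed for dicts since Python 3.7, to place "codebase_version"
        # as the first key in the serialized JSON.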

        # Save the updated JSON file
        with open(local_info_file_path, "w") as f:
            json.dump(new_info_data, f, indent=4)

        # Upload the modified file back to the main branch
        api.upload_file(
            path_or_fileobj=local_info_file_path,
            path_in_repo=info_file_path,
            repo_id=repo_id,
            repo_type="dataset",
            commit_message=f"Update meta_data/info.json for {CODEBASE_VERSION}",
            revision="main",
        )
        print(f"{repo_id} meta_data/info.json updated with new codebase version")

        # Now create a branch named after the new version by branching out from
        # "main", which is expected to be the preceding version
        api.create_branch(repo_id, repo_type="dataset", branch=CODEBASE_VERSION, revision="main")
        print(f"{repo_id} successfully updated @{CODEBASE_VERSION}")


def main():
    # TODO: from list of repos, download:
    # - data/
    # - meta_data/
    # - video/{key}_episode_000001.mp4
    ...


if __name__ == "__main__":
    main()
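# Hypothetical sketch of the TODO in main() above, assuming the repo layout the
# comments describe (data/, meta_data/, video/). The allow_patterns globs and
# the helper name are guesses, not a confirmed spec; snapshot_download and its
# allow_patterns filter are part of huggingface_hub.
from huggingface_hub import snapshot_download


def download_test_artifacts(repo_id: str, local_dir: Path) -> None:
    # Download only the subset of files the TODO mentions, from the branch
    # created above for the new codebase version.
    snapshot_download(
        repo_id=repo_id,
        repo_type="dataset",
        revision=CODEBASE_VERSION,
        local_dir=local_dir / repo_id,
        allow_patterns=["data/*", "meta_data/*", "video/*_episode_000001.mp4"],
    )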