# lerobot/tests/scripts/mock_dataset.py

"""
    usage: `python tests/scripts/mock_dataset.py --in-data-dir data/pusht --out-data-dir tests/data/pusht`
"""

import argparse
import shutil

from tensordict import TensorDict
from pathlib import Path


def mock_dataset(in_data_dir, out_data_dir, num_frames=50):
    """Write a truncated copy of a dataset to use as a lightweight test artifact.

    Loads the memory-mapped tensordict stored under ``in_data_dir / "replay_buffer"``,
    writes its first ``num_frames`` frames to ``out_data_dir / "replay_buffer"`` and
    copies ``stats.pth`` next to it unchanged.

    Args:
        in_data_dir: Directory of the full dataset; must contain ``replay_buffer``
            and ``stats.pth``.
        out_data_dir: Directory receiving the mock dataset. Created if missing.
        num_frames: Number of leading frames to keep. Must be at least 1 and no
            larger than the first batch dimension of the input dataset.

    Raises:
        ValueError: If ``num_frames`` is smaller than 1 or larger than the number
            of frames available in the input dataset.
    """
    # reject meaningless sizes before touching the disk
    if num_frames < 1:
        raise ValueError(f"num_frames must be at least 1, got {num_frames}")

    in_data_dir = Path(in_data_dir)
    out_data_dir = Path(out_data_dir)

    # load full dataset as a tensor dict
    in_td_data = TensorDict.load_memmap(in_data_dir / "replay_buffer")

    # fail before any output file is created: otherwise the slice assignment
    # below would break halfway and leave a partially written artifact behind
    available_frames = in_td_data.batch_size[0]
    if num_frames > available_frames:
        raise ValueError(
            f"num_frames ({num_frames}) exceeds the number of frames "
            f"in the input dataset ({available_frames})"
        )

    # do not rely on `memmap_like` to create the directory the stats are copied into
    out_data_dir.mkdir(parents=True, exist_ok=True)

    # use 1 frame to know the specification of the dataset
    # and copy it over `n` frames in the test artifact directory
    out_td_data = in_td_data[0].expand(num_frames).memmap_like(out_data_dir / "replay_buffer")

    # copy the first `n` frames so that we have real data
    out_td_data[:num_frames] = in_td_data[:num_frames].clone()

    # lock the output once the frames are in place
    # NOTE(review): the original comment says this makes sure everything has been
    # properly written -- confirm that `lock_()` is what guarantees it
    out_td_data.lock_()

    # copy the full statistics of dataset since it's pretty small
    in_stats_path = in_data_dir / "stats.pth"
    out_stats_path = out_data_dir / "stats.pth"
    shutil.copy(in_stats_path, out_stats_path)


if __name__ == "__main__":
    # Command-line entry point: build a mock dataset from an existing one.
    parser = argparse.ArgumentParser(description="Create dataset")

    # Both directories are mandatory: without `required=True` a missing option
    # reaches `mock_dataset` as `None` and fails with an opaque `TypeError`.
    parser.add_argument("--in-data-dir", type=str, required=True, help="Path to input data")
    parser.add_argument("--out-data-dir", type=str, required=True, help="Path to save the output data")
    # Default mirrors the default of `mock_dataset`, so existing invocations behave the same.
    parser.add_argument(
        "--num-frames",
        type=int,
        default=50,
        help="Number of frames to keep in the output data (default: 50)",
    )

    args = parser.parse_args()

    mock_dataset(args.in_data_dir, args.out_data_dir, num_frames=args.num_frames)