From 15e7a9d541107f6ab181afed80dbcef4d9951a5a Mon Sep 17 00:00:00 2001 From: Remi Cadene Date: Fri, 21 Feb 2025 23:14:22 +0000 Subject: [PATCH] before new launch from scratch --- examples/port_datasets/openx_rlds.py | 5 +++-- examples/port_datasets/openx_rlds_datatrove.py | 16 +++++++++------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/examples/port_datasets/openx_rlds.py b/examples/port_datasets/openx_rlds.py index 38e21d71..99ff6d4d 100644 --- a/examples/port_datasets/openx_rlds.py +++ b/examples/port_datasets/openx_rlds.py @@ -142,8 +142,6 @@ def save_as_lerobot_dataset( print(f"Total number of episodes {total_num_episodes}") if num_shards is not None: - num_shards = 10000 - shard_index = 9999 sharded_dataset = raw_dataset.shard(num_shards=num_shards, index=shard_index) sharded_num_episodes = sharded_dataset.cardinality().numpy().item() print(f"{sharded_num_episodes=}") @@ -153,6 +151,9 @@ def save_as_lerobot_dataset( num_episodes = total_num_episodes iter_ = iter(raw_dataset) + if num_episodes <= 0: + raise ValueError(f"Number of episodes is {num_episodes}, but needs to be positive.") + for episode_index in range(num_episodes): print(f"{episode_index} / {num_episodes}") episode = next(iter_) diff --git a/examples/port_datasets/openx_rlds_datatrove.py b/examples/port_datasets/openx_rlds_datatrove.py index 1163345f..acb8e94e 100644 --- a/examples/port_datasets/openx_rlds_datatrove.py +++ b/examples/port_datasets/openx_rlds_datatrove.py @@ -1,5 +1,4 @@ import datetime as dt -import shutil from pathlib import Path from datatrove.executor import LocalPipelineExecutor @@ -45,24 +44,27 @@ class AggregateDatasets(PipelineStep): def main(slurm=True): - for dir_ in Path("/fsx/remi_cadene/.cache/huggingface/lerobot/cadene").glob("droid_world*"): - shutil.rmtree(dir_) + # breakpoint() + # for dir_ in Path("/fsx/remi_cadene/.cache/huggingface/lerobot/cadene").glob("droid_world*"): + # shutil.rmtree(dir_) now = dt.datetime.now() port_job_name = "port_openx_droid" logs_dir = Path("/fsx/remi_cadene/logs") - port_log_dir = logs_dir / f"{now:%Y-%m-%d}_{now:%H-%M-%S}_{port_job_name}" + # port_log_dir = logs_dir / f"{now:%Y-%m-%d}_{now:%H-%M-%S}_{port_job_name}" + port_log_dir = Path("/fsx/remi_cadene/logs/2025-02-20_23-24-12_port_openx_droid") if slurm: executor_class = SlurmPipelineExecutor dist_extra_kwargs = { "job_name": port_job_name, "tasks": 10000, - "workers": 8 * 16, + "workers": 20, # 8 * 16, "time": "08:00:00", "partition": "hopper-cpu", - "cpus_per_task": 12, - "mem_per_cpu_gb": 4, + "cpus_per_task": 24, + "mem_per_cpu_gb": 2, + "max_array_launch_parallel": True, } else: executor_class = LocalPipelineExecutor