before new launch from scratch
This commit is contained in:
parent
52fb4143b5
commit
15e7a9d541
|
@ -142,8 +142,6 @@ def save_as_lerobot_dataset(
|
||||||
print(f"Total number of episodes {total_num_episodes}")
|
print(f"Total number of episodes {total_num_episodes}")
|
||||||
|
|
||||||
if num_shards is not None:
|
if num_shards is not None:
|
||||||
num_shards = 10000
|
|
||||||
shard_index = 9999
|
|
||||||
sharded_dataset = raw_dataset.shard(num_shards=num_shards, index=shard_index)
|
sharded_dataset = raw_dataset.shard(num_shards=num_shards, index=shard_index)
|
||||||
sharded_num_episodes = sharded_dataset.cardinality().numpy().item()
|
sharded_num_episodes = sharded_dataset.cardinality().numpy().item()
|
||||||
print(f"{sharded_num_episodes=}")
|
print(f"{sharded_num_episodes=}")
|
||||||
|
@ -153,6 +151,9 @@ def save_as_lerobot_dataset(
|
||||||
num_episodes = total_num_episodes
|
num_episodes = total_num_episodes
|
||||||
iter_ = iter(raw_dataset)
|
iter_ = iter(raw_dataset)
|
||||||
|
|
||||||
|
if num_episodes <= 0:
|
||||||
|
raise ValueError(f"Number of episodes is {num_episodes}, but needs to be positive.")
|
||||||
|
|
||||||
for episode_index in range(num_episodes):
|
for episode_index in range(num_episodes):
|
||||||
print(f"{episode_index} / {num_episodes}")
|
print(f"{episode_index} / {num_episodes}")
|
||||||
episode = next(iter_)
|
episode = next(iter_)
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
import datetime as dt
|
import datetime as dt
|
||||||
import shutil
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from datatrove.executor import LocalPipelineExecutor
|
from datatrove.executor import LocalPipelineExecutor
|
||||||
|
@ -45,24 +44,27 @@ class AggregateDatasets(PipelineStep):
|
||||||
|
|
||||||
|
|
||||||
def main(slurm=True):
|
def main(slurm=True):
|
||||||
for dir_ in Path("/fsx/remi_cadene/.cache/huggingface/lerobot/cadene").glob("droid_world*"):
|
# breakpoint()
|
||||||
shutil.rmtree(dir_)
|
# for dir_ in Path("/fsx/remi_cadene/.cache/huggingface/lerobot/cadene").glob("droid_world*"):
|
||||||
|
# shutil.rmtree(dir_)
|
||||||
|
|
||||||
now = dt.datetime.now()
|
now = dt.datetime.now()
|
||||||
port_job_name = "port_openx_droid"
|
port_job_name = "port_openx_droid"
|
||||||
logs_dir = Path("/fsx/remi_cadene/logs")
|
logs_dir = Path("/fsx/remi_cadene/logs")
|
||||||
port_log_dir = logs_dir / f"{now:%Y-%m-%d}_{now:%H-%M-%S}_{port_job_name}"
|
# port_log_dir = logs_dir / f"{now:%Y-%m-%d}_{now:%H-%M-%S}_{port_job_name}"
|
||||||
|
port_log_dir = Path("/fsx/remi_cadene/logs/2025-02-20_23-24-12_port_openx_droid")
|
||||||
|
|
||||||
if slurm:
|
if slurm:
|
||||||
executor_class = SlurmPipelineExecutor
|
executor_class = SlurmPipelineExecutor
|
||||||
dist_extra_kwargs = {
|
dist_extra_kwargs = {
|
||||||
"job_name": port_job_name,
|
"job_name": port_job_name,
|
||||||
"tasks": 10000,
|
"tasks": 10000,
|
||||||
"workers": 8 * 16,
|
"workers": 20, # 8 * 16,
|
||||||
"time": "08:00:00",
|
"time": "08:00:00",
|
||||||
"partition": "hopper-cpu",
|
"partition": "hopper-cpu",
|
||||||
"cpus_per_task": 12,
|
"cpus_per_task": 24,
|
||||||
"mem_per_cpu_gb": 4,
|
"mem_per_cpu_gb": 2,
|
||||||
|
"max_array_launch_parallel": True,
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
executor_class = LocalPipelineExecutor
|
executor_class = LocalPipelineExecutor
|
||||||
|
|
Loading…
Reference in New Issue