diff --git a/examples/2_evaluate_pretrained_policy.py b/examples/2_evaluate_pretrained_policy.py
index be6abd1b..b3d13f74 100644
--- a/examples/2_evaluate_pretrained_policy.py
+++ b/examples/2_evaluate_pretrained_policy.py
@@ -11,6 +11,7 @@ from lerobot.common.utils import init_hydra_config
 from lerobot.scripts.eval import eval
 
 # Get a pretrained policy from the hub.
+# TODO(alexander-soare): This no longer works until we upload a new model that uses the current configs.
 hub_id = "lerobot/diffusion_policy_pusht_image"
 folder = Path(snapshot_download(hub_id))
 # OR uncomment the following to evaluate a policy from the local outputs/train folder.
diff --git a/examples/3_train_policy.py b/examples/3_train_policy.py
index 012efddd..83563ffd 100644
--- a/examples/3_train_policy.py
+++ b/examples/3_train_policy.py
@@ -11,7 +11,6 @@ import torch
 from omegaconf import OmegaConf
 
 from lerobot.common.datasets.factory import make_dataset
-from lerobot.common.datasets.utils import cycle
 from lerobot.common.policies.diffusion.configuration_diffusion import DiffusionConfig
 from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy
 from lerobot.common.utils import init_hydra_config
@@ -26,8 +25,8 @@ device = torch.device("cuda")
 log_freq = 250
 
 # Set up the dataset.
-cfg = init_hydra_config("lerobot/configs/default.yaml", overrides=["env=pusht"])
-dataset = make_dataset(cfg)
+hydra_cfg = init_hydra_config("lerobot/configs/default.yaml", overrides=["env=pusht"])
+dataset = make_dataset(hydra_cfg)
 
 # Set up the the policy.
 # Policies are initialized with a configuration class, in this case `DiffusionConfig`.
@@ -50,17 +49,25 @@ dataloader = torch.utils.data.DataLoader(
 )
 
 # Run training loop.
-dataloader = cycle(dataloader)
-for step in range(training_steps):
-    batch = {k: v.to(device, non_blocking=True) for k, v in next(dataloader).items()}
-    info = policy(batch)
-    if step % log_freq == 0:
-        num_samples = (step + 1) * cfg.batch_size
-        loss = info["loss"]
-        update_s = info["update_s"]
-        print(f"step: {step} samples: {num_samples} loss: {loss:.3f} update_time: {update_s:.3f} (seconds)")
+step = 0
+done = False
+while not done:
+    for batch in dataloader:
+        batch = {k: v.to(device, non_blocking=True) for k, v in batch.items()}
+        info = policy(batch)
+        if step % log_freq == 0:
+            num_samples = (step + 1) * cfg.batch_size
+            loss = info["loss"]
+            update_s = info["update_s"]
+            print(
+                f"step: {step} samples: {num_samples} loss: {loss:.3f} update_time: {update_s:.3f} (seconds)"
+            )
+        step += 1
+        if step >= training_steps:
+            done = True
+            break
 
 # Save the policy, configuration, and normalization stats for later use.
 policy.save(output_directory / "model.pt")
-OmegaConf.save(cfg, output_directory / "config.yaml")
+OmegaConf.save(hydra_cfg, output_directory / "config.yaml")
 torch.save(dataset.transform.transforms[-1].stats, output_directory / "stats.pth")
diff --git a/lerobot/common/policies/diffusion/modeling_diffusion.py b/lerobot/common/policies/diffusion/modeling_diffusion.py
index a95effb2..9a02c6a2 100644
--- a/lerobot/common/policies/diffusion/modeling_diffusion.py
+++ b/lerobot/common/policies/diffusion/modeling_diffusion.py
@@ -1,4 +1,5 @@
-"""
+"""Diffusion Policy as per "Diffusion Policy: Visuomotor Policy Learning via Action Diffusion"
+
 TODO(alexander-soare):
   - Remove reliance on Robomimic for SpatialSoftmax.
   - Remove reliance on diffusers for DDPMScheduler and LR scheduler.
diff --git a/tests/test_examples.py b/tests/test_examples.py
index 83fdad5e..c264610b 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -29,11 +29,12 @@ def test_examples_3_and_2():
     with open(path, "r") as file:
         file_contents = file.read()
 
-    # Do less steps and use CPU.
+    # Do less steps, use CPU, and don't complicate things with dataloader workers.
     file_contents = _find_and_replace(
         file_contents,
         [
-            ("offline_steps = 5000", "offline_steps = 1"),
+            ("training_steps = 5000", "training_steps = 1"),
+            ("num_workers=4", "num_workers=0"),
             ('device = torch.device("cuda")', 'device = torch.device("cpu")'),
         ],
     )
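
Note: below is a minimal, self-contained sketch of the step-capped training loop pattern that examples/3_train_policy.py adopts in place of the previous cycle(dataloader) helper. The toy dataset and the stand-in policy (a function returning a dict with a "loss" key) are assumptions made purely for illustration, not the actual lerobot objects.

import torch

# Hypothetical stand-ins for the example's dataset and policy (assumptions for illustration).
dataset = [{"x": torch.randn(4)} for _ in range(10)]
dataloader = torch.utils.data.DataLoader(dataset, batch_size=2)

def policy(batch):
    # Pretend "policy" that just computes a scalar loss from the batch.
    return {"loss": batch["x"].pow(2).mean()}

training_steps = 12  # much smaller than the example's 5000, just to keep the sketch quick
log_freq = 5
device = torch.device("cpu")

step = 0
done = False
while not done:
    # Re-enter the dataloader (i.e. run another epoch) until the step budget is spent.
    for batch in dataloader:
        batch = {k: v.to(device, non_blocking=True) for k, v in batch.items()}
        info = policy(batch)
        if step % log_freq == 0:
            print(f"step: {step} loss: {info['loss']:.3f}")
        step += 1
        if step >= training_steps:
            done = True
            break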