Tidy up yaml configs (#121)
parent e4e739f4f8
commit 9d60dce6f3
Makefile | 42
@@ -30,21 +30,21 @@ test-act-ete-train:
 	policy=act \
 	env=aloha \
 	wandb.enable=False \
-	offline_steps=2 \
-	online_steps=0 \
-	eval_episodes=1 \
+	training.offline_steps=2 \
+	training.online_steps=0 \
+	eval.n_episodes=1 \
 	device=cpu \
-	save_model=true \
-	save_freq=2 \
+	training.save_model=true \
+	training.save_freq=2 \
 	policy.n_action_steps=20 \
 	policy.chunk_size=20 \
-	policy.batch_size=2 \
+	training.batch_size=2 \
 	hydra.run.dir=tests/outputs/act/

 test-act-ete-eval:
 	python lerobot/scripts/eval.py \
 	--config tests/outputs/act/.hydra/config.yaml \
-	eval_episodes=1 \
+	eval.n_episodes=1 \
 	env.episode_length=8 \
 	device=cpu \
 	policy.pretrained_model_path=tests/outputs/act/models/2.pt

@@ -54,19 +54,19 @@ test-diffusion-ete-train:
 	policy=diffusion \
 	env=pusht \
 	wandb.enable=False \
-	offline_steps=2 \
-	online_steps=0 \
-	eval_episodes=1 \
+	training.offline_steps=2 \
+	training.online_steps=0 \
+	eval.n_episodes=1 \
 	device=cpu \
-	save_model=true \
-	save_freq=2 \
+	training.save_model=true \
+	training.save_freq=2 \
-	policy.batch_size=2 \
+	training.batch_size=2 \
 	hydra.run.dir=tests/outputs/diffusion/

 test-diffusion-ete-eval:
 	python lerobot/scripts/eval.py \
 	--config tests/outputs/diffusion/.hydra/config.yaml \
-	eval_episodes=1 \
+	eval.n_episodes=1 \
 	env.episode_length=8 \
 	device=cpu \
 	policy.pretrained_model_path=tests/outputs/diffusion/models/2.pt

@@ -76,20 +76,20 @@ test-tdmpc-ete-train:
 	policy=tdmpc \
 	env=xarm \
 	wandb.enable=False \
-	offline_steps=1 \
-	online_steps=2 \
-	eval_episodes=1 \
+	training.offline_steps=1 \
+	training.online_steps=2 \
+	eval.n_episodes=1 \
 	env.episode_length=2 \
 	device=cpu \
-	save_model=true \
-	save_freq=2 \
+	training.save_model=true \
+	training.save_freq=2 \
-	policy.batch_size=2 \
+	training.batch_size=2 \
 	hydra.run.dir=tests/outputs/tdmpc/

 test-tdmpc-ete-eval:
 	python lerobot/scripts/eval.py \
 	--config tests/outputs/tdmpc/.hydra/config.yaml \
-	eval_episodes=1 \
+	eval.n_episodes=1 \
 	env.episode_length=8 \
 	device=cpu \
 	policy.pretrained_model_path=tests/outputs/tdmpc/models/2.pt

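The three pairs of targets above launch `lerobot/scripts/train.py` and `lerobot/scripts/eval.py` with Hydra command-line overrides, so the renaming shows up here as `offline_steps` → `training.offline_steps`, `eval_episodes` → `eval.n_episodes`, and so on. Below is a minimal sketch of composing the same kind of config programmatically with Hydra's compose API; the config path and config name (`lerobot/configs`, `default`) are assumptions about the repository layout rather than something this commit introduces:

```python
from hydra import compose, initialize

# Assumed layout: the root config is lerobot/configs/default.yaml (see the yaml hunks below).
with initialize(config_path="lerobot/configs"):
    cfg = compose(
        config_name="default",
        overrides=[
            "policy=act",
            "env=aloha",
            # Flat keys are gone; training and evaluation settings now live in namespaces.
            "training.offline_steps=2",
            "training.online_steps=0",
            "eval.n_episodes=1",
            "device=cpu",
        ],
    )

print(cfg.training.offline_steps, cfg.eval.n_episodes)
```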
@@ -23,8 +23,8 @@ weights_path = folder / "model.pt"
 # Override some config parameters to do with evaluation.
 overrides = [
     f"policy.pretrained_model_path={weights_path}",
-    "eval_episodes=10",
-    "rollout_batch_size=10",
+    "eval.n_episodes=10",
+    "eval.batch_size=10",
     "device=cuda",
 ]

@@ -38,15 +38,13 @@ policy = DiffusionPolicy(cfg, lr_scheduler_num_training_steps=training_steps, da
 policy.train()
 policy.to(device)

-optimizer = torch.optim.Adam(
-    policy.diffusion.parameters(), cfg.lr, cfg.adam_betas, cfg.adam_eps, cfg.adam_weight_decay
-)
+optimizer = torch.optim.Adam(policy.parameters(), lr=1e-4)

 # Create dataloader for offline training.
 dataloader = torch.utils.data.DataLoader(
     dataset,
     num_workers=4,
-    batch_size=cfg.batch_size,
+    batch_size=64,
     shuffle=True,
     pin_memory=device != torch.device("cpu"),
     drop_last=True,

@@ -14,12 +14,13 @@ def make_dataset(
     cfg,
     split="train",
 ):
-    if cfg.env.name not in cfg.dataset.repo_id:
+    if cfg.env.name not in cfg.dataset_repo_id:
         logging.warning(
-            f"There might be a mismatch between your training dataset ({cfg.dataset.repo_id=}) and your environment ({cfg.env.name=})."
+            f"There might be a mismatch between your training dataset ({cfg.dataset_repo_id=}) and your "
+            f"environment ({cfg.env.name=})."
         )

-    delta_timestamps = cfg.policy.get("delta_timestamps")
+    delta_timestamps = cfg.training.get("delta_timestamps")
     if delta_timestamps is not None:
         for key in delta_timestamps:
             if isinstance(delta_timestamps[key], str):

@@ -28,7 +29,7 @@ def make_dataset(
     # TODO(rcadene): add data augmentations

     dataset = LeRobotDataset(
-        cfg.dataset.repo_id,
+        cfg.dataset_repo_id,
         split=split,
         root=DATA_DIR,
         delta_timestamps=delta_timestamps,

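The `delta_timestamps` values that `make_dataset` now pulls from `cfg.training` are either lists of floats or strings containing a Python list comprehension (see the policy yaml hunks further down, e.g. `"[i / ${fps} for i in range(${policy.chunk_size})]"`). The snippet below is a hedged sketch of how such a string can be resolved once OmegaConf has interpolated `${fps}` and `${policy.chunk_size}`; the repository's actual helper may do this differently, and the toy config is made up for illustration:

```python
from omegaconf import OmegaConf

# Hypothetical mini-config mirroring the new layout (values chosen for illustration).
cfg = OmegaConf.create(
    {
        "fps": 50,
        "policy": {"chunk_size": 100},
        "training": {
            "delta_timestamps": {"action": "[i / ${fps} for i in range(${policy.chunk_size})]"}
        },
    }
)

delta_timestamps = cfg.training.get("delta_timestamps")
for key, value in delta_timestamps.items():
    if isinstance(value, str):
        # OmegaConf has already substituted ${fps} and ${policy.chunk_size}; evaluating
        # the remaining list comprehension yields a plain list of timestamps in seconds.
        delta_timestamps[key] = eval(value)

print(delta_timestamps["action"][:3])  # [0.0, 0.02, 0.04]
```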
@@ -29,9 +29,9 @@ class Logger:
         self._job_name = job_name
         self._model_dir = self._log_dir / "models"
         self._buffer_dir = self._log_dir / "buffers"
-        self._save_model = cfg.save_model
+        self._save_model = cfg.training.save_model
         self._disable_wandb_artifact = cfg.wandb.disable_artifact
-        self._save_buffer = cfg.save_buffer
+        self._save_buffer = cfg.training.get("save_buffer", False)
         self._group = cfg_to_group(cfg)
         self._seed = cfg.seed
         self._cfg = cfg

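One detail worth noting in the Logger change: `save_model` moves to a plain attribute read on the new `training` node, while `save_buffer` is read with `.get(..., False)` because it is no longer defined in `default.yaml` (see the config hunk below) and so may be absent from a composed config. A tiny sketch of that pattern on a throwaway OmegaConf node, not the repo's real config:

```python
from omegaconf import OmegaConf

# Stand-in for the new `training:` node; note there is no `save_buffer` key here.
training_cfg = OmegaConf.create({"save_model": False, "log_freq": 250})

# Reading defensively tolerates the missing key and falls back to the default.
save_buffer = training_cfg.get("save_buffer", False)
print(save_buffer)  # False
```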
@@ -112,15 +112,6 @@ class ActionChunkingTransformerConfig:
     dropout: float = 0.1
     kl_weight: float = 10.0

-    # ---
-    # TODO(alexander-soare): Remove these from the policy config.
-    batch_size: int = 8
-    lr: float = 1e-5
-    lr_backbone: float = 1e-5
-    weight_decay: float = 1e-4
-    grad_clip_norm: float = 10
-    utd: int = 1
-
     def __post_init__(self):
         """Input validation (not exhaustive)."""
         if not self.vision_backbone.startswith("resnet"):

@@ -119,15 +119,6 @@ class DiffusionConfig:

     # ---
     # TODO(alexander-soare): Remove these from the policy config.
-    batch_size: int = 64
-    grad_clip_norm: int = 10
-    lr: float = 1.0e-4
-    lr_scheduler: str = "cosine"
-    lr_warmup_steps: int = 500
-    adam_betas: tuple[float, float] = (0.95, 0.999)
-    adam_eps: float = 1.0e-8
-    adam_weight_decay: float = 1.0e-6
-    utd: int = 1
     use_ema: bool = True
     ema_update_after_step: int = 0
     ema_min_alpha: float = 0.0

|
@ -35,7 +35,7 @@ def make_policy(hydra_cfg: DictConfig, dataset_stats=None):
|
||||||
from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy
|
from lerobot.common.policies.diffusion.modeling_diffusion import DiffusionPolicy
|
||||||
|
|
||||||
policy_cfg = _policy_cfg_from_hydra_cfg(DiffusionConfig, hydra_cfg)
|
policy_cfg = _policy_cfg_from_hydra_cfg(DiffusionConfig, hydra_cfg)
|
||||||
policy = DiffusionPolicy(policy_cfg, hydra_cfg.offline_steps, dataset_stats)
|
policy = DiffusionPolicy(policy_cfg, hydra_cfg.training.offline_steps, dataset_stats)
|
||||||
policy.to(get_safe_torch_device(hydra_cfg.device))
|
policy.to(get_safe_torch_device(hydra_cfg.device))
|
||||||
elif hydra_cfg.policy.name == "act":
|
elif hydra_cfg.policy.name == "act":
|
||||||
from lerobot.common.policies.act.configuration_act import ActionChunkingTransformerConfig
|
from lerobot.common.policies.act.configuration_act import ActionChunkingTransformerConfig
|
||||||
|
|
|
@@ -9,31 +9,23 @@ hydra:
   job:
     name: default

-seed: 1337
-# batch size for TorchRL SerialEnv. Each underlying env will get the seed = seed + env_index
-# NOTE: only diffusion policy supports rollout_batch_size > 1
-rollout_batch_size: 1
 device: cuda # cpu
-prefetch: 4
-eval_freq: ???
-save_freq: ???
-eval_episodes: ???
-save_video: false
-save_model: false
-save_buffer: false
-train_steps: ???
-fps: ???
+seed: ???
+dataset_repo_id: lerobot/pusht

-offline_prioritized_sampler: true
+training:
+  offline_steps: ???
+  online_steps: ???
+  online_steps_between_rollouts: ???
+  eval_freq: ???
+  save_freq: ???
+  log_freq: 250
+  save_model: false

-dataset:
-  repo_id: ???
-
-n_action_steps: ???
-n_obs_steps: ???
-env: ???
-
-policy: ???
+eval:
+  n_episodes: 1
+  # TODO(alexander-soare): Right now this does not work. Reinstate this.
+  batch_size: 1

 wandb:
   enable: true

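To make the reshaped layout concrete, here is a small sketch that builds a config shaped like the new `default.yaml` above and reads a few values the way the scripts now do. The yaml literal is abridged for illustration and is not the full file:

```python
from omegaconf import OmegaConf

# Abridged copy of the new layout, for illustration only.
yaml_snippet = """
device: cuda
seed: ???
dataset_repo_id: lerobot/pusht

training:
  offline_steps: ???
  online_steps: ???
  log_freq: 250
  save_model: false

eval:
  n_episodes: 1
  batch_size: 1
"""

cfg = OmegaConf.create(yaml_snippet)

# Flat keys like `offline_steps` or `eval_episodes` are gone; optimization settings live
# under `training`, rollout/evaluation settings under `eval`.
print(cfg.training.log_freq)  # 250
print(cfg.eval.n_episodes)  # 1
print(OmegaConf.is_missing(cfg.training, "offline_steps"))  # True: '???' must be provided
```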
@@ -1,18 +1,7 @@
 # @package _global_

-eval_episodes: 50
-eval_freq: 7500
-save_freq: 75000
-log_freq: 250
-# TODO: same as xarm, need to adjust
-offline_steps: 25000
-online_steps: 25000
-
 fps: 50

-dataset:
-  repo_id: lerobot/aloha_sim_insertion_human
-
 env:
   name: aloha
   task: AlohaInsertion-v0

@@ -1,18 +1,7 @@
 # @package _global_

-eval_episodes: 50
-eval_freq: 7500
-save_freq: 75000
-log_freq: 250
-# TODO: same as xarm, need to adjust
-offline_steps: 25000
-online_steps: 25000
-
 fps: 10

-dataset:
-  repo_id: lerobot/pusht
-
 env:
   name: pusht
   task: PushT-v0

@@ -1,17 +1,7 @@
 # @package _global_

-eval_episodes: 20
-eval_freq: 1000
-save_freq: 10000
-log_freq: 50
-offline_steps: 25000
-online_steps: 25000
-
 fps: 15

-dataset:
-  repo_id: lerobot/xarm_lift_medium
-
 env:
   name: xarm
   task: XarmLift-v0

@@ -1,21 +1,34 @@
 # @package _global_

-offline_steps: 80000
-online_steps: 0
+seed: 1000
+dataset_repo_id: lerobot/aloha_sim_insertion_human

-eval_episodes: 1
-eval_freq: 10000
-save_freq: 100000
-log_freq: 250
+training:
+  offline_steps: 80000
+  online_steps: 0
+  eval_freq: 10000
+  save_freq: 100000
+  log_freq: 250
+  save_model: true

-n_obs_steps: 1
-# when temporal_agg=False, n_action_steps=horizon
+  batch_size: 8
+  lr: 1e-5
+  lr_backbone: 1e-5
+  weight_decay: 1e-4
+  grad_clip_norm: 10
+  online_steps_between_rollouts: 1

 override_dataset_stats:
   observation.images.top:
     # stats from imagenet, since we use a pretrained vision model
     mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1)
     std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1)

+delta_timestamps:
+  action: "[i / ${fps} for i in range(${policy.chunk_size})]"
+
+eval:
+  n_episodes: 50
+
 # See `configuration_act.py` for more details.
 policy:

@@ -24,7 +37,7 @@ policy:
   pretrained_model_path:

   # Input / output structure.
-  n_obs_steps: ${n_obs_steps}
+  n_obs_steps: 1
   chunk_size: 100 # chunk_size
   n_action_steps: 100

@@ -66,15 +79,3 @@ policy:
   # Training and loss computation.
   dropout: 0.1
   kl_weight: 10.0
-
-  # ---
-  # TODO(alexander-soare): Remove these from the policy config.
-  batch_size: 8
-  lr: 1e-5
-  lr_backbone: 1e-5
-  weight_decay: 1e-4
-  grad_clip_norm: 10
-  utd: 1
-
-  delta_timestamps:
-    action: "[i / ${fps} for i in range(${policy.chunk_size})]"

@@ -1,22 +1,33 @@
 # @package _global_

 seed: 100000
-horizon: 16
-n_obs_steps: 2
-n_action_steps: 8
-dataset_obs_steps: ${n_obs_steps}
-past_action_visible: False
-keypoint_visible_rate: 1.0
+dataset_repo_id: lerobot/pusht

-eval_episodes: 50
-eval_freq: 5000
-save_freq: 5000
-log_freq: 250
+training:
+  offline_steps: 200000
+  online_steps: 0
+  eval_freq: 5000
+  save_freq: 5000
+  log_freq: 250
+  save_model: true

-offline_steps: 200000
-online_steps: 0
+  batch_size: 64
+  grad_clip_norm: 10
+  lr: 1.0e-4
+  lr_scheduler: cosine
+  lr_warmup_steps: 500
+  adam_betas: [0.95, 0.999]
+  adam_eps: 1.0e-8
+  adam_weight_decay: 1.0e-6
+  online_steps_between_rollouts: 1

-offline_prioritized_sampler: true
+  delta_timestamps:
+    observation.image: "[i / ${fps} for i in range(1 - ${policy.n_obs_steps}, 1)]"
+    observation.state: "[i / ${fps} for i in range(1 - ${policy.n_obs_steps}, 1)]"
+    action: "[i / ${fps} for i in range(1 - ${policy.n_obs_steps}, 1 - ${policy.n_obs_steps} + ${policy.horizon})]"
+
+eval:
+  n_episodes: 50

 override_dataset_stats:
   # TODO(rcadene, alexander-soare): should we remove image stats as well? do we use a pretrained vision model?

@@ -38,9 +49,9 @@ policy:
   pretrained_model_path:

   # Input / output structure.
-  n_obs_steps: ${n_obs_steps}
-  horizon: ${horizon}
-  n_action_steps: ${n_action_steps}
+  n_obs_steps: 2
+  horizon: 16
+  n_action_steps: 8

   input_shapes:
     # TODO(rcadene, alexander-soare): add variables for height and width from the dataset/env?

@@ -84,23 +95,9 @@ policy:

   # ---
   # TODO(alexander-soare): Remove these from the policy config.
-  batch_size: 64
-  grad_clip_norm: 10
-  lr: 1.0e-4
-  lr_scheduler: cosine
-  lr_warmup_steps: 500
-  adam_betas: [0.95, 0.999]
-  adam_eps: 1.0e-8
-  adam_weight_decay: 1.0e-6
-  utd: 1
   use_ema: true
   ema_update_after_step: 0
   ema_min_alpha: 0.0
   ema_max_alpha: 0.9999
   ema_inv_gamma: 1.0
   ema_power: 0.75
-
-  delta_timestamps:
-    observation.image: "[i / ${fps} for i in range(1 - ${n_obs_steps}, 1)]"
-    observation.state: "[i / ${fps} for i in range(1 - ${n_obs_steps}, 1)]"
-    action: "[i / ${fps} for i in range(1 - ${n_obs_steps}, 1 - ${n_obs_steps} + ${policy.horizon})]"

@@ -54,7 +54,7 @@ policy:
   seed_steps: 0
   update_freq: 2
   tau: 0.01
-  utd: 1
+  online_steps_between_rollouts: 1

   # offline rl
   # dataset_dir: ???

@@ -16,14 +16,14 @@ You have a specific config file to go with trained model weights, and want to ru
 python lerobot/scripts/eval.py \
 --config PATH/TO/FOLDER/config.yaml \
 policy.pretrained_model_path=PATH/TO/FOLDER/weights.pth \
-eval_episodes=10
+eval.n_episodes=10
 ```

 You have a HuggingFace Hub ID, you know which revision you want, and want to run 10 episodes (note that in this case,
 you don't need to specify which weights to use):

 ```
-python lerobot/scripts/eval.py --hub-id HUB/ID --revision v1.0 eval_episodes=10
+python lerobot/scripts/eval.py --hub-id HUB/ID --revision v1.0 eval.n_episodes=10
 ```
 """

@@ -365,7 +365,7 @@ def eval(cfg: dict, out_dir=None):
     log_output_dir(out_dir)

     logging.info("Making environment.")
-    env = make_env(cfg, num_parallel_envs=cfg.eval_episodes)
+    env = make_env(cfg, num_parallel_envs=cfg.eval.n_episodes)

     logging.info("Making policy.")
     policy = make_policy(cfg)

@@ -81,7 +81,7 @@ def log_train_info(logger, info, step, cfg, dataset, is_offline):

     # A sample is an (observation,action) pair, where observation and action
     # can be on multiple timestamps. In a batch, we have `batch_size`` number of samples.
-    num_samples = (step + 1) * cfg.policy.batch_size
+    num_samples = (step + 1) * cfg.training.batch_size
     avg_samples_per_ep = dataset.num_samples / dataset.num_episodes
     num_episodes = num_samples / avg_samples_per_ep
     num_epochs = num_samples / dataset.num_samples

@@ -117,7 +117,7 @@ def log_eval_info(logger, info, step, cfg, dataset, is_offline):

     # A sample is an (observation,action) pair, where observation and action
     # can be on multiple timestamps. In a batch, we have `batch_size`` number of samples.
-    num_samples = (step + 1) * cfg.policy.batch_size
+    num_samples = (step + 1) * cfg.training.batch_size
     avg_samples_per_ep = dataset.num_samples / dataset.num_episodes
     num_episodes = num_samples / avg_samples_per_ep
     num_epochs = num_samples / dataset.num_samples

@@ -246,8 +246,8 @@ def train(cfg: dict, out_dir=None, job_name=None):
         raise NotImplementedError()
     if job_name is None:
         raise NotImplementedError()
-    if cfg.online_steps > 0:
-        assert cfg.rollout_batch_size == 1, "rollout_batch_size > 1 not supported for online training steps"
+    if cfg.training.online_steps > 0:
+        assert cfg.eval.batch_size == 1, "eval.batch_size > 1 not supported for online training steps"

     init_logging()

@@ -262,7 +262,7 @@ def train(cfg: dict, out_dir=None, job_name=None):
     offline_dataset = make_dataset(cfg)

     logging.info("make_env")
-    env = make_env(cfg, num_parallel_envs=cfg.eval_episodes)
+    env = make_env(cfg, num_parallel_envs=cfg.eval.n_episodes)

     logging.info("make_policy")
     policy = make_policy(cfg, dataset_stats=offline_dataset.stats)

@@ -282,31 +282,27 @@ def train(cfg: dict, out_dir=None, job_name=None):
                 "params": [
                     p for n, p in policy.named_parameters() if n.startswith("backbone") and p.requires_grad
                 ],
-                "lr": cfg.policy.lr_backbone,
+                "lr": cfg.training.lr_backbone,
             },
         ]
         optimizer = torch.optim.AdamW(
-            optimizer_params_dicts, lr=cfg.policy.lr, weight_decay=cfg.policy.weight_decay
+            optimizer_params_dicts, lr=cfg.training.lr, weight_decay=cfg.training.weight_decay
         )
         lr_scheduler = None
     elif cfg.policy.name == "diffusion":
         optimizer = torch.optim.Adam(
             policy.diffusion.parameters(),
-            cfg.policy.lr,
-            cfg.policy.adam_betas,
-            cfg.policy.adam_eps,
-            cfg.policy.adam_weight_decay,
+            cfg.training.lr,
+            cfg.training.adam_betas,
+            cfg.training.adam_eps,
+            cfg.training.adam_weight_decay,
         )
-        # TODO(rcadene): modify lr scheduler so that it doesn't depend on epochs but steps
-        # configure lr scheduler
+        assert cfg.training.online_steps == 0, "Diffusion Policy does not handle online training."
         lr_scheduler = get_scheduler(
-            cfg.policy.lr_scheduler,
+            cfg.training.lr_scheduler,
             optimizer=optimizer,
-            num_warmup_steps=cfg.policy.lr_warmup_steps,
-            num_training_steps=cfg.offline_steps,
-            # pytorch assumes stepping LRScheduler every epoch
-            # however huggingface diffusers steps it every batch
-            last_epoch=-1,
+            num_warmup_steps=cfg.training.lr_warmup_steps,
+            num_training_steps=cfg.training.offline_steps,
         )
     elif policy.name == "tdmpc":
         raise NotImplementedError("TD-MPC not implemented yet.")

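In the ACT branch just above, the point of keeping two parameter groups is that the vision backbone gets its own learning rate (`training.lr_backbone`) while the rest of the policy uses `training.lr`. Here is a self-contained sketch of that optimizer pattern with a toy module; the class and hard-coded values are made up for illustration (in `act.yaml` the two learning rates happen to be equal, but they are independently tunable):

```python
import torch
from torch import nn


class ToyPolicy(nn.Module):
    """Toy stand-in: a "backbone" submodule plus a head, mirroring how ACT's parameters split."""

    def __init__(self):
        super().__init__()
        self.backbone = nn.Linear(8, 8)
        self.head = nn.Linear(8, 2)


policy = ToyPolicy()
lr, lr_backbone, weight_decay = 1e-5, 1e-5, 1e-4  # would come from cfg.training.*

optimizer = torch.optim.AdamW(
    [
        {  # everything except the backbone trains at the base learning rate
            "params": [p for n, p in policy.named_parameters() if not n.startswith("backbone")],
        },
        {  # the backbone gets its own learning rate via a second parameter group
            "params": [p for n, p in policy.named_parameters() if n.startswith("backbone")],
            "lr": lr_backbone,
        },
    ],
    lr=lr,
    weight_decay=weight_decay,
)
print([group["lr"] for group in optimizer.param_groups])  # [1e-05, 1e-05]
```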
@@ -319,8 +315,8 @@ def train(cfg: dict, out_dir=None, job_name=None):

     log_output_dir(out_dir)
     logging.info(f"{cfg.env.task=}")
-    logging.info(f"{cfg.offline_steps=} ({format_big_number(cfg.offline_steps)})")
-    logging.info(f"{cfg.online_steps=}")
+    logging.info(f"{cfg.training.offline_steps=} ({format_big_number(cfg.training.offline_steps)})")
+    logging.info(f"{cfg.training.online_steps=}")
     logging.info(f"{offline_dataset.num_samples=} ({format_big_number(offline_dataset.num_samples)})")
     logging.info(f"{offline_dataset.num_episodes=}")
     logging.info(f"{num_learnable_params=} ({format_big_number(num_learnable_params)})")

|
||||||
|
|
||||||
# Note: this helper will be used in offline and online training loops.
|
# Note: this helper will be used in offline and online training loops.
|
||||||
def _maybe_eval_and_maybe_save(step):
|
def _maybe_eval_and_maybe_save(step):
|
||||||
if step % cfg.eval_freq == 0:
|
if step % cfg.training.eval_freq == 0:
|
||||||
logging.info(f"Eval policy at step {step}")
|
logging.info(f"Eval policy at step {step}")
|
||||||
eval_info = eval_policy(
|
eval_info = eval_policy(
|
||||||
env,
|
env,
|
||||||
|
@@ -342,7 +338,7 @@ def train(cfg: dict, out_dir=None, job_name=None):
                 logger.log_video(eval_info["videos"][0], step, mode="eval")
             logging.info("Resume training")

-        if cfg.save_model and step % cfg.save_freq == 0:
+        if cfg.training.save_model and step % cfg.training.save_freq == 0:
             logging.info(f"Checkpoint policy after step {step}")
             logger.save_model(policy, identifier=step)
             logging.info("Resume training")

@@ -351,7 +347,7 @@ def train(cfg: dict, out_dir=None, job_name=None):
     dataloader = torch.utils.data.DataLoader(
         offline_dataset,
         num_workers=4,
-        batch_size=cfg.policy.batch_size,
+        batch_size=cfg.training.batch_size,
         shuffle=True,
         pin_memory=cfg.device != "cpu",
         drop_last=False,

@@ -360,7 +356,7 @@ def train(cfg: dict, out_dir=None, job_name=None):

     step = 0 # number of policy update (forward + backward + optim)
     is_offline = True
-    for offline_step in range(cfg.offline_steps):
+    for offline_step in range(cfg.training.offline_steps):
         if offline_step == 0:
             logging.info("Start offline training on a fixed dataset")
         policy.train()

@@ -369,10 +365,10 @@ def train(cfg: dict, out_dir=None, job_name=None):
         for key in batch:
             batch[key] = batch[key].to(cfg.device, non_blocking=True)

-        train_info = update_policy(policy, batch, optimizer, cfg.policy.grad_clip_norm, lr_scheduler)
+        train_info = update_policy(policy, batch, optimizer, cfg.training.grad_clip_norm, lr_scheduler)

         # TODO(rcadene): is it ok if step_t=0 = 0 and not 1 as previously done?
-        if step % cfg.log_freq == 0:
+        if step % cfg.training.log_freq == 0:
             log_train_info(logger, train_info, step, cfg, offline_dataset, is_offline)

         # Note: _maybe_eval_and_maybe_save happens **after** the `step`th training update has completed, so we pass in

@@ -398,7 +394,7 @@ def train(cfg: dict, out_dir=None, job_name=None):
     dataloader = torch.utils.data.DataLoader(
         concat_dataset,
         num_workers=4,
-        batch_size=cfg.policy.batch_size,
+        batch_size=cfg.training.batch_size,
         sampler=sampler,
         pin_memory=cfg.device != "cpu",
         drop_last=False,

@@ -407,7 +403,7 @@ def train(cfg: dict, out_dir=None, job_name=None):

     online_step = 0
     is_offline = False
-    for env_step in range(cfg.online_steps):
+    for env_step in range(cfg.training.online_steps):
         if env_step == 0:
             logging.info("Start online training by interacting with environment")

@@ -428,16 +424,16 @@ def train(cfg: dict, out_dir=None, job_name=None):
             pc_online_samples=cfg.get("demo_schedule", 0.5),
         )

-        for _ in range(cfg.policy.utd):
+        for _ in range(cfg.training.online_steps_between_rollouts):
             policy.train()
             batch = next(dl_iter)

             for key in batch:
                 batch[key] = batch[key].to(cfg.device, non_blocking=True)

-            train_info = update_policy(policy, batch, optimizer, cfg.policy.grad_clip_norm, lr_scheduler)
+            train_info = update_policy(policy, batch, optimizer, cfg.training.grad_clip_norm, lr_scheduler)

-            if step % cfg.log_freq == 0:
+            if step % cfg.training.log_freq == 0:
                 log_train_info(logger, train_info, step, cfg, online_dataset, is_offline)

             # Note: _maybe_eval_and_maybe_save happens **after** the `step`th training update has completed, so we pass

@@ -33,7 +33,7 @@ def test_factory(env_name, repo_id, policy_name):
         DEFAULT_CONFIG_PATH,
         overrides=[
             f"env={env_name}",
-            f"dataset.repo_id={repo_id}",
+            f"dataset_repo_id={repo_id}",
             f"policy={policy_name}",
             f"device={DEVICE}",
         ],

@@ -39,7 +39,7 @@ def test_examples_3_and_2():
             ("training_steps = 5000", "training_steps = 1"),
             ("num_workers=4", "num_workers=0"),
             ('device = torch.device("cuda")', 'device = torch.device("cpu")'),
-            ("batch_size=cfg.batch_size", "batch_size=1"),
+            ("batch_size=64", "batch_size=1"),
         ],
     )

@@ -58,8 +58,8 @@ def test_examples_3_and_2():
     file_contents = _find_and_replace(
         file_contents,
         [
-            ('"eval_episodes=10"', '"eval_episodes=1"'),
-            ('"rollout_batch_size=10"', '"rollout_batch_size=1"'),
+            ('"eval.n_episodes=10"', '"eval.n_episodes=1"'),
+            ('"eval.batch_size=10"', '"eval.batch_size=1"'),
             ('"device=cuda"', '"device=cpu"'),
             (
                 '# folder = Path("outputs/train/example_pusht_diffusion")',

@@ -21,21 +21,21 @@ from tests.utils import DEFAULT_CONFIG_PATH, DEVICE, require_env
         # ("xarm", "tdmpc", ["policy.mpc=true"]),
         # ("pusht", "tdmpc", ["policy.mpc=false"]),
         ("pusht", "diffusion", []),
-        ("aloha", "act", ["env.task=AlohaInsertion-v0", "dataset.repo_id=lerobot/aloha_sim_insertion_human"]),
+        ("aloha", "act", ["env.task=AlohaInsertion-v0", "dataset_repo_id=lerobot/aloha_sim_insertion_human"]),
         (
             "aloha",
             "act",
-            ["env.task=AlohaInsertion-v0", "dataset.repo_id=lerobot/aloha_sim_insertion_scripted"],
+            ["env.task=AlohaInsertion-v0", "dataset_repo_id=lerobot/aloha_sim_insertion_scripted"],
         ),
         (
             "aloha",
             "act",
-            ["env.task=AlohaTransferCube-v0", "dataset.repo_id=lerobot/aloha_sim_transfer_cube_human"],
+            ["env.task=AlohaTransferCube-v0", "dataset_repo_id=lerobot/aloha_sim_transfer_cube_human"],
         ),
         (
             "aloha",
             "act",
-            ["env.task=AlohaTransferCube-v0", "dataset.repo_id=lerobot/aloha_sim_transfer_cube_scripted"],
+            ["env.task=AlohaTransferCube-v0", "dataset_repo_id=lerobot/aloha_sim_transfer_cube_scripted"],
         ),
     ],
 )

@@ -20,7 +20,7 @@ def test_visualize_dataset(tmpdir, repo_id):
         overrides=[
             "policy=act",
             "env=aloha",
-            f"dataset.repo_id={repo_id}",
+            f"dataset_repo_id={repo_id}",
         ],
     )
     video_paths = visualize_dataset(cfg, out_dir=tmpdir)