2024-02-25 18:50:23 +08:00
|
|
|
def make_policy(cfg):
    """Build the policy selected by ``cfg.policy.name`` and optionally load weights.

    Args:
        cfg: Configuration object. This function reads ``cfg.policy``
            (including ``name`` and ``pretrained_model_path``) and
            ``cfg.device``. The diffusion branch additionally reads
            ``cfg.noise_scheduler``, ``cfg.rgb_model``, ``cfg.obs_encoder``,
            ``cfg.optimizer``, ``cfg.ema``, ``cfg.n_action_steps`` and
            ``cfg.n_latency_steps``.

    Returns:
        The constructed policy. When ``cfg.policy.pretrained_model_path`` is
        truthy, ``policy.load`` has been called with that path.

    Raises:
        ValueError: If ``cfg.policy.name`` is neither ``"tdmpc"`` nor
            ``"diffusion"``.
        NotImplementedError: If a TDMPC checkpoint path contains ``"fowm"``
            but neither ``"offline"`` nor ``"final"``.
    """
    policy_name = cfg.policy.name

    # Policy modules are imported lazily so that only the selected
    # implementation gets imported.
    if policy_name == "tdmpc":
        from lerobot.common.policies.tdmpc.policy import TDMPC

        policy = TDMPC(cfg.policy, cfg.device)
    elif policy_name == "diffusion":
        from lerobot.common.policies.diffusion.policy import DiffusionPolicy

        policy = DiffusionPolicy(
            cfg=cfg.policy,
            cfg_device=cfg.device,
            cfg_noise_scheduler=cfg.noise_scheduler,
            cfg_rgb_model=cfg.rgb_model,
            cfg_obs_encoder=cfg.obs_encoder,
            cfg_optimizer=cfg.optimizer,
            cfg_ema=cfg.ema,
            n_action_steps=cfg.n_action_steps + cfg.n_latency_steps,
            **cfg.policy,
        )
    else:
        raise ValueError(policy_name)

    pretrained_path = cfg.policy.pretrained_model_path
    if pretrained_path:
        # TODO(rcadene): hack for old pretrained models from fowm
        if policy_name == "tdmpc" and "fowm" in pretrained_path:
            # Use the same path that is gated on above and loaded below
            # (cfg.policy.pretrained_model_path), not an attribute on the
            # config root.
            if "offline" in pretrained_path:
                policy.step[0] = 25000
            elif "final" in pretrained_path:
                policy.step[0] = 100000
            else:
                raise NotImplementedError()
        policy.load(pretrained_path)

    return policy
|