diff --git a/README.md b/README.md
index cf21337e..74a9b6ca 100644
--- a/README.md
+++ b/README.md
@@ -9,3 +9,17 @@
 conda env create -f environment.yaml
 conda activate lerobot
 ```
+**dev**
+
+```
+python setup.py develop
+```
+
+## Contribute
+
+**style**
+```
+isort .
+black .
+pylint lerobot
+```
diff --git a/lerobot/configs/default.yaml b/lerobot/configs/default.yaml
index 7c861d7b..f6202afa 100644
--- a/lerobot/configs/default.yaml
+++ b/lerobot/configs/default.yaml
@@ -1,2 +1,71 @@
 seed: 1337
 log_dir: logs/2024_01_26_train
+
+# env
+env: simxarm
+task: lift
+from_pixels: True
+pixels_only: False
+image_size: 84
+
+
+# pixels
+frame_stack: 1
+num_channels: 32
+img_size: 84
+
+
+# TDMPC
+
+# planning
+mpc: true
+iterations: 6
+num_samples: 512
+num_elites: 50
+mixture_coef: 0.1
+min_std: 0.05
+max_std: 2.0
+temperature: 0.5
+momentum: 0.1
+uncertainty_cost: 1
+
+# actor
+log_std_min: -10
+log_std_max: 2
+
+# learning
+batch_size: 256
+max_buffer_size: 10000
+horizon: 5
+reward_coef: 0.5
+value_coef: 0.1
+consistency_coef: 20
+rho: 0.5
+kappa: 0.1
+lr: 3e-4
+std_schedule: ${min_std}
+horizon_schedule: ${horizon}
+per: true
+per_alpha: 0.6
+per_beta: 0.4
+grad_clip_norm: 10
+seed_steps: 0
+update_freq: 2
+tau: 0.01
+utd: 1
+
+
+# architecture
+enc_dim: 256
+num_q: 5
+mlp_dim: 512
+latent_dim: 50
+
+
+# xarm_lift
+A_scaling: 3.0
+expectile: 0.9
+episode_length: 25
+modality: 'all'
+action_repeat: 2
+discount: 0.9
\ No newline at end of file
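Note: `std_schedule: ${min_std}` and `horizon_schedule: ${horizon}` use OmegaConf-style interpolation (the config backend Hydra builds on), so both keys resolve to other values in the same file. A minimal sketch of how they resolve, assuming the file is loaded directly with OmegaConf rather than through `hydra.main`:

```
# Minimal sketch: how the ${...} interpolations in default.yaml resolve.
# Assumes a direct OmegaConf load; Hydra resolves them the same way.
from omegaconf import OmegaConf

cfg = OmegaConf.load("lerobot/configs/default.yaml")
print(cfg.min_std)           # 0.05
print(cfg.std_schedule)      # 0.05 -- ${min_std} resolves on access
print(cfg.horizon_schedule)  # 5    -- ${horizon} resolves on access
```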
diff --git a/lerobot/scripts/eval.py b/lerobot/scripts/eval.py
index 3b0f61ef..0da0c60e 100644
--- a/lerobot/scripts/eval.py
+++ b/lerobot/scripts/eval.py
@@ -4,10 +4,12 @@ import hydra
 import imageio
 import numpy as np
 import torch
+from tensordict import TensorDict
 from termcolor import colored
 
-from ..lib.envs import make_env
-from ..lib.utils import set_seed
+from lerobot.lib.envs.factory import make_env
+from lerobot.lib.tdmpc import TDMPC
+from lerobot.lib.utils import set_seed
 
 
 def eval_agent(
@@ -21,32 +23,45 @@ def eval_agent(
     episode_successes = []
     episode_lengths = []
     for i in range(num_episodes):
-        obs, done, ep_reward, t = env.reset(), False, 0, 0
+        td = env.reset()
+        obs = {}
+        obs["rgb"] = td["observation"]["camera"]
+        obs["state"] = td["observation"]["robot_state"]
+
+        done = False
+        ep_reward = 0
+        t = 0
         ep_success = False
+
         if save_video:
             frames = []
         while not done:
-            action = agent.act(obs, t0=t == 0, eval_mode=True, step=step)
-            obs, reward, done, info = env.step(action.cpu().numpy())
+            action = agent.act(obs, t0=t == 0, eval_mode=True, step=100000)
+            td = TensorDict({"action": action}, batch_size=[])
+
+            td = env.step(td)
+
+            reward = td["next", "reward"].item()
+            success = td["next", "success"].item()
+            done = td["next", "done"].item()
+
+            obs = {}
+            obs["rgb"] = td["next", "observation"]["camera"]
+            obs["state"] = td["next", "observation"]["robot_state"]
+
             ep_reward += reward
-            if "success" in info and info["success"]:
+            if success:
                 ep_success = True
             if save_video:
-                frame = env.render(
-                    mode="rgb_array",
-                    # TODO(rcadene): make height, width, camera_id configurable
-                    height=384,
-                    width=384,
-                    camera_id=0,
-                )
+                frame = env.render()
                 frames.append(frame)
             t += 1
         episode_rewards.append(float(ep_reward))
         episode_successes.append(float(ep_success))
         episode_lengths.append(t)
         if save_video:
-            frames = np.stack(frames).transpose(0, 3, 1, 2)
             video_path.parent.mkdir(parents=True, exist_ok=True)
+            frames = np.stack(frames)  # .transpose(0, 3, 1, 2)
             # TODO(rcadene): make fps configurable
             imageio.mimsave(video_path, frames, fps=15)
     return {
@@ -63,8 +78,20 @@ def eval(cfg: dict):
     print(colored("Log dir:", "yellow", attrs=["bold"]), cfg.log_dir)
 
     env = make_env(cfg)
+    agent = TDMPC(cfg)
+    # ckpt_path = "/home/rcadene/code/fowm/logs/xarm_lift/all/default/2/models/offline.pt"
+    ckpt_path = "/home/rcadene/code/fowm/logs/xarm_lift/all/default/2/models/final.pt"
+    agent.load(ckpt_path)
 
-    eval_metrics = eval_agent(env, agent, num_episodes=10, save_video=True)
+    eval_metrics = eval_agent(
+        env,
+        agent,
+        num_episodes=10,
+        save_video=True,
+        video_path=Path("tmp/2023_01_29_xarm_lift_final/eval.mp4"),
+    )
+
+    print(eval_metrics)
 
 
 if __name__ == "__main__":
diff --git a/test/test_envs.py b/test/test_envs.py
index 49b1cae2..0968971b 100644
--- a/test/test_envs.py
+++ b/test/test_envs.py
@@ -31,6 +31,8 @@ def test_simxarm(task, from_pixels, pixels_only):
     print("observation_spec:", env.observation_spec)
     print("action_spec:", env.action_spec)
     print("reward_spec:", env.reward_spec)
+    print("done_spec:", env.done_spec)
+    print("success_spec:", env.success_spec)
 
     td = env.reset()
     print("reset tensordict", td)
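Both `eval.py` and `test_envs.py` now go through the TensorDict API: `env.reset()` and `env.step()` exchange TensorDicts, and step results are nested under the `"next"` key. A minimal sketch of that pattern, assuming `env` and `agent` are built as in the diff (`make_env(cfg)` / `TDMPC(cfg)`):

```
# Minimal sketch of the TensorDict stepping pattern used in eval_agent.
# Assumes `env` comes from make_env(cfg) and exposes "camera" and
# "robot_state" under "observation", as in the diff above.
from tensordict import TensorDict

td = env.reset()
obs = {
    "rgb": td["observation"]["camera"],
    "state": td["observation"]["robot_state"],
}
action = agent.act(obs, t0=True, eval_mode=True, step=0)
td = env.step(TensorDict({"action": action}, batch_size=[]))
reward = td["next", "reward"].item()  # step results live under "next"
done = td["next", "done"].item()
```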