From ab3cd3a7ba318e5eefea67a47b63b2f9688923dd Mon Sep 17 00:00:00 2001
From: Simon Alibert
Date: Fri, 5 Apr 2024 15:35:20 +0200
Subject: [PATCH 1/3] (WIP) Add gym-xarm

---
 lerobot/common/envs/factory.py | 12 ++++++++++--
 poetry.lock                    | 25 ++++++++++++++++++++++--
 pyproject.toml                 |  4 ++++
 tests/test_envs.py             | 35 +++++++++++++++++-----------------
 4 files changed, 54 insertions(+), 22 deletions(-)

diff --git a/lerobot/common/envs/factory.py b/lerobot/common/envs/factory.py
index 788af3cb..ab3e9294 100644
--- a/lerobot/common/envs/factory.py
+++ b/lerobot/common/envs/factory.py
@@ -9,9 +9,17 @@ def make_env(cfg, num_parallel_envs=0) -> gym.Env | gym.vector.SyncVectorEnv:
     kwargs = {}

     if cfg.env.name == "simxarm":
-        kwargs["task"] = cfg.env.task
+        import gym_xarm  # noqa: F401
+
+        assert cfg.env.task == "lift"
+        env_fn = lambda: gym.make(
+            "gym_xarm/XarmLift-v0",
+            render_mode="rgb_array",
+            max_episode_steps=cfg.env.episode_length,
+            **kwargs,
+        )
     elif cfg.env.name == "pusht":
-        import gym_pusht  # noqa
+        import gym_pusht  # noqa: F401

         # assert kwargs["seed"] > 200, "Seed 0-200 are used for the demonstration dataset, so we don't want to seed the eval env with this range."
         kwargs.update(
diff --git a/poetry.lock b/poetry.lock
index 8fb6b7a7..b9e31930 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -900,7 +900,27 @@ shapely = "^2.0.3"
 type = "git"
 url = "git@github.com:huggingface/gym-pusht.git"
 reference = "HEAD"
-resolved_reference = "d7e1a39a31b1368741e9674791007d7cccf046a3"
+resolved_reference = "0fe4449cca5a2b08f529f7a07fbf5b9df24962ec"
+
+[[package]]
+name = "gym-xarm"
+version = "0.1.0"
+description = "A gym environment for xArm"
+optional = true
+python-versions = "^3.10"
+files = []
+develop = false
+
+[package.dependencies]
+gymnasium = "^0.29.1"
+gymnasium-robotics = "^1.2.4"
+mujoco = "^2.3.7"
+
+[package.source]
+type = "git"
+url = "git@github.com:huggingface/gym-xarm.git"
+reference = "HEAD"
+resolved_reference = "2eb83fc4fc871b9d271c946d169e42f226ac3a7c"

 [[package]]
 name = "gymnasium"
@@ -3611,8 +3631,9 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p

 [extras]
 pusht = ["gym_pusht"]
+xarm = ["gym_xarm"]

 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "3eee17e4bf2b7a570f41ef9c400ec5a24a3113f62a13162229cf43504ca0d005"
+content-hash = "c9524cdf000eaa755a2ab3be669118222b4f8b1c262013f103f6874cbd54eeb6"
diff --git a/pyproject.toml b/pyproject.toml
index d0fc7c0d..b7e1b9fb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,9 +53,13 @@ gymnasium-robotics = "^1.2.4"
 gymnasium = "^0.29.1"
 cmake = "^3.29.0.1"
 gym_pusht = { git = "git@github.com:huggingface/gym-pusht.git", optional = true}
+gym_xarm = { git = "git@github.com:huggingface/gym-xarm.git", optional = true}
+# gym_pusht = { path = "../gym-pusht", develop = true, optional = true}
+# gym_xarm = { path = "../gym-xarm", develop = true, optional = true}

 [tool.poetry.extras]
 pusht = ["gym_pusht"]
+xarm = ["gym_xarm"]

 [tool.poetry.group.dev.dependencies]
 pre-commit = "^3.6.2"
diff --git a/tests/test_envs.py b/tests/test_envs.py
index 0c56f4fc..665c1fba 100644
--- a/tests/test_envs.py
+++ b/tests/test_envs.py
@@ -3,6 +3,8 @@ from tensordict import TensorDict
 import torch
 from torchrl.envs.utils import check_env_specs, step_mdp
 from lerobot.common.datasets.factory import make_dataset
+import gymnasium as gym
+from gymnasium.utils.env_checker import check_env

 from lerobot.common.envs.aloha.env import AlohaEnv
 from lerobot.common.envs.factory import make_env
@@ -61,29 +63,26 @@ def test_aloha(task, from_pixels, pixels_only):


 @pytest.mark.parametrize(
-    "task,from_pixels,pixels_only",
+    "env_task, obs_type",
     [
-        ("lift", False, False),
-        ("lift", True, False),
-        ("lift", True, True),
+        ("XarmLift-v0", "state"),
+        ("XarmLift-v0", "pixels"),
+        ("XarmLift-v0", "pixels_agent_pos"),
         # TODO(aliberts): Add simxarm other tasks
-        # ("reach", False, False),
-        # ("reach", True, False),
-        # ("push", False, False),
-        # ("push", True, False),
-        # ("peg_in_box", False, False),
-        # ("peg_in_box", True, False),
     ],
 )
-def test_simxarm(task, from_pixels, pixels_only):
-    env = SimxarmEnv(
-        task,
-        from_pixels=from_pixels,
-        pixels_only=pixels_only,
-        image_size=84 if from_pixels else None,
-    )
+def test_xarm(env_task, obs_type):
+    import gym_xarm
+    env = gym.make(f"gym_xarm/{env_task}", obs_type=obs_type)
+    # env = SimxarmEnv(
+    #     task,
+    #     from_pixels=from_pixels,
+    #     pixels_only=pixels_only,
+    #     image_size=84 if from_pixels else None,
+    # )
     # print_spec_rollout(env)
-    check_env_specs(env)
+    # check_env_specs(env)
+    check_env(env)


 @pytest.mark.parametrize(

From f56b1a0e1630867691fd07329cd52c257ee3d44c Mon Sep 17 00:00:00 2001
From: Cadene
Date: Fri, 5 Apr 2024 13:40:31 +0000
Subject: [PATCH 2/3] WIP tdmpc

---
 lerobot/common/policies/tdmpc/policy.py | 207 +++++++++++++-----------
 lerobot/configs/env/simxarm.yaml        |   2 +-
 lerobot/configs/policy/tdmpc.yaml       |   2 +-
 sbatch.sh                               |   4 +-
 4 files changed, 121 insertions(+), 94 deletions(-)

diff --git a/lerobot/common/policies/tdmpc/policy.py b/lerobot/common/policies/tdmpc/policy.py
index 64dcc94d..85700913 100644
--- a/lerobot/common/policies/tdmpc/policy.py
+++ b/lerobot/common/policies/tdmpc/policy.py
@@ -1,6 +1,7 @@
 # ruff: noqa: N806

 import time
+from collections import deque
 from copy import deepcopy

 import einops
@@ -9,7 +10,7 @@ import torch
 import torch.nn as nn

 import lerobot.common.policies.tdmpc.helper as h
-from lerobot.common.policies.abstract import AbstractPolicy
+from lerobot.common.policies.utils import populate_queues
 from lerobot.common.utils import get_safe_torch_device

 FIRST_FRAME = 0
@@ -87,16 +88,18 @@ class TOLD(nn.Module):
         return torch.min(Q1, Q2) if return_type == "min" else (Q1 + Q2) / 2


-class TDMPCPolicy(AbstractPolicy):
+class TDMPCPolicy(nn.Module):
     """Implementation of TD-MPC learning + inference."""

     name = "tdmpc"

-    def __init__(self, cfg, device):
-        super().__init__(None)
+    def __init__(self, cfg, n_obs_steps, n_action_steps, device):
+        super().__init__()
         self.action_dim = cfg.action_dim

         self.cfg = cfg
+        self.n_obs_steps = n_obs_steps
+        self.n_action_steps = n_action_steps
         self.device = get_safe_torch_device(device)
         self.std = h.linear_schedule(cfg.std_schedule, 0)
         self.model = TOLD(cfg)
@@ -128,20 +131,42 @@ class TDMPCPolicy(AbstractPolicy):
         self.model.load_state_dict(d["model"])
         self.model_target.load_state_dict(d["model_target"])

-    @torch.no_grad()
-    def select_actions(self, observation, step_count):
-        if observation["image"].shape[0] != 1:
-            raise NotImplementedError("Batch size > 1 not handled")
-
-        t0 = step_count.item() == 0
-
-        obs = {
-            # TODO(rcadene): remove contiguous hack...
-            "rgb": observation["image"].contiguous(),
-            "state": observation["state"].contiguous(),
+    def reset(self):
+        """
+        Clear observation and action queues. Should be called on `env.reset()`
+        """
+        self._queues = {
+            "observation.image": deque(maxlen=self.n_obs_steps),
+            "observation.state": deque(maxlen=self.n_obs_steps),
+            "action": deque(maxlen=self.n_action_steps),
         }
-        # Note: unsqueeze needed because `act` still uses non-batch logic.
-        action = self.act(obs, t0=t0, step=self.step.item()).unsqueeze(0)
+
+    @torch.no_grad()
+    def select_action(self, batch, step):
+        assert "observation.image" in batch
+        assert "observation.state" in batch
+        assert len(batch) == 2
+
+        self._queues = populate_queues(self._queues, batch)
+
+        t0 = step == 0
+
+        if len(self._queues["action"]) == 0:
+            batch = {key: torch.stack(list(self._queues[key]), dim=1) for key in batch}
+
+            actions = []
+            batch_size = batch["observation.image"].shape[0]
+            for i in range(batch_size):
+                obs = {
+                    "rgb": batch["observation.image"][[i]],
+                    "state": batch["observation.state"][[i]],
+                }
+                # Note: unsqueeze needed because `act` still uses non-batch logic.
+                action = self.act(obs, t0=t0, step=self.step)
+                actions.append(action)
+            action = torch.stack(actions)
+
+        action = self._queues["action"].popleft()
         return action

     @torch.no_grad()
@@ -293,97 +318,97 @@ class TDMPCPolicy(AbstractPolicy):
         td_target = reward + self.cfg.discount * mask * next_v
         return td_target

-    def update(self, replay_buffer, step, demo_buffer=None):
+    def forward(self, batch, step):
        """Main update function. Corresponds to one iteration of the model learning."""
         start_time = time.time()

-        num_slices = self.cfg.batch_size
-        batch_size = self.cfg.horizon * num_slices
+        # num_slices = self.cfg.batch_size
+        # batch_size = self.cfg.horizon * num_slices

-        if demo_buffer is None:
-            demo_batch_size = 0
-        else:
-            # Update oversampling ratio
-            demo_pc_batch = h.linear_schedule(self.cfg.demo_schedule, step)
-            demo_num_slices = int(demo_pc_batch * self.batch_size)
-            demo_batch_size = self.cfg.horizon * demo_num_slices
-            batch_size -= demo_batch_size
-            num_slices -= demo_num_slices
-            replay_buffer._sampler.num_slices = num_slices
-            demo_buffer._sampler.num_slices = demo_num_slices
+        # if demo_buffer is None:
+        #     demo_batch_size = 0
+        # else:
+        #     # Update oversampling ratio
+        #     demo_pc_batch = h.linear_schedule(self.cfg.demo_schedule, step)
+        #     demo_num_slices = int(demo_pc_batch * self.batch_size)
+        #     demo_batch_size = self.cfg.horizon * demo_num_slices
+        #     batch_size -= demo_batch_size
+        #     num_slices -= demo_num_slices
+        #     replay_buffer._sampler.num_slices = num_slices
+        #     demo_buffer._sampler.num_slices = demo_num_slices

-        assert demo_batch_size % self.cfg.horizon == 0
-        assert demo_batch_size % demo_num_slices == 0
+        # assert demo_batch_size % self.cfg.horizon == 0
+        # assert demo_batch_size % demo_num_slices == 0

-        assert batch_size % self.cfg.horizon == 0
-        assert batch_size % num_slices == 0
+        # assert batch_size % self.cfg.horizon == 0
+        # assert batch_size % num_slices == 0

-        # Sample from interaction dataset
+        # # Sample from interaction dataset

-        def process_batch(batch, horizon, num_slices):
-            # trajectory t = 256, horizon h = 5
-            # (t h) ... -> h t ...
+ # batch = batch.reshape(num_slices, horizon).transpose(1, 0).contiguous() - obs = { - "rgb": batch["observation", "image"][FIRST_FRAME].to(self.device, non_blocking=True), - "state": batch["observation", "state"][FIRST_FRAME].to(self.device, non_blocking=True), - } - action = batch["action"].to(self.device, non_blocking=True) - next_obses = { - "rgb": batch["next", "observation", "image"].to(self.device, non_blocking=True), - "state": batch["next", "observation", "state"].to(self.device, non_blocking=True), - } - reward = batch["next", "reward"].to(self.device, non_blocking=True) + # obs = { + # "rgb": batch["observation", "image"][FIRST_FRAME].to(self.device, non_blocking=True), + # "state": batch["observation", "state"][FIRST_FRAME].to(self.device, non_blocking=True), + # } + # action = batch["action"].to(self.device, non_blocking=True) + # next_obses = { + # "rgb": batch["next", "observation", "image"].to(self.device, non_blocking=True), + # "state": batch["next", "observation", "state"].to(self.device, non_blocking=True), + # } + # reward = batch["next", "reward"].to(self.device, non_blocking=True) - idxs = batch["index"][FIRST_FRAME].to(self.device, non_blocking=True) - weights = batch["_weight"][FIRST_FRAME, :, None].to(self.device, non_blocking=True) + # idxs = batch["index"][FIRST_FRAME].to(self.device, non_blocking=True) + # weights = batch["_weight"][FIRST_FRAME, :, None].to(self.device, non_blocking=True) - # TODO(rcadene): rearrange directly in offline dataset - if reward.ndim == 2: - reward = einops.rearrange(reward, "h t -> h t 1") + # # TODO(rcadene): rearrange directly in offline dataset + # if reward.ndim == 2: + # reward = einops.rearrange(reward, "h t -> h t 1") - assert reward.ndim == 3 - assert reward.shape == (horizon, num_slices, 1) - # We dont use `batch["next", "done"]` since it only indicates the end of an - # episode, but not the end of the trajectory of an episode. - # Neither does `batch["next", "terminated"]` - done = torch.zeros_like(reward, dtype=torch.bool, device=reward.device) - mask = torch.ones_like(reward, dtype=torch.bool, device=reward.device) - return obs, action, next_obses, reward, mask, done, idxs, weights + # assert reward.ndim == 3 + # assert reward.shape == (horizon, num_slices, 1) + # # We dont use `batch["next", "done"]` since it only indicates the end of an + # # episode, but not the end of the trajectory of an episode. 
+ # # Neither does `batch["next", "terminated"]` + # done = torch.zeros_like(reward, dtype=torch.bool, device=reward.device) + # mask = torch.ones_like(reward, dtype=torch.bool, device=reward.device) + # return obs, action, next_obses, reward, mask, done, idxs, weights - batch = replay_buffer.sample(batch_size) if self.cfg.balanced_sampling else replay_buffer.sample() + # batch = replay_buffer.sample(batch_size) if self.cfg.balanced_sampling else replay_buffer.sample() - obs, action, next_obses, reward, mask, done, idxs, weights = process_batch( - batch, self.cfg.horizon, num_slices - ) + # obs, action, next_obses, reward, mask, done, idxs, weights = process_batch( + # batch, self.cfg.horizon, num_slices + # ) # Sample from demonstration dataset - if demo_batch_size > 0: - demo_batch = demo_buffer.sample(demo_batch_size) - ( - demo_obs, - demo_action, - demo_next_obses, - demo_reward, - demo_mask, - demo_done, - demo_idxs, - demo_weights, - ) = process_batch(demo_batch, self.cfg.horizon, demo_num_slices) + # if demo_batch_size > 0: + # demo_batch = demo_buffer.sample(demo_batch_size) + # ( + # demo_obs, + # demo_action, + # demo_next_obses, + # demo_reward, + # demo_mask, + # demo_done, + # demo_idxs, + # demo_weights, + # ) = process_batch(demo_batch, self.cfg.horizon, demo_num_slices) - if isinstance(obs, dict): - obs = {k: torch.cat([obs[k], demo_obs[k]]) for k in obs} - next_obses = {k: torch.cat([next_obses[k], demo_next_obses[k]], dim=1) for k in next_obses} - else: - obs = torch.cat([obs, demo_obs]) - next_obses = torch.cat([next_obses, demo_next_obses], dim=1) - action = torch.cat([action, demo_action], dim=1) - reward = torch.cat([reward, demo_reward], dim=1) - mask = torch.cat([mask, demo_mask], dim=1) - done = torch.cat([done, demo_done], dim=1) - idxs = torch.cat([idxs, demo_idxs]) - weights = torch.cat([weights, demo_weights]) + # if isinstance(obs, dict): + # obs = {k: torch.cat([obs[k], demo_obs[k]]) for k in obs} + # next_obses = {k: torch.cat([next_obses[k], demo_next_obses[k]], dim=1) for k in next_obses} + # else: + # obs = torch.cat([obs, demo_obs]) + # next_obses = torch.cat([next_obses, demo_next_obses], dim=1) + # action = torch.cat([action, demo_action], dim=1) + # reward = torch.cat([reward, demo_reward], dim=1) + # mask = torch.cat([mask, demo_mask], dim=1) + # done = torch.cat([done, demo_done], dim=1) + # idxs = torch.cat([idxs, demo_idxs]) + # weights = torch.cat([weights, demo_weights]) # Apply augmentations aug_tf = h.aug(self.cfg) diff --git a/lerobot/configs/env/simxarm.yaml b/lerobot/configs/env/simxarm.yaml index f79db8f7..843f80c6 100644 --- a/lerobot/configs/env/simxarm.yaml +++ b/lerobot/configs/env/simxarm.yaml @@ -17,7 +17,7 @@ env: from_pixels: True pixels_only: False image_size: 84 - action_repeat: 2 + # action_repeat: 2 # we can remove if policy has n_action_steps=2 episode_length: 25 fps: ${fps} diff --git a/lerobot/configs/policy/tdmpc.yaml b/lerobot/configs/policy/tdmpc.yaml index ff0e6b04..5d5d8b62 100644 --- a/lerobot/configs/policy/tdmpc.yaml +++ b/lerobot/configs/policy/tdmpc.yaml @@ -1,6 +1,6 @@ # @package _global_ -n_action_steps: 1 +n_action_steps: 2 n_obs_steps: 1 policy: diff --git a/sbatch.sh b/sbatch.sh index cb5b285a..c08f7055 100644 --- a/sbatch.sh +++ b/sbatch.sh @@ -6,7 +6,7 @@ #SBATCH --time=2-00:00:00 #SBATCH --output=/home/rcadene/slurm/%j.out #SBATCH --error=/home/rcadene/slurm/%j.err -#SBATCH --qos=medium +#SBATCH --qos=low #SBATCH --mail-user=re.cadene@gmail.com #SBATCH --mail-type=ALL @@ -20,4 +20,6 @@ source ~/.bashrc 
#conda activate fowm conda activate lerobot +export DATA_DIR="data" + srun $CMD From 26602269cd7760e8eb9d54eafd01d7828f17c126 Mon Sep 17 00:00:00 2001 From: Cadene Date: Fri, 5 Apr 2024 16:21:07 +0000 Subject: [PATCH 3/3] test_envs.py are passing, remove simxarm and pusht directories --- lerobot/common/envs/abstract.py | 92 ----- lerobot/common/envs/factory.py | 19 +- lerobot/common/envs/pusht/env.py | 245 ------------ lerobot/common/envs/pusht/pusht_env.py | 378 ------------------ lerobot/common/envs/pusht/pusht_image_env.py | 41 -- lerobot/common/envs/pusht/pymunk_override.py | 244 ----------- lerobot/common/envs/simxarm/env.py | 237 ----------- .../common/envs/simxarm/simxarm/__init__.py | 166 -------- .../envs/simxarm/simxarm/tasks/__init__.py | 0 .../simxarm/simxarm/tasks/assets/lift.xml | 53 --- .../simxarm/tasks/assets/mesh/base_link.stl | 3 - .../simxarm/tasks/assets/mesh/block_inner.stl | 3 - .../tasks/assets/mesh/block_inner2.stl | 3 - .../simxarm/tasks/assets/mesh/block_outer.stl | 3 - .../simxarm/tasks/assets/mesh/left_finger.stl | 3 - .../tasks/assets/mesh/left_inner_knuckle.stl | 3 - .../tasks/assets/mesh/left_outer_knuckle.stl | 3 - .../simxarm/tasks/assets/mesh/link1.stl | 3 - .../simxarm/tasks/assets/mesh/link2.stl | 3 - .../simxarm/tasks/assets/mesh/link3.stl | 3 - .../simxarm/tasks/assets/mesh/link4.stl | 3 - .../simxarm/tasks/assets/mesh/link5.stl | 3 - .../simxarm/tasks/assets/mesh/link6.stl | 3 - .../simxarm/tasks/assets/mesh/link7.stl | 3 - .../simxarm/tasks/assets/mesh/link_base.stl | 3 - .../tasks/assets/mesh/right_finger.stl | 3 - .../tasks/assets/mesh/right_inner_knuckle.stl | 3 - .../tasks/assets/mesh/right_outer_knuckle.stl | 3 - .../simxarm/tasks/assets/peg_in_box.xml | 74 ---- .../simxarm/simxarm/tasks/assets/push.xml | 54 --- .../simxarm/simxarm/tasks/assets/reach.xml | 48 --- .../simxarm/simxarm/tasks/assets/shared.xml | 51 --- .../simxarm/simxarm/tasks/assets/xarm.xml | 88 ---- .../common/envs/simxarm/simxarm/tasks/base.py | 145 ------- .../common/envs/simxarm/simxarm/tasks/lift.py | 100 ----- .../envs/simxarm/simxarm/tasks/mocap.py | 67 ---- .../envs/simxarm/simxarm/tasks/peg_in_box.py | 86 ---- .../common/envs/simxarm/simxarm/tasks/push.py | 78 ---- .../envs/simxarm/simxarm/tasks/reach.py | 44 -- lerobot/common/envs/utils.py | 32 ++ lerobot/scripts/eval.py | 34 +- tests/test_envs.py | 100 ++--- 42 files changed, 86 insertions(+), 2444 deletions(-) delete mode 100644 lerobot/common/envs/abstract.py delete mode 100644 lerobot/common/envs/pusht/env.py delete mode 100644 lerobot/common/envs/pusht/pusht_env.py delete mode 100644 lerobot/common/envs/pusht/pusht_image_env.py delete mode 100644 lerobot/common/envs/pusht/pymunk_override.py delete mode 100644 lerobot/common/envs/simxarm/env.py delete mode 100644 lerobot/common/envs/simxarm/simxarm/__init__.py delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/__init__.py delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/lift.xml delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/base_link.stl delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_inner.stl delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_inner2.stl delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_outer.stl delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_finger.stl delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_inner_knuckle.stl delete mode 100644 
lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_outer_knuckle.stl delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link1.stl delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link2.stl delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link3.stl delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link4.stl delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link5.stl delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link6.stl delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link7.stl delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link_base.stl delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_finger.stl delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_inner_knuckle.stl delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_outer_knuckle.stl delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/peg_in_box.xml delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/push.xml delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/reach.xml delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/shared.xml delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/assets/xarm.xml delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/base.py delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/lift.py delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/mocap.py delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/peg_in_box.py delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/push.py delete mode 100644 lerobot/common/envs/simxarm/simxarm/tasks/reach.py create mode 100644 lerobot/common/envs/utils.py diff --git a/lerobot/common/envs/abstract.py b/lerobot/common/envs/abstract.py deleted file mode 100644 index ea5ce3da..00000000 --- a/lerobot/common/envs/abstract.py +++ /dev/null @@ -1,92 +0,0 @@ -from collections import deque -from typing import Optional - -from tensordict import TensorDict -from torchrl.envs import EnvBase - -from lerobot.common.utils import set_global_seed - - -class AbstractEnv(EnvBase): - """ - Note: - When implementing a concrete class (e.g. `AlohaDataset`, `PushtEnv`, `DiffusionPolicy`), you need to: - 1. set the required class attributes: - - for classes inheriting from `AbstractDataset`: `available_datasets` - - for classes inheriting from `AbstractEnv`: `name`, `available_tasks` - - for classes inheriting from `AbstractPolicy`: `name` - 2. update variables in `lerobot/__init__.py` (e.g. `available_envs`, `available_datasets_per_envs`, `available_policies`) - 3. update variables in `tests/test_available.py` by importing your new class - """ - - name: str | None = None # same name should be used to instantiate the environment in factory.py - available_tasks: list[str] | None = None # for instance: sim_insertion, sim_transfer_cube, pusht, lift - - def __init__( - self, - task, - frame_skip: int = 1, - from_pixels: bool = False, - pixels_only: bool = False, - image_size=None, - seed=1337, - device="cpu", - num_prev_obs=1, - num_prev_action=0, - ): - super().__init__(device=device, batch_size=[]) - assert self.name is not None, "Subclasses of `AbstractEnv` should set the `name` class attribute." 
- assert ( - self.available_tasks is not None - ), "Subclasses of `AbstractEnv` should set the `available_tasks` class attribute." - assert ( - task in self.available_tasks - ), f"The provided task ({task}) is not on the list of available tasks {self.available_tasks}." - - self.task = task - self.frame_skip = frame_skip - self.from_pixels = from_pixels - self.pixels_only = pixels_only - self.image_size = image_size - self.num_prev_obs = num_prev_obs - self.num_prev_action = num_prev_action - - if pixels_only: - assert from_pixels - if from_pixels: - assert image_size - - self._make_env() - self._make_spec() - - # self._next_seed will be used for the next reset. It is recommended that when self.set_seed is called - # you store the return value in self._next_seed (it will be a new randomly generated seed). - self._next_seed = seed - # Don't store the result of this in self._next_seed, as we want to make sure that the first time - # self._reset is called, we use seed. - self.set_seed(seed) - - if self.num_prev_obs > 0: - self._prev_obs_image_queue = deque(maxlen=self.num_prev_obs) - self._prev_obs_state_queue = deque(maxlen=self.num_prev_obs) - if self.num_prev_action > 0: - raise NotImplementedError() - # self._prev_action_queue = deque(maxlen=self.num_prev_action) - - def render(self, mode="rgb_array", width=640, height=480): - raise NotImplementedError("Abstract method") - - def _reset(self, tensordict: Optional[TensorDict] = None): - raise NotImplementedError("Abstract method") - - def _step(self, tensordict: TensorDict): - raise NotImplementedError("Abstract method") - - def _make_env(self): - raise NotImplementedError("Abstract method") - - def _make_spec(self): - raise NotImplementedError("Abstract method") - - def _set_seed(self, seed: Optional[int]): - set_global_seed(seed) diff --git a/lerobot/common/envs/factory.py b/lerobot/common/envs/factory.py index ab3e9294..4ddb81a2 100644 --- a/lerobot/common/envs/factory.py +++ b/lerobot/common/envs/factory.py @@ -6,32 +6,27 @@ def make_env(cfg, num_parallel_envs=0) -> gym.Env | gym.vector.SyncVectorEnv: Note: When `num_parallel_envs > 0`, this function returns a `SyncVectorEnv` which takes batched action as input and returns batched observation, reward, terminated, truncated of `num_parallel_envs` items. """ - kwargs = {} + kwargs = { + "obs_type": "pixels_agent_pos", + "max_episode_steps": cfg.env.episode_length, + "visualization_width": 384, + "visualization_height": 384, + } if cfg.env.name == "simxarm": import gym_xarm # noqa: F401 assert cfg.env.task == "lift" - env_fn = lambda: gym.make( + env_fn = lambda: gym.make( # noqa: E731 "gym_xarm/XarmLift-v0", - render_mode="rgb_array", - max_episode_steps=cfg.env.episode_length, **kwargs, ) elif cfg.env.name == "pusht": import gym_pusht # noqa: F401 # assert kwargs["seed"] > 200, "Seed 0-200 are used for the demonstration dataset, so we don't want to seed the eval env with this range." 
- kwargs.update( - { - "obs_type": "pixels_agent_pos", - "render_action": False, - } - ) env_fn = lambda: gym.make( # noqa: E731 "gym_pusht/PushTPixels-v0", - render_mode="rgb_array", - max_episode_steps=cfg.env.episode_length, **kwargs, ) elif cfg.env.name == "aloha": diff --git a/lerobot/common/envs/pusht/env.py b/lerobot/common/envs/pusht/env.py deleted file mode 100644 index 5f7fb2c3..00000000 --- a/lerobot/common/envs/pusht/env.py +++ /dev/null @@ -1,245 +0,0 @@ -import importlib -import logging -from collections import deque -from typing import Optional - -import cv2 -import numpy as np -import torch -from tensordict import TensorDict -from torchrl.data.tensor_specs import ( - BoundedTensorSpec, - CompositeSpec, - DiscreteTensorSpec, - UnboundedContinuousTensorSpec, -) -from torchrl.envs.libs.gym import _gym_to_torchrl_spec_transform - -from lerobot.common.envs.abstract import AbstractEnv -from lerobot.common.utils import set_global_seed - -_has_gym = importlib.util.find_spec("gymnasium") is not None - - -class PushtEnv(AbstractEnv): - name = "pusht" - available_tasks = ["pusht"] - _reset_warning_issued = False - - def __init__( - self, - task="pusht", - frame_skip: int = 1, - from_pixels: bool = False, - pixels_only: bool = False, - image_size=None, - seed=1337, - device="cpu", - num_prev_obs=1, - num_prev_action=0, - ): - super().__init__( - task=task, - frame_skip=frame_skip, - from_pixels=from_pixels, - pixels_only=pixels_only, - image_size=image_size, - seed=seed, - device=device, - num_prev_obs=num_prev_obs, - num_prev_action=num_prev_action, - ) - - def _make_env(self): - if not _has_gym: - raise ImportError("Cannot import gymnasium.") - - # TODO(rcadene) (PushTEnv is similar to PushTImageEnv, but without the image rendering, it's faster to iterate on) - # from lerobot.common.envs.pusht.pusht_env import PushTEnv - - if not self.from_pixels: - raise NotImplementedError("Use PushTEnv, instead of PushTImageEnv") - from lerobot.common.envs.pusht.pusht_image_env import PushTImageEnv - - self._env = PushTImageEnv(render_size=self.image_size) - - def render(self, mode="rgb_array", width=96, height=96, with_marker=True): - """ - with_marker adds a cursor showing the targeted action for the controller. 
- """ - if width != height: - raise NotImplementedError() - tmp = self._env.render_size - if width != self._env.render_size: - self._env.render_cache = None - self._env.render_size = width - out = self._env.render(mode).copy() - if with_marker and self._env.latest_action is not None: - action = np.array(self._env.latest_action) - coord = (action / 512 * self._env.render_size).astype(np.int32) - marker_size = int(8 / 96 * self._env.render_size) - thickness = int(1 / 96 * self._env.render_size) - cv2.drawMarker( - out, - coord, - color=(255, 0, 0), - markerType=cv2.MARKER_CROSS, - markerSize=marker_size, - thickness=thickness, - ) - self._env.render_size = tmp - return out - - def _format_raw_obs(self, raw_obs): - if self.from_pixels: - image = torch.from_numpy(raw_obs["image"]) - obs = {"image": image} - - if not self.pixels_only: - obs["state"] = torch.from_numpy(raw_obs["agent_pos"]).type(torch.float32) - else: - # TODO: - obs = {"state": torch.from_numpy(raw_obs["observation"]).type(torch.float32)} - - return obs - - def _reset(self, tensordict: Optional[TensorDict] = None): - if tensordict is not None and not PushtEnv._reset_warning_issued: - logging.warning(f"{self.__class__.__name__}._reset ignores the provided tensordict.") - PushtEnv._reset_warning_issued = True - - # Seed the environment and update the seed to be used for the next reset. - self._next_seed = self.set_seed(self._next_seed) - raw_obs = self._env.reset() - - obs = self._format_raw_obs(raw_obs) - - if self.num_prev_obs > 0: - stacked_obs = {} - if "image" in obs: - self._prev_obs_image_queue = deque( - [obs["image"]] * (self.num_prev_obs + 1), maxlen=(self.num_prev_obs + 1) - ) - stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue)) - if "state" in obs: - self._prev_obs_state_queue = deque( - [obs["state"]] * (self.num_prev_obs + 1), maxlen=(self.num_prev_obs + 1) - ) - stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue)) - obs = stacked_obs - - td = TensorDict( - { - "observation": TensorDict(obs, batch_size=[]), - "done": torch.tensor([False], dtype=torch.bool), - }, - batch_size=[], - ) - - return td - - def _step(self, tensordict: TensorDict): - td = tensordict - action = td["action"].numpy() - assert action.ndim == 1 - # TODO(rcadene): add info["is_success"] and info["success"] ? 
- - raw_obs, reward, done, info = self._env.step(action) - - obs = self._format_raw_obs(raw_obs) - - if self.num_prev_obs > 0: - stacked_obs = {} - if "image" in obs: - self._prev_obs_image_queue.append(obs["image"]) - stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue)) - if "state" in obs: - self._prev_obs_state_queue.append(obs["state"]) - stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue)) - obs = stacked_obs - - td = TensorDict( - { - "observation": TensorDict(obs, batch_size=[]), - "reward": torch.tensor([reward], dtype=torch.float32), - # success and done are true when coverage > self.success_threshold in env - "done": torch.tensor([done], dtype=torch.bool), - "success": torch.tensor([done], dtype=torch.bool), - }, - batch_size=[], - ) - return td - - def _make_spec(self): - obs = {} - if self.from_pixels: - image_shape = (3, self.image_size, self.image_size) - if self.num_prev_obs > 0: - image_shape = (self.num_prev_obs + 1, *image_shape) - - obs["image"] = BoundedTensorSpec( - low=0, - high=255, - shape=image_shape, - dtype=torch.uint8, - device=self.device, - ) - if not self.pixels_only: - state_shape = self._env.observation_space["agent_pos"].shape - if self.num_prev_obs > 0: - state_shape = (self.num_prev_obs + 1, *state_shape) - - obs["state"] = BoundedTensorSpec( - low=0, - high=512, - shape=state_shape, - dtype=torch.float32, - device=self.device, - ) - else: - # TODO(rcadene): add observation_space achieved_goal and desired_goal? - state_shape = self._env.observation_space["observation"].shape - if self.num_prev_obs > 0: - state_shape = (self.num_prev_obs + 1, *state_shape) - - obs["state"] = UnboundedContinuousTensorSpec( - # TODO: - shape=state_shape, - dtype=torch.float32, - device=self.device, - ) - self.observation_spec = CompositeSpec({"observation": obs}) - - self.action_spec = _gym_to_torchrl_spec_transform( - self._env.action_space, - device=self.device, - ) - - self.reward_spec = UnboundedContinuousTensorSpec( - shape=(1,), - dtype=torch.float32, - device=self.device, - ) - - self.done_spec = CompositeSpec( - { - "done": DiscreteTensorSpec( - 2, - shape=(1,), - dtype=torch.bool, - device=self.device, - ), - "success": DiscreteTensorSpec( - 2, - shape=(1,), - dtype=torch.bool, - device=self.device, - ), - } - ) - - def _set_seed(self, seed: Optional[int]): - # Set global seed. - set_global_seed(seed) - # Set PushTImageEnv seed as it relies on it's own internal _seed attribute. 
- self._env.seed(seed) diff --git a/lerobot/common/envs/pusht/pusht_env.py b/lerobot/common/envs/pusht/pusht_env.py deleted file mode 100644 index 6ef70aec..00000000 --- a/lerobot/common/envs/pusht/pusht_env.py +++ /dev/null @@ -1,378 +0,0 @@ -import collections - -import cv2 -import gymnasium as gym -import numpy as np -import pygame -import pymunk -import pymunk.pygame_util -import shapely.geometry as sg -import skimage.transform as st -from gymnasium import spaces -from pymunk.vec2d import Vec2d - -from lerobot.common.envs.pusht.pymunk_override import DrawOptions - - -def pymunk_to_shapely(body, shapes): - geoms = [] - for shape in shapes: - if isinstance(shape, pymunk.shapes.Poly): - verts = [body.local_to_world(v) for v in shape.get_vertices()] - verts += [verts[0]] - geoms.append(sg.Polygon(verts)) - else: - raise RuntimeError(f"Unsupported shape type {type(shape)}") - geom = sg.MultiPolygon(geoms) - return geom - - -class PushTEnv(gym.Env): - metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 10} - reward_range = (0.0, 1.0) - - def __init__( - self, - legacy=True, # compatibility with original - block_cog=None, - damping=None, - render_action=True, - render_size=96, - reset_to_state=None, - ): - self._seed = None - self.seed() - self.window_size = ws = 512 # The size of the PyGame window - self.render_size = render_size - self.sim_hz = 100 - # Local controller params. - self.k_p, self.k_v = 100, 20 # PD control.z - self.control_hz = self.metadata["video.frames_per_second"] - # legcay set_state for data compatibility - self.legacy = legacy - - # agent_pos, block_pos, block_angle - self.observation_space = spaces.Box( - low=np.array([0, 0, 0, 0, 0], dtype=np.float64), - high=np.array([ws, ws, ws, ws, np.pi * 2], dtype=np.float64), - shape=(5,), - dtype=np.float64, - ) - - # positional goal for agent - self.action_space = spaces.Box( - low=np.array([0, 0], dtype=np.float64), - high=np.array([ws, ws], dtype=np.float64), - shape=(2,), - dtype=np.float64, - ) - - self.block_cog = block_cog - self.damping = damping - self.render_action = render_action - - """ - If human-rendering is used, `self.window` will be a reference - to the window that we draw to. `self.clock` will be a clock that is used - to ensure that the environment is rendered at the correct framerate in - human-mode. They will remain `None` until human-mode is used for the - first time. - """ - self.window = None - self.clock = None - self.screen = None - - self.space = None - self.teleop = None - self.render_buffer = None - self.latest_action = None - self.reset_to_state = reset_to_state - - def reset(self): - seed = self._seed - self._setup() - if self.block_cog is not None: - self.block.center_of_gravity = self.block_cog - if self.damping is not None: - self.space.damping = self.damping - - # use legacy RandomState for compatibility - state = self.reset_to_state - if state is None: - rs = np.random.RandomState(seed=seed) - state = np.array( - [ - rs.randint(50, 450), - rs.randint(50, 450), - rs.randint(100, 400), - rs.randint(100, 400), - rs.randn() * 2 * np.pi - np.pi, - ] - ) - self._set_state(state) - - observation = self._get_obs() - return observation - - def step(self, action): - dt = 1.0 / self.sim_hz - self.n_contact_points = 0 - n_steps = self.sim_hz // self.control_hz - if action is not None: - self.latest_action = action - for _ in range(n_steps): - # Step PD control. - # self.agent.velocity = self.k_p * (act - self.agent.position) # P control works too. 
- acceleration = self.k_p * (action - self.agent.position) + self.k_v * ( - Vec2d(0, 0) - self.agent.velocity - ) - self.agent.velocity += acceleration * dt - - # Step physics. - self.space.step(dt) - - # compute reward - goal_body = self._get_goal_pose_body(self.goal_pose) - goal_geom = pymunk_to_shapely(goal_body, self.block.shapes) - block_geom = pymunk_to_shapely(self.block, self.block.shapes) - - intersection_area = goal_geom.intersection(block_geom).area - goal_area = goal_geom.area - coverage = intersection_area / goal_area - reward = np.clip(coverage / self.success_threshold, 0, 1) - done = coverage > self.success_threshold - - observation = self._get_obs() - info = self._get_info() - - return observation, reward, done, info - - def render(self, mode): - return self._render_frame(mode) - - def teleop_agent(self): - TeleopAgent = collections.namedtuple("TeleopAgent", ["act"]) - - def act(obs): - act = None - mouse_position = pymunk.pygame_util.from_pygame(Vec2d(*pygame.mouse.get_pos()), self.screen) - if self.teleop or (mouse_position - self.agent.position).length < 30: - self.teleop = True - act = mouse_position - return act - - return TeleopAgent(act) - - def _get_obs(self): - obs = np.array( - tuple(self.agent.position) + tuple(self.block.position) + (self.block.angle % (2 * np.pi),) - ) - return obs - - def _get_goal_pose_body(self, pose): - mass = 1 - inertia = pymunk.moment_for_box(mass, (50, 100)) - body = pymunk.Body(mass, inertia) - # preserving the legacy assignment order for compatibility - # the order here doesn't matter somehow, maybe because CoM is aligned with body origin - body.position = pose[:2].tolist() - body.angle = pose[2] - return body - - def _get_info(self): - n_steps = self.sim_hz // self.control_hz - n_contact_points_per_step = int(np.ceil(self.n_contact_points / n_steps)) - info = { - "pos_agent": np.array(self.agent.position), - "vel_agent": np.array(self.agent.velocity), - "block_pose": np.array(list(self.block.position) + [self.block.angle]), - "goal_pose": self.goal_pose, - "n_contacts": n_contact_points_per_step, - } - return info - - def _render_frame(self, mode): - if self.window is None and mode == "human": - pygame.init() - pygame.display.init() - self.window = pygame.display.set_mode((self.window_size, self.window_size)) - if self.clock is None and mode == "human": - self.clock = pygame.time.Clock() - - canvas = pygame.Surface((self.window_size, self.window_size)) - canvas.fill((255, 255, 255)) - self.screen = canvas - - draw_options = DrawOptions(canvas) - - # Draw goal pose. - goal_body = self._get_goal_pose_body(self.goal_pose) - for shape in self.block.shapes: - goal_points = [ - pymunk.pygame_util.to_pygame(goal_body.local_to_world(v), draw_options.surface) - for v in shape.get_vertices() - ] - goal_points += [goal_points[0]] - pygame.draw.polygon(canvas, self.goal_color, goal_points) - - # Draw agent and block. 
- self.space.debug_draw(draw_options) - - if mode == "human": - # The following line copies our drawings from `canvas` to the visible window - self.window.blit(canvas, canvas.get_rect()) - pygame.event.pump() - pygame.display.update() - - # the clock is already ticked during in step for "human" - - img = np.transpose(np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2)) - img = cv2.resize(img, (self.render_size, self.render_size)) - if self.render_action and self.latest_action is not None: - action = np.array(self.latest_action) - coord = (action / 512 * 96).astype(np.int32) - marker_size = int(8 / 96 * self.render_size) - thickness = int(1 / 96 * self.render_size) - cv2.drawMarker( - img, - coord, - color=(255, 0, 0), - markerType=cv2.MARKER_CROSS, - markerSize=marker_size, - thickness=thickness, - ) - return img - - def close(self): - if self.window is not None: - pygame.display.quit() - pygame.quit() - - def seed(self, seed=None): - if seed is None: - seed = np.random.randint(0, 25536) - self._seed = seed - self.np_random = np.random.default_rng(seed) - - def _handle_collision(self, arbiter, space, data): - self.n_contact_points += len(arbiter.contact_point_set.points) - - def _set_state(self, state): - if isinstance(state, np.ndarray): - state = state.tolist() - pos_agent = state[:2] - pos_block = state[2:4] - rot_block = state[4] - self.agent.position = pos_agent - # setting angle rotates with respect to center of mass - # therefore will modify the geometric position - # if not the same as CoM - # therefore should be modified first. - if self.legacy: - # for compatibility with legacy data - self.block.position = pos_block - self.block.angle = rot_block - else: - self.block.angle = rot_block - self.block.position = pos_block - - # Run physics to take effect - self.space.step(1.0 / self.sim_hz) - - def _set_state_local(self, state_local): - agent_pos_local = state_local[:2] - block_pose_local = state_local[2:] - tf_img_obj = st.AffineTransform(translation=self.goal_pose[:2], rotation=self.goal_pose[2]) - tf_obj_new = st.AffineTransform(translation=block_pose_local[:2], rotation=block_pose_local[2]) - tf_img_new = st.AffineTransform(matrix=tf_img_obj.params @ tf_obj_new.params) - agent_pos_new = tf_img_new(agent_pos_local) - new_state = np.array(list(agent_pos_new[0]) + list(tf_img_new.translation) + [tf_img_new.rotation]) - self._set_state(new_state) - return new_state - - def _setup(self): - self.space = pymunk.Space() - self.space.gravity = 0, 0 - self.space.damping = 0 - self.teleop = False - self.render_buffer = [] - - # Add walls. - walls = [ - self._add_segment((5, 506), (5, 5), 2), - self._add_segment((5, 5), (506, 5), 2), - self._add_segment((506, 5), (506, 506), 2), - self._add_segment((5, 506), (506, 506), 2), - ] - self.space.add(*walls) - - # Add agent, block, and goal zone. - self.agent = self.add_circle((256, 400), 15) - self.block = self.add_tee((256, 300), 0) - self.goal_color = pygame.Color("LightGreen") - self.goal_pose = np.array([256, 256, np.pi / 4]) # x, y, theta (in radians) - - # Add collision handling - self.collision_handeler = self.space.add_collision_handler(0, 0) - self.collision_handeler.post_solve = self._handle_collision - self.n_contact_points = 0 - - self.max_score = 50 * 100 - self.success_threshold = 0.95 # 95% coverage. 
- - def _add_segment(self, a, b, radius): - shape = pymunk.Segment(self.space.static_body, a, b, radius) - shape.color = pygame.Color("LightGray") # https://htmlcolorcodes.com/color-names - return shape - - def add_circle(self, position, radius): - body = pymunk.Body(body_type=pymunk.Body.KINEMATIC) - body.position = position - body.friction = 1 - shape = pymunk.Circle(body, radius) - shape.color = pygame.Color("RoyalBlue") - self.space.add(body, shape) - return body - - def add_box(self, position, height, width): - mass = 1 - inertia = pymunk.moment_for_box(mass, (height, width)) - body = pymunk.Body(mass, inertia) - body.position = position - shape = pymunk.Poly.create_box(body, (height, width)) - shape.color = pygame.Color("LightSlateGray") - self.space.add(body, shape) - return body - - def add_tee(self, position, angle, scale=30, color="LightSlateGray", mask=None): - if mask is None: - mask = pymunk.ShapeFilter.ALL_MASKS() - mass = 1 - length = 4 - vertices1 = [ - (-length * scale / 2, scale), - (length * scale / 2, scale), - (length * scale / 2, 0), - (-length * scale / 2, 0), - ] - inertia1 = pymunk.moment_for_poly(mass, vertices=vertices1) - vertices2 = [ - (-scale / 2, scale), - (-scale / 2, length * scale), - (scale / 2, length * scale), - (scale / 2, scale), - ] - inertia2 = pymunk.moment_for_poly(mass, vertices=vertices1) - body = pymunk.Body(mass, inertia1 + inertia2) - shape1 = pymunk.Poly(body, vertices1) - shape2 = pymunk.Poly(body, vertices2) - shape1.color = pygame.Color(color) - shape2.color = pygame.Color(color) - shape1.filter = pymunk.ShapeFilter(mask=mask) - shape2.filter = pymunk.ShapeFilter(mask=mask) - body.center_of_gravity = (shape1.center_of_gravity + shape2.center_of_gravity) / 2 - body.position = position - body.angle = angle - body.friction = 1 - self.space.add(body, shape1, shape2) - return body diff --git a/lerobot/common/envs/pusht/pusht_image_env.py b/lerobot/common/envs/pusht/pusht_image_env.py deleted file mode 100644 index 6547835a..00000000 --- a/lerobot/common/envs/pusht/pusht_image_env.py +++ /dev/null @@ -1,41 +0,0 @@ -import numpy as np -from gymnasium import spaces - -from lerobot.common.envs.pusht.pusht_env import PushTEnv - - -class PushTImageEnv(PushTEnv): - metadata = {"render.modes": ["rgb_array"], "video.frames_per_second": 10} - - # Note: legacy defaults to True for compatibility with original - def __init__(self, legacy=True, block_cog=None, damping=None, render_size=96): - super().__init__( - legacy=legacy, block_cog=block_cog, damping=damping, render_size=render_size, render_action=False - ) - ws = self.window_size - self.observation_space = spaces.Dict( - { - "image": spaces.Box(low=0, high=1, shape=(3, render_size, render_size), dtype=np.float32), - "agent_pos": spaces.Box(low=0, high=ws, shape=(2,), dtype=np.float32), - } - ) - self.render_cache = None - - def _get_obs(self): - img = super()._render_frame(mode="rgb_array") - - agent_pos = np.array(self.agent.position) - img_obs = np.moveaxis(img, -1, 0) - obs = {"image": img_obs, "agent_pos": agent_pos} - - self.render_cache = img - - return obs - - def render(self, mode): - assert mode == "rgb_array" - - if self.render_cache is None: - self._get_obs() - - return self.render_cache diff --git a/lerobot/common/envs/pusht/pymunk_override.py b/lerobot/common/envs/pusht/pymunk_override.py deleted file mode 100644 index 7ad76237..00000000 --- a/lerobot/common/envs/pusht/pymunk_override.py +++ /dev/null @@ -1,244 +0,0 @@ -# 
---------------------------------------------------------------------------- -# pymunk -# Copyright (c) 2007-2016 Victor Blomqvist -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -# ---------------------------------------------------------------------------- - -"""This submodule contains helper functions to help with quick prototyping -using pymunk together with pygame. - -Intended to help with debugging and prototyping, not for actual production use -in a full application. The methods contained in this module is opinionated -about your coordinate system and not in any way optimized. -""" - -__docformat__ = "reStructuredText" - -__all__ = [ - "DrawOptions", - "get_mouse_pos", - "to_pygame", - "from_pygame", - # "lighten", - "positive_y_is_up", -] - -from typing import Sequence, Tuple - -import numpy as np -import pygame -import pymunk -from pymunk.space_debug_draw_options import SpaceDebugColor -from pymunk.vec2d import Vec2d - -positive_y_is_up: bool = False -"""Make increasing values of y point upwards. - -When True:: - - y - ^ - | . (3, 3) - | - | . (2, 2) - | - +------ > x - -When False:: - - +------ > x - | - | . (2, 2) - | - | . (3, 3) - v - y - -""" - - -class DrawOptions(pymunk.SpaceDebugDrawOptions): - def __init__(self, surface: pygame.Surface) -> None: - """Draw a pymunk.Space on a pygame.Surface object. - - Typical usage:: - - >>> import pymunk - >>> surface = pygame.Surface((10,10)) - >>> space = pymunk.Space() - >>> options = pymunk.pygame_util.DrawOptions(surface) - >>> space.debug_draw(options) - - You can control the color of a shape by setting shape.color to the color - you want it drawn in:: - - >>> c = pymunk.Circle(None, 10) - >>> c.color = pygame.Color("pink") - - See pygame_util.demo.py for a full example - - Since pygame uses a coordinate system where y points down (in contrast - to many other cases), you either have to make the physics simulation - with Pymunk also behave in that way, or flip everything when you draw. - - The easiest is probably to just make the simulation behave the same - way as Pygame does. In that way all coordinates used are in the same - orientation and easy to reason about:: - - >>> space = pymunk.Space() - >>> space.gravity = (0, -1000) - >>> body = pymunk.Body() - >>> body.position = (0, 0) # will be positioned in the top left corner - >>> space.debug_draw(options) - - To flip the drawing its possible to set the module property - :py:data:`positive_y_is_up` to True. 
Then the pygame drawing will flip - the simulation upside down before drawing:: - - >>> positive_y_is_up = True - >>> body = pymunk.Body() - >>> body.position = (0, 0) - >>> # Body will be position in bottom left corner - - :Parameters: - surface : pygame.Surface - Surface that the objects will be drawn on - """ - self.surface = surface - super().__init__() - - def draw_circle( - self, - pos: Vec2d, - angle: float, - radius: float, - outline_color: SpaceDebugColor, - fill_color: SpaceDebugColor, - ) -> None: - p = to_pygame(pos, self.surface) - - pygame.draw.circle(self.surface, fill_color.as_int(), p, round(radius), 0) - pygame.draw.circle(self.surface, light_color(fill_color).as_int(), p, round(radius - 4), 0) - - # circle_edge = pos + Vec2d(radius, 0).rotated(angle) - # p2 = to_pygame(circle_edge, self.surface) - # line_r = 2 if radius > 20 else 1 - # pygame.draw.lines(self.surface, outline_color.as_int(), False, [p, p2], line_r) - - def draw_segment(self, a: Vec2d, b: Vec2d, color: SpaceDebugColor) -> None: - p1 = to_pygame(a, self.surface) - p2 = to_pygame(b, self.surface) - - pygame.draw.aalines(self.surface, color.as_int(), False, [p1, p2]) - - def draw_fat_segment( - self, - a: Tuple[float, float], - b: Tuple[float, float], - radius: float, - outline_color: SpaceDebugColor, - fill_color: SpaceDebugColor, - ) -> None: - p1 = to_pygame(a, self.surface) - p2 = to_pygame(b, self.surface) - - r = round(max(1, radius * 2)) - pygame.draw.lines(self.surface, fill_color.as_int(), False, [p1, p2], r) - if r > 2: - orthog = [abs(p2[1] - p1[1]), abs(p2[0] - p1[0])] - if orthog[0] == 0 and orthog[1] == 0: - return - scale = radius / (orthog[0] * orthog[0] + orthog[1] * orthog[1]) ** 0.5 - orthog[0] = round(orthog[0] * scale) - orthog[1] = round(orthog[1] * scale) - points = [ - (p1[0] - orthog[0], p1[1] - orthog[1]), - (p1[0] + orthog[0], p1[1] + orthog[1]), - (p2[0] + orthog[0], p2[1] + orthog[1]), - (p2[0] - orthog[0], p2[1] - orthog[1]), - ] - pygame.draw.polygon(self.surface, fill_color.as_int(), points) - pygame.draw.circle( - self.surface, - fill_color.as_int(), - (round(p1[0]), round(p1[1])), - round(radius), - ) - pygame.draw.circle( - self.surface, - fill_color.as_int(), - (round(p2[0]), round(p2[1])), - round(radius), - ) - - def draw_polygon( - self, - verts: Sequence[Tuple[float, float]], - radius: float, - outline_color: SpaceDebugColor, - fill_color: SpaceDebugColor, - ) -> None: - ps = [to_pygame(v, self.surface) for v in verts] - ps += [ps[0]] - - radius = 2 - pygame.draw.polygon(self.surface, light_color(fill_color).as_int(), ps) - - if radius > 0: - for i in range(len(verts)): - a = verts[i] - b = verts[(i + 1) % len(verts)] - self.draw_fat_segment(a, b, radius, fill_color, fill_color) - - def draw_dot(self, size: float, pos: Tuple[float, float], color: SpaceDebugColor) -> None: - p = to_pygame(pos, self.surface) - pygame.draw.circle(self.surface, color.as_int(), p, round(size), 0) - - -def get_mouse_pos(surface: pygame.Surface) -> Tuple[int, int]: - """Get position of the mouse pointer in pymunk coordinates.""" - p = pygame.mouse.get_pos() - return from_pygame(p, surface) - - -def to_pygame(p: Tuple[float, float], surface: pygame.Surface) -> Tuple[int, int]: - """Convenience method to convert pymunk coordinates to pygame surface - local coordinates. - - Note that in case positive_y_is_up is False, this function won't actually do - anything except converting the point to integers. 
- """ - if positive_y_is_up: - return round(p[0]), surface.get_height() - round(p[1]) - else: - return round(p[0]), round(p[1]) - - -def from_pygame(p: Tuple[float, float], surface: pygame.Surface) -> Tuple[int, int]: - """Convenience method to convert pygame surface local coordinates to - pymunk coordinates - """ - return to_pygame(p, surface) - - -def light_color(color: SpaceDebugColor): - color = np.minimum(1.2 * np.float32([color.r, color.g, color.b, color.a]), np.float32([255])) - color = SpaceDebugColor(r=color[0], g=color[1], b=color[2], a=color[3]) - return color diff --git a/lerobot/common/envs/simxarm/env.py b/lerobot/common/envs/simxarm/env.py deleted file mode 100644 index 8ce6b24c..00000000 --- a/lerobot/common/envs/simxarm/env.py +++ /dev/null @@ -1,237 +0,0 @@ -import importlib -import logging -from collections import deque -from typing import Optional - -import einops -import numpy as np -import torch -from tensordict import TensorDict -from torchrl.data.tensor_specs import ( - BoundedTensorSpec, - CompositeSpec, - DiscreteTensorSpec, - UnboundedContinuousTensorSpec, -) -from torchrl.envs.libs.gym import _gym_to_torchrl_spec_transform - -from lerobot.common.envs.abstract import AbstractEnv -from lerobot.common.utils import set_global_seed - -MAX_NUM_ACTIONS = 4 - -_has_gym = importlib.util.find_spec("gymnasium") is not None - - -class SimxarmEnv(AbstractEnv): - name = "simxarm" - available_tasks = ["lift"] - - def __init__( - self, - task, - frame_skip: int = 1, - from_pixels: bool = False, - pixels_only: bool = False, - image_size=None, - seed=1337, - device="cpu", - num_prev_obs=0, - num_prev_action=0, - ): - super().__init__( - task=task, - frame_skip=frame_skip, - from_pixels=from_pixels, - pixels_only=pixels_only, - image_size=image_size, - seed=seed, - device=device, - num_prev_obs=num_prev_obs, - num_prev_action=num_prev_action, - ) - - def _make_env(self): - if not _has_gym: - raise ImportError("Cannot import gymnasium.") - - import gymnasium as gym - - from lerobot.common.envs.simxarm.simxarm import TASKS - - if self.task not in TASKS: - raise ValueError(f"Unknown task {self.task}. 
Must be one of {list(TASKS.keys())}") - - self._env = TASKS[self.task]["env"]() - - num_actions = len(TASKS[self.task]["action_space"]) - self._action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(num_actions,)) - self._action_padding = np.zeros((MAX_NUM_ACTIONS - num_actions), dtype=np.float32) - if "w" not in TASKS[self.task]["action_space"]: - self._action_padding[-1] = 1.0 - - def render(self, mode="rgb_array", width=384, height=384): - return self._env.render(mode, width=width, height=height) - - def _format_raw_obs(self, raw_obs): - if self.from_pixels: - image = self.render(mode="rgb_array", width=self.image_size, height=self.image_size) - image = image.transpose(2, 0, 1) # (H, W, C) -> (C, H, W) - image = torch.tensor(image.copy(), dtype=torch.uint8) - - obs = {"image": image} - - if not self.pixels_only: - obs["state"] = torch.tensor(self._env.robot_state, dtype=torch.float32) - else: - obs = {"state": torch.tensor(raw_obs["observation"], dtype=torch.float32)} - - # obs = TensorDict(obs, batch_size=[]) - return obs - - def _reset(self, tensordict: Optional[TensorDict] = None): - td = tensordict - if td is None or td.is_empty(): - raw_obs = self._env.reset() - - obs = self._format_raw_obs(raw_obs) - - if self.num_prev_obs > 0: - stacked_obs = {} - if "image" in obs: - self._prev_obs_image_queue = deque( - [obs["image"]] * (self.num_prev_obs + 1), maxlen=(self.num_prev_obs + 1) - ) - stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue)) - if "state" in obs: - self._prev_obs_state_queue = deque( - [obs["state"]] * (self.num_prev_obs + 1), maxlen=(self.num_prev_obs + 1) - ) - stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue)) - obs = stacked_obs - - td = TensorDict( - { - "observation": TensorDict(obs, batch_size=[]), - "done": torch.tensor([False], dtype=torch.bool), - }, - batch_size=[], - ) - else: - raise NotImplementedError() - - return td - - def _step(self, tensordict: TensorDict): - td = tensordict - action = td["action"].numpy() - # step expects shape=(4,) so we pad if necessary - action = np.concatenate([action, self._action_padding]) - # TODO(rcadene): add info["is_success"] and info["success"] ? 
- sum_reward = 0 - - if action.ndim == 1: - action = einops.repeat(action, "c -> t c", t=self.frame_skip) - else: - if self.frame_skip > 1: - raise NotImplementedError() - - num_action_steps = action.shape[0] - for i in range(num_action_steps): - raw_obs, reward, done, info = self._env.step(action[i]) - sum_reward += reward - - obs = self._format_raw_obs(raw_obs) - - if self.num_prev_obs > 0: - stacked_obs = {} - if "image" in obs: - self._prev_obs_image_queue.append(obs["image"]) - stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue)) - if "state" in obs: - self._prev_obs_state_queue.append(obs["state"]) - stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue)) - obs = stacked_obs - - td = TensorDict( - { - "observation": self._format_raw_obs(raw_obs), - "reward": torch.tensor([sum_reward], dtype=torch.float32), - "done": torch.tensor([done], dtype=torch.bool), - "success": torch.tensor([info["success"]], dtype=torch.bool), - }, - batch_size=[], - ) - return td - - def _make_spec(self): - obs = {} - if self.from_pixels: - image_shape = (3, self.image_size, self.image_size) - if self.num_prev_obs > 0: - image_shape = (self.num_prev_obs + 1, *image_shape) - - obs["image"] = BoundedTensorSpec( - low=0, - high=255, - shape=image_shape, - dtype=torch.uint8, - device=self.device, - ) - if not self.pixels_only: - state_shape = (len(self._env.robot_state),) - if self.num_prev_obs > 0: - state_shape = (self.num_prev_obs + 1, *state_shape) - - obs["state"] = UnboundedContinuousTensorSpec( - shape=state_shape, - dtype=torch.float32, - device=self.device, - ) - else: - # TODO(rcadene): add observation_space achieved_goal and desired_goal? - state_shape = self._env.observation_space["observation"].shape - if self.num_prev_obs > 0: - state_shape = (self.num_prev_obs + 1, *state_shape) - - obs["state"] = UnboundedContinuousTensorSpec( - # TODO: - shape=state_shape, - dtype=torch.float32, - device=self.device, - ) - self.observation_spec = CompositeSpec({"observation": obs}) - - self.action_spec = _gym_to_torchrl_spec_transform( - self._action_space, - device=self.device, - ) - - self.reward_spec = UnboundedContinuousTensorSpec( - shape=(1,), - dtype=torch.float32, - device=self.device, - ) - - self.done_spec = CompositeSpec( - { - "done": DiscreteTensorSpec( - 2, - shape=(1,), - dtype=torch.bool, - device=self.device, - ), - "success": DiscreteTensorSpec( - 2, - shape=(1,), - dtype=torch.bool, - device=self.device, - ), - } - ) - - def _set_seed(self, seed: Optional[int]): - set_global_seed(seed) - self._seed = seed - # TODO(aliberts): change self._reset so that it takes in a seed value - logging.warning("simxarm env is not properly seeded") diff --git a/lerobot/common/envs/simxarm/simxarm/__init__.py b/lerobot/common/envs/simxarm/simxarm/__init__.py deleted file mode 100644 index 903d6042..00000000 --- a/lerobot/common/envs/simxarm/simxarm/__init__.py +++ /dev/null @@ -1,166 +0,0 @@ -from collections import OrderedDict, deque - -import gymnasium as gym -import numpy as np -from gymnasium.wrappers import TimeLimit - -from lerobot.common.envs.simxarm.simxarm.tasks.base import Base as Base -from lerobot.common.envs.simxarm.simxarm.tasks.lift import Lift -from lerobot.common.envs.simxarm.simxarm.tasks.peg_in_box import PegInBox -from lerobot.common.envs.simxarm.simxarm.tasks.push import Push -from lerobot.common.envs.simxarm.simxarm.tasks.reach import Reach - -TASKS = OrderedDict( - ( - ( - "reach", - { - "env": Reach, - "action_space": "xyz", - "episode_length": 50, - 
"description": "Reach a target location with the end effector", - }, - ), - ( - "push", - { - "env": Push, - "action_space": "xyz", - "episode_length": 50, - "description": "Push a cube to a target location", - }, - ), - ( - "peg_in_box", - { - "env": PegInBox, - "action_space": "xyz", - "episode_length": 50, - "description": "Insert a peg into a box", - }, - ), - ( - "lift", - { - "env": Lift, - "action_space": "xyzw", - "episode_length": 50, - "description": "Lift a cube above a height threshold", - }, - ), - ) -) - - -class SimXarmWrapper(gym.Wrapper): - """ - A wrapper for the SimXarm environments. This wrapper is used to - convert the action and observation spaces to the correct format. - """ - - def __init__(self, env, task, obs_mode, image_size, action_repeat, frame_stack=1, channel_last=False): - super().__init__(env) - self._env = env - self.obs_mode = obs_mode - self.image_size = image_size - self.action_repeat = action_repeat - self.frame_stack = frame_stack - self._frames = deque([], maxlen=frame_stack) - self.channel_last = channel_last - self._max_episode_steps = task["episode_length"] // action_repeat - - image_shape = ( - (image_size, image_size, 3 * frame_stack) - if channel_last - else (3 * frame_stack, image_size, image_size) - ) - if obs_mode == "state": - self.observation_space = env.observation_space["observation"] - elif obs_mode == "rgb": - self.observation_space = gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8) - elif obs_mode == "all": - self.observation_space = gym.spaces.Dict( - state=gym.spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float32), - rgb=gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8), - ) - else: - raise ValueError(f"Unknown obs_mode {obs_mode}. Must be one of [rgb, all, state]") - self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(len(task["action_space"]),)) - self.action_padding = np.zeros(4 - len(task["action_space"]), dtype=np.float32) - if "w" not in task["action_space"]: - self.action_padding[-1] = 1.0 - - def _render_obs(self): - obs = self.render(mode="rgb_array", width=self.image_size, height=self.image_size) - if not self.channel_last: - obs = obs.transpose(2, 0, 1) - return obs.copy() - - def _update_frames(self, reset=False): - pixels = self._render_obs() - self._frames.append(pixels) - if reset: - for _ in range(1, self.frame_stack): - self._frames.append(pixels) - assert len(self._frames) == self.frame_stack - - def transform_obs(self, obs, reset=False): - if self.obs_mode == "state": - return obs["observation"] - elif self.obs_mode == "rgb": - self._update_frames(reset=reset) - rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) - return rgb_obs - elif self.obs_mode == "all": - self._update_frames(reset=reset) - rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0) - return OrderedDict((("rgb", rgb_obs), ("state", self.robot_state))) - else: - raise ValueError(f"Unknown obs_mode {self.obs_mode}. 
Must be one of [rgb, all, state]") - - def reset(self): - return self.transform_obs(self._env.reset(), reset=True) - - def step(self, action): - action = np.concatenate([action, self.action_padding]) - reward = 0.0 - for _ in range(self.action_repeat): - obs, r, done, info = self._env.step(action) - reward += r - return self.transform_obs(obs), reward, done, info - - def render(self, mode="rgb_array", width=384, height=384, **kwargs): - return self._env.render(mode, width=width, height=height) - - @property - def state(self): - return self._env.robot_state - - -def make(task, obs_mode="state", image_size=84, action_repeat=1, frame_stack=1, channel_last=False, seed=0): - """ - Create a new environment. - Args: - task (str): The task to create an environment for. Must be one of: - - 'reach' - - 'push' - - 'peg-in-box' - - 'lift' - obs_mode (str): The observation mode to use. Must be one of: - - 'state': Only state observations - - 'rgb': RGB images - - 'all': RGB images and state observations - image_size (int): The size of the image observations - action_repeat (int): The number of times to repeat the action - seed (int): The random seed to use - Returns: - gym.Env: The environment - """ - if task not in TASKS: - raise ValueError(f"Unknown task {task}. Must be one of {list(TASKS.keys())}") - env = TASKS[task]["env"]() - env = TimeLimit(env, TASKS[task]["episode_length"]) - env = SimXarmWrapper(env, TASKS[task], obs_mode, image_size, action_repeat, frame_stack, channel_last) - env.seed(seed) - - return env
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/__init__.py b/lerobot/common/envs/simxarm/simxarm/tasks/__init__.py deleted file mode 100644 index e69de29b..00000000
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/lift.xml b/lerobot/common/envs/simxarm/simxarm/tasks/assets/lift.xml deleted file mode 100644 index 92231f92..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/lift.xml +++ /dev/null @@ -1,53 +0,0 @@ [53 deleted lines of MuJoCo XML; markup stripped during text extraction]
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/base_link.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/base_link.stl deleted file mode 100644 index f1f52955..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/base_link.stl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:21fb81ae7fba19e3c6b2d2ca60c8051712ba273357287eb5a397d92d61c7a736 -size 1211434
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_inner.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_inner.stl deleted file mode 100644 index 6cb88945..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_inner.stl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:be68ce180d11630a667a5f37f4dffcc3feebe4217d4bb3912c813b6d9ca3ec66 -size 3284
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_inner2.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_inner2.stl deleted file mode 100644 index dab55ef5..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_inner2.stl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2c6448552bf6b1c4f17334d686a5320ce051bcdfe31431edf69303d8a570d1de -size 3284
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_outer.stl
b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_outer.stl deleted file mode 100644 index 21cf11fa..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_outer.stl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:748b9e197e6521914f18d1f6383a36f211136b3f33f2ad2a8c11b9f921c2cf86 -size 6284 diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_finger.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_finger.stl deleted file mode 100644 index 6bf4e502..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_finger.stl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a44756eb72f9c214cb37e61dc209cd7073fdff3e4271a7423476ef6fd090d2d4 -size 242684 diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_inner_knuckle.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_inner_knuckle.stl deleted file mode 100644 index 817c7e1d..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_inner_knuckle.stl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e8e48692ad26837bb3d6a97582c89784d09948fc09bfe4e5a59017859ff04dac -size 366284 diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_outer_knuckle.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_outer_knuckle.stl deleted file mode 100644 index 010c0f3b..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_outer_knuckle.stl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:501665812b08d67e764390db781e839adc6896a9540301d60adf606f57648921 -size 22284 diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link1.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link1.stl deleted file mode 100644 index f2b676f2..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link1.stl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:34b541122df84d2ef5fcb91b715eb19659dc15ad8d44a191dde481f780265636 -size 184184 diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link2.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link2.stl deleted file mode 100644 index bf93580c..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link2.stl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:61e641cd47c169ecef779683332e00e4914db729bf02dfb61bfbe69351827455 -size 225584 diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link3.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link3.stl deleted file mode 100644 index d316d233..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link3.stl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e2798e7946dd70046c95455d5ba96392d0b54a6069caba91dc4ca66e1379b42 -size 237084 diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link4.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link4.stl deleted file mode 100644 index f6d5fe94..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link4.stl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c757fee95f873191a0633c355c07a360032960771cabbd7593a6cdb0f1ffb089 -size 243684 diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link5.stl 
b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link5.stl deleted file mode 100644 index e037b8b9..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link5.stl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:715ad5787c5dab57589937fd47289882707b5e1eb997e340d567785b02f4ec90 -size 229084
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link6.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link6.stl deleted file mode 100644 index 198c5300..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link6.stl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:85b320aa420497827223d16d492bba8de091173374e361396fc7a5dad7bdb0cb -size 399384
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link7.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link7.stl deleted file mode 100644 index ce9a39ac..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link7.stl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:97115d848fbf802cb770cd9be639ae2af993103b9d9bbb0c50c943c738a36f18 -size 231684
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link_base.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link_base.stl deleted file mode 100644 index 110b9531..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link_base.stl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f6fcbc18258090eb56c21cfb17baa5ae43abc98b1958cd366f3a73b9898fc7f0 -size 2106184
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_finger.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_finger.stl deleted file mode 100644 index 03f26e9a..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_finger.stl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c5dee87c7f37baf554b8456ebfe0b3e8ed0b22b8938bd1add6505c2ad6d32c7d -size 242684
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_inner_knuckle.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_inner_knuckle.stl deleted file mode 100644 index 8586f344..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_inner_knuckle.stl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b41dd2c2c550281bf78d7cc6fa117b14786700e5c453560a0cb5fd6dfa0ffb3e -size 366284
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_outer_knuckle.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_outer_knuckle.stl deleted file mode 100644 index ae7afc25..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_outer_knuckle.stl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:75ca1107d0a42a0f03802a9a49cab48419b31851ee8935f8f1ca06be1c1c91e8 -size 22284
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/peg_in_box.xml b/lerobot/common/envs/simxarm/simxarm/tasks/assets/peg_in_box.xml deleted file mode 100644 index 0f85459f..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/peg_in_box.xml +++ /dev/null @@ -1,74 +0,0 @@ [74 deleted lines of MuJoCo XML; markup stripped during text extraction]
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/push.xml
b/lerobot/common/envs/simxarm/simxarm/tasks/assets/push.xml deleted file mode 100644 index 42a78c8a..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/push.xml +++ /dev/null @@ -1,54 +0,0 @@ [54 deleted lines of MuJoCo XML; markup stripped during text extraction]
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/reach.xml b/lerobot/common/envs/simxarm/simxarm/tasks/assets/reach.xml deleted file mode 100644 index ded6d209..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/reach.xml +++ /dev/null @@ -1,48 +0,0 @@ [48 deleted lines of MuJoCo XML; markup stripped during text extraction]
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/shared.xml b/lerobot/common/envs/simxarm/simxarm/tasks/assets/shared.xml deleted file mode 100644 index ee56f8f0..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/shared.xml +++ /dev/null @@ -1,51 +0,0 @@ [51 deleted lines of MuJoCo XML; markup stripped during text extraction]
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/xarm.xml b/lerobot/common/envs/simxarm/simxarm/tasks/assets/xarm.xml deleted file mode 100644 index 023474d6..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/xarm.xml +++ /dev/null @@ -1,88 +0,0 @@ [88 deleted lines of MuJoCo XML; markup stripped during text extraction]
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/base.py b/lerobot/common/envs/simxarm/simxarm/tasks/base.py deleted file mode 100644 index b937b290..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/base.py +++ /dev/null @@ -1,145 +0,0 @@ -import os - -import mujoco -import numpy as np -from gymnasium_robotics.envs import robot_env - -from lerobot.common.envs.simxarm.simxarm.tasks import mocap - - -class Base(robot_env.MujocoRobotEnv): - """ - Superclass for all simxarm environments.
- Args: - xml_name (str): name of the xml environment file - gripper_rotation (list): initial rotation of the gripper (given as a quaternion) - """ - - def __init__(self, xml_name, gripper_rotation=None): - if gripper_rotation is None: - gripper_rotation = [0, 1, 0, 0] - self.gripper_rotation = np.array(gripper_rotation, dtype=np.float32) - self.center_of_table = np.array([1.655, 0.3, 0.63625]) - self.max_z = 1.2 - self.min_z = 0.2 - super().__init__( - model_path=os.path.join(os.path.dirname(__file__), "assets", xml_name + ".xml"), - n_substeps=20, - n_actions=4, - initial_qpos={}, - ) - - @property - def dt(self): - return self.n_substeps * self.model.opt.timestep - - @property - def eef(self): - return self._utils.get_site_xpos(self.model, self.data, "grasp") - - @property - def obj(self): - return self._utils.get_site_xpos(self.model, self.data, "object_site") - - @property - def robot_state(self): - gripper_angle = self._utils.get_joint_qpos(self.model, self.data, "right_outer_knuckle_joint") - return np.concatenate([self.eef, gripper_angle]) - - def is_success(self): - return NotImplementedError() - - def get_reward(self): - raise NotImplementedError() - - def _sample_goal(self): - raise NotImplementedError() - - def get_obs(self): - return self._get_obs() - - def _step_callback(self): - self._mujoco.mj_forward(self.model, self.data) - - def _limit_gripper(self, gripper_pos, pos_ctrl): - if gripper_pos[0] > self.center_of_table[0] - 0.105 + 0.15: - pos_ctrl[0] = min(pos_ctrl[0], 0) - if gripper_pos[0] < self.center_of_table[0] - 0.105 - 0.3: - pos_ctrl[0] = max(pos_ctrl[0], 0) - if gripper_pos[1] > self.center_of_table[1] + 0.3: - pos_ctrl[1] = min(pos_ctrl[1], 0) - if gripper_pos[1] < self.center_of_table[1] - 0.3: - pos_ctrl[1] = max(pos_ctrl[1], 0) - if gripper_pos[2] > self.max_z: - pos_ctrl[2] = min(pos_ctrl[2], 0) - if gripper_pos[2] < self.min_z: - pos_ctrl[2] = max(pos_ctrl[2], 0) - return pos_ctrl - - def _apply_action(self, action): - assert action.shape == (4,) - action = action.copy() - pos_ctrl, gripper_ctrl = action[:3], action[3] - pos_ctrl = self._limit_gripper( - self._utils.get_site_xpos(self.model, self.data, "grasp"), pos_ctrl - ) * (1 / self.n_substeps) - gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) - mocap.apply_action( - self.model, - self._model_names, - self.data, - np.concatenate([pos_ctrl, self.gripper_rotation, gripper_ctrl]), - ) - - def _render_callback(self): - self._mujoco.mj_forward(self.model, self.data) - - def _reset_sim(self): - self.data.time = self.initial_time - self.data.qpos[:] = np.copy(self.initial_qpos) - self.data.qvel[:] = np.copy(self.initial_qvel) - self._sample_goal() - self._mujoco.mj_step(self.model, self.data, nstep=10) - return True - - def _set_gripper(self, gripper_pos, gripper_rotation): - self._utils.set_mocap_pos(self.model, self.data, "robot0:mocap", gripper_pos) - self._utils.set_mocap_quat(self.model, self.data, "robot0:mocap", gripper_rotation) - self._utils.set_joint_qpos(self.model, self.data, "right_outer_knuckle_joint", 0) - self.data.qpos[10] = 0.0 - self.data.qpos[12] = 0.0 - - def _env_setup(self, initial_qpos): - for name, value in initial_qpos.items(): - self.data.set_joint_qpos(name, value) - mocap.reset(self.model, self.data) - mujoco.mj_forward(self.model, self.data) - self._sample_goal() - mujoco.mj_forward(self.model, self.data) - - def reset(self): - self._reset_sim() - return self._get_obs() - - def step(self, action): - assert action.shape == (4,) - assert self.action_space.contains(action), 
"{!r} ({}) invalid".format(action, type(action)) - self._apply_action(action) - self._mujoco.mj_step(self.model, self.data, nstep=2) - self._step_callback() - obs = self._get_obs() - reward = self.get_reward() - done = False - info = {"is_success": self.is_success(), "success": self.is_success()} - return obs, reward, done, info - - def render(self, mode="rgb_array", width=384, height=384): - self._render_callback() - # HACK - self.model.vis.global_.offwidth = width - self.model.vis.global_.offheight = height - return self.mujoco_renderer.render(mode) - - def close(self): - if self.mujoco_renderer is not None: - self.mujoco_renderer.close() diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/lift.py b/lerobot/common/envs/simxarm/simxarm/tasks/lift.py deleted file mode 100644 index 0b11196c..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/lift.py +++ /dev/null @@ -1,100 +0,0 @@ -import numpy as np - -from lerobot.common.envs.simxarm.simxarm import Base - - -class Lift(Base): - def __init__(self): - self._z_threshold = 0.15 - super().__init__("lift") - - @property - def z_target(self): - return self._init_z + self._z_threshold - - def is_success(self): - return self.obj[2] >= self.z_target - - def get_reward(self): - reach_dist = np.linalg.norm(self.obj - self.eef) - reach_dist_xy = np.linalg.norm(self.obj[:-1] - self.eef[:-1]) - pick_completed = self.obj[2] >= (self.z_target - 0.01) - obj_dropped = (self.obj[2] < (self._init_z + 0.005)) and (reach_dist > 0.02) - - # Reach - if reach_dist < 0.05: - reach_reward = -reach_dist + max(self._action[-1], 0) / 50 - elif reach_dist_xy < 0.05: - reach_reward = -reach_dist - else: - z_bonus = np.linalg.norm(np.linalg.norm(self.obj[-1] - self.eef[-1])) - reach_reward = -reach_dist - 2 * z_bonus - - # Pick - if pick_completed and not obj_dropped: - pick_reward = self.z_target - elif (reach_dist < 0.1) and (self.obj[2] > (self._init_z + 0.005)): - pick_reward = min(self.z_target, self.obj[2]) - else: - pick_reward = 0 - - return reach_reward / 100 + pick_reward - - def _get_obs(self): - eef_velp = self._utils.get_site_xvelp(self.model, self.data, "grasp") * self.dt - gripper_angle = self._utils.get_joint_qpos(self.model, self.data, "right_outer_knuckle_joint") - eef = self.eef - self.center_of_table - - obj = self.obj - self.center_of_table - obj_rot = self._utils.get_joint_qpos(self.model, self.data, "object_joint0")[-4:] - obj_velp = self._utils.get_site_xvelp(self.model, self.data, "object_site") * self.dt - obj_velr = self._utils.get_site_xvelr(self.model, self.data, "object_site") * self.dt - - obs = np.concatenate( - [ - eef, - eef_velp, - obj, - obj_rot, - obj_velp, - obj_velr, - eef - obj, - np.array( - [ - np.linalg.norm(eef - obj), - np.linalg.norm(eef[:-1] - obj[:-1]), - self.z_target, - self.z_target - obj[-1], - self.z_target - eef[-1], - ] - ), - gripper_angle, - ], - axis=0, - ) - return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": eef} - - def _sample_goal(self): - # Gripper - gripper_pos = np.array([1.280, 0.295, 0.735]) + self.np_random.uniform(-0.05, 0.05, size=3) - super()._set_gripper(gripper_pos, self.gripper_rotation) - - # Object - object_pos = self.center_of_table - np.array([0.15, 0.10, 0.07]) - object_pos[0] += self.np_random.uniform(-0.05, 0.05, size=1) - object_pos[1] += self.np_random.uniform(-0.05, 0.05, size=1) - object_qpos = self._utils.get_joint_qpos(self.model, self.data, "object_joint0") - object_qpos[:3] = object_pos - self._utils.set_joint_qpos(self.model, self.data, 
"object_joint0", object_qpos) - self._init_z = object_pos[2] - - # Goal - return object_pos + np.array([0, 0, self._z_threshold]) - - def reset(self): - self._action = np.zeros(4) - return super().reset() - - def step(self, action): - self._action = action.copy() - return super().step(action) diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/mocap.py b/lerobot/common/envs/simxarm/simxarm/tasks/mocap.py deleted file mode 100644 index 4295bf19..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/mocap.py +++ /dev/null @@ -1,67 +0,0 @@ -# import mujoco_py -import mujoco -import numpy as np - - -def apply_action(model, model_names, data, action): - if model.nmocap > 0: - pos_action, gripper_action = np.split(action, (model.nmocap * 7,)) - if data.ctrl is not None: - for i in range(gripper_action.shape[0]): - data.ctrl[i] = gripper_action[i] - pos_action = pos_action.reshape(model.nmocap, 7) - pos_delta, quat_delta = pos_action[:, :3], pos_action[:, 3:] - reset_mocap2body_xpos(model, model_names, data) - data.mocap_pos[:] = data.mocap_pos + pos_delta - data.mocap_quat[:] = data.mocap_quat + quat_delta - - -def reset(model, data): - if model.nmocap > 0 and model.eq_data is not None: - for i in range(model.eq_data.shape[0]): - # if sim.model.eq_type[i] == mujoco_py.const.EQ_WELD: - if model.eq_type[i] == mujoco.mjtEq.mjEQ_WELD: - # model.eq_data[i, :] = np.array([0., 0., 0., 1., 0., 0., 0.]) - model.eq_data[i, :] = np.array( - [ - 0.0, - 0.0, - 0.0, - 1.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - ] - ) - # sim.forward() - mujoco.mj_forward(model, data) - - -def reset_mocap2body_xpos(model, model_names, data): - if model.eq_type is None or model.eq_obj1id is None or model.eq_obj2id is None: - return - - # For all weld constraints - for eq_type, obj1_id, obj2_id in zip(model.eq_type, model.eq_obj1id, model.eq_obj2id, strict=False): - # if eq_type != mujoco_py.const.EQ_WELD: - if eq_type != mujoco.mjtEq.mjEQ_WELD: - continue - # body2 = model.body_id2name(obj2_id) - body2 = model_names.body_id2name[obj2_id] - if body2 == "B0" or body2 == "B9" or body2 == "B1": - continue - mocap_id = model.body_mocapid[obj1_id] - if mocap_id != -1: - # obj1 is the mocap, obj2 is the welded body - body_idx = obj2_id - else: - # obj2 is the mocap, obj1 is the welded body - mocap_id = model.body_mocapid[obj2_id] - body_idx = obj1_id - assert mocap_id != -1 - data.mocap_pos[mocap_id][:] = data.xpos[body_idx] - data.mocap_quat[mocap_id][:] = data.xquat[body_idx] diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/peg_in_box.py b/lerobot/common/envs/simxarm/simxarm/tasks/peg_in_box.py deleted file mode 100644 index 42e41520..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/peg_in_box.py +++ /dev/null @@ -1,86 +0,0 @@ -import numpy as np - -from lerobot.common.envs.simxarm.simxarm import Base - - -class PegInBox(Base): - def __init__(self): - super().__init__("peg_in_box") - - def _reset_sim(self): - self._act_magnitude = 0 - super()._reset_sim() - for _ in range(10): - self._apply_action(np.array([0, 0, 0, 1], dtype=np.float32)) - self.sim.step() - - @property - def box(self): - return self.sim.data.get_site_xpos("box_site") - - def is_success(self): - return np.linalg.norm(self.obj - self.box) <= 0.05 - - def get_reward(self): - dist_xy = np.linalg.norm(self.obj[:2] - self.box[:2]) - dist_xyz = np.linalg.norm(self.obj - self.box) - return float(dist_xy <= 0.045) * (2 - 6 * dist_xyz) - 0.2 * np.square(self._act_magnitude) - dist_xy - - def _get_obs(self): - eef_velp = 
self.sim.data.get_site_xvelp("grasp") * self.dt - gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint") - eef, box = self.eef - self.center_of_table, self.box - self.center_of_table - - obj = self.obj - self.center_of_table - obj_rot = self.sim.data.get_joint_qpos("object_joint0")[-4:] - obj_velp = self.sim.data.get_site_xvelp("object_site") * self.dt - obj_velr = self.sim.data.get_site_xvelr("object_site") * self.dt - - obs = np.concatenate( - [ - eef, - eef_velp, - box, - obj, - obj_rot, - obj_velp, - obj_velr, - eef - box, - eef - obj, - obj - box, - np.array( - [ - np.linalg.norm(eef - box), - np.linalg.norm(eef - obj), - np.linalg.norm(obj - box), - gripper_angle, - ] - ), - ], - axis=0, - ) - return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": box} - - def _sample_goal(self): - # Gripper - gripper_pos = np.array([1.280, 0.295, 0.9]) + self.np_random.uniform(-0.05, 0.05, size=3) - super()._set_gripper(gripper_pos, self.gripper_rotation) - - # Object - object_pos = gripper_pos - np.array([0, 0, 0.06]) + self.np_random.uniform(-0.005, 0.005, size=3) - object_qpos = self.sim.data.get_joint_qpos("object_joint0") - object_qpos[:3] = object_pos - self.sim.data.set_joint_qpos("object_joint0", object_qpos) - - # Box - box_pos = np.array([1.61, 0.18, 0.58]) - box_pos[:2] += self.np_random.uniform(-0.11, 0.11, size=2) - box_qpos = self.sim.data.get_joint_qpos("box_joint0") - box_qpos[:3] = box_pos - self.sim.data.set_joint_qpos("box_joint0", box_qpos) - - return self.box - - def step(self, action): - self._act_magnitude = np.linalg.norm(action[:3]) - return super().step(action) diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/push.py b/lerobot/common/envs/simxarm/simxarm/tasks/push.py deleted file mode 100644 index 36c4a550..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/push.py +++ /dev/null @@ -1,78 +0,0 @@ -import numpy as np - -from lerobot.common.envs.simxarm.simxarm import Base - - -class Push(Base): - def __init__(self): - super().__init__("push") - - def _reset_sim(self): - self._act_magnitude = 0 - super()._reset_sim() - - def is_success(self): - return np.linalg.norm(self.obj - self.goal) <= 0.05 - - def get_reward(self): - dist = np.linalg.norm(self.obj - self.goal) - penalty = self._act_magnitude**2 - return -(dist + 0.15 * penalty) - - def _get_obs(self): - eef_velp = self.sim.data.get_site_xvelp("grasp") * self.dt - gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint") - eef, goal = self.eef - self.center_of_table, self.goal - self.center_of_table - - obj = self.obj - self.center_of_table - obj_rot = self.sim.data.get_joint_qpos("object_joint0")[-4:] - obj_velp = self.sim.data.get_site_xvelp("object_site") * self.dt - obj_velr = self.sim.data.get_site_xvelr("object_site") * self.dt - - obs = np.concatenate( - [ - eef, - eef_velp, - goal, - obj, - obj_rot, - obj_velp, - obj_velr, - eef - goal, - eef - obj, - obj - goal, - np.array( - [ - np.linalg.norm(eef - goal), - np.linalg.norm(eef - obj), - np.linalg.norm(obj - goal), - gripper_angle, - ] - ), - ], - axis=0, - ) - return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": goal} - - def _sample_goal(self): - # Gripper - gripper_pos = np.array([1.280, 0.295, 0.735]) + self.np_random.uniform(-0.05, 0.05, size=3) - super()._set_gripper(gripper_pos, self.gripper_rotation) - - # Object - object_pos = self.center_of_table - np.array([0.25, 0, 0.07]) - object_pos[0] += self.np_random.uniform(-0.08, 0.08, size=1) - object_pos[1] += 
self.np_random.uniform(-0.08, 0.08, size=1) - object_qpos = self.sim.data.get_joint_qpos("object_joint0") - object_qpos[:3] = object_pos - self.sim.data.set_joint_qpos("object_joint0", object_qpos) - - # Goal - self.goal = np.array([1.600, 0.200, 0.545]) - self.goal[:2] += self.np_random.uniform(-0.1, 0.1, size=2) - self.sim.model.site_pos[self.sim.model.site_name2id("target0")] = self.goal - return self.goal - - def step(self, action): - self._act_magnitude = np.linalg.norm(action[:3]) - return super().step(action) diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/reach.py b/lerobot/common/envs/simxarm/simxarm/tasks/reach.py deleted file mode 100644 index 941a586f..00000000 --- a/lerobot/common/envs/simxarm/simxarm/tasks/reach.py +++ /dev/null @@ -1,44 +0,0 @@ -import numpy as np - -from lerobot.common.envs.simxarm.simxarm import Base - - -class Reach(Base): - def __init__(self): - super().__init__("reach") - - def _reset_sim(self): - self._act_magnitude = 0 - super()._reset_sim() - - def is_success(self): - return np.linalg.norm(self.eef - self.goal) <= 0.05 - - def get_reward(self): - dist = np.linalg.norm(self.eef - self.goal) - penalty = self._act_magnitude**2 - return -(dist + 0.15 * penalty) - - def _get_obs(self): - eef_velp = self.sim.data.get_site_xvelp("grasp") * self.dt - gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint") - eef, goal = self.eef - self.center_of_table, self.goal - self.center_of_table - obs = np.concatenate( - [eef, eef_velp, goal, eef - goal, np.array([np.linalg.norm(eef - goal), gripper_angle])], axis=0 - ) - return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": goal} - - def _sample_goal(self): - # Gripper - gripper_pos = np.array([1.280, 0.295, 0.735]) + self.np_random.uniform(-0.05, 0.05, size=3) - super()._set_gripper(gripper_pos, self.gripper_rotation) - - # Goal - self.goal = np.array([1.550, 0.287, 0.580]) - self.goal[:2] += self.np_random.uniform(-0.125, 0.125, size=2) - self.sim.model.site_pos[self.sim.model.site_name2id("target0")] = self.goal - return self.goal - - def step(self, action): - self._act_magnitude = np.linalg.norm(action[:3]) - return super().step(action) diff --git a/lerobot/common/envs/utils.py b/lerobot/common/envs/utils.py new file mode 100644 index 00000000..1696ddbe --- /dev/null +++ b/lerobot/common/envs/utils.py @@ -0,0 +1,32 @@ +import einops +import torch + +from lerobot.common.transforms import apply_inverse_transform + + +def preprocess_observation(observation, transform=None): + # map to expected inputs for the policy + obs = { + "observation.image": torch.from_numpy(observation["pixels"]).float(), + "observation.state": torch.from_numpy(observation["agent_pos"]).float(), + } + # convert to (b c h w) torch format + obs["observation.image"] = einops.rearrange(obs["observation.image"], "b h w c -> b c h w") + + # apply same transforms as in training + if transform is not None: + for key in obs: + obs[key] = torch.stack([transform({key: item})[key] for item in obs[key]]) + + return obs + + +def postprocess_action(action, transform=None): + action = action.to("cpu") + # action is a batch (num_env,action_dim) instead of an item (action_dim), + # we assume applying inverse transform on a batch works the same + action = apply_inverse_transform({"action": action}, transform)["action"].numpy() + assert ( + action.ndim == 2 + ), "we assume dimensions are respectively the number of parallel envs, action dimensions" + return action diff --git a/lerobot/scripts/eval.py 
b/lerobot/scripts/eval.py index 09399878..e7ba53fc 100644 --- a/lerobot/scripts/eval.py +++ b/lerobot/scripts/eval.py @@ -44,9 +44,9 @@ from huggingface_hub import snapshot_download from lerobot.common.datasets.factory import make_dataset from lerobot.common.envs.factory import make_env +from lerobot.common.envs.utils import postprocess_action, preprocess_observation from lerobot.common.logger import log_output_dir from lerobot.common.policies.factory import make_policy -from lerobot.common.transforms import apply_inverse_transform from lerobot.common.utils import get_safe_torch_device, init_hydra_config, init_logging, set_global_seed @@ -54,34 +54,6 @@ def write_video(video_path, stacked_frames, fps): imageio.mimsave(video_path, stacked_frames, fps=fps) -def preprocess_observation(observation, transform=None): - # map to expected inputs for the policy - obs = { - "observation.image": torch.from_numpy(observation["pixels"]).float(), - "observation.state": torch.from_numpy(observation["agent_pos"]).float(), - } - # convert to (b c h w) torch format - obs["observation.image"] = einops.rearrange(obs["observation.image"], "b h w c -> b c h w") - - # apply same transforms as in training - if transform is not None: - for key in obs: - obs[key] = torch.stack([transform({key: item})[key] for item in obs[key]]) - - return obs - - -def postprocess_action(action, transform=None): - action = action.to("cpu") - # action is a batch (num_env,action_dim) instead of an item (action_dim), - # we assume applying inverse transform on a batch works the same - action = apply_inverse_transform({"action": action}, transform)["action"].numpy() - assert ( - action.ndim == 2 - ), "we assume dimensions are respectively the number of parallel envs, action dimensions" - return action - - def eval_policy( env: gym.vector.VectorEnv, policy, @@ -114,10 +86,10 @@ def eval_policy( def maybe_render_frame(env): if save_video: # noqa: B023 if return_first_video: - visu = env.envs[0].render() + visu = env.envs[0].render(mode="visualization") visu = visu[None, ...] 
# add batch dim else: - visu = np.stack([env.render() for env in env.envs]) + visu = np.stack([env.render(mode="visualization") for env in env.envs]) ep_frames.append(visu) # noqa: B023 for _ in range(num_episodes): diff --git a/tests/test_envs.py b/tests/test_envs.py index 665c1fba..a94d76f2 100644 --- a/tests/test_envs.py +++ b/tests/test_envs.py @@ -1,47 +1,47 @@ import pytest -from tensordict import TensorDict import torch -from torchrl.envs.utils import check_env_specs, step_mdp from lerobot.common.datasets.factory import make_dataset import gymnasium as gym from gymnasium.utils.env_checker import check_env -from lerobot.common.envs.aloha.env import AlohaEnv from lerobot.common.envs.factory import make_env -from lerobot.common.envs.pusht.env import PushtEnv -from lerobot.common.envs.simxarm.env import SimxarmEnv from lerobot.common.utils import init_hydra_config +from lerobot.common.envs.utils import preprocess_observation + +# import dmc_aloha # noqa: F401 + from .utils import DEVICE, DEFAULT_CONFIG_PATH -def print_spec_rollout(env): - print("observation_spec:", env.observation_spec) - print("action_spec:", env.action_spec) - print("reward_spec:", env.reward_spec) - print("done_spec:", env.done_spec) +# def print_spec_rollout(env): +# print("observation_spec:", env.observation_spec) +# print("action_spec:", env.action_spec) +# print("reward_spec:", env.reward_spec) +# print("done_spec:", env.done_spec) - td = env.reset() - print("reset tensordict", td) +# td = env.reset() +# print("reset tensordict", td) - td = env.rand_step(td) - print("random step tensordict", td) +# td = env.rand_step(td) +# print("random step tensordict", td) - def simple_rollout(steps=100): - # preallocate: - data = TensorDict({}, [steps]) - # reset - _data = env.reset() - for i in range(steps): - _data["action"] = env.action_spec.rand() - _data = env.step(_data) - data[i] = _data - _data = step_mdp(_data, keep_other=True) - return data +# def simple_rollout(steps=100): +# # preallocate: +# data = TensorDict({}, [steps]) +# # reset +# _data = env.reset() +# for i in range(steps): +# _data["action"] = env.action_spec.rand() +# _data = env.step(_data) +# data[i] = _data +# _data = step_mdp(_data, keep_other=True) +# return data - print("data from rollout:", simple_rollout(100)) +# print("data from rollout:", simple_rollout(100)) +@pytest.mark.skip("TODO") @pytest.mark.parametrize( "task,from_pixels,pixels_only", [ @@ -63,50 +63,41 @@ def test_aloha(task, from_pixels, pixels_only): @pytest.mark.parametrize( - "task, obs_type", + "env_task, obs_type", [ ("XarmLift-v0", "state"), ("XarmLift-v0", "pixels"), ("XarmLift-v0", "pixels_agent_pos"), - # TODO(aliberts): Add simxarm other tasks + # TODO(aliberts): Add gym_xarm other tasks ], ) def test_xarm(env_task, obs_type): - import gym_xarm + import gym_xarm # noqa: F401 env = gym.make(f"gym_xarm/{env_task}", obs_type=obs_type) - # env = SimxarmEnv( - # task, - # from_pixels=from_pixels, - # pixels_only=pixels_only, - # image_size=84 if from_pixels else None, - # ) - # print_spec_rollout(env) - # check_env_specs(env) check_env(env) + @pytest.mark.parametrize( - "from_pixels,pixels_only", + "env_task, obs_type", [ - (True, False), + ("PushTPixels-v0", "state"), + ("PushTPixels-v0", "pixels"), + ("PushTPixels-v0", "pixels_agent_pos"), ], ) -def test_pusht(from_pixels, pixels_only): - env = PushtEnv( - from_pixels=from_pixels, - pixels_only=pixels_only, - image_size=96 if from_pixels else None, - ) - # print_spec_rollout(env) - check_env_specs(env) +def test_pusht(env_task, 
obs_type): + import gym_pusht # noqa: F401 + env = gym.make(f"gym_pusht/{env_task}", obs_type=obs_type) + check_env(env) @pytest.mark.parametrize( "env_name", [ - "simxarm", "pusht", - "aloha", + "simxarm", + # "aloha", ], ) def test_factory(env_name): @@ -118,15 +109,12 @@ def test_factory(env_name): dataset = make_dataset(cfg) env = make_env(cfg) + obs, info = env.reset() + obs = {key: obs[key][None, ...] for key in obs} + obs = preprocess_observation(obs, transform=dataset.transform) for key in dataset.image_keys: - assert env.reset().get(key).dtype == torch.uint8 - check_env_specs(env) - - env = make_env(cfg, transform=dataset.transform) - for key in dataset.image_keys: - img = env.reset().get(key) + img = obs[key] assert img.dtype == torch.float32 # TODO(rcadene): we assume for now that image normalization takes place in the model assert img.max() <= 1.0 assert img.min() >= 0.0 - check_env_specs(env)
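Taken together, these patches drop the torchrl-based simxarm environment in favor of the external gym_xarm package and move observation/action conversion into lerobot/common/envs/utils.py. As a rough usage sketch (not part of the patches themselves): the snippet below assumes the branch is installed with the "xarm" extra and that obs_type="pixels_agent_pos" returns a dict of numpy arrays under the "pixels" and "agent_pos" keys, as the updated tests and preprocess_observation imply.

# Minimal sketch: make the new gymnasium-based xArm env and feed its observations
# through the helper added in lerobot/common/envs/utils.py.
import gymnasium as gym
import gym_xarm  # noqa: F401  # registers gym_xarm/XarmLift-v0

from lerobot.common.envs.utils import preprocess_observation

env = gym.make("gym_xarm/XarmLift-v0", obs_type="pixels_agent_pos", render_mode="rgb_array")
raw_obs, info = env.reset(seed=0)

# Add a leading batch dim so shapes look like a vectorized env's output,
# mirroring what test_factory does before calling preprocess_observation.
raw_obs = {key: raw_obs[key][None, ...] for key in raw_obs}
obs = preprocess_observation(raw_obs)
print(obs["observation.image"].shape)  # (1, 3, H, W) float32
print(obs["observation.state"].shape)  # (1, state_dim) float32

# Step with random actions; gymnasium's step returns (obs, reward, terminated, truncated, info).
for _ in range(5):
    raw_obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
env.close()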