diff --git a/lerobot/common/envs/abstract.py b/lerobot/common/envs/abstract.py
deleted file mode 100644
index ea5ce3da..00000000
--- a/lerobot/common/envs/abstract.py
+++ /dev/null
@@ -1,92 +0,0 @@
-from collections import deque
-from typing import Optional
-
-from tensordict import TensorDict
-from torchrl.envs import EnvBase
-
-from lerobot.common.utils import set_global_seed
-
-
-class AbstractEnv(EnvBase):
- """
- Note:
- When implementing a concrete class (e.g. `AlohaDataset`, `PushtEnv`, `DiffusionPolicy`), you need to:
- 1. set the required class attributes:
- - for classes inheriting from `AbstractDataset`: `available_datasets`
- - for classes inheriting from `AbstractEnv`: `name`, `available_tasks`
- - for classes inheriting from `AbstractPolicy`: `name`
- 2. update variables in `lerobot/__init__.py` (e.g. `available_envs`, `available_datasets_per_envs`, `available_policies`)
- 3. update variables in `tests/test_available.py` by importing your new class
- """
-
- name: str | None = None # same name should be used to instantiate the environment in factory.py
- available_tasks: list[str] | None = None # for instance: sim_insertion, sim_transfer_cube, pusht, lift
-
- def __init__(
- self,
- task,
- frame_skip: int = 1,
- from_pixels: bool = False,
- pixels_only: bool = False,
- image_size=None,
- seed=1337,
- device="cpu",
- num_prev_obs=1,
- num_prev_action=0,
- ):
- super().__init__(device=device, batch_size=[])
- assert self.name is not None, "Subclasses of `AbstractEnv` should set the `name` class attribute."
- assert (
- self.available_tasks is not None
- ), "Subclasses of `AbstractEnv` should set the `available_tasks` class attribute."
- assert (
- task in self.available_tasks
- ), f"The provided task ({task}) is not on the list of available tasks {self.available_tasks}."
-
- self.task = task
- self.frame_skip = frame_skip
- self.from_pixels = from_pixels
- self.pixels_only = pixels_only
- self.image_size = image_size
- self.num_prev_obs = num_prev_obs
- self.num_prev_action = num_prev_action
-
- if pixels_only:
- assert from_pixels
- if from_pixels:
- assert image_size
-
- self._make_env()
- self._make_spec()
-
- # self._next_seed will be used for the next reset. It is recommended that when self.set_seed is called
- # you store the return value in self._next_seed (it will be a new randomly generated seed).
- self._next_seed = seed
- # Don't store the result of this in self._next_seed, as we want to make sure that the first time
- # self._reset is called, we use seed.
- self.set_seed(seed)
-
- if self.num_prev_obs > 0:
- self._prev_obs_image_queue = deque(maxlen=self.num_prev_obs)
- self._prev_obs_state_queue = deque(maxlen=self.num_prev_obs)
- if self.num_prev_action > 0:
- raise NotImplementedError()
- # self._prev_action_queue = deque(maxlen=self.num_prev_action)
-
- def render(self, mode="rgb_array", width=640, height=480):
- raise NotImplementedError("Abstract method")
-
- def _reset(self, tensordict: Optional[TensorDict] = None):
- raise NotImplementedError("Abstract method")
-
- def _step(self, tensordict: TensorDict):
- raise NotImplementedError("Abstract method")
-
- def _make_env(self):
- raise NotImplementedError("Abstract method")
-
- def _make_spec(self):
- raise NotImplementedError("Abstract method")
-
- def _set_seed(self, seed: Optional[int]):
- set_global_seed(seed)
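For reference, the docstring above lists what a concrete subclass of the removed `AbstractEnv` had to provide. A minimal sketch of that old interface (the class and task names are hypothetical and the method bodies are placeholders, not real implementations):

```python
from lerobot.common.envs.abstract import AbstractEnv  # module removed in this diff


class MyEnv(AbstractEnv):
    name = "myenv"                 # must match the name used in factory.py
    available_tasks = ["my_task"]  # tasks accepted by the `task` constructor argument

    def _make_env(self): ...       # build the underlying simulator
    def _make_spec(self): ...      # declare observation/action/reward/done specs
    def _reset(self, tensordict=None): ...
    def _step(self, tensordict): ...
    def render(self, mode="rgb_array", width=640, height=480): ...
```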
diff --git a/lerobot/common/envs/factory.py b/lerobot/common/envs/factory.py
index 788af3cb..4ddb81a2 100644
--- a/lerobot/common/envs/factory.py
+++ b/lerobot/common/envs/factory.py
@@ -6,24 +6,27 @@ def make_env(cfg, num_parallel_envs=0) -> gym.Env | gym.vector.SyncVectorEnv:
Note: When `num_parallel_envs > 0`, this function returns a `SyncVectorEnv` that takes a batched action as input and
returns batched observation, reward, terminated, and truncated values, with `num_parallel_envs` entries each.
"""
- kwargs = {}
+ kwargs = {
+ "obs_type": "pixels_agent_pos",
+ "max_episode_steps": cfg.env.episode_length,
+ "visualization_width": 384,
+ "visualization_height": 384,
+ }
if cfg.env.name == "simxarm":
- kwargs["task"] = cfg.env.task
+ import gym_xarm # noqa: F401
+
+ assert cfg.env.task == "lift"
+ env_fn = lambda: gym.make( # noqa: E731
+ "gym_xarm/XarmLift-v0",
+ **kwargs,
+ )
elif cfg.env.name == "pusht":
- import gym_pusht # noqa
+ import gym_pusht # noqa: F401
# assert kwargs["seed"] > 200, "Seed 0-200 are used for the demonstration dataset, so we don't want to seed the eval env with this range."
- kwargs.update(
- {
- "obs_type": "pixels_agent_pos",
- "render_action": False,
- }
- )
env_fn = lambda: gym.make( # noqa: E731
"gym_pusht/PushTPixels-v0",
- render_mode="rgb_array",
- max_episode_steps=cfg.env.episode_length,
**kwargs,
)
elif cfg.env.name == "aloha":
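As the docstring at the top of this hunk notes, `make_env` returns a `gym.vector.SyncVectorEnv` when `num_parallel_envs > 0`. A rough usage sketch, assuming `cfg` is the usual Hydra config with `env.name` and `env.episode_length` set (the concrete numbers are illustrative):

```python
from lerobot.common.envs.factory import make_env

envs = make_env(cfg, num_parallel_envs=4)  # gym.vector.SyncVectorEnv with 4 copies
obs, info = envs.reset(seed=42)            # observations are batched along the first dim
actions = envs.action_space.sample()       # batched action, e.g. shape (4, 2) for PushT
obs, reward, terminated, truncated, info = envs.step(actions)
envs.close()
```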
diff --git a/lerobot/common/envs/pusht/env.py b/lerobot/common/envs/pusht/env.py
deleted file mode 100644
index 5f7fb2c3..00000000
--- a/lerobot/common/envs/pusht/env.py
+++ /dev/null
@@ -1,245 +0,0 @@
-import importlib
-import logging
-from collections import deque
-from typing import Optional
-
-import cv2
-import numpy as np
-import torch
-from tensordict import TensorDict
-from torchrl.data.tensor_specs import (
- BoundedTensorSpec,
- CompositeSpec,
- DiscreteTensorSpec,
- UnboundedContinuousTensorSpec,
-)
-from torchrl.envs.libs.gym import _gym_to_torchrl_spec_transform
-
-from lerobot.common.envs.abstract import AbstractEnv
-from lerobot.common.utils import set_global_seed
-
-_has_gym = importlib.util.find_spec("gymnasium") is not None
-
-
-class PushtEnv(AbstractEnv):
- name = "pusht"
- available_tasks = ["pusht"]
- _reset_warning_issued = False
-
- def __init__(
- self,
- task="pusht",
- frame_skip: int = 1,
- from_pixels: bool = False,
- pixels_only: bool = False,
- image_size=None,
- seed=1337,
- device="cpu",
- num_prev_obs=1,
- num_prev_action=0,
- ):
- super().__init__(
- task=task,
- frame_skip=frame_skip,
- from_pixels=from_pixels,
- pixels_only=pixels_only,
- image_size=image_size,
- seed=seed,
- device=device,
- num_prev_obs=num_prev_obs,
- num_prev_action=num_prev_action,
- )
-
- def _make_env(self):
- if not _has_gym:
- raise ImportError("Cannot import gymnasium.")
-
- # TODO(rcadene): PushTEnv is similar to PushTImageEnv but without the image rendering, so it's faster to iterate on
- # from lerobot.common.envs.pusht.pusht_env import PushTEnv
-
- if not self.from_pixels:
- raise NotImplementedError("Use PushTEnv, instead of PushTImageEnv")
- from lerobot.common.envs.pusht.pusht_image_env import PushTImageEnv
-
- self._env = PushTImageEnv(render_size=self.image_size)
-
- def render(self, mode="rgb_array", width=96, height=96, with_marker=True):
- """
- with_marker adds a cursor showing the targeted action for the controller.
- """
- if width != height:
- raise NotImplementedError()
- tmp = self._env.render_size
- if width != self._env.render_size:
- self._env.render_cache = None
- self._env.render_size = width
- out = self._env.render(mode).copy()
- if with_marker and self._env.latest_action is not None:
- action = np.array(self._env.latest_action)
- coord = (action / 512 * self._env.render_size).astype(np.int32)
- marker_size = int(8 / 96 * self._env.render_size)
- thickness = int(1 / 96 * self._env.render_size)
- cv2.drawMarker(
- out,
- coord,
- color=(255, 0, 0),
- markerType=cv2.MARKER_CROSS,
- markerSize=marker_size,
- thickness=thickness,
- )
- self._env.render_size = tmp
- return out
-
- def _format_raw_obs(self, raw_obs):
- if self.from_pixels:
- image = torch.from_numpy(raw_obs["image"])
- obs = {"image": image}
-
- if not self.pixels_only:
- obs["state"] = torch.from_numpy(raw_obs["agent_pos"]).type(torch.float32)
- else:
- # TODO:
- obs = {"state": torch.from_numpy(raw_obs["observation"]).type(torch.float32)}
-
- return obs
-
- def _reset(self, tensordict: Optional[TensorDict] = None):
- if tensordict is not None and not PushtEnv._reset_warning_issued:
- logging.warning(f"{self.__class__.__name__}._reset ignores the provided tensordict.")
- PushtEnv._reset_warning_issued = True
-
- # Seed the environment and update the seed to be used for the next reset.
- self._next_seed = self.set_seed(self._next_seed)
- raw_obs = self._env.reset()
-
- obs = self._format_raw_obs(raw_obs)
-
- if self.num_prev_obs > 0:
- stacked_obs = {}
- if "image" in obs:
- self._prev_obs_image_queue = deque(
- [obs["image"]] * (self.num_prev_obs + 1), maxlen=(self.num_prev_obs + 1)
- )
- stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue))
- if "state" in obs:
- self._prev_obs_state_queue = deque(
- [obs["state"]] * (self.num_prev_obs + 1), maxlen=(self.num_prev_obs + 1)
- )
- stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue))
- obs = stacked_obs
-
- td = TensorDict(
- {
- "observation": TensorDict(obs, batch_size=[]),
- "done": torch.tensor([False], dtype=torch.bool),
- },
- batch_size=[],
- )
-
- return td
-
- def _step(self, tensordict: TensorDict):
- td = tensordict
- action = td["action"].numpy()
- assert action.ndim == 1
- # TODO(rcadene): add info["is_success"] and info["success"] ?
-
- raw_obs, reward, done, info = self._env.step(action)
-
- obs = self._format_raw_obs(raw_obs)
-
- if self.num_prev_obs > 0:
- stacked_obs = {}
- if "image" in obs:
- self._prev_obs_image_queue.append(obs["image"])
- stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue))
- if "state" in obs:
- self._prev_obs_state_queue.append(obs["state"])
- stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue))
- obs = stacked_obs
-
- td = TensorDict(
- {
- "observation": TensorDict(obs, batch_size=[]),
- "reward": torch.tensor([reward], dtype=torch.float32),
- # success and done are true when coverage > self.success_threshold in env
- "done": torch.tensor([done], dtype=torch.bool),
- "success": torch.tensor([done], dtype=torch.bool),
- },
- batch_size=[],
- )
- return td
-
- def _make_spec(self):
- obs = {}
- if self.from_pixels:
- image_shape = (3, self.image_size, self.image_size)
- if self.num_prev_obs > 0:
- image_shape = (self.num_prev_obs + 1, *image_shape)
-
- obs["image"] = BoundedTensorSpec(
- low=0,
- high=255,
- shape=image_shape,
- dtype=torch.uint8,
- device=self.device,
- )
- if not self.pixels_only:
- state_shape = self._env.observation_space["agent_pos"].shape
- if self.num_prev_obs > 0:
- state_shape = (self.num_prev_obs + 1, *state_shape)
-
- obs["state"] = BoundedTensorSpec(
- low=0,
- high=512,
- shape=state_shape,
- dtype=torch.float32,
- device=self.device,
- )
- else:
- # TODO(rcadene): add observation_space achieved_goal and desired_goal?
- state_shape = self._env.observation_space["observation"].shape
- if self.num_prev_obs > 0:
- state_shape = (self.num_prev_obs + 1, *state_shape)
-
- obs["state"] = UnboundedContinuousTensorSpec(
- # TODO:
- shape=state_shape,
- dtype=torch.float32,
- device=self.device,
- )
- self.observation_spec = CompositeSpec({"observation": obs})
-
- self.action_spec = _gym_to_torchrl_spec_transform(
- self._env.action_space,
- device=self.device,
- )
-
- self.reward_spec = UnboundedContinuousTensorSpec(
- shape=(1,),
- dtype=torch.float32,
- device=self.device,
- )
-
- self.done_spec = CompositeSpec(
- {
- "done": DiscreteTensorSpec(
- 2,
- shape=(1,),
- dtype=torch.bool,
- device=self.device,
- ),
- "success": DiscreteTensorSpec(
- 2,
- shape=(1,),
- dtype=torch.bool,
- device=self.device,
- ),
- }
- )
-
- def _set_seed(self, seed: Optional[int]):
- # Set global seed.
- set_global_seed(seed)
- # Set PushTImageEnv seed as it relies on its own internal _seed attribute.
- self._env.seed(seed)
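The `_reset`/`_step` methods above keep the last `num_prev_obs + 1` frames in deques and stack them into one tensor. A standalone illustration of that scheme, assuming the default 96x96 render size:

```python
from collections import deque

import torch

num_prev_obs = 1
first_image = torch.zeros(3, 96, 96)

# At reset, the queue is pre-filled with copies of the first observation.
queue = deque([first_image] * (num_prev_obs + 1), maxlen=num_prev_obs + 1)
stacked = torch.stack(list(queue))  # shape (num_prev_obs + 1, 3, 96, 96)

# At each step, the newest frame is appended and the oldest one is dropped.
new_image = torch.ones(3, 96, 96)
queue.append(new_image)
stacked = torch.stack(list(queue))  # now holds [first_image, new_image]
```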
diff --git a/lerobot/common/envs/pusht/pusht_env.py b/lerobot/common/envs/pusht/pusht_env.py
deleted file mode 100644
index 6ef70aec..00000000
--- a/lerobot/common/envs/pusht/pusht_env.py
+++ /dev/null
@@ -1,378 +0,0 @@
-import collections
-
-import cv2
-import gymnasium as gym
-import numpy as np
-import pygame
-import pymunk
-import pymunk.pygame_util
-import shapely.geometry as sg
-import skimage.transform as st
-from gymnasium import spaces
-from pymunk.vec2d import Vec2d
-
-from lerobot.common.envs.pusht.pymunk_override import DrawOptions
-
-
-def pymunk_to_shapely(body, shapes):
- geoms = []
- for shape in shapes:
- if isinstance(shape, pymunk.shapes.Poly):
- verts = [body.local_to_world(v) for v in shape.get_vertices()]
- verts += [verts[0]]
- geoms.append(sg.Polygon(verts))
- else:
- raise RuntimeError(f"Unsupported shape type {type(shape)}")
- geom = sg.MultiPolygon(geoms)
- return geom
-
-
-class PushTEnv(gym.Env):
- metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 10}
- reward_range = (0.0, 1.0)
-
- def __init__(
- self,
- legacy=True, # compatibility with original
- block_cog=None,
- damping=None,
- render_action=True,
- render_size=96,
- reset_to_state=None,
- ):
- self._seed = None
- self.seed()
- self.window_size = ws = 512 # The size of the PyGame window
- self.render_size = render_size
- self.sim_hz = 100
- # Local controller params.
- self.k_p, self.k_v = 100, 20 # PD control.
- self.control_hz = self.metadata["video.frames_per_second"]
- # legacy set_state for data compatibility
- self.legacy = legacy
-
- # agent_pos, block_pos, block_angle
- self.observation_space = spaces.Box(
- low=np.array([0, 0, 0, 0, 0], dtype=np.float64),
- high=np.array([ws, ws, ws, ws, np.pi * 2], dtype=np.float64),
- shape=(5,),
- dtype=np.float64,
- )
-
- # positional goal for agent
- self.action_space = spaces.Box(
- low=np.array([0, 0], dtype=np.float64),
- high=np.array([ws, ws], dtype=np.float64),
- shape=(2,),
- dtype=np.float64,
- )
-
- self.block_cog = block_cog
- self.damping = damping
- self.render_action = render_action
-
- """
- If human-rendering is used, `self.window` will be a reference
- to the window that we draw to. `self.clock` will be a clock that is used
- to ensure that the environment is rendered at the correct framerate in
- human-mode. They will remain `None` until human-mode is used for the
- first time.
- """
- self.window = None
- self.clock = None
- self.screen = None
-
- self.space = None
- self.teleop = None
- self.render_buffer = None
- self.latest_action = None
- self.reset_to_state = reset_to_state
-
- def reset(self):
- seed = self._seed
- self._setup()
- if self.block_cog is not None:
- self.block.center_of_gravity = self.block_cog
- if self.damping is not None:
- self.space.damping = self.damping
-
- # use legacy RandomState for compatibility
- state = self.reset_to_state
- if state is None:
- rs = np.random.RandomState(seed=seed)
- state = np.array(
- [
- rs.randint(50, 450),
- rs.randint(50, 450),
- rs.randint(100, 400),
- rs.randint(100, 400),
- rs.randn() * 2 * np.pi - np.pi,
- ]
- )
- self._set_state(state)
-
- observation = self._get_obs()
- return observation
-
- def step(self, action):
- dt = 1.0 / self.sim_hz
- self.n_contact_points = 0
- n_steps = self.sim_hz // self.control_hz
- if action is not None:
- self.latest_action = action
- for _ in range(n_steps):
- # Step PD control.
- # self.agent.velocity = self.k_p * (act - self.agent.position) # P control works too.
- acceleration = self.k_p * (action - self.agent.position) + self.k_v * (
- Vec2d(0, 0) - self.agent.velocity
- )
- self.agent.velocity += acceleration * dt
-
- # Step physics.
- self.space.step(dt)
-
- # compute reward
- goal_body = self._get_goal_pose_body(self.goal_pose)
- goal_geom = pymunk_to_shapely(goal_body, self.block.shapes)
- block_geom = pymunk_to_shapely(self.block, self.block.shapes)
-
- intersection_area = goal_geom.intersection(block_geom).area
- goal_area = goal_geom.area
- coverage = intersection_area / goal_area
- reward = np.clip(coverage / self.success_threshold, 0, 1)
- done = coverage > self.success_threshold
-
- observation = self._get_obs()
- info = self._get_info()
-
- return observation, reward, done, info
-
- def render(self, mode):
- return self._render_frame(mode)
-
- def teleop_agent(self):
- TeleopAgent = collections.namedtuple("TeleopAgent", ["act"])
-
- def act(obs):
- act = None
- mouse_position = pymunk.pygame_util.from_pygame(Vec2d(*pygame.mouse.get_pos()), self.screen)
- if self.teleop or (mouse_position - self.agent.position).length < 30:
- self.teleop = True
- act = mouse_position
- return act
-
- return TeleopAgent(act)
-
- def _get_obs(self):
- obs = np.array(
- tuple(self.agent.position) + tuple(self.block.position) + (self.block.angle % (2 * np.pi),)
- )
- return obs
-
- def _get_goal_pose_body(self, pose):
- mass = 1
- inertia = pymunk.moment_for_box(mass, (50, 100))
- body = pymunk.Body(mass, inertia)
- # preserving the legacy assignment order for compatibility
- # the order here doesn't matter somehow, maybe because CoM is aligned with body origin
- body.position = pose[:2].tolist()
- body.angle = pose[2]
- return body
-
- def _get_info(self):
- n_steps = self.sim_hz // self.control_hz
- n_contact_points_per_step = int(np.ceil(self.n_contact_points / n_steps))
- info = {
- "pos_agent": np.array(self.agent.position),
- "vel_agent": np.array(self.agent.velocity),
- "block_pose": np.array(list(self.block.position) + [self.block.angle]),
- "goal_pose": self.goal_pose,
- "n_contacts": n_contact_points_per_step,
- }
- return info
-
- def _render_frame(self, mode):
- if self.window is None and mode == "human":
- pygame.init()
- pygame.display.init()
- self.window = pygame.display.set_mode((self.window_size, self.window_size))
- if self.clock is None and mode == "human":
- self.clock = pygame.time.Clock()
-
- canvas = pygame.Surface((self.window_size, self.window_size))
- canvas.fill((255, 255, 255))
- self.screen = canvas
-
- draw_options = DrawOptions(canvas)
-
- # Draw goal pose.
- goal_body = self._get_goal_pose_body(self.goal_pose)
- for shape in self.block.shapes:
- goal_points = [
- pymunk.pygame_util.to_pygame(goal_body.local_to_world(v), draw_options.surface)
- for v in shape.get_vertices()
- ]
- goal_points += [goal_points[0]]
- pygame.draw.polygon(canvas, self.goal_color, goal_points)
-
- # Draw agent and block.
- self.space.debug_draw(draw_options)
-
- if mode == "human":
- # The following line copies our drawings from `canvas` to the visible window
- self.window.blit(canvas, canvas.get_rect())
- pygame.event.pump()
- pygame.display.update()
-
- # the clock is already ticked during step for "human" mode
-
- img = np.transpose(np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2))
- img = cv2.resize(img, (self.render_size, self.render_size))
- if self.render_action and self.latest_action is not None:
- action = np.array(self.latest_action)
- coord = (action / 512 * 96).astype(np.int32)
- marker_size = int(8 / 96 * self.render_size)
- thickness = int(1 / 96 * self.render_size)
- cv2.drawMarker(
- img,
- coord,
- color=(255, 0, 0),
- markerType=cv2.MARKER_CROSS,
- markerSize=marker_size,
- thickness=thickness,
- )
- return img
-
- def close(self):
- if self.window is not None:
- pygame.display.quit()
- pygame.quit()
-
- def seed(self, seed=None):
- if seed is None:
- seed = np.random.randint(0, 25536)
- self._seed = seed
- self.np_random = np.random.default_rng(seed)
-
- def _handle_collision(self, arbiter, space, data):
- self.n_contact_points += len(arbiter.contact_point_set.points)
-
- def _set_state(self, state):
- if isinstance(state, np.ndarray):
- state = state.tolist()
- pos_agent = state[:2]
- pos_block = state[2:4]
- rot_block = state[4]
- self.agent.position = pos_agent
- # setting angle rotates with respect to center of mass
- # therefore will modify the geometric position
- # if not the same as CoM
- # therefore should be modified first.
- if self.legacy:
- # for compatibility with legacy data
- self.block.position = pos_block
- self.block.angle = rot_block
- else:
- self.block.angle = rot_block
- self.block.position = pos_block
-
- # Run physics to take effect
- self.space.step(1.0 / self.sim_hz)
-
- def _set_state_local(self, state_local):
- agent_pos_local = state_local[:2]
- block_pose_local = state_local[2:]
- tf_img_obj = st.AffineTransform(translation=self.goal_pose[:2], rotation=self.goal_pose[2])
- tf_obj_new = st.AffineTransform(translation=block_pose_local[:2], rotation=block_pose_local[2])
- tf_img_new = st.AffineTransform(matrix=tf_img_obj.params @ tf_obj_new.params)
- agent_pos_new = tf_img_new(agent_pos_local)
- new_state = np.array(list(agent_pos_new[0]) + list(tf_img_new.translation) + [tf_img_new.rotation])
- self._set_state(new_state)
- return new_state
-
- def _setup(self):
- self.space = pymunk.Space()
- self.space.gravity = 0, 0
- self.space.damping = 0
- self.teleop = False
- self.render_buffer = []
-
- # Add walls.
- walls = [
- self._add_segment((5, 506), (5, 5), 2),
- self._add_segment((5, 5), (506, 5), 2),
- self._add_segment((506, 5), (506, 506), 2),
- self._add_segment((5, 506), (506, 506), 2),
- ]
- self.space.add(*walls)
-
- # Add agent, block, and goal zone.
- self.agent = self.add_circle((256, 400), 15)
- self.block = self.add_tee((256, 300), 0)
- self.goal_color = pygame.Color("LightGreen")
- self.goal_pose = np.array([256, 256, np.pi / 4]) # x, y, theta (in radians)
-
- # Add collision handling
- self.collision_handeler = self.space.add_collision_handler(0, 0)
- self.collision_handeler.post_solve = self._handle_collision
- self.n_contact_points = 0
-
- self.max_score = 50 * 100
- self.success_threshold = 0.95 # 95% coverage.
-
- def _add_segment(self, a, b, radius):
- shape = pymunk.Segment(self.space.static_body, a, b, radius)
- shape.color = pygame.Color("LightGray") # https://htmlcolorcodes.com/color-names
- return shape
-
- def add_circle(self, position, radius):
- body = pymunk.Body(body_type=pymunk.Body.KINEMATIC)
- body.position = position
- body.friction = 1
- shape = pymunk.Circle(body, radius)
- shape.color = pygame.Color("RoyalBlue")
- self.space.add(body, shape)
- return body
-
- def add_box(self, position, height, width):
- mass = 1
- inertia = pymunk.moment_for_box(mass, (height, width))
- body = pymunk.Body(mass, inertia)
- body.position = position
- shape = pymunk.Poly.create_box(body, (height, width))
- shape.color = pygame.Color("LightSlateGray")
- self.space.add(body, shape)
- return body
-
- def add_tee(self, position, angle, scale=30, color="LightSlateGray", mask=None):
- if mask is None:
- mask = pymunk.ShapeFilter.ALL_MASKS()
- mass = 1
- length = 4
- vertices1 = [
- (-length * scale / 2, scale),
- (length * scale / 2, scale),
- (length * scale / 2, 0),
- (-length * scale / 2, 0),
- ]
- inertia1 = pymunk.moment_for_poly(mass, vertices=vertices1)
- vertices2 = [
- (-scale / 2, scale),
- (-scale / 2, length * scale),
- (scale / 2, length * scale),
- (scale / 2, scale),
- ]
- inertia2 = pymunk.moment_for_poly(mass, vertices=vertices1)
- body = pymunk.Body(mass, inertia1 + inertia2)
- shape1 = pymunk.Poly(body, vertices1)
- shape2 = pymunk.Poly(body, vertices2)
- shape1.color = pygame.Color(color)
- shape2.color = pygame.Color(color)
- shape1.filter = pymunk.ShapeFilter(mask=mask)
- shape2.filter = pymunk.ShapeFilter(mask=mask)
- body.center_of_gravity = (shape1.center_of_gravity + shape2.center_of_gravity) / 2
- body.position = position
- body.angle = angle
- body.friction = 1
- self.space.add(body, shape1, shape2)
- return body
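`step()` above derives the reward from how much of the goal footprint the block covers. A self-contained sketch of that formula using made-up rectangles (the real code first converts the pymunk bodies to shapely polygons via `pymunk_to_shapely`):

```python
import numpy as np
import shapely.geometry as sg

goal_geom = sg.box(0, 0, 10, 10)   # goal footprint, area 100
block_geom = sg.box(5, 0, 15, 10)  # block overlapping half of the goal
success_threshold = 0.95

coverage = goal_geom.intersection(block_geom).area / goal_geom.area  # 0.5
reward = np.clip(coverage / success_threshold, 0, 1)                 # ~0.526
done = coverage > success_threshold                                  # False
```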
diff --git a/lerobot/common/envs/pusht/pusht_image_env.py b/lerobot/common/envs/pusht/pusht_image_env.py
deleted file mode 100644
index 6547835a..00000000
--- a/lerobot/common/envs/pusht/pusht_image_env.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import numpy as np
-from gymnasium import spaces
-
-from lerobot.common.envs.pusht.pusht_env import PushTEnv
-
-
-class PushTImageEnv(PushTEnv):
- metadata = {"render.modes": ["rgb_array"], "video.frames_per_second": 10}
-
- # Note: legacy defaults to True for compatibility with original
- def __init__(self, legacy=True, block_cog=None, damping=None, render_size=96):
- super().__init__(
- legacy=legacy, block_cog=block_cog, damping=damping, render_size=render_size, render_action=False
- )
- ws = self.window_size
- self.observation_space = spaces.Dict(
- {
- "image": spaces.Box(low=0, high=1, shape=(3, render_size, render_size), dtype=np.float32),
- "agent_pos": spaces.Box(low=0, high=ws, shape=(2,), dtype=np.float32),
- }
- )
- self.render_cache = None
-
- def _get_obs(self):
- img = super()._render_frame(mode="rgb_array")
-
- agent_pos = np.array(self.agent.position)
- img_obs = np.moveaxis(img, -1, 0)
- obs = {"image": img_obs, "agent_pos": agent_pos}
-
- self.render_cache = img
-
- return obs
-
- def render(self, mode):
- assert mode == "rgb_array"
-
- if self.render_cache is None:
- self._get_obs()
-
- return self.render_cache
diff --git a/lerobot/common/envs/pusht/pymunk_override.py b/lerobot/common/envs/pusht/pymunk_override.py
deleted file mode 100644
index 7ad76237..00000000
--- a/lerobot/common/envs/pusht/pymunk_override.py
+++ /dev/null
@@ -1,244 +0,0 @@
-# ----------------------------------------------------------------------------
-# pymunk
-# Copyright (c) 2007-2016 Victor Blomqvist
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-# ----------------------------------------------------------------------------
-
-"""This submodule contains helper functions to help with quick prototyping
-using pymunk together with pygame.
-
-Intended to help with debugging and prototyping, not for actual production use
-in a full application. The methods contained in this module are opinionated
-about your coordinate system and not in any way optimized.
-"""
-
-__docformat__ = "reStructuredText"
-
-__all__ = [
- "DrawOptions",
- "get_mouse_pos",
- "to_pygame",
- "from_pygame",
- # "lighten",
- "positive_y_is_up",
-]
-
-from typing import Sequence, Tuple
-
-import numpy as np
-import pygame
-import pymunk
-from pymunk.space_debug_draw_options import SpaceDebugColor
-from pymunk.vec2d import Vec2d
-
-positive_y_is_up: bool = False
-"""Make increasing values of y point upwards.
-
-When True::
-
- y
- ^
- | . (3, 3)
- |
- | . (2, 2)
- |
- +------ > x
-
-When False::
-
- +------ > x
- |
- | . (2, 2)
- |
- | . (3, 3)
- v
- y
-
-"""
-
-
-class DrawOptions(pymunk.SpaceDebugDrawOptions):
- def __init__(self, surface: pygame.Surface) -> None:
- """Draw a pymunk.Space on a pygame.Surface object.
-
- Typical usage::
-
- >>> import pymunk
- >>> surface = pygame.Surface((10,10))
- >>> space = pymunk.Space()
- >>> options = pymunk.pygame_util.DrawOptions(surface)
- >>> space.debug_draw(options)
-
- You can control the color of a shape by setting shape.color to the color
- you want it drawn in::
-
- >>> c = pymunk.Circle(None, 10)
- >>> c.color = pygame.Color("pink")
-
- See pygame_util.demo.py for a full example
-
- Since pygame uses a coordinate system where y points down (in contrast
- to many other cases), you either have to make the physics simulation
- with Pymunk also behave in that way, or flip everything when you draw.
-
- The easiest is probably to just make the simulation behave the same
- way as Pygame does. In that way all coordinates used are in the same
- orientation and easy to reason about::
-
- >>> space = pymunk.Space()
- >>> space.gravity = (0, -1000)
- >>> body = pymunk.Body()
- >>> body.position = (0, 0) # will be positioned in the top left corner
- >>> space.debug_draw(options)
-
- To flip the drawing it's possible to set the module property
- :py:data:`positive_y_is_up` to True. Then the pygame drawing will flip
- the simulation upside down before drawing::
-
- >>> positive_y_is_up = True
- >>> body = pymunk.Body()
- >>> body.position = (0, 0)
- >>> # Body will be positioned in the bottom left corner
-
- :Parameters:
- surface : pygame.Surface
- Surface that the objects will be drawn on
- """
- self.surface = surface
- super().__init__()
-
- def draw_circle(
- self,
- pos: Vec2d,
- angle: float,
- radius: float,
- outline_color: SpaceDebugColor,
- fill_color: SpaceDebugColor,
- ) -> None:
- p = to_pygame(pos, self.surface)
-
- pygame.draw.circle(self.surface, fill_color.as_int(), p, round(radius), 0)
- pygame.draw.circle(self.surface, light_color(fill_color).as_int(), p, round(radius - 4), 0)
-
- # circle_edge = pos + Vec2d(radius, 0).rotated(angle)
- # p2 = to_pygame(circle_edge, self.surface)
- # line_r = 2 if radius > 20 else 1
- # pygame.draw.lines(self.surface, outline_color.as_int(), False, [p, p2], line_r)
-
- def draw_segment(self, a: Vec2d, b: Vec2d, color: SpaceDebugColor) -> None:
- p1 = to_pygame(a, self.surface)
- p2 = to_pygame(b, self.surface)
-
- pygame.draw.aalines(self.surface, color.as_int(), False, [p1, p2])
-
- def draw_fat_segment(
- self,
- a: Tuple[float, float],
- b: Tuple[float, float],
- radius: float,
- outline_color: SpaceDebugColor,
- fill_color: SpaceDebugColor,
- ) -> None:
- p1 = to_pygame(a, self.surface)
- p2 = to_pygame(b, self.surface)
-
- r = round(max(1, radius * 2))
- pygame.draw.lines(self.surface, fill_color.as_int(), False, [p1, p2], r)
- if r > 2:
- orthog = [abs(p2[1] - p1[1]), abs(p2[0] - p1[0])]
- if orthog[0] == 0 and orthog[1] == 0:
- return
- scale = radius / (orthog[0] * orthog[0] + orthog[1] * orthog[1]) ** 0.5
- orthog[0] = round(orthog[0] * scale)
- orthog[1] = round(orthog[1] * scale)
- points = [
- (p1[0] - orthog[0], p1[1] - orthog[1]),
- (p1[0] + orthog[0], p1[1] + orthog[1]),
- (p2[0] + orthog[0], p2[1] + orthog[1]),
- (p2[0] - orthog[0], p2[1] - orthog[1]),
- ]
- pygame.draw.polygon(self.surface, fill_color.as_int(), points)
- pygame.draw.circle(
- self.surface,
- fill_color.as_int(),
- (round(p1[0]), round(p1[1])),
- round(radius),
- )
- pygame.draw.circle(
- self.surface,
- fill_color.as_int(),
- (round(p2[0]), round(p2[1])),
- round(radius),
- )
-
- def draw_polygon(
- self,
- verts: Sequence[Tuple[float, float]],
- radius: float,
- outline_color: SpaceDebugColor,
- fill_color: SpaceDebugColor,
- ) -> None:
- ps = [to_pygame(v, self.surface) for v in verts]
- ps += [ps[0]]
-
- radius = 2
- pygame.draw.polygon(self.surface, light_color(fill_color).as_int(), ps)
-
- if radius > 0:
- for i in range(len(verts)):
- a = verts[i]
- b = verts[(i + 1) % len(verts)]
- self.draw_fat_segment(a, b, radius, fill_color, fill_color)
-
- def draw_dot(self, size: float, pos: Tuple[float, float], color: SpaceDebugColor) -> None:
- p = to_pygame(pos, self.surface)
- pygame.draw.circle(self.surface, color.as_int(), p, round(size), 0)
-
-
-def get_mouse_pos(surface: pygame.Surface) -> Tuple[int, int]:
- """Get position of the mouse pointer in pymunk coordinates."""
- p = pygame.mouse.get_pos()
- return from_pygame(p, surface)
-
-
-def to_pygame(p: Tuple[float, float], surface: pygame.Surface) -> Tuple[int, int]:
- """Convenience method to convert pymunk coordinates to pygame surface
- local coordinates.
-
- Note that in case positive_y_is_up is False, this function won't actually do
- anything except converting the point to integers.
- """
- if positive_y_is_up:
- return round(p[0]), surface.get_height() - round(p[1])
- else:
- return round(p[0]), round(p[1])
-
-
-def from_pygame(p: Tuple[float, float], surface: pygame.Surface) -> Tuple[int, int]:
- """Convenience method to convert pygame surface local coordinates to
- pymunk coordinates
- """
- return to_pygame(p, surface)
-
-
-def light_color(color: SpaceDebugColor):
- color = np.minimum(1.2 * np.float32([color.r, color.g, color.b, color.a]), np.float32([255]))
- color = SpaceDebugColor(r=color[0], g=color[1], b=color[2], a=color[3])
- return color
diff --git a/lerobot/common/envs/simxarm/env.py b/lerobot/common/envs/simxarm/env.py
deleted file mode 100644
index 8ce6b24c..00000000
--- a/lerobot/common/envs/simxarm/env.py
+++ /dev/null
@@ -1,237 +0,0 @@
-import importlib
-import logging
-from collections import deque
-from typing import Optional
-
-import einops
-import numpy as np
-import torch
-from tensordict import TensorDict
-from torchrl.data.tensor_specs import (
- BoundedTensorSpec,
- CompositeSpec,
- DiscreteTensorSpec,
- UnboundedContinuousTensorSpec,
-)
-from torchrl.envs.libs.gym import _gym_to_torchrl_spec_transform
-
-from lerobot.common.envs.abstract import AbstractEnv
-from lerobot.common.utils import set_global_seed
-
-MAX_NUM_ACTIONS = 4
-
-_has_gym = importlib.util.find_spec("gymnasium") is not None
-
-
-class SimxarmEnv(AbstractEnv):
- name = "simxarm"
- available_tasks = ["lift"]
-
- def __init__(
- self,
- task,
- frame_skip: int = 1,
- from_pixels: bool = False,
- pixels_only: bool = False,
- image_size=None,
- seed=1337,
- device="cpu",
- num_prev_obs=0,
- num_prev_action=0,
- ):
- super().__init__(
- task=task,
- frame_skip=frame_skip,
- from_pixels=from_pixels,
- pixels_only=pixels_only,
- image_size=image_size,
- seed=seed,
- device=device,
- num_prev_obs=num_prev_obs,
- num_prev_action=num_prev_action,
- )
-
- def _make_env(self):
- if not _has_gym:
- raise ImportError("Cannot import gymnasium.")
-
- import gymnasium as gym
-
- from lerobot.common.envs.simxarm.simxarm import TASKS
-
- if self.task not in TASKS:
- raise ValueError(f"Unknown task {self.task}. Must be one of {list(TASKS.keys())}")
-
- self._env = TASKS[self.task]["env"]()
-
- num_actions = len(TASKS[self.task]["action_space"])
- self._action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(num_actions,))
- self._action_padding = np.zeros((MAX_NUM_ACTIONS - num_actions), dtype=np.float32)
- if "w" not in TASKS[self.task]["action_space"]:
- self._action_padding[-1] = 1.0
-
- def render(self, mode="rgb_array", width=384, height=384):
- return self._env.render(mode, width=width, height=height)
-
- def _format_raw_obs(self, raw_obs):
- if self.from_pixels:
- image = self.render(mode="rgb_array", width=self.image_size, height=self.image_size)
- image = image.transpose(2, 0, 1) # (H, W, C) -> (C, H, W)
- image = torch.tensor(image.copy(), dtype=torch.uint8)
-
- obs = {"image": image}
-
- if not self.pixels_only:
- obs["state"] = torch.tensor(self._env.robot_state, dtype=torch.float32)
- else:
- obs = {"state": torch.tensor(raw_obs["observation"], dtype=torch.float32)}
-
- # obs = TensorDict(obs, batch_size=[])
- return obs
-
- def _reset(self, tensordict: Optional[TensorDict] = None):
- td = tensordict
- if td is None or td.is_empty():
- raw_obs = self._env.reset()
-
- obs = self._format_raw_obs(raw_obs)
-
- if self.num_prev_obs > 0:
- stacked_obs = {}
- if "image" in obs:
- self._prev_obs_image_queue = deque(
- [obs["image"]] * (self.num_prev_obs + 1), maxlen=(self.num_prev_obs + 1)
- )
- stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue))
- if "state" in obs:
- self._prev_obs_state_queue = deque(
- [obs["state"]] * (self.num_prev_obs + 1), maxlen=(self.num_prev_obs + 1)
- )
- stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue))
- obs = stacked_obs
-
- td = TensorDict(
- {
- "observation": TensorDict(obs, batch_size=[]),
- "done": torch.tensor([False], dtype=torch.bool),
- },
- batch_size=[],
- )
- else:
- raise NotImplementedError()
-
- return td
-
- def _step(self, tensordict: TensorDict):
- td = tensordict
- action = td["action"].numpy()
- # step expects shape=(4,) so we pad if necessary
- action = np.concatenate([action, self._action_padding])
- # TODO(rcadene): add info["is_success"] and info["success"] ?
- sum_reward = 0
-
- if action.ndim == 1:
- action = einops.repeat(action, "c -> t c", t=self.frame_skip)
- else:
- if self.frame_skip > 1:
- raise NotImplementedError()
-
- num_action_steps = action.shape[0]
- for i in range(num_action_steps):
- raw_obs, reward, done, info = self._env.step(action[i])
- sum_reward += reward
-
- obs = self._format_raw_obs(raw_obs)
-
- if self.num_prev_obs > 0:
- stacked_obs = {}
- if "image" in obs:
- self._prev_obs_image_queue.append(obs["image"])
- stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue))
- if "state" in obs:
- self._prev_obs_state_queue.append(obs["state"])
- stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue))
- obs = stacked_obs
-
- td = TensorDict(
- {
- "observation": self._format_raw_obs(raw_obs),
- "reward": torch.tensor([sum_reward], dtype=torch.float32),
- "done": torch.tensor([done], dtype=torch.bool),
- "success": torch.tensor([info["success"]], dtype=torch.bool),
- },
- batch_size=[],
- )
- return td
-
- def _make_spec(self):
- obs = {}
- if self.from_pixels:
- image_shape = (3, self.image_size, self.image_size)
- if self.num_prev_obs > 0:
- image_shape = (self.num_prev_obs + 1, *image_shape)
-
- obs["image"] = BoundedTensorSpec(
- low=0,
- high=255,
- shape=image_shape,
- dtype=torch.uint8,
- device=self.device,
- )
- if not self.pixels_only:
- state_shape = (len(self._env.robot_state),)
- if self.num_prev_obs > 0:
- state_shape = (self.num_prev_obs + 1, *state_shape)
-
- obs["state"] = UnboundedContinuousTensorSpec(
- shape=state_shape,
- dtype=torch.float32,
- device=self.device,
- )
- else:
- # TODO(rcadene): add observation_space achieved_goal and desired_goal?
- state_shape = self._env.observation_space["observation"].shape
- if self.num_prev_obs > 0:
- state_shape = (self.num_prev_obs + 1, *state_shape)
-
- obs["state"] = UnboundedContinuousTensorSpec(
- # TODO:
- shape=state_shape,
- dtype=torch.float32,
- device=self.device,
- )
- self.observation_spec = CompositeSpec({"observation": obs})
-
- self.action_spec = _gym_to_torchrl_spec_transform(
- self._action_space,
- device=self.device,
- )
-
- self.reward_spec = UnboundedContinuousTensorSpec(
- shape=(1,),
- dtype=torch.float32,
- device=self.device,
- )
-
- self.done_spec = CompositeSpec(
- {
- "done": DiscreteTensorSpec(
- 2,
- shape=(1,),
- dtype=torch.bool,
- device=self.device,
- ),
- "success": DiscreteTensorSpec(
- 2,
- shape=(1,),
- dtype=torch.bool,
- device=self.device,
- ),
- }
- )
-
- def _set_seed(self, seed: Optional[int]):
- set_global_seed(seed)
- self._seed = seed
- # TODO(aliberts): change self._reset so that it takes in a seed value
- logging.warning("simxarm env is not properly seeded")
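`_step()` above pads the policy action up to the simulator's fixed 4-dimensional command (x, y, z, gripper). A small illustration of that padding for a hypothetical task with an "xyz" action space:

```python
import numpy as np

MAX_NUM_ACTIONS = 4
task_action_space = "xyz"  # no "w" (gripper) dimension
action = np.array([0.1, -0.2, 0.05], dtype=np.float32)

padding = np.zeros(MAX_NUM_ACTIONS - len(task_action_space), dtype=np.float32)
if "w" not in task_action_space:
    padding[-1] = 1.0      # hold the gripper command fixed

padded = np.concatenate([action, padding])  # -> [0.1, -0.2, 0.05, 1.0]
```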
diff --git a/lerobot/common/envs/simxarm/simxarm/__init__.py b/lerobot/common/envs/simxarm/simxarm/__init__.py
deleted file mode 100644
index 903d6042..00000000
--- a/lerobot/common/envs/simxarm/simxarm/__init__.py
+++ /dev/null
@@ -1,166 +0,0 @@
-from collections import OrderedDict, deque
-
-import gymnasium as gym
-import numpy as np
-from gymnasium.wrappers import TimeLimit
-
-from lerobot.common.envs.simxarm.simxarm.tasks.base import Base as Base
-from lerobot.common.envs.simxarm.simxarm.tasks.lift import Lift
-from lerobot.common.envs.simxarm.simxarm.tasks.peg_in_box import PegInBox
-from lerobot.common.envs.simxarm.simxarm.tasks.push import Push
-from lerobot.common.envs.simxarm.simxarm.tasks.reach import Reach
-
-TASKS = OrderedDict(
- (
- (
- "reach",
- {
- "env": Reach,
- "action_space": "xyz",
- "episode_length": 50,
- "description": "Reach a target location with the end effector",
- },
- ),
- (
- "push",
- {
- "env": Push,
- "action_space": "xyz",
- "episode_length": 50,
- "description": "Push a cube to a target location",
- },
- ),
- (
- "peg_in_box",
- {
- "env": PegInBox,
- "action_space": "xyz",
- "episode_length": 50,
- "description": "Insert a peg into a box",
- },
- ),
- (
- "lift",
- {
- "env": Lift,
- "action_space": "xyzw",
- "episode_length": 50,
- "description": "Lift a cube above a height threshold",
- },
- ),
- )
-)
-
-
-class SimXarmWrapper(gym.Wrapper):
- """
- A wrapper for the SimXarm environments. This wrapper is used to
- convert the action and observation spaces to the correct format.
- """
-
- def __init__(self, env, task, obs_mode, image_size, action_repeat, frame_stack=1, channel_last=False):
- super().__init__(env)
- self._env = env
- self.obs_mode = obs_mode
- self.image_size = image_size
- self.action_repeat = action_repeat
- self.frame_stack = frame_stack
- self._frames = deque([], maxlen=frame_stack)
- self.channel_last = channel_last
- self._max_episode_steps = task["episode_length"] // action_repeat
-
- image_shape = (
- (image_size, image_size, 3 * frame_stack)
- if channel_last
- else (3 * frame_stack, image_size, image_size)
- )
- if obs_mode == "state":
- self.observation_space = env.observation_space["observation"]
- elif obs_mode == "rgb":
- self.observation_space = gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8)
- elif obs_mode == "all":
- self.observation_space = gym.spaces.Dict(
- state=gym.spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float32),
- rgb=gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
- )
- else:
- raise ValueError(f"Unknown obs_mode {obs_mode}. Must be one of [rgb, all, state]")
- self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(len(task["action_space"]),))
- self.action_padding = np.zeros(4 - len(task["action_space"]), dtype=np.float32)
- if "w" not in task["action_space"]:
- self.action_padding[-1] = 1.0
-
- def _render_obs(self):
- obs = self.render(mode="rgb_array", width=self.image_size, height=self.image_size)
- if not self.channel_last:
- obs = obs.transpose(2, 0, 1)
- return obs.copy()
-
- def _update_frames(self, reset=False):
- pixels = self._render_obs()
- self._frames.append(pixels)
- if reset:
- for _ in range(1, self.frame_stack):
- self._frames.append(pixels)
- assert len(self._frames) == self.frame_stack
-
- def transform_obs(self, obs, reset=False):
- if self.obs_mode == "state":
- return obs["observation"]
- elif self.obs_mode == "rgb":
- self._update_frames(reset=reset)
- rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0)
- return rgb_obs
- elif self.obs_mode == "all":
- self._update_frames(reset=reset)
- rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0)
- return OrderedDict((("rgb", rgb_obs), ("state", self.robot_state)))
- else:
- raise ValueError(f"Unknown obs_mode {self.obs_mode}. Must be one of [rgb, all, state]")
-
- def reset(self):
- return self.transform_obs(self._env.reset(), reset=True)
-
- def step(self, action):
- action = np.concatenate([action, self.action_padding])
- reward = 0.0
- for _ in range(self.action_repeat):
- obs, r, done, info = self._env.step(action)
- reward += r
- return self.transform_obs(obs), reward, done, info
-
- def render(self, mode="rgb_array", width=384, height=384, **kwargs):
- return self._env.render(mode, width=width, height=height)
-
- @property
- def state(self):
- return self._env.robot_state
-
-
-def make(task, obs_mode="state", image_size=84, action_repeat=1, frame_stack=1, channel_last=False, seed=0):
- """
- Create a new environment.
- Args:
- task (str): The task to create an environment for. Must be one of:
- - 'reach'
- - 'push'
- - 'peg_in_box'
- - 'lift'
- obs_mode (str): The observation mode to use. Must be one of:
- - 'state': Only state observations
- - 'rgb': RGB images
- - 'all': RGB images and state observations
- image_size (int): The size of the image observations
- action_repeat (int): The number of times to repeat the action
- seed (int): The random seed to use
- Returns:
- gym.Env: The environment
- """
- if task not in TASKS:
- raise ValueError(f"Unknown task {task}. Must be one of {list(TASKS.keys())}")
- env = TASKS[task]["env"]()
- env = TimeLimit(env, TASKS[task]["episode_length"])
- env = SimXarmWrapper(env, TASKS[task], obs_mode, image_size, action_repeat, frame_stack, channel_last)
- env.seed(seed)
-
- return env
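A hedged usage sketch of the removed `make()` factory (assumes the MuJoCo assets above are available and `gymnasium-robotics` is installed):

```python
from lerobot.common.envs.simxarm.simxarm import make

env = make("lift", obs_mode="all", image_size=84, action_repeat=2, seed=0)

obs = env.reset()                           # OrderedDict with "rgb" and "state" entries
action = env.action_space.sample()          # shape (4,) for the "xyzw" lift task
obs, reward, done, info = env.step(action)  # old-style 4-tuple gym API
env.close()
```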
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/__init__.py b/lerobot/common/envs/simxarm/simxarm/tasks/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/lift.xml b/lerobot/common/envs/simxarm/simxarm/tasks/assets/lift.xml
deleted file mode 100644
index 92231f92..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/lift.xml
+++ /dev/null
@@ -1,53 +0,0 @@
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/base_link.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/base_link.stl
deleted file mode 100644
index f1f52955..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/base_link.stl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:21fb81ae7fba19e3c6b2d2ca60c8051712ba273357287eb5a397d92d61c7a736
-size 1211434
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_inner.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_inner.stl
deleted file mode 100644
index 6cb88945..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_inner.stl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:be68ce180d11630a667a5f37f4dffcc3feebe4217d4bb3912c813b6d9ca3ec66
-size 3284
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_inner2.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_inner2.stl
deleted file mode 100644
index dab55ef5..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_inner2.stl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2c6448552bf6b1c4f17334d686a5320ce051bcdfe31431edf69303d8a570d1de
-size 3284
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_outer.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_outer.stl
deleted file mode 100644
index 21cf11fa..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/block_outer.stl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:748b9e197e6521914f18d1f6383a36f211136b3f33f2ad2a8c11b9f921c2cf86
-size 6284
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_finger.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_finger.stl
deleted file mode 100644
index 6bf4e502..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_finger.stl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a44756eb72f9c214cb37e61dc209cd7073fdff3e4271a7423476ef6fd090d2d4
-size 242684
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_inner_knuckle.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_inner_knuckle.stl
deleted file mode 100644
index 817c7e1d..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_inner_knuckle.stl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e8e48692ad26837bb3d6a97582c89784d09948fc09bfe4e5a59017859ff04dac
-size 366284
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_outer_knuckle.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_outer_knuckle.stl
deleted file mode 100644
index 010c0f3b..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/left_outer_knuckle.stl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:501665812b08d67e764390db781e839adc6896a9540301d60adf606f57648921
-size 22284
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link1.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link1.stl
deleted file mode 100644
index f2b676f2..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link1.stl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:34b541122df84d2ef5fcb91b715eb19659dc15ad8d44a191dde481f780265636
-size 184184
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link2.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link2.stl
deleted file mode 100644
index bf93580c..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link2.stl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:61e641cd47c169ecef779683332e00e4914db729bf02dfb61bfbe69351827455
-size 225584
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link3.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link3.stl
deleted file mode 100644
index d316d233..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link3.stl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9e2798e7946dd70046c95455d5ba96392d0b54a6069caba91dc4ca66e1379b42
-size 237084
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link4.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link4.stl
deleted file mode 100644
index f6d5fe94..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link4.stl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c757fee95f873191a0633c355c07a360032960771cabbd7593a6cdb0f1ffb089
-size 243684
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link5.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link5.stl
deleted file mode 100644
index e037b8b9..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link5.stl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:715ad5787c5dab57589937fd47289882707b5e1eb997e340d567785b02f4ec90
-size 229084
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link6.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link6.stl
deleted file mode 100644
index 198c5300..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link6.stl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:85b320aa420497827223d16d492bba8de091173374e361396fc7a5dad7bdb0cb
-size 399384
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link7.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link7.stl
deleted file mode 100644
index ce9a39ac..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link7.stl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:97115d848fbf802cb770cd9be639ae2af993103b9d9bbb0c50c943c738a36f18
-size 231684
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link_base.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link_base.stl
deleted file mode 100644
index 110b9531..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/link_base.stl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f6fcbc18258090eb56c21cfb17baa5ae43abc98b1958cd366f3a73b9898fc7f0
-size 2106184
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_finger.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_finger.stl
deleted file mode 100644
index 03f26e9a..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_finger.stl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c5dee87c7f37baf554b8456ebfe0b3e8ed0b22b8938bd1add6505c2ad6d32c7d
-size 242684
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_inner_knuckle.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_inner_knuckle.stl
deleted file mode 100644
index 8586f344..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_inner_knuckle.stl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b41dd2c2c550281bf78d7cc6fa117b14786700e5c453560a0cb5fd6dfa0ffb3e
-size 366284
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_outer_knuckle.stl b/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_outer_knuckle.stl
deleted file mode 100644
index ae7afc25..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/mesh/right_outer_knuckle.stl
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:75ca1107d0a42a0f03802a9a49cab48419b31851ee8935f8f1ca06be1c1c91e8
-size 22284
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/peg_in_box.xml b/lerobot/common/envs/simxarm/simxarm/tasks/assets/peg_in_box.xml
deleted file mode 100644
index 0f85459f..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/peg_in_box.xml
+++ /dev/null
@@ -1,74 +0,0 @@
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/push.xml b/lerobot/common/envs/simxarm/simxarm/tasks/assets/push.xml
deleted file mode 100644
index 42a78c8a..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/push.xml
+++ /dev/null
@@ -1,54 +0,0 @@
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/reach.xml b/lerobot/common/envs/simxarm/simxarm/tasks/assets/reach.xml
deleted file mode 100644
index ded6d209..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/reach.xml
+++ /dev/null
@@ -1,48 +0,0 @@
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/shared.xml b/lerobot/common/envs/simxarm/simxarm/tasks/assets/shared.xml
deleted file mode 100644
index ee56f8f0..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/shared.xml
+++ /dev/null
@@ -1,51 +0,0 @@
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/assets/xarm.xml b/lerobot/common/envs/simxarm/simxarm/tasks/assets/xarm.xml
deleted file mode 100644
index 023474d6..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/assets/xarm.xml
+++ /dev/null
@@ -1,88 +0,0 @@
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/base.py b/lerobot/common/envs/simxarm/simxarm/tasks/base.py
deleted file mode 100644
index b937b290..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/base.py
+++ /dev/null
@@ -1,145 +0,0 @@
-import os
-
-import mujoco
-import numpy as np
-from gymnasium_robotics.envs import robot_env
-
-from lerobot.common.envs.simxarm.simxarm.tasks import mocap
-
-
-class Base(robot_env.MujocoRobotEnv):
- """
- Superclass for all simxarm environments.
- Args:
- xml_name (str): name of the xml environment file
- gripper_rotation (list): initial rotation of the gripper (given as a quaternion)
- """
-
- def __init__(self, xml_name, gripper_rotation=None):
- if gripper_rotation is None:
- gripper_rotation = [0, 1, 0, 0]
- self.gripper_rotation = np.array(gripper_rotation, dtype=np.float32)
- self.center_of_table = np.array([1.655, 0.3, 0.63625])
- self.max_z = 1.2
- self.min_z = 0.2
- super().__init__(
- model_path=os.path.join(os.path.dirname(__file__), "assets", xml_name + ".xml"),
- n_substeps=20,
- n_actions=4,
- initial_qpos={},
- )
-
- @property
- def dt(self):
- return self.n_substeps * self.model.opt.timestep
-
- @property
- def eef(self):
- return self._utils.get_site_xpos(self.model, self.data, "grasp")
-
- @property
- def obj(self):
- return self._utils.get_site_xpos(self.model, self.data, "object_site")
-
- @property
- def robot_state(self):
- gripper_angle = self._utils.get_joint_qpos(self.model, self.data, "right_outer_knuckle_joint")
- return np.concatenate([self.eef, gripper_angle])
-
- def is_success(self):
- raise NotImplementedError()
-
- def get_reward(self):
- raise NotImplementedError()
-
- def _sample_goal(self):
- raise NotImplementedError()
-
- def get_obs(self):
- return self._get_obs()
-
- def _step_callback(self):
- self._mujoco.mj_forward(self.model, self.data)
-
- def _limit_gripper(self, gripper_pos, pos_ctrl):
- if gripper_pos[0] > self.center_of_table[0] - 0.105 + 0.15:
- pos_ctrl[0] = min(pos_ctrl[0], 0)
- if gripper_pos[0] < self.center_of_table[0] - 0.105 - 0.3:
- pos_ctrl[0] = max(pos_ctrl[0], 0)
- if gripper_pos[1] > self.center_of_table[1] + 0.3:
- pos_ctrl[1] = min(pos_ctrl[1], 0)
- if gripper_pos[1] < self.center_of_table[1] - 0.3:
- pos_ctrl[1] = max(pos_ctrl[1], 0)
- if gripper_pos[2] > self.max_z:
- pos_ctrl[2] = min(pos_ctrl[2], 0)
- if gripper_pos[2] < self.min_z:
- pos_ctrl[2] = max(pos_ctrl[2], 0)
- return pos_ctrl
-
- def _apply_action(self, action):
- assert action.shape == (4,)
- action = action.copy()
- pos_ctrl, gripper_ctrl = action[:3], action[3]
- pos_ctrl = self._limit_gripper(
- self._utils.get_site_xpos(self.model, self.data, "grasp"), pos_ctrl
- ) * (1 / self.n_substeps)
- gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
- mocap.apply_action(
- self.model,
- self._model_names,
- self.data,
- np.concatenate([pos_ctrl, self.gripper_rotation, gripper_ctrl]),
- )
-
- def _render_callback(self):
- self._mujoco.mj_forward(self.model, self.data)
-
- def _reset_sim(self):
- self.data.time = self.initial_time
- self.data.qpos[:] = np.copy(self.initial_qpos)
- self.data.qvel[:] = np.copy(self.initial_qvel)
- self._sample_goal()
- self._mujoco.mj_step(self.model, self.data, nstep=10)
- return True
-
- def _set_gripper(self, gripper_pos, gripper_rotation):
- self._utils.set_mocap_pos(self.model, self.data, "robot0:mocap", gripper_pos)
- self._utils.set_mocap_quat(self.model, self.data, "robot0:mocap", gripper_rotation)
- self._utils.set_joint_qpos(self.model, self.data, "right_outer_knuckle_joint", 0)
- self.data.qpos[10] = 0.0
- self.data.qpos[12] = 0.0
-
- def _env_setup(self, initial_qpos):
- for name, value in initial_qpos.items():
- self.data.set_joint_qpos(name, value)
- mocap.reset(self.model, self.data)
- mujoco.mj_forward(self.model, self.data)
- self._sample_goal()
- mujoco.mj_forward(self.model, self.data)
-
- def reset(self):
- self._reset_sim()
- return self._get_obs()
-
- def step(self, action):
- assert action.shape == (4,)
- assert self.action_space.contains(action), "{!r} ({}) invalid".format(action, type(action))
- self._apply_action(action)
- self._mujoco.mj_step(self.model, self.data, nstep=2)
- self._step_callback()
- obs = self._get_obs()
- reward = self.get_reward()
- done = False
- info = {"is_success": self.is_success(), "success": self.is_success()}
- return obs, reward, done, info
-
- def render(self, mode="rgb_array", width=384, height=384):
- self._render_callback()
- # HACK
- self.model.vis.global_.offwidth = width
- self.model.vis.global_.offheight = height
- return self.mujoco_renderer.render(mode)
-
- def close(self):
- if self.mujoco_renderer is not None:
- self.mujoco_renderer.close()
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/lift.py b/lerobot/common/envs/simxarm/simxarm/tasks/lift.py
deleted file mode 100644
index 0b11196c..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/lift.py
+++ /dev/null
@@ -1,100 +0,0 @@
-import numpy as np
-
-from lerobot.common.envs.simxarm.simxarm import Base
-
-
-class Lift(Base):
- def __init__(self):
- self._z_threshold = 0.15
- super().__init__("lift")
-
- @property
- def z_target(self):
- return self._init_z + self._z_threshold
-
- def is_success(self):
- return self.obj[2] >= self.z_target
-
- def get_reward(self):
- reach_dist = np.linalg.norm(self.obj - self.eef)
- reach_dist_xy = np.linalg.norm(self.obj[:-1] - self.eef[:-1])
- pick_completed = self.obj[2] >= (self.z_target - 0.01)
- obj_dropped = (self.obj[2] < (self._init_z + 0.005)) and (reach_dist > 0.02)
-
- # Reach
- if reach_dist < 0.05:
- reach_reward = -reach_dist + max(self._action[-1], 0) / 50
- elif reach_dist_xy < 0.05:
- reach_reward = -reach_dist
- else:
- z_bonus = np.linalg.norm(np.linalg.norm(self.obj[-1] - self.eef[-1]))
- reach_reward = -reach_dist - 2 * z_bonus
-
- # Pick
- if pick_completed and not obj_dropped:
- pick_reward = self.z_target
- elif (reach_dist < 0.1) and (self.obj[2] > (self._init_z + 0.005)):
- pick_reward = min(self.z_target, self.obj[2])
- else:
- pick_reward = 0
-
- return reach_reward / 100 + pick_reward
-
- def _get_obs(self):
- eef_velp = self._utils.get_site_xvelp(self.model, self.data, "grasp") * self.dt
- gripper_angle = self._utils.get_joint_qpos(self.model, self.data, "right_outer_knuckle_joint")
- eef = self.eef - self.center_of_table
-
- obj = self.obj - self.center_of_table
- obj_rot = self._utils.get_joint_qpos(self.model, self.data, "object_joint0")[-4:]
- obj_velp = self._utils.get_site_xvelp(self.model, self.data, "object_site") * self.dt
- obj_velr = self._utils.get_site_xvelr(self.model, self.data, "object_site") * self.dt
-
- obs = np.concatenate(
- [
- eef,
- eef_velp,
- obj,
- obj_rot,
- obj_velp,
- obj_velr,
- eef - obj,
- np.array(
- [
- np.linalg.norm(eef - obj),
- np.linalg.norm(eef[:-1] - obj[:-1]),
- self.z_target,
- self.z_target - obj[-1],
- self.z_target - eef[-1],
- ]
- ),
- gripper_angle,
- ],
- axis=0,
- )
- return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": eef}
-
- def _sample_goal(self):
- # Gripper
- gripper_pos = np.array([1.280, 0.295, 0.735]) + self.np_random.uniform(-0.05, 0.05, size=3)
- super()._set_gripper(gripper_pos, self.gripper_rotation)
-
- # Object
- object_pos = self.center_of_table - np.array([0.15, 0.10, 0.07])
- object_pos[0] += self.np_random.uniform(-0.05, 0.05, size=1)
- object_pos[1] += self.np_random.uniform(-0.05, 0.05, size=1)
- object_qpos = self._utils.get_joint_qpos(self.model, self.data, "object_joint0")
- object_qpos[:3] = object_pos
- self._utils.set_joint_qpos(self.model, self.data, "object_joint0", object_qpos)
- self._init_z = object_pos[2]
-
- # Goal
- return object_pos + np.array([0, 0, self._z_threshold])
-
- def reset(self):
- self._action = np.zeros(4)
- return super().reset()
-
- def step(self, action):
- self._action = action.copy()
- return super().step(action)
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/mocap.py b/lerobot/common/envs/simxarm/simxarm/tasks/mocap.py
deleted file mode 100644
index 4295bf19..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/mocap.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# import mujoco_py
-import mujoco
-import numpy as np
-
-
-def apply_action(model, model_names, data, action):
- if model.nmocap > 0:
- pos_action, gripper_action = np.split(action, (model.nmocap * 7,))
- if data.ctrl is not None:
- for i in range(gripper_action.shape[0]):
- data.ctrl[i] = gripper_action[i]
- pos_action = pos_action.reshape(model.nmocap, 7)
- pos_delta, quat_delta = pos_action[:, :3], pos_action[:, 3:]
- reset_mocap2body_xpos(model, model_names, data)
- data.mocap_pos[:] = data.mocap_pos + pos_delta
- data.mocap_quat[:] = data.mocap_quat + quat_delta
-
-
-def reset(model, data):
- if model.nmocap > 0 and model.eq_data is not None:
- for i in range(model.eq_data.shape[0]):
- # if sim.model.eq_type[i] == mujoco_py.const.EQ_WELD:
- if model.eq_type[i] == mujoco.mjtEq.mjEQ_WELD:
- # model.eq_data[i, :] = np.array([0., 0., 0., 1., 0., 0., 0.])
- model.eq_data[i, :] = np.array(
- [
- 0.0,
- 0.0,
- 0.0,
- 1.0,
- 0.0,
- 0.0,
- 0.0,
- 0.0,
- 0.0,
- 0.0,
- 0.0,
- ]
- )
- # sim.forward()
- mujoco.mj_forward(model, data)
-
-
-def reset_mocap2body_xpos(model, model_names, data):
- if model.eq_type is None or model.eq_obj1id is None or model.eq_obj2id is None:
- return
-
- # For all weld constraints
- for eq_type, obj1_id, obj2_id in zip(model.eq_type, model.eq_obj1id, model.eq_obj2id, strict=False):
- # if eq_type != mujoco_py.const.EQ_WELD:
- if eq_type != mujoco.mjtEq.mjEQ_WELD:
- continue
- # body2 = model.body_id2name(obj2_id)
- body2 = model_names.body_id2name[obj2_id]
- if body2 == "B0" or body2 == "B9" or body2 == "B1":
- continue
- mocap_id = model.body_mocapid[obj1_id]
- if mocap_id != -1:
- # obj1 is the mocap, obj2 is the welded body
- body_idx = obj2_id
- else:
- # obj2 is the mocap, obj1 is the welded body
- mocap_id = model.body_mocapid[obj2_id]
- body_idx = obj1_id
- assert mocap_id != -1
- data.mocap_pos[mocap_id][:] = data.xpos[body_idx]
- data.mocap_quat[mocap_id][:] = data.xquat[body_idx]
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/peg_in_box.py b/lerobot/common/envs/simxarm/simxarm/tasks/peg_in_box.py
deleted file mode 100644
index 42e41520..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/peg_in_box.py
+++ /dev/null
@@ -1,86 +0,0 @@
-import numpy as np
-
-from lerobot.common.envs.simxarm.simxarm import Base
-
-
-class PegInBox(Base):
- def __init__(self):
- super().__init__("peg_in_box")
-
- def _reset_sim(self):
- self._act_magnitude = 0
- super()._reset_sim()
- for _ in range(10):
- self._apply_action(np.array([0, 0, 0, 1], dtype=np.float32))
- self.sim.step()
-
- @property
- def box(self):
- return self.sim.data.get_site_xpos("box_site")
-
- def is_success(self):
- return np.linalg.norm(self.obj - self.box) <= 0.05
-
- def get_reward(self):
- dist_xy = np.linalg.norm(self.obj[:2] - self.box[:2])
- dist_xyz = np.linalg.norm(self.obj - self.box)
- return float(dist_xy <= 0.045) * (2 - 6 * dist_xyz) - 0.2 * np.square(self._act_magnitude) - dist_xy
-
- def _get_obs(self):
- eef_velp = self.sim.data.get_site_xvelp("grasp") * self.dt
- gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint")
- eef, box = self.eef - self.center_of_table, self.box - self.center_of_table
-
- obj = self.obj - self.center_of_table
- obj_rot = self.sim.data.get_joint_qpos("object_joint0")[-4:]
- obj_velp = self.sim.data.get_site_xvelp("object_site") * self.dt
- obj_velr = self.sim.data.get_site_xvelr("object_site") * self.dt
-
- obs = np.concatenate(
- [
- eef,
- eef_velp,
- box,
- obj,
- obj_rot,
- obj_velp,
- obj_velr,
- eef - box,
- eef - obj,
- obj - box,
- np.array(
- [
- np.linalg.norm(eef - box),
- np.linalg.norm(eef - obj),
- np.linalg.norm(obj - box),
- gripper_angle,
- ]
- ),
- ],
- axis=0,
- )
- return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": box}
-
- def _sample_goal(self):
- # Gripper
- gripper_pos = np.array([1.280, 0.295, 0.9]) + self.np_random.uniform(-0.05, 0.05, size=3)
- super()._set_gripper(gripper_pos, self.gripper_rotation)
-
- # Object
- object_pos = gripper_pos - np.array([0, 0, 0.06]) + self.np_random.uniform(-0.005, 0.005, size=3)
- object_qpos = self.sim.data.get_joint_qpos("object_joint0")
- object_qpos[:3] = object_pos
- self.sim.data.set_joint_qpos("object_joint0", object_qpos)
-
- # Box
- box_pos = np.array([1.61, 0.18, 0.58])
- box_pos[:2] += self.np_random.uniform(-0.11, 0.11, size=2)
- box_qpos = self.sim.data.get_joint_qpos("box_joint0")
- box_qpos[:3] = box_pos
- self.sim.data.set_joint_qpos("box_joint0", box_qpos)
-
- return self.box
-
- def step(self, action):
- self._act_magnitude = np.linalg.norm(action[:3])
- return super().step(action)
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/push.py b/lerobot/common/envs/simxarm/simxarm/tasks/push.py
deleted file mode 100644
index 36c4a550..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/push.py
+++ /dev/null
@@ -1,78 +0,0 @@
-import numpy as np
-
-from lerobot.common.envs.simxarm.simxarm import Base
-
-
-class Push(Base):
- def __init__(self):
- super().__init__("push")
-
- def _reset_sim(self):
- self._act_magnitude = 0
- super()._reset_sim()
-
- def is_success(self):
- return np.linalg.norm(self.obj - self.goal) <= 0.05
-
- def get_reward(self):
- dist = np.linalg.norm(self.obj - self.goal)
- penalty = self._act_magnitude**2
- return -(dist + 0.15 * penalty)
-
- def _get_obs(self):
- eef_velp = self.sim.data.get_site_xvelp("grasp") * self.dt
- gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint")
- eef, goal = self.eef - self.center_of_table, self.goal - self.center_of_table
-
- obj = self.obj - self.center_of_table
- obj_rot = self.sim.data.get_joint_qpos("object_joint0")[-4:]
- obj_velp = self.sim.data.get_site_xvelp("object_site") * self.dt
- obj_velr = self.sim.data.get_site_xvelr("object_site") * self.dt
-
- obs = np.concatenate(
- [
- eef,
- eef_velp,
- goal,
- obj,
- obj_rot,
- obj_velp,
- obj_velr,
- eef - goal,
- eef - obj,
- obj - goal,
- np.array(
- [
- np.linalg.norm(eef - goal),
- np.linalg.norm(eef - obj),
- np.linalg.norm(obj - goal),
- gripper_angle,
- ]
- ),
- ],
- axis=0,
- )
- return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": goal}
-
- def _sample_goal(self):
- # Gripper
- gripper_pos = np.array([1.280, 0.295, 0.735]) + self.np_random.uniform(-0.05, 0.05, size=3)
- super()._set_gripper(gripper_pos, self.gripper_rotation)
-
- # Object
- object_pos = self.center_of_table - np.array([0.25, 0, 0.07])
- object_pos[0] += self.np_random.uniform(-0.08, 0.08, size=1)
- object_pos[1] += self.np_random.uniform(-0.08, 0.08, size=1)
- object_qpos = self.sim.data.get_joint_qpos("object_joint0")
- object_qpos[:3] = object_pos
- self.sim.data.set_joint_qpos("object_joint0", object_qpos)
-
- # Goal
- self.goal = np.array([1.600, 0.200, 0.545])
- self.goal[:2] += self.np_random.uniform(-0.1, 0.1, size=2)
- self.sim.model.site_pos[self.sim.model.site_name2id("target0")] = self.goal
- return self.goal
-
- def step(self, action):
- self._act_magnitude = np.linalg.norm(action[:3])
- return super().step(action)
diff --git a/lerobot/common/envs/simxarm/simxarm/tasks/reach.py b/lerobot/common/envs/simxarm/simxarm/tasks/reach.py
deleted file mode 100644
index 941a586f..00000000
--- a/lerobot/common/envs/simxarm/simxarm/tasks/reach.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import numpy as np
-
-from lerobot.common.envs.simxarm.simxarm import Base
-
-
-class Reach(Base):
- def __init__(self):
- super().__init__("reach")
-
- def _reset_sim(self):
- self._act_magnitude = 0
- super()._reset_sim()
-
- def is_success(self):
- return np.linalg.norm(self.eef - self.goal) <= 0.05
-
- def get_reward(self):
- dist = np.linalg.norm(self.eef - self.goal)
- penalty = self._act_magnitude**2
- return -(dist + 0.15 * penalty)
-
- def _get_obs(self):
- eef_velp = self.sim.data.get_site_xvelp("grasp") * self.dt
- gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint")
- eef, goal = self.eef - self.center_of_table, self.goal - self.center_of_table
- obs = np.concatenate(
- [eef, eef_velp, goal, eef - goal, np.array([np.linalg.norm(eef - goal), gripper_angle])], axis=0
- )
- return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": goal}
-
- def _sample_goal(self):
- # Gripper
- gripper_pos = np.array([1.280, 0.295, 0.735]) + self.np_random.uniform(-0.05, 0.05, size=3)
- super()._set_gripper(gripper_pos, self.gripper_rotation)
-
- # Goal
- self.goal = np.array([1.550, 0.287, 0.580])
- self.goal[:2] += self.np_random.uniform(-0.125, 0.125, size=2)
- self.sim.model.site_pos[self.sim.model.site_name2id("target0")] = self.goal
- return self.goal
-
- def step(self, action):
- self._act_magnitude = np.linalg.norm(action[:3])
- return super().step(action)
diff --git a/lerobot/common/envs/utils.py b/lerobot/common/envs/utils.py
new file mode 100644
index 00000000..1696ddbe
--- /dev/null
+++ b/lerobot/common/envs/utils.py
@@ -0,0 +1,32 @@
+import einops
+import torch
+
+from lerobot.common.transforms import apply_inverse_transform
+
+
+def preprocess_observation(observation, transform=None):
+ # map to expected inputs for the policy
+ obs = {
+ "observation.image": torch.from_numpy(observation["pixels"]).float(),
+ "observation.state": torch.from_numpy(observation["agent_pos"]).float(),
+ }
+ # convert to (b c h w) torch format
+ obs["observation.image"] = einops.rearrange(obs["observation.image"], "b h w c -> b c h w")
+
+ # apply same transforms as in training
+ if transform is not None:
+ for key in obs:
+ obs[key] = torch.stack([transform({key: item})[key] for item in obs[key]])
+
+ return obs
+
+
+def postprocess_action(action, transform=None):
+    action = action.to("cpu")
+    # `action` is a batch (num_envs, action_dim) rather than a single item (action_dim);
+    # we assume applying the inverse transform on a batch works the same way.
+    action = apply_inverse_transform({"action": action}, transform)["action"].numpy()
+    assert (
+        action.ndim == 2
+    ), "we assume the dimensions are respectively the number of parallel envs and the action dimension"
+    return action
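
The two helpers above are a straight move of the conversion logic out of `lerobot/scripts/eval.py` (see the corresponding deletion further down). A minimal, hedged usage sketch of `preprocess_observation` with a vectorized gym env (`gym_pusht` comes from this repo's extras; the transform is omitted here, whereas `eval.py` passes `dataset.transform`):

```python
# Hedged sketch: convert a raw gym observation dict into the policy's expected format.
import gym_pusht  # noqa: F401  (registers the PushT envs)
import gymnasium as gym
import torch

from lerobot.common.envs.utils import preprocess_observation

env = gym.vector.SyncVectorEnv(
    [lambda: gym.make("gym_pusht/PushTPixels-v0", obs_type="pixels_agent_pos")]
)
observation, _ = env.reset(seed=0)

# numpy arrays keyed "pixels"/"agent_pos" -> float tensors keyed "observation.*",
# with images rearranged from (b, h, w, c) to (b, c, h, w)
obs = preprocess_observation(observation, transform=None)
assert obs["observation.image"].dtype == torch.float32
assert obs["observation.image"].ndim == 4
```

`postprocess_action` goes the other way: it moves the action batch to CPU, applies the inverse of the training transform, and returns a `(num_envs, action_dim)` numpy array ready for `env.step`.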
diff --git a/lerobot/common/policies/act/policy.py b/lerobot/common/policies/act/policy.py
index 1aacc41d..f42c6a3c 100644
--- a/lerobot/common/policies/act/policy.py
+++ b/lerobot/common/policies/act/policy.py
@@ -168,21 +168,15 @@ class ActionChunkingTransformerPolicy(nn.Module):
nn.init.xavier_uniform_(p)
@torch.no_grad()
- def select_actions(self, observation, step_count):
- # TODO(rcadene): remove unused step_count
- del step_count
-
+ def select_actions(self, batch, *_):
+ # TODO(now): Implement queueing mechanism.
self.eval()
+ self._preprocess_batch(batch)
- # TODO(rcadene): remove hack
- # add 1 camera dimension
- observation["image", "top"] = observation["image", "top"].unsqueeze(1)
-
- obs_dict = {
- "image": observation["image", "top"],
- "agent_pos": observation["state"],
- }
- action = self._forward(qpos=obs_dict["agent_pos"] * 0.182, image=obs_dict["image"])
+ # TODO(now): What's up with this 0.182?
+ action = self.forward(
+ robot_state=batch["observation.state"] * 0.182, image=batch["observation.images.top"]
+ )
if self.cfg.temporal_agg:
# TODO(rcadene): implement temporal aggregation
@@ -197,9 +191,7 @@ class ActionChunkingTransformerPolicy(nn.Module):
# exp_weights = torch.from_numpy(exp_weights).cuda().unsqueeze(dim=1)
# raw_action = (actions_for_curr_step * exp_weights).sum(dim=0, keepdim=True)
- # take first predicted action or n first actions
- action = action[: self.n_action_steps]
- return action
+ return action[: self.n_action_steps]
def __call__(self, *args, **kwargs):
# TODO(now): Temporary bridge.
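
For reference, the temporal aggregation that the TODO above refers to (and that the commented-out lines hint at) could look roughly like the following. This is a hedged sketch of the exponential weighting scheme from the ACT paper, not something this PR ships; `actions_for_curr_step` is assumed to hold every previously predicted action for the current timestep:

```python
# Hedged sketch of ACT-style temporal aggregation (not part of this PR).
import torch


def temporal_aggregate(actions_for_curr_step: torch.Tensor, k: float = 0.01) -> torch.Tensor:
    """Blend overlapping chunk predictions for one timestep.

    actions_for_curr_step: (num_predictions, action_dim), ordered oldest first.
    """
    num_predictions = actions_for_curr_step.shape[0]
    # w_i = exp(-k * i): the oldest prediction gets the largest weight.
    exp_weights = torch.exp(-k * torch.arange(num_predictions, dtype=torch.float32))
    exp_weights = (exp_weights / exp_weights.sum()).unsqueeze(-1)
    return (actions_for_curr_step * exp_weights).sum(dim=0, keepdim=True)
```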
diff --git a/lerobot/common/policies/tdmpc/policy.py b/lerobot/common/policies/tdmpc/policy.py
index 64dcc94d..85700913 100644
--- a/lerobot/common/policies/tdmpc/policy.py
+++ b/lerobot/common/policies/tdmpc/policy.py
@@ -1,6 +1,7 @@
# ruff: noqa: N806
import time
+from collections import deque
from copy import deepcopy
import einops
@@ -9,7 +10,7 @@ import torch
import torch.nn as nn
import lerobot.common.policies.tdmpc.helper as h
-from lerobot.common.policies.abstract import AbstractPolicy
+from lerobot.common.policies.utils import populate_queues
from lerobot.common.utils import get_safe_torch_device
FIRST_FRAME = 0
@@ -87,16 +88,18 @@ class TOLD(nn.Module):
return torch.min(Q1, Q2) if return_type == "min" else (Q1 + Q2) / 2
-class TDMPCPolicy(AbstractPolicy):
+class TDMPCPolicy(nn.Module):
"""Implementation of TD-MPC learning + inference."""
name = "tdmpc"
- def __init__(self, cfg, device):
- super().__init__(None)
+ def __init__(self, cfg, n_obs_steps, n_action_steps, device):
+ super().__init__()
self.action_dim = cfg.action_dim
self.cfg = cfg
+ self.n_obs_steps = n_obs_steps
+ self.n_action_steps = n_action_steps
self.device = get_safe_torch_device(device)
self.std = h.linear_schedule(cfg.std_schedule, 0)
self.model = TOLD(cfg)
@@ -128,20 +131,42 @@ class TDMPCPolicy(AbstractPolicy):
self.model.load_state_dict(d["model"])
self.model_target.load_state_dict(d["model_target"])
- @torch.no_grad()
- def select_actions(self, observation, step_count):
- if observation["image"].shape[0] != 1:
- raise NotImplementedError("Batch size > 1 not handled")
-
- t0 = step_count.item() == 0
-
- obs = {
- # TODO(rcadene): remove contiguous hack...
- "rgb": observation["image"].contiguous(),
- "state": observation["state"].contiguous(),
+ def reset(self):
+ """
+ Clear observation and action queues. Should be called on `env.reset()`
+ """
+ self._queues = {
+ "observation.image": deque(maxlen=self.n_obs_steps),
+ "observation.state": deque(maxlen=self.n_obs_steps),
+ "action": deque(maxlen=self.n_action_steps),
}
- # Note: unsqueeze needed because `act` still uses non-batch logic.
- action = self.act(obs, t0=t0, step=self.step.item()).unsqueeze(0)
+
+ @torch.no_grad()
+ def select_action(self, batch, step):
+ assert "observation.image" in batch
+ assert "observation.state" in batch
+ assert len(batch) == 2
+
+ self._queues = populate_queues(self._queues, batch)
+
+ t0 = step == 0
+
+ if len(self._queues["action"]) == 0:
+ batch = {key: torch.stack(list(self._queues[key]), dim=1) for key in batch}
+
+ actions = []
+            batch_size = batch["observation.image"].shape[0]
+ for i in range(batch_size):
+ obs = {
+ "rgb": batch["observation.image"][[i]],
+ "state": batch["observation.state"][[i]],
+ }
+ # Note: unsqueeze needed because `act` still uses non-batch logic.
+ action = self.act(obs, t0=t0, step=self.step)
+ actions.append(action)
+            action = torch.stack(actions)
+
+            # Fill the action queue so `popleft` below has actions to return; repeating the
+            # planned action `n_action_steps` times mirrors the removed env `action_repeat`.
+            self._queues["action"].extend([action] * self.n_action_steps)
+
+        action = self._queues["action"].popleft()
return action
@torch.no_grad()
@@ -293,97 +318,97 @@ class TDMPCPolicy(AbstractPolicy):
td_target = reward + self.cfg.discount * mask * next_v
return td_target
- def update(self, replay_buffer, step, demo_buffer=None):
+ def forward(self, batch, step):
"""Main update function. Corresponds to one iteration of the model learning."""
start_time = time.time()
- num_slices = self.cfg.batch_size
- batch_size = self.cfg.horizon * num_slices
+ # num_slices = self.cfg.batch_size
+ # batch_size = self.cfg.horizon * num_slices
- if demo_buffer is None:
- demo_batch_size = 0
- else:
- # Update oversampling ratio
- demo_pc_batch = h.linear_schedule(self.cfg.demo_schedule, step)
- demo_num_slices = int(demo_pc_batch * self.batch_size)
- demo_batch_size = self.cfg.horizon * demo_num_slices
- batch_size -= demo_batch_size
- num_slices -= demo_num_slices
- replay_buffer._sampler.num_slices = num_slices
- demo_buffer._sampler.num_slices = demo_num_slices
+ # if demo_buffer is None:
+ # demo_batch_size = 0
+ # else:
+ # # Update oversampling ratio
+ # demo_pc_batch = h.linear_schedule(self.cfg.demo_schedule, step)
+ # demo_num_slices = int(demo_pc_batch * self.batch_size)
+ # demo_batch_size = self.cfg.horizon * demo_num_slices
+ # batch_size -= demo_batch_size
+ # num_slices -= demo_num_slices
+ # replay_buffer._sampler.num_slices = num_slices
+ # demo_buffer._sampler.num_slices = demo_num_slices
- assert demo_batch_size % self.cfg.horizon == 0
- assert demo_batch_size % demo_num_slices == 0
+ # assert demo_batch_size % self.cfg.horizon == 0
+ # assert demo_batch_size % demo_num_slices == 0
- assert batch_size % self.cfg.horizon == 0
- assert batch_size % num_slices == 0
+ # assert batch_size % self.cfg.horizon == 0
+ # assert batch_size % num_slices == 0
- # Sample from interaction dataset
+ # # Sample from interaction dataset
- def process_batch(batch, horizon, num_slices):
- # trajectory t = 256, horizon h = 5
- # (t h) ... -> h t ...
- batch = batch.reshape(num_slices, horizon).transpose(1, 0).contiguous()
+ # def process_batch(batch, horizon, num_slices):
+ # # trajectory t = 256, horizon h = 5
+ # # (t h) ... -> h t ...
+ # batch = batch.reshape(num_slices, horizon).transpose(1, 0).contiguous()
- obs = {
- "rgb": batch["observation", "image"][FIRST_FRAME].to(self.device, non_blocking=True),
- "state": batch["observation", "state"][FIRST_FRAME].to(self.device, non_blocking=True),
- }
- action = batch["action"].to(self.device, non_blocking=True)
- next_obses = {
- "rgb": batch["next", "observation", "image"].to(self.device, non_blocking=True),
- "state": batch["next", "observation", "state"].to(self.device, non_blocking=True),
- }
- reward = batch["next", "reward"].to(self.device, non_blocking=True)
+ # obs = {
+ # "rgb": batch["observation", "image"][FIRST_FRAME].to(self.device, non_blocking=True),
+ # "state": batch["observation", "state"][FIRST_FRAME].to(self.device, non_blocking=True),
+ # }
+ # action = batch["action"].to(self.device, non_blocking=True)
+ # next_obses = {
+ # "rgb": batch["next", "observation", "image"].to(self.device, non_blocking=True),
+ # "state": batch["next", "observation", "state"].to(self.device, non_blocking=True),
+ # }
+ # reward = batch["next", "reward"].to(self.device, non_blocking=True)
- idxs = batch["index"][FIRST_FRAME].to(self.device, non_blocking=True)
- weights = batch["_weight"][FIRST_FRAME, :, None].to(self.device, non_blocking=True)
+ # idxs = batch["index"][FIRST_FRAME].to(self.device, non_blocking=True)
+ # weights = batch["_weight"][FIRST_FRAME, :, None].to(self.device, non_blocking=True)
- # TODO(rcadene): rearrange directly in offline dataset
- if reward.ndim == 2:
- reward = einops.rearrange(reward, "h t -> h t 1")
+ # # TODO(rcadene): rearrange directly in offline dataset
+ # if reward.ndim == 2:
+ # reward = einops.rearrange(reward, "h t -> h t 1")
- assert reward.ndim == 3
- assert reward.shape == (horizon, num_slices, 1)
- # We dont use `batch["next", "done"]` since it only indicates the end of an
- # episode, but not the end of the trajectory of an episode.
- # Neither does `batch["next", "terminated"]`
- done = torch.zeros_like(reward, dtype=torch.bool, device=reward.device)
- mask = torch.ones_like(reward, dtype=torch.bool, device=reward.device)
- return obs, action, next_obses, reward, mask, done, idxs, weights
+ # assert reward.ndim == 3
+ # assert reward.shape == (horizon, num_slices, 1)
+ # # We dont use `batch["next", "done"]` since it only indicates the end of an
+ # # episode, but not the end of the trajectory of an episode.
+ # # Neither does `batch["next", "terminated"]`
+ # done = torch.zeros_like(reward, dtype=torch.bool, device=reward.device)
+ # mask = torch.ones_like(reward, dtype=torch.bool, device=reward.device)
+ # return obs, action, next_obses, reward, mask, done, idxs, weights
- batch = replay_buffer.sample(batch_size) if self.cfg.balanced_sampling else replay_buffer.sample()
+ # batch = replay_buffer.sample(batch_size) if self.cfg.balanced_sampling else replay_buffer.sample()
- obs, action, next_obses, reward, mask, done, idxs, weights = process_batch(
- batch, self.cfg.horizon, num_slices
- )
+ # obs, action, next_obses, reward, mask, done, idxs, weights = process_batch(
+ # batch, self.cfg.horizon, num_slices
+ # )
# Sample from demonstration dataset
- if demo_batch_size > 0:
- demo_batch = demo_buffer.sample(demo_batch_size)
- (
- demo_obs,
- demo_action,
- demo_next_obses,
- demo_reward,
- demo_mask,
- demo_done,
- demo_idxs,
- demo_weights,
- ) = process_batch(demo_batch, self.cfg.horizon, demo_num_slices)
+ # if demo_batch_size > 0:
+ # demo_batch = demo_buffer.sample(demo_batch_size)
+ # (
+ # demo_obs,
+ # demo_action,
+ # demo_next_obses,
+ # demo_reward,
+ # demo_mask,
+ # demo_done,
+ # demo_idxs,
+ # demo_weights,
+ # ) = process_batch(demo_batch, self.cfg.horizon, demo_num_slices)
- if isinstance(obs, dict):
- obs = {k: torch.cat([obs[k], demo_obs[k]]) for k in obs}
- next_obses = {k: torch.cat([next_obses[k], demo_next_obses[k]], dim=1) for k in next_obses}
- else:
- obs = torch.cat([obs, demo_obs])
- next_obses = torch.cat([next_obses, demo_next_obses], dim=1)
- action = torch.cat([action, demo_action], dim=1)
- reward = torch.cat([reward, demo_reward], dim=1)
- mask = torch.cat([mask, demo_mask], dim=1)
- done = torch.cat([done, demo_done], dim=1)
- idxs = torch.cat([idxs, demo_idxs])
- weights = torch.cat([weights, demo_weights])
+ # if isinstance(obs, dict):
+ # obs = {k: torch.cat([obs[k], demo_obs[k]]) for k in obs}
+ # next_obses = {k: torch.cat([next_obses[k], demo_next_obses[k]], dim=1) for k in next_obses}
+ # else:
+ # obs = torch.cat([obs, demo_obs])
+ # next_obses = torch.cat([next_obses, demo_next_obses], dim=1)
+ # action = torch.cat([action, demo_action], dim=1)
+ # reward = torch.cat([reward, demo_reward], dim=1)
+ # mask = torch.cat([mask, demo_mask], dim=1)
+ # done = torch.cat([done, demo_done], dim=1)
+ # idxs = torch.cat([idxs, demo_idxs])
+ # weights = torch.cat([weights, demo_weights])
# Apply augmentations
aug_tf = h.aug(self.cfg)
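
`select_action` above leans on `populate_queues` from `lerobot/common/policies/utils.py`, which is not part of this diff. A hedged sketch of its assumed behavior, so the queueing logic above can be read in isolation (on the first call after `reset()` the queues are filled by repeating the initial observation):

```python
# Hedged sketch of the assumed `populate_queues` behavior (the real helper lives in
# lerobot/common/policies/utils.py and may differ in details).
from collections import deque

import torch


def populate_queues(queues: dict[str, deque], batch: dict[str, torch.Tensor]) -> dict[str, deque]:
    for key in batch:
        if key not in queues:
            continue
        if len(queues[key]) != queues[key].maxlen:
            # first call after reset: repeat the initial observation until the queue is full
            while len(queues[key]) != queues[key].maxlen:
                queues[key].append(batch[key])
        else:
            # afterwards: append the latest observation, evicting the oldest one
            queues[key].append(batch[key])
    return queues
```

The `"action"` queue is left untouched here because `batch` only carries observation keys; it is filled inside `select_action` and drained one step at a time.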
diff --git a/lerobot/configs/env/simxarm.yaml b/lerobot/configs/env/simxarm.yaml
index f79db8f7..843f80c6 100644
--- a/lerobot/configs/env/simxarm.yaml
+++ b/lerobot/configs/env/simxarm.yaml
@@ -17,7 +17,7 @@ env:
from_pixels: True
pixels_only: False
image_size: 84
- action_repeat: 2
+  # action_repeat: 2  # can be removed if the policy uses n_action_steps=2
episode_length: 25
fps: ${fps}
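
Commenting out `action_repeat: 2` works because the policy-side action queue now plays the same role: with `n_action_steps: 2` (see the tdmpc.yaml change below), each planning call feeds two consecutive environment steps. A toy illustration of that equivalence (illustrative values only, not code from this PR):

```python
# Toy illustration: an action queue with n_action_steps=2 reproduces action_repeat=2.
from collections import deque

n_action_steps = 2
queue = deque(maxlen=n_action_steps)
plans = iter(["a0", "a1"])  # stand-ins for two successive TD-MPC planning calls

executed = []
for _ in range(4):  # four environment steps
    if len(queue) == 0:
        queue.extend([next(plans)] * n_action_steps)
    executed.append(queue.popleft())

assert executed == ["a0", "a0", "a1", "a1"]  # each plan is executed for two steps
```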
diff --git a/lerobot/configs/policy/tdmpc.yaml b/lerobot/configs/policy/tdmpc.yaml
index ff0e6b04..5d5d8b62 100644
--- a/lerobot/configs/policy/tdmpc.yaml
+++ b/lerobot/configs/policy/tdmpc.yaml
@@ -1,6 +1,6 @@
# @package _global_
-n_action_steps: 1
+n_action_steps: 2
n_obs_steps: 1
policy:
diff --git a/lerobot/scripts/eval.py b/lerobot/scripts/eval.py
index 09399878..e7ba53fc 100644
--- a/lerobot/scripts/eval.py
+++ b/lerobot/scripts/eval.py
@@ -44,9 +44,9 @@ from huggingface_hub import snapshot_download
from lerobot.common.datasets.factory import make_dataset
from lerobot.common.envs.factory import make_env
+from lerobot.common.envs.utils import postprocess_action, preprocess_observation
from lerobot.common.logger import log_output_dir
from lerobot.common.policies.factory import make_policy
-from lerobot.common.transforms import apply_inverse_transform
from lerobot.common.utils import get_safe_torch_device, init_hydra_config, init_logging, set_global_seed
@@ -54,34 +54,6 @@ def write_video(video_path, stacked_frames, fps):
imageio.mimsave(video_path, stacked_frames, fps=fps)
-def preprocess_observation(observation, transform=None):
- # map to expected inputs for the policy
- obs = {
- "observation.image": torch.from_numpy(observation["pixels"]).float(),
- "observation.state": torch.from_numpy(observation["agent_pos"]).float(),
- }
- # convert to (b c h w) torch format
- obs["observation.image"] = einops.rearrange(obs["observation.image"], "b h w c -> b c h w")
-
- # apply same transforms as in training
- if transform is not None:
- for key in obs:
- obs[key] = torch.stack([transform({key: item})[key] for item in obs[key]])
-
- return obs
-
-
-def postprocess_action(action, transform=None):
- action = action.to("cpu")
- # action is a batch (num_env,action_dim) instead of an item (action_dim),
- # we assume applying inverse transform on a batch works the same
- action = apply_inverse_transform({"action": action}, transform)["action"].numpy()
- assert (
- action.ndim == 2
- ), "we assume dimensions are respectively the number of parallel envs, action dimensions"
- return action
-
-
def eval_policy(
env: gym.vector.VectorEnv,
policy,
@@ -114,10 +86,10 @@ def eval_policy(
def maybe_render_frame(env):
if save_video: # noqa: B023
if return_first_video:
- visu = env.envs[0].render()
+ visu = env.envs[0].render(mode="visualization")
visu = visu[None, ...] # add batch dim
else:
- visu = np.stack([env.render() for env in env.envs])
+ visu = np.stack([env.render(mode="visualization") for env in env.envs])
ep_frames.append(visu) # noqa: B023
for _ in range(num_episodes):
diff --git a/poetry.lock b/poetry.lock
index b8c6c638..3f301d0b 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -903,6 +903,26 @@ url = "git@github.com:huggingface/gym-pusht.git"
reference = "HEAD"
resolved_reference = "0fe4449cca5a2b08f529f7a07fbf5b9df24962ec"
+[[package]]
+name = "gym-xarm"
+version = "0.1.0"
+description = "A gym environment for xArm"
+optional = true
+python-versions = "^3.10"
+files = []
+develop = false
+
+[package.dependencies]
+gymnasium = "^0.29.1"
+gymnasium-robotics = "^1.2.4"
+mujoco = "^2.3.7"
+
+[package.source]
+type = "git"
+url = "git@github.com:huggingface/gym-xarm.git"
+reference = "HEAD"
+resolved_reference = "2eb83fc4fc871b9d271c946d169e42f226ac3a7c"
+
[[package]]
name = "gymnasium"
version = "0.29.1"
@@ -3695,8 +3715,9 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[extras]
pusht = ["gym_pusht"]
+xarm = ["gym_xarm"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
-content-hash = "3eee17e4bf2b7a570f41ef9c400ec5a24a3113f62a13162229cf43504ca0d005"
+content-hash = "c9524cdf000eaa755a2ab3be669118222b4f8b1c262013f103f6874cbd54eeb6"
diff --git a/pyproject.toml b/pyproject.toml
index a7d2dd65..619a16d9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,9 +53,13 @@ gymnasium-robotics = "^1.2.4"
gymnasium = "^0.29.1"
cmake = "^3.29.0.1"
gym_pusht = { git = "git@github.com:huggingface/gym-pusht.git", optional = true}
+gym_xarm = { git = "git@github.com:huggingface/gym-xarm.git", optional = true}
+# gym_pusht = { path = "../gym-pusht", develop = true, optional = true}
+# gym_xarm = { path = "../gym-xarm", develop = true, optional = true}
[tool.poetry.extras]
pusht = ["gym_pusht"]
+xarm = ["gym_xarm"]
[tool.poetry.group.dev.dependencies]
pre-commit = "^3.6.2"
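
With the `xarm` extra declared, the new dependency can be installed via `poetry install --extras xarm` and smoke-tested along the lines of the updated `tests/test_envs.py` (this assumes `gym_xarm` is importable; nothing below is added by this PR beyond the extra itself):

```python
# Quick smoke check mirroring tests/test_envs.py::test_xarm.
import gym_xarm  # noqa: F401  (registers the xArm envs)
import gymnasium as gym
from gymnasium.utils.env_checker import check_env

env = gym.make("gym_xarm/XarmLift-v0", obs_type="pixels_agent_pos")
check_env(env)
env.close()
```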
diff --git a/sbatch.sh b/sbatch.sh
index cb5b285a..c08f7055 100644
--- a/sbatch.sh
+++ b/sbatch.sh
@@ -6,7 +6,7 @@
#SBATCH --time=2-00:00:00
#SBATCH --output=/home/rcadene/slurm/%j.out
#SBATCH --error=/home/rcadene/slurm/%j.err
-#SBATCH --qos=medium
+#SBATCH --qos=low
#SBATCH --mail-user=re.cadene@gmail.com
#SBATCH --mail-type=ALL
@@ -20,4 +20,6 @@ source ~/.bashrc
#conda activate fowm
conda activate lerobot
+export DATA_DIR="data"
+
srun $CMD
diff --git a/tests/test_envs.py b/tests/test_envs.py
index 0c56f4fc..a94d76f2 100644
--- a/tests/test_envs.py
+++ b/tests/test_envs.py
@@ -1,45 +1,47 @@
import pytest
-from tensordict import TensorDict
import torch
-from torchrl.envs.utils import check_env_specs, step_mdp
from lerobot.common.datasets.factory import make_dataset
+import gymnasium as gym
+from gymnasium.utils.env_checker import check_env
-from lerobot.common.envs.aloha.env import AlohaEnv
from lerobot.common.envs.factory import make_env
-from lerobot.common.envs.pusht.env import PushtEnv
-from lerobot.common.envs.simxarm.env import SimxarmEnv
from lerobot.common.utils import init_hydra_config
+from lerobot.common.envs.utils import preprocess_observation
+
+# import dmc_aloha # noqa: F401
+
from .utils import DEVICE, DEFAULT_CONFIG_PATH
-def print_spec_rollout(env):
- print("observation_spec:", env.observation_spec)
- print("action_spec:", env.action_spec)
- print("reward_spec:", env.reward_spec)
- print("done_spec:", env.done_spec)
+# def print_spec_rollout(env):
+# print("observation_spec:", env.observation_spec)
+# print("action_spec:", env.action_spec)
+# print("reward_spec:", env.reward_spec)
+# print("done_spec:", env.done_spec)
- td = env.reset()
- print("reset tensordict", td)
+# td = env.reset()
+# print("reset tensordict", td)
- td = env.rand_step(td)
- print("random step tensordict", td)
+# td = env.rand_step(td)
+# print("random step tensordict", td)
- def simple_rollout(steps=100):
- # preallocate:
- data = TensorDict({}, [steps])
- # reset
- _data = env.reset()
- for i in range(steps):
- _data["action"] = env.action_spec.rand()
- _data = env.step(_data)
- data[i] = _data
- _data = step_mdp(_data, keep_other=True)
- return data
+# def simple_rollout(steps=100):
+# # preallocate:
+# data = TensorDict({}, [steps])
+# # reset
+# _data = env.reset()
+# for i in range(steps):
+# _data["action"] = env.action_spec.rand()
+# _data = env.step(_data)
+# data[i] = _data
+# _data = step_mdp(_data, keep_other=True)
+# return data
- print("data from rollout:", simple_rollout(100))
+# print("data from rollout:", simple_rollout(100))
+@pytest.mark.skip("TODO")
@pytest.mark.parametrize(
"task,from_pixels,pixels_only",
[
@@ -61,53 +63,41 @@ def test_aloha(task, from_pixels, pixels_only):
@pytest.mark.parametrize(
- "task,from_pixels,pixels_only",
+ "env_task, obs_type",
[
- ("lift", False, False),
- ("lift", True, False),
- ("lift", True, True),
- # TODO(aliberts): Add simxarm other tasks
- # ("reach", False, False),
- # ("reach", True, False),
- # ("push", False, False),
- # ("push", True, False),
- # ("peg_in_box", False, False),
- # ("peg_in_box", True, False),
+ ("XarmLift-v0", "state"),
+ ("XarmLift-v0", "pixels"),
+ ("XarmLift-v0", "pixels_agent_pos"),
+ # TODO(aliberts): Add gym_xarm other tasks
],
)
-def test_simxarm(task, from_pixels, pixels_only):
- env = SimxarmEnv(
- task,
- from_pixels=from_pixels,
- pixels_only=pixels_only,
- image_size=84 if from_pixels else None,
- )
- # print_spec_rollout(env)
- check_env_specs(env)
+def test_xarm(env_task, obs_type):
+ import gym_xarm # noqa: F401
+ env = gym.make(f"gym_xarm/{env_task}", obs_type=obs_type)
+ check_env(env)
+
@pytest.mark.parametrize(
- "from_pixels,pixels_only",
+ "env_task, obs_type",
[
- (True, False),
+ ("PushTPixels-v0", "state"),
+ ("PushTPixels-v0", "pixels"),
+ ("PushTPixels-v0", "pixels_agent_pos"),
],
)
-def test_pusht(from_pixels, pixels_only):
- env = PushtEnv(
- from_pixels=from_pixels,
- pixels_only=pixels_only,
- image_size=96 if from_pixels else None,
- )
- # print_spec_rollout(env)
- check_env_specs(env)
+def test_pusht(env_task, obs_type):
+ import gym_pusht # noqa: F401
+ env = gym.make(f"gym_pusht/{env_task}", obs_type=obs_type)
+ check_env(env)
@pytest.mark.parametrize(
"env_name",
[
- "simxarm",
"pusht",
- "aloha",
+ "simxarm",
+ # "aloha",
],
)
def test_factory(env_name):
@@ -119,15 +109,12 @@ def test_factory(env_name):
dataset = make_dataset(cfg)
env = make_env(cfg)
+ obs, info = env.reset()
+ obs = {key: obs[key][None, ...] for key in obs}
+ obs = preprocess_observation(obs, transform=dataset.transform)
for key in dataset.image_keys:
- assert env.reset().get(key).dtype == torch.uint8
- check_env_specs(env)
-
- env = make_env(cfg, transform=dataset.transform)
- for key in dataset.image_keys:
- img = env.reset().get(key)
+ img = obs[key]
assert img.dtype == torch.float32
# TODO(rcadene): we assume for now that image normalization takes place in the model
assert img.max() <= 1.0
assert img.min() >= 0.0
- check_env_specs(env)