Merge remote-tracking branch 'Cadene/user/rcadene/2024_03_31_remove_torchrl' into refactor_act_remove_torchrl
This commit is contained in:
commit
ab2286025b
|
@ -1,92 +0,0 @@
|
|||
from collections import deque
|
||||
from typing import Optional
|
||||
|
||||
from tensordict import TensorDict
|
||||
from torchrl.envs import EnvBase
|
||||
|
||||
from lerobot.common.utils import set_global_seed
|
||||
|
||||
|
||||
class AbstractEnv(EnvBase):
|
||||
"""
|
||||
Note:
|
||||
When implementing a concrete class (e.g. `AlohaDataset`, `PushtEnv`, `DiffusionPolicy`), you need to:
|
||||
1. set the required class attributes:
|
||||
- for classes inheriting from `AbstractDataset`: `available_datasets`
|
||||
- for classes inheriting from `AbstractEnv`: `name`, `available_tasks`
|
||||
- for classes inheriting from `AbstractPolicy`: `name`
|
||||
2. update variables in `lerobot/__init__.py` (e.g. `available_envs`, `available_datasets_per_envs`, `available_policies`)
|
||||
3. update variables in `tests/test_available.py` by importing your new class
|
||||
"""
|
||||
|
||||
name: str | None = None # same name should be used to instantiate the environment in factory.py
|
||||
available_tasks: list[str] | None = None # for instance: sim_insertion, sim_transfer_cube, pusht, lift
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
task,
|
||||
frame_skip: int = 1,
|
||||
from_pixels: bool = False,
|
||||
pixels_only: bool = False,
|
||||
image_size=None,
|
||||
seed=1337,
|
||||
device="cpu",
|
||||
num_prev_obs=1,
|
||||
num_prev_action=0,
|
||||
):
|
||||
super().__init__(device=device, batch_size=[])
|
||||
assert self.name is not None, "Subclasses of `AbstractEnv` should set the `name` class attribute."
|
||||
assert (
|
||||
self.available_tasks is not None
|
||||
), "Subclasses of `AbstractEnv` should set the `available_tasks` class attribute."
|
||||
assert (
|
||||
task in self.available_tasks
|
||||
), f"The provided task ({task}) is not on the list of available tasks {self.available_tasks}."
|
||||
|
||||
self.task = task
|
||||
self.frame_skip = frame_skip
|
||||
self.from_pixels = from_pixels
|
||||
self.pixels_only = pixels_only
|
||||
self.image_size = image_size
|
||||
self.num_prev_obs = num_prev_obs
|
||||
self.num_prev_action = num_prev_action
|
||||
|
||||
if pixels_only:
|
||||
assert from_pixels
|
||||
if from_pixels:
|
||||
assert image_size
|
||||
|
||||
self._make_env()
|
||||
self._make_spec()
|
||||
|
||||
# self._next_seed will be used for the next reset. It is recommended that when self.set_seed is called
|
||||
# you store the return value in self._next_seed (it will be a new randomly generated seed).
|
||||
self._next_seed = seed
|
||||
# Don't store the result of this in self._next_seed, as we want to make sure that the first time
|
||||
# self._reset is called, we use seed.
|
||||
self.set_seed(seed)
|
||||
|
||||
if self.num_prev_obs > 0:
|
||||
self._prev_obs_image_queue = deque(maxlen=self.num_prev_obs)
|
||||
self._prev_obs_state_queue = deque(maxlen=self.num_prev_obs)
|
||||
if self.num_prev_action > 0:
|
||||
raise NotImplementedError()
|
||||
# self._prev_action_queue = deque(maxlen=self.num_prev_action)
|
||||
|
||||
def render(self, mode="rgb_array", width=640, height=480):
|
||||
raise NotImplementedError("Abstract method")
|
||||
|
||||
def _reset(self, tensordict: Optional[TensorDict] = None):
|
||||
raise NotImplementedError("Abstract method")
|
||||
|
||||
def _step(self, tensordict: TensorDict):
|
||||
raise NotImplementedError("Abstract method")
|
||||
|
||||
def _make_env(self):
|
||||
raise NotImplementedError("Abstract method")
|
||||
|
||||
def _make_spec(self):
|
||||
raise NotImplementedError("Abstract method")
|
||||
|
||||
def _set_seed(self, seed: Optional[int]):
|
||||
set_global_seed(seed)
|
|
@ -6,24 +6,27 @@ def make_env(cfg, num_parallel_envs=0) -> gym.Env | gym.vector.SyncVectorEnv:
|
|||
Note: When `num_parallel_envs > 0`, this function returns a `SyncVectorEnv` which takes batched action as input and
|
||||
returns batched observation, reward, terminated, truncated of `num_parallel_envs` items.
|
||||
"""
|
||||
kwargs = {}
|
||||
kwargs = {
|
||||
"obs_type": "pixels_agent_pos",
|
||||
"max_episode_steps": cfg.env.episode_length,
|
||||
"visualization_width": 384,
|
||||
"visualization_height": 384,
|
||||
}
|
||||
|
||||
if cfg.env.name == "simxarm":
|
||||
kwargs["task"] = cfg.env.task
|
||||
import gym_xarm # noqa: F401
|
||||
|
||||
assert cfg.env.task == "lift"
|
||||
env_fn = lambda: gym.make( # noqa: E731
|
||||
"gym_xarm/XarmLift-v0",
|
||||
**kwargs,
|
||||
)
|
||||
elif cfg.env.name == "pusht":
|
||||
import gym_pusht # noqa
|
||||
import gym_pusht # noqa: F401
|
||||
|
||||
# assert kwargs["seed"] > 200, "Seed 0-200 are used for the demonstration dataset, so we don't want to seed the eval env with this range."
|
||||
kwargs.update(
|
||||
{
|
||||
"obs_type": "pixels_agent_pos",
|
||||
"render_action": False,
|
||||
}
|
||||
)
|
||||
env_fn = lambda: gym.make( # noqa: E731
|
||||
"gym_pusht/PushTPixels-v0",
|
||||
render_mode="rgb_array",
|
||||
max_episode_steps=cfg.env.episode_length,
|
||||
**kwargs,
|
||||
)
|
||||
elif cfg.env.name == "aloha":
|
||||
|
|
|
@ -1,245 +0,0 @@
|
|||
import importlib
|
||||
import logging
|
||||
from collections import deque
|
||||
from typing import Optional
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import torch
|
||||
from tensordict import TensorDict
|
||||
from torchrl.data.tensor_specs import (
|
||||
BoundedTensorSpec,
|
||||
CompositeSpec,
|
||||
DiscreteTensorSpec,
|
||||
UnboundedContinuousTensorSpec,
|
||||
)
|
||||
from torchrl.envs.libs.gym import _gym_to_torchrl_spec_transform
|
||||
|
||||
from lerobot.common.envs.abstract import AbstractEnv
|
||||
from lerobot.common.utils import set_global_seed
|
||||
|
||||
_has_gym = importlib.util.find_spec("gymnasium") is not None
|
||||
|
||||
|
||||
class PushtEnv(AbstractEnv):
|
||||
name = "pusht"
|
||||
available_tasks = ["pusht"]
|
||||
_reset_warning_issued = False
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
task="pusht",
|
||||
frame_skip: int = 1,
|
||||
from_pixels: bool = False,
|
||||
pixels_only: bool = False,
|
||||
image_size=None,
|
||||
seed=1337,
|
||||
device="cpu",
|
||||
num_prev_obs=1,
|
||||
num_prev_action=0,
|
||||
):
|
||||
super().__init__(
|
||||
task=task,
|
||||
frame_skip=frame_skip,
|
||||
from_pixels=from_pixels,
|
||||
pixels_only=pixels_only,
|
||||
image_size=image_size,
|
||||
seed=seed,
|
||||
device=device,
|
||||
num_prev_obs=num_prev_obs,
|
||||
num_prev_action=num_prev_action,
|
||||
)
|
||||
|
||||
def _make_env(self):
|
||||
if not _has_gym:
|
||||
raise ImportError("Cannot import gymnasium.")
|
||||
|
||||
# TODO(rcadene) (PushTEnv is similar to PushTImageEnv, but without the image rendering, it's faster to iterate on)
|
||||
# from lerobot.common.envs.pusht.pusht_env import PushTEnv
|
||||
|
||||
if not self.from_pixels:
|
||||
raise NotImplementedError("Use PushTEnv, instead of PushTImageEnv")
|
||||
from lerobot.common.envs.pusht.pusht_image_env import PushTImageEnv
|
||||
|
||||
self._env = PushTImageEnv(render_size=self.image_size)
|
||||
|
||||
def render(self, mode="rgb_array", width=96, height=96, with_marker=True):
|
||||
"""
|
||||
with_marker adds a cursor showing the targeted action for the controller.
|
||||
"""
|
||||
if width != height:
|
||||
raise NotImplementedError()
|
||||
tmp = self._env.render_size
|
||||
if width != self._env.render_size:
|
||||
self._env.render_cache = None
|
||||
self._env.render_size = width
|
||||
out = self._env.render(mode).copy()
|
||||
if with_marker and self._env.latest_action is not None:
|
||||
action = np.array(self._env.latest_action)
|
||||
coord = (action / 512 * self._env.render_size).astype(np.int32)
|
||||
marker_size = int(8 / 96 * self._env.render_size)
|
||||
thickness = int(1 / 96 * self._env.render_size)
|
||||
cv2.drawMarker(
|
||||
out,
|
||||
coord,
|
||||
color=(255, 0, 0),
|
||||
markerType=cv2.MARKER_CROSS,
|
||||
markerSize=marker_size,
|
||||
thickness=thickness,
|
||||
)
|
||||
self._env.render_size = tmp
|
||||
return out
|
||||
|
||||
def _format_raw_obs(self, raw_obs):
|
||||
if self.from_pixels:
|
||||
image = torch.from_numpy(raw_obs["image"])
|
||||
obs = {"image": image}
|
||||
|
||||
if not self.pixels_only:
|
||||
obs["state"] = torch.from_numpy(raw_obs["agent_pos"]).type(torch.float32)
|
||||
else:
|
||||
# TODO:
|
||||
obs = {"state": torch.from_numpy(raw_obs["observation"]).type(torch.float32)}
|
||||
|
||||
return obs
|
||||
|
||||
def _reset(self, tensordict: Optional[TensorDict] = None):
|
||||
if tensordict is not None and not PushtEnv._reset_warning_issued:
|
||||
logging.warning(f"{self.__class__.__name__}._reset ignores the provided tensordict.")
|
||||
PushtEnv._reset_warning_issued = True
|
||||
|
||||
# Seed the environment and update the seed to be used for the next reset.
|
||||
self._next_seed = self.set_seed(self._next_seed)
|
||||
raw_obs = self._env.reset()
|
||||
|
||||
obs = self._format_raw_obs(raw_obs)
|
||||
|
||||
if self.num_prev_obs > 0:
|
||||
stacked_obs = {}
|
||||
if "image" in obs:
|
||||
self._prev_obs_image_queue = deque(
|
||||
[obs["image"]] * (self.num_prev_obs + 1), maxlen=(self.num_prev_obs + 1)
|
||||
)
|
||||
stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue))
|
||||
if "state" in obs:
|
||||
self._prev_obs_state_queue = deque(
|
||||
[obs["state"]] * (self.num_prev_obs + 1), maxlen=(self.num_prev_obs + 1)
|
||||
)
|
||||
stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue))
|
||||
obs = stacked_obs
|
||||
|
||||
td = TensorDict(
|
||||
{
|
||||
"observation": TensorDict(obs, batch_size=[]),
|
||||
"done": torch.tensor([False], dtype=torch.bool),
|
||||
},
|
||||
batch_size=[],
|
||||
)
|
||||
|
||||
return td
|
||||
|
||||
def _step(self, tensordict: TensorDict):
|
||||
td = tensordict
|
||||
action = td["action"].numpy()
|
||||
assert action.ndim == 1
|
||||
# TODO(rcadene): add info["is_success"] and info["success"] ?
|
||||
|
||||
raw_obs, reward, done, info = self._env.step(action)
|
||||
|
||||
obs = self._format_raw_obs(raw_obs)
|
||||
|
||||
if self.num_prev_obs > 0:
|
||||
stacked_obs = {}
|
||||
if "image" in obs:
|
||||
self._prev_obs_image_queue.append(obs["image"])
|
||||
stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue))
|
||||
if "state" in obs:
|
||||
self._prev_obs_state_queue.append(obs["state"])
|
||||
stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue))
|
||||
obs = stacked_obs
|
||||
|
||||
td = TensorDict(
|
||||
{
|
||||
"observation": TensorDict(obs, batch_size=[]),
|
||||
"reward": torch.tensor([reward], dtype=torch.float32),
|
||||
# success and done are true when coverage > self.success_threshold in env
|
||||
"done": torch.tensor([done], dtype=torch.bool),
|
||||
"success": torch.tensor([done], dtype=torch.bool),
|
||||
},
|
||||
batch_size=[],
|
||||
)
|
||||
return td
|
||||
|
||||
def _make_spec(self):
|
||||
obs = {}
|
||||
if self.from_pixels:
|
||||
image_shape = (3, self.image_size, self.image_size)
|
||||
if self.num_prev_obs > 0:
|
||||
image_shape = (self.num_prev_obs + 1, *image_shape)
|
||||
|
||||
obs["image"] = BoundedTensorSpec(
|
||||
low=0,
|
||||
high=255,
|
||||
shape=image_shape,
|
||||
dtype=torch.uint8,
|
||||
device=self.device,
|
||||
)
|
||||
if not self.pixels_only:
|
||||
state_shape = self._env.observation_space["agent_pos"].shape
|
||||
if self.num_prev_obs > 0:
|
||||
state_shape = (self.num_prev_obs + 1, *state_shape)
|
||||
|
||||
obs["state"] = BoundedTensorSpec(
|
||||
low=0,
|
||||
high=512,
|
||||
shape=state_shape,
|
||||
dtype=torch.float32,
|
||||
device=self.device,
|
||||
)
|
||||
else:
|
||||
# TODO(rcadene): add observation_space achieved_goal and desired_goal?
|
||||
state_shape = self._env.observation_space["observation"].shape
|
||||
if self.num_prev_obs > 0:
|
||||
state_shape = (self.num_prev_obs + 1, *state_shape)
|
||||
|
||||
obs["state"] = UnboundedContinuousTensorSpec(
|
||||
# TODO:
|
||||
shape=state_shape,
|
||||
dtype=torch.float32,
|
||||
device=self.device,
|
||||
)
|
||||
self.observation_spec = CompositeSpec({"observation": obs})
|
||||
|
||||
self.action_spec = _gym_to_torchrl_spec_transform(
|
||||
self._env.action_space,
|
||||
device=self.device,
|
||||
)
|
||||
|
||||
self.reward_spec = UnboundedContinuousTensorSpec(
|
||||
shape=(1,),
|
||||
dtype=torch.float32,
|
||||
device=self.device,
|
||||
)
|
||||
|
||||
self.done_spec = CompositeSpec(
|
||||
{
|
||||
"done": DiscreteTensorSpec(
|
||||
2,
|
||||
shape=(1,),
|
||||
dtype=torch.bool,
|
||||
device=self.device,
|
||||
),
|
||||
"success": DiscreteTensorSpec(
|
||||
2,
|
||||
shape=(1,),
|
||||
dtype=torch.bool,
|
||||
device=self.device,
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
def _set_seed(self, seed: Optional[int]):
|
||||
# Set global seed.
|
||||
set_global_seed(seed)
|
||||
# Set PushTImageEnv seed as it relies on it's own internal _seed attribute.
|
||||
self._env.seed(seed)
|
|
@ -1,378 +0,0 @@
|
|||
import collections
|
||||
|
||||
import cv2
|
||||
import gymnasium as gym
|
||||
import numpy as np
|
||||
import pygame
|
||||
import pymunk
|
||||
import pymunk.pygame_util
|
||||
import shapely.geometry as sg
|
||||
import skimage.transform as st
|
||||
from gymnasium import spaces
|
||||
from pymunk.vec2d import Vec2d
|
||||
|
||||
from lerobot.common.envs.pusht.pymunk_override import DrawOptions
|
||||
|
||||
|
||||
def pymunk_to_shapely(body, shapes):
|
||||
geoms = []
|
||||
for shape in shapes:
|
||||
if isinstance(shape, pymunk.shapes.Poly):
|
||||
verts = [body.local_to_world(v) for v in shape.get_vertices()]
|
||||
verts += [verts[0]]
|
||||
geoms.append(sg.Polygon(verts))
|
||||
else:
|
||||
raise RuntimeError(f"Unsupported shape type {type(shape)}")
|
||||
geom = sg.MultiPolygon(geoms)
|
||||
return geom
|
||||
|
||||
|
||||
class PushTEnv(gym.Env):
|
||||
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 10}
|
||||
reward_range = (0.0, 1.0)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
legacy=True, # compatibility with original
|
||||
block_cog=None,
|
||||
damping=None,
|
||||
render_action=True,
|
||||
render_size=96,
|
||||
reset_to_state=None,
|
||||
):
|
||||
self._seed = None
|
||||
self.seed()
|
||||
self.window_size = ws = 512 # The size of the PyGame window
|
||||
self.render_size = render_size
|
||||
self.sim_hz = 100
|
||||
# Local controller params.
|
||||
self.k_p, self.k_v = 100, 20 # PD control.z
|
||||
self.control_hz = self.metadata["video.frames_per_second"]
|
||||
# legcay set_state for data compatibility
|
||||
self.legacy = legacy
|
||||
|
||||
# agent_pos, block_pos, block_angle
|
||||
self.observation_space = spaces.Box(
|
||||
low=np.array([0, 0, 0, 0, 0], dtype=np.float64),
|
||||
high=np.array([ws, ws, ws, ws, np.pi * 2], dtype=np.float64),
|
||||
shape=(5,),
|
||||
dtype=np.float64,
|
||||
)
|
||||
|
||||
# positional goal for agent
|
||||
self.action_space = spaces.Box(
|
||||
low=np.array([0, 0], dtype=np.float64),
|
||||
high=np.array([ws, ws], dtype=np.float64),
|
||||
shape=(2,),
|
||||
dtype=np.float64,
|
||||
)
|
||||
|
||||
self.block_cog = block_cog
|
||||
self.damping = damping
|
||||
self.render_action = render_action
|
||||
|
||||
"""
|
||||
If human-rendering is used, `self.window` will be a reference
|
||||
to the window that we draw to. `self.clock` will be a clock that is used
|
||||
to ensure that the environment is rendered at the correct framerate in
|
||||
human-mode. They will remain `None` until human-mode is used for the
|
||||
first time.
|
||||
"""
|
||||
self.window = None
|
||||
self.clock = None
|
||||
self.screen = None
|
||||
|
||||
self.space = None
|
||||
self.teleop = None
|
||||
self.render_buffer = None
|
||||
self.latest_action = None
|
||||
self.reset_to_state = reset_to_state
|
||||
|
||||
def reset(self):
|
||||
seed = self._seed
|
||||
self._setup()
|
||||
if self.block_cog is not None:
|
||||
self.block.center_of_gravity = self.block_cog
|
||||
if self.damping is not None:
|
||||
self.space.damping = self.damping
|
||||
|
||||
# use legacy RandomState for compatibility
|
||||
state = self.reset_to_state
|
||||
if state is None:
|
||||
rs = np.random.RandomState(seed=seed)
|
||||
state = np.array(
|
||||
[
|
||||
rs.randint(50, 450),
|
||||
rs.randint(50, 450),
|
||||
rs.randint(100, 400),
|
||||
rs.randint(100, 400),
|
||||
rs.randn() * 2 * np.pi - np.pi,
|
||||
]
|
||||
)
|
||||
self._set_state(state)
|
||||
|
||||
observation = self._get_obs()
|
||||
return observation
|
||||
|
||||
def step(self, action):
|
||||
dt = 1.0 / self.sim_hz
|
||||
self.n_contact_points = 0
|
||||
n_steps = self.sim_hz // self.control_hz
|
||||
if action is not None:
|
||||
self.latest_action = action
|
||||
for _ in range(n_steps):
|
||||
# Step PD control.
|
||||
# self.agent.velocity = self.k_p * (act - self.agent.position) # P control works too.
|
||||
acceleration = self.k_p * (action - self.agent.position) + self.k_v * (
|
||||
Vec2d(0, 0) - self.agent.velocity
|
||||
)
|
||||
self.agent.velocity += acceleration * dt
|
||||
|
||||
# Step physics.
|
||||
self.space.step(dt)
|
||||
|
||||
# compute reward
|
||||
goal_body = self._get_goal_pose_body(self.goal_pose)
|
||||
goal_geom = pymunk_to_shapely(goal_body, self.block.shapes)
|
||||
block_geom = pymunk_to_shapely(self.block, self.block.shapes)
|
||||
|
||||
intersection_area = goal_geom.intersection(block_geom).area
|
||||
goal_area = goal_geom.area
|
||||
coverage = intersection_area / goal_area
|
||||
reward = np.clip(coverage / self.success_threshold, 0, 1)
|
||||
done = coverage > self.success_threshold
|
||||
|
||||
observation = self._get_obs()
|
||||
info = self._get_info()
|
||||
|
||||
return observation, reward, done, info
|
||||
|
||||
def render(self, mode):
|
||||
return self._render_frame(mode)
|
||||
|
||||
def teleop_agent(self):
|
||||
TeleopAgent = collections.namedtuple("TeleopAgent", ["act"])
|
||||
|
||||
def act(obs):
|
||||
act = None
|
||||
mouse_position = pymunk.pygame_util.from_pygame(Vec2d(*pygame.mouse.get_pos()), self.screen)
|
||||
if self.teleop or (mouse_position - self.agent.position).length < 30:
|
||||
self.teleop = True
|
||||
act = mouse_position
|
||||
return act
|
||||
|
||||
return TeleopAgent(act)
|
||||
|
||||
def _get_obs(self):
|
||||
obs = np.array(
|
||||
tuple(self.agent.position) + tuple(self.block.position) + (self.block.angle % (2 * np.pi),)
|
||||
)
|
||||
return obs
|
||||
|
||||
def _get_goal_pose_body(self, pose):
|
||||
mass = 1
|
||||
inertia = pymunk.moment_for_box(mass, (50, 100))
|
||||
body = pymunk.Body(mass, inertia)
|
||||
# preserving the legacy assignment order for compatibility
|
||||
# the order here doesn't matter somehow, maybe because CoM is aligned with body origin
|
||||
body.position = pose[:2].tolist()
|
||||
body.angle = pose[2]
|
||||
return body
|
||||
|
||||
def _get_info(self):
|
||||
n_steps = self.sim_hz // self.control_hz
|
||||
n_contact_points_per_step = int(np.ceil(self.n_contact_points / n_steps))
|
||||
info = {
|
||||
"pos_agent": np.array(self.agent.position),
|
||||
"vel_agent": np.array(self.agent.velocity),
|
||||
"block_pose": np.array(list(self.block.position) + [self.block.angle]),
|
||||
"goal_pose": self.goal_pose,
|
||||
"n_contacts": n_contact_points_per_step,
|
||||
}
|
||||
return info
|
||||
|
||||
def _render_frame(self, mode):
|
||||
if self.window is None and mode == "human":
|
||||
pygame.init()
|
||||
pygame.display.init()
|
||||
self.window = pygame.display.set_mode((self.window_size, self.window_size))
|
||||
if self.clock is None and mode == "human":
|
||||
self.clock = pygame.time.Clock()
|
||||
|
||||
canvas = pygame.Surface((self.window_size, self.window_size))
|
||||
canvas.fill((255, 255, 255))
|
||||
self.screen = canvas
|
||||
|
||||
draw_options = DrawOptions(canvas)
|
||||
|
||||
# Draw goal pose.
|
||||
goal_body = self._get_goal_pose_body(self.goal_pose)
|
||||
for shape in self.block.shapes:
|
||||
goal_points = [
|
||||
pymunk.pygame_util.to_pygame(goal_body.local_to_world(v), draw_options.surface)
|
||||
for v in shape.get_vertices()
|
||||
]
|
||||
goal_points += [goal_points[0]]
|
||||
pygame.draw.polygon(canvas, self.goal_color, goal_points)
|
||||
|
||||
# Draw agent and block.
|
||||
self.space.debug_draw(draw_options)
|
||||
|
||||
if mode == "human":
|
||||
# The following line copies our drawings from `canvas` to the visible window
|
||||
self.window.blit(canvas, canvas.get_rect())
|
||||
pygame.event.pump()
|
||||
pygame.display.update()
|
||||
|
||||
# the clock is already ticked during in step for "human"
|
||||
|
||||
img = np.transpose(np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2))
|
||||
img = cv2.resize(img, (self.render_size, self.render_size))
|
||||
if self.render_action and self.latest_action is not None:
|
||||
action = np.array(self.latest_action)
|
||||
coord = (action / 512 * 96).astype(np.int32)
|
||||
marker_size = int(8 / 96 * self.render_size)
|
||||
thickness = int(1 / 96 * self.render_size)
|
||||
cv2.drawMarker(
|
||||
img,
|
||||
coord,
|
||||
color=(255, 0, 0),
|
||||
markerType=cv2.MARKER_CROSS,
|
||||
markerSize=marker_size,
|
||||
thickness=thickness,
|
||||
)
|
||||
return img
|
||||
|
||||
def close(self):
|
||||
if self.window is not None:
|
||||
pygame.display.quit()
|
||||
pygame.quit()
|
||||
|
||||
def seed(self, seed=None):
|
||||
if seed is None:
|
||||
seed = np.random.randint(0, 25536)
|
||||
self._seed = seed
|
||||
self.np_random = np.random.default_rng(seed)
|
||||
|
||||
def _handle_collision(self, arbiter, space, data):
|
||||
self.n_contact_points += len(arbiter.contact_point_set.points)
|
||||
|
||||
def _set_state(self, state):
|
||||
if isinstance(state, np.ndarray):
|
||||
state = state.tolist()
|
||||
pos_agent = state[:2]
|
||||
pos_block = state[2:4]
|
||||
rot_block = state[4]
|
||||
self.agent.position = pos_agent
|
||||
# setting angle rotates with respect to center of mass
|
||||
# therefore will modify the geometric position
|
||||
# if not the same as CoM
|
||||
# therefore should be modified first.
|
||||
if self.legacy:
|
||||
# for compatibility with legacy data
|
||||
self.block.position = pos_block
|
||||
self.block.angle = rot_block
|
||||
else:
|
||||
self.block.angle = rot_block
|
||||
self.block.position = pos_block
|
||||
|
||||
# Run physics to take effect
|
||||
self.space.step(1.0 / self.sim_hz)
|
||||
|
||||
def _set_state_local(self, state_local):
|
||||
agent_pos_local = state_local[:2]
|
||||
block_pose_local = state_local[2:]
|
||||
tf_img_obj = st.AffineTransform(translation=self.goal_pose[:2], rotation=self.goal_pose[2])
|
||||
tf_obj_new = st.AffineTransform(translation=block_pose_local[:2], rotation=block_pose_local[2])
|
||||
tf_img_new = st.AffineTransform(matrix=tf_img_obj.params @ tf_obj_new.params)
|
||||
agent_pos_new = tf_img_new(agent_pos_local)
|
||||
new_state = np.array(list(agent_pos_new[0]) + list(tf_img_new.translation) + [tf_img_new.rotation])
|
||||
self._set_state(new_state)
|
||||
return new_state
|
||||
|
||||
def _setup(self):
|
||||
self.space = pymunk.Space()
|
||||
self.space.gravity = 0, 0
|
||||
self.space.damping = 0
|
||||
self.teleop = False
|
||||
self.render_buffer = []
|
||||
|
||||
# Add walls.
|
||||
walls = [
|
||||
self._add_segment((5, 506), (5, 5), 2),
|
||||
self._add_segment((5, 5), (506, 5), 2),
|
||||
self._add_segment((506, 5), (506, 506), 2),
|
||||
self._add_segment((5, 506), (506, 506), 2),
|
||||
]
|
||||
self.space.add(*walls)
|
||||
|
||||
# Add agent, block, and goal zone.
|
||||
self.agent = self.add_circle((256, 400), 15)
|
||||
self.block = self.add_tee((256, 300), 0)
|
||||
self.goal_color = pygame.Color("LightGreen")
|
||||
self.goal_pose = np.array([256, 256, np.pi / 4]) # x, y, theta (in radians)
|
||||
|
||||
# Add collision handling
|
||||
self.collision_handeler = self.space.add_collision_handler(0, 0)
|
||||
self.collision_handeler.post_solve = self._handle_collision
|
||||
self.n_contact_points = 0
|
||||
|
||||
self.max_score = 50 * 100
|
||||
self.success_threshold = 0.95 # 95% coverage.
|
||||
|
||||
def _add_segment(self, a, b, radius):
|
||||
shape = pymunk.Segment(self.space.static_body, a, b, radius)
|
||||
shape.color = pygame.Color("LightGray") # https://htmlcolorcodes.com/color-names
|
||||
return shape
|
||||
|
||||
def add_circle(self, position, radius):
|
||||
body = pymunk.Body(body_type=pymunk.Body.KINEMATIC)
|
||||
body.position = position
|
||||
body.friction = 1
|
||||
shape = pymunk.Circle(body, radius)
|
||||
shape.color = pygame.Color("RoyalBlue")
|
||||
self.space.add(body, shape)
|
||||
return body
|
||||
|
||||
def add_box(self, position, height, width):
|
||||
mass = 1
|
||||
inertia = pymunk.moment_for_box(mass, (height, width))
|
||||
body = pymunk.Body(mass, inertia)
|
||||
body.position = position
|
||||
shape = pymunk.Poly.create_box(body, (height, width))
|
||||
shape.color = pygame.Color("LightSlateGray")
|
||||
self.space.add(body, shape)
|
||||
return body
|
||||
|
||||
def add_tee(self, position, angle, scale=30, color="LightSlateGray", mask=None):
|
||||
if mask is None:
|
||||
mask = pymunk.ShapeFilter.ALL_MASKS()
|
||||
mass = 1
|
||||
length = 4
|
||||
vertices1 = [
|
||||
(-length * scale / 2, scale),
|
||||
(length * scale / 2, scale),
|
||||
(length * scale / 2, 0),
|
||||
(-length * scale / 2, 0),
|
||||
]
|
||||
inertia1 = pymunk.moment_for_poly(mass, vertices=vertices1)
|
||||
vertices2 = [
|
||||
(-scale / 2, scale),
|
||||
(-scale / 2, length * scale),
|
||||
(scale / 2, length * scale),
|
||||
(scale / 2, scale),
|
||||
]
|
||||
inertia2 = pymunk.moment_for_poly(mass, vertices=vertices1)
|
||||
body = pymunk.Body(mass, inertia1 + inertia2)
|
||||
shape1 = pymunk.Poly(body, vertices1)
|
||||
shape2 = pymunk.Poly(body, vertices2)
|
||||
shape1.color = pygame.Color(color)
|
||||
shape2.color = pygame.Color(color)
|
||||
shape1.filter = pymunk.ShapeFilter(mask=mask)
|
||||
shape2.filter = pymunk.ShapeFilter(mask=mask)
|
||||
body.center_of_gravity = (shape1.center_of_gravity + shape2.center_of_gravity) / 2
|
||||
body.position = position
|
||||
body.angle = angle
|
||||
body.friction = 1
|
||||
self.space.add(body, shape1, shape2)
|
||||
return body
|
|
@ -1,41 +0,0 @@
|
|||
import numpy as np
|
||||
from gymnasium import spaces
|
||||
|
||||
from lerobot.common.envs.pusht.pusht_env import PushTEnv
|
||||
|
||||
|
||||
class PushTImageEnv(PushTEnv):
|
||||
metadata = {"render.modes": ["rgb_array"], "video.frames_per_second": 10}
|
||||
|
||||
# Note: legacy defaults to True for compatibility with original
|
||||
def __init__(self, legacy=True, block_cog=None, damping=None, render_size=96):
|
||||
super().__init__(
|
||||
legacy=legacy, block_cog=block_cog, damping=damping, render_size=render_size, render_action=False
|
||||
)
|
||||
ws = self.window_size
|
||||
self.observation_space = spaces.Dict(
|
||||
{
|
||||
"image": spaces.Box(low=0, high=1, shape=(3, render_size, render_size), dtype=np.float32),
|
||||
"agent_pos": spaces.Box(low=0, high=ws, shape=(2,), dtype=np.float32),
|
||||
}
|
||||
)
|
||||
self.render_cache = None
|
||||
|
||||
def _get_obs(self):
|
||||
img = super()._render_frame(mode="rgb_array")
|
||||
|
||||
agent_pos = np.array(self.agent.position)
|
||||
img_obs = np.moveaxis(img, -1, 0)
|
||||
obs = {"image": img_obs, "agent_pos": agent_pos}
|
||||
|
||||
self.render_cache = img
|
||||
|
||||
return obs
|
||||
|
||||
def render(self, mode):
|
||||
assert mode == "rgb_array"
|
||||
|
||||
if self.render_cache is None:
|
||||
self._get_obs()
|
||||
|
||||
return self.render_cache
|
|
@ -1,244 +0,0 @@
|
|||
# ----------------------------------------------------------------------------
|
||||
# pymunk
|
||||
# Copyright (c) 2007-2016 Victor Blomqvist
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
"""This submodule contains helper functions to help with quick prototyping
|
||||
using pymunk together with pygame.
|
||||
|
||||
Intended to help with debugging and prototyping, not for actual production use
|
||||
in a full application. The methods contained in this module is opinionated
|
||||
about your coordinate system and not in any way optimized.
|
||||
"""
|
||||
|
||||
__docformat__ = "reStructuredText"
|
||||
|
||||
__all__ = [
|
||||
"DrawOptions",
|
||||
"get_mouse_pos",
|
||||
"to_pygame",
|
||||
"from_pygame",
|
||||
# "lighten",
|
||||
"positive_y_is_up",
|
||||
]
|
||||
|
||||
from typing import Sequence, Tuple
|
||||
|
||||
import numpy as np
|
||||
import pygame
|
||||
import pymunk
|
||||
from pymunk.space_debug_draw_options import SpaceDebugColor
|
||||
from pymunk.vec2d import Vec2d
|
||||
|
||||
positive_y_is_up: bool = False
|
||||
"""Make increasing values of y point upwards.
|
||||
|
||||
When True::
|
||||
|
||||
y
|
||||
^
|
||||
| . (3, 3)
|
||||
|
|
||||
| . (2, 2)
|
||||
|
|
||||
+------ > x
|
||||
|
||||
When False::
|
||||
|
||||
+------ > x
|
||||
|
|
||||
| . (2, 2)
|
||||
|
|
||||
| . (3, 3)
|
||||
v
|
||||
y
|
||||
|
||||
"""
|
||||
|
||||
|
||||
class DrawOptions(pymunk.SpaceDebugDrawOptions):
|
||||
def __init__(self, surface: pygame.Surface) -> None:
|
||||
"""Draw a pymunk.Space on a pygame.Surface object.
|
||||
|
||||
Typical usage::
|
||||
|
||||
>>> import pymunk
|
||||
>>> surface = pygame.Surface((10,10))
|
||||
>>> space = pymunk.Space()
|
||||
>>> options = pymunk.pygame_util.DrawOptions(surface)
|
||||
>>> space.debug_draw(options)
|
||||
|
||||
You can control the color of a shape by setting shape.color to the color
|
||||
you want it drawn in::
|
||||
|
||||
>>> c = pymunk.Circle(None, 10)
|
||||
>>> c.color = pygame.Color("pink")
|
||||
|
||||
See pygame_util.demo.py for a full example
|
||||
|
||||
Since pygame uses a coordinate system where y points down (in contrast
|
||||
to many other cases), you either have to make the physics simulation
|
||||
with Pymunk also behave in that way, or flip everything when you draw.
|
||||
|
||||
The easiest is probably to just make the simulation behave the same
|
||||
way as Pygame does. In that way all coordinates used are in the same
|
||||
orientation and easy to reason about::
|
||||
|
||||
>>> space = pymunk.Space()
|
||||
>>> space.gravity = (0, -1000)
|
||||
>>> body = pymunk.Body()
|
||||
>>> body.position = (0, 0) # will be positioned in the top left corner
|
||||
>>> space.debug_draw(options)
|
||||
|
||||
To flip the drawing its possible to set the module property
|
||||
:py:data:`positive_y_is_up` to True. Then the pygame drawing will flip
|
||||
the simulation upside down before drawing::
|
||||
|
||||
>>> positive_y_is_up = True
|
||||
>>> body = pymunk.Body()
|
||||
>>> body.position = (0, 0)
|
||||
>>> # Body will be position in bottom left corner
|
||||
|
||||
:Parameters:
|
||||
surface : pygame.Surface
|
||||
Surface that the objects will be drawn on
|
||||
"""
|
||||
self.surface = surface
|
||||
super().__init__()
|
||||
|
||||
def draw_circle(
|
||||
self,
|
||||
pos: Vec2d,
|
||||
angle: float,
|
||||
radius: float,
|
||||
outline_color: SpaceDebugColor,
|
||||
fill_color: SpaceDebugColor,
|
||||
) -> None:
|
||||
p = to_pygame(pos, self.surface)
|
||||
|
||||
pygame.draw.circle(self.surface, fill_color.as_int(), p, round(radius), 0)
|
||||
pygame.draw.circle(self.surface, light_color(fill_color).as_int(), p, round(radius - 4), 0)
|
||||
|
||||
# circle_edge = pos + Vec2d(radius, 0).rotated(angle)
|
||||
# p2 = to_pygame(circle_edge, self.surface)
|
||||
# line_r = 2 if radius > 20 else 1
|
||||
# pygame.draw.lines(self.surface, outline_color.as_int(), False, [p, p2], line_r)
|
||||
|
||||
def draw_segment(self, a: Vec2d, b: Vec2d, color: SpaceDebugColor) -> None:
|
||||
p1 = to_pygame(a, self.surface)
|
||||
p2 = to_pygame(b, self.surface)
|
||||
|
||||
pygame.draw.aalines(self.surface, color.as_int(), False, [p1, p2])
|
||||
|
||||
def draw_fat_segment(
|
||||
self,
|
||||
a: Tuple[float, float],
|
||||
b: Tuple[float, float],
|
||||
radius: float,
|
||||
outline_color: SpaceDebugColor,
|
||||
fill_color: SpaceDebugColor,
|
||||
) -> None:
|
||||
p1 = to_pygame(a, self.surface)
|
||||
p2 = to_pygame(b, self.surface)
|
||||
|
||||
r = round(max(1, radius * 2))
|
||||
pygame.draw.lines(self.surface, fill_color.as_int(), False, [p1, p2], r)
|
||||
if r > 2:
|
||||
orthog = [abs(p2[1] - p1[1]), abs(p2[0] - p1[0])]
|
||||
if orthog[0] == 0 and orthog[1] == 0:
|
||||
return
|
||||
scale = radius / (orthog[0] * orthog[0] + orthog[1] * orthog[1]) ** 0.5
|
||||
orthog[0] = round(orthog[0] * scale)
|
||||
orthog[1] = round(orthog[1] * scale)
|
||||
points = [
|
||||
(p1[0] - orthog[0], p1[1] - orthog[1]),
|
||||
(p1[0] + orthog[0], p1[1] + orthog[1]),
|
||||
(p2[0] + orthog[0], p2[1] + orthog[1]),
|
||||
(p2[0] - orthog[0], p2[1] - orthog[1]),
|
||||
]
|
||||
pygame.draw.polygon(self.surface, fill_color.as_int(), points)
|
||||
pygame.draw.circle(
|
||||
self.surface,
|
||||
fill_color.as_int(),
|
||||
(round(p1[0]), round(p1[1])),
|
||||
round(radius),
|
||||
)
|
||||
pygame.draw.circle(
|
||||
self.surface,
|
||||
fill_color.as_int(),
|
||||
(round(p2[0]), round(p2[1])),
|
||||
round(radius),
|
||||
)
|
||||
|
||||
def draw_polygon(
|
||||
self,
|
||||
verts: Sequence[Tuple[float, float]],
|
||||
radius: float,
|
||||
outline_color: SpaceDebugColor,
|
||||
fill_color: SpaceDebugColor,
|
||||
) -> None:
|
||||
ps = [to_pygame(v, self.surface) for v in verts]
|
||||
ps += [ps[0]]
|
||||
|
||||
radius = 2
|
||||
pygame.draw.polygon(self.surface, light_color(fill_color).as_int(), ps)
|
||||
|
||||
if radius > 0:
|
||||
for i in range(len(verts)):
|
||||
a = verts[i]
|
||||
b = verts[(i + 1) % len(verts)]
|
||||
self.draw_fat_segment(a, b, radius, fill_color, fill_color)
|
||||
|
||||
def draw_dot(self, size: float, pos: Tuple[float, float], color: SpaceDebugColor) -> None:
|
||||
p = to_pygame(pos, self.surface)
|
||||
pygame.draw.circle(self.surface, color.as_int(), p, round(size), 0)
|
||||
|
||||
|
||||
def get_mouse_pos(surface: pygame.Surface) -> Tuple[int, int]:
|
||||
"""Get position of the mouse pointer in pymunk coordinates."""
|
||||
p = pygame.mouse.get_pos()
|
||||
return from_pygame(p, surface)
|
||||
|
||||
|
||||
def to_pygame(p: Tuple[float, float], surface: pygame.Surface) -> Tuple[int, int]:
|
||||
"""Convenience method to convert pymunk coordinates to pygame surface
|
||||
local coordinates.
|
||||
|
||||
Note that in case positive_y_is_up is False, this function won't actually do
|
||||
anything except converting the point to integers.
|
||||
"""
|
||||
if positive_y_is_up:
|
||||
return round(p[0]), surface.get_height() - round(p[1])
|
||||
else:
|
||||
return round(p[0]), round(p[1])
|
||||
|
||||
|
||||
def from_pygame(p: Tuple[float, float], surface: pygame.Surface) -> Tuple[int, int]:
|
||||
"""Convenience method to convert pygame surface local coordinates to
|
||||
pymunk coordinates
|
||||
"""
|
||||
return to_pygame(p, surface)
|
||||
|
||||
|
||||
def light_color(color: SpaceDebugColor):
|
||||
color = np.minimum(1.2 * np.float32([color.r, color.g, color.b, color.a]), np.float32([255]))
|
||||
color = SpaceDebugColor(r=color[0], g=color[1], b=color[2], a=color[3])
|
||||
return color
|
|
@ -1,237 +0,0 @@
|
|||
import importlib
|
||||
import logging
|
||||
from collections import deque
|
||||
from typing import Optional
|
||||
|
||||
import einops
|
||||
import numpy as np
|
||||
import torch
|
||||
from tensordict import TensorDict
|
||||
from torchrl.data.tensor_specs import (
|
||||
BoundedTensorSpec,
|
||||
CompositeSpec,
|
||||
DiscreteTensorSpec,
|
||||
UnboundedContinuousTensorSpec,
|
||||
)
|
||||
from torchrl.envs.libs.gym import _gym_to_torchrl_spec_transform
|
||||
|
||||
from lerobot.common.envs.abstract import AbstractEnv
|
||||
from lerobot.common.utils import set_global_seed
|
||||
|
||||
MAX_NUM_ACTIONS = 4
|
||||
|
||||
_has_gym = importlib.util.find_spec("gymnasium") is not None
|
||||
|
||||
|
||||
class SimxarmEnv(AbstractEnv):
|
||||
name = "simxarm"
|
||||
available_tasks = ["lift"]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
task,
|
||||
frame_skip: int = 1,
|
||||
from_pixels: bool = False,
|
||||
pixels_only: bool = False,
|
||||
image_size=None,
|
||||
seed=1337,
|
||||
device="cpu",
|
||||
num_prev_obs=0,
|
||||
num_prev_action=0,
|
||||
):
|
||||
super().__init__(
|
||||
task=task,
|
||||
frame_skip=frame_skip,
|
||||
from_pixels=from_pixels,
|
||||
pixels_only=pixels_only,
|
||||
image_size=image_size,
|
||||
seed=seed,
|
||||
device=device,
|
||||
num_prev_obs=num_prev_obs,
|
||||
num_prev_action=num_prev_action,
|
||||
)
|
||||
|
||||
def _make_env(self):
|
||||
if not _has_gym:
|
||||
raise ImportError("Cannot import gymnasium.")
|
||||
|
||||
import gymnasium as gym
|
||||
|
||||
from lerobot.common.envs.simxarm.simxarm import TASKS
|
||||
|
||||
if self.task not in TASKS:
|
||||
raise ValueError(f"Unknown task {self.task}. Must be one of {list(TASKS.keys())}")
|
||||
|
||||
self._env = TASKS[self.task]["env"]()
|
||||
|
||||
num_actions = len(TASKS[self.task]["action_space"])
|
||||
self._action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(num_actions,))
|
||||
self._action_padding = np.zeros((MAX_NUM_ACTIONS - num_actions), dtype=np.float32)
|
||||
if "w" not in TASKS[self.task]["action_space"]:
|
||||
self._action_padding[-1] = 1.0
|
||||
|
||||
def render(self, mode="rgb_array", width=384, height=384):
|
||||
return self._env.render(mode, width=width, height=height)
|
||||
|
||||
def _format_raw_obs(self, raw_obs):
|
||||
if self.from_pixels:
|
||||
image = self.render(mode="rgb_array", width=self.image_size, height=self.image_size)
|
||||
image = image.transpose(2, 0, 1) # (H, W, C) -> (C, H, W)
|
||||
image = torch.tensor(image.copy(), dtype=torch.uint8)
|
||||
|
||||
obs = {"image": image}
|
||||
|
||||
if not self.pixels_only:
|
||||
obs["state"] = torch.tensor(self._env.robot_state, dtype=torch.float32)
|
||||
else:
|
||||
obs = {"state": torch.tensor(raw_obs["observation"], dtype=torch.float32)}
|
||||
|
||||
# obs = TensorDict(obs, batch_size=[])
|
||||
return obs
|
||||
|
||||
def _reset(self, tensordict: Optional[TensorDict] = None):
|
||||
td = tensordict
|
||||
if td is None or td.is_empty():
|
||||
raw_obs = self._env.reset()
|
||||
|
||||
obs = self._format_raw_obs(raw_obs)
|
||||
|
||||
if self.num_prev_obs > 0:
|
||||
stacked_obs = {}
|
||||
if "image" in obs:
|
||||
self._prev_obs_image_queue = deque(
|
||||
[obs["image"]] * (self.num_prev_obs + 1), maxlen=(self.num_prev_obs + 1)
|
||||
)
|
||||
stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue))
|
||||
if "state" in obs:
|
||||
self._prev_obs_state_queue = deque(
|
||||
[obs["state"]] * (self.num_prev_obs + 1), maxlen=(self.num_prev_obs + 1)
|
||||
)
|
||||
stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue))
|
||||
obs = stacked_obs
|
||||
|
||||
td = TensorDict(
|
||||
{
|
||||
"observation": TensorDict(obs, batch_size=[]),
|
||||
"done": torch.tensor([False], dtype=torch.bool),
|
||||
},
|
||||
batch_size=[],
|
||||
)
|
||||
else:
|
||||
raise NotImplementedError()
|
||||
|
||||
return td
|
||||
|
||||
def _step(self, tensordict: TensorDict):
|
||||
td = tensordict
|
||||
action = td["action"].numpy()
|
||||
# step expects shape=(4,) so we pad if necessary
|
||||
action = np.concatenate([action, self._action_padding])
|
||||
# TODO(rcadene): add info["is_success"] and info["success"] ?
|
||||
sum_reward = 0
|
||||
|
||||
if action.ndim == 1:
|
||||
action = einops.repeat(action, "c -> t c", t=self.frame_skip)
|
||||
else:
|
||||
if self.frame_skip > 1:
|
||||
raise NotImplementedError()
|
||||
|
||||
num_action_steps = action.shape[0]
|
||||
for i in range(num_action_steps):
|
||||
raw_obs, reward, done, info = self._env.step(action[i])
|
||||
sum_reward += reward
|
||||
|
||||
obs = self._format_raw_obs(raw_obs)
|
||||
|
||||
if self.num_prev_obs > 0:
|
||||
stacked_obs = {}
|
||||
if "image" in obs:
|
||||
self._prev_obs_image_queue.append(obs["image"])
|
||||
stacked_obs["image"] = torch.stack(list(self._prev_obs_image_queue))
|
||||
if "state" in obs:
|
||||
self._prev_obs_state_queue.append(obs["state"])
|
||||
stacked_obs["state"] = torch.stack(list(self._prev_obs_state_queue))
|
||||
obs = stacked_obs
|
||||
|
||||
td = TensorDict(
|
||||
{
|
||||
"observation": self._format_raw_obs(raw_obs),
|
||||
"reward": torch.tensor([sum_reward], dtype=torch.float32),
|
||||
"done": torch.tensor([done], dtype=torch.bool),
|
||||
"success": torch.tensor([info["success"]], dtype=torch.bool),
|
||||
},
|
||||
batch_size=[],
|
||||
)
|
||||
return td
|
||||
|
||||
def _make_spec(self):
|
||||
obs = {}
|
||||
if self.from_pixels:
|
||||
image_shape = (3, self.image_size, self.image_size)
|
||||
if self.num_prev_obs > 0:
|
||||
image_shape = (self.num_prev_obs + 1, *image_shape)
|
||||
|
||||
obs["image"] = BoundedTensorSpec(
|
||||
low=0,
|
||||
high=255,
|
||||
shape=image_shape,
|
||||
dtype=torch.uint8,
|
||||
device=self.device,
|
||||
)
|
||||
if not self.pixels_only:
|
||||
state_shape = (len(self._env.robot_state),)
|
||||
if self.num_prev_obs > 0:
|
||||
state_shape = (self.num_prev_obs + 1, *state_shape)
|
||||
|
||||
obs["state"] = UnboundedContinuousTensorSpec(
|
||||
shape=state_shape,
|
||||
dtype=torch.float32,
|
||||
device=self.device,
|
||||
)
|
||||
else:
|
||||
# TODO(rcadene): add observation_space achieved_goal and desired_goal?
|
||||
state_shape = self._env.observation_space["observation"].shape
|
||||
if self.num_prev_obs > 0:
|
||||
state_shape = (self.num_prev_obs + 1, *state_shape)
|
||||
|
||||
obs["state"] = UnboundedContinuousTensorSpec(
|
||||
# TODO:
|
||||
shape=state_shape,
|
||||
dtype=torch.float32,
|
||||
device=self.device,
|
||||
)
|
||||
self.observation_spec = CompositeSpec({"observation": obs})
|
||||
|
||||
self.action_spec = _gym_to_torchrl_spec_transform(
|
||||
self._action_space,
|
||||
device=self.device,
|
||||
)
|
||||
|
||||
self.reward_spec = UnboundedContinuousTensorSpec(
|
||||
shape=(1,),
|
||||
dtype=torch.float32,
|
||||
device=self.device,
|
||||
)
|
||||
|
||||
self.done_spec = CompositeSpec(
|
||||
{
|
||||
"done": DiscreteTensorSpec(
|
||||
2,
|
||||
shape=(1,),
|
||||
dtype=torch.bool,
|
||||
device=self.device,
|
||||
),
|
||||
"success": DiscreteTensorSpec(
|
||||
2,
|
||||
shape=(1,),
|
||||
dtype=torch.bool,
|
||||
device=self.device,
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
def _set_seed(self, seed: Optional[int]):
|
||||
set_global_seed(seed)
|
||||
self._seed = seed
|
||||
# TODO(aliberts): change self._reset so that it takes in a seed value
|
||||
logging.warning("simxarm env is not properly seeded")
|
|
@ -1,166 +0,0 @@
|
|||
from collections import OrderedDict, deque
|
||||
|
||||
import gymnasium as gym
|
||||
import numpy as np
|
||||
from gymnasium.wrappers import TimeLimit
|
||||
|
||||
from lerobot.common.envs.simxarm.simxarm.tasks.base import Base as Base
|
||||
from lerobot.common.envs.simxarm.simxarm.tasks.lift import Lift
|
||||
from lerobot.common.envs.simxarm.simxarm.tasks.peg_in_box import PegInBox
|
||||
from lerobot.common.envs.simxarm.simxarm.tasks.push import Push
|
||||
from lerobot.common.envs.simxarm.simxarm.tasks.reach import Reach
|
||||
|
||||
TASKS = OrderedDict(
|
||||
(
|
||||
(
|
||||
"reach",
|
||||
{
|
||||
"env": Reach,
|
||||
"action_space": "xyz",
|
||||
"episode_length": 50,
|
||||
"description": "Reach a target location with the end effector",
|
||||
},
|
||||
),
|
||||
(
|
||||
"push",
|
||||
{
|
||||
"env": Push,
|
||||
"action_space": "xyz",
|
||||
"episode_length": 50,
|
||||
"description": "Push a cube to a target location",
|
||||
},
|
||||
),
|
||||
(
|
||||
"peg_in_box",
|
||||
{
|
||||
"env": PegInBox,
|
||||
"action_space": "xyz",
|
||||
"episode_length": 50,
|
||||
"description": "Insert a peg into a box",
|
||||
},
|
||||
),
|
||||
(
|
||||
"lift",
|
||||
{
|
||||
"env": Lift,
|
||||
"action_space": "xyzw",
|
||||
"episode_length": 50,
|
||||
"description": "Lift a cube above a height threshold",
|
||||
},
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
class SimXarmWrapper(gym.Wrapper):
|
||||
"""
|
||||
A wrapper for the SimXarm environments. This wrapper is used to
|
||||
convert the action and observation spaces to the correct format.
|
||||
"""
|
||||
|
||||
def __init__(self, env, task, obs_mode, image_size, action_repeat, frame_stack=1, channel_last=False):
|
||||
super().__init__(env)
|
||||
self._env = env
|
||||
self.obs_mode = obs_mode
|
||||
self.image_size = image_size
|
||||
self.action_repeat = action_repeat
|
||||
self.frame_stack = frame_stack
|
||||
self._frames = deque([], maxlen=frame_stack)
|
||||
self.channel_last = channel_last
|
||||
self._max_episode_steps = task["episode_length"] // action_repeat
|
||||
|
||||
image_shape = (
|
||||
(image_size, image_size, 3 * frame_stack)
|
||||
if channel_last
|
||||
else (3 * frame_stack, image_size, image_size)
|
||||
)
|
||||
if obs_mode == "state":
|
||||
self.observation_space = env.observation_space["observation"]
|
||||
elif obs_mode == "rgb":
|
||||
self.observation_space = gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8)
|
||||
elif obs_mode == "all":
|
||||
self.observation_space = gym.spaces.Dict(
|
||||
state=gym.spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float32),
|
||||
rgb=gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unknown obs_mode {obs_mode}. Must be one of [rgb, all, state]")
|
||||
self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(len(task["action_space"]),))
|
||||
self.action_padding = np.zeros(4 - len(task["action_space"]), dtype=np.float32)
|
||||
if "w" not in task["action_space"]:
|
||||
self.action_padding[-1] = 1.0
|
||||
|
||||
def _render_obs(self):
|
||||
obs = self.render(mode="rgb_array", width=self.image_size, height=self.image_size)
|
||||
if not self.channel_last:
|
||||
obs = obs.transpose(2, 0, 1)
|
||||
return obs.copy()
|
||||
|
||||
def _update_frames(self, reset=False):
|
||||
pixels = self._render_obs()
|
||||
self._frames.append(pixels)
|
||||
if reset:
|
||||
for _ in range(1, self.frame_stack):
|
||||
self._frames.append(pixels)
|
||||
assert len(self._frames) == self.frame_stack
|
||||
|
||||
def transform_obs(self, obs, reset=False):
|
||||
if self.obs_mode == "state":
|
||||
return obs["observation"]
|
||||
elif self.obs_mode == "rgb":
|
||||
self._update_frames(reset=reset)
|
||||
rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0)
|
||||
return rgb_obs
|
||||
elif self.obs_mode == "all":
|
||||
self._update_frames(reset=reset)
|
||||
rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0)
|
||||
return OrderedDict((("rgb", rgb_obs), ("state", self.robot_state)))
|
||||
else:
|
||||
raise ValueError(f"Unknown obs_mode {self.obs_mode}. Must be one of [rgb, all, state]")
|
||||
|
||||
def reset(self):
|
||||
return self.transform_obs(self._env.reset(), reset=True)
|
||||
|
||||
def step(self, action):
|
||||
action = np.concatenate([action, self.action_padding])
|
||||
reward = 0.0
|
||||
for _ in range(self.action_repeat):
|
||||
obs, r, done, info = self._env.step(action)
|
||||
reward += r
|
||||
return self.transform_obs(obs), reward, done, info
|
||||
|
||||
def render(self, mode="rgb_array", width=384, height=384, **kwargs):
|
||||
return self._env.render(mode, width=width, height=height)
|
||||
|
||||
@property
|
||||
def state(self):
|
||||
return self._env.robot_state
|
||||
|
||||
|
||||
def make(task, obs_mode="state", image_size=84, action_repeat=1, frame_stack=1, channel_last=False, seed=0):
|
||||
"""
|
||||
Create a new environment.
|
||||
Args:
|
||||
task (str): The task to create an environment for. Must be one of:
|
||||
- 'reach'
|
||||
- 'push'
|
||||
- 'peg-in-box'
|
||||
- 'lift'
|
||||
obs_mode (str): The observation mode to use. Must be one of:
|
||||
- 'state': Only state observations
|
||||
- 'rgb': RGB images
|
||||
- 'all': RGB images and state observations
|
||||
image_size (int): The size of the image observations
|
||||
action_repeat (int): The number of times to repeat the action
|
||||
seed (int): The random seed to use
|
||||
Returns:
|
||||
gym.Env: The environment
|
||||
"""
|
||||
if task not in TASKS:
|
||||
raise ValueError(f"Unknown task {task}. Must be one of {list(TASKS.keys())}")
|
||||
env = TASKS[task]["env"]()
|
||||
env = TimeLimit(env, TASKS[task]["episode_length"])
|
||||
env = SimXarmWrapper(env, TASKS[task], obs_mode, image_size, action_repeat, frame_stack, channel_last)
|
||||
env.seed(seed)
|
||||
|
||||
return env
|
|
@ -1,53 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
|
||||
<mujoco>
|
||||
<compiler angle="radian" coordinate="local" meshdir="mesh" texturedir="texture"></compiler>
|
||||
<size nconmax="2000" njmax="500"/>
|
||||
|
||||
<option timestep="0.002">
|
||||
<flag warmstart="enable"></flag>
|
||||
</option>
|
||||
|
||||
<include file="shared.xml"></include>
|
||||
|
||||
<worldbody>
|
||||
<body name="floor0" pos="0 0 0">
|
||||
<geom name="floorgeom0" pos="1.2 -2.0 0" size="20.0 20.0 1" type="plane" condim="3" material="floor_mat"></geom>
|
||||
</body>
|
||||
|
||||
<include file="xarm.xml"></include>
|
||||
|
||||
<body pos="0.75 0 0.6325" name="pedestal0">
|
||||
<geom name="pedestalgeom0" size="0.1 0.1 0.01" pos="0.32 0.27 0" type="box" mass="2000" material="pedestal_mat"></geom>
|
||||
<site pos="0.30 0.30 0" size="0.075 0.075 0.002" type="box" name="robotmountsite0" rgba="0.55 0.54 0.53 1" />
|
||||
</body>
|
||||
|
||||
<body pos="1.5 0.075 0.3425" name="table0">
|
||||
<geom name="tablegeom0" size="0.3 0.6 0.2" pos="0 0 0" type="box" material="table_mat" density="2000" friction="1 1 1"></geom>
|
||||
</body>
|
||||
|
||||
<body name="object" pos="1.405 0.3 0.58625">
|
||||
<joint name="object_joint0" type="free" limited="false"></joint>
|
||||
<geom size="0.035 0.035 0.035" type="box" name="object0" material="block_mat" density="50000" condim="4" friction="1 1 1" solimp="1 1 1" solref="0.02 1"></geom>
|
||||
<site name="object_site" pos="0 0 0" size="0.035 0.035 0.035" rgba="1 0 0 0" type="box"></site>
|
||||
</body>
|
||||
|
||||
<light directional="true" ambient="0.1 0.1 0.1" diffuse="0 0 0" specular="0 0 0" castshadow="false" pos="1.65 0 10" dir="-0.57 -0.57 -0.57" name="light0"></light>
|
||||
<light directional="true" ambient="0.1 0.1 0.1" diffuse="0 0 0" specular="0 0 0" castshadow="false" pos="0 -4 4" dir="0 1 -0.1" name="light1"></light>
|
||||
<light directional="true" ambient="0.05 0.05 0.05" diffuse="0 0 0" specular="0 0 0" castshadow="false" pos="2.13 1.6 2.5" name="light2"></light>
|
||||
<light pos="0 0 2" dir="0.2 0.2 -0.8" directional="true" diffuse="0.3 0.3 0.3" castshadow="false" name="light3"></light>
|
||||
|
||||
<camera fovy="50" name="camera0" pos="0.9559 1.0 1.1" euler="-1.1 -0.6 3.4" />
|
||||
</worldbody>
|
||||
|
||||
<equality>
|
||||
<connect body2="left_finger" body1="left_inner_knuckle" anchor="0.0 0.035 0.042" solimp="0.9 0.95 0.001 0.5 2" solref="0.0002 1.0" ></connect>
|
||||
<connect body2="right_finger" body1="right_inner_knuckle" anchor="0.0 -0.035 0.042" solimp="0.9 0.95 0.001 0.5 2" solref="0.0002 1.0" ></connect>
|
||||
<joint joint1="left_inner_knuckle_joint" joint2="right_inner_knuckle_joint"></joint>
|
||||
</equality>
|
||||
|
||||
<actuator>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="left_inner_knuckle_joint" gear="200.0"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="right_inner_knuckle_joint" gear="200.0"/>
|
||||
</actuator>
|
||||
</mujoco>
|
|
@ -1,3 +0,0 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:21fb81ae7fba19e3c6b2d2ca60c8051712ba273357287eb5a397d92d61c7a736
|
||||
size 1211434
|
|
@ -1,3 +0,0 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:be68ce180d11630a667a5f37f4dffcc3feebe4217d4bb3912c813b6d9ca3ec66
|
||||
size 3284
|
|
@ -1,3 +0,0 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2c6448552bf6b1c4f17334d686a5320ce051bcdfe31431edf69303d8a570d1de
|
||||
size 3284
|
|
@ -1,3 +0,0 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:748b9e197e6521914f18d1f6383a36f211136b3f33f2ad2a8c11b9f921c2cf86
|
||||
size 6284
|
|
@ -1,3 +0,0 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a44756eb72f9c214cb37e61dc209cd7073fdff3e4271a7423476ef6fd090d2d4
|
||||
size 242684
|
|
@ -1,3 +0,0 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e8e48692ad26837bb3d6a97582c89784d09948fc09bfe4e5a59017859ff04dac
|
||||
size 366284
|
|
@ -1,3 +0,0 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:501665812b08d67e764390db781e839adc6896a9540301d60adf606f57648921
|
||||
size 22284
|
|
@ -1,3 +0,0 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:34b541122df84d2ef5fcb91b715eb19659dc15ad8d44a191dde481f780265636
|
||||
size 184184
|
|
@ -1,3 +0,0 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:61e641cd47c169ecef779683332e00e4914db729bf02dfb61bfbe69351827455
|
||||
size 225584
|
|
@ -1,3 +0,0 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9e2798e7946dd70046c95455d5ba96392d0b54a6069caba91dc4ca66e1379b42
|
||||
size 237084
|
|
@ -1,3 +0,0 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c757fee95f873191a0633c355c07a360032960771cabbd7593a6cdb0f1ffb089
|
||||
size 243684
|
|
@ -1,3 +0,0 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:715ad5787c5dab57589937fd47289882707b5e1eb997e340d567785b02f4ec90
|
||||
size 229084
|
|
@ -1,3 +0,0 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:85b320aa420497827223d16d492bba8de091173374e361396fc7a5dad7bdb0cb
|
||||
size 399384
|
|
@ -1,3 +0,0 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:97115d848fbf802cb770cd9be639ae2af993103b9d9bbb0c50c943c738a36f18
|
||||
size 231684
|
|
@ -1,3 +0,0 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f6fcbc18258090eb56c21cfb17baa5ae43abc98b1958cd366f3a73b9898fc7f0
|
||||
size 2106184
|
|
@ -1,3 +0,0 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c5dee87c7f37baf554b8456ebfe0b3e8ed0b22b8938bd1add6505c2ad6d32c7d
|
||||
size 242684
|
|
@ -1,3 +0,0 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b41dd2c2c550281bf78d7cc6fa117b14786700e5c453560a0cb5fd6dfa0ffb3e
|
||||
size 366284
|
|
@ -1,3 +0,0 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:75ca1107d0a42a0f03802a9a49cab48419b31851ee8935f8f1ca06be1c1c91e8
|
||||
size 22284
|
|
@ -1,74 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
|
||||
<mujoco>
|
||||
<compiler angle="radian" coordinate="local" meshdir="mesh" texturedir="texture"></compiler>
|
||||
<size nconmax="2000" njmax="500"/>
|
||||
|
||||
<option timestep="0.001">
|
||||
<flag warmstart="enable"></flag>
|
||||
</option>
|
||||
|
||||
<include file="shared.xml"></include>
|
||||
|
||||
<worldbody>
|
||||
<body name="floor0" pos="0 0 0">
|
||||
<geom name="floorgeom0" pos="1.2 -2.0 0" size="1.0 10.0 1" type="plane" condim="3" material="floor_mat"></geom>
|
||||
</body>
|
||||
|
||||
<include file="xarm.xml"></include>
|
||||
|
||||
<body pos="0.75 0 0.6325" name="pedestal0">
|
||||
<geom name="pedestalgeom0" size="0.1 0.1 0.01" pos="0.32 0.27 0" type="box" mass="2000" material="pedestal_mat"></geom>
|
||||
<site pos="0.30 0.30 0" size="0.075 0.075 0.002" type="box" name="robotmountsite0" rgba="0.55 0.54 0.53 1" />
|
||||
</body>
|
||||
|
||||
<body pos="1.5 0.075 0.3425" name="table0">
|
||||
<geom name="tablegeom0" size="0.3 0.6 0.2" pos="0 0 0" type="box" material="table_mat" density="2000" friction="1 0.005 0.0002"></geom>
|
||||
</body>
|
||||
|
||||
<body name="box0" pos="1.605 0.25 0.55">
|
||||
<joint name="box_joint0" type="free" limited="false"></joint>
|
||||
<site name="box_site" pos="0 0.075 -0.01" size="0.02" rgba="0 0 0 0" type="sphere"></site>
|
||||
<geom name="box_side0" pos="0 0 0" size="0.065 0.002 0.04" type= "box" rgba="0.8 0.1 0.1 1" mass ="1" condim="4" />
|
||||
<geom name="box_side1" pos="0 0.149 0" size="0.065 0.002 0.04" type="box" rgba="0.9 0.2 0.2 1" mass ="2" condim="4" />
|
||||
<geom name="box_side2" pos="0.064 0.074 0" size="0.002 0.075 0.04" type="box" rgba="0.8 0.1 0.1 1" mass ="2" condim="4" />
|
||||
<geom name="box_side3" pos="-0.064 0.074 0" size="0.002 0.075 0.04" type="box" rgba="0.9 0.2 0.2 1" mass ="2" condim="4" />
|
||||
<geom name="box_side4" pos="-0 0.074 -0.038" size="0.065 0.075 0.002" type="box" rgba="0.5 0 0 1" mass ="2" condim="4"/>
|
||||
</body>
|
||||
|
||||
<body name="object0" pos="1.4 0.25 0.65">
|
||||
<joint name="object_joint0" type="free" limited="false"></joint>
|
||||
<geom name="object_target0" type="cylinder" pos="0 0 -0.05" size="0.03 0.035" rgba="0.6 0.8 0.5 1" mass ="0.1" condim="3" />
|
||||
<site name="object_site" pos="0 0 -0.05" size="0.0325 0.0375" rgba="0 0 0 0" type="cylinder"></site>
|
||||
<body name="B0" pos="0 0 0" euler="0 0 0 ">
|
||||
<joint name="B0:joint" type="slide" limited="true" axis="0 0 1" damping="0.05" range="0.0001 0.0001001" solimpfriction="0.98 0.98 0.95" frictionloss="1"></joint>
|
||||
<geom type="capsule" size="0.002 0.03" rgba="0 0 0 1" mass="0.001" condim="4"/>
|
||||
<body name="B1" pos="0 0 0.04" euler="0 3.14 0 ">
|
||||
<joint name="B1:joint1" type="hinge" axis="1 0 0" range="-0.1 0.1" frictionloss="1"></joint>
|
||||
<joint name="B1:joint2" type="hinge" axis="0 1 0" range="-0.1 0.1" frictionloss="1"></joint>
|
||||
<joint name="B1:joint3" type="hinge" axis="0 0 1" range="-0.1 0.1" frictionloss="1"></joint>
|
||||
<geom type="capsule" size="0.002 0.004" rgba="1 0 0 0" mass="0.001" condim="4"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
|
||||
<light directional="true" ambient="0.1 0.1 0.1" diffuse="0 0 0" specular="0 0 0" castshadow="false" pos="1.65 0 10" dir="-0.57 -0.57 -0.57" name="light0"></light>
|
||||
<light directional="true" ambient="0.1 0.1 0.1" diffuse="0 0 0" specular="0 0 0" castshadow="false" pos="0 -4 4" dir="0 1 -0.1" name="light1"></light>
|
||||
<light directional="true" ambient="0.05 0.05 0.05" diffuse="0 0 0" specular="0 0 0" castshadow="false" pos="2.13 1.6 2.5" name="light2"></light>
|
||||
<light pos="0 0 2" dir="0.2 0.2 -0.8" directional="true" diffuse="0.3 0.3 0.3" castshadow="false" name="light3"></light>
|
||||
|
||||
<camera fovy="50" name="camera0" pos="0.9559 1.0 1.1" euler="-1.1 -0.6 3.4" />
|
||||
</worldbody>
|
||||
|
||||
<equality>
|
||||
<connect body2="left_finger" body1="left_inner_knuckle" anchor="0.0 0.035 0.042" solimp="0.9 0.95 0.001 0.5 2" solref="0.0002 1.0" ></connect>
|
||||
<connect body2="right_finger" body1="right_inner_knuckle" anchor="0.0 -0.035 0.042" solimp="0.9 0.95 0.001 0.5 2" solref="0.0002 1.0" ></connect>
|
||||
<weld body1="right_hand" body2="B1" solimp="0.99 0.99 0.99" solref="0.02 1"></weld>
|
||||
<joint joint1="left_inner_knuckle_joint" joint2="right_inner_knuckle_joint"></joint>
|
||||
</equality>
|
||||
|
||||
<actuator>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="left_inner_knuckle_joint" gear="200.0"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="right_inner_knuckle_joint" gear="200.0"/>
|
||||
</actuator>
|
||||
</mujoco>
|
|
@ -1,54 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
|
||||
<mujoco>
|
||||
<compiler angle="radian" coordinate="local" meshdir="mesh" texturedir="texture"></compiler>
|
||||
<size nconmax="2000" njmax="500"/>
|
||||
|
||||
<option timestep="0.002">
|
||||
<flag warmstart="enable"></flag>
|
||||
</option>
|
||||
|
||||
<include file="shared.xml"></include>
|
||||
|
||||
<worldbody>
|
||||
<body name="floor0" pos="0 0 0">
|
||||
<geom name="floorgeom0" pos="1.2 -2.0 0" size="1.0 10.0 1" type="plane" condim="3" material="floor_mat"></geom>
|
||||
<site name="target0" pos="1.565 0.3 0.545" size="0.0475 0.001" rgba="1 0 0 1" type="cylinder"></site>
|
||||
</body>
|
||||
|
||||
<include file="xarm.xml"></include>
|
||||
|
||||
<body pos="0.75 0 0.6325" name="pedestal0">
|
||||
<geom name="pedestalgeom0" size="0.1 0.1 0.01" pos="0.32 0.27 0" type="box" mass="2000" material="pedestal_mat"></geom>
|
||||
<site pos="0.30 0.30 0" size="0.075 0.075 0.002" type="box" name="robotmountsite0" rgba="0.55 0.54 0.53 1" />
|
||||
</body>
|
||||
|
||||
<body pos="1.5 0.075 0.3425" name="table0">
|
||||
<geom name="tablegeom0" size="0.3 0.6 0.2" pos="0 0 0" type="box" material="table_mat" density="2000" friction="1 0.005 0.0002"></geom>
|
||||
</body>
|
||||
|
||||
<body name="object" pos="1.655 0.3 0.68">
|
||||
<joint name="object_joint0" type="free" limited="false"></joint>
|
||||
<geom size="0.024 0.024 0.024" type="box" name="object" material="block_mat" density="50000" condim="4" friction="1 1 1" solimp="1 1 1" solref="0.02 1"></geom>
|
||||
<site name="object_site" pos="0 0 0" size="0.024 0.024 0.024" rgba="0 0 0 0" type="box"></site>
|
||||
</body>
|
||||
|
||||
<light directional="true" ambient="0.1 0.1 0.1" diffuse="0 0 0" specular="0 0 0" castshadow="false" pos="1.65 0 10" dir="-0.57 -0.57 -0.57" name="light0"></light>
|
||||
<light directional="true" ambient="0.1 0.1 0.1" diffuse="0 0 0" specular="0 0 0" castshadow="false" pos="0 -4 4" dir="0 1 -0.1" name="light1"></light>
|
||||
<light directional="true" ambient="0.05 0.05 0.05" diffuse="0 0 0" specular="0 0 0" castshadow="false" pos="2.13 1.6 2.5" name="light2"></light>
|
||||
<light pos="0 0 2" dir="0.2 0.2 -0.8" directional="true" diffuse="0.3 0.3 0.3" castshadow="false" name="light3"></light>
|
||||
|
||||
<camera fovy="50" name="camera0" pos="0.9559 1.0 1.1" euler="-1.1 -0.6 3.4" />
|
||||
</worldbody>
|
||||
|
||||
<equality>
|
||||
<connect body2="left_finger" body1="left_inner_knuckle" anchor="0.0 0.035 0.042" solimp="0.9 0.95 0.001 0.5 2" solref="0.0002 1.0" ></connect>
|
||||
<connect body2="right_finger" body1="right_inner_knuckle" anchor="0.0 -0.035 0.042" solimp="0.9 0.95 0.001 0.5 2" solref="0.0002 1.0" ></connect>
|
||||
<joint joint1="left_inner_knuckle_joint" joint2="right_inner_knuckle_joint"></joint>
|
||||
</equality>
|
||||
|
||||
<actuator>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="left_inner_knuckle_joint" gear="200.0"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="right_inner_knuckle_joint" gear="200.0"/>
|
||||
</actuator>
|
||||
</mujoco>
|
|
@ -1,48 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
|
||||
<mujoco>
|
||||
<compiler angle="radian" coordinate="local" meshdir="mesh" texturedir="texture"></compiler>
|
||||
<size nconmax="2000" njmax="500"/>
|
||||
|
||||
<option timestep="0.002">
|
||||
<flag warmstart="enable"></flag>
|
||||
</option>
|
||||
|
||||
<include file="shared.xml"></include>
|
||||
|
||||
<worldbody>
|
||||
<body name="floor0" pos="0 0 0">
|
||||
<geom name="floorgeom0" pos="1.2 -2.0 0" size="1.0 10.0 1" type="plane" condim="3" material="floor_mat"></geom>
|
||||
<site name="target0" pos="1.605 0.3 0.58" size="0.0475 0.001" rgba="1 0 0 1" type="cylinder"></site>
|
||||
</body>
|
||||
|
||||
<include file="xarm.xml"></include>
|
||||
|
||||
<body pos="0.75 0 0.6325" name="pedestal0">
|
||||
<geom name="pedestalgeom0" size="0.1 0.1 0.01" pos="0.32 0.27 0" type="box" mass="2000" material="pedestal_mat"></geom>
|
||||
<site pos="0.30 0.30 0" size="0.075 0.075 0.002" type="box" name="robotmountsite0" rgba="0.55 0.54 0.53 1" />
|
||||
</body>
|
||||
|
||||
<body pos="1.5 0.075 0.3425" name="table0">
|
||||
<geom name="tablegeom0" size="0.3 0.6 0.2" pos="0 0 0" type="box" material="table_mat" density="2000" friction="1 0.005 0.0002"></geom>
|
||||
</body>
|
||||
|
||||
<light directional="true" ambient="0.1 0.1 0.1" diffuse="0 0 0" specular="0 0 0" castshadow="false" pos="1.65 0 10" dir="-0.57 -0.57 -0.57" name="light0"></light>
|
||||
<light directional="true" ambient="0.1 0.1 0.1" diffuse="0 0 0" specular="0 0 0" castshadow="false" pos="0 -4 4" dir="0 1 -0.1" name="light1"></light>
|
||||
<light directional="true" ambient="0.05 0.05 0.05" diffuse="0 0 0" specular="0 0 0" castshadow="false" pos="2.13 1.6 2.5" name="light2"></light>
|
||||
<light pos="0 0 2" dir="0.2 0.2 -0.8" directional="true" diffuse="0.3 0.3 0.3" castshadow="false" name="light3"></light>
|
||||
|
||||
<camera fovy="50" name="camera0" pos="0.9559 1.0 1.1" euler="-1.1 -0.6 3.4" />
|
||||
</worldbody>
|
||||
|
||||
<equality>
|
||||
<connect body2="left_finger" body1="left_inner_knuckle" anchor="0.0 0.035 0.042" solimp="0.9 0.95 0.001 0.5 2" solref="0.0002 1.0" ></connect>
|
||||
<connect body2="right_finger" body1="right_inner_knuckle" anchor="0.0 -0.035 0.042" solimp="0.9 0.95 0.001 0.5 2" solref="0.0002 1.0" ></connect>
|
||||
<joint joint1="left_inner_knuckle_joint" joint2="right_inner_knuckle_joint"></joint>
|
||||
</equality>
|
||||
|
||||
<actuator>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="left_inner_knuckle_joint" gear="200.0"/>
|
||||
<motor ctrllimited="true" ctrlrange="-1.0 1.0" joint="right_inner_knuckle_joint" gear="200.0"/>
|
||||
</actuator>
|
||||
</mujoco>
|
|
@ -1,51 +0,0 @@
|
|||
<mujoco>
|
||||
<asset>
|
||||
<texture type="skybox" builtin="gradient" rgb1="0.0 0.0 0.0" rgb2="0.0 0.0 0.0" width="32" height="32"></texture>
|
||||
<material name="floor_mat" specular="0" shininess="0.0" reflectance="0" rgba="0.043 0.055 0.051 1"></material>
|
||||
|
||||
<material name="table_mat" specular="0.2" shininess="0.2" reflectance="0" rgba="1 1 1 1"></material>
|
||||
<material name="pedestal_mat" specular="0.35" shininess="0.5" reflectance="0" rgba="0.705 0.585 0.405 1"></material>
|
||||
<material name="block_mat" specular="0.5" shininess="0.9" reflectance="0.05" rgba="0.373 0.678 0.627 1"></material>
|
||||
|
||||
<material name="robot0:geomMat" shininess="0.03" specular="0.4"></material>
|
||||
<material name="robot0:gripper_finger_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
|
||||
<material name="robot0:gripper_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
|
||||
<material name="background:gripper_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
|
||||
<material name="robot0:arm_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
|
||||
<material name="robot0:head_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
|
||||
<material name="robot0:torso_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
|
||||
<material name="robot0:base_mat" shininess="0.03" specular="0.4" reflectance="0"></material>
|
||||
|
||||
<mesh name="link_base" file="link_base.stl" />
|
||||
<mesh name="link1" file="link1.stl" />
|
||||
<mesh name="link2" file="link2.stl" />
|
||||
<mesh name="link3" file="link3.stl" />
|
||||
<mesh name="link4" file="link4.stl" />
|
||||
<mesh name="link5" file="link5.stl" />
|
||||
<mesh name="link6" file="link6.stl" />
|
||||
<mesh name="link7" file="link7.stl" />
|
||||
<mesh name="base_link" file="base_link.stl" />
|
||||
<mesh name="left_outer_knuckle" file="left_outer_knuckle.stl" />
|
||||
<mesh name="left_finger" file="left_finger.stl" />
|
||||
<mesh name="left_inner_knuckle" file="left_inner_knuckle.stl" />
|
||||
<mesh name="right_outer_knuckle" file="right_outer_knuckle.stl" />
|
||||
<mesh name="right_finger" file="right_finger.stl" />
|
||||
<mesh name="right_inner_knuckle" file="right_inner_knuckle.stl" />
|
||||
</asset>
|
||||
|
||||
<equality>
|
||||
<weld body1="robot0:mocap2" body2="link7" solimp="0.9 0.95 0.001" solref="0.02 1"></weld>
|
||||
</equality>
|
||||
|
||||
<default>
|
||||
<joint armature="1" damping="0.1" limited="true"/>
|
||||
<default class="robot0:blue">
|
||||
<geom rgba="0.086 0.506 0.767 1.0"></geom>
|
||||
</default>
|
||||
|
||||
<default class="robot0:grey">
|
||||
<geom rgba="0.356 0.361 0.376 1.0"></geom>
|
||||
</default>
|
||||
</default>
|
||||
|
||||
</mujoco>
|
|
@ -1,88 +0,0 @@
|
|||
<mujoco model="xarm7">
|
||||
<body mocap="true" name="robot0:mocap2" pos="0 0 0">
|
||||
<geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0.5 0 0" size="0.005 0.005 0.005" type="box"></geom>
|
||||
<geom conaffinity="0" contype="0" pos="0 0 0" rgba="0.5 0 0 0" size="1 0.005 0.005" type="box"></geom>
|
||||
<geom conaffinity="0" contype="0" pos="0 0 0" rgba="0 0 0.5 0" size="0.005 1 0.001" type="box"></geom>
|
||||
<geom conaffinity="0" contype="0" pos="0 0 0" rgba="0.5 0.5 0 0" size="0.005 0.005 1" type="box"></geom>
|
||||
</body>
|
||||
|
||||
<body name="link0" pos="1.09 0.28 0.655">
|
||||
<geom name="bb" type="mesh" mesh="link_base" material="robot0:base_mat" rgba="1 1 1 1"/>
|
||||
<body name="link1" pos="0 0 0.267">
|
||||
<inertial pos="-0.0042142 0.02821 -0.0087788" quat="0.917781 -0.277115 0.0606681 0.277858" mass="0.42603" diaginertia="0.00144551 0.00137757 0.000823511" />
|
||||
<joint name="joint1" pos="0 0 0" axis="0 0 1" limited="true" range="-6.28319 6.28319" damping="10" frictionloss="1" />
|
||||
<geom name="j1" type="mesh" mesh="link1" material="robot0:arm_mat" rgba="1 1 1 1"/>
|
||||
<body name="link2" pos="0 0 0" quat="0.707105 -0.707108 0 0">
|
||||
<inertial pos="-3.3178e-05 -0.12849 0.026337" quat="0.447793 0.894132 -0.00224061 0.00218314" mass="0.56095" diaginertia="0.00319151 0.00311598 0.000980804" />
|
||||
<joint name="joint2" pos="0 0 0" axis="0 0 1" limited="true" range="-2.059 2.0944" damping="10" frictionloss="1" />
|
||||
<geom name="j2" type="mesh" mesh="link2" material="robot0:head_mat" rgba="1 1 1 1"/>
|
||||
<body name="link3" pos="0 -0.293 0" quat="0.707105 0.707108 0 0">
|
||||
<inertial pos="0.04223 -0.023258 -0.0096674" quat="0.883205 0.339803 0.323238 0.000542237" mass="0.44463" diaginertia="0.00133227 0.00119126 0.000780475" />
|
||||
<joint name="joint3" pos="0 0 0" axis="0 0 1" limited="true" range="-6.28319 6.28319" damping="5" frictionloss="1" />
|
||||
<geom name="j3" type="mesh" mesh="link3" material="robot0:gripper_mat" rgba="1 1 1 1"/>
|
||||
<body name="link4" pos="0.0525 0 0" quat="0.707105 0.707108 0 0">
|
||||
<inertial pos="0.067148 -0.10732 0.024479" quat="0.0654142 0.483317 -0.738663 0.465298" mass="0.52387" diaginertia="0.00288984 0.00282705 0.000894409" />
|
||||
<joint name="joint4" pos="0 0 0" axis="0 0 1" limited="true" range="-0.19198 3.927" damping="5" frictionloss="1" />
|
||||
<geom name="j4" type="mesh" mesh="link4" material="robot0:arm_mat" rgba="1 1 1 1"/>
|
||||
<body name="link5" pos="0.0775 -0.3425 0" quat="0.707105 0.707108 0 0">
|
||||
<inertial pos="-0.00023397 0.036705 -0.080064" quat="0.981064 -0.19003 0.00637998 0.0369004" mass="0.18554" diaginertia="0.00099553 0.000988613 0.000247126" />
|
||||
<joint name="joint5" pos="0 0 0" axis="0 0 1" limited="true" range="-6.28319 6.28319" damping="5" frictionloss="1" />
|
||||
<geom name="j5" type="mesh" material="robot0:gripper_mat" rgba="1 1 1 1" mesh="link5" />
|
||||
<body name="link6" pos="0 0 0" quat="0.707105 0.707108 0 0">
|
||||
<inertial pos="0.058911 0.028469 0.0068428" quat="-0.188705 0.793535 0.166088 0.554173" mass="0.31344" diaginertia="0.000827892 0.000768871 0.000386708" />
|
||||
<joint name="joint6" pos="0 0 0" axis="0 0 1" limited="true" range="-1.69297 3.14159" damping="2" frictionloss="1" />
|
||||
<geom name="j6" type="mesh" material="robot0:gripper_mat" rgba="1 1 1 1" mesh="link6" />
|
||||
<body name="link7" pos="0.076 0.097 0" quat="0.707105 -0.707108 0 0">
|
||||
<inertial pos="-0.000420033 -0.00287433 0.0257078" quat="0.999372 -0.0349129 -0.00605634 0.000551744" mass="0.85624" diaginertia="0.00137671 0.00118744 0.000514968" />
|
||||
<joint name="joint7" pos="0 0 0" axis="0 0 1" limited="true" range="-6.28319 6.28319" damping="2" frictionloss="1" />
|
||||
<geom name="j8" material="robot0:gripper_mat" type="mesh" rgba="0.753 0.753 0.753 1" mesh="link7" />
|
||||
<geom name="j9" material="robot0:gripper_mat" type="mesh" rgba="1 1 1 1" mesh="base_link" />
|
||||
<site name="grasp" pos="0 0 0.16" rgba="1 0 0 0" type="sphere" size="0.01" group="1"/>
|
||||
<body name="left_outer_knuckle" pos="0 0.035 0.059098">
|
||||
<inertial pos="0 0.021559 0.015181" quat="0.47789 0.87842 0 0" mass="0.033618" diaginertia="1.9111e-05 1.79089e-05 1.90167e-06" />
|
||||
<joint name="drive_joint" pos="0 0 0" axis="1 0 0" limited="true" range="0 0.85" />
|
||||
<geom type="mesh" rgba="0 0 0 1" conaffinity="1" contype="0" mesh="left_outer_knuckle" />
|
||||
<body name="left_finger" pos="0 0.035465 0.042039">
|
||||
<inertial pos="0 -0.016413 0.029258" quat="0.697634 0.115353 -0.115353 0.697634" mass="0.048304" diaginertia="1.88037e-05 1.7493e-05 3.56792e-06" />
|
||||
<joint name="left_finger_joint" pos="0 0 0" axis="-1 0 0" limited="true" range="0 0.85" />
|
||||
<geom name="j10" material="robot0:gripper_finger_mat" type="mesh" rgba="0 0 0 1" conaffinity="3" contype="2" mesh="left_finger" friction='1.5 1.5 1.5' solref='0.01 1' solimp='0.99 0.99 0.01'/>
|
||||
<body name="right_hand" pos="0 -0.03 0.05" quat="-0.7071 0 0 0.7071">
|
||||
<site name="ee" pos="0 0 0" rgba="0 0 1 0" type="sphere" group="1"/>
|
||||
<site name="ee_x" pos="0 0 0" size="0.005 .1" quat="0.707105 0.707108 0 0 " rgba="1 0 0 0" type="cylinder" group="1"/>
|
||||
<site name="ee_z" pos="0 0 0" size="0.005 .1" quat="0.707105 0 0 0.707108" rgba="0 0 1 0" type="cylinder" group="1"/>
|
||||
<site name="ee_y" pos="0 0 0" size="0.005 .1" quat="0.707105 0 0.707108 0 " rgba="0 1 0 0" type="cylinder" group="1"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
<body name="left_inner_knuckle" pos="0 0.02 0.074098">
|
||||
<inertial pos="1.86601e-06 0.0220468 0.0261335" quat="0.664139 -0.242732 0.242713 0.664146" mass="0.0230126" diaginertia="8.34216e-06 6.0949e-06 2.75601e-06" />
|
||||
<joint name="left_inner_knuckle_joint" pos="0 0 0" axis="1 0 0" limited="true" range="0 0.85" />
|
||||
<geom type="mesh" rgba="0 0 0 1" conaffinity="1" contype="0" mesh="left_inner_knuckle" friction='1.5 1.5 1.5' solref='0.01 1' solimp='0.99 0.99 0.01'/>
|
||||
</body>
|
||||
<body name="right_outer_knuckle" pos="0 -0.035 0.059098">
|
||||
<inertial pos="0 -0.021559 0.015181" quat="0.87842 0.47789 0 0" mass="0.033618" diaginertia="1.9111e-05 1.79089e-05 1.90167e-06" />
|
||||
<joint name="right_outer_knuckle_joint" pos="0 0 0" axis="-1 0 0" limited="true" range="0 0.85" />
|
||||
<geom type="mesh" rgba="0 0 0 1" conaffinity="1" contype="0" mesh="right_outer_knuckle" />
|
||||
<body name="right_finger" pos="0 -0.035465 0.042039">
|
||||
<inertial pos="0 0.016413 0.029258" quat="0.697634 -0.115356 0.115356 0.697634" mass="0.048304" diaginertia="1.88038e-05 1.7493e-05 3.56779e-06" />
|
||||
<joint name="right_finger_joint" pos="0 0 0" axis="1 0 0" limited="true" range="0 0.85" />
|
||||
<geom name="j11" material="robot0:gripper_finger_mat" type="mesh" rgba="0 0 0 1" conaffinity="3" contype="2" mesh="right_finger" friction='1.5 1.5 1.5' solref='0.01 1' solimp='0.99 0.99 0.01'/>
|
||||
<body name="left_hand" pos="0 0.03 0.05" quat="-0.7071 0 0 0.7071">
|
||||
<site name="ee_2" pos="0 0 0" rgba="1 0 0 0" type="sphere" size="0.01" group="1"/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
<body name="right_inner_knuckle" pos="0 -0.02 0.074098">
|
||||
<inertial pos="1.866e-06 -0.022047 0.026133" quat="0.66415 0.242702 -0.242721 0.664144" mass="0.023013" diaginertia="8.34209e-06 6.0949e-06 2.75601e-06" />
|
||||
<joint name="right_inner_knuckle_joint" pos="0 0 0" axis="-1 0 0" limited="true" range="0 0.85" />
|
||||
<geom type="mesh" rgba="0 0 0 1" conaffinity="1" contype="0" mesh="right_inner_knuckle" friction='1.5 1.5 1.5' solref='0.01 1' solimp='0.99 0.99 0.01'/>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</body>
|
||||
</mujoco>
|
|
@ -1,145 +0,0 @@
|
|||
import os
|
||||
|
||||
import mujoco
|
||||
import numpy as np
|
||||
from gymnasium_robotics.envs import robot_env
|
||||
|
||||
from lerobot.common.envs.simxarm.simxarm.tasks import mocap
|
||||
|
||||
|
||||
class Base(robot_env.MujocoRobotEnv):
|
||||
"""
|
||||
Superclass for all simxarm environments.
|
||||
Args:
|
||||
xml_name (str): name of the xml environment file
|
||||
gripper_rotation (list): initial rotation of the gripper (given as a quaternion)
|
||||
"""
|
||||
|
||||
def __init__(self, xml_name, gripper_rotation=None):
|
||||
if gripper_rotation is None:
|
||||
gripper_rotation = [0, 1, 0, 0]
|
||||
self.gripper_rotation = np.array(gripper_rotation, dtype=np.float32)
|
||||
self.center_of_table = np.array([1.655, 0.3, 0.63625])
|
||||
self.max_z = 1.2
|
||||
self.min_z = 0.2
|
||||
super().__init__(
|
||||
model_path=os.path.join(os.path.dirname(__file__), "assets", xml_name + ".xml"),
|
||||
n_substeps=20,
|
||||
n_actions=4,
|
||||
initial_qpos={},
|
||||
)
|
||||
|
||||
@property
|
||||
def dt(self):
|
||||
return self.n_substeps * self.model.opt.timestep
|
||||
|
||||
@property
|
||||
def eef(self):
|
||||
return self._utils.get_site_xpos(self.model, self.data, "grasp")
|
||||
|
||||
@property
|
||||
def obj(self):
|
||||
return self._utils.get_site_xpos(self.model, self.data, "object_site")
|
||||
|
||||
@property
|
||||
def robot_state(self):
|
||||
gripper_angle = self._utils.get_joint_qpos(self.model, self.data, "right_outer_knuckle_joint")
|
||||
return np.concatenate([self.eef, gripper_angle])
|
||||
|
||||
def is_success(self):
|
||||
return NotImplementedError()
|
||||
|
||||
def get_reward(self):
|
||||
raise NotImplementedError()
|
||||
|
||||
def _sample_goal(self):
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_obs(self):
|
||||
return self._get_obs()
|
||||
|
||||
def _step_callback(self):
|
||||
self._mujoco.mj_forward(self.model, self.data)
|
||||
|
||||
def _limit_gripper(self, gripper_pos, pos_ctrl):
|
||||
if gripper_pos[0] > self.center_of_table[0] - 0.105 + 0.15:
|
||||
pos_ctrl[0] = min(pos_ctrl[0], 0)
|
||||
if gripper_pos[0] < self.center_of_table[0] - 0.105 - 0.3:
|
||||
pos_ctrl[0] = max(pos_ctrl[0], 0)
|
||||
if gripper_pos[1] > self.center_of_table[1] + 0.3:
|
||||
pos_ctrl[1] = min(pos_ctrl[1], 0)
|
||||
if gripper_pos[1] < self.center_of_table[1] - 0.3:
|
||||
pos_ctrl[1] = max(pos_ctrl[1], 0)
|
||||
if gripper_pos[2] > self.max_z:
|
||||
pos_ctrl[2] = min(pos_ctrl[2], 0)
|
||||
if gripper_pos[2] < self.min_z:
|
||||
pos_ctrl[2] = max(pos_ctrl[2], 0)
|
||||
return pos_ctrl
|
||||
|
||||
def _apply_action(self, action):
|
||||
assert action.shape == (4,)
|
||||
action = action.copy()
|
||||
pos_ctrl, gripper_ctrl = action[:3], action[3]
|
||||
pos_ctrl = self._limit_gripper(
|
||||
self._utils.get_site_xpos(self.model, self.data, "grasp"), pos_ctrl
|
||||
) * (1 / self.n_substeps)
|
||||
gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
|
||||
mocap.apply_action(
|
||||
self.model,
|
||||
self._model_names,
|
||||
self.data,
|
||||
np.concatenate([pos_ctrl, self.gripper_rotation, gripper_ctrl]),
|
||||
)
|
||||
|
||||
def _render_callback(self):
|
||||
self._mujoco.mj_forward(self.model, self.data)
|
||||
|
||||
def _reset_sim(self):
|
||||
self.data.time = self.initial_time
|
||||
self.data.qpos[:] = np.copy(self.initial_qpos)
|
||||
self.data.qvel[:] = np.copy(self.initial_qvel)
|
||||
self._sample_goal()
|
||||
self._mujoco.mj_step(self.model, self.data, nstep=10)
|
||||
return True
|
||||
|
||||
def _set_gripper(self, gripper_pos, gripper_rotation):
|
||||
self._utils.set_mocap_pos(self.model, self.data, "robot0:mocap", gripper_pos)
|
||||
self._utils.set_mocap_quat(self.model, self.data, "robot0:mocap", gripper_rotation)
|
||||
self._utils.set_joint_qpos(self.model, self.data, "right_outer_knuckle_joint", 0)
|
||||
self.data.qpos[10] = 0.0
|
||||
self.data.qpos[12] = 0.0
|
||||
|
||||
def _env_setup(self, initial_qpos):
|
||||
for name, value in initial_qpos.items():
|
||||
self.data.set_joint_qpos(name, value)
|
||||
mocap.reset(self.model, self.data)
|
||||
mujoco.mj_forward(self.model, self.data)
|
||||
self._sample_goal()
|
||||
mujoco.mj_forward(self.model, self.data)
|
||||
|
||||
def reset(self):
|
||||
self._reset_sim()
|
||||
return self._get_obs()
|
||||
|
||||
def step(self, action):
|
||||
assert action.shape == (4,)
|
||||
assert self.action_space.contains(action), "{!r} ({}) invalid".format(action, type(action))
|
||||
self._apply_action(action)
|
||||
self._mujoco.mj_step(self.model, self.data, nstep=2)
|
||||
self._step_callback()
|
||||
obs = self._get_obs()
|
||||
reward = self.get_reward()
|
||||
done = False
|
||||
info = {"is_success": self.is_success(), "success": self.is_success()}
|
||||
return obs, reward, done, info
|
||||
|
||||
def render(self, mode="rgb_array", width=384, height=384):
|
||||
self._render_callback()
|
||||
# HACK
|
||||
self.model.vis.global_.offwidth = width
|
||||
self.model.vis.global_.offheight = height
|
||||
return self.mujoco_renderer.render(mode)
|
||||
|
||||
def close(self):
|
||||
if self.mujoco_renderer is not None:
|
||||
self.mujoco_renderer.close()
|
|
@ -1,100 +0,0 @@
|
|||
import numpy as np
|
||||
|
||||
from lerobot.common.envs.simxarm.simxarm import Base
|
||||
|
||||
|
||||
class Lift(Base):
|
||||
def __init__(self):
|
||||
self._z_threshold = 0.15
|
||||
super().__init__("lift")
|
||||
|
||||
@property
|
||||
def z_target(self):
|
||||
return self._init_z + self._z_threshold
|
||||
|
||||
def is_success(self):
|
||||
return self.obj[2] >= self.z_target
|
||||
|
||||
def get_reward(self):
|
||||
reach_dist = np.linalg.norm(self.obj - self.eef)
|
||||
reach_dist_xy = np.linalg.norm(self.obj[:-1] - self.eef[:-1])
|
||||
pick_completed = self.obj[2] >= (self.z_target - 0.01)
|
||||
obj_dropped = (self.obj[2] < (self._init_z + 0.005)) and (reach_dist > 0.02)
|
||||
|
||||
# Reach
|
||||
if reach_dist < 0.05:
|
||||
reach_reward = -reach_dist + max(self._action[-1], 0) / 50
|
||||
elif reach_dist_xy < 0.05:
|
||||
reach_reward = -reach_dist
|
||||
else:
|
||||
z_bonus = np.linalg.norm(np.linalg.norm(self.obj[-1] - self.eef[-1]))
|
||||
reach_reward = -reach_dist - 2 * z_bonus
|
||||
|
||||
# Pick
|
||||
if pick_completed and not obj_dropped:
|
||||
pick_reward = self.z_target
|
||||
elif (reach_dist < 0.1) and (self.obj[2] > (self._init_z + 0.005)):
|
||||
pick_reward = min(self.z_target, self.obj[2])
|
||||
else:
|
||||
pick_reward = 0
|
||||
|
||||
return reach_reward / 100 + pick_reward
|
||||
|
||||
def _get_obs(self):
|
||||
eef_velp = self._utils.get_site_xvelp(self.model, self.data, "grasp") * self.dt
|
||||
gripper_angle = self._utils.get_joint_qpos(self.model, self.data, "right_outer_knuckle_joint")
|
||||
eef = self.eef - self.center_of_table
|
||||
|
||||
obj = self.obj - self.center_of_table
|
||||
obj_rot = self._utils.get_joint_qpos(self.model, self.data, "object_joint0")[-4:]
|
||||
obj_velp = self._utils.get_site_xvelp(self.model, self.data, "object_site") * self.dt
|
||||
obj_velr = self._utils.get_site_xvelr(self.model, self.data, "object_site") * self.dt
|
||||
|
||||
obs = np.concatenate(
|
||||
[
|
||||
eef,
|
||||
eef_velp,
|
||||
obj,
|
||||
obj_rot,
|
||||
obj_velp,
|
||||
obj_velr,
|
||||
eef - obj,
|
||||
np.array(
|
||||
[
|
||||
np.linalg.norm(eef - obj),
|
||||
np.linalg.norm(eef[:-1] - obj[:-1]),
|
||||
self.z_target,
|
||||
self.z_target - obj[-1],
|
||||
self.z_target - eef[-1],
|
||||
]
|
||||
),
|
||||
gripper_angle,
|
||||
],
|
||||
axis=0,
|
||||
)
|
||||
return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": eef}
|
||||
|
||||
def _sample_goal(self):
|
||||
# Gripper
|
||||
gripper_pos = np.array([1.280, 0.295, 0.735]) + self.np_random.uniform(-0.05, 0.05, size=3)
|
||||
super()._set_gripper(gripper_pos, self.gripper_rotation)
|
||||
|
||||
# Object
|
||||
object_pos = self.center_of_table - np.array([0.15, 0.10, 0.07])
|
||||
object_pos[0] += self.np_random.uniform(-0.05, 0.05, size=1)
|
||||
object_pos[1] += self.np_random.uniform(-0.05, 0.05, size=1)
|
||||
object_qpos = self._utils.get_joint_qpos(self.model, self.data, "object_joint0")
|
||||
object_qpos[:3] = object_pos
|
||||
self._utils.set_joint_qpos(self.model, self.data, "object_joint0", object_qpos)
|
||||
self._init_z = object_pos[2]
|
||||
|
||||
# Goal
|
||||
return object_pos + np.array([0, 0, self._z_threshold])
|
||||
|
||||
def reset(self):
|
||||
self._action = np.zeros(4)
|
||||
return super().reset()
|
||||
|
||||
def step(self, action):
|
||||
self._action = action.copy()
|
||||
return super().step(action)
|
|
@ -1,67 +0,0 @@
|
|||
# import mujoco_py
|
||||
import mujoco
|
||||
import numpy as np
|
||||
|
||||
|
||||
def apply_action(model, model_names, data, action):
|
||||
if model.nmocap > 0:
|
||||
pos_action, gripper_action = np.split(action, (model.nmocap * 7,))
|
||||
if data.ctrl is not None:
|
||||
for i in range(gripper_action.shape[0]):
|
||||
data.ctrl[i] = gripper_action[i]
|
||||
pos_action = pos_action.reshape(model.nmocap, 7)
|
||||
pos_delta, quat_delta = pos_action[:, :3], pos_action[:, 3:]
|
||||
reset_mocap2body_xpos(model, model_names, data)
|
||||
data.mocap_pos[:] = data.mocap_pos + pos_delta
|
||||
data.mocap_quat[:] = data.mocap_quat + quat_delta
|
||||
|
||||
|
||||
def reset(model, data):
|
||||
if model.nmocap > 0 and model.eq_data is not None:
|
||||
for i in range(model.eq_data.shape[0]):
|
||||
# if sim.model.eq_type[i] == mujoco_py.const.EQ_WELD:
|
||||
if model.eq_type[i] == mujoco.mjtEq.mjEQ_WELD:
|
||||
# model.eq_data[i, :] = np.array([0., 0., 0., 1., 0., 0., 0.])
|
||||
model.eq_data[i, :] = np.array(
|
||||
[
|
||||
0.0,
|
||||
0.0,
|
||||
0.0,
|
||||
1.0,
|
||||
0.0,
|
||||
0.0,
|
||||
0.0,
|
||||
0.0,
|
||||
0.0,
|
||||
0.0,
|
||||
0.0,
|
||||
]
|
||||
)
|
||||
# sim.forward()
|
||||
mujoco.mj_forward(model, data)
|
||||
|
||||
|
||||
def reset_mocap2body_xpos(model, model_names, data):
|
||||
if model.eq_type is None or model.eq_obj1id is None or model.eq_obj2id is None:
|
||||
return
|
||||
|
||||
# For all weld constraints
|
||||
for eq_type, obj1_id, obj2_id in zip(model.eq_type, model.eq_obj1id, model.eq_obj2id, strict=False):
|
||||
# if eq_type != mujoco_py.const.EQ_WELD:
|
||||
if eq_type != mujoco.mjtEq.mjEQ_WELD:
|
||||
continue
|
||||
# body2 = model.body_id2name(obj2_id)
|
||||
body2 = model_names.body_id2name[obj2_id]
|
||||
if body2 == "B0" or body2 == "B9" or body2 == "B1":
|
||||
continue
|
||||
mocap_id = model.body_mocapid[obj1_id]
|
||||
if mocap_id != -1:
|
||||
# obj1 is the mocap, obj2 is the welded body
|
||||
body_idx = obj2_id
|
||||
else:
|
||||
# obj2 is the mocap, obj1 is the welded body
|
||||
mocap_id = model.body_mocapid[obj2_id]
|
||||
body_idx = obj1_id
|
||||
assert mocap_id != -1
|
||||
data.mocap_pos[mocap_id][:] = data.xpos[body_idx]
|
||||
data.mocap_quat[mocap_id][:] = data.xquat[body_idx]
|
|
@ -1,86 +0,0 @@
|
|||
import numpy as np
|
||||
|
||||
from lerobot.common.envs.simxarm.simxarm import Base
|
||||
|
||||
|
||||
class PegInBox(Base):
|
||||
def __init__(self):
|
||||
super().__init__("peg_in_box")
|
||||
|
||||
def _reset_sim(self):
|
||||
self._act_magnitude = 0
|
||||
super()._reset_sim()
|
||||
for _ in range(10):
|
||||
self._apply_action(np.array([0, 0, 0, 1], dtype=np.float32))
|
||||
self.sim.step()
|
||||
|
||||
@property
|
||||
def box(self):
|
||||
return self.sim.data.get_site_xpos("box_site")
|
||||
|
||||
def is_success(self):
|
||||
return np.linalg.norm(self.obj - self.box) <= 0.05
|
||||
|
||||
def get_reward(self):
|
||||
dist_xy = np.linalg.norm(self.obj[:2] - self.box[:2])
|
||||
dist_xyz = np.linalg.norm(self.obj - self.box)
|
||||
return float(dist_xy <= 0.045) * (2 - 6 * dist_xyz) - 0.2 * np.square(self._act_magnitude) - dist_xy
|
||||
|
||||
def _get_obs(self):
|
||||
eef_velp = self.sim.data.get_site_xvelp("grasp") * self.dt
|
||||
gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint")
|
||||
eef, box = self.eef - self.center_of_table, self.box - self.center_of_table
|
||||
|
||||
obj = self.obj - self.center_of_table
|
||||
obj_rot = self.sim.data.get_joint_qpos("object_joint0")[-4:]
|
||||
obj_velp = self.sim.data.get_site_xvelp("object_site") * self.dt
|
||||
obj_velr = self.sim.data.get_site_xvelr("object_site") * self.dt
|
||||
|
||||
obs = np.concatenate(
|
||||
[
|
||||
eef,
|
||||
eef_velp,
|
||||
box,
|
||||
obj,
|
||||
obj_rot,
|
||||
obj_velp,
|
||||
obj_velr,
|
||||
eef - box,
|
||||
eef - obj,
|
||||
obj - box,
|
||||
np.array(
|
||||
[
|
||||
np.linalg.norm(eef - box),
|
||||
np.linalg.norm(eef - obj),
|
||||
np.linalg.norm(obj - box),
|
||||
gripper_angle,
|
||||
]
|
||||
),
|
||||
],
|
||||
axis=0,
|
||||
)
|
||||
return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": box}
|
||||
|
||||
def _sample_goal(self):
|
||||
# Gripper
|
||||
gripper_pos = np.array([1.280, 0.295, 0.9]) + self.np_random.uniform(-0.05, 0.05, size=3)
|
||||
super()._set_gripper(gripper_pos, self.gripper_rotation)
|
||||
|
||||
# Object
|
||||
object_pos = gripper_pos - np.array([0, 0, 0.06]) + self.np_random.uniform(-0.005, 0.005, size=3)
|
||||
object_qpos = self.sim.data.get_joint_qpos("object_joint0")
|
||||
object_qpos[:3] = object_pos
|
||||
self.sim.data.set_joint_qpos("object_joint0", object_qpos)
|
||||
|
||||
# Box
|
||||
box_pos = np.array([1.61, 0.18, 0.58])
|
||||
box_pos[:2] += self.np_random.uniform(-0.11, 0.11, size=2)
|
||||
box_qpos = self.sim.data.get_joint_qpos("box_joint0")
|
||||
box_qpos[:3] = box_pos
|
||||
self.sim.data.set_joint_qpos("box_joint0", box_qpos)
|
||||
|
||||
return self.box
|
||||
|
||||
def step(self, action):
|
||||
self._act_magnitude = np.linalg.norm(action[:3])
|
||||
return super().step(action)
|
|
@ -1,78 +0,0 @@
|
|||
import numpy as np
|
||||
|
||||
from lerobot.common.envs.simxarm.simxarm import Base
|
||||
|
||||
|
||||
class Push(Base):
|
||||
def __init__(self):
|
||||
super().__init__("push")
|
||||
|
||||
def _reset_sim(self):
|
||||
self._act_magnitude = 0
|
||||
super()._reset_sim()
|
||||
|
||||
def is_success(self):
|
||||
return np.linalg.norm(self.obj - self.goal) <= 0.05
|
||||
|
||||
def get_reward(self):
|
||||
dist = np.linalg.norm(self.obj - self.goal)
|
||||
penalty = self._act_magnitude**2
|
||||
return -(dist + 0.15 * penalty)
|
||||
|
||||
def _get_obs(self):
|
||||
eef_velp = self.sim.data.get_site_xvelp("grasp") * self.dt
|
||||
gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint")
|
||||
eef, goal = self.eef - self.center_of_table, self.goal - self.center_of_table
|
||||
|
||||
obj = self.obj - self.center_of_table
|
||||
obj_rot = self.sim.data.get_joint_qpos("object_joint0")[-4:]
|
||||
obj_velp = self.sim.data.get_site_xvelp("object_site") * self.dt
|
||||
obj_velr = self.sim.data.get_site_xvelr("object_site") * self.dt
|
||||
|
||||
obs = np.concatenate(
|
||||
[
|
||||
eef,
|
||||
eef_velp,
|
||||
goal,
|
||||
obj,
|
||||
obj_rot,
|
||||
obj_velp,
|
||||
obj_velr,
|
||||
eef - goal,
|
||||
eef - obj,
|
||||
obj - goal,
|
||||
np.array(
|
||||
[
|
||||
np.linalg.norm(eef - goal),
|
||||
np.linalg.norm(eef - obj),
|
||||
np.linalg.norm(obj - goal),
|
||||
gripper_angle,
|
||||
]
|
||||
),
|
||||
],
|
||||
axis=0,
|
||||
)
|
||||
return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": goal}
|
||||
|
||||
def _sample_goal(self):
|
||||
# Gripper
|
||||
gripper_pos = np.array([1.280, 0.295, 0.735]) + self.np_random.uniform(-0.05, 0.05, size=3)
|
||||
super()._set_gripper(gripper_pos, self.gripper_rotation)
|
||||
|
||||
# Object
|
||||
object_pos = self.center_of_table - np.array([0.25, 0, 0.07])
|
||||
object_pos[0] += self.np_random.uniform(-0.08, 0.08, size=1)
|
||||
object_pos[1] += self.np_random.uniform(-0.08, 0.08, size=1)
|
||||
object_qpos = self.sim.data.get_joint_qpos("object_joint0")
|
||||
object_qpos[:3] = object_pos
|
||||
self.sim.data.set_joint_qpos("object_joint0", object_qpos)
|
||||
|
||||
# Goal
|
||||
self.goal = np.array([1.600, 0.200, 0.545])
|
||||
self.goal[:2] += self.np_random.uniform(-0.1, 0.1, size=2)
|
||||
self.sim.model.site_pos[self.sim.model.site_name2id("target0")] = self.goal
|
||||
return self.goal
|
||||
|
||||
def step(self, action):
|
||||
self._act_magnitude = np.linalg.norm(action[:3])
|
||||
return super().step(action)
|
|
@ -1,44 +0,0 @@
|
|||
import numpy as np
|
||||
|
||||
from lerobot.common.envs.simxarm.simxarm import Base
|
||||
|
||||
|
||||
class Reach(Base):
|
||||
def __init__(self):
|
||||
super().__init__("reach")
|
||||
|
||||
def _reset_sim(self):
|
||||
self._act_magnitude = 0
|
||||
super()._reset_sim()
|
||||
|
||||
def is_success(self):
|
||||
return np.linalg.norm(self.eef - self.goal) <= 0.05
|
||||
|
||||
def get_reward(self):
|
||||
dist = np.linalg.norm(self.eef - self.goal)
|
||||
penalty = self._act_magnitude**2
|
||||
return -(dist + 0.15 * penalty)
|
||||
|
||||
def _get_obs(self):
|
||||
eef_velp = self.sim.data.get_site_xvelp("grasp") * self.dt
|
||||
gripper_angle = self.sim.data.get_joint_qpos("right_outer_knuckle_joint")
|
||||
eef, goal = self.eef - self.center_of_table, self.goal - self.center_of_table
|
||||
obs = np.concatenate(
|
||||
[eef, eef_velp, goal, eef - goal, np.array([np.linalg.norm(eef - goal), gripper_angle])], axis=0
|
||||
)
|
||||
return {"observation": obs, "state": eef, "achieved_goal": eef, "desired_goal": goal}
|
||||
|
||||
def _sample_goal(self):
|
||||
# Gripper
|
||||
gripper_pos = np.array([1.280, 0.295, 0.735]) + self.np_random.uniform(-0.05, 0.05, size=3)
|
||||
super()._set_gripper(gripper_pos, self.gripper_rotation)
|
||||
|
||||
# Goal
|
||||
self.goal = np.array([1.550, 0.287, 0.580])
|
||||
self.goal[:2] += self.np_random.uniform(-0.125, 0.125, size=2)
|
||||
self.sim.model.site_pos[self.sim.model.site_name2id("target0")] = self.goal
|
||||
return self.goal
|
||||
|
||||
def step(self, action):
|
||||
self._act_magnitude = np.linalg.norm(action[:3])
|
||||
return super().step(action)
|
|
@ -0,0 +1,32 @@
|
|||
import einops
|
||||
import torch
|
||||
|
||||
from lerobot.common.transforms import apply_inverse_transform
|
||||
|
||||
|
||||
def preprocess_observation(observation, transform=None):
|
||||
# map to expected inputs for the policy
|
||||
obs = {
|
||||
"observation.image": torch.from_numpy(observation["pixels"]).float(),
|
||||
"observation.state": torch.from_numpy(observation["agent_pos"]).float(),
|
||||
}
|
||||
# convert to (b c h w) torch format
|
||||
obs["observation.image"] = einops.rearrange(obs["observation.image"], "b h w c -> b c h w")
|
||||
|
||||
# apply same transforms as in training
|
||||
if transform is not None:
|
||||
for key in obs:
|
||||
obs[key] = torch.stack([transform({key: item})[key] for item in obs[key]])
|
||||
|
||||
return obs
|
||||
|
||||
|
||||
def postprocess_action(action, transform=None):
|
||||
action = action.to("cpu")
|
||||
# action is a batch (num_env,action_dim) instead of an item (action_dim),
|
||||
# we assume applying inverse transform on a batch works the same
|
||||
action = apply_inverse_transform({"action": action}, transform)["action"].numpy()
|
||||
assert (
|
||||
action.ndim == 2
|
||||
), "we assume dimensions are respectively the number of parallel envs, action dimensions"
|
||||
return action
|
|
@ -168,21 +168,15 @@ class ActionChunkingTransformerPolicy(nn.Module):
|
|||
nn.init.xavier_uniform_(p)
|
||||
|
||||
@torch.no_grad()
|
||||
def select_actions(self, observation, step_count):
|
||||
# TODO(rcadene): remove unused step_count
|
||||
del step_count
|
||||
|
||||
def select_actions(self, batch, *_):
|
||||
# TODO(now): Implement queueing mechanism.
|
||||
self.eval()
|
||||
self._preprocess_batch(batch)
|
||||
|
||||
# TODO(rcadene): remove hack
|
||||
# add 1 camera dimension
|
||||
observation["image", "top"] = observation["image", "top"].unsqueeze(1)
|
||||
|
||||
obs_dict = {
|
||||
"image": observation["image", "top"],
|
||||
"agent_pos": observation["state"],
|
||||
}
|
||||
action = self._forward(qpos=obs_dict["agent_pos"] * 0.182, image=obs_dict["image"])
|
||||
# TODO(now): What's up with this 0.182?
|
||||
action = self.forward(
|
||||
robot_state=batch["observation.state"] * 0.182, image=batch["observation.images.top"]
|
||||
)
|
||||
|
||||
if self.cfg.temporal_agg:
|
||||
# TODO(rcadene): implement temporal aggregation
|
||||
|
@ -197,9 +191,7 @@ class ActionChunkingTransformerPolicy(nn.Module):
|
|||
# exp_weights = torch.from_numpy(exp_weights).cuda().unsqueeze(dim=1)
|
||||
# raw_action = (actions_for_curr_step * exp_weights).sum(dim=0, keepdim=True)
|
||||
|
||||
# take first predicted action or n first actions
|
||||
action = action[: self.n_action_steps]
|
||||
return action
|
||||
return action[: self.n_action_steps]
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
# TODO(now): Temporary bridge.
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# ruff: noqa: N806
|
||||
|
||||
import time
|
||||
from collections import deque
|
||||
from copy import deepcopy
|
||||
|
||||
import einops
|
||||
|
@ -9,7 +10,7 @@ import torch
|
|||
import torch.nn as nn
|
||||
|
||||
import lerobot.common.policies.tdmpc.helper as h
|
||||
from lerobot.common.policies.abstract import AbstractPolicy
|
||||
from lerobot.common.policies.utils import populate_queues
|
||||
from lerobot.common.utils import get_safe_torch_device
|
||||
|
||||
FIRST_FRAME = 0
|
||||
|
@ -87,16 +88,18 @@ class TOLD(nn.Module):
|
|||
return torch.min(Q1, Q2) if return_type == "min" else (Q1 + Q2) / 2
|
||||
|
||||
|
||||
class TDMPCPolicy(AbstractPolicy):
|
||||
class TDMPCPolicy(nn.Module):
|
||||
"""Implementation of TD-MPC learning + inference."""
|
||||
|
||||
name = "tdmpc"
|
||||
|
||||
def __init__(self, cfg, device):
|
||||
super().__init__(None)
|
||||
def __init__(self, cfg, n_obs_steps, n_action_steps, device):
|
||||
super().__init__()
|
||||
self.action_dim = cfg.action_dim
|
||||
|
||||
self.cfg = cfg
|
||||
self.n_obs_steps = n_obs_steps
|
||||
self.n_action_steps = n_action_steps
|
||||
self.device = get_safe_torch_device(device)
|
||||
self.std = h.linear_schedule(cfg.std_schedule, 0)
|
||||
self.model = TOLD(cfg)
|
||||
|
@ -128,20 +131,42 @@ class TDMPCPolicy(AbstractPolicy):
|
|||
self.model.load_state_dict(d["model"])
|
||||
self.model_target.load_state_dict(d["model_target"])
|
||||
|
||||
@torch.no_grad()
|
||||
def select_actions(self, observation, step_count):
|
||||
if observation["image"].shape[0] != 1:
|
||||
raise NotImplementedError("Batch size > 1 not handled")
|
||||
|
||||
t0 = step_count.item() == 0
|
||||
|
||||
obs = {
|
||||
# TODO(rcadene): remove contiguous hack...
|
||||
"rgb": observation["image"].contiguous(),
|
||||
"state": observation["state"].contiguous(),
|
||||
def reset(self):
|
||||
"""
|
||||
Clear observation and action queues. Should be called on `env.reset()`
|
||||
"""
|
||||
self._queues = {
|
||||
"observation.image": deque(maxlen=self.n_obs_steps),
|
||||
"observation.state": deque(maxlen=self.n_obs_steps),
|
||||
"action": deque(maxlen=self.n_action_steps),
|
||||
}
|
||||
# Note: unsqueeze needed because `act` still uses non-batch logic.
|
||||
action = self.act(obs, t0=t0, step=self.step.item()).unsqueeze(0)
|
||||
|
||||
@torch.no_grad()
|
||||
def select_action(self, batch, step):
|
||||
assert "observation.image" in batch
|
||||
assert "observation.state" in batch
|
||||
assert len(batch) == 2
|
||||
|
||||
self._queues = populate_queues(self._queues, batch)
|
||||
|
||||
t0 = step == 0
|
||||
|
||||
if len(self._queues["action"]) == 0:
|
||||
batch = {key: torch.stack(list(self._queues[key]), dim=1) for key in batch}
|
||||
|
||||
actions = []
|
||||
batch_size = batch["observation.image."].shape[0]
|
||||
for i in range(batch_size):
|
||||
obs = {
|
||||
"rgb": batch["observation.image"][[i]],
|
||||
"state": batch["observation.state"][[i]],
|
||||
}
|
||||
# Note: unsqueeze needed because `act` still uses non-batch logic.
|
||||
action = self.act(obs, t0=t0, step=self.step)
|
||||
actions.append(action)
|
||||
action = torch.stack(actions)
|
||||
|
||||
action = self._queues["action"].popleft()
|
||||
return action
|
||||
|
||||
@torch.no_grad()
|
||||
|
@ -293,97 +318,97 @@ class TDMPCPolicy(AbstractPolicy):
|
|||
td_target = reward + self.cfg.discount * mask * next_v
|
||||
return td_target
|
||||
|
||||
def update(self, replay_buffer, step, demo_buffer=None):
|
||||
def forward(self, batch, step):
|
||||
"""Main update function. Corresponds to one iteration of the model learning."""
|
||||
start_time = time.time()
|
||||
|
||||
num_slices = self.cfg.batch_size
|
||||
batch_size = self.cfg.horizon * num_slices
|
||||
# num_slices = self.cfg.batch_size
|
||||
# batch_size = self.cfg.horizon * num_slices
|
||||
|
||||
if demo_buffer is None:
|
||||
demo_batch_size = 0
|
||||
else:
|
||||
# Update oversampling ratio
|
||||
demo_pc_batch = h.linear_schedule(self.cfg.demo_schedule, step)
|
||||
demo_num_slices = int(demo_pc_batch * self.batch_size)
|
||||
demo_batch_size = self.cfg.horizon * demo_num_slices
|
||||
batch_size -= demo_batch_size
|
||||
num_slices -= demo_num_slices
|
||||
replay_buffer._sampler.num_slices = num_slices
|
||||
demo_buffer._sampler.num_slices = demo_num_slices
|
||||
# if demo_buffer is None:
|
||||
# demo_batch_size = 0
|
||||
# else:
|
||||
# # Update oversampling ratio
|
||||
# demo_pc_batch = h.linear_schedule(self.cfg.demo_schedule, step)
|
||||
# demo_num_slices = int(demo_pc_batch * self.batch_size)
|
||||
# demo_batch_size = self.cfg.horizon * demo_num_slices
|
||||
# batch_size -= demo_batch_size
|
||||
# num_slices -= demo_num_slices
|
||||
# replay_buffer._sampler.num_slices = num_slices
|
||||
# demo_buffer._sampler.num_slices = demo_num_slices
|
||||
|
||||
assert demo_batch_size % self.cfg.horizon == 0
|
||||
assert demo_batch_size % demo_num_slices == 0
|
||||
# assert demo_batch_size % self.cfg.horizon == 0
|
||||
# assert demo_batch_size % demo_num_slices == 0
|
||||
|
||||
assert batch_size % self.cfg.horizon == 0
|
||||
assert batch_size % num_slices == 0
|
||||
# assert batch_size % self.cfg.horizon == 0
|
||||
# assert batch_size % num_slices == 0
|
||||
|
||||
# Sample from interaction dataset
|
||||
# # Sample from interaction dataset
|
||||
|
||||
def process_batch(batch, horizon, num_slices):
|
||||
# trajectory t = 256, horizon h = 5
|
||||
# (t h) ... -> h t ...
|
||||
batch = batch.reshape(num_slices, horizon).transpose(1, 0).contiguous()
|
||||
# def process_batch(batch, horizon, num_slices):
|
||||
# # trajectory t = 256, horizon h = 5
|
||||
# # (t h) ... -> h t ...
|
||||
# batch = batch.reshape(num_slices, horizon).transpose(1, 0).contiguous()
|
||||
|
||||
obs = {
|
||||
"rgb": batch["observation", "image"][FIRST_FRAME].to(self.device, non_blocking=True),
|
||||
"state": batch["observation", "state"][FIRST_FRAME].to(self.device, non_blocking=True),
|
||||
}
|
||||
action = batch["action"].to(self.device, non_blocking=True)
|
||||
next_obses = {
|
||||
"rgb": batch["next", "observation", "image"].to(self.device, non_blocking=True),
|
||||
"state": batch["next", "observation", "state"].to(self.device, non_blocking=True),
|
||||
}
|
||||
reward = batch["next", "reward"].to(self.device, non_blocking=True)
|
||||
# obs = {
|
||||
# "rgb": batch["observation", "image"][FIRST_FRAME].to(self.device, non_blocking=True),
|
||||
# "state": batch["observation", "state"][FIRST_FRAME].to(self.device, non_blocking=True),
|
||||
# }
|
||||
# action = batch["action"].to(self.device, non_blocking=True)
|
||||
# next_obses = {
|
||||
# "rgb": batch["next", "observation", "image"].to(self.device, non_blocking=True),
|
||||
# "state": batch["next", "observation", "state"].to(self.device, non_blocking=True),
|
||||
# }
|
||||
# reward = batch["next", "reward"].to(self.device, non_blocking=True)
|
||||
|
||||
idxs = batch["index"][FIRST_FRAME].to(self.device, non_blocking=True)
|
||||
weights = batch["_weight"][FIRST_FRAME, :, None].to(self.device, non_blocking=True)
|
||||
# idxs = batch["index"][FIRST_FRAME].to(self.device, non_blocking=True)
|
||||
# weights = batch["_weight"][FIRST_FRAME, :, None].to(self.device, non_blocking=True)
|
||||
|
||||
# TODO(rcadene): rearrange directly in offline dataset
|
||||
if reward.ndim == 2:
|
||||
reward = einops.rearrange(reward, "h t -> h t 1")
|
||||
# # TODO(rcadene): rearrange directly in offline dataset
|
||||
# if reward.ndim == 2:
|
||||
# reward = einops.rearrange(reward, "h t -> h t 1")
|
||||
|
||||
assert reward.ndim == 3
|
||||
assert reward.shape == (horizon, num_slices, 1)
|
||||
# We dont use `batch["next", "done"]` since it only indicates the end of an
|
||||
# episode, but not the end of the trajectory of an episode.
|
||||
# Neither does `batch["next", "terminated"]`
|
||||
done = torch.zeros_like(reward, dtype=torch.bool, device=reward.device)
|
||||
mask = torch.ones_like(reward, dtype=torch.bool, device=reward.device)
|
||||
return obs, action, next_obses, reward, mask, done, idxs, weights
|
||||
# assert reward.ndim == 3
|
||||
# assert reward.shape == (horizon, num_slices, 1)
|
||||
# # We dont use `batch["next", "done"]` since it only indicates the end of an
|
||||
# # episode, but not the end of the trajectory of an episode.
|
||||
# # Neither does `batch["next", "terminated"]`
|
||||
# done = torch.zeros_like(reward, dtype=torch.bool, device=reward.device)
|
||||
# mask = torch.ones_like(reward, dtype=torch.bool, device=reward.device)
|
||||
# return obs, action, next_obses, reward, mask, done, idxs, weights
|
||||
|
||||
batch = replay_buffer.sample(batch_size) if self.cfg.balanced_sampling else replay_buffer.sample()
|
||||
# batch = replay_buffer.sample(batch_size) if self.cfg.balanced_sampling else replay_buffer.sample()
|
||||
|
||||
obs, action, next_obses, reward, mask, done, idxs, weights = process_batch(
|
||||
batch, self.cfg.horizon, num_slices
|
||||
)
|
||||
# obs, action, next_obses, reward, mask, done, idxs, weights = process_batch(
|
||||
# batch, self.cfg.horizon, num_slices
|
||||
# )
|
||||
|
||||
# Sample from demonstration dataset
|
||||
if demo_batch_size > 0:
|
||||
demo_batch = demo_buffer.sample(demo_batch_size)
|
||||
(
|
||||
demo_obs,
|
||||
demo_action,
|
||||
demo_next_obses,
|
||||
demo_reward,
|
||||
demo_mask,
|
||||
demo_done,
|
||||
demo_idxs,
|
||||
demo_weights,
|
||||
) = process_batch(demo_batch, self.cfg.horizon, demo_num_slices)
|
||||
# if demo_batch_size > 0:
|
||||
# demo_batch = demo_buffer.sample(demo_batch_size)
|
||||
# (
|
||||
# demo_obs,
|
||||
# demo_action,
|
||||
# demo_next_obses,
|
||||
# demo_reward,
|
||||
# demo_mask,
|
||||
# demo_done,
|
||||
# demo_idxs,
|
||||
# demo_weights,
|
||||
# ) = process_batch(demo_batch, self.cfg.horizon, demo_num_slices)
|
||||
|
||||
if isinstance(obs, dict):
|
||||
obs = {k: torch.cat([obs[k], demo_obs[k]]) for k in obs}
|
||||
next_obses = {k: torch.cat([next_obses[k], demo_next_obses[k]], dim=1) for k in next_obses}
|
||||
else:
|
||||
obs = torch.cat([obs, demo_obs])
|
||||
next_obses = torch.cat([next_obses, demo_next_obses], dim=1)
|
||||
action = torch.cat([action, demo_action], dim=1)
|
||||
reward = torch.cat([reward, demo_reward], dim=1)
|
||||
mask = torch.cat([mask, demo_mask], dim=1)
|
||||
done = torch.cat([done, demo_done], dim=1)
|
||||
idxs = torch.cat([idxs, demo_idxs])
|
||||
weights = torch.cat([weights, demo_weights])
|
||||
# if isinstance(obs, dict):
|
||||
# obs = {k: torch.cat([obs[k], demo_obs[k]]) for k in obs}
|
||||
# next_obses = {k: torch.cat([next_obses[k], demo_next_obses[k]], dim=1) for k in next_obses}
|
||||
# else:
|
||||
# obs = torch.cat([obs, demo_obs])
|
||||
# next_obses = torch.cat([next_obses, demo_next_obses], dim=1)
|
||||
# action = torch.cat([action, demo_action], dim=1)
|
||||
# reward = torch.cat([reward, demo_reward], dim=1)
|
||||
# mask = torch.cat([mask, demo_mask], dim=1)
|
||||
# done = torch.cat([done, demo_done], dim=1)
|
||||
# idxs = torch.cat([idxs, demo_idxs])
|
||||
# weights = torch.cat([weights, demo_weights])
|
||||
|
||||
# Apply augmentations
|
||||
aug_tf = h.aug(self.cfg)
|
||||
|
|
|
@ -17,7 +17,7 @@ env:
|
|||
from_pixels: True
|
||||
pixels_only: False
|
||||
image_size: 84
|
||||
action_repeat: 2
|
||||
# action_repeat: 2 # we can remove if policy has n_action_steps=2
|
||||
episode_length: 25
|
||||
fps: ${fps}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# @package _global_
|
||||
|
||||
n_action_steps: 1
|
||||
n_action_steps: 2
|
||||
n_obs_steps: 1
|
||||
|
||||
policy:
|
||||
|
|
|
@ -44,9 +44,9 @@ from huggingface_hub import snapshot_download
|
|||
|
||||
from lerobot.common.datasets.factory import make_dataset
|
||||
from lerobot.common.envs.factory import make_env
|
||||
from lerobot.common.envs.utils import postprocess_action, preprocess_observation
|
||||
from lerobot.common.logger import log_output_dir
|
||||
from lerobot.common.policies.factory import make_policy
|
||||
from lerobot.common.transforms import apply_inverse_transform
|
||||
from lerobot.common.utils import get_safe_torch_device, init_hydra_config, init_logging, set_global_seed
|
||||
|
||||
|
||||
|
@ -54,34 +54,6 @@ def write_video(video_path, stacked_frames, fps):
|
|||
imageio.mimsave(video_path, stacked_frames, fps=fps)
|
||||
|
||||
|
||||
def preprocess_observation(observation, transform=None):
|
||||
# map to expected inputs for the policy
|
||||
obs = {
|
||||
"observation.image": torch.from_numpy(observation["pixels"]).float(),
|
||||
"observation.state": torch.from_numpy(observation["agent_pos"]).float(),
|
||||
}
|
||||
# convert to (b c h w) torch format
|
||||
obs["observation.image"] = einops.rearrange(obs["observation.image"], "b h w c -> b c h w")
|
||||
|
||||
# apply same transforms as in training
|
||||
if transform is not None:
|
||||
for key in obs:
|
||||
obs[key] = torch.stack([transform({key: item})[key] for item in obs[key]])
|
||||
|
||||
return obs
|
||||
|
||||
|
||||
def postprocess_action(action, transform=None):
|
||||
action = action.to("cpu")
|
||||
# action is a batch (num_env,action_dim) instead of an item (action_dim),
|
||||
# we assume applying inverse transform on a batch works the same
|
||||
action = apply_inverse_transform({"action": action}, transform)["action"].numpy()
|
||||
assert (
|
||||
action.ndim == 2
|
||||
), "we assume dimensions are respectively the number of parallel envs, action dimensions"
|
||||
return action
|
||||
|
||||
|
||||
def eval_policy(
|
||||
env: gym.vector.VectorEnv,
|
||||
policy,
|
||||
|
@ -114,10 +86,10 @@ def eval_policy(
|
|||
def maybe_render_frame(env):
|
||||
if save_video: # noqa: B023
|
||||
if return_first_video:
|
||||
visu = env.envs[0].render()
|
||||
visu = env.envs[0].render(mode="visualization")
|
||||
visu = visu[None, ...] # add batch dim
|
||||
else:
|
||||
visu = np.stack([env.render() for env in env.envs])
|
||||
visu = np.stack([env.render(mode="visualization") for env in env.envs])
|
||||
ep_frames.append(visu) # noqa: B023
|
||||
|
||||
for _ in range(num_episodes):
|
||||
|
|
|
@ -903,6 +903,26 @@ url = "git@github.com:huggingface/gym-pusht.git"
|
|||
reference = "HEAD"
|
||||
resolved_reference = "0fe4449cca5a2b08f529f7a07fbf5b9df24962ec"
|
||||
|
||||
[[package]]
|
||||
name = "gym-xarm"
|
||||
version = "0.1.0"
|
||||
description = "A gym environment for xArm"
|
||||
optional = true
|
||||
python-versions = "^3.10"
|
||||
files = []
|
||||
develop = false
|
||||
|
||||
[package.dependencies]
|
||||
gymnasium = "^0.29.1"
|
||||
gymnasium-robotics = "^1.2.4"
|
||||
mujoco = "^2.3.7"
|
||||
|
||||
[package.source]
|
||||
type = "git"
|
||||
url = "git@github.com:huggingface/gym-xarm.git"
|
||||
reference = "HEAD"
|
||||
resolved_reference = "2eb83fc4fc871b9d271c946d169e42f226ac3a7c"
|
||||
|
||||
[[package]]
|
||||
name = "gymnasium"
|
||||
version = "0.29.1"
|
||||
|
@ -3695,8 +3715,9 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
|
|||
|
||||
[extras]
|
||||
pusht = ["gym_pusht"]
|
||||
xarm = ["gym_xarm"]
|
||||
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.10"
|
||||
content-hash = "3eee17e4bf2b7a570f41ef9c400ec5a24a3113f62a13162229cf43504ca0d005"
|
||||
content-hash = "c9524cdf000eaa755a2ab3be669118222b4f8b1c262013f103f6874cbd54eeb6"
|
||||
|
|
|
@ -53,9 +53,13 @@ gymnasium-robotics = "^1.2.4"
|
|||
gymnasium = "^0.29.1"
|
||||
cmake = "^3.29.0.1"
|
||||
gym_pusht = { git = "git@github.com:huggingface/gym-pusht.git", optional = true}
|
||||
gym_xarm = { git = "git@github.com:huggingface/gym-xarm.git", optional = true}
|
||||
# gym_pusht = { path = "../gym-pusht", develop = true, optional = true}
|
||||
# gym_xarm = { path = "../gym-xarm", develop = true, optional = true}
|
||||
|
||||
[tool.poetry.extras]
|
||||
pusht = ["gym_pusht"]
|
||||
xarm = ["gym_xarm"]
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pre-commit = "^3.6.2"
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#SBATCH --time=2-00:00:00
|
||||
#SBATCH --output=/home/rcadene/slurm/%j.out
|
||||
#SBATCH --error=/home/rcadene/slurm/%j.err
|
||||
#SBATCH --qos=medium
|
||||
#SBATCH --qos=low
|
||||
#SBATCH --mail-user=re.cadene@gmail.com
|
||||
#SBATCH --mail-type=ALL
|
||||
|
||||
|
@ -20,4 +20,6 @@ source ~/.bashrc
|
|||
#conda activate fowm
|
||||
conda activate lerobot
|
||||
|
||||
export DATA_DIR="data"
|
||||
|
||||
srun $CMD
|
||||
|
|
|
@ -1,45 +1,47 @@
|
|||
import pytest
|
||||
from tensordict import TensorDict
|
||||
import torch
|
||||
from torchrl.envs.utils import check_env_specs, step_mdp
|
||||
from lerobot.common.datasets.factory import make_dataset
|
||||
import gymnasium as gym
|
||||
from gymnasium.utils.env_checker import check_env
|
||||
|
||||
from lerobot.common.envs.aloha.env import AlohaEnv
|
||||
from lerobot.common.envs.factory import make_env
|
||||
from lerobot.common.envs.pusht.env import PushtEnv
|
||||
from lerobot.common.envs.simxarm.env import SimxarmEnv
|
||||
from lerobot.common.utils import init_hydra_config
|
||||
|
||||
from lerobot.common.envs.utils import preprocess_observation
|
||||
|
||||
# import dmc_aloha # noqa: F401
|
||||
|
||||
from .utils import DEVICE, DEFAULT_CONFIG_PATH
|
||||
|
||||
|
||||
def print_spec_rollout(env):
|
||||
print("observation_spec:", env.observation_spec)
|
||||
print("action_spec:", env.action_spec)
|
||||
print("reward_spec:", env.reward_spec)
|
||||
print("done_spec:", env.done_spec)
|
||||
# def print_spec_rollout(env):
|
||||
# print("observation_spec:", env.observation_spec)
|
||||
# print("action_spec:", env.action_spec)
|
||||
# print("reward_spec:", env.reward_spec)
|
||||
# print("done_spec:", env.done_spec)
|
||||
|
||||
td = env.reset()
|
||||
print("reset tensordict", td)
|
||||
# td = env.reset()
|
||||
# print("reset tensordict", td)
|
||||
|
||||
td = env.rand_step(td)
|
||||
print("random step tensordict", td)
|
||||
# td = env.rand_step(td)
|
||||
# print("random step tensordict", td)
|
||||
|
||||
def simple_rollout(steps=100):
|
||||
# preallocate:
|
||||
data = TensorDict({}, [steps])
|
||||
# reset
|
||||
_data = env.reset()
|
||||
for i in range(steps):
|
||||
_data["action"] = env.action_spec.rand()
|
||||
_data = env.step(_data)
|
||||
data[i] = _data
|
||||
_data = step_mdp(_data, keep_other=True)
|
||||
return data
|
||||
# def simple_rollout(steps=100):
|
||||
# # preallocate:
|
||||
# data = TensorDict({}, [steps])
|
||||
# # reset
|
||||
# _data = env.reset()
|
||||
# for i in range(steps):
|
||||
# _data["action"] = env.action_spec.rand()
|
||||
# _data = env.step(_data)
|
||||
# data[i] = _data
|
||||
# _data = step_mdp(_data, keep_other=True)
|
||||
# return data
|
||||
|
||||
print("data from rollout:", simple_rollout(100))
|
||||
# print("data from rollout:", simple_rollout(100))
|
||||
|
||||
|
||||
@pytest.mark.skip("TODO")
|
||||
@pytest.mark.parametrize(
|
||||
"task,from_pixels,pixels_only",
|
||||
[
|
||||
|
@ -61,53 +63,41 @@ def test_aloha(task, from_pixels, pixels_only):
|
|||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"task,from_pixels,pixels_only",
|
||||
"env_task, obs_type",
|
||||
[
|
||||
("lift", False, False),
|
||||
("lift", True, False),
|
||||
("lift", True, True),
|
||||
# TODO(aliberts): Add simxarm other tasks
|
||||
# ("reach", False, False),
|
||||
# ("reach", True, False),
|
||||
# ("push", False, False),
|
||||
# ("push", True, False),
|
||||
# ("peg_in_box", False, False),
|
||||
# ("peg_in_box", True, False),
|
||||
("XarmLift-v0", "state"),
|
||||
("XarmLift-v0", "pixels"),
|
||||
("XarmLift-v0", "pixels_agent_pos"),
|
||||
# TODO(aliberts): Add gym_xarm other tasks
|
||||
],
|
||||
)
|
||||
def test_simxarm(task, from_pixels, pixels_only):
|
||||
env = SimxarmEnv(
|
||||
task,
|
||||
from_pixels=from_pixels,
|
||||
pixels_only=pixels_only,
|
||||
image_size=84 if from_pixels else None,
|
||||
)
|
||||
# print_spec_rollout(env)
|
||||
check_env_specs(env)
|
||||
def test_xarm(env_task, obs_type):
|
||||
import gym_xarm # noqa: F401
|
||||
env = gym.make(f"gym_xarm/{env_task}", obs_type=obs_type)
|
||||
check_env(env)
|
||||
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"from_pixels,pixels_only",
|
||||
"env_task, obs_type",
|
||||
[
|
||||
(True, False),
|
||||
("PushTPixels-v0", "state"),
|
||||
("PushTPixels-v0", "pixels"),
|
||||
("PushTPixels-v0", "pixels_agent_pos"),
|
||||
],
|
||||
)
|
||||
def test_pusht(from_pixels, pixels_only):
|
||||
env = PushtEnv(
|
||||
from_pixels=from_pixels,
|
||||
pixels_only=pixels_only,
|
||||
image_size=96 if from_pixels else None,
|
||||
)
|
||||
# print_spec_rollout(env)
|
||||
check_env_specs(env)
|
||||
def test_pusht(env_task, obs_type):
|
||||
import gym_pusht # noqa: F401
|
||||
env = gym.make(f"gym_pusht/{env_task}", obs_type=obs_type)
|
||||
check_env(env)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"env_name",
|
||||
[
|
||||
"simxarm",
|
||||
"pusht",
|
||||
"aloha",
|
||||
"simxarm",
|
||||
# "aloha",
|
||||
],
|
||||
)
|
||||
def test_factory(env_name):
|
||||
|
@ -119,15 +109,12 @@ def test_factory(env_name):
|
|||
dataset = make_dataset(cfg)
|
||||
|
||||
env = make_env(cfg)
|
||||
obs, info = env.reset()
|
||||
obs = {key: obs[key][None, ...] for key in obs}
|
||||
obs = preprocess_observation(obs, transform=dataset.transform)
|
||||
for key in dataset.image_keys:
|
||||
assert env.reset().get(key).dtype == torch.uint8
|
||||
check_env_specs(env)
|
||||
|
||||
env = make_env(cfg, transform=dataset.transform)
|
||||
for key in dataset.image_keys:
|
||||
img = env.reset().get(key)
|
||||
img = obs[key]
|
||||
assert img.dtype == torch.float32
|
||||
# TODO(rcadene): we assume for now that image normalization takes place in the model
|
||||
assert img.max() <= 1.0
|
||||
assert img.min() >= 0.0
|
||||
check_env_specs(env)
|
||||
|
|
Loading…
Reference in New Issue