167 lines
5.9 KiB
Python
167 lines
5.9 KiB
Python
|
from collections import OrderedDict, deque
|
||
|
|
||
|
import gymnasium as gym
|
||
|
import numpy as np
|
||
|
from gymnasium.wrappers import TimeLimit
|
||
|
|
||
|
from xarm.tasks.base import Base as Base
|
||
|
from xarm.tasks.lift import Lift
|
||
|
from xarm.tasks.peg_in_box import PegInBox
|
||
|
from xarm.tasks.push import Push
|
||
|
from xarm.tasks.reach import Reach
|
||
|
|
||
|
# Registry of the available SimXarm tasks, keyed by task name.
# Each entry records the task's env class, the subset of the 4-dim xArm
# action space it uses ("xyz" = position only, "xyzw" = position + gripper),
# the raw episode length in env steps, and a human-readable description.
# An OrderedDict keeps the task listing order stable for display/iteration.
TASKS = OrderedDict(
    [
        (
            "reach",
            dict(
                env=Reach,
                action_space="xyz",
                episode_length=50,
                description="Reach a target location with the end effector",
            ),
        ),
        (
            "push",
            dict(
                env=Push,
                action_space="xyz",
                episode_length=50,
                description="Push a cube to a target location",
            ),
        ),
        (
            "peg_in_box",
            dict(
                env=PegInBox,
                action_space="xyz",
                episode_length=50,
                description="Insert a peg into a box",
            ),
        ),
        (
            "lift",
            dict(
                env=Lift,
                action_space="xyzw",
                episode_length=50,
                description="Lift a cube above a height threshold",
            ),
        ),
    ]
)
|
||
|
|
||
|
|
||
|
class SimXarmWrapper(gym.Wrapper):
    """
    A wrapper for the SimXarm environments. This wrapper is used to
    convert the action and observation spaces to the correct format.

    Regardless of the wrapped env's API, `reset` returns an observation and
    `step` returns the legacy 4-tuple ``(obs, reward, done, info)``.
    """

    def __init__(self, env, task, obs_mode, image_size, action_repeat, frame_stack=1, channel_last=False):
        """
        Args:
            env: The (possibly TimeLimit-wrapped) SimXarm task environment.
            task (dict): A TASKS entry ('env', 'action_space', 'episode_length', ...).
            obs_mode (str): One of 'state', 'rgb', or 'all'.
            image_size (int): Side length of rendered square RGB observations.
            action_repeat (int): Number of env steps per wrapper step.
            frame_stack (int): Number of consecutive frames stacked into one RGB obs.
            channel_last (bool): If True images are (H, W, C), else (C, H, W).
        """
        super().__init__(env)
        self._env = env
        self.obs_mode = obs_mode
        self.image_size = image_size
        self.action_repeat = action_repeat
        self.frame_stack = frame_stack
        self._frames = deque([], maxlen=frame_stack)
        self.channel_last = channel_last
        # Episode length as seen by the agent, in wrapper steps.
        self._max_episode_steps = task["episode_length"] // action_repeat

        image_shape = (
            (image_size, image_size, 3 * frame_stack)
            if channel_last
            else (3 * frame_stack, image_size, image_size)
        )
        if obs_mode == "state":
            self.observation_space = env.observation_space["observation"]
        elif obs_mode == "rgb":
            self.observation_space = gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8)
        elif obs_mode == "all":
            self.observation_space = gym.spaces.Dict(
                state=gym.spaces.Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float32),
                rgb=gym.spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8),
            )
        else:
            raise ValueError(f"Unknown obs_mode {obs_mode}. Must be one of [rgb, all, state]")
        self.action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(len(task["action_space"]),))
        # Actions are padded up to the full 4-dim xArm action (x, y, z, gripper).
        self.action_padding = np.zeros(4 - len(task["action_space"]), dtype=np.float32)
        if "w" not in task["action_space"]:
            # Tasks without gripper control keep the gripper closed (+1).
            self.action_padding[-1] = 1.0

    def _render_obs(self):
        """Render one RGB frame at the configured size, in the configured layout."""
        obs = self.render(mode="rgb_array", width=self.image_size, height=self.image_size)
        if not self.channel_last:
            obs = obs.transpose(2, 0, 1)
        return obs.copy()

    def _update_frames(self, reset=False):
        """Append a freshly rendered frame; on reset, fill the whole stack with it."""
        pixels = self._render_obs()
        self._frames.append(pixels)
        if reset:
            for _ in range(1, self.frame_stack):
                self._frames.append(pixels)
        assert len(self._frames) == self.frame_stack

    def transform_obs(self, obs, reset=False):
        """Convert a raw env observation into the format selected by `obs_mode`."""
        if self.obs_mode == "state":
            return obs["observation"]
        elif self.obs_mode == "rgb":
            self._update_frames(reset=reset)
            rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0)
            return rgb_obs
        elif self.obs_mode == "all":
            self._update_frames(reset=reset)
            rgb_obs = np.concatenate(list(self._frames), axis=-1 if self.channel_last else 0)
            # `robot_state` resolves on the wrapped env via gym.Wrapper attribute
            # forwarding — TODO confirm the task envs expose it.
            return OrderedDict((("rgb", rgb_obs), ("state", self.robot_state)))
        else:
            raise ValueError(f"Unknown obs_mode {self.obs_mode}. Must be one of [rgb, all, state]")

    def reset(self):
        """Reset the env and return the transformed initial observation."""
        obs = self._env.reset()
        # gymnasium's reset returns (obs, info); legacy gym returns obs only.
        if isinstance(obs, tuple):
            obs = obs[0]
        return self.transform_obs(obs, reset=True)

    def step(self, action):
        """Repeat `action` for `action_repeat` env steps, summing the reward.

        Returns the legacy 4-tuple (obs, reward, done, info) regardless of
        whether the wrapped env speaks the old 4-tuple or gymnasium 5-tuple API.
        """
        action = np.concatenate([action, self.action_padding])
        reward = 0.0
        for _ in range(self.action_repeat):
            result = self._env.step(action)
            if len(result) == 5:
                # gymnasium API: (obs, reward, terminated, truncated, info).
                obs, r, terminated, truncated, info = result
                done = terminated or truncated
            else:
                obs, r, done, info = result
            reward += r
            if done:
                # Stop repeating once the episode ends so we neither step a
                # finished episode nor accumulate reward past its end.
                break
        return self.transform_obs(obs), reward, done, info

    def render(self, mode="rgb_array", width=384, height=384, **kwargs):
        """Render via the wrapped env.

        NOTE(review): gymnasium's `Env.render` takes no arguments; this relies
        on the custom xarm task envs accepting (mode, width=, height=) — confirm.
        """
        return self._env.render(mode, width=width, height=height)

    @property
    def state(self):
        """The wrapped env's low-dimensional robot state vector."""
        return self._env.robot_state
|
||
|
|
||
|
|
||
|
def make(task, obs_mode="state", image_size=84, action_repeat=1, frame_stack=1, channel_last=False, seed=0):
    """
    Create a new environment.

    Args:
        task (str): The task to create an environment for. Must be one of:
            - 'reach'
            - 'push'
            - 'peg_in_box'
            - 'lift'
        obs_mode (str): The observation mode to use. Must be one of:
            - 'state': Only state observations
            - 'rgb': RGB images
            - 'all': RGB images and state observations
        image_size (int): The size of the image observations
        action_repeat (int): The number of times to repeat the action
        frame_stack (int): The number of consecutive frames to stack in RGB observations
        channel_last (bool): Whether image observations are (H, W, C) instead of (C, H, W)
        seed (int): The random seed to use
    Returns:
        gym.Env: The environment
    Raises:
        ValueError: If `task` is not a key of TASKS.
    """
    if task not in TASKS:
        raise ValueError(f"Unknown task {task}. Must be one of {list(TASKS.keys())}")
    env = TASKS[task]["env"]()
    # Cap raw episode length before action repeat is applied by the wrapper.
    env = TimeLimit(env, TASKS[task]["episode_length"])
    env = SimXarmWrapper(env, TASKS[task], obs_mode, image_size, action_repeat, frame_stack, channel_last)
    # NOTE(review): gymnasium removed `Env.seed`; this relies on attribute
    # forwarding to a `seed` method on the underlying xarm task env — confirm
    # against xarm.tasks.base.Base.
    env.seed(seed)

    return env
|