from legged_gym import LEGGED_GYM_ROOT_DIR, envs import time from warnings import WarningMessage import numpy as np import os from isaacgym.torch_utils import * from isaacgym import gymtorch, gymapi, gymutil import torch from torch import Tensor from typing import Tuple, Dict from legged_gym import LEGGED_GYM_ROOT_DIR from legged_gym.envs.base.base_task import BaseTask from legged_gym.utils.math import quat_apply_yaw, wrap_to_pi, torch_rand_sqrt_float from legged_gym.utils.isaacgym_utils import get_euler_xyz from legged_gym.utils.helpers import class_to_dict from .legged_robot_config import LeggedRobotCfg class LeggedRobot(BaseTask): def __init__(self, cfg: LeggedRobotCfg, sim_params, physics_engine, sim_device, headless): """ Parses the provided config file, calls create_sim() (which creates, simulation and environments), initilizes pytorch buffers used during training Args: cfg (Dict): Environment config file sim_params (gymapi.SimParams): simulation parameters physics_engine (gymapi.SimType): gymapi.SIM_PHYSX (must be PhysX) device_type (string): 'cuda' or 'cpu' device_id (int): 0, 1, ... headless (bool): Run without rendering if True """ self.cfg = cfg self.sim_params = sim_params self.height_samples = None self.debug_viz = False self.init_done = False self._parse_cfg(self.cfg) super().__init__(self.cfg, sim_params, physics_engine, sim_device, headless) if not self.headless: self.set_camera(self.cfg.viewer.pos, self.cfg.viewer.lookat) self._init_buffers() self._prepare_reward_function() self.init_done = True def step(self, actions): """ Apply actions, simulate, call self.post_physics_step() Args: actions (torch.Tensor): Tensor of shape (num_envs, num_actions_per_env) """ clip_actions = self.cfg.normalization.clip_actions self.actions = torch.clip(actions, -clip_actions, clip_actions).to(self.device) # step physics and render each frame self.render() for _ in range(self.cfg.control.decimation): self.torques = self._compute_torques(self.actions).view(self.torques.shape) self.gym.set_dof_actuation_force_tensor(self.sim, gymtorch.unwrap_tensor(self.torques)) self.gym.simulate(self.sim) if self.cfg.env.test: elapsed_time = self.gym.get_elapsed_time(self.sim) sim_time = self.gym.get_sim_time(self.sim) if sim_time-elapsed_time>0: time.sleep(sim_time-elapsed_time) if self.device == 'cpu': self.gym.fetch_results(self.sim, True) self.gym.refresh_dof_state_tensor(self.sim) self.post_physics_step() # return clipped obs, clipped states (None), rewards, dones and infos clip_obs = self.cfg.normalization.clip_observations self.obs_buf = torch.clip(self.obs_buf, -clip_obs, clip_obs) if self.privileged_obs_buf is not None: self.privileged_obs_buf = torch.clip(self.privileged_obs_buf, -clip_obs, clip_obs) return self.obs_buf, self.privileged_obs_buf, self.rew_buf, self.reset_buf, self.extras def post_physics_step(self): """ check terminations, compute observations and rewards calls self._post_physics_step_callback() for common computations calls self._draw_debug_vis() if needed """ self.gym.refresh_actor_root_state_tensor(self.sim) self.gym.refresh_net_contact_force_tensor(self.sim) self.episode_length_buf += 1 self.common_step_counter += 1 # prepare quantities self.base_pos[:] = self.root_states[:, 0:3] self.base_quat[:] = self.root_states[:, 3:7] self.rpy[:] = get_euler_xyz(self.base_quat) self.base_lin_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:, 7:10]) self.base_ang_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:, 10:13]) self.projected_gravity[:] = quat_rotate_inverse(self.base_quat, self.gravity_vec) self._post_physics_step_callback() # compute observations, rewards, resets, ... self.check_termination() self.compute_reward() env_ids = self.reset_buf.nonzero(as_tuple=False).flatten() self.reset_idx(env_ids) self.compute_observations() # in some cases a simulation step might be required to refresh some obs (for example body positions) self.last_actions[:] = self.actions[:] self.last_dof_vel[:] = self.dof_vel[:] self.last_root_vel[:] = self.root_states[:, 7:13] def check_termination(self): """ Check if environments need to be reset """ self.reset_buf = torch.any(torch.norm(self.contact_forces[:, self.termination_contact_indices, :], dim=-1) > 1., dim=1) self.reset_buf |= torch.logical_or(torch.abs(self.rpy[:,1])>1.0, torch.abs(self.rpy[:,0])>0.8) self.time_out_buf = self.episode_length_buf > self.max_episode_length # no terminal reward for time-outs self.reset_buf |= self.time_out_buf def reset_idx(self, env_ids): """ Reset some environments. Calls self._reset_dofs(env_ids), self._reset_root_states(env_ids), and self._resample_commands(env_ids) [Optional] calls self._update_terrain_curriculum(env_ids), self.update_command_curriculum(env_ids) and Logs episode info Resets some buffers Args: env_ids (list[int]): List of environment ids which must be reset """ if len(env_ids) == 0: return # reset robot states self._reset_dofs(env_ids) self._reset_root_states(env_ids) self._resample_commands(env_ids) # reset buffers self.last_actions[env_ids] = 0. self.last_dof_vel[env_ids] = 0. self.feet_air_time[env_ids] = 0. self.episode_length_buf[env_ids] = 0 self.reset_buf[env_ids] = 1 # fill extras self.extras["episode"] = {} for key in self.episode_sums.keys(): self.extras["episode"]['rew_' + key] = torch.mean(self.episode_sums[key][env_ids]) / self.max_episode_length_s self.episode_sums[key][env_ids] = 0. if self.cfg.commands.curriculum: self.extras["episode"]["max_command_x"] = self.command_ranges["lin_vel_x"][1] # send timeout info to the algorithm if self.cfg.env.send_timeouts: self.extras["time_outs"] = self.time_out_buf def compute_reward(self): """ Compute rewards Calls each reward function which had a non-zero scale (processed in self._prepare_reward_function()) adds each terms to the episode sums and to the total reward """ self.rew_buf[:] = 0. for i in range(len(self.reward_functions)): name = self.reward_names[i] rew = self.reward_functions[i]() * self.reward_scales[name] self.rew_buf += rew self.episode_sums[name] += rew if self.cfg.rewards.only_positive_rewards: self.rew_buf[:] = torch.clip(self.rew_buf[:], min=0.) # add termination reward after clipping if "termination" in self.reward_scales: rew = self._reward_termination() * self.reward_scales["termination"] self.rew_buf += rew self.episode_sums["termination"] += rew def compute_observations(self): """ Computes observations """ self.obs_buf = torch.cat(( self.base_lin_vel * self.obs_scales.lin_vel, self.base_ang_vel * self.obs_scales.ang_vel, self.projected_gravity, self.commands[:, :3] * self.commands_scale, (self.dof_pos - self.default_dof_pos) * self.obs_scales.dof_pos, self.dof_vel * self.obs_scales.dof_vel, self.actions ),dim=-1) # add perceptive inputs if not blind # add noise if needed if self.add_noise: self.obs_buf += (2 * torch.rand_like(self.obs_buf) - 1) * self.noise_scale_vec def create_sim(self): """ Creates simulation, terrain and evironments """ self.up_axis_idx = 2 # 2 for z, 1 for y -> adapt gravity accordingly self.sim = self.gym.create_sim(self.sim_device_id, self.graphics_device_id, self.physics_engine, self.sim_params) self._create_ground_plane() self._create_envs() def set_camera(self, position, lookat): """ Set camera position and direction """ cam_pos = gymapi.Vec3(position[0], position[1], position[2]) cam_target = gymapi.Vec3(lookat[0], lookat[1], lookat[2]) self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target) #------------- Callbacks -------------- def _process_rigid_shape_props(self, props, env_id): """ Callback allowing to store/change/randomize the rigid shape properties of each environment. Called During environment creation. Base behavior: randomizes the friction of each environment Args: props (List[gymapi.RigidShapeProperties]): Properties of each shape of the asset env_id (int): Environment id Returns: [List[gymapi.RigidShapeProperties]]: Modified rigid shape properties """ if self.cfg.domain_rand.randomize_friction: if env_id==0: # prepare friction randomization friction_range = self.cfg.domain_rand.friction_range num_buckets = 64 bucket_ids = torch.randint(0, num_buckets, (self.num_envs, 1)) friction_buckets = torch_rand_float(friction_range[0], friction_range[1], (num_buckets,1), device='cpu') self.friction_coeffs = friction_buckets[bucket_ids] for s in range(len(props)): props[s].friction = self.friction_coeffs[env_id] return props def _process_dof_props(self, props, env_id): """ Callback allowing to store/change/randomize the DOF properties of each environment. Called During environment creation. Base behavior: stores position, velocity and torques limits defined in the URDF Args: props (numpy.array): Properties of each DOF of the asset env_id (int): Environment id Returns: [numpy.array]: Modified DOF properties """ if env_id==0: self.dof_pos_limits = torch.zeros(self.num_dof, 2, dtype=torch.float, device=self.device, requires_grad=False) self.dof_vel_limits = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False) self.torque_limits = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False) for i in range(len(props)): self.dof_pos_limits[i, 0] = props["lower"][i].item() self.dof_pos_limits[i, 1] = props["upper"][i].item() self.dof_vel_limits[i] = props["velocity"][i].item() self.torque_limits[i] = props["effort"][i].item() # soft limits m = (self.dof_pos_limits[i, 0] + self.dof_pos_limits[i, 1]) / 2 r = self.dof_pos_limits[i, 1] - self.dof_pos_limits[i, 0] self.dof_pos_limits[i, 0] = m - 0.5 * r * self.cfg.rewards.soft_dof_pos_limit self.dof_pos_limits[i, 1] = m + 0.5 * r * self.cfg.rewards.soft_dof_pos_limit return props def _process_rigid_body_props(self, props, env_id): # if env_id==0: # sum = 0 # for i, p in enumerate(props): # sum += p.mass # print(f"Mass of body {i}: {p.mass} (before randomization)") # print(f"Total mass {sum} (before randomization)") # randomize base mass if self.cfg.domain_rand.randomize_base_mass: rng = self.cfg.domain_rand.added_mass_range props[0].mass += np.random.uniform(rng[0], rng[1]) return props def _post_physics_step_callback(self): """ Callback called before computing terminations, rewards, and observations Default behaviour: Compute ang vel command based on target and heading, compute measured terrain heights and randomly push robots """ # env_ids = (self.episode_length_buf % int(self.cfg.commands.resampling_time / self.dt)==0).nonzero(as_tuple=False).flatten() self._resample_commands(env_ids) if self.cfg.commands.heading_command: forward = quat_apply(self.base_quat, self.forward_vec) heading = torch.atan2(forward[:, 1], forward[:, 0]) self.commands[:, 2] = torch.clip(0.5*wrap_to_pi(self.commands[:, 3] - heading), -1., 1.) if self.cfg.domain_rand.push_robots and (self.common_step_counter % self.cfg.domain_rand.push_interval == 0): self._push_robots() def _resample_commands(self, env_ids): """ Randommly select commands of some environments Args: env_ids (List[int]): Environments ids for which new commands are needed """ self.commands[env_ids, 0] = torch_rand_float(self.command_ranges["lin_vel_x"][0], self.command_ranges["lin_vel_x"][1], (len(env_ids), 1), device=self.device).squeeze(1) self.commands[env_ids, 1] = torch_rand_float(self.command_ranges["lin_vel_y"][0], self.command_ranges["lin_vel_y"][1], (len(env_ids), 1), device=self.device).squeeze(1) if self.cfg.commands.heading_command: self.commands[env_ids, 3] = torch_rand_float(self.command_ranges["heading"][0], self.command_ranges["heading"][1], (len(env_ids), 1), device=self.device).squeeze(1) else: self.commands[env_ids, 2] = torch_rand_float(self.command_ranges["ang_vel_yaw"][0], self.command_ranges["ang_vel_yaw"][1], (len(env_ids), 1), device=self.device).squeeze(1) # set small commands to zero self.commands[env_ids, :2] *= (torch.norm(self.commands[env_ids, :2], dim=1) > 0.2).unsqueeze(1) def _compute_torques(self, actions): """ Compute torques from actions. Actions can be interpreted as position or velocity targets given to a PD controller, or directly as scaled torques. [NOTE]: torques must have the same dimension as the number of DOFs, even if some DOFs are not actuated. Args: actions (torch.Tensor): Actions Returns: [torch.Tensor]: Torques sent to the simulation """ #pd controller actions_scaled = actions * self.cfg.control.action_scale control_type = self.cfg.control.control_type if control_type=="P": torques = self.p_gains*(actions_scaled + self.default_dof_pos - self.dof_pos) - self.d_gains*self.dof_vel elif control_type=="V": torques = self.p_gains*(actions_scaled - self.dof_vel) - self.d_gains*(self.dof_vel - self.last_dof_vel)/self.sim_params.dt elif control_type=="T": torques = actions_scaled else: raise NameError(f"Unknown controller type: {control_type}") return torch.clip(torques, -self.torque_limits, self.torque_limits) def _reset_dofs(self, env_ids): """ Resets DOF position and velocities of selected environmments Positions are randomly selected within 0.5:1.5 x default positions. Velocities are set to zero. Args: env_ids (List[int]): Environemnt ids """ self.dof_pos[env_ids] = self.default_dof_pos * torch_rand_float(0.5, 1.5, (len(env_ids), self.num_dof), device=self.device) self.dof_vel[env_ids] = 0. env_ids_int32 = env_ids.to(dtype=torch.int32) self.gym.set_dof_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self.dof_state), gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) def _reset_root_states(self, env_ids): """ Resets ROOT states position and velocities of selected environmments Sets base position based on the curriculum Selects randomized base velocities within -0.5:0.5 [m/s, rad/s] Args: env_ids (List[int]): Environemnt ids """ # base position if self.custom_origins: self.root_states[env_ids] = self.base_init_state self.root_states[env_ids, :3] += self.env_origins[env_ids] self.root_states[env_ids, :2] += torch_rand_float(-1., 1., (len(env_ids), 2), device=self.device) # xy position within 1m of the center else: self.root_states[env_ids] = self.base_init_state self.root_states[env_ids, :3] += self.env_origins[env_ids] # base velocities self.root_states[env_ids, 7:13] = torch_rand_float(-0.5, 0.5, (len(env_ids), 6), device=self.device) # [7:10]: lin vel, [10:13]: ang vel env_ids_int32 = env_ids.to(dtype=torch.int32) self.gym.set_actor_root_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self.root_states), gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) def _push_robots(self): """ Random pushes the robots. Emulates an impulse by setting a randomized base velocity. """ max_vel = self.cfg.domain_rand.max_push_vel_xy self.root_states[:, 7:9] = torch_rand_float(-max_vel, max_vel, (self.num_envs, 2), device=self.device) # lin vel x/y self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states)) def update_command_curriculum(self, env_ids): """ Implements a curriculum of increasing commands Args: env_ids (List[int]): ids of environments being reset """ # If the tracking reward is above 80% of the maximum, increase the range of commands if torch.mean(self.episode_sums["tracking_lin_vel"][env_ids]) / self.max_episode_length > 0.8 * self.reward_scales["tracking_lin_vel"]: self.command_ranges["lin_vel_x"][0] = np.clip(self.command_ranges["lin_vel_x"][0] - 0.5, -self.cfg.commands.max_curriculum, 0.) self.command_ranges["lin_vel_x"][1] = np.clip(self.command_ranges["lin_vel_x"][1] + 0.5, 0., self.cfg.commands.max_curriculum) def _get_noise_scale_vec(self, cfg): """ Sets a vector used to scale the noise added to the observations. [NOTE]: Must be adapted when changing the observations structure Args: cfg (Dict): Environment config file Returns: [torch.Tensor]: Vector of scales used to multiply a uniform distribution in [-1, 1] """ noise_vec = torch.zeros_like(self.obs_buf[0]) self.add_noise = self.cfg.noise.add_noise noise_scales = self.cfg.noise.noise_scales noise_level = self.cfg.noise.noise_level noise_vec[:3] = noise_scales.lin_vel * noise_level * self.obs_scales.lin_vel noise_vec[3:6] = noise_scales.ang_vel * noise_level * self.obs_scales.ang_vel noise_vec[6:9] = noise_scales.gravity * noise_level noise_vec[9:12] = 0. # commands noise_vec[12:12+self.num_actions] = noise_scales.dof_pos * noise_level * self.obs_scales.dof_pos noise_vec[12+self.num_actions:12+2*self.num_actions] = noise_scales.dof_vel * noise_level * self.obs_scales.dof_vel noise_vec[12+2*self.num_actions:12+3*self.num_actions] = 0. # previous actions return noise_vec #---------------------------------------- def _init_buffers(self): """ Initialize torch tensors which will contain simulation states and processed quantities """ # get gym GPU state tensors actor_root_state = self.gym.acquire_actor_root_state_tensor(self.sim) dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim) net_contact_forces = self.gym.acquire_net_contact_force_tensor(self.sim) self.gym.refresh_dof_state_tensor(self.sim) self.gym.refresh_actor_root_state_tensor(self.sim) self.gym.refresh_net_contact_force_tensor(self.sim) # create some wrapper tensors for different slices self.root_states = gymtorch.wrap_tensor(actor_root_state) self.dof_state = gymtorch.wrap_tensor(dof_state_tensor) self.dof_pos = self.dof_state.view(self.num_envs, self.num_dof, 2)[..., 0] self.dof_vel = self.dof_state.view(self.num_envs, self.num_dof, 2)[..., 1] self.base_quat = self.root_states[:, 3:7] self.rpy = get_euler_xyz(self.base_quat) self.base_pos = self.root_states[:self.num_envs, 0:3] self.contact_forces = gymtorch.wrap_tensor(net_contact_forces).view(self.num_envs, -1, 3) # shape: num_envs, num_bodies, xyz axis # initialize some data used later on self.common_step_counter = 0 self.extras = {} self.noise_scale_vec = self._get_noise_scale_vec(self.cfg) self.gravity_vec = to_torch(get_axis_params(-1., self.up_axis_idx), device=self.device).repeat((self.num_envs, 1)) self.forward_vec = to_torch([1., 0., 0.], device=self.device).repeat((self.num_envs, 1)) self.torques = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False) self.p_gains = torch.zeros(self.num_actions, dtype=torch.float, device=self.device, requires_grad=False) self.d_gains = torch.zeros(self.num_actions, dtype=torch.float, device=self.device, requires_grad=False) self.actions = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False) self.last_actions = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False) self.last_dof_vel = torch.zeros_like(self.dof_vel) self.last_root_vel = torch.zeros_like(self.root_states[:, 7:13]) self.commands = torch.zeros(self.num_envs, self.cfg.commands.num_commands, dtype=torch.float, device=self.device, requires_grad=False) # x vel, y vel, yaw vel, heading self.commands_scale = torch.tensor([self.obs_scales.lin_vel, self.obs_scales.lin_vel, self.obs_scales.ang_vel], device=self.device, requires_grad=False,) # TODO change this self.feet_air_time = torch.zeros(self.num_envs, self.feet_indices.shape[0], dtype=torch.float, device=self.device, requires_grad=False) self.last_contacts = torch.zeros(self.num_envs, len(self.feet_indices), dtype=torch.bool, device=self.device, requires_grad=False) self.base_lin_vel = quat_rotate_inverse(self.base_quat, self.root_states[:, 7:10]) self.base_ang_vel = quat_rotate_inverse(self.base_quat, self.root_states[:, 10:13]) self.projected_gravity = quat_rotate_inverse(self.base_quat, self.gravity_vec) # joint positions offsets and PD gains self.default_dof_pos = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False) for i in range(self.num_dofs): name = self.dof_names[i] angle = self.cfg.init_state.default_joint_angles[name] self.default_dof_pos[i] = angle found = False for dof_name in self.cfg.control.stiffness.keys(): if dof_name in name: self.p_gains[i] = self.cfg.control.stiffness[dof_name] self.d_gains[i] = self.cfg.control.damping[dof_name] found = True if not found: self.p_gains[i] = 0. self.d_gains[i] = 0. if self.cfg.control.control_type in ["P", "V"]: print(f"PD gain of joint {name} were not defined, setting them to zero") self.default_dof_pos = self.default_dof_pos.unsqueeze(0) def _prepare_reward_function(self): """ Prepares a list of reward functions, whcih will be called to compute the total reward. Looks for self._reward_, where are names of all non zero reward scales in the cfg. """ # remove zero scales + multiply non-zero ones by dt for key in list(self.reward_scales.keys()): scale = self.reward_scales[key] if scale==0: self.reward_scales.pop(key) else: self.reward_scales[key] *= self.dt # prepare list of functions self.reward_functions = [] self.reward_names = [] for name, scale in self.reward_scales.items(): if name=="termination": continue self.reward_names.append(name) name = '_reward_' + name self.reward_functions.append(getattr(self, name)) # reward episode sums self.episode_sums = {name: torch.zeros(self.num_envs, dtype=torch.float, device=self.device, requires_grad=False) for name in self.reward_scales.keys()} def _create_ground_plane(self): """ Adds a ground plane to the simulation, sets friction and restitution based on the cfg. """ plane_params = gymapi.PlaneParams() plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0) plane_params.static_friction = self.cfg.terrain.static_friction plane_params.dynamic_friction = self.cfg.terrain.dynamic_friction plane_params.restitution = self.cfg.terrain.restitution self.gym.add_ground(self.sim, plane_params) def _create_envs(self): """ Creates environments: 1. loads the robot URDF/MJCF asset, 2. For each environment 2.1 creates the environment, 2.2 calls DOF and Rigid shape properties callbacks, 2.3 create actor with these properties and add them to the env 3. Store indices of different bodies of the robot """ asset_path = self.cfg.asset.file.format(LEGGED_GYM_ROOT_DIR=LEGGED_GYM_ROOT_DIR) asset_root = os.path.dirname(asset_path) asset_file = os.path.basename(asset_path) asset_options = gymapi.AssetOptions() asset_options.default_dof_drive_mode = self.cfg.asset.default_dof_drive_mode asset_options.collapse_fixed_joints = self.cfg.asset.collapse_fixed_joints asset_options.replace_cylinder_with_capsule = self.cfg.asset.replace_cylinder_with_capsule asset_options.flip_visual_attachments = self.cfg.asset.flip_visual_attachments asset_options.fix_base_link = self.cfg.asset.fix_base_link asset_options.density = self.cfg.asset.density asset_options.angular_damping = self.cfg.asset.angular_damping asset_options.linear_damping = self.cfg.asset.linear_damping asset_options.max_angular_velocity = self.cfg.asset.max_angular_velocity asset_options.max_linear_velocity = self.cfg.asset.max_linear_velocity asset_options.armature = self.cfg.asset.armature asset_options.thickness = self.cfg.asset.thickness asset_options.disable_gravity = self.cfg.asset.disable_gravity robot_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) self.num_dof = self.gym.get_asset_dof_count(robot_asset) self.num_bodies = self.gym.get_asset_rigid_body_count(robot_asset) dof_props_asset = self.gym.get_asset_dof_properties(robot_asset) rigid_shape_props_asset = self.gym.get_asset_rigid_shape_properties(robot_asset) # save body names from the asset body_names = self.gym.get_asset_rigid_body_names(robot_asset) self.dof_names = self.gym.get_asset_dof_names(robot_asset) self.num_bodies = len(body_names) self.num_dofs = len(self.dof_names) feet_names = [s for s in body_names if self.cfg.asset.foot_name in s] penalized_contact_names = [] for name in self.cfg.asset.penalize_contacts_on: penalized_contact_names.extend([s for s in body_names if name in s]) termination_contact_names = [] for name in self.cfg.asset.terminate_after_contacts_on: termination_contact_names.extend([s for s in body_names if name in s]) base_init_state_list = self.cfg.init_state.pos + self.cfg.init_state.rot + self.cfg.init_state.lin_vel + self.cfg.init_state.ang_vel self.base_init_state = to_torch(base_init_state_list, device=self.device, requires_grad=False) start_pose = gymapi.Transform() start_pose.p = gymapi.Vec3(*self.base_init_state[:3]) self._get_env_origins() env_lower = gymapi.Vec3(0., 0., 0.) env_upper = gymapi.Vec3(0., 0., 0.) self.actor_handles = [] self.envs = [] for i in range(self.num_envs): # create env instance env_handle = self.gym.create_env(self.sim, env_lower, env_upper, int(np.sqrt(self.num_envs))) pos = self.env_origins[i].clone() pos[:2] += torch_rand_float(-1., 1., (2,1), device=self.device).squeeze(1) start_pose.p = gymapi.Vec3(*pos) rigid_shape_props = self._process_rigid_shape_props(rigid_shape_props_asset, i) self.gym.set_asset_rigid_shape_properties(robot_asset, rigid_shape_props) actor_handle = self.gym.create_actor(env_handle, robot_asset, start_pose, self.cfg.asset.name, i, self.cfg.asset.self_collisions, 0) dof_props = self._process_dof_props(dof_props_asset, i) self.gym.set_actor_dof_properties(env_handle, actor_handle, dof_props) body_props = self.gym.get_actor_rigid_body_properties(env_handle, actor_handle) body_props = self._process_rigid_body_props(body_props, i) self.gym.set_actor_rigid_body_properties(env_handle, actor_handle, body_props, recomputeInertia=True) self.envs.append(env_handle) self.actor_handles.append(actor_handle) self.feet_indices = torch.zeros(len(feet_names), dtype=torch.long, device=self.device, requires_grad=False) for i in range(len(feet_names)): self.feet_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], feet_names[i]) self.penalised_contact_indices = torch.zeros(len(penalized_contact_names), dtype=torch.long, device=self.device, requires_grad=False) for i in range(len(penalized_contact_names)): self.penalised_contact_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], penalized_contact_names[i]) self.termination_contact_indices = torch.zeros(len(termination_contact_names), dtype=torch.long, device=self.device, requires_grad=False) for i in range(len(termination_contact_names)): self.termination_contact_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0], termination_contact_names[i]) def _get_env_origins(self): """ Sets environment origins. On rough terrain the origins are defined by the terrain platforms. Otherwise create a grid. """ self.custom_origins = False self.env_origins = torch.zeros(self.num_envs, 3, device=self.device, requires_grad=False) # create a grid of robots num_cols = np.floor(np.sqrt(self.num_envs)) num_rows = np.ceil(self.num_envs / num_cols) xx, yy = torch.meshgrid(torch.arange(num_rows), torch.arange(num_cols)) spacing = self.cfg.env.env_spacing self.env_origins[:, 0] = spacing * xx.flatten()[:self.num_envs] self.env_origins[:, 1] = spacing * yy.flatten()[:self.num_envs] self.env_origins[:, 2] = 0. def _parse_cfg(self, cfg): self.dt = self.cfg.control.decimation * self.sim_params.dt self.obs_scales = self.cfg.normalization.obs_scales self.reward_scales = class_to_dict(self.cfg.rewards.scales) self.command_ranges = class_to_dict(self.cfg.commands.ranges) self.max_episode_length_s = self.cfg.env.episode_length_s self.max_episode_length = np.ceil(self.max_episode_length_s / self.dt) self.cfg.domain_rand.push_interval = np.ceil(self.cfg.domain_rand.push_interval_s / self.dt) #------------ reward functions---------------- def _reward_lin_vel_z(self): # Penalize z axis base linear velocity return torch.square(self.base_lin_vel[:, 2]) def _reward_ang_vel_xy(self): # Penalize xy axes base angular velocity return torch.sum(torch.square(self.base_ang_vel[:, :2]), dim=1) def _reward_orientation(self): # Penalize non flat base orientation return torch.sum(torch.square(self.projected_gravity[:, :2]), dim=1) def _reward_base_height(self): # Penalize base height away from target base_height = self.root_states[:, 2] return torch.square(base_height - self.cfg.rewards.base_height_target) def _reward_torques(self): # Penalize torques return torch.sum(torch.square(self.torques), dim=1) def _reward_dof_vel(self): # Penalize dof velocities return torch.sum(torch.square(self.dof_vel), dim=1) def _reward_dof_acc(self): # Penalize dof accelerations return torch.sum(torch.square((self.last_dof_vel - self.dof_vel) / self.dt), dim=1) def _reward_action_rate(self): # Penalize changes in actions return torch.sum(torch.square(self.last_actions - self.actions), dim=1) def _reward_collision(self): # Penalize collisions on selected bodies return torch.sum(1.*(torch.norm(self.contact_forces[:, self.penalised_contact_indices, :], dim=-1) > 0.1), dim=1) def _reward_termination(self): # Terminal reward / penalty return self.reset_buf * ~self.time_out_buf def _reward_dof_pos_limits(self): # Penalize dof positions too close to the limit out_of_limits = -(self.dof_pos - self.dof_pos_limits[:, 0]).clip(max=0.) # lower limit out_of_limits += (self.dof_pos - self.dof_pos_limits[:, 1]).clip(min=0.) return torch.sum(out_of_limits, dim=1) def _reward_dof_vel_limits(self): # Penalize dof velocities too close to the limit # clip to max error = 1 rad/s per joint to avoid huge penalties return torch.sum((torch.abs(self.dof_vel) - self.dof_vel_limits*self.cfg.rewards.soft_dof_vel_limit).clip(min=0., max=1.), dim=1) def _reward_torque_limits(self): # penalize torques too close to the limit return torch.sum((torch.abs(self.torques) - self.torque_limits*self.cfg.rewards.soft_torque_limit).clip(min=0.), dim=1) def _reward_tracking_lin_vel(self): # Tracking of linear velocity commands (xy axes) lin_vel_error = torch.sum(torch.square(self.commands[:, :2] - self.base_lin_vel[:, :2]), dim=1) return torch.exp(-lin_vel_error/self.cfg.rewards.tracking_sigma) def _reward_tracking_ang_vel(self): # Tracking of angular velocity commands (yaw) ang_vel_error = torch.square(self.commands[:, 2] - self.base_ang_vel[:, 2]) return torch.exp(-ang_vel_error/self.cfg.rewards.tracking_sigma) def _reward_feet_air_time(self): # Reward long steps # Need to filter the contacts because the contact reporting of PhysX is unreliable on meshes contact = self.contact_forces[:, self.feet_indices, 2] > 1. contact_filt = torch.logical_or(contact, self.last_contacts) self.last_contacts = contact first_contact = (self.feet_air_time > 0.) * contact_filt self.feet_air_time += self.dt rew_airTime = torch.sum((self.feet_air_time - 0.5) * first_contact, dim=1) # reward only on first contact with the ground rew_airTime *= torch.norm(self.commands[:, :2], dim=1) > 0.1 #no reward for zero command self.feet_air_time *= ~contact_filt return rew_airTime def _reward_stumble(self): # Penalize feet hitting vertical surfaces return torch.any(torch.norm(self.contact_forces[:, self.feet_indices, :2], dim=2) >\ 5 *torch.abs(self.contact_forces[:, self.feet_indices, 2]), dim=1) def _reward_stand_still(self): # Penalize motion at zero commands return torch.sum(torch.abs(self.dof_pos - self.default_dof_pos), dim=1) * (torch.norm(self.commands[:, :2], dim=1) < 0.1) def _reward_feet_contact_forces(self): # penalize high contact forces return torch.sum((torch.norm(self.contact_forces[:, self.feet_indices, :], dim=-1) - self.cfg.rewards.max_contact_force).clip(min=0.), dim=1)