walk-these-ways-go2/go2_gym/envs/base/legged_robot.py

# License: see [LICENSE, LICENSES/legged_gym/LICENSE]
import os
from typing import Dict
from isaacgym import gymtorch, gymapi, gymutil
from isaacgym.torch_utils import *
assert gymtorch
import torch
from go2_gym import MINI_GYM_ROOT_DIR
from go2_gym.envs.base.base_task import BaseTask
from go2_gym.utils.math_utils import quat_apply_yaw, wrap_to_pi, get_scale_shift
from go2_gym.utils.terrain import Terrain
from .legged_robot_config import Cfg
class LeggedRobot(BaseTask):
def __init__(self, cfg: Cfg, sim_params, physics_engine, sim_device, headless, eval_cfg=None,
initial_dynamics_dict=None):
""" Parses the provided config file,
calls create_sim() (which creates the simulation, terrain and environments),
initializes pytorch buffers used during training
Args:
cfg (Dict): Environment config file
sim_params (gymapi.SimParams): simulation parameters
physics_engine (gymapi.SimType): gymapi.SIM_PHYSX (must be PhysX)
sim_device (string): simulation device, e.g. 'cuda:0' or 'cpu'
headless (bool): Run without rendering if True
"""
self.cfg = cfg
self.eval_cfg = eval_cfg
self.sim_params = sim_params
self.height_samples = None
self.debug_viz = False
self.init_done = False
self.initial_dynamics_dict = initial_dynamics_dict
if eval_cfg is not None: self._parse_cfg(eval_cfg)
self._parse_cfg(self.cfg)
super().__init__(self.cfg, sim_params, physics_engine, sim_device, headless, self.eval_cfg)
self._init_command_distribution(torch.arange(self.num_envs, device=self.device))
# self.rand_buffers_eval = self._init_custom_buffers__(self.num_eval_envs)
if not self.headless:
self.set_camera(self.cfg.viewer.pos, self.cfg.viewer.lookat)
self._init_buffers()
self._prepare_reward_function()
self.init_done = True
self.record_now = False
self.record_eval_now = False
self.collecting_evaluation = False
self.num_still_evaluating = 0
def step(self, actions):
""" Apply actions, simulate, call self.post_physics_step()
Args:
actions (torch.Tensor): Tensor of shape (num_envs, num_actions_per_env)
"""
clip_actions = self.cfg.normalization.clip_actions
self.actions = torch.clip(actions, -clip_actions, clip_actions).to(self.device)
# step physics and render each frame
self.prev_base_pos = self.base_pos.clone()
self.prev_base_quat = self.base_quat.clone()
self.prev_base_lin_vel = self.base_lin_vel.clone()
self.prev_foot_velocities = self.foot_velocities.clone()
self.render_gui()
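# each policy step recomputes the torques and advances the physics cfg.control.decimation times
# with the same action, so one control step spans `decimation` simulation substeps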
for _ in range(self.cfg.control.decimation):
self.torques = self._compute_torques(self.actions).view(self.torques.shape)
self.gym.set_dof_actuation_force_tensor(self.sim, gymtorch.unwrap_tensor(self.torques))
self.gym.simulate(self.sim)
# if self.device == 'cpu':
self.gym.fetch_results(self.sim, True)
self.gym.refresh_dof_state_tensor(self.sim)
self.post_physics_step()
# return clipped obs, clipped states (None), rewards, dones and infos
clip_obs = self.cfg.normalization.clip_observations
self.obs_buf = torch.clip(self.obs_buf, -clip_obs, clip_obs)
if self.privileged_obs_buf is not None:
self.privileged_obs_buf = torch.clip(self.privileged_obs_buf, -clip_obs, clip_obs)
return self.obs_buf, self.privileged_obs_buf, self.rew_buf, self.reset_buf, self.extras
def post_physics_step(self):
""" check terminations, compute observations and rewards
calls self._post_physics_step_callback() for common computations
calls self._draw_debug_vis() if needed
"""
self.gym.refresh_actor_root_state_tensor(self.sim)
self.gym.refresh_net_contact_force_tensor(self.sim)
self.gym.refresh_rigid_body_state_tensor(self.sim)
if self.record_now:
self.gym.step_graphics(self.sim)
self.gym.render_all_camera_sensors(self.sim)
self.episode_length_buf += 1
self.common_step_counter += 1
# prepare quantities
self.base_pos[:] = self.root_states[:self.num_envs, 0:3]
self.base_quat[:] = self.root_states[:self.num_envs, 3:7]
self.base_lin_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:self.num_envs, 7:10])
self.base_ang_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:self.num_envs, 10:13])
self.projected_gravity[:] = quat_rotate_inverse(self.base_quat, self.gravity_vec)
self.foot_velocities = self.rigid_body_state.view(self.num_envs, self.num_bodies, 13
)[:, self.feet_indices, 7:10]
self.foot_positions = self.rigid_body_state.view(self.num_envs, self.num_bodies, 13)[:, self.feet_indices,
0:3]
self._post_physics_step_callback()
# compute observations, rewards, resets, ...
self.check_termination()
self.compute_reward()
env_ids = self.reset_buf.nonzero(as_tuple=False).flatten()
self.reset_idx(env_ids)
self.compute_observations()
self.last_last_actions[:] = self.last_actions[:]
self.last_actions[:] = self.actions[:]
self.last_last_joint_pos_target[:] = self.last_joint_pos_target[:]
self.last_joint_pos_target[:] = self.joint_pos_target[:]
self.last_dof_vel[:] = self.dof_vel[:]
self.last_root_vel[:] = self.root_states[:, 7:13]
if self.viewer and self.enable_viewer_sync and self.debug_viz:
self._draw_debug_vis()
self._render_headless()
def check_termination(self):
""" Check if environments need to be reset
"""
self.reset_buf = torch.any(torch.norm(self.contact_forces[:, self.termination_contact_indices, :], dim=-1) > 1.,
dim=1)
self.time_out_buf = self.episode_length_buf > self.cfg.env.max_episode_length # no terminal reward for time-outs
self.reset_buf |= self.time_out_buf
if self.cfg.rewards.use_terminal_body_height:
self.body_height_buf = torch.mean(self.root_states[:, 2].unsqueeze(1) - self.measured_heights, dim=1) \
< self.cfg.rewards.terminal_body_height
self.reset_buf = torch.logical_or(self.body_height_buf, self.reset_buf)
def reset_idx(self, env_ids):
""" Reset some environments.
Calls self._reset_dofs(env_ids), self._reset_root_states(env_ids) and self._resample_commands(env_ids),
logs episode info
and resets some buffers
Args:
env_ids (list[int]): List of environment ids which must be reset
"""
if len(env_ids) == 0:
return
# reset robot states
self._resample_commands(env_ids)
self._call_train_eval(self._randomize_dof_props, env_ids)
if self.cfg.domain_rand.randomize_rigids_after_start:
self._call_train_eval(self._randomize_rigid_body_props, env_ids)
self._call_train_eval(self.refresh_actor_rigid_shape_props, env_ids)
self._call_train_eval(self._reset_dofs, env_ids)
self._call_train_eval(self._reset_root_states, env_ids)
# reset buffers
self.last_actions[env_ids] = 0.
self.last_last_actions[env_ids] = 0.
self.last_dof_vel[env_ids] = 0.
self.feet_air_time[env_ids] = 0.
self.episode_length_buf[env_ids] = 0
self.reset_buf[env_ids] = 1
# fill extras
train_env_ids = env_ids[env_ids < self.num_train_envs]
if len(train_env_ids) > 0:
self.extras["train/episode"] = {}
for key in self.episode_sums.keys():
self.extras["train/episode"]['rew_' + key] = torch.mean(
self.episode_sums[key][train_env_ids])
self.episode_sums[key][train_env_ids] = 0.
eval_env_ids = env_ids[env_ids >= self.num_train_envs]
if len(eval_env_ids) > 0:
self.extras["eval/episode"] = {}
for key in self.episode_sums.keys():
# save the evaluation rollout result if not already saved
unset_eval_envs = eval_env_ids[self.episode_sums_eval[key][eval_env_ids] == -1]
self.episode_sums_eval[key][unset_eval_envs] = self.episode_sums[key][unset_eval_envs]
self.episode_sums[key][eval_env_ids] = 0.
# log additional curriculum info
if self.cfg.terrain.curriculum:
self.extras["train/episode"]["terrain_level"] = torch.mean(
self.terrain_levels[:self.num_train_envs].float())
if self.cfg.commands.command_curriculum:
self.extras["env_bins"] = torch.Tensor(self.env_command_bins)[:self.num_train_envs]
self.extras["train/episode"]["min_command_duration"] = torch.min(self.commands[:, 8])
self.extras["train/episode"]["max_command_duration"] = torch.max(self.commands[:, 8])
self.extras["train/episode"]["min_command_bound"] = torch.min(self.commands[:, 7])
self.extras["train/episode"]["max_command_bound"] = torch.max(self.commands[:, 7])
self.extras["train/episode"]["min_command_offset"] = torch.min(self.commands[:, 6])
self.extras["train/episode"]["max_command_offset"] = torch.max(self.commands[:, 6])
self.extras["train/episode"]["min_command_phase"] = torch.min(self.commands[:, 5])
self.extras["train/episode"]["max_command_phase"] = torch.max(self.commands[:, 5])
self.extras["train/episode"]["min_command_freq"] = torch.min(self.commands[:, 4])
self.extras["train/episode"]["max_command_freq"] = torch.max(self.commands[:, 4])
self.extras["train/episode"]["min_command_x_vel"] = torch.min(self.commands[:, 0])
self.extras["train/episode"]["max_command_x_vel"] = torch.max(self.commands[:, 0])
self.extras["train/episode"]["min_command_y_vel"] = torch.min(self.commands[:, 1])
self.extras["train/episode"]["max_command_y_vel"] = torch.max(self.commands[:, 1])
self.extras["train/episode"]["min_command_yaw_vel"] = torch.min(self.commands[:, 2])
self.extras["train/episode"]["max_command_yaw_vel"] = torch.max(self.commands[:, 2])
if self.cfg.commands.num_commands > 9:
self.extras["train/episode"]["min_command_swing_height"] = torch.min(self.commands[:, 9])
self.extras["train/episode"]["max_command_swing_height"] = torch.max(self.commands[:, 9])
for curriculum, category in zip(self.curricula, self.category_names):
self.extras["train/episode"][f"command_area_{category}"] = np.sum(curriculum.weights) / \
curriculum.weights.shape[0]
self.extras["train/episode"]["min_action"] = torch.min(self.actions)
self.extras["train/episode"]["max_action"] = torch.max(self.actions)
self.extras["curriculum/distribution"] = {}
for curriculum, category in zip(self.curricula, self.category_names):
self.extras[f"curriculum/distribution"][f"weights_{category}"] = curriculum.weights
self.extras[f"curriculum/distribution"][f"grid_{category}"] = curriculum.grid
if self.cfg.env.send_timeouts:
self.extras["time_outs"] = self.time_out_buf[:self.num_train_envs]
self.gait_indices[env_ids] = 0
for i in range(len(self.lag_buffer)):
self.lag_buffer[i][env_ids, :] = 0
def set_idx_pose(self, env_ids, dof_pos, base_state):
if len(env_ids) == 0:
return
env_ids_int32 = env_ids.to(dtype=torch.int32).to(self.device)
# joints
if dof_pos is not None:
self.dof_pos[env_ids] = dof_pos
self.dof_vel[env_ids] = 0.
self.gym.set_dof_state_tensor_indexed(self.sim,
gymtorch.unwrap_tensor(self.dof_state),
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
# base position
self.root_states[env_ids] = base_state.to(self.device)
self.gym.set_actor_root_state_tensor_indexed(self.sim,
gymtorch.unwrap_tensor(self.root_states),
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
def compute_reward(self):
""" Compute rewards
Calls each reward function which had a non-zero scale (processed in self._prepare_reward_function())
adds each term to the episode sums and to the total reward
"""
self.rew_buf[:] = 0.
self.rew_buf_pos[:] = 0.
self.rew_buf_neg[:] = 0.
for i in range(len(self.reward_functions)):
name = self.reward_names[i]
rew = self.reward_functions[i]() * self.reward_scales[name]
self.rew_buf += rew
if torch.sum(rew) >= 0:
self.rew_buf_pos += rew
elif torch.sum(rew) <= 0:
self.rew_buf_neg += rew
self.episode_sums[name] += rew
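# for the shaped contact-tracking terms the scale is added on top of the reward before logging,
# presumably so the command_sums compared against the curriculum thresholds in _resample_commands
# stay non-negative (these shaped terms are otherwise penalties)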
if name in ['tracking_contacts_shaped_force', 'tracking_contacts_shaped_vel']:
self.command_sums[name] += self.reward_scales[name] + rew
else:
self.command_sums[name] += rew
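# total-reward post-processing: either clip the summed reward at zero, or (ji22 style) multiply
# the positive terms by exp(negative_terms / sigma_rew_neg) so penalties act multiplicatively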
if self.cfg.rewards.only_positive_rewards:
self.rew_buf[:] = torch.clip(self.rew_buf[:], min=0.)
elif self.cfg.rewards.only_positive_rewards_ji22_style: #TODO: update
self.rew_buf[:] = self.rew_buf_pos[:] * torch.exp(self.rew_buf_neg[:] / self.cfg.rewards.sigma_rew_neg)
self.episode_sums["total"] += self.rew_buf
# add termination reward after clipping
if "termination" in self.reward_scales:
rew = self.reward_container._reward_termination() * self.reward_scales["termination"]
self.rew_buf += rew
self.episode_sums["termination"] += rew
self.command_sums["termination"] += rew
self.command_sums["lin_vel_raw"] += self.base_lin_vel[:, 0]
self.command_sums["ang_vel_raw"] += self.base_ang_vel[:, 2]
self.command_sums["lin_vel_residual"] += (self.base_lin_vel[:, 0] - self.commands[:, 0]) ** 2
self.command_sums["ang_vel_residual"] += (self.base_ang_vel[:, 2] - self.commands[:, 2]) ** 2
self.command_sums["ep_timesteps"] += 1
def compute_observations(self):
""" Computes observations
"""
self.obs_buf = torch.cat((self.projected_gravity,
(self.dof_pos[:, :self.num_actuated_dof] - self.default_dof_pos[:,
:self.num_actuated_dof]) * self.obs_scales.dof_pos,
self.dof_vel[:, :self.num_actuated_dof] * self.obs_scales.dof_vel,
self.actions
), dim=-1)
# if self.cfg.env.observe_command and not self.cfg.env.observe_height_command:
# self.obs_buf = torch.cat((self.projected_gravity,
# self.commands[:, :3] * self.commands_scale,
# (self.dof_pos - self.default_dof_pos) * self.obs_scales.dof_pos,
# self.dof_vel * self.obs_scales.dof_vel,
# self.actions
# ), dim=-1)
if self.cfg.env.observe_command:
self.obs_buf = torch.cat((self.projected_gravity,
self.commands * self.commands_scale,
(self.dof_pos[:, :self.num_actuated_dof] - self.default_dof_pos[:,
:self.num_actuated_dof]) * self.obs_scales.dof_pos,
self.dof_vel[:, :self.num_actuated_dof] * self.obs_scales.dof_vel,
self.actions
), dim=-1)
if self.cfg.env.observe_two_prev_actions:
self.obs_buf = torch.cat((self.obs_buf,
self.last_actions), dim=-1)
if self.cfg.env.observe_timing_parameter:
self.obs_buf = torch.cat((self.obs_buf,
self.gait_indices.unsqueeze(1)), dim=-1)
if self.cfg.env.observe_clock_inputs:
self.obs_buf = torch.cat((self.obs_buf,
self.clock_inputs), dim=-1)
# if self.cfg.env.observe_desired_contact_states:
# self.obs_buf = torch.cat((self.obs_buf,
# self.desired_contact_states), dim=-1)
if self.cfg.env.observe_vel:
if self.cfg.commands.global_reference:
self.obs_buf = torch.cat((self.root_states[:self.num_envs, 7:10] * self.obs_scales.lin_vel,
self.base_ang_vel * self.obs_scales.ang_vel,
self.obs_buf), dim=-1)
else:
self.obs_buf = torch.cat((self.base_lin_vel * self.obs_scales.lin_vel,
self.base_ang_vel * self.obs_scales.ang_vel,
self.obs_buf), dim=-1)
if self.cfg.env.observe_only_ang_vel:
self.obs_buf = torch.cat((self.base_ang_vel * self.obs_scales.ang_vel,
self.obs_buf), dim=-1)
if self.cfg.env.observe_only_lin_vel:
self.obs_buf = torch.cat((self.base_lin_vel * self.obs_scales.lin_vel,
self.obs_buf), dim=-1)
if self.cfg.env.observe_yaw:
forward = quat_apply(self.base_quat, self.forward_vec)
heading = torch.atan2(forward[:, 1], forward[:, 0]).unsqueeze(1)
# heading_error = torch.clip(0.5 * wrap_to_pi(heading), -1., 1.).unsqueeze(1)
self.obs_buf = torch.cat((self.obs_buf,
heading), dim=-1)
if self.cfg.env.observe_contact_states:
self.obs_buf = torch.cat((self.obs_buf, (self.contact_forces[:, self.feet_indices, 2] > 1.).view(
self.num_envs,
-1) * 1.0), dim=1)
# add noise if needed
if self.add_noise:
self.obs_buf += (2 * torch.rand_like(self.obs_buf) - 1) * self.noise_scale_vec
# build privileged obs
self.privileged_obs_buf = torch.empty(self.num_envs, 0).to(self.device)
self.next_privileged_obs_buf = torch.empty(self.num_envs, 0).to(self.device)
if self.cfg.env.priv_observe_friction:
friction_coeffs_scale, friction_coeffs_shift = get_scale_shift(self.cfg.normalization.friction_range)
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
(self.friction_coeffs[:, 0].unsqueeze(
1) - friction_coeffs_shift) * friction_coeffs_scale),
dim=1)
self.next_privileged_obs_buf = torch.cat((self.next_privileged_obs_buf,
(self.friction_coeffs[:, 0].unsqueeze(
1) - friction_coeffs_shift) * friction_coeffs_scale),
dim=1)
if self.cfg.env.priv_observe_ground_friction:
self.ground_friction_coeffs = self._get_ground_frictions(range(self.num_envs))
ground_friction_coeffs_scale, ground_friction_coeffs_shift = get_scale_shift(
self.cfg.normalization.ground_friction_range)
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
(self.ground_friction_coeffs.unsqueeze(
1) - ground_friction_coeffs_shift) * ground_friction_coeffs_scale),
dim=1)
self.next_privileged_obs_buf = torch.cat((self.next_privileged_obs_buf,
(self.ground_friction_coeffs.unsqueeze(
1) - ground_friction_coeffs_shift) * ground_friction_coeffs_scale),
dim=1)
if self.cfg.env.priv_observe_restitution:
restitutions_scale, restitutions_shift = get_scale_shift(self.cfg.normalization.restitution_range)
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
(self.restitutions[:, 0].unsqueeze(
1) - restitutions_shift) * restitutions_scale),
dim=1)
self.next_privileged_obs_buf = torch.cat((self.next_privileged_obs_buf,
(self.restitutions[:, 0].unsqueeze(
1) - restitutions_shift) * restitutions_scale),
dim=1)
if self.cfg.env.priv_observe_base_mass:
payloads_scale, payloads_shift = get_scale_shift(self.cfg.normalization.added_mass_range)
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
(self.payloads.unsqueeze(1) - payloads_shift) * payloads_scale),
dim=1)
self.next_privileged_obs_buf = torch.cat((self.next_privileged_obs_buf,
(self.payloads.unsqueeze(1) - payloads_shift) * payloads_scale),
dim=1)
if self.cfg.env.priv_observe_com_displacement:
com_displacements_scale, com_displacements_shift = get_scale_shift(
self.cfg.normalization.com_displacement_range)
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
(
self.com_displacements - com_displacements_shift) * com_displacements_scale),
dim=1)
self.next_privileged_obs_buf = torch.cat((self.next_privileged_obs_buf,
(
self.com_displacements - com_displacements_shift) * com_displacements_scale),
dim=1)
if self.cfg.env.priv_observe_motor_strength:
motor_strengths_scale, motor_strengths_shift = get_scale_shift(self.cfg.normalization.motor_strength_range)
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
(
self.motor_strengths - motor_strengths_shift) * motor_strengths_scale),
dim=1)
self.next_privileged_obs_buf = torch.cat((self.next_privileged_obs_buf,
(
self.motor_strengths - motor_strengths_shift) * motor_strengths_scale),
dim=1)
if self.cfg.env.priv_observe_motor_offset:
motor_offset_scale, motor_offset_shift = get_scale_shift(self.cfg.normalization.motor_offset_range)
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
(
self.motor_offsets - motor_offset_shift) * motor_offset_scale),
dim=1)
self.next_privileged_obs_buf = torch.cat((self.next_privileged_obs_buf,
(
self.motor_offsets - motor_offset_shift) * motor_offset_scale),
dim=1)
if self.cfg.env.priv_observe_body_height:
body_height_scale, body_height_shift = get_scale_shift(self.cfg.normalization.body_height_range)
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
((self.root_states[:self.num_envs, 2]).view(
self.num_envs, -1) - body_height_shift) * body_height_scale),
dim=1)
self.next_privileged_obs_buf = torch.cat((self.next_privileged_obs_buf,
((self.root_states[:self.num_envs, 2]).view(
self.num_envs, -1) - body_height_shift) * body_height_scale),
dim=1)
if self.cfg.env.priv_observe_body_velocity:
body_velocity_scale, body_velocity_shift = get_scale_shift(self.cfg.normalization.body_velocity_range)
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
((self.base_lin_vel).view(self.num_envs,
-1) - body_velocity_shift) * body_velocity_scale),
dim=1)
self.next_privileged_obs_buf = torch.cat((self.next_privileged_obs_buf,
((self.base_lin_vel).view(self.num_envs,
-1) - body_velocity_shift) * body_velocity_scale),
dim=1)
if self.cfg.env.priv_observe_gravity:
gravity_scale, gravity_shift = get_scale_shift(self.cfg.normalization.gravity_range)
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
(self.gravities - gravity_shift) / gravity_scale),
dim=1)
self.next_privileged_obs_buf = torch.cat((self.next_privileged_obs_buf,
(self.gravities - gravity_shift) / gravity_scale), dim=1)
if self.cfg.env.priv_observe_clock_inputs:
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
self.clock_inputs), dim=-1)
if self.cfg.env.priv_observe_desired_contact_states:
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
self.desired_contact_states), dim=-1)
assert self.privileged_obs_buf.shape[
1] == self.cfg.env.num_privileged_obs, f"num_privileged_obs ({self.cfg.env.num_privileged_obs}) != the number of privileged observations ({self.privileged_obs_buf.shape[1]}), you will discard data from the student!"
def create_sim(self):
""" Creates simulation, terrain and evironments
"""
self.up_axis_idx = 2 # 2 for z, 1 for y -> adapt gravity accordingly
self.sim = self.gym.create_sim(self.sim_device_id, self.graphics_device_id, self.physics_engine,
self.sim_params)
mesh_type = self.cfg.terrain.mesh_type
if mesh_type in ['heightfield', 'trimesh']:
if self.eval_cfg is not None:
self.terrain = Terrain(self.cfg.terrain, self.num_train_envs, self.eval_cfg.terrain, self.num_eval_envs)
else:
self.terrain = Terrain(self.cfg.terrain, self.num_train_envs)
if mesh_type == 'plane':
self._create_ground_plane()
elif mesh_type == 'heightfield':
self._create_heightfield()
elif mesh_type == 'trimesh':
self._create_trimesh()
elif mesh_type is not None:
raise ValueError("Terrain mesh type not recognised. Allowed types are [None, plane, heightfield, trimesh]")
self._create_envs()
def set_camera(self, position, lookat):
""" Set camera position and direction
"""
cam_pos = gymapi.Vec3(position[0], position[1], position[2])
cam_target = gymapi.Vec3(lookat[0], lookat[1], lookat[2])
self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target)
def set_main_agent_pose(self, loc, quat):
self.root_states[0, 0:3] = torch.Tensor(loc)
self.root_states[0, 3:7] = torch.Tensor(quat)
self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states))
# ------------- Callbacks --------------
def _call_train_eval(self, func, env_ids):
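# split env_ids into training envs (id < num_train_envs) and eval envs, apply func with the
# matching config, and concatenate the results when both sets are non-empty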
env_ids_train = env_ids[env_ids < self.num_train_envs]
env_ids_eval = env_ids[env_ids >= self.num_train_envs]
ret, ret_eval = None, None
if len(env_ids_train) > 0:
ret = func(env_ids_train, self.cfg)
if len(env_ids_eval) > 0:
ret_eval = func(env_ids_eval, self.eval_cfg)
if ret is not None and ret_eval is not None: ret = torch.cat((ret, ret_eval), axis=-1)
return ret
def _randomize_gravity(self, external_force = None):
if external_force is not None:
self.gravities[:, :] = external_force.unsqueeze(0)
elif self.cfg.domain_rand.randomize_gravity:
min_gravity, max_gravity = self.cfg.domain_rand.gravity_range
external_force = torch.rand(3, dtype=torch.float, device=self.device,
requires_grad=False) * (max_gravity - min_gravity) + min_gravity
self.gravities[:, :] = external_force.unsqueeze(0)
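# the sampled vector is an offset added to nominal gravity (0, 0, -9.8); gravity_vec keeps the
# normalized direction used for the projected-gravity observation, and the sim gravity is updated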
sim_params = self.gym.get_sim_params(self.sim)
gravity = self.gravities[0, :] + torch.Tensor([0, 0, -9.8]).to(self.device)
self.gravity_vec[:, :] = gravity.unsqueeze(0) / torch.norm(gravity)
sim_params.gravity = gymapi.Vec3(gravity[0], gravity[1], gravity[2])
self.gym.set_sim_params(self.sim, sim_params)
def _process_rigid_shape_props(self, props, env_id):
""" Callback allowing to store/change/randomize the rigid shape properties of each environment.
Called during environment creation.
Base behavior: randomizes the friction of each environment
Args:
props (List[gymapi.RigidShapeProperties]): Properties of each shape of the asset
env_id (int): Environment id
Returns:
[List[gymapi.RigidShapeProperties]]: Modified rigid shape properties
"""
for s in range(len(props)):
props[s].friction = self.friction_coeffs[env_id, 0]
props[s].restitution = self.restitutions[env_id, 0]
return props
def _process_dof_props(self, props, env_id):
""" Callback allowing to store/change/randomize the DOF properties of each environment.
Called during environment creation.
Base behavior: stores position, velocity and torques limits defined in the URDF
Args:
props (numpy.array): Properties of each DOF of the asset
env_id (int): Environment id
Returns:
[numpy.array]: Modified DOF properties
"""
if env_id == 0:
self.dof_pos_limits = torch.zeros(self.num_dof, 2, dtype=torch.float, device=self.device,
requires_grad=False)
self.dof_vel_limits = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False)
self.torque_limits = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False)
for i in range(len(props)):
self.dof_pos_limits[i, 0] = props["lower"][i].item()
self.dof_pos_limits[i, 1] = props["upper"][i].item()
self.dof_vel_limits[i] = props["velocity"][i].item()
self.torque_limits[i] = props["effort"][i].item()
# soft limits
m = (self.dof_pos_limits[i, 0] + self.dof_pos_limits[i, 1]) / 2
r = self.dof_pos_limits[i, 1] - self.dof_pos_limits[i, 0]
self.dof_pos_limits[i, 0] = m - 0.5 * r * self.cfg.rewards.soft_dof_pos_limit
self.dof_pos_limits[i, 1] = m + 0.5 * r * self.cfg.rewards.soft_dof_pos_limit
return props
def _randomize_rigid_body_props(self, env_ids, cfg):
if cfg.domain_rand.randomize_base_mass:
min_payload, max_payload = cfg.domain_rand.added_mass_range
# self.payloads[env_ids] = -1.0
self.payloads[env_ids] = torch.rand(len(env_ids), dtype=torch.float, device=self.device,
requires_grad=False) * (max_payload - min_payload) + min_payload
if cfg.domain_rand.randomize_com_displacement:
min_com_displacement, max_com_displacement = cfg.domain_rand.com_displacement_range
self.com_displacements[env_ids, :] = torch.rand(len(env_ids), 3, dtype=torch.float, device=self.device,
requires_grad=False) * (
max_com_displacement - min_com_displacement) + min_com_displacement
if cfg.domain_rand.randomize_friction:
min_friction, max_friction = cfg.domain_rand.friction_range
self.friction_coeffs[env_ids, :] = torch.rand(len(env_ids), 1, dtype=torch.float, device=self.device,
requires_grad=False) * (
max_friction - min_friction) + min_friction
if cfg.domain_rand.randomize_restitution:
min_restitution, max_restitution = cfg.domain_rand.restitution_range
self.restitutions[env_ids] = torch.rand(len(env_ids), 1, dtype=torch.float, device=self.device,
requires_grad=False) * (
max_restitution - min_restitution) + min_restitution
def refresh_actor_rigid_shape_props(self, env_ids, cfg):
for env_id in env_ids:
rigid_shape_props = self.gym.get_actor_rigid_shape_properties(self.envs[env_id], 0)
for i in range(self.num_dof):
rigid_shape_props[i].friction = self.friction_coeffs[env_id, 0]
rigid_shape_props[i].restitution = self.restitutions[env_id, 0]
self.gym.set_actor_rigid_shape_properties(self.envs[env_id], 0, rigid_shape_props)
def _randomize_dof_props(self, env_ids, cfg):
if cfg.domain_rand.randomize_motor_strength:
min_strength, max_strength = cfg.domain_rand.motor_strength_range
self.motor_strengths[env_ids, :] = torch.rand(len(env_ids), dtype=torch.float, device=self.device,
requires_grad=False).unsqueeze(1) * (
max_strength - min_strength) + min_strength
if cfg.domain_rand.randomize_motor_offset:
min_offset, max_offset = cfg.domain_rand.motor_offset_range
self.motor_offsets[env_ids, :] = torch.rand(len(env_ids), self.num_dof, dtype=torch.float,
device=self.device, requires_grad=False) * (
max_offset - min_offset) + min_offset
if cfg.domain_rand.randomize_Kp_factor:
min_Kp_factor, max_Kp_factor = cfg.domain_rand.Kp_factor_range
self.Kp_factors[env_ids, :] = torch.rand(len(env_ids), dtype=torch.float, device=self.device,
requires_grad=False).unsqueeze(1) * (
max_Kp_factor - min_Kp_factor) + min_Kp_factor
if cfg.domain_rand.randomize_Kd_factor:
min_Kd_factor, max_Kd_factor = cfg.domain_rand.Kd_factor_range
self.Kd_factors[env_ids, :] = torch.rand(len(env_ids), dtype=torch.float, device=self.device,
requires_grad=False).unsqueeze(1) * (
max_Kd_factor - min_Kd_factor) + min_Kd_factor
def _process_rigid_body_props(self, props, env_id):
self.default_body_mass = props[0].mass
props[0].mass = self.default_body_mass + self.payloads[env_id]
props[0].com = gymapi.Vec3(self.com_displacements[env_id, 0], self.com_displacements[env_id, 1],
self.com_displacements[env_id, 2])
return props
def _post_physics_step_callback(self):
""" Callback called before computing terminations, rewards, and observations
Default behaviour: teleport robots away from terrain edges, resample commands, step the gait targets, measure terrain heights, randomly push robots and re-randomize domain properties
"""
# teleport robots to prevent falling off the edge
self._call_train_eval(self._teleport_robots, torch.arange(self.num_envs, device=self.device))
# resample commands
sample_interval = int(self.cfg.commands.resampling_time / self.dt)
env_ids = (self.episode_length_buf % sample_interval == 0).nonzero(as_tuple=False).flatten()
self._resample_commands(env_ids)
self._step_contact_targets()
# measure terrain heights
if self.cfg.terrain.measure_heights:
self.measured_heights = self._get_heights(torch.arange(self.num_envs, device=self.device), self.cfg)
# push robots
self._call_train_eval(self._push_robots, torch.arange(self.num_envs, device=self.device))
# randomize dof properties
env_ids = (self.episode_length_buf % int(self.cfg.domain_rand.rand_interval) == 0).nonzero(
as_tuple=False).flatten()
self._call_train_eval(self._randomize_dof_props, env_ids)
if self.common_step_counter % int(self.cfg.domain_rand.gravity_rand_interval) == 0:
self._randomize_gravity()
if int(self.common_step_counter - self.cfg.domain_rand.gravity_rand_duration) % int(
self.cfg.domain_rand.gravity_rand_interval) == 0:
self._randomize_gravity(torch.tensor([0, 0, 0]))
if self.cfg.domain_rand.randomize_rigids_after_start:
self._call_train_eval(self._randomize_rigid_body_props, env_ids)
self._call_train_eval(self.refresh_actor_rigid_shape_props, env_ids)
def _resample_commands(self, env_ids):
if len(env_ids) == 0: return
timesteps = int(self.cfg.commands.resampling_time / self.dt)
ep_len = min(self.cfg.env.max_episode_length, timesteps)
# update curricula based on terminated environment bins and categories
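# each terminated env reports its average task reward over the episode; bins whose envs exceed
# the success thresholds expand the command sampling distribution to neighbouring bins within
# local_range (expected behaviour of RewardThresholdCurriculum; see curriculum.py)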
for i, (category, curriculum) in enumerate(zip(self.category_names, self.curricula)):
env_ids_in_category = self.env_command_categories[env_ids.cpu()] == i
if isinstance(env_ids_in_category, np.bool_) or len(env_ids_in_category) == 1:
env_ids_in_category = torch.tensor([env_ids_in_category], dtype=torch.bool)
elif len(env_ids_in_category) == 0:
continue
env_ids_in_category = env_ids[env_ids_in_category]
task_rewards, success_thresholds = [], []
for key in ["tracking_lin_vel", "tracking_ang_vel", "tracking_contacts_shaped_force",
"tracking_contacts_shaped_vel"]:
if key in self.command_sums.keys():
task_rewards.append(self.command_sums[key][env_ids_in_category] / ep_len)
success_thresholds.append(self.curriculum_thresholds[key] * self.reward_scales[key])
old_bins = self.env_command_bins[env_ids_in_category.cpu().numpy()]
if len(success_thresholds) > 0:
curriculum.update(old_bins, task_rewards, success_thresholds,
local_range=np.array(
[0.55, 0.55, 0.55, 0.55, 0.35, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0, 1.0, 1.0, 1.0,
1.0]))
# assign resampled environments to new categories
random_env_floats = torch.rand(len(env_ids), device=self.device)
probability_per_category = 1. / len(self.category_names)
category_env_ids = [env_ids[torch.logical_and(probability_per_category * i <= random_env_floats,
random_env_floats < probability_per_category * (i + 1))] for i in
range(len(self.category_names))]
# sample from new category curricula
for i, (category, env_ids_in_category, curriculum) in enumerate(
zip(self.category_names, category_env_ids, self.curricula)):
batch_size = len(env_ids_in_category)
if batch_size == 0: continue
new_commands, new_bin_inds = curriculum.sample(batch_size=batch_size)
self.env_command_bins[env_ids_in_category.cpu().numpy()] = new_bin_inds
self.env_command_categories[env_ids_in_category.cpu().numpy()] = i
self.commands[env_ids_in_category, :] = torch.Tensor(new_commands[:, :self.cfg.commands.num_commands]).to(
self.device)
if self.cfg.commands.num_commands > 5:
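# commands[:, 5:8] are the gait phase, offset and bound variables read by _step_contact_targets;
# each gait category below remaps them to the pattern characteristic of that gait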
if self.cfg.commands.gaitwise_curricula:
for i, (category, env_ids_in_category) in enumerate(zip(self.category_names, category_env_ids)):
if category == "pronk": # pronking
self.commands[env_ids_in_category, 5] = (self.commands[env_ids_in_category, 5] / 2 - 0.25) % 1
self.commands[env_ids_in_category, 6] = (self.commands[env_ids_in_category, 6] / 2 - 0.25) % 1
self.commands[env_ids_in_category, 7] = (self.commands[env_ids_in_category, 7] / 2 - 0.25) % 1
elif category == "trot": # trotting
self.commands[env_ids_in_category, 5] = self.commands[env_ids_in_category, 5] / 2 + 0.25
self.commands[env_ids_in_category, 6] = 0
self.commands[env_ids_in_category, 7] = 0
elif category == "pace": # pacing
self.commands[env_ids_in_category, 5] = 0
self.commands[env_ids_in_category, 6] = self.commands[env_ids_in_category, 6] / 2 + 0.25
self.commands[env_ids_in_category, 7] = 0
elif category == "bound": # bounding
self.commands[env_ids_in_category, 5] = 0
self.commands[env_ids_in_category, 6] = 0
self.commands[env_ids_in_category, 7] = self.commands[env_ids_in_category, 7] / 2 + 0.25
elif self.cfg.commands.exclusive_phase_offset:
random_env_floats = torch.rand(len(env_ids), device=self.device)
trotting_envs = env_ids[random_env_floats < 0.34]
pacing_envs = env_ids[torch.logical_and(0.34 <= random_env_floats, random_env_floats < 0.67)]
bounding_envs = env_ids[0.67 <= random_env_floats]
self.commands[pacing_envs, 5] = 0
self.commands[bounding_envs, 5] = 0
self.commands[trotting_envs, 6] = 0
self.commands[bounding_envs, 6] = 0
self.commands[trotting_envs, 7] = 0
self.commands[pacing_envs, 7] = 0
elif self.cfg.commands.balance_gait_distribution:
random_env_floats = torch.rand(len(env_ids), device=self.device)
pronking_envs = env_ids[random_env_floats <= 0.25]
trotting_envs = env_ids[torch.logical_and(0.25 <= random_env_floats, random_env_floats < 0.50)]
pacing_envs = env_ids[torch.logical_and(0.50 <= random_env_floats, random_env_floats < 0.75)]
bounding_envs = env_ids[0.75 <= random_env_floats]
self.commands[pronking_envs, 5] = (self.commands[pronking_envs, 5] / 2 - 0.25) % 1
self.commands[pronking_envs, 6] = (self.commands[pronking_envs, 6] / 2 - 0.25) % 1
self.commands[pronking_envs, 7] = (self.commands[pronking_envs, 7] / 2 - 0.25) % 1
self.commands[trotting_envs, 6] = 0
self.commands[trotting_envs, 7] = 0
self.commands[pacing_envs, 5] = 0
self.commands[pacing_envs, 7] = 0
self.commands[bounding_envs, 5] = 0
self.commands[bounding_envs, 6] = 0
self.commands[trotting_envs, 5] = self.commands[trotting_envs, 5] / 2 + 0.25
self.commands[pacing_envs, 6] = self.commands[pacing_envs, 6] / 2 + 0.25
self.commands[bounding_envs, 7] = self.commands[bounding_envs, 7] / 2 + 0.25
if self.cfg.commands.binary_phases:
self.commands[env_ids, 5] = (torch.round(2 * self.commands[env_ids, 5])) / 2.0 % 1
self.commands[env_ids, 6] = (torch.round(2 * self.commands[env_ids, 6])) / 2.0 % 1
self.commands[env_ids, 7] = (torch.round(2 * self.commands[env_ids, 7])) / 2.0 % 1
# setting the smaller commands to zero
self.commands[env_ids, :2] *= (torch.norm(self.commands[env_ids, :2], dim=1) > 0.2).unsqueeze(1)
# reset command sums
for key in self.command_sums.keys():
self.command_sums[key][env_ids] = 0.
def _step_contact_targets(self):
if self.cfg.env.observe_gait_commands:
frequencies = self.commands[:, 4]
phases = self.commands[:, 5]
offsets = self.commands[:, 6]
bounds = self.commands[:, 7]
durations = self.commands[:, 8]
self.gait_indices = torch.remainder(self.gait_indices + self.dt * frequencies, 1.0)
if self.cfg.commands.pacing_offset:
foot_indices = [self.gait_indices + phases + offsets + bounds,
self.gait_indices + bounds,
self.gait_indices + offsets,
self.gait_indices + phases]
else:
foot_indices = [self.gait_indices + phases + offsets + bounds,
self.gait_indices + offsets,
self.gait_indices + bounds,
self.gait_indices + phases]
self.foot_indices = torch.remainder(torch.cat([foot_indices[i].unsqueeze(1) for i in range(4)], dim=1), 1.0)
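# remap each foot's phase so that stance occupies [0, 0.5] and swing occupies [0.5, 1],
# stretched according to the commanded stance duration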
for idxs in foot_indices:
stance_idxs = torch.remainder(idxs, 1) < durations
swing_idxs = torch.remainder(idxs, 1) > durations
idxs[stance_idxs] = torch.remainder(idxs[stance_idxs], 1) * (0.5 / durations[stance_idxs])
idxs[swing_idxs] = 0.5 + (torch.remainder(idxs[swing_idxs], 1) - durations[swing_idxs]) * (
0.5 / (1 - durations[swing_idxs]))
# if self.cfg.commands.durations_warp_clock_inputs:
self.clock_inputs[:, 0] = torch.sin(2 * np.pi * foot_indices[0])
self.clock_inputs[:, 1] = torch.sin(2 * np.pi * foot_indices[1])
self.clock_inputs[:, 2] = torch.sin(2 * np.pi * foot_indices[2])
self.clock_inputs[:, 3] = torch.sin(2 * np.pi * foot_indices[3])
self.doubletime_clock_inputs[:, 0] = torch.sin(4 * np.pi * foot_indices[0])
self.doubletime_clock_inputs[:, 1] = torch.sin(4 * np.pi * foot_indices[1])
self.doubletime_clock_inputs[:, 2] = torch.sin(4 * np.pi * foot_indices[2])
self.doubletime_clock_inputs[:, 3] = torch.sin(4 * np.pi * foot_indices[3])
self.halftime_clock_inputs[:, 0] = torch.sin(np.pi * foot_indices[0])
self.halftime_clock_inputs[:, 1] = torch.sin(np.pi * foot_indices[1])
self.halftime_clock_inputs[:, 2] = torch.sin(np.pi * foot_indices[2])
self.halftime_clock_inputs[:, 3] = torch.sin(np.pi * foot_indices[3])
# smooth the stance/swing transitions with normal CDFs of width kappa (a von Mises-style
# smoothing window); each desired contact state is ~1 during commanded stance and ~0 during swing
kappa = self.cfg.rewards.kappa_gait_probs
smoothing_cdf_start = torch.distributions.normal.Normal(0,
kappa).cdf # (x) + torch.distributions.normal.Normal(1, kappa).cdf(x)) / 2
smoothing_multiplier_FL = (smoothing_cdf_start(torch.remainder(foot_indices[0], 1.0)) * (
1 - smoothing_cdf_start(torch.remainder(foot_indices[0], 1.0) - 0.5)) +
smoothing_cdf_start(torch.remainder(foot_indices[0], 1.0) - 1) * (
1 - smoothing_cdf_start(
torch.remainder(foot_indices[0], 1.0) - 0.5 - 1)))
smoothing_multiplier_FR = (smoothing_cdf_start(torch.remainder(foot_indices[1], 1.0)) * (
1 - smoothing_cdf_start(torch.remainder(foot_indices[1], 1.0) - 0.5)) +
smoothing_cdf_start(torch.remainder(foot_indices[1], 1.0) - 1) * (
1 - smoothing_cdf_start(
torch.remainder(foot_indices[1], 1.0) - 0.5 - 1)))
smoothing_multiplier_RL = (smoothing_cdf_start(torch.remainder(foot_indices[2], 1.0)) * (
1 - smoothing_cdf_start(torch.remainder(foot_indices[2], 1.0) - 0.5)) +
smoothing_cdf_start(torch.remainder(foot_indices[2], 1.0) - 1) * (
1 - smoothing_cdf_start(
torch.remainder(foot_indices[2], 1.0) - 0.5 - 1)))
smoothing_multiplier_RR = (smoothing_cdf_start(torch.remainder(foot_indices[3], 1.0)) * (
1 - smoothing_cdf_start(torch.remainder(foot_indices[3], 1.0) - 0.5)) +
smoothing_cdf_start(torch.remainder(foot_indices[3], 1.0) - 1) * (
1 - smoothing_cdf_start(
torch.remainder(foot_indices[3], 1.0) - 0.5 - 1)))
self.desired_contact_states[:, 0] = smoothing_multiplier_FL
self.desired_contact_states[:, 1] = smoothing_multiplier_FR
self.desired_contact_states[:, 2] = smoothing_multiplier_RL
self.desired_contact_states[:, 3] = smoothing_multiplier_RR
if self.cfg.commands.num_commands > 9:
self.desired_footswing_height = self.commands[:, 9]
def _compute_torques(self, actions):
""" Compute torques from actions.
Actions can be interpreted as position or velocity targets given to a PD controller, or directly as scaled torques.
[NOTE]: torques must have the same dimension as the number of DOFs, even if some DOFs are not actuated.
Args:
actions (torch.Tensor): Actions
Returns:
[torch.Tensor]: Torques sent to the simulation
"""
# pd controller
actions_scaled = actions[:, :12] * self.cfg.control.action_scale
actions_scaled[:, [0, 3, 6, 9]] *= self.cfg.control.hip_scale_reduction # scale down hip flexion range
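# optional actuation lag: push the newest target into a FIFO and apply the oldest entry,
# which is cfg.domain_rand.lag_timesteps control steps old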
if self.cfg.domain_rand.randomize_lag_timesteps:
self.lag_buffer = self.lag_buffer[1:] + [actions_scaled.clone()]
self.joint_pos_target = self.lag_buffer[0] + self.default_dof_pos
else:
self.joint_pos_target = actions_scaled + self.default_dof_pos
control_type = self.cfg.control.control_type
if control_type == "actuator_net":
self.joint_pos_err = self.dof_pos - self.joint_pos_target + self.motor_offsets
self.joint_vel = self.dof_vel
torques = self.actuator_network(self.joint_pos_err, self.joint_pos_err_last, self.joint_pos_err_last_last,
self.joint_vel, self.joint_vel_last, self.joint_vel_last_last)
self.joint_pos_err_last_last = torch.clone(self.joint_pos_err_last)
self.joint_pos_err_last = torch.clone(self.joint_pos_err)
self.joint_vel_last_last = torch.clone(self.joint_vel_last)
self.joint_vel_last = torch.clone(self.joint_vel)
elif control_type == "P":
torques = self.p_gains * self.Kp_factors * (
self.joint_pos_target - self.dof_pos + self.motor_offsets) - self.d_gains * self.Kd_factors * self.dof_vel
else:
raise NameError(f"Unknown controller type: {control_type}")
torques = torques * self.motor_strengths
return torch.clip(torques, -self.torque_limits, self.torque_limits)
def _reset_dofs(self, env_ids, cfg):
""" Resets DOF position and velocities of selected environmments
Positions are randomly selected within 0.5:1.5 x default positions.
Velocities are set to zero.
Args:
env_ids (List[int]): Environment ids
"""
self.dof_pos[env_ids] = self.default_dof_pos * torch_rand_float(0.5, 1.5, (len(env_ids), self.num_dof),
device=self.device)
self.dof_vel[env_ids] = 0.
env_ids_int32 = env_ids.to(dtype=torch.int32)
self.gym.set_dof_state_tensor_indexed(self.sim,
gymtorch.unwrap_tensor(self.dof_state),
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
def _reset_root_states(self, env_ids, cfg):
""" Resets ROOT states position and velocities of selected environmments
Sets base position based on the curriculum
Selects randomized base velocities within -0.5:0.5 [m/s, rad/s]
Args:
env_ids (List[int]): Environment ids
"""
# base position
if self.custom_origins:
self.root_states[env_ids] = self.base_init_state
self.root_states[env_ids, :3] += self.env_origins[env_ids]
self.root_states[env_ids, 0:1] += torch_rand_float(-cfg.terrain.x_init_range,
cfg.terrain.x_init_range, (len(env_ids), 1),
device=self.device)
self.root_states[env_ids, 1:2] += torch_rand_float(-cfg.terrain.y_init_range,
cfg.terrain.y_init_range, (len(env_ids), 1),
device=self.device)
self.root_states[env_ids, 0] += cfg.terrain.x_init_offset
self.root_states[env_ids, 1] += cfg.terrain.y_init_offset
else:
self.root_states[env_ids] = self.base_init_state
self.root_states[env_ids, :3] += self.env_origins[env_ids]
# base yaws
init_yaws = torch_rand_float(-cfg.terrain.yaw_init_range,
cfg.terrain.yaw_init_range, (len(env_ids), 1),
device=self.device)
quat = quat_from_angle_axis(init_yaws, torch.Tensor([0, 0, 1]).to(self.device))[:, 0, :]
self.root_states[env_ids, 3:7] = quat
# base velocities
self.root_states[env_ids, 7:13] = torch_rand_float(-0.5, 0.5, (len(env_ids), 6),
device=self.device) # [7:10]: lin vel, [10:13]: ang vel
env_ids_int32 = env_ids.to(dtype=torch.int32)
self.gym.set_actor_root_state_tensor_indexed(self.sim,
gymtorch.unwrap_tensor(self.root_states),
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
if cfg.env.record_video and 0 in env_ids:
if self.complete_video_frames is None:
self.complete_video_frames = []
else:
self.complete_video_frames = self.video_frames[:]
self.video_frames = []
if cfg.env.record_video and self.eval_cfg is not None and self.num_train_envs in env_ids:
if self.complete_video_frames_eval is None:
self.complete_video_frames_eval = []
else:
self.complete_video_frames_eval = self.video_frames_eval[:]
self.video_frames_eval = []
def _push_robots(self, env_ids, cfg):
""" Random pushes the robots. Emulates an impulse by setting a randomized base velocity.
"""
if cfg.domain_rand.push_robots:
env_ids = env_ids[self.episode_length_buf[env_ids] % int(cfg.domain_rand.push_interval) == 0]
max_vel = cfg.domain_rand.max_push_vel_xy
self.root_states[env_ids, 7:9] = torch_rand_float(-max_vel, max_vel, (len(env_ids), 2),
device=self.device) # lin vel x/y
self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states))
def _teleport_robots(self, env_ids, cfg):
""" Teleports any robots that are too close to the edge to the other side
"""
if cfg.terrain.teleport_robots:
thresh = cfg.terrain.teleport_thresh
x_offset = int(cfg.terrain.x_offset * cfg.terrain.horizontal_scale)
low_x_ids = env_ids[self.root_states[env_ids, 0] < thresh + x_offset]
self.root_states[low_x_ids, 0] += cfg.terrain.terrain_length * (cfg.terrain.num_rows - 1)
high_x_ids = env_ids[
self.root_states[env_ids, 0] > cfg.terrain.terrain_length * cfg.terrain.num_rows - thresh + x_offset]
self.root_states[high_x_ids, 0] -= cfg.terrain.terrain_length * (cfg.terrain.num_rows - 1)
low_y_ids = env_ids[self.root_states[env_ids, 1] < thresh]
self.root_states[low_y_ids, 1] += cfg.terrain.terrain_width * (cfg.terrain.num_cols - 1)
high_y_ids = env_ids[
self.root_states[env_ids, 1] > cfg.terrain.terrain_width * cfg.terrain.num_cols - thresh]
self.root_states[high_y_ids, 1] -= cfg.terrain.terrain_width * (cfg.terrain.num_cols - 1)
self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states))
self.gym.refresh_actor_root_state_tensor(self.sim)
def _get_noise_scale_vec(self, cfg):
""" Sets a vector used to scale the noise added to the observations.
[NOTE]: Must be adapted when changing the observations structure
Args:
cfg (Dict): Environment config file
Returns:
[torch.Tensor]: Vector of scales used to multiply a uniform distribution in [-1, 1]
"""
# noise_vec = torch.zeros_like(self.obs_buf[0])
self.add_noise = self.cfg.noise.add_noise
noise_scales = self.cfg.noise_scales
noise_level = self.cfg.noise.noise_level
noise_vec = torch.cat((torch.ones(3) * noise_scales.gravity * noise_level,
torch.ones(
self.num_actuated_dof) * noise_scales.dof_pos * noise_level * self.obs_scales.dof_pos,
torch.ones(
self.num_actuated_dof) * noise_scales.dof_vel * noise_level * self.obs_scales.dof_vel,
torch.zeros(self.num_actions),
), dim=0)
if self.cfg.env.observe_command:
noise_vec = torch.cat((torch.ones(3) * noise_scales.gravity * noise_level,
torch.zeros(self.cfg.commands.num_commands),
torch.ones(
self.num_actuated_dof) * noise_scales.dof_pos * noise_level * self.obs_scales.dof_pos,
torch.ones(
self.num_actuated_dof) * noise_scales.dof_vel * noise_level * self.obs_scales.dof_vel,
torch.zeros(self.num_actions),
), dim=0)
if self.cfg.env.observe_two_prev_actions:
noise_vec = torch.cat((noise_vec,
torch.zeros(self.num_actions)
), dim=0)
if self.cfg.env.observe_timing_parameter:
noise_vec = torch.cat((noise_vec,
torch.zeros(1)
), dim=0)
if self.cfg.env.observe_clock_inputs:
noise_vec = torch.cat((noise_vec,
torch.zeros(4)
), dim=0)
if self.cfg.env.observe_vel:
noise_vec = torch.cat((torch.ones(3) * noise_scales.lin_vel * noise_level * self.obs_scales.lin_vel,
torch.ones(3) * noise_scales.ang_vel * noise_level * self.obs_scales.ang_vel,
noise_vec
), dim=0)
if self.cfg.env.observe_only_lin_vel:
noise_vec = torch.cat((torch.ones(3) * noise_scales.lin_vel * noise_level * self.obs_scales.lin_vel,
noise_vec
), dim=0)
if self.cfg.env.observe_yaw:
noise_vec = torch.cat((noise_vec,
torch.zeros(1),
), dim=0)
if self.cfg.env.observe_contact_states:
noise_vec = torch.cat((noise_vec,
torch.ones(4) * noise_scales.contact_states * noise_level,
), dim=0)
noise_vec = noise_vec.to(self.device)
return noise_vec
# ----------------------------------------
def _init_buffers(self):
""" Initialize torch tensors which will contain simulation states and processed quantities
"""
# get gym GPU state tensors
actor_root_state = self.gym.acquire_actor_root_state_tensor(self.sim)
dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim)
net_contact_forces = self.gym.acquire_net_contact_force_tensor(self.sim)
rigid_body_state = self.gym.acquire_rigid_body_state_tensor(self.sim)
self.gym.refresh_dof_state_tensor(self.sim)
self.gym.refresh_actor_root_state_tensor(self.sim)
self.gym.refresh_net_contact_force_tensor(self.sim)
self.gym.refresh_rigid_body_state_tensor(self.sim)
self.gym.render_all_camera_sensors(self.sim)
# create some wrapper tensors for different slices
self.root_states = gymtorch.wrap_tensor(actor_root_state)
self.dof_state = gymtorch.wrap_tensor(dof_state_tensor)
self.net_contact_forces = gymtorch.wrap_tensor(net_contact_forces)[:self.num_envs * self.num_bodies, :]
self.dof_pos = self.dof_state.view(self.num_envs, self.num_dof, 2)[..., 0]
self.base_pos = self.root_states[:self.num_envs, 0:3]
self.dof_vel = self.dof_state.view(self.num_envs, self.num_dof, 2)[..., 1]
self.base_quat = self.root_states[:self.num_envs, 3:7]
self.rigid_body_state = gymtorch.wrap_tensor(rigid_body_state)[:self.num_envs * self.num_bodies, :]
self.foot_velocities = self.rigid_body_state.view(self.num_envs, self.num_bodies, 13)[:,
self.feet_indices,
7:10]
self.foot_positions = self.rigid_body_state.view(self.num_envs, self.num_bodies, 13)[:, self.feet_indices,
0:3]
self.prev_base_pos = self.base_pos.clone()
self.prev_foot_velocities = self.foot_velocities.clone()
self.lag_buffer = [torch.zeros_like(self.dof_pos) for i in range(self.cfg.domain_rand.lag_timesteps+1)]
self.contact_forces = gymtorch.wrap_tensor(net_contact_forces)[:self.num_envs * self.num_bodies, :].view(self.num_envs, -1,
3) # shape: num_envs, num_bodies, xyz axis
# initialize some data used later on
self.common_step_counter = 0
self.extras = {}
if self.cfg.terrain.measure_heights:
self.height_points = self._init_height_points(torch.arange(self.num_envs, device=self.device), self.cfg)
self.measured_heights = 0
self.noise_scale_vec = self._get_noise_scale_vec(self.cfg) # , self.eval_cfg)
self.gravity_vec = to_torch(get_axis_params(-1., self.up_axis_idx), device=self.device).repeat(
(self.num_envs, 1))
self.forward_vec = to_torch([1., 0., 0.], device=self.device).repeat((self.num_envs, 1))
self.torques = torch.zeros(self.num_envs, self.num_dof, dtype=torch.float, device=self.device,
requires_grad=False)
self.p_gains = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False)
self.d_gains = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False)
self.actions = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device,
requires_grad=False)
self.last_actions = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device,
requires_grad=False)
self.last_last_actions = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device,
requires_grad=False)
self.joint_pos_target = torch.zeros(self.num_envs, self.num_dof, dtype=torch.float,
device=self.device,
requires_grad=False)
self.last_joint_pos_target = torch.zeros(self.num_envs, self.num_dof, dtype=torch.float, device=self.device,
requires_grad=False)
self.last_last_joint_pos_target = torch.zeros(self.num_envs, self.num_dof, dtype=torch.float,
device=self.device,
requires_grad=False)
self.last_dof_vel = torch.zeros_like(self.dof_vel)
self.last_root_vel = torch.zeros_like(self.root_states[:, 7:13])
self.commands_value = torch.zeros(self.num_envs, self.cfg.commands.num_commands, dtype=torch.float,
device=self.device, requires_grad=False)
self.commands = torch.zeros_like(self.commands_value) # x vel, y vel, yaw vel, body height, gait freq, gait phase/offset/bound, duration, footswing height, pitch, roll, stance width/length, aux reward
self.commands_scale = torch.tensor([self.obs_scales.lin_vel, self.obs_scales.lin_vel, self.obs_scales.ang_vel,
self.obs_scales.body_height_cmd, self.obs_scales.gait_freq_cmd,
self.obs_scales.gait_phase_cmd, self.obs_scales.gait_phase_cmd,
self.obs_scales.gait_phase_cmd, self.obs_scales.gait_phase_cmd,
self.obs_scales.footswing_height_cmd, self.obs_scales.body_pitch_cmd,
self.obs_scales.body_roll_cmd, self.obs_scales.stance_width_cmd,
self.obs_scales.stance_length_cmd, self.obs_scales.aux_reward_cmd],
device=self.device, requires_grad=False, )[:self.cfg.commands.num_commands]
self.desired_contact_states = torch.zeros(self.num_envs, 4, dtype=torch.float, device=self.device,
requires_grad=False, )
self.feet_air_time = torch.zeros(self.num_envs, self.feet_indices.shape[0], dtype=torch.float,
device=self.device, requires_grad=False)
self.last_contacts = torch.zeros(self.num_envs, len(self.feet_indices), dtype=torch.bool, device=self.device,
requires_grad=False)
self.last_contact_filt = torch.zeros(self.num_envs, len(self.feet_indices), dtype=torch.bool,
device=self.device,
requires_grad=False)
self.base_lin_vel = quat_rotate_inverse(self.base_quat, self.root_states[:self.num_envs, 7:10])
self.base_ang_vel = quat_rotate_inverse(self.base_quat, self.root_states[:self.num_envs, 10:13])
self.projected_gravity = quat_rotate_inverse(self.base_quat, self.gravity_vec)
# joint positions offsets and PD gains
self.default_dof_pos = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False)
for i in range(self.num_dofs):
name = self.dof_names[i]
angle = self.cfg.init_state.default_joint_angles[name]
self.default_dof_pos[i] = angle
found = False
for dof_name in self.cfg.control.stiffness.keys():
if dof_name in name:
self.p_gains[i] = self.cfg.control.stiffness[dof_name]
self.d_gains[i] = self.cfg.control.damping[dof_name]
found = True
if not found:
self.p_gains[i] = 0.
self.d_gains[i] = 0.
if self.cfg.control.control_type in ["P", "V"]:
print(f"PD gain of joint {name} were not defined, setting them to zero")
self.default_dof_pos = self.default_dof_pos.unsqueeze(0)
if self.cfg.control.control_type == "actuator_net":
actuator_path = f'{os.path.dirname(os.path.dirname(os.path.realpath(__file__)))}/../../resources/actuator_nets/unitree_go1.pt'
actuator_network = torch.jit.load(actuator_path).to(self.device)
def eval_actuator_network(joint_pos, joint_pos_last, joint_pos_last_last, joint_vel, joint_vel_last,
joint_vel_last_last):
xs = torch.cat((joint_pos.unsqueeze(-1),
joint_pos_last.unsqueeze(-1),
joint_pos_last_last.unsqueeze(-1),
joint_vel.unsqueeze(-1),
joint_vel_last.unsqueeze(-1),
joint_vel_last_last.unsqueeze(-1)), dim=-1)
torques = actuator_network(xs.view(self.num_envs * 12, 6))
return torques.view(self.num_envs, 12)
self.actuator_network = eval_actuator_network
self.joint_pos_err_last_last = torch.zeros((self.num_envs, 12), device=self.device)
self.joint_pos_err_last = torch.zeros((self.num_envs, 12), device=self.device)
self.joint_vel_last_last = torch.zeros((self.num_envs, 12), device=self.device)
self.joint_vel_last = torch.zeros((self.num_envs, 12), device=self.device)
def _init_custom_buffers__(self):
# domain randomization properties
self.friction_coeffs = self.default_friction * torch.ones(self.num_envs, 4, dtype=torch.float, device=self.device,
requires_grad=False)
self.restitutions = self.default_restitution * torch.ones(self.num_envs, 4, dtype=torch.float, device=self.device,
requires_grad=False)
self.payloads = torch.zeros(self.num_envs, dtype=torch.float, device=self.device, requires_grad=False)
self.com_displacements = torch.zeros(self.num_envs, 3, dtype=torch.float, device=self.device,
requires_grad=False)
self.motor_strengths = torch.ones(self.num_envs, self.num_dof, dtype=torch.float, device=self.device,
requires_grad=False)
self.motor_offsets = torch.zeros(self.num_envs, self.num_dof, dtype=torch.float, device=self.device,
requires_grad=False)
self.Kp_factors = torch.ones(self.num_envs, self.num_dof, dtype=torch.float, device=self.device,
requires_grad=False)
self.Kd_factors = torch.ones(self.num_envs, self.num_dof, dtype=torch.float, device=self.device,
requires_grad=False)
self.gravities = torch.zeros(self.num_envs, 3, dtype=torch.float, device=self.device,
requires_grad=False)
self.gravity_vec = to_torch(get_axis_params(-1., self.up_axis_idx), device=self.device).repeat(
(self.num_envs, 1))
# if custom initialization values were passed in, set them here
dynamics_params = ["friction_coeffs", "restitutions", "payloads", "com_displacements", "motor_strengths",
"Kp_factors", "Kd_factors"]
if self.initial_dynamics_dict is not None:
for k, v in self.initial_dynamics_dict.items():
if k in dynamics_params:
setattr(self, k, v.to(self.device))
self.gait_indices = torch.zeros(self.num_envs, dtype=torch.float, device=self.device,
requires_grad=False)
self.clock_inputs = torch.zeros(self.num_envs, 4, dtype=torch.float, device=self.device,
requires_grad=False)
self.doubletime_clock_inputs = torch.zeros(self.num_envs, 4, dtype=torch.float, device=self.device,
requires_grad=False)
self.halftime_clock_inputs = torch.zeros(self.num_envs, 4, dtype=torch.float, device=self.device,
requires_grad=False)
def _init_command_distribution(self, env_ids):
# new style curriculum
self.category_names = ['nominal']
if self.cfg.commands.gaitwise_curricula:
self.category_names = ['pronk', 'trot', 'pace', 'bound']
if self.cfg.commands.curriculum_type == "RewardThresholdCurriculum":
from .curriculum import RewardThresholdCurriculum
CurriculumClass = RewardThresholdCurriculum
self.curricula = []
for category in self.category_names:
self.curricula += [CurriculumClass(seed=self.cfg.commands.curriculum_seed,
x_vel=(self.cfg.commands.limit_vel_x[0],
self.cfg.commands.limit_vel_x[1],
self.cfg.commands.num_bins_vel_x),
y_vel=(self.cfg.commands.limit_vel_y[0],
self.cfg.commands.limit_vel_y[1],
self.cfg.commands.num_bins_vel_y),
yaw_vel=(self.cfg.commands.limit_vel_yaw[0],
self.cfg.commands.limit_vel_yaw[1],
self.cfg.commands.num_bins_vel_yaw),
body_height=(self.cfg.commands.limit_body_height[0],
self.cfg.commands.limit_body_height[1],
self.cfg.commands.num_bins_body_height),
gait_frequency=(self.cfg.commands.limit_gait_frequency[0],
self.cfg.commands.limit_gait_frequency[1],
self.cfg.commands.num_bins_gait_frequency),
gait_phase=(self.cfg.commands.limit_gait_phase[0],
self.cfg.commands.limit_gait_phase[1],
self.cfg.commands.num_bins_gait_phase),
gait_offset=(self.cfg.commands.limit_gait_offset[0],
self.cfg.commands.limit_gait_offset[1],
self.cfg.commands.num_bins_gait_offset),
gait_bounds=(self.cfg.commands.limit_gait_bound[0],
self.cfg.commands.limit_gait_bound[1],
self.cfg.commands.num_bins_gait_bound),
gait_duration=(self.cfg.commands.limit_gait_duration[0],
self.cfg.commands.limit_gait_duration[1],
self.cfg.commands.num_bins_gait_duration),
footswing_height=(self.cfg.commands.limit_footswing_height[0],
self.cfg.commands.limit_footswing_height[1],
self.cfg.commands.num_bins_footswing_height),
body_pitch=(self.cfg.commands.limit_body_pitch[0],
self.cfg.commands.limit_body_pitch[1],
self.cfg.commands.num_bins_body_pitch),
body_roll=(self.cfg.commands.limit_body_roll[0],
self.cfg.commands.limit_body_roll[1],
self.cfg.commands.num_bins_body_roll),
stance_width=(self.cfg.commands.limit_stance_width[0],
self.cfg.commands.limit_stance_width[1],
self.cfg.commands.num_bins_stance_width),
stance_length=(self.cfg.commands.limit_stance_length[0],
self.cfg.commands.limit_stance_length[1],
self.cfg.commands.num_bins_stance_length),
aux_reward_coef=(self.cfg.commands.limit_aux_reward_coef[0],
self.cfg.commands.limit_aux_reward_coef[1],
self.cfg.commands.num_bins_aux_reward_coef),
)]
if self.cfg.commands.curriculum_type == "LipschitzCurriculum":
for curriculum in self.curricula:
curriculum.set_params(lipschitz_threshold=self.cfg.commands.lipschitz_threshold,
binary_phases=self.cfg.commands.binary_phases)
        self.env_command_bins = np.zeros(len(env_ids), dtype=int)
        self.env_command_categories = np.zeros(len(env_ids), dtype=int)
low = np.array(
[self.cfg.commands.lin_vel_x[0], self.cfg.commands.lin_vel_y[0],
self.cfg.commands.ang_vel_yaw[0], self.cfg.commands.body_height_cmd[0],
self.cfg.commands.gait_frequency_cmd_range[0],
self.cfg.commands.gait_phase_cmd_range[0], self.cfg.commands.gait_offset_cmd_range[0],
self.cfg.commands.gait_bound_cmd_range[0], self.cfg.commands.gait_duration_cmd_range[0],
self.cfg.commands.footswing_height_range[0], self.cfg.commands.body_pitch_range[0],
             self.cfg.commands.body_roll_range[0], self.cfg.commands.stance_width_range[0],
             self.cfg.commands.stance_length_range[0], self.cfg.commands.aux_reward_coef_range[0]])
high = np.array(
[self.cfg.commands.lin_vel_x[1], self.cfg.commands.lin_vel_y[1],
self.cfg.commands.ang_vel_yaw[1], self.cfg.commands.body_height_cmd[1],
self.cfg.commands.gait_frequency_cmd_range[1],
self.cfg.commands.gait_phase_cmd_range[1], self.cfg.commands.gait_offset_cmd_range[1],
self.cfg.commands.gait_bound_cmd_range[1], self.cfg.commands.gait_duration_cmd_range[1],
self.cfg.commands.footswing_height_range[1], self.cfg.commands.body_pitch_range[1],
             self.cfg.commands.body_roll_range[1], self.cfg.commands.stance_width_range[1],
             self.cfg.commands.stance_length_range[1], self.cfg.commands.aux_reward_coef_range[1]])
for curriculum in self.curricula:
curriculum.set_to(low=low, high=high)
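        # Each curriculum discretizes the 15 command dimensions into the bin counts configured
        # above; set_to(low, high) appears to seed sampling inside the nominal command ranges,
        # while the limit_* ranges define the full grid the curriculum can later expand into.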

    def _prepare_reward_function(self):
        """ Prepares a list of reward functions, which will be called to compute the total reward.
            Looks for self._reward_<REWARD_NAME>, where <REWARD_NAME> are names of all non-zero reward scales in the cfg.
        """
# reward containers
from go2_gym.envs.rewards.corl_rewards import CoRLRewards
reward_containers = {"CoRLRewards": CoRLRewards}
self.reward_container = reward_containers[self.cfg.rewards.reward_container_name](self)
# remove zero scales + multiply non-zero ones by dt
for key in list(self.reward_scales.keys()):
scale = self.reward_scales[key]
if scale == 0:
self.reward_scales.pop(key)
else:
self.reward_scales[key] *= self.dt
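        # Multiplying each surviving scale by the control dt makes the per-step reward approximate
        # a time integral, keeping episode returns comparable across control frequencies.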
# prepare list of functions
self.reward_functions = []
self.reward_names = []
for name, scale in self.reward_scales.items():
if name == "termination":
continue
if not hasattr(self.reward_container, '_reward_' + name):
print(f"Warning: reward {'_reward_' + name} has nonzero coefficient but was not found!")
else:
self.reward_names.append(name)
self.reward_functions.append(getattr(self.reward_container, '_reward_' + name))
# reward episode sums
self.episode_sums = {
name: torch.zeros(self.num_envs, dtype=torch.float, device=self.device, requires_grad=False)
for name in self.reward_scales.keys()}
self.episode_sums["total"] = torch.zeros(self.num_envs, dtype=torch.float, device=self.device,
requires_grad=False)
self.episode_sums_eval = {
name: -1 * torch.ones(self.num_envs, dtype=torch.float, device=self.device, requires_grad=False)
for name in self.reward_scales.keys()}
self.episode_sums_eval["total"] = torch.zeros(self.num_envs, dtype=torch.float, device=self.device,
requires_grad=False)
self.command_sums = {
name: torch.zeros(self.num_envs, dtype=torch.float, device=self.device, requires_grad=False)
for name in
list(self.reward_scales.keys()) + ["lin_vel_raw", "ang_vel_raw", "lin_vel_residual", "ang_vel_residual",
"ep_timesteps"]}

    def _create_ground_plane(self):
""" Adds a ground plane to the simulation, sets friction and restitution based on the cfg.
"""
plane_params = gymapi.PlaneParams()
plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0)
plane_params.static_friction = self.cfg.terrain.static_friction
plane_params.dynamic_friction = self.cfg.terrain.dynamic_friction
plane_params.restitution = self.cfg.terrain.restitution
self.gym.add_ground(self.sim, plane_params)

    def _create_heightfield(self):
""" Adds a heightfield terrain to the simulation, sets parameters based on the cfg.
"""
hf_params = gymapi.HeightFieldParams()
hf_params.column_scale = self.terrain.cfg.horizontal_scale
hf_params.row_scale = self.terrain.cfg.horizontal_scale
hf_params.vertical_scale = self.terrain.cfg.vertical_scale
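        # The row/column counts are swapped relative to tot_rows/tot_cols, which appears to match
        # the transposed height samples (heightsamples.T) handed to add_heightfield below.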
hf_params.nbRows = self.terrain.tot_cols
hf_params.nbColumns = self.terrain.tot_rows
hf_params.transform.p.x = -self.terrain.cfg.border_size
hf_params.transform.p.y = -self.terrain.cfg.border_size
hf_params.transform.p.z = 0.0
hf_params.static_friction = self.cfg.terrain.static_friction
hf_params.dynamic_friction = self.cfg.terrain.dynamic_friction
hf_params.restitution = self.cfg.terrain.restitution
print(self.terrain.heightsamples.shape, hf_params.nbRows, hf_params.nbColumns)
self.gym.add_heightfield(self.sim, self.terrain.heightsamples.T, hf_params)
self.height_samples = torch.tensor(self.terrain.heightsamples).view(self.terrain.tot_rows,
self.terrain.tot_cols).to(self.device)

    def _create_trimesh(self):
        """ Adds a triangle mesh terrain to the simulation, sets parameters based on the cfg.
        """
tm_params = gymapi.TriangleMeshParams()
tm_params.nb_vertices = self.terrain.vertices.shape[0]
tm_params.nb_triangles = self.terrain.triangles.shape[0]
tm_params.transform.p.x = -self.terrain.cfg.border_size
tm_params.transform.p.y = -self.terrain.cfg.border_size
tm_params.transform.p.z = 0.0
tm_params.static_friction = self.cfg.terrain.static_friction
tm_params.dynamic_friction = self.cfg.terrain.dynamic_friction
tm_params.restitution = self.cfg.terrain.restitution
self.gym.add_triangle_mesh(self.sim, self.terrain.vertices.flatten(order='C'),
self.terrain.triangles.flatten(order='C'), tm_params)
self.height_samples = torch.tensor(self.terrain.heightsamples).view(self.terrain.tot_rows,
self.terrain.tot_cols).to(self.device)

    def _create_envs(self):
""" Creates environments:
1. loads the robot URDF/MJCF asset,
2. For each environment
2.1 creates the environment,
2.2 calls DOF and Rigid shape properties callbacks,
2.3 create actor with these properties and add them to the env
3. Store indices of different bodies of the robot
"""
asset_path = self.cfg.asset.file.format(MINI_GYM_ROOT_DIR=MINI_GYM_ROOT_DIR)
asset_root = os.path.dirname(asset_path)
asset_file = os.path.basename(asset_path)
asset_options = gymapi.AssetOptions()
asset_options.default_dof_drive_mode = self.cfg.asset.default_dof_drive_mode
asset_options.collapse_fixed_joints = self.cfg.asset.collapse_fixed_joints
asset_options.replace_cylinder_with_capsule = self.cfg.asset.replace_cylinder_with_capsule
asset_options.flip_visual_attachments = self.cfg.asset.flip_visual_attachments
asset_options.fix_base_link = self.cfg.asset.fix_base_link
asset_options.density = self.cfg.asset.density
asset_options.angular_damping = self.cfg.asset.angular_damping
asset_options.linear_damping = self.cfg.asset.linear_damping
asset_options.max_angular_velocity = self.cfg.asset.max_angular_velocity
asset_options.max_linear_velocity = self.cfg.asset.max_linear_velocity
asset_options.armature = self.cfg.asset.armature
asset_options.thickness = self.cfg.asset.thickness
asset_options.disable_gravity = self.cfg.asset.disable_gravity
self.robot_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options)
self.num_dof = self.gym.get_asset_dof_count(self.robot_asset)
self.num_actuated_dof = self.num_actions
self.num_bodies = self.gym.get_asset_rigid_body_count(self.robot_asset)
dof_props_asset = self.gym.get_asset_dof_properties(self.robot_asset)
rigid_shape_props_asset = self.gym.get_asset_rigid_shape_properties(self.robot_asset)
# save body names from the asset
body_names = self.gym.get_asset_rigid_body_names(self.robot_asset)
self.dof_names = self.gym.get_asset_dof_names(self.robot_asset)
self.num_bodies = len(body_names)
self.num_dofs = len(self.dof_names)
feet_names = [s for s in body_names if self.cfg.asset.foot_name in s]
penalized_contact_names = []
for name in self.cfg.asset.penalize_contacts_on:
penalized_contact_names.extend([s for s in body_names if name in s])
termination_contact_names = []
for name in self.cfg.asset.terminate_after_contacts_on:
termination_contact_names.extend([s for s in body_names if name in s])
base_init_state_list = self.cfg.init_state.pos + self.cfg.init_state.rot + self.cfg.init_state.lin_vel + self.cfg.init_state.ang_vel
self.base_init_state = to_torch(base_init_state_list, device=self.device, requires_grad=False)
start_pose = gymapi.Transform()
start_pose.p = gymapi.Vec3(*self.base_init_state[:3])
self.env_origins = torch.zeros(self.num_envs, 3, device=self.device, requires_grad=False)
self.terrain_levels = torch.zeros(self.num_envs, device=self.device, requires_grad=False, dtype=torch.long)
self.terrain_origins = torch.zeros(self.num_envs, 3, device=self.device, requires_grad=False)
self.terrain_types = torch.zeros(self.num_envs, device=self.device, requires_grad=False, dtype=torch.long)
self._call_train_eval(self._get_env_origins, torch.arange(self.num_envs, device=self.device))
env_lower = gymapi.Vec3(0., 0., 0.)
env_upper = gymapi.Vec3(0., 0., 0.)
self.actor_handles = []
self.imu_sensor_handles = []
self.envs = []
self.default_friction = rigid_shape_props_asset[1].friction
self.default_restitution = rigid_shape_props_asset[1].restitution
self._init_custom_buffers__()
self._call_train_eval(self._randomize_rigid_body_props, torch.arange(self.num_envs, device=self.device))
self._randomize_gravity()
for i in range(self.num_envs):
# create env instance
env_handle = self.gym.create_env(self.sim, env_lower, env_upper, int(np.sqrt(self.num_envs)))
pos = self.env_origins[i].clone()
pos[0:1] += torch_rand_float(-self.cfg.terrain.x_init_range, self.cfg.terrain.x_init_range, (1, 1),
device=self.device).squeeze(1)
pos[1:2] += torch_rand_float(-self.cfg.terrain.y_init_range, self.cfg.terrain.y_init_range, (1, 1),
device=self.device).squeeze(1)
start_pose.p = gymapi.Vec3(*pos)
rigid_shape_props = self._process_rigid_shape_props(rigid_shape_props_asset, i)
self.gym.set_asset_rigid_shape_properties(self.robot_asset, rigid_shape_props)
anymal_handle = self.gym.create_actor(env_handle, self.robot_asset, start_pose, "anymal", i,
self.cfg.asset.self_collisions, 0)
dof_props = self._process_dof_props(dof_props_asset, i)
self.gym.set_actor_dof_properties(env_handle, anymal_handle, dof_props)
body_props = self.gym.get_actor_rigid_body_properties(env_handle, anymal_handle)
body_props = self._process_rigid_body_props(body_props, i)
self.gym.set_actor_rigid_body_properties(env_handle, anymal_handle, body_props, recomputeInertia=True)
self.envs.append(env_handle)
self.actor_handles.append(anymal_handle)
self.feet_indices = torch.zeros(len(feet_names), dtype=torch.long, device=self.device, requires_grad=False)
for i in range(len(feet_names)):
self.feet_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0],
feet_names[i])
self.penalised_contact_indices = torch.zeros(len(penalized_contact_names), dtype=torch.long, device=self.device,
requires_grad=False)
for i in range(len(penalized_contact_names)):
self.penalised_contact_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0],
self.actor_handles[0],
penalized_contact_names[i])
self.termination_contact_indices = torch.zeros(len(termination_contact_names), dtype=torch.long,
device=self.device, requires_grad=False)
for i in range(len(termination_contact_names)):
self.termination_contact_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0],
self.actor_handles[0],
termination_contact_names[i])
# if recording video, set up camera
if self.cfg.env.record_video:
self.camera_props = gymapi.CameraProperties()
self.camera_props.width = 360
self.camera_props.height = 240
self.rendering_camera = self.gym.create_camera_sensor(self.envs[0], self.camera_props)
self.gym.set_camera_location(self.rendering_camera, self.envs[0], gymapi.Vec3(1.5, 1, 3.0),
gymapi.Vec3(0, 0, 0))
if self.eval_cfg is not None:
self.rendering_camera_eval = self.gym.create_camera_sensor(self.envs[self.num_train_envs],
self.camera_props)
self.gym.set_camera_location(self.rendering_camera_eval, self.envs[self.num_train_envs],
gymapi.Vec3(1.5, 1, 3.0),
gymapi.Vec3(0, 0, 0))
self.video_writer = None
self.video_frames = []
self.video_frames_eval = []
self.complete_video_frames = []
self.complete_video_frames_eval = []

    def render(self, mode="rgb_array"):
assert mode == "rgb_array"
bx, by, bz = self.root_states[0, 0], self.root_states[0, 1], self.root_states[0, 2]
self.gym.set_camera_location(self.rendering_camera, self.envs[0], gymapi.Vec3(bx, by - 1.0, bz + 1.0),
gymapi.Vec3(bx, by, bz))
self.gym.step_graphics(self.sim)
self.gym.render_all_camera_sensors(self.sim)
img = self.gym.get_camera_image(self.sim, self.envs[0], self.rendering_camera, gymapi.IMAGE_COLOR)
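        # get_camera_image returns IMAGE_COLOR as a (height, width * 4) uint8 buffer, so the
        # reshape below recovers an RGBA image of shape (height, width, 4).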
w, h = img.shape
return img.reshape([w, h // 4, 4])

    def _render_headless(self):
if self.record_now and self.complete_video_frames is not None and len(self.complete_video_frames) == 0:
bx, by, bz = self.root_states[0, 0], self.root_states[0, 1], self.root_states[0, 2]
self.gym.set_camera_location(self.rendering_camera, self.envs[0], gymapi.Vec3(bx, by - 1.0, bz + 1.0),
gymapi.Vec3(bx, by, bz))
self.video_frame = self.gym.get_camera_image(self.sim, self.envs[0], self.rendering_camera,
gymapi.IMAGE_COLOR)
self.video_frame = self.video_frame.reshape((self.camera_props.height, self.camera_props.width, 4))
self.video_frames.append(self.video_frame)
if self.record_eval_now and self.complete_video_frames_eval is not None and len(
self.complete_video_frames_eval) == 0:
if self.eval_cfg is not None:
bx, by, bz = self.root_states[self.num_train_envs, 0], self.root_states[self.num_train_envs, 1], \
self.root_states[self.num_train_envs, 2]
self.gym.set_camera_location(self.rendering_camera_eval, self.envs[self.num_train_envs],
gymapi.Vec3(bx, by - 1.0, bz + 1.0),
gymapi.Vec3(bx, by, bz))
self.video_frame_eval = self.gym.get_camera_image(self.sim, self.envs[self.num_train_envs],
self.rendering_camera_eval,
gymapi.IMAGE_COLOR)
self.video_frame_eval = self.video_frame_eval.reshape(
(self.camera_props.height, self.camera_props.width, 4))
self.video_frames_eval.append(self.video_frame_eval)

    def start_recording(self):
self.complete_video_frames = None
self.record_now = True

    def start_recording_eval(self):
self.complete_video_frames_eval = None
self.record_eval_now = True

    def pause_recording(self):
self.complete_video_frames = []
self.video_frames = []
self.record_now = False

    def pause_recording_eval(self):
self.complete_video_frames_eval = []
self.video_frames_eval = []
self.record_eval_now = False

    def get_complete_frames(self):
if self.complete_video_frames is None:
return []
return self.complete_video_frames

    def get_complete_frames_eval(self):
if self.complete_video_frames_eval is None:
return []
return self.complete_video_frames_eval
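
    # Recording usage sketch (assumes finished frames are flushed into complete_video_frames on
    # episode reset elsewhere in this class):
    #   env.start_recording()
    #   ... step the environment for an episode ...
    #   frames = env.get_complete_frames()  # list of (H, W, 4) uint8 arrays
    #   env.pause_recording()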

    def _get_env_origins(self, env_ids, cfg):
""" Sets environment origins. On rough terrain the origins are defined by the terrain platforms.
Otherwise create a grid.
"""
if cfg.terrain.mesh_type in ["heightfield", "trimesh"]:
self.custom_origins = True
# put robots at the origins defined by the terrain
max_init_level = cfg.terrain.max_init_terrain_level
min_init_level = cfg.terrain.min_init_terrain_level
if not cfg.terrain.curriculum: max_init_level = cfg.terrain.num_rows - 1
if not cfg.terrain.curriculum: min_init_level = 0
if cfg.terrain.center_robots:
min_terrain_level = cfg.terrain.num_rows // 2 - cfg.terrain.center_span
max_terrain_level = cfg.terrain.num_rows // 2 + cfg.terrain.center_span - 1
min_terrain_type = cfg.terrain.num_cols // 2 - cfg.terrain.center_span
max_terrain_type = cfg.terrain.num_cols // 2 + cfg.terrain.center_span - 1
self.terrain_levels[env_ids] = torch.randint(min_terrain_level, max_terrain_level + 1, (len(env_ids),),
device=self.device)
self.terrain_types[env_ids] = torch.randint(min_terrain_type, max_terrain_type + 1, (len(env_ids),),
device=self.device)
else:
self.terrain_levels[env_ids] = torch.randint(min_init_level, max_init_level + 1, (len(env_ids),),
device=self.device)
self.terrain_types[env_ids] = torch.div(torch.arange(len(env_ids), device=self.device),
(len(env_ids) / cfg.terrain.num_cols), rounding_mode='floor').to(
torch.long)
cfg.terrain.max_terrain_level = cfg.terrain.num_rows
cfg.terrain.terrain_origins = torch.from_numpy(cfg.terrain.env_origins).to(self.device).to(torch.float)
self.env_origins[env_ids] = cfg.terrain.terrain_origins[
self.terrain_levels[env_ids], self.terrain_types[env_ids]]
else:
self.custom_origins = False
# create a grid of robots
num_cols = np.floor(np.sqrt(len(env_ids)))
num_rows = np.ceil(self.num_envs / num_cols)
xx, yy = torch.meshgrid(torch.arange(num_rows), torch.arange(num_cols))
spacing = cfg.env.env_spacing
self.env_origins[env_ids, 0] = spacing * xx.flatten()[:len(env_ids)]
self.env_origins[env_ids, 1] = spacing * yy.flatten()[:len(env_ids)]
self.env_origins[env_ids, 2] = 0.

    def _parse_cfg(self, cfg):
self.dt = self.cfg.control.decimation * self.sim_params.dt
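        # Worked example (hypothetical values): with control.decimation = 4 and sim_params.dt =
        # 0.005 s, the control timestep is 0.02 s, so episode_length_s = 20 maps to 1000 steps below.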
self.obs_scales = self.cfg.obs_scales
self.reward_scales = vars(self.cfg.reward_scales)
self.curriculum_thresholds = vars(self.cfg.curriculum_thresholds)
cfg.command_ranges = vars(cfg.commands)
if cfg.terrain.mesh_type not in ['heightfield', 'trimesh']:
cfg.terrain.curriculum = False
max_episode_length_s = cfg.env.episode_length_s
cfg.env.max_episode_length = np.ceil(max_episode_length_s / self.dt)
self.max_episode_length = cfg.env.max_episode_length
cfg.domain_rand.push_interval = np.ceil(cfg.domain_rand.push_interval_s / self.dt)
cfg.domain_rand.rand_interval = np.ceil(cfg.domain_rand.rand_interval_s / self.dt)
cfg.domain_rand.gravity_rand_interval = np.ceil(cfg.domain_rand.gravity_rand_interval_s / self.dt)
cfg.domain_rand.gravity_rand_duration = np.ceil(
cfg.domain_rand.gravity_rand_interval * cfg.domain_rand.gravity_impulse_duration)

    def _draw_debug_vis(self):
        """ Draws visualizations for debugging (slows down simulation a lot).
Default behaviour: draws height measurement points
"""
# draw height lines
if not self.terrain.cfg.measure_heights:
return
self.gym.clear_lines(self.viewer)
self.gym.refresh_rigid_body_state_tensor(self.sim)
sphere_geom = gymutil.WireframeSphereGeometry(0.02, 4, 4, None, color=(1, 1, 0))
for i in range(self.num_envs):
base_pos = (self.root_states[i, :3]).cpu().numpy()
heights = self.measured_heights[i].cpu().numpy()
height_points = quat_apply_yaw(self.base_quat[i].repeat(heights.shape[0]),
self.height_points[i]).cpu().numpy()
for j in range(heights.shape[0]):
x = height_points[j, 0] + base_pos[0]
y = height_points[j, 1] + base_pos[1]
z = heights[j]
sphere_pose = gymapi.Transform(gymapi.Vec3(x, y, z), r=None)
gymutil.draw_lines(sphere_geom, self.gym, self.viewer, self.envs[i], sphere_pose)

    def _init_height_points(self, env_ids, cfg):
        """ Returns points at which the height measurements are sampled (in base frame)

        Returns:
            [torch.Tensor]: Tensor of shape (len(env_ids), cfg.env.num_height_points, 3)
"""
y = torch.tensor(cfg.terrain.measured_points_y, device=self.device, requires_grad=False)
x = torch.tensor(cfg.terrain.measured_points_x, device=self.device, requires_grad=False)
grid_x, grid_y = torch.meshgrid(x, y)
cfg.env.num_height_points = grid_x.numel()
points = torch.zeros(len(env_ids), cfg.env.num_height_points, 3, device=self.device, requires_grad=False)
points[:, :, 0] = grid_x.flatten()
points[:, :, 1] = grid_y.flatten()
return points

    def _get_heights(self, env_ids, cfg):
""" Samples heights of the terrain at required points around each robot.
The points are offset by the base's position and rotated by the base's yaw
Args:
            env_ids (List[int]): Subset of environments for which to return the heights.
            cfg: Environment config providing terrain parameters and the height point layout.

        Raises:
            NameError: If the terrain mesh type is 'none', since heights cannot be measured.

        Returns:
            [torch.Tensor]: Heights of shape (len(env_ids), num_height_points), in meters.
"""
if cfg.terrain.mesh_type == 'plane':
return torch.zeros(len(env_ids), cfg.env.num_height_points, device=self.device, requires_grad=False)
elif cfg.terrain.mesh_type == 'none':
raise NameError("Can't measure height with terrain mesh type 'none'")
points = quat_apply_yaw(self.base_quat[env_ids].repeat(1, cfg.env.num_height_points),
self.height_points[env_ids]) + (self.root_states[env_ids, :3]).unsqueeze(1)
points += self.terrain.cfg.border_size
points = (points / self.terrain.cfg.horizontal_scale).long()
px = points[:, :, 0].view(-1)
py = points[:, :, 1].view(-1)
px = torch.clip(px, 0, self.height_samples.shape[0] - 2)
py = torch.clip(py, 0, self.height_samples.shape[1] - 2)
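        # Conservative estimate: take the minimum height over the three neighbouring grid cells
        # (px, py), (px + 1, py) and (px, py + 1), then convert to meters via vertical_scale.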
heights1 = self.height_samples[px, py]
heights2 = self.height_samples[px + 1, py]
heights3 = self.height_samples[px, py + 1]
heights = torch.min(heights1, heights2)
heights = torch.min(heights, heights3)
return heights.view(len(env_ids), -1) * self.terrain.cfg.vertical_scale