1807 lines
104 KiB
Python
Executable File
1807 lines
104 KiB
Python
Executable File
# License: see [LICENSE, LICENSES/legged_gym/LICENSE]
|
|
|
|
import os
|
|
from typing import Dict
|
|
|
|
from isaacgym import gymtorch, gymapi, gymutil
|
|
from isaacgym.torch_utils import *
|
|
|
|
assert gymtorch
|
|
import torch
|
|
|
|
from go2_gym import MINI_GYM_ROOT_DIR
|
|
from go2_gym.envs.base.base_task import BaseTask
|
|
from go2_gym.utils.math_utils import quat_apply_yaw, wrap_to_pi, get_scale_shift
|
|
from go2_gym.utils.terrain import Terrain
|
|
from .legged_robot_config import Cfg
|
|
|
|
|
|
class LeggedRobot(BaseTask):
|
|
def __init__(self, cfg: Cfg, sim_params, physics_engine, sim_device, headless, eval_cfg=None,
|
|
initial_dynamics_dict=None):
|
|
""" Parses the provided config file,
|
|
calls create_sim() (which creates, simulation, terrain and environments),
|
|
initilizes pytorch buffers used during training
|
|
|
|
Args:
|
|
cfg (Dict): Environment config file
|
|
sim_params (gymapi.SimParams): simulation parameters
|
|
physics_engine (gymapi.SimType): gymapi.SIM_PHYSX (must be PhysX)
|
|
device_type (string): 'cuda' or 'cpu'
|
|
device_id (int): 0, 1, ...
|
|
headless (bool): Run without rendering if True
|
|
"""
|
|
self.cfg = cfg
|
|
self.eval_cfg = eval_cfg
|
|
self.sim_params = sim_params
|
|
self.height_samples = None
|
|
self.debug_viz = False
|
|
self.init_done = False
|
|
self.initial_dynamics_dict = initial_dynamics_dict
|
|
if eval_cfg is not None: self._parse_cfg(eval_cfg)
|
|
self._parse_cfg(self.cfg)
|
|
|
|
super().__init__(self.cfg, sim_params, physics_engine, sim_device, headless, self.eval_cfg)
|
|
|
|
self._init_command_distribution(torch.arange(self.num_envs, device=self.device))
|
|
|
|
# self.rand_buffers_eval = self._init_custom_buffers__(self.num_eval_envs)
|
|
if not self.headless:
|
|
self.set_camera(self.cfg.viewer.pos, self.cfg.viewer.lookat)
|
|
self._init_buffers()
|
|
|
|
self._prepare_reward_function()
|
|
self.init_done = True
|
|
self.record_now = False
|
|
self.record_eval_now = False
|
|
self.collecting_evaluation = False
|
|
self.num_still_evaluating = 0
|
|
|
|
def step(self, actions):
|
|
""" Apply actions, simulate, call self.post_physics_step()
|
|
|
|
Args:
|
|
actions (torch.Tensor): Tensor of shape (num_envs, num_actions_per_env)
|
|
"""
|
|
clip_actions = self.cfg.normalization.clip_actions
|
|
self.actions = torch.clip(actions, -clip_actions, clip_actions).to(self.device)
|
|
# step physics and render each frame
|
|
self.prev_base_pos = self.base_pos.clone()
|
|
self.prev_base_quat = self.base_quat.clone()
|
|
self.prev_base_lin_vel = self.base_lin_vel.clone()
|
|
self.prev_foot_velocities = self.foot_velocities.clone()
|
|
self.render_gui()
|
|
for _ in range(self.cfg.control.decimation):
|
|
self.torques = self._compute_torques(self.actions).view(self.torques.shape)
|
|
self.gym.set_dof_actuation_force_tensor(self.sim, gymtorch.unwrap_tensor(self.torques))
|
|
self.gym.simulate(self.sim)
|
|
# if self.device == 'cpu':
|
|
self.gym.fetch_results(self.sim, True)
|
|
self.gym.refresh_dof_state_tensor(self.sim)
|
|
self.post_physics_step()
|
|
|
|
# return clipped obs, clipped states (None), rewards, dones and infos
|
|
clip_obs = self.cfg.normalization.clip_observations
|
|
self.obs_buf = torch.clip(self.obs_buf, -clip_obs, clip_obs)
|
|
if self.privileged_obs_buf is not None:
|
|
self.privileged_obs_buf = torch.clip(self.privileged_obs_buf, -clip_obs, clip_obs)
|
|
return self.obs_buf, self.privileged_obs_buf, self.rew_buf, self.reset_buf, self.extras
|
|
|
|
def post_physics_step(self):
|
|
""" check terminations, compute observations and rewards
|
|
calls self._post_physics_step_callback() for common computations
|
|
calls self._draw_debug_vis() if needed
|
|
"""
|
|
self.gym.refresh_actor_root_state_tensor(self.sim)
|
|
self.gym.refresh_net_contact_force_tensor(self.sim)
|
|
self.gym.refresh_rigid_body_state_tensor(self.sim)
|
|
if self.record_now:
|
|
self.gym.step_graphics(self.sim)
|
|
self.gym.render_all_camera_sensors(self.sim)
|
|
|
|
self.episode_length_buf += 1
|
|
self.common_step_counter += 1
|
|
|
|
# prepare quantities
|
|
self.base_pos[:] = self.root_states[:self.num_envs, 0:3]
|
|
self.base_quat[:] = self.root_states[:self.num_envs, 3:7]
|
|
self.base_lin_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:self.num_envs, 7:10])
|
|
self.base_ang_vel[:] = quat_rotate_inverse(self.base_quat, self.root_states[:self.num_envs, 10:13])
|
|
self.projected_gravity[:] = quat_rotate_inverse(self.base_quat, self.gravity_vec)
|
|
|
|
self.foot_velocities = self.rigid_body_state.view(self.num_envs, self.num_bodies, 13
|
|
)[:, self.feet_indices, 7:10]
|
|
self.foot_positions = self.rigid_body_state.view(self.num_envs, self.num_bodies, 13)[:, self.feet_indices,
|
|
0:3]
|
|
|
|
self._post_physics_step_callback()
|
|
|
|
# compute observations, rewards, resets, ...
|
|
self.check_termination()
|
|
self.compute_reward()
|
|
env_ids = self.reset_buf.nonzero(as_tuple=False).flatten()
|
|
self.reset_idx(env_ids)
|
|
self.compute_observations()
|
|
|
|
self.last_last_actions[:] = self.last_actions[:]
|
|
self.last_actions[:] = self.actions[:]
|
|
self.last_last_joint_pos_target[:] = self.last_joint_pos_target[:]
|
|
self.last_joint_pos_target[:] = self.joint_pos_target[:]
|
|
self.last_dof_vel[:] = self.dof_vel[:]
|
|
self.last_root_vel[:] = self.root_states[:, 7:13]
|
|
|
|
if self.viewer and self.enable_viewer_sync and self.debug_viz:
|
|
self._draw_debug_vis()
|
|
|
|
self._render_headless()
|
|
|
|
def check_termination(self):
|
|
""" Check if environments need to be reset
|
|
"""
|
|
self.reset_buf = torch.any(torch.norm(self.contact_forces[:, self.termination_contact_indices, :], dim=-1) > 1.,
|
|
dim=1)
|
|
self.time_out_buf = self.episode_length_buf > self.cfg.env.max_episode_length # no terminal reward for time-outs
|
|
self.reset_buf |= self.time_out_buf
|
|
if self.cfg.rewards.use_terminal_body_height:
|
|
self.body_height_buf = torch.mean(self.root_states[:, 2].unsqueeze(1) - self.measured_heights, dim=1) \
|
|
< self.cfg.rewards.terminal_body_height
|
|
self.reset_buf = torch.logical_or(self.body_height_buf, self.reset_buf)
|
|
|
|
def reset_idx(self, env_ids):
|
|
""" Reset some environments.
|
|
Calls self._reset_dofs(env_ids), self._reset_root_states(env_ids), and self._resample_commands(env_ids) and
|
|
Logs episode info
|
|
Resets some buffers
|
|
|
|
Args:
|
|
env_ids (list[int]): List of environment ids which must be reset
|
|
"""
|
|
|
|
if len(env_ids) == 0:
|
|
return
|
|
|
|
# reset robot states
|
|
self._resample_commands(env_ids)
|
|
self._call_train_eval(self._randomize_dof_props, env_ids)
|
|
if self.cfg.domain_rand.randomize_rigids_after_start:
|
|
self._call_train_eval(self._randomize_rigid_body_props, env_ids)
|
|
self._call_train_eval(self.refresh_actor_rigid_shape_props, env_ids)
|
|
|
|
self._call_train_eval(self._reset_dofs, env_ids)
|
|
self._call_train_eval(self._reset_root_states, env_ids)
|
|
|
|
# reset buffers
|
|
self.last_actions[env_ids] = 0.
|
|
self.last_last_actions[env_ids] = 0.
|
|
self.last_dof_vel[env_ids] = 0.
|
|
self.feet_air_time[env_ids] = 0.
|
|
self.episode_length_buf[env_ids] = 0
|
|
self.reset_buf[env_ids] = 1
|
|
# fill extras
|
|
train_env_ids = env_ids[env_ids < self.num_train_envs]
|
|
if len(train_env_ids) > 0:
|
|
self.extras["train/episode"] = {}
|
|
for key in self.episode_sums.keys():
|
|
self.extras["train/episode"]['rew_' + key] = torch.mean(
|
|
self.episode_sums[key][train_env_ids])
|
|
self.episode_sums[key][train_env_ids] = 0.
|
|
eval_env_ids = env_ids[env_ids >= self.num_train_envs]
|
|
if len(eval_env_ids) > 0:
|
|
self.extras["eval/episode"] = {}
|
|
for key in self.episode_sums.keys():
|
|
# save the evaluation rollout result if not already saved
|
|
unset_eval_envs = eval_env_ids[self.episode_sums_eval[key][eval_env_ids] == -1]
|
|
self.episode_sums_eval[key][unset_eval_envs] = self.episode_sums[key][unset_eval_envs]
|
|
self.episode_sums[key][eval_env_ids] = 0.
|
|
|
|
# log additional curriculum info
|
|
if self.cfg.terrain.curriculum:
|
|
self.extras["train/episode"]["terrain_level"] = torch.mean(
|
|
self.terrain_levels[:self.num_train_envs].float())
|
|
if self.cfg.commands.command_curriculum:
|
|
self.extras["env_bins"] = torch.Tensor(self.env_command_bins)[:self.num_train_envs]
|
|
self.extras["train/episode"]["min_command_duration"] = torch.min(self.commands[:, 8])
|
|
self.extras["train/episode"]["max_command_duration"] = torch.max(self.commands[:, 8])
|
|
self.extras["train/episode"]["min_command_bound"] = torch.min(self.commands[:, 7])
|
|
self.extras["train/episode"]["max_command_bound"] = torch.max(self.commands[:, 7])
|
|
self.extras["train/episode"]["min_command_offset"] = torch.min(self.commands[:, 6])
|
|
self.extras["train/episode"]["max_command_offset"] = torch.max(self.commands[:, 6])
|
|
self.extras["train/episode"]["min_command_phase"] = torch.min(self.commands[:, 5])
|
|
self.extras["train/episode"]["max_command_phase"] = torch.max(self.commands[:, 5])
|
|
self.extras["train/episode"]["min_command_freq"] = torch.min(self.commands[:, 4])
|
|
self.extras["train/episode"]["max_command_freq"] = torch.max(self.commands[:, 4])
|
|
self.extras["train/episode"]["min_command_x_vel"] = torch.min(self.commands[:, 0])
|
|
self.extras["train/episode"]["max_command_x_vel"] = torch.max(self.commands[:, 0])
|
|
self.extras["train/episode"]["min_command_y_vel"] = torch.min(self.commands[:, 1])
|
|
self.extras["train/episode"]["max_command_y_vel"] = torch.max(self.commands[:, 1])
|
|
self.extras["train/episode"]["min_command_yaw_vel"] = torch.min(self.commands[:, 2])
|
|
self.extras["train/episode"]["max_command_yaw_vel"] = torch.max(self.commands[:, 2])
|
|
if self.cfg.commands.num_commands > 9:
|
|
self.extras["train/episode"]["min_command_swing_height"] = torch.min(self.commands[:, 9])
|
|
self.extras["train/episode"]["max_command_swing_height"] = torch.max(self.commands[:, 9])
|
|
for curriculum, category in zip(self.curricula, self.category_names):
|
|
self.extras["train/episode"][f"command_area_{category}"] = np.sum(curriculum.weights) / \
|
|
curriculum.weights.shape[0]
|
|
|
|
self.extras["train/episode"]["min_action"] = torch.min(self.actions)
|
|
self.extras["train/episode"]["max_action"] = torch.max(self.actions)
|
|
|
|
self.extras["curriculum/distribution"] = {}
|
|
for curriculum, category in zip(self.curricula, self.category_names):
|
|
self.extras[f"curriculum/distribution"][f"weights_{category}"] = curriculum.weights
|
|
self.extras[f"curriculum/distribution"][f"grid_{category}"] = curriculum.grid
|
|
if self.cfg.env.send_timeouts:
|
|
self.extras["time_outs"] = self.time_out_buf[:self.num_train_envs]
|
|
|
|
self.gait_indices[env_ids] = 0
|
|
|
|
for i in range(len(self.lag_buffer)):
|
|
self.lag_buffer[i][env_ids, :] = 0
|
|
|
|
def set_idx_pose(self, env_ids, dof_pos, base_state):
|
|
if len(env_ids) == 0:
|
|
return
|
|
|
|
env_ids_int32 = env_ids.to(dtype=torch.int32).to(self.device)
|
|
|
|
# joints
|
|
if dof_pos is not None:
|
|
self.dof_pos[env_ids] = dof_pos
|
|
self.dof_vel[env_ids] = 0.
|
|
|
|
self.gym.set_dof_state_tensor_indexed(self.sim,
|
|
gymtorch.unwrap_tensor(self.dof_state),
|
|
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
|
|
|
|
# base position
|
|
self.root_states[env_ids] = base_state.to(self.device)
|
|
|
|
self.gym.set_actor_root_state_tensor_indexed(self.sim,
|
|
gymtorch.unwrap_tensor(self.root_states),
|
|
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
|
|
|
|
def compute_reward(self):
|
|
""" Compute rewards
|
|
Calls each reward function which had a non-zero scale (processed in self._prepare_reward_function())
|
|
adds each terms to the episode sums and to the total reward
|
|
"""
|
|
self.rew_buf[:] = 0.
|
|
self.rew_buf_pos[:] = 0.
|
|
self.rew_buf_neg[:] = 0.
|
|
for i in range(len(self.reward_functions)):
|
|
name = self.reward_names[i]
|
|
rew = self.reward_functions[i]() * self.reward_scales[name]
|
|
self.rew_buf += rew
|
|
if torch.sum(rew) >= 0:
|
|
self.rew_buf_pos += rew
|
|
elif torch.sum(rew) <= 0:
|
|
self.rew_buf_neg += rew
|
|
self.episode_sums[name] += rew
|
|
if name in ['tracking_contacts_shaped_force', 'tracking_contacts_shaped_vel']:
|
|
self.command_sums[name] += self.reward_scales[name] + rew
|
|
else:
|
|
self.command_sums[name] += rew
|
|
if self.cfg.rewards.only_positive_rewards:
|
|
self.rew_buf[:] = torch.clip(self.rew_buf[:], min=0.)
|
|
elif self.cfg.rewards.only_positive_rewards_ji22_style: #TODO: update
|
|
self.rew_buf[:] = self.rew_buf_pos[:] * torch.exp(self.rew_buf_neg[:] / self.cfg.rewards.sigma_rew_neg)
|
|
self.episode_sums["total"] += self.rew_buf
|
|
# add termination reward after clipping
|
|
if "termination" in self.reward_scales:
|
|
rew = self.reward_container._reward_termination() * self.reward_scales["termination"]
|
|
self.rew_buf += rew
|
|
self.episode_sums["termination"] += rew
|
|
self.command_sums["termination"] += rew
|
|
|
|
self.command_sums["lin_vel_raw"] += self.base_lin_vel[:, 0]
|
|
self.command_sums["ang_vel_raw"] += self.base_ang_vel[:, 2]
|
|
self.command_sums["lin_vel_residual"] += (self.base_lin_vel[:, 0] - self.commands[:, 0]) ** 2
|
|
self.command_sums["ang_vel_residual"] += (self.base_ang_vel[:, 2] - self.commands[:, 2]) ** 2
|
|
self.command_sums["ep_timesteps"] += 1
|
|
|
|
def compute_observations(self):
|
|
""" Computes observations
|
|
"""
|
|
self.obs_buf = torch.cat((self.projected_gravity,
|
|
(self.dof_pos[:, :self.num_actuated_dof] - self.default_dof_pos[:,
|
|
:self.num_actuated_dof]) * self.obs_scales.dof_pos,
|
|
self.dof_vel[:, :self.num_actuated_dof] * self.obs_scales.dof_vel,
|
|
self.actions
|
|
), dim=-1)
|
|
# if self.cfg.env.observe_command and not self.cfg.env.observe_height_command:
|
|
# self.obs_buf = torch.cat((self.projected_gravity,
|
|
# self.commands[:, :3] * self.commands_scale,
|
|
# (self.dof_pos - self.default_dof_pos) * self.obs_scales.dof_pos,
|
|
# self.dof_vel * self.obs_scales.dof_vel,
|
|
# self.actions
|
|
# ), dim=-1)
|
|
|
|
if self.cfg.env.observe_command:
|
|
self.obs_buf = torch.cat((self.projected_gravity,
|
|
self.commands * self.commands_scale,
|
|
(self.dof_pos[:, :self.num_actuated_dof] - self.default_dof_pos[:,
|
|
:self.num_actuated_dof]) * self.obs_scales.dof_pos,
|
|
self.dof_vel[:, :self.num_actuated_dof] * self.obs_scales.dof_vel,
|
|
self.actions
|
|
), dim=-1)
|
|
|
|
if self.cfg.env.observe_two_prev_actions:
|
|
self.obs_buf = torch.cat((self.obs_buf,
|
|
self.last_actions), dim=-1)
|
|
|
|
if self.cfg.env.observe_timing_parameter:
|
|
self.obs_buf = torch.cat((self.obs_buf,
|
|
self.gait_indices.unsqueeze(1)), dim=-1)
|
|
|
|
if self.cfg.env.observe_clock_inputs:
|
|
self.obs_buf = torch.cat((self.obs_buf,
|
|
self.clock_inputs), dim=-1)
|
|
|
|
# if self.cfg.env.observe_desired_contact_states:
|
|
# self.obs_buf = torch.cat((self.obs_buf,
|
|
# self.desired_contact_states), dim=-1)
|
|
|
|
if self.cfg.env.observe_vel:
|
|
if self.cfg.commands.global_reference:
|
|
self.obs_buf = torch.cat((self.root_states[:self.num_envs, 7:10] * self.obs_scales.lin_vel,
|
|
self.base_ang_vel * self.obs_scales.ang_vel,
|
|
self.obs_buf), dim=-1)
|
|
else:
|
|
self.obs_buf = torch.cat((self.base_lin_vel * self.obs_scales.lin_vel,
|
|
self.base_ang_vel * self.obs_scales.ang_vel,
|
|
self.obs_buf), dim=-1)
|
|
|
|
if self.cfg.env.observe_only_ang_vel:
|
|
self.obs_buf = torch.cat((self.base_ang_vel * self.obs_scales.ang_vel,
|
|
self.obs_buf), dim=-1)
|
|
|
|
if self.cfg.env.observe_only_lin_vel:
|
|
self.obs_buf = torch.cat((self.base_lin_vel * self.obs_scales.lin_vel,
|
|
self.obs_buf), dim=-1)
|
|
|
|
if self.cfg.env.observe_yaw:
|
|
forward = quat_apply(self.base_quat, self.forward_vec)
|
|
heading = torch.atan2(forward[:, 1], forward[:, 0]).unsqueeze(1)
|
|
# heading_error = torch.clip(0.5 * wrap_to_pi(heading), -1., 1.).unsqueeze(1)
|
|
self.obs_buf = torch.cat((self.obs_buf,
|
|
heading), dim=-1)
|
|
|
|
if self.cfg.env.observe_contact_states:
|
|
self.obs_buf = torch.cat((self.obs_buf, (self.contact_forces[:, self.feet_indices, 2] > 1.).view(
|
|
self.num_envs,
|
|
-1) * 1.0), dim=1)
|
|
|
|
# add noise if needed
|
|
if self.add_noise:
|
|
self.obs_buf += (2 * torch.rand_like(self.obs_buf) - 1) * self.noise_scale_vec
|
|
|
|
# build privileged obs
|
|
|
|
self.privileged_obs_buf = torch.empty(self.num_envs, 0).to(self.device)
|
|
self.next_privileged_obs_buf = torch.empty(self.num_envs, 0).to(self.device)
|
|
|
|
if self.cfg.env.priv_observe_friction:
|
|
friction_coeffs_scale, friction_coeffs_shift = get_scale_shift(self.cfg.normalization.friction_range)
|
|
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
|
|
(self.friction_coeffs[:, 0].unsqueeze(
|
|
1) - friction_coeffs_shift) * friction_coeffs_scale),
|
|
dim=1)
|
|
self.next_privileged_obs_buf = torch.cat((self.next_privileged_obs_buf,
|
|
(self.friction_coeffs[:, 0].unsqueeze(
|
|
1) - friction_coeffs_shift) * friction_coeffs_scale),
|
|
dim=1)
|
|
if self.cfg.env.priv_observe_ground_friction:
|
|
self.ground_friction_coeffs = self._get_ground_frictions(range(self.num_envs))
|
|
ground_friction_coeffs_scale, ground_friction_coeffs_shift = get_scale_shift(
|
|
self.cfg.normalization.ground_friction_range)
|
|
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
|
|
(self.ground_friction_coeffs.unsqueeze(
|
|
1) - ground_friction_coeffs_shift) * ground_friction_coeffs_scale),
|
|
dim=1)
|
|
self.next_privileged_obs_buf = torch.cat((self.next_privileged_obs_buf,
|
|
(self.ground_friction_coeffs.unsqueeze(
|
|
1) - friction_coeffs_shift) * friction_coeffs_scale),
|
|
dim=1)
|
|
if self.cfg.env.priv_observe_restitution:
|
|
restitutions_scale, restitutions_shift = get_scale_shift(self.cfg.normalization.restitution_range)
|
|
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
|
|
(self.restitutions[:, 0].unsqueeze(
|
|
1) - restitutions_shift) * restitutions_scale),
|
|
dim=1)
|
|
self.next_privileged_obs_buf = torch.cat((self.next_privileged_obs_buf,
|
|
(self.restitutions[:, 0].unsqueeze(
|
|
1) - restitutions_shift) * restitutions_scale),
|
|
dim=1)
|
|
if self.cfg.env.priv_observe_base_mass:
|
|
payloads_scale, payloads_shift = get_scale_shift(self.cfg.normalization.added_mass_range)
|
|
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
|
|
(self.payloads.unsqueeze(1) - payloads_shift) * payloads_scale),
|
|
dim=1)
|
|
self.next_privileged_obs_buf = torch.cat((self.next_privileged_obs_buf,
|
|
(self.payloads.unsqueeze(1) - payloads_shift) * payloads_scale),
|
|
dim=1)
|
|
if self.cfg.env.priv_observe_com_displacement:
|
|
com_displacements_scale, com_displacements_shift = get_scale_shift(
|
|
self.cfg.normalization.com_displacement_range)
|
|
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
|
|
(
|
|
self.com_displacements - com_displacements_shift) * com_displacements_scale),
|
|
dim=1)
|
|
self.next_privileged_obs_buf = torch.cat((self.next_privileged_obs_buf,
|
|
(
|
|
self.com_displacements - com_displacements_shift) * com_displacements_scale),
|
|
dim=1)
|
|
if self.cfg.env.priv_observe_motor_strength:
|
|
motor_strengths_scale, motor_strengths_shift = get_scale_shift(self.cfg.normalization.motor_strength_range)
|
|
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
|
|
(
|
|
self.motor_strengths - motor_strengths_shift) * motor_strengths_scale),
|
|
dim=1)
|
|
self.next_privileged_obs_buf = torch.cat((self.next_privileged_obs_buf,
|
|
(
|
|
self.motor_strengths - motor_strengths_shift) * motor_strengths_scale),
|
|
dim=1)
|
|
if self.cfg.env.priv_observe_motor_offset:
|
|
motor_offset_scale, motor_offset_shift = get_scale_shift(self.cfg.normalization.motor_offset_range)
|
|
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
|
|
(
|
|
self.motor_offsets - motor_offset_shift) * motor_offset_scale),
|
|
dim=1)
|
|
self.next_privileged_obs_buf = torch.cat((self.privileged_obs_buf,
|
|
(
|
|
self.motor_offsets - motor_offset_shift) * motor_offset_scale),
|
|
dim=1)
|
|
if self.cfg.env.priv_observe_body_height:
|
|
body_height_scale, body_height_shift = get_scale_shift(self.cfg.normalization.body_height_range)
|
|
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
|
|
((self.root_states[:self.num_envs, 2]).view(
|
|
self.num_envs, -1) - body_height_shift) * body_height_scale),
|
|
dim=1)
|
|
self.next_privileged_obs_buf = torch.cat((self.next_privileged_obs_buf,
|
|
((self.root_states[:self.num_envs, 2]).view(
|
|
self.num_envs, -1) - body_height_shift) * body_height_scale),
|
|
dim=1)
|
|
if self.cfg.env.priv_observe_body_velocity:
|
|
body_velocity_scale, body_velocity_shift = get_scale_shift(self.cfg.normalization.body_velocity_range)
|
|
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
|
|
((self.base_lin_vel).view(self.num_envs,
|
|
-1) - body_velocity_shift) * body_velocity_scale),
|
|
dim=1)
|
|
self.next_privileged_obs_buf = torch.cat((self.next_privileged_obs_buf,
|
|
((self.base_lin_vel).view(self.num_envs,
|
|
-1) - body_velocity_shift) * body_velocity_scale),
|
|
dim=1)
|
|
if self.cfg.env.priv_observe_gravity:
|
|
gravity_scale, gravity_shift = get_scale_shift(self.cfg.normalization.gravity_range)
|
|
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
|
|
(self.gravities - gravity_shift) / gravity_scale),
|
|
dim=1)
|
|
self.next_privileged_obs_buf = torch.cat((self.next_privileged_obs_buf,
|
|
(self.gravities - gravity_shift) / gravity_scale), dim=1)
|
|
|
|
if self.cfg.env.priv_observe_clock_inputs:
|
|
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
|
|
self.clock_inputs), dim=-1)
|
|
|
|
if self.cfg.env.priv_observe_desired_contact_states:
|
|
self.privileged_obs_buf = torch.cat((self.privileged_obs_buf,
|
|
self.desired_contact_states), dim=-1)
|
|
|
|
assert self.privileged_obs_buf.shape[
|
|
1] == self.cfg.env.num_privileged_obs, f"num_privileged_obs ({self.cfg.env.num_privileged_obs}) != the number of privileged observations ({self.privileged_obs_buf.shape[1]}), you will discard data from the student!"
|
|
|
|
def create_sim(self):
|
|
""" Creates simulation, terrain and evironments
|
|
"""
|
|
self.up_axis_idx = 2 # 2 for z, 1 for y -> adapt gravity accordingly
|
|
self.sim = self.gym.create_sim(self.sim_device_id, self.graphics_device_id, self.physics_engine,
|
|
self.sim_params)
|
|
|
|
mesh_type = self.cfg.terrain.mesh_type
|
|
if mesh_type in ['heightfield', 'trimesh']:
|
|
if self.eval_cfg is not None:
|
|
self.terrain = Terrain(self.cfg.terrain, self.num_train_envs, self.eval_cfg.terrain, self.num_eval_envs)
|
|
else:
|
|
self.terrain = Terrain(self.cfg.terrain, self.num_train_envs)
|
|
if mesh_type == 'plane':
|
|
self._create_ground_plane()
|
|
elif mesh_type == 'heightfield':
|
|
self._create_heightfield()
|
|
elif mesh_type == 'trimesh':
|
|
self._create_trimesh()
|
|
elif mesh_type is not None:
|
|
raise ValueError("Terrain mesh type not recognised. Allowed types are [None, plane, heightfield, trimesh]")
|
|
|
|
self._create_envs()
|
|
|
|
|
|
def set_camera(self, position, lookat):
|
|
""" Set camera position and direction
|
|
"""
|
|
cam_pos = gymapi.Vec3(position[0], position[1], position[2])
|
|
cam_target = gymapi.Vec3(lookat[0], lookat[1], lookat[2])
|
|
self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target) \
|
|
|
|
def set_main_agent_pose(self, loc, quat):
|
|
self.root_states[0, 0:3] = torch.Tensor(loc)
|
|
self.root_states[0, 3:7] = torch.Tensor(quat)
|
|
self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states))
|
|
|
|
# ------------- Callbacks --------------
|
|
def _call_train_eval(self, func, env_ids):
|
|
|
|
env_ids_train = env_ids[env_ids < self.num_train_envs]
|
|
env_ids_eval = env_ids[env_ids >= self.num_train_envs]
|
|
|
|
ret, ret_eval = None, None
|
|
|
|
if len(env_ids_train) > 0:
|
|
ret = func(env_ids_train, self.cfg)
|
|
if len(env_ids_eval) > 0:
|
|
ret_eval = func(env_ids_eval, self.eval_cfg)
|
|
if ret is not None and ret_eval is not None: ret = torch.cat((ret, ret_eval), axis=-1)
|
|
|
|
return ret
|
|
|
|
def _randomize_gravity(self, external_force = None):
|
|
|
|
if external_force is not None:
|
|
self.gravities[:, :] = external_force.unsqueeze(0)
|
|
elif self.cfg.domain_rand.randomize_gravity:
|
|
min_gravity, max_gravity = self.cfg.domain_rand.gravity_range
|
|
external_force = torch.rand(3, dtype=torch.float, device=self.device,
|
|
requires_grad=False) * (max_gravity - min_gravity) + min_gravity
|
|
|
|
self.gravities[:, :] = external_force.unsqueeze(0)
|
|
|
|
sim_params = self.gym.get_sim_params(self.sim)
|
|
gravity = self.gravities[0, :] + torch.Tensor([0, 0, -9.8]).to(self.device)
|
|
self.gravity_vec[:, :] = gravity.unsqueeze(0) / torch.norm(gravity)
|
|
sim_params.gravity = gymapi.Vec3(gravity[0], gravity[1], gravity[2])
|
|
self.gym.set_sim_params(self.sim, sim_params)
|
|
|
|
def _process_rigid_shape_props(self, props, env_id):
|
|
""" Callback allowing to store/change/randomize the rigid shape properties of each environment.
|
|
Called During environment creation.
|
|
Base behavior: randomizes the friction of each environment
|
|
|
|
Args:
|
|
props (List[gymapi.RigidShapeProperties]): Properties of each shape of the asset
|
|
env_id (int): Environment id
|
|
|
|
Returns:
|
|
[List[gymapi.RigidShapeProperties]]: Modified rigid shape properties
|
|
"""
|
|
for s in range(len(props)):
|
|
props[s].friction = self.friction_coeffs[env_id, 0]
|
|
props[s].restitution = self.restitutions[env_id, 0]
|
|
|
|
return props
|
|
|
|
def _process_dof_props(self, props, env_id):
|
|
""" Callback allowing to store/change/randomize the DOF properties of each environment.
|
|
Called During environment creation.
|
|
Base behavior: stores position, velocity and torques limits defined in the URDF
|
|
|
|
Args:
|
|
props (numpy.array): Properties of each DOF of the asset
|
|
env_id (int): Environment id
|
|
|
|
Returns:
|
|
[numpy.array]: Modified DOF properties
|
|
"""
|
|
if env_id == 0:
|
|
self.dof_pos_limits = torch.zeros(self.num_dof, 2, dtype=torch.float, device=self.device,
|
|
requires_grad=False)
|
|
self.dof_vel_limits = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False)
|
|
self.torque_limits = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False)
|
|
for i in range(len(props)):
|
|
self.dof_pos_limits[i, 0] = props["lower"][i].item()
|
|
self.dof_pos_limits[i, 1] = props["upper"][i].item()
|
|
self.dof_vel_limits[i] = props["velocity"][i].item()
|
|
self.torque_limits[i] = props["effort"][i].item()
|
|
# soft limits
|
|
m = (self.dof_pos_limits[i, 0] + self.dof_pos_limits[i, 1]) / 2
|
|
r = self.dof_pos_limits[i, 1] - self.dof_pos_limits[i, 0]
|
|
self.dof_pos_limits[i, 0] = m - 0.5 * r * self.cfg.rewards.soft_dof_pos_limit
|
|
self.dof_pos_limits[i, 1] = m + 0.5 * r * self.cfg.rewards.soft_dof_pos_limit
|
|
|
|
return props
|
|
|
|
def _randomize_rigid_body_props(self, env_ids, cfg):
|
|
if cfg.domain_rand.randomize_base_mass:
|
|
min_payload, max_payload = cfg.domain_rand.added_mass_range
|
|
# self.payloads[env_ids] = -1.0
|
|
self.payloads[env_ids] = torch.rand(len(env_ids), dtype=torch.float, device=self.device,
|
|
requires_grad=False) * (max_payload - min_payload) + min_payload
|
|
if cfg.domain_rand.randomize_com_displacement:
|
|
min_com_displacement, max_com_displacement = cfg.domain_rand.com_displacement_range
|
|
self.com_displacements[env_ids, :] = torch.rand(len(env_ids), 3, dtype=torch.float, device=self.device,
|
|
requires_grad=False) * (
|
|
max_com_displacement - min_com_displacement) + min_com_displacement
|
|
|
|
if cfg.domain_rand.randomize_friction:
|
|
min_friction, max_friction = cfg.domain_rand.friction_range
|
|
self.friction_coeffs[env_ids, :] = torch.rand(len(env_ids), 1, dtype=torch.float, device=self.device,
|
|
requires_grad=False) * (
|
|
max_friction - min_friction) + min_friction
|
|
|
|
if cfg.domain_rand.randomize_restitution:
|
|
min_restitution, max_restitution = cfg.domain_rand.restitution_range
|
|
self.restitutions[env_ids] = torch.rand(len(env_ids), 1, dtype=torch.float, device=self.device,
|
|
requires_grad=False) * (
|
|
max_restitution - min_restitution) + min_restitution
|
|
|
|
def refresh_actor_rigid_shape_props(self, env_ids, cfg):
|
|
for env_id in env_ids:
|
|
rigid_shape_props = self.gym.get_actor_rigid_shape_properties(self.envs[env_id], 0)
|
|
|
|
for i in range(self.num_dof):
|
|
rigid_shape_props[i].friction = self.friction_coeffs[env_id, 0]
|
|
rigid_shape_props[i].restitution = self.restitutions[env_id, 0]
|
|
|
|
self.gym.set_actor_rigid_shape_properties(self.envs[env_id], 0, rigid_shape_props)
|
|
|
|
def _randomize_dof_props(self, env_ids, cfg):
|
|
if cfg.domain_rand.randomize_motor_strength:
|
|
min_strength, max_strength = cfg.domain_rand.motor_strength_range
|
|
self.motor_strengths[env_ids, :] = torch.rand(len(env_ids), dtype=torch.float, device=self.device,
|
|
requires_grad=False).unsqueeze(1) * (
|
|
max_strength - min_strength) + min_strength
|
|
if cfg.domain_rand.randomize_motor_offset:
|
|
min_offset, max_offset = cfg.domain_rand.motor_offset_range
|
|
self.motor_offsets[env_ids, :] = torch.rand(len(env_ids), self.num_dof, dtype=torch.float,
|
|
device=self.device, requires_grad=False) * (
|
|
max_offset - min_offset) + min_offset
|
|
if cfg.domain_rand.randomize_Kp_factor:
|
|
min_Kp_factor, max_Kp_factor = cfg.domain_rand.Kp_factor_range
|
|
self.Kp_factors[env_ids, :] = torch.rand(len(env_ids), dtype=torch.float, device=self.device,
|
|
requires_grad=False).unsqueeze(1) * (
|
|
max_Kp_factor - min_Kp_factor) + min_Kp_factor
|
|
if cfg.domain_rand.randomize_Kd_factor:
|
|
min_Kd_factor, max_Kd_factor = cfg.domain_rand.Kd_factor_range
|
|
self.Kd_factors[env_ids, :] = torch.rand(len(env_ids), dtype=torch.float, device=self.device,
|
|
requires_grad=False).unsqueeze(1) * (
|
|
max_Kd_factor - min_Kd_factor) + min_Kd_factor
|
|
|
|
def _process_rigid_body_props(self, props, env_id):
|
|
self.default_body_mass = props[0].mass
|
|
|
|
props[0].mass = self.default_body_mass + self.payloads[env_id]
|
|
props[0].com = gymapi.Vec3(self.com_displacements[env_id, 0], self.com_displacements[env_id, 1],
|
|
self.com_displacements[env_id, 2])
|
|
return props
|
|
|
|
def _post_physics_step_callback(self):
|
|
""" Callback called before computing terminations, rewards, and observations
|
|
Default behaviour: Compute ang vel command based on target and heading, compute measured terrain heights and randomly push robots
|
|
"""
|
|
|
|
# teleport robots to prevent falling off the edge
|
|
self._call_train_eval(self._teleport_robots, torch.arange(self.num_envs, device=self.device))
|
|
|
|
# resample commands
|
|
sample_interval = int(self.cfg.commands.resampling_time / self.dt)
|
|
env_ids = (self.episode_length_buf % sample_interval == 0).nonzero(as_tuple=False).flatten()
|
|
self._resample_commands(env_ids)
|
|
self._step_contact_targets()
|
|
|
|
# measure terrain heights
|
|
if self.cfg.terrain.measure_heights:
|
|
self.measured_heights = self._get_heights(torch.arange(self.num_envs, device=self.device), self.cfg)
|
|
|
|
# push robots
|
|
self._call_train_eval(self._push_robots, torch.arange(self.num_envs, device=self.device))
|
|
|
|
# randomize dof properties
|
|
env_ids = (self.episode_length_buf % int(self.cfg.domain_rand.rand_interval) == 0).nonzero(
|
|
as_tuple=False).flatten()
|
|
self._call_train_eval(self._randomize_dof_props, env_ids)
|
|
|
|
if self.common_step_counter % int(self.cfg.domain_rand.gravity_rand_interval) == 0:
|
|
self._randomize_gravity()
|
|
if int(self.common_step_counter - self.cfg.domain_rand.gravity_rand_duration) % int(
|
|
self.cfg.domain_rand.gravity_rand_interval) == 0:
|
|
self._randomize_gravity(torch.tensor([0, 0, 0]))
|
|
if self.cfg.domain_rand.randomize_rigids_after_start:
|
|
self._call_train_eval(self._randomize_rigid_body_props, env_ids)
|
|
self._call_train_eval(self.refresh_actor_rigid_shape_props, env_ids)
|
|
|
|
def _resample_commands(self, env_ids):
|
|
|
|
if len(env_ids) == 0: return
|
|
|
|
timesteps = int(self.cfg.commands.resampling_time / self.dt)
|
|
ep_len = min(self.cfg.env.max_episode_length, timesteps)
|
|
|
|
# update curricula based on terminated environment bins and categories
|
|
for i, (category, curriculum) in enumerate(zip(self.category_names, self.curricula)):
|
|
env_ids_in_category = self.env_command_categories[env_ids.cpu()] == i
|
|
if isinstance(env_ids_in_category, np.bool_) or len(env_ids_in_category) == 1:
|
|
env_ids_in_category = torch.tensor([env_ids_in_category], dtype=torch.bool)
|
|
elif len(env_ids_in_category) == 0:
|
|
continue
|
|
|
|
env_ids_in_category = env_ids[env_ids_in_category]
|
|
|
|
task_rewards, success_thresholds = [], []
|
|
for key in ["tracking_lin_vel", "tracking_ang_vel", "tracking_contacts_shaped_force",
|
|
"tracking_contacts_shaped_vel"]:
|
|
if key in self.command_sums.keys():
|
|
task_rewards.append(self.command_sums[key][env_ids_in_category] / ep_len)
|
|
success_thresholds.append(self.curriculum_thresholds[key] * self.reward_scales[key])
|
|
|
|
old_bins = self.env_command_bins[env_ids_in_category.cpu().numpy()]
|
|
if len(success_thresholds) > 0:
|
|
curriculum.update(old_bins, task_rewards, success_thresholds,
|
|
local_range=np.array(
|
|
[0.55, 0.55, 0.55, 0.55, 0.35, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0, 1.0, 1.0, 1.0,
|
|
1.0]))
|
|
|
|
# assign resampled environments to new categories
|
|
random_env_floats = torch.rand(len(env_ids), device=self.device)
|
|
probability_per_category = 1. / len(self.category_names)
|
|
category_env_ids = [env_ids[torch.logical_and(probability_per_category * i <= random_env_floats,
|
|
random_env_floats < probability_per_category * (i + 1))] for i in
|
|
range(len(self.category_names))]
|
|
|
|
# sample from new category curricula
|
|
for i, (category, env_ids_in_category, curriculum) in enumerate(
|
|
zip(self.category_names, category_env_ids, self.curricula)):
|
|
|
|
batch_size = len(env_ids_in_category)
|
|
if batch_size == 0: continue
|
|
|
|
new_commands, new_bin_inds = curriculum.sample(batch_size=batch_size)
|
|
|
|
self.env_command_bins[env_ids_in_category.cpu().numpy()] = new_bin_inds
|
|
self.env_command_categories[env_ids_in_category.cpu().numpy()] = i
|
|
|
|
self.commands[env_ids_in_category, :] = torch.Tensor(new_commands[:, :self.cfg.commands.num_commands]).to(
|
|
self.device)
|
|
|
|
if self.cfg.commands.num_commands > 5:
|
|
if self.cfg.commands.gaitwise_curricula:
|
|
for i, (category, env_ids_in_category) in enumerate(zip(self.category_names, category_env_ids)):
|
|
if category == "pronk": # pronking
|
|
self.commands[env_ids_in_category, 5] = (self.commands[env_ids_in_category, 5] / 2 - 0.25) % 1
|
|
self.commands[env_ids_in_category, 6] = (self.commands[env_ids_in_category, 6] / 2 - 0.25) % 1
|
|
self.commands[env_ids_in_category, 7] = (self.commands[env_ids_in_category, 7] / 2 - 0.25) % 1
|
|
elif category == "trot": # trotting
|
|
self.commands[env_ids_in_category, 5] = self.commands[env_ids_in_category, 5] / 2 + 0.25
|
|
self.commands[env_ids_in_category, 6] = 0
|
|
self.commands[env_ids_in_category, 7] = 0
|
|
elif category == "pace": # pacing
|
|
self.commands[env_ids_in_category, 5] = 0
|
|
self.commands[env_ids_in_category, 6] = self.commands[env_ids_in_category, 6] / 2 + 0.25
|
|
self.commands[env_ids_in_category, 7] = 0
|
|
elif category == "bound": # bounding
|
|
self.commands[env_ids_in_category, 5] = 0
|
|
self.commands[env_ids_in_category, 6] = 0
|
|
self.commands[env_ids_in_category, 7] = self.commands[env_ids_in_category, 7] / 2 + 0.25
|
|
|
|
elif self.cfg.commands.exclusive_phase_offset:
|
|
random_env_floats = torch.rand(len(env_ids), device=self.device)
|
|
trotting_envs = env_ids[random_env_floats < 0.34]
|
|
pacing_envs = env_ids[torch.logical_and(0.34 <= random_env_floats, random_env_floats < 0.67)]
|
|
bounding_envs = env_ids[0.67 <= random_env_floats]
|
|
self.commands[pacing_envs, 5] = 0
|
|
self.commands[bounding_envs, 5] = 0
|
|
self.commands[trotting_envs, 6] = 0
|
|
self.commands[bounding_envs, 6] = 0
|
|
self.commands[trotting_envs, 7] = 0
|
|
self.commands[pacing_envs, 7] = 0
|
|
|
|
elif self.cfg.commands.balance_gait_distribution:
|
|
random_env_floats = torch.rand(len(env_ids), device=self.device)
|
|
pronking_envs = env_ids[random_env_floats <= 0.25]
|
|
trotting_envs = env_ids[torch.logical_and(0.25 <= random_env_floats, random_env_floats < 0.50)]
|
|
pacing_envs = env_ids[torch.logical_and(0.50 <= random_env_floats, random_env_floats < 0.75)]
|
|
bounding_envs = env_ids[0.75 <= random_env_floats]
|
|
self.commands[pronking_envs, 5] = (self.commands[pronking_envs, 5] / 2 - 0.25) % 1
|
|
self.commands[pronking_envs, 6] = (self.commands[pronking_envs, 6] / 2 - 0.25) % 1
|
|
self.commands[pronking_envs, 7] = (self.commands[pronking_envs, 7] / 2 - 0.25) % 1
|
|
self.commands[trotting_envs, 6] = 0
|
|
self.commands[trotting_envs, 7] = 0
|
|
self.commands[pacing_envs, 5] = 0
|
|
self.commands[pacing_envs, 7] = 0
|
|
self.commands[bounding_envs, 5] = 0
|
|
self.commands[bounding_envs, 6] = 0
|
|
self.commands[trotting_envs, 5] = self.commands[trotting_envs, 5] / 2 + 0.25
|
|
self.commands[pacing_envs, 6] = self.commands[pacing_envs, 6] / 2 + 0.25
|
|
self.commands[bounding_envs, 7] = self.commands[bounding_envs, 7] / 2 + 0.25
|
|
|
|
if self.cfg.commands.binary_phases:
|
|
self.commands[env_ids, 5] = (torch.round(2 * self.commands[env_ids, 5])) / 2.0 % 1
|
|
self.commands[env_ids, 6] = (torch.round(2 * self.commands[env_ids, 6])) / 2.0 % 1
|
|
self.commands[env_ids, 7] = (torch.round(2 * self.commands[env_ids, 7])) / 2.0 % 1
|
|
|
|
# setting the smaller commands to zero
|
|
self.commands[env_ids, :2] *= (torch.norm(self.commands[env_ids, :2], dim=1) > 0.2).unsqueeze(1)
|
|
|
|
# reset command sums
|
|
for key in self.command_sums.keys():
|
|
self.command_sums[key][env_ids] = 0.
|
|
|
|
def _step_contact_targets(self):
|
|
if self.cfg.env.observe_gait_commands:
|
|
frequencies = self.commands[:, 4]
|
|
phases = self.commands[:, 5]
|
|
offsets = self.commands[:, 6]
|
|
bounds = self.commands[:, 7]
|
|
durations = self.commands[:, 8]
|
|
self.gait_indices = torch.remainder(self.gait_indices + self.dt * frequencies, 1.0)
|
|
|
|
if self.cfg.commands.pacing_offset:
|
|
foot_indices = [self.gait_indices + phases + offsets + bounds,
|
|
self.gait_indices + bounds,
|
|
self.gait_indices + offsets,
|
|
self.gait_indices + phases]
|
|
else:
|
|
foot_indices = [self.gait_indices + phases + offsets + bounds,
|
|
self.gait_indices + offsets,
|
|
self.gait_indices + bounds,
|
|
self.gait_indices + phases]
|
|
|
|
self.foot_indices = torch.remainder(torch.cat([foot_indices[i].unsqueeze(1) for i in range(4)], dim=1), 1.0)
|
|
|
|
for idxs in foot_indices:
|
|
stance_idxs = torch.remainder(idxs, 1) < durations
|
|
swing_idxs = torch.remainder(idxs, 1) > durations
|
|
|
|
idxs[stance_idxs] = torch.remainder(idxs[stance_idxs], 1) * (0.5 / durations[stance_idxs])
|
|
idxs[swing_idxs] = 0.5 + (torch.remainder(idxs[swing_idxs], 1) - durations[swing_idxs]) * (
|
|
0.5 / (1 - durations[swing_idxs]))
|
|
|
|
# if self.cfg.commands.durations_warp_clock_inputs:
|
|
|
|
self.clock_inputs[:, 0] = torch.sin(2 * np.pi * foot_indices[0])
|
|
self.clock_inputs[:, 1] = torch.sin(2 * np.pi * foot_indices[1])
|
|
self.clock_inputs[:, 2] = torch.sin(2 * np.pi * foot_indices[2])
|
|
self.clock_inputs[:, 3] = torch.sin(2 * np.pi * foot_indices[3])
|
|
|
|
self.doubletime_clock_inputs[:, 0] = torch.sin(4 * np.pi * foot_indices[0])
|
|
self.doubletime_clock_inputs[:, 1] = torch.sin(4 * np.pi * foot_indices[1])
|
|
self.doubletime_clock_inputs[:, 2] = torch.sin(4 * np.pi * foot_indices[2])
|
|
self.doubletime_clock_inputs[:, 3] = torch.sin(4 * np.pi * foot_indices[3])
|
|
|
|
self.halftime_clock_inputs[:, 0] = torch.sin(np.pi * foot_indices[0])
|
|
self.halftime_clock_inputs[:, 1] = torch.sin(np.pi * foot_indices[1])
|
|
self.halftime_clock_inputs[:, 2] = torch.sin(np.pi * foot_indices[2])
|
|
self.halftime_clock_inputs[:, 3] = torch.sin(np.pi * foot_indices[3])
|
|
|
|
# von mises distribution
|
|
kappa = self.cfg.rewards.kappa_gait_probs
|
|
smoothing_cdf_start = torch.distributions.normal.Normal(0,
|
|
kappa).cdf # (x) + torch.distributions.normal.Normal(1, kappa).cdf(x)) / 2
|
|
|
|
smoothing_multiplier_FL = (smoothing_cdf_start(torch.remainder(foot_indices[0], 1.0)) * (
|
|
1 - smoothing_cdf_start(torch.remainder(foot_indices[0], 1.0) - 0.5)) +
|
|
smoothing_cdf_start(torch.remainder(foot_indices[0], 1.0) - 1) * (
|
|
1 - smoothing_cdf_start(
|
|
torch.remainder(foot_indices[0], 1.0) - 0.5 - 1)))
|
|
smoothing_multiplier_FR = (smoothing_cdf_start(torch.remainder(foot_indices[1], 1.0)) * (
|
|
1 - smoothing_cdf_start(torch.remainder(foot_indices[1], 1.0) - 0.5)) +
|
|
smoothing_cdf_start(torch.remainder(foot_indices[1], 1.0) - 1) * (
|
|
1 - smoothing_cdf_start(
|
|
torch.remainder(foot_indices[1], 1.0) - 0.5 - 1)))
|
|
smoothing_multiplier_RL = (smoothing_cdf_start(torch.remainder(foot_indices[2], 1.0)) * (
|
|
1 - smoothing_cdf_start(torch.remainder(foot_indices[2], 1.0) - 0.5)) +
|
|
smoothing_cdf_start(torch.remainder(foot_indices[2], 1.0) - 1) * (
|
|
1 - smoothing_cdf_start(
|
|
torch.remainder(foot_indices[2], 1.0) - 0.5 - 1)))
|
|
smoothing_multiplier_RR = (smoothing_cdf_start(torch.remainder(foot_indices[3], 1.0)) * (
|
|
1 - smoothing_cdf_start(torch.remainder(foot_indices[3], 1.0) - 0.5)) +
|
|
smoothing_cdf_start(torch.remainder(foot_indices[3], 1.0) - 1) * (
|
|
1 - smoothing_cdf_start(
|
|
torch.remainder(foot_indices[3], 1.0) - 0.5 - 1)))
|
|
|
|
self.desired_contact_states[:, 0] = smoothing_multiplier_FL
|
|
self.desired_contact_states[:, 1] = smoothing_multiplier_FR
|
|
self.desired_contact_states[:, 2] = smoothing_multiplier_RL
|
|
self.desired_contact_states[:, 3] = smoothing_multiplier_RR
|
|
|
|
if self.cfg.commands.num_commands > 9:
|
|
self.desired_footswing_height = self.commands[:, 9]
|
|
|
|
def _compute_torques(self, actions):
|
|
""" Compute torques from actions.
|
|
Actions can be interpreted as position or velocity targets given to a PD controller, or directly as scaled torques.
|
|
[NOTE]: torques must have the same dimension as the number of DOFs, even if some DOFs are not actuated.
|
|
|
|
Args:
|
|
actions (torch.Tensor): Actions
|
|
|
|
Returns:
|
|
[torch.Tensor]: Torques sent to the simulation
|
|
"""
|
|
# pd controller
|
|
actions_scaled = actions[:, :12] * self.cfg.control.action_scale
|
|
actions_scaled[:, [0, 3, 6, 9]] *= self.cfg.control.hip_scale_reduction # scale down hip flexion range
|
|
|
|
if self.cfg.domain_rand.randomize_lag_timesteps:
|
|
self.lag_buffer = self.lag_buffer[1:] + [actions_scaled.clone()]
|
|
self.joint_pos_target = self.lag_buffer[0] + self.default_dof_pos
|
|
else:
|
|
self.joint_pos_target = actions_scaled + self.default_dof_pos
|
|
|
|
control_type = self.cfg.control.control_type
|
|
|
|
if control_type == "actuator_net":
|
|
self.joint_pos_err = self.dof_pos - self.joint_pos_target + self.motor_offsets
|
|
self.joint_vel = self.dof_vel
|
|
torques = self.actuator_network(self.joint_pos_err, self.joint_pos_err_last, self.joint_pos_err_last_last,
|
|
self.joint_vel, self.joint_vel_last, self.joint_vel_last_last)
|
|
self.joint_pos_err_last_last = torch.clone(self.joint_pos_err_last)
|
|
self.joint_pos_err_last = torch.clone(self.joint_pos_err)
|
|
self.joint_vel_last_last = torch.clone(self.joint_vel_last)
|
|
self.joint_vel_last = torch.clone(self.joint_vel)
|
|
elif control_type == "P":
|
|
torques = self.p_gains * self.Kp_factors * (
|
|
self.joint_pos_target - self.dof_pos + self.motor_offsets) - self.d_gains * self.Kd_factors * self.dof_vel
|
|
else:
|
|
raise NameError(f"Unknown controller type: {control_type}")
|
|
|
|
torques = torques * self.motor_strengths
|
|
return torch.clip(torques, -self.torque_limits, self.torque_limits)
|
|
|
|
def _reset_dofs(self, env_ids, cfg):
|
|
""" Resets DOF position and velocities of selected environmments
|
|
Positions are randomly selected within 0.5:1.5 x default positions.
|
|
Velocities are set to zero.
|
|
|
|
Args:
|
|
env_ids (List[int]): Environemnt ids
|
|
"""
|
|
self.dof_pos[env_ids] = self.default_dof_pos * torch_rand_float(0.5, 1.5, (len(env_ids), self.num_dof),
|
|
device=self.device)
|
|
self.dof_vel[env_ids] = 0.
|
|
|
|
env_ids_int32 = env_ids.to(dtype=torch.int32)
|
|
self.gym.set_dof_state_tensor_indexed(self.sim,
|
|
gymtorch.unwrap_tensor(self.dof_state),
|
|
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
|
|
|
|
def _reset_root_states(self, env_ids, cfg):
|
|
""" Resets ROOT states position and velocities of selected environmments
|
|
Sets base position based on the curriculum
|
|
Selects randomized base velocities within -0.5:0.5 [m/s, rad/s]
|
|
Args:
|
|
env_ids (List[int]): Environemnt ids
|
|
"""
|
|
# base position
|
|
if self.custom_origins:
|
|
self.root_states[env_ids] = self.base_init_state
|
|
self.root_states[env_ids, :3] += self.env_origins[env_ids]
|
|
self.root_states[env_ids, 0:1] += torch_rand_float(-cfg.terrain.x_init_range,
|
|
cfg.terrain.x_init_range, (len(env_ids), 1),
|
|
device=self.device)
|
|
self.root_states[env_ids, 1:2] += torch_rand_float(-cfg.terrain.y_init_range,
|
|
cfg.terrain.y_init_range, (len(env_ids), 1),
|
|
device=self.device)
|
|
self.root_states[env_ids, 0] += cfg.terrain.x_init_offset
|
|
self.root_states[env_ids, 1] += cfg.terrain.y_init_offset
|
|
else:
|
|
self.root_states[env_ids] = self.base_init_state
|
|
self.root_states[env_ids, :3] += self.env_origins[env_ids]
|
|
|
|
# base yaws
|
|
init_yaws = torch_rand_float(-cfg.terrain.yaw_init_range,
|
|
cfg.terrain.yaw_init_range, (len(env_ids), 1),
|
|
device=self.device)
|
|
quat = quat_from_angle_axis(init_yaws, torch.Tensor([0, 0, 1]).to(self.device))[:, 0, :]
|
|
self.root_states[env_ids, 3:7] = quat
|
|
|
|
# base velocities
|
|
self.root_states[env_ids, 7:13] = torch_rand_float(-0.5, 0.5, (len(env_ids), 6),
|
|
device=self.device) # [7:10]: lin vel, [10:13]: ang vel
|
|
env_ids_int32 = env_ids.to(dtype=torch.int32)
|
|
self.gym.set_actor_root_state_tensor_indexed(self.sim,
|
|
gymtorch.unwrap_tensor(self.root_states),
|
|
gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32))
|
|
|
|
if cfg.env.record_video and 0 in env_ids:
|
|
if self.complete_video_frames is None:
|
|
self.complete_video_frames = []
|
|
else:
|
|
self.complete_video_frames = self.video_frames[:]
|
|
self.video_frames = []
|
|
|
|
if cfg.env.record_video and self.eval_cfg is not None and self.num_train_envs in env_ids:
|
|
if self.complete_video_frames_eval is None:
|
|
self.complete_video_frames_eval = []
|
|
else:
|
|
self.complete_video_frames_eval = self.video_frames_eval[:]
|
|
self.video_frames_eval = []
|
|
|
|
def _push_robots(self, env_ids, cfg):
|
|
""" Random pushes the robots. Emulates an impulse by setting a randomized base velocity.
|
|
"""
|
|
if cfg.domain_rand.push_robots:
|
|
env_ids = env_ids[self.episode_length_buf[env_ids] % int(cfg.domain_rand.push_interval) == 0]
|
|
|
|
max_vel = cfg.domain_rand.max_push_vel_xy
|
|
self.root_states[env_ids, 7:9] = torch_rand_float(-max_vel, max_vel, (len(env_ids), 2),
|
|
device=self.device) # lin vel x/y
|
|
self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states))
|
|
|
|
def _teleport_robots(self, env_ids, cfg):
|
|
""" Teleports any robots that are too close to the edge to the other side
|
|
"""
|
|
if cfg.terrain.teleport_robots:
|
|
thresh = cfg.terrain.teleport_thresh
|
|
|
|
x_offset = int(cfg.terrain.x_offset * cfg.terrain.horizontal_scale)
|
|
|
|
low_x_ids = env_ids[self.root_states[env_ids, 0] < thresh + x_offset]
|
|
self.root_states[low_x_ids, 0] += cfg.terrain.terrain_length * (cfg.terrain.num_rows - 1)
|
|
|
|
high_x_ids = env_ids[
|
|
self.root_states[env_ids, 0] > cfg.terrain.terrain_length * cfg.terrain.num_rows - thresh + x_offset]
|
|
self.root_states[high_x_ids, 0] -= cfg.terrain.terrain_length * (cfg.terrain.num_rows - 1)
|
|
|
|
low_y_ids = env_ids[self.root_states[env_ids, 1] < thresh]
|
|
self.root_states[low_y_ids, 1] += cfg.terrain.terrain_width * (cfg.terrain.num_cols - 1)
|
|
|
|
high_y_ids = env_ids[
|
|
self.root_states[env_ids, 1] > cfg.terrain.terrain_width * cfg.terrain.num_cols - thresh]
|
|
self.root_states[high_y_ids, 1] -= cfg.terrain.terrain_width * (cfg.terrain.num_cols - 1)
|
|
|
|
self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states))
|
|
self.gym.refresh_actor_root_state_tensor(self.sim)
|
|
|
|
def _get_noise_scale_vec(self, cfg):
|
|
""" Sets a vector used to scale the noise added to the observations.
|
|
[NOTE]: Must be adapted when changing the observations structure
|
|
|
|
Args:
|
|
cfg (Dict): Environment config file
|
|
|
|
Returns:
|
|
[torch.Tensor]: Vector of scales used to multiply a uniform distribution in [-1, 1]
|
|
"""
|
|
# noise_vec = torch.zeros_like(self.obs_buf[0])
|
|
self.add_noise = self.cfg.noise.add_noise
|
|
noise_scales = self.cfg.noise_scales
|
|
noise_level = self.cfg.noise.noise_level
|
|
noise_vec = torch.cat((torch.ones(3) * noise_scales.gravity * noise_level,
|
|
torch.ones(
|
|
self.num_actuated_dof) * noise_scales.dof_pos * noise_level * self.obs_scales.dof_pos,
|
|
torch.ones(
|
|
self.num_actuated_dof) * noise_scales.dof_vel * noise_level * self.obs_scales.dof_vel,
|
|
torch.zeros(self.num_actions),
|
|
), dim=0)
|
|
|
|
if self.cfg.env.observe_command:
|
|
noise_vec = torch.cat((torch.ones(3) * noise_scales.gravity * noise_level,
|
|
torch.zeros(self.cfg.commands.num_commands),
|
|
torch.ones(
|
|
self.num_actuated_dof) * noise_scales.dof_pos * noise_level * self.obs_scales.dof_pos,
|
|
torch.ones(
|
|
self.num_actuated_dof) * noise_scales.dof_vel * noise_level * self.obs_scales.dof_vel,
|
|
torch.zeros(self.num_actions),
|
|
), dim=0)
|
|
if self.cfg.env.observe_two_prev_actions:
|
|
noise_vec = torch.cat((noise_vec,
|
|
torch.zeros(self.num_actions)
|
|
), dim=0)
|
|
if self.cfg.env.observe_timing_parameter:
|
|
noise_vec = torch.cat((noise_vec,
|
|
torch.zeros(1)
|
|
), dim=0)
|
|
if self.cfg.env.observe_clock_inputs:
|
|
noise_vec = torch.cat((noise_vec,
|
|
torch.zeros(4)
|
|
), dim=0)
|
|
if self.cfg.env.observe_vel:
|
|
noise_vec = torch.cat((torch.ones(3) * noise_scales.lin_vel * noise_level * self.obs_scales.lin_vel,
|
|
torch.ones(3) * noise_scales.ang_vel * noise_level * self.obs_scales.ang_vel,
|
|
noise_vec
|
|
), dim=0)
|
|
|
|
if self.cfg.env.observe_only_lin_vel:
|
|
noise_vec = torch.cat((torch.ones(3) * noise_scales.lin_vel * noise_level * self.obs_scales.lin_vel,
|
|
noise_vec
|
|
), dim=0)
|
|
|
|
if self.cfg.env.observe_yaw:
|
|
noise_vec = torch.cat((noise_vec,
|
|
torch.zeros(1),
|
|
), dim=0)
|
|
|
|
if self.cfg.env.observe_contact_states:
|
|
noise_vec = torch.cat((noise_vec,
|
|
torch.ones(4) * noise_scales.contact_states * noise_level,
|
|
), dim=0)
|
|
|
|
|
|
noise_vec = noise_vec.to(self.device)
|
|
|
|
return noise_vec
|
|
|
|
# ----------------------------------------
|
|
def _init_buffers(self):
|
|
""" Initialize torch tensors which will contain simulation states and processed quantities
|
|
"""
|
|
# get gym GPU state tensors
|
|
actor_root_state = self.gym.acquire_actor_root_state_tensor(self.sim)
|
|
dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim)
|
|
net_contact_forces = self.gym.acquire_net_contact_force_tensor(self.sim)
|
|
rigid_body_state = self.gym.acquire_rigid_body_state_tensor(self.sim)
|
|
self.gym.refresh_dof_state_tensor(self.sim)
|
|
self.gym.refresh_actor_root_state_tensor(self.sim)
|
|
self.gym.refresh_net_contact_force_tensor(self.sim)
|
|
self.gym.refresh_rigid_body_state_tensor(self.sim)
|
|
self.gym.render_all_camera_sensors(self.sim)
|
|
|
|
# create some wrapper tensors for different slices
|
|
self.root_states = gymtorch.wrap_tensor(actor_root_state)
|
|
self.dof_state = gymtorch.wrap_tensor(dof_state_tensor)
|
|
self.net_contact_forces = gymtorch.wrap_tensor(net_contact_forces)[:self.num_envs * self.num_bodies, :]
|
|
self.dof_pos = self.dof_state.view(self.num_envs, self.num_dof, 2)[..., 0]
|
|
self.base_pos = self.root_states[:self.num_envs, 0:3]
|
|
self.dof_vel = self.dof_state.view(self.num_envs, self.num_dof, 2)[..., 1]
|
|
self.base_quat = self.root_states[:self.num_envs, 3:7]
|
|
self.rigid_body_state = gymtorch.wrap_tensor(rigid_body_state)[:self.num_envs * self.num_bodies, :]
|
|
self.foot_velocities = self.rigid_body_state.view(self.num_envs, self.num_bodies, 13)[:,
|
|
self.feet_indices,
|
|
7:10]
|
|
self.foot_positions = self.rigid_body_state.view(self.num_envs, self.num_bodies, 13)[:, self.feet_indices,
|
|
0:3]
|
|
self.prev_base_pos = self.base_pos.clone()
|
|
self.prev_foot_velocities = self.foot_velocities.clone()
|
|
|
|
self.lag_buffer = [torch.zeros_like(self.dof_pos) for i in range(self.cfg.domain_rand.lag_timesteps+1)]
|
|
|
|
self.contact_forces = gymtorch.wrap_tensor(net_contact_forces)[:self.num_envs * self.num_bodies, :].view(self.num_envs, -1,
|
|
3) # shape: num_envs, num_bodies, xyz axis
|
|
|
|
# initialize some data used later on
|
|
self.common_step_counter = 0
|
|
self.extras = {}
|
|
|
|
if self.cfg.terrain.measure_heights:
|
|
self.height_points = self._init_height_points(torch.arange(self.num_envs, device=self.device), self.cfg)
|
|
self.measured_heights = 0
|
|
|
|
self.noise_scale_vec = self._get_noise_scale_vec(self.cfg) # , self.eval_cfg)
|
|
self.gravity_vec = to_torch(get_axis_params(-1., self.up_axis_idx), device=self.device).repeat(
|
|
(self.num_envs, 1))
|
|
self.forward_vec = to_torch([1., 0., 0.], device=self.device).repeat((self.num_envs, 1))
|
|
self.torques = torch.zeros(self.num_envs, self.num_dof, dtype=torch.float, device=self.device,
|
|
requires_grad=False)
|
|
self.p_gains = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False)
|
|
self.d_gains = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False)
|
|
self.actions = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device,
|
|
requires_grad=False)
|
|
self.last_actions = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device,
|
|
requires_grad=False)
|
|
self.last_last_actions = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device,
|
|
requires_grad=False)
|
|
self.joint_pos_target = torch.zeros(self.num_envs, self.num_dof, dtype=torch.float,
|
|
device=self.device,
|
|
requires_grad=False)
|
|
self.last_joint_pos_target = torch.zeros(self.num_envs, self.num_dof, dtype=torch.float, device=self.device,
|
|
requires_grad=False)
|
|
self.last_last_joint_pos_target = torch.zeros(self.num_envs, self.num_dof, dtype=torch.float,
|
|
device=self.device,
|
|
requires_grad=False)
|
|
self.last_dof_vel = torch.zeros_like(self.dof_vel)
|
|
self.last_root_vel = torch.zeros_like(self.root_states[:, 7:13])
|
|
|
|
|
|
self.commands_value = torch.zeros(self.num_envs, self.cfg.commands.num_commands, dtype=torch.float,
|
|
device=self.device, requires_grad=False)
|
|
self.commands = torch.zeros_like(self.commands_value) # x vel, y vel, yaw vel, heading
|
|
self.commands_scale = torch.tensor([self.obs_scales.lin_vel, self.obs_scales.lin_vel, self.obs_scales.ang_vel,
|
|
self.obs_scales.body_height_cmd, self.obs_scales.gait_freq_cmd,
|
|
self.obs_scales.gait_phase_cmd, self.obs_scales.gait_phase_cmd,
|
|
self.obs_scales.gait_phase_cmd, self.obs_scales.gait_phase_cmd,
|
|
self.obs_scales.footswing_height_cmd, self.obs_scales.body_pitch_cmd,
|
|
self.obs_scales.body_roll_cmd, self.obs_scales.stance_width_cmd,
|
|
self.obs_scales.stance_length_cmd, self.obs_scales.aux_reward_cmd],
|
|
device=self.device, requires_grad=False, )[:self.cfg.commands.num_commands]
|
|
self.desired_contact_states = torch.zeros(self.num_envs, 4, dtype=torch.float, device=self.device,
|
|
requires_grad=False, )
|
|
|
|
|
|
self.feet_air_time = torch.zeros(self.num_envs, self.feet_indices.shape[0], dtype=torch.float,
|
|
device=self.device, requires_grad=False)
|
|
self.last_contacts = torch.zeros(self.num_envs, len(self.feet_indices), dtype=torch.bool, device=self.device,
|
|
requires_grad=False)
|
|
self.last_contact_filt = torch.zeros(self.num_envs, len(self.feet_indices), dtype=torch.bool,
|
|
device=self.device,
|
|
requires_grad=False)
|
|
self.base_lin_vel = quat_rotate_inverse(self.base_quat, self.root_states[:self.num_envs, 7:10])
|
|
self.base_ang_vel = quat_rotate_inverse(self.base_quat, self.root_states[:self.num_envs, 10:13])
|
|
self.projected_gravity = quat_rotate_inverse(self.base_quat, self.gravity_vec)
|
|
|
|
# joint positions offsets and PD gains
|
|
self.default_dof_pos = torch.zeros(self.num_dof, dtype=torch.float, device=self.device, requires_grad=False)
|
|
for i in range(self.num_dofs):
|
|
name = self.dof_names[i]
|
|
angle = self.cfg.init_state.default_joint_angles[name]
|
|
self.default_dof_pos[i] = angle
|
|
found = False
|
|
for dof_name in self.cfg.control.stiffness.keys():
|
|
if dof_name in name:
|
|
self.p_gains[i] = self.cfg.control.stiffness[dof_name]
|
|
self.d_gains[i] = self.cfg.control.damping[dof_name]
|
|
found = True
|
|
if not found:
|
|
self.p_gains[i] = 0.
|
|
self.d_gains[i] = 0.
|
|
if self.cfg.control.control_type in ["P", "V"]:
|
|
print(f"PD gain of joint {name} were not defined, setting them to zero")
|
|
self.default_dof_pos = self.default_dof_pos.unsqueeze(0)
|
|
|
|
if self.cfg.control.control_type == "actuator_net":
|
|
actuator_path = f'{os.path.dirname(os.path.dirname(os.path.realpath(__file__)))}/../../resources/actuator_nets/unitree_go1.pt'
|
|
actuator_network = torch.jit.load(actuator_path).to(self.device)
|
|
|
|
def eval_actuator_network(joint_pos, joint_pos_last, joint_pos_last_last, joint_vel, joint_vel_last,
|
|
joint_vel_last_last):
|
|
xs = torch.cat((joint_pos.unsqueeze(-1),
|
|
joint_pos_last.unsqueeze(-1),
|
|
joint_pos_last_last.unsqueeze(-1),
|
|
joint_vel.unsqueeze(-1),
|
|
joint_vel_last.unsqueeze(-1),
|
|
joint_vel_last_last.unsqueeze(-1)), dim=-1)
|
|
torques = actuator_network(xs.view(self.num_envs * 12, 6))
|
|
return torques.view(self.num_envs, 12)
|
|
|
|
self.actuator_network = eval_actuator_network
|
|
|
|
self.joint_pos_err_last_last = torch.zeros((self.num_envs, 12), device=self.device)
|
|
self.joint_pos_err_last = torch.zeros((self.num_envs, 12), device=self.device)
|
|
self.joint_vel_last_last = torch.zeros((self.num_envs, 12), device=self.device)
|
|
self.joint_vel_last = torch.zeros((self.num_envs, 12), device=self.device)
|
|
|
|
def _init_custom_buffers__(self):
|
|
# domain randomization properties
|
|
self.friction_coeffs = self.default_friction * torch.ones(self.num_envs, 4, dtype=torch.float, device=self.device,
|
|
requires_grad=False)
|
|
self.restitutions = self.default_restitution * torch.ones(self.num_envs, 4, dtype=torch.float, device=self.device,
|
|
requires_grad=False)
|
|
self.payloads = torch.zeros(self.num_envs, dtype=torch.float, device=self.device, requires_grad=False)
|
|
self.com_displacements = torch.zeros(self.num_envs, 3, dtype=torch.float, device=self.device,
|
|
requires_grad=False)
|
|
self.motor_strengths = torch.ones(self.num_envs, self.num_dof, dtype=torch.float, device=self.device,
|
|
requires_grad=False)
|
|
self.motor_offsets = torch.zeros(self.num_envs, self.num_dof, dtype=torch.float, device=self.device,
|
|
requires_grad=False)
|
|
self.Kp_factors = torch.ones(self.num_envs, self.num_dof, dtype=torch.float, device=self.device,
|
|
requires_grad=False)
|
|
self.Kd_factors = torch.ones(self.num_envs, self.num_dof, dtype=torch.float, device=self.device,
|
|
requires_grad=False)
|
|
self.gravities = torch.zeros(self.num_envs, 3, dtype=torch.float, device=self.device,
|
|
requires_grad=False)
|
|
self.gravity_vec = to_torch(get_axis_params(-1., self.up_axis_idx), device=self.device).repeat(
|
|
(self.num_envs, 1))
|
|
|
|
# if custom initialization values were passed in, set them here
|
|
dynamics_params = ["friction_coeffs", "restitutions", "payloads", "com_displacements", "motor_strengths",
|
|
"Kp_factors", "Kd_factors"]
|
|
if self.initial_dynamics_dict is not None:
|
|
for k, v in self.initial_dynamics_dict.items():
|
|
if k in dynamics_params:
|
|
setattr(self, k, v.to(self.device))
|
|
|
|
self.gait_indices = torch.zeros(self.num_envs, dtype=torch.float, device=self.device,
|
|
requires_grad=False)
|
|
self.clock_inputs = torch.zeros(self.num_envs, 4, dtype=torch.float, device=self.device,
|
|
requires_grad=False)
|
|
self.doubletime_clock_inputs = torch.zeros(self.num_envs, 4, dtype=torch.float, device=self.device,
|
|
requires_grad=False)
|
|
self.halftime_clock_inputs = torch.zeros(self.num_envs, 4, dtype=torch.float, device=self.device,
|
|
requires_grad=False)
|
|
|
|
def _init_command_distribution(self, env_ids):
|
|
# new style curriculum
|
|
self.category_names = ['nominal']
|
|
if self.cfg.commands.gaitwise_curricula:
|
|
self.category_names = ['pronk', 'trot', 'pace', 'bound']
|
|
|
|
if self.cfg.commands.curriculum_type == "RewardThresholdCurriculum":
|
|
from .curriculum import RewardThresholdCurriculum
|
|
CurriculumClass = RewardThresholdCurriculum
|
|
self.curricula = []
|
|
for category in self.category_names:
|
|
self.curricula += [CurriculumClass(seed=self.cfg.commands.curriculum_seed,
|
|
x_vel=(self.cfg.commands.limit_vel_x[0],
|
|
self.cfg.commands.limit_vel_x[1],
|
|
self.cfg.commands.num_bins_vel_x),
|
|
y_vel=(self.cfg.commands.limit_vel_y[0],
|
|
self.cfg.commands.limit_vel_y[1],
|
|
self.cfg.commands.num_bins_vel_y),
|
|
yaw_vel=(self.cfg.commands.limit_vel_yaw[0],
|
|
self.cfg.commands.limit_vel_yaw[1],
|
|
self.cfg.commands.num_bins_vel_yaw),
|
|
body_height=(self.cfg.commands.limit_body_height[0],
|
|
self.cfg.commands.limit_body_height[1],
|
|
self.cfg.commands.num_bins_body_height),
|
|
gait_frequency=(self.cfg.commands.limit_gait_frequency[0],
|
|
self.cfg.commands.limit_gait_frequency[1],
|
|
self.cfg.commands.num_bins_gait_frequency),
|
|
gait_phase=(self.cfg.commands.limit_gait_phase[0],
|
|
self.cfg.commands.limit_gait_phase[1],
|
|
self.cfg.commands.num_bins_gait_phase),
|
|
gait_offset=(self.cfg.commands.limit_gait_offset[0],
|
|
self.cfg.commands.limit_gait_offset[1],
|
|
self.cfg.commands.num_bins_gait_offset),
|
|
gait_bounds=(self.cfg.commands.limit_gait_bound[0],
|
|
self.cfg.commands.limit_gait_bound[1],
|
|
self.cfg.commands.num_bins_gait_bound),
|
|
gait_duration=(self.cfg.commands.limit_gait_duration[0],
|
|
self.cfg.commands.limit_gait_duration[1],
|
|
self.cfg.commands.num_bins_gait_duration),
|
|
footswing_height=(self.cfg.commands.limit_footswing_height[0],
|
|
self.cfg.commands.limit_footswing_height[1],
|
|
self.cfg.commands.num_bins_footswing_height),
|
|
body_pitch=(self.cfg.commands.limit_body_pitch[0],
|
|
self.cfg.commands.limit_body_pitch[1],
|
|
self.cfg.commands.num_bins_body_pitch),
|
|
body_roll=(self.cfg.commands.limit_body_roll[0],
|
|
self.cfg.commands.limit_body_roll[1],
|
|
self.cfg.commands.num_bins_body_roll),
|
|
stance_width=(self.cfg.commands.limit_stance_width[0],
|
|
self.cfg.commands.limit_stance_width[1],
|
|
self.cfg.commands.num_bins_stance_width),
|
|
stance_length=(self.cfg.commands.limit_stance_length[0],
|
|
self.cfg.commands.limit_stance_length[1],
|
|
self.cfg.commands.num_bins_stance_length),
|
|
aux_reward_coef=(self.cfg.commands.limit_aux_reward_coef[0],
|
|
self.cfg.commands.limit_aux_reward_coef[1],
|
|
self.cfg.commands.num_bins_aux_reward_coef),
|
|
)]
|
|
|
|
if self.cfg.commands.curriculum_type == "LipschitzCurriculum":
|
|
for curriculum in self.curricula:
|
|
curriculum.set_params(lipschitz_threshold=self.cfg.commands.lipschitz_threshold,
|
|
binary_phases=self.cfg.commands.binary_phases)
|
|
self.env_command_bins = np.zeros(len(env_ids), dtype=np.int)
|
|
self.env_command_categories = np.zeros(len(env_ids), dtype=np.int)
|
|
low = np.array(
|
|
[self.cfg.commands.lin_vel_x[0], self.cfg.commands.lin_vel_y[0],
|
|
self.cfg.commands.ang_vel_yaw[0], self.cfg.commands.body_height_cmd[0],
|
|
self.cfg.commands.gait_frequency_cmd_range[0],
|
|
self.cfg.commands.gait_phase_cmd_range[0], self.cfg.commands.gait_offset_cmd_range[0],
|
|
self.cfg.commands.gait_bound_cmd_range[0], self.cfg.commands.gait_duration_cmd_range[0],
|
|
self.cfg.commands.footswing_height_range[0], self.cfg.commands.body_pitch_range[0],
|
|
self.cfg.commands.body_roll_range[0],self.cfg.commands.stance_width_range[0],
|
|
self.cfg.commands.stance_length_range[0], self.cfg.commands.aux_reward_coef_range[0], ])
|
|
high = np.array(
|
|
[self.cfg.commands.lin_vel_x[1], self.cfg.commands.lin_vel_y[1],
|
|
self.cfg.commands.ang_vel_yaw[1], self.cfg.commands.body_height_cmd[1],
|
|
self.cfg.commands.gait_frequency_cmd_range[1],
|
|
self.cfg.commands.gait_phase_cmd_range[1], self.cfg.commands.gait_offset_cmd_range[1],
|
|
self.cfg.commands.gait_bound_cmd_range[1], self.cfg.commands.gait_duration_cmd_range[1],
|
|
self.cfg.commands.footswing_height_range[1], self.cfg.commands.body_pitch_range[1],
|
|
self.cfg.commands.body_roll_range[1],self.cfg.commands.stance_width_range[1],
|
|
self.cfg.commands.stance_length_range[1], self.cfg.commands.aux_reward_coef_range[1], ])
|
|
for curriculum in self.curricula:
|
|
curriculum.set_to(low=low, high=high)
|
|
|
|
def _prepare_reward_function(self):
|
|
""" Prepares a list of reward functions, whcih will be called to compute the total reward.
|
|
Looks for self._reward_<REWARD_NAME>, where <REWARD_NAME> are names of all non zero reward scales in the cfg.
|
|
"""
|
|
# reward containers
|
|
from go2_gym.envs.rewards.corl_rewards import CoRLRewards
|
|
reward_containers = {"CoRLRewards": CoRLRewards}
|
|
self.reward_container = reward_containers[self.cfg.rewards.reward_container_name](self)
|
|
|
|
# remove zero scales + multiply non-zero ones by dt
|
|
for key in list(self.reward_scales.keys()):
|
|
scale = self.reward_scales[key]
|
|
if scale == 0:
|
|
self.reward_scales.pop(key)
|
|
else:
|
|
self.reward_scales[key] *= self.dt
|
|
# prepare list of functions
|
|
self.reward_functions = []
|
|
self.reward_names = []
|
|
for name, scale in self.reward_scales.items():
|
|
if name == "termination":
|
|
continue
|
|
|
|
if not hasattr(self.reward_container, '_reward_' + name):
|
|
print(f"Warning: reward {'_reward_' + name} has nonzero coefficient but was not found!")
|
|
else:
|
|
self.reward_names.append(name)
|
|
self.reward_functions.append(getattr(self.reward_container, '_reward_' + name))
|
|
|
|
# reward episode sums
|
|
self.episode_sums = {
|
|
name: torch.zeros(self.num_envs, dtype=torch.float, device=self.device, requires_grad=False)
|
|
for name in self.reward_scales.keys()}
|
|
self.episode_sums["total"] = torch.zeros(self.num_envs, dtype=torch.float, device=self.device,
|
|
requires_grad=False)
|
|
self.episode_sums_eval = {
|
|
name: -1 * torch.ones(self.num_envs, dtype=torch.float, device=self.device, requires_grad=False)
|
|
for name in self.reward_scales.keys()}
|
|
self.episode_sums_eval["total"] = torch.zeros(self.num_envs, dtype=torch.float, device=self.device,
|
|
requires_grad=False)
|
|
self.command_sums = {
|
|
name: torch.zeros(self.num_envs, dtype=torch.float, device=self.device, requires_grad=False)
|
|
for name in
|
|
list(self.reward_scales.keys()) + ["lin_vel_raw", "ang_vel_raw", "lin_vel_residual", "ang_vel_residual",
|
|
"ep_timesteps"]}
|
|
|
|
def _create_ground_plane(self):
|
|
""" Adds a ground plane to the simulation, sets friction and restitution based on the cfg.
|
|
"""
|
|
plane_params = gymapi.PlaneParams()
|
|
plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0)
|
|
plane_params.static_friction = self.cfg.terrain.static_friction
|
|
plane_params.dynamic_friction = self.cfg.terrain.dynamic_friction
|
|
plane_params.restitution = self.cfg.terrain.restitution
|
|
self.gym.add_ground(self.sim, plane_params)
|
|
|
|
def _create_heightfield(self):
|
|
""" Adds a heightfield terrain to the simulation, sets parameters based on the cfg.
|
|
"""
|
|
hf_params = gymapi.HeightFieldParams()
|
|
hf_params.column_scale = self.terrain.cfg.horizontal_scale
|
|
hf_params.row_scale = self.terrain.cfg.horizontal_scale
|
|
hf_params.vertical_scale = self.terrain.cfg.vertical_scale
|
|
hf_params.nbRows = self.terrain.tot_cols
|
|
hf_params.nbColumns = self.terrain.tot_rows
|
|
hf_params.transform.p.x = -self.terrain.cfg.border_size
|
|
hf_params.transform.p.y = -self.terrain.cfg.border_size
|
|
hf_params.transform.p.z = 0.0
|
|
hf_params.static_friction = self.cfg.terrain.static_friction
|
|
hf_params.dynamic_friction = self.cfg.terrain.dynamic_friction
|
|
hf_params.restitution = self.cfg.terrain.restitution
|
|
|
|
print(self.terrain.heightsamples.shape, hf_params.nbRows, hf_params.nbColumns)
|
|
|
|
self.gym.add_heightfield(self.sim, self.terrain.heightsamples.T, hf_params)
|
|
self.height_samples = torch.tensor(self.terrain.heightsamples).view(self.terrain.tot_rows,
|
|
self.terrain.tot_cols).to(self.device)
|
|
|
|
def _create_trimesh(self):
|
|
""" Adds a triangle mesh terrain to the simulation, sets parameters based on the cfg.
|
|
# """
|
|
tm_params = gymapi.TriangleMeshParams()
|
|
tm_params.nb_vertices = self.terrain.vertices.shape[0]
|
|
tm_params.nb_triangles = self.terrain.triangles.shape[0]
|
|
|
|
tm_params.transform.p.x = -self.terrain.cfg.border_size
|
|
tm_params.transform.p.y = -self.terrain.cfg.border_size
|
|
tm_params.transform.p.z = 0.0
|
|
tm_params.static_friction = self.cfg.terrain.static_friction
|
|
tm_params.dynamic_friction = self.cfg.terrain.dynamic_friction
|
|
tm_params.restitution = self.cfg.terrain.restitution
|
|
self.gym.add_triangle_mesh(self.sim, self.terrain.vertices.flatten(order='C'),
|
|
self.terrain.triangles.flatten(order='C'), tm_params)
|
|
self.height_samples = torch.tensor(self.terrain.heightsamples).view(self.terrain.tot_rows,
|
|
self.terrain.tot_cols).to(self.device)
|
|
|
|
def _create_envs(self):
|
|
""" Creates environments:
|
|
1. loads the robot URDF/MJCF asset,
|
|
2. For each environment
|
|
2.1 creates the environment,
|
|
2.2 calls DOF and Rigid shape properties callbacks,
|
|
2.3 create actor with these properties and add them to the env
|
|
3. Store indices of different bodies of the robot
|
|
"""
|
|
asset_path = self.cfg.asset.file.format(MINI_GYM_ROOT_DIR=MINI_GYM_ROOT_DIR)
|
|
asset_root = os.path.dirname(asset_path)
|
|
asset_file = os.path.basename(asset_path)
|
|
|
|
asset_options = gymapi.AssetOptions()
|
|
asset_options.default_dof_drive_mode = self.cfg.asset.default_dof_drive_mode
|
|
asset_options.collapse_fixed_joints = self.cfg.asset.collapse_fixed_joints
|
|
asset_options.replace_cylinder_with_capsule = self.cfg.asset.replace_cylinder_with_capsule
|
|
asset_options.flip_visual_attachments = self.cfg.asset.flip_visual_attachments
|
|
asset_options.fix_base_link = self.cfg.asset.fix_base_link
|
|
asset_options.density = self.cfg.asset.density
|
|
asset_options.angular_damping = self.cfg.asset.angular_damping
|
|
asset_options.linear_damping = self.cfg.asset.linear_damping
|
|
asset_options.max_angular_velocity = self.cfg.asset.max_angular_velocity
|
|
asset_options.max_linear_velocity = self.cfg.asset.max_linear_velocity
|
|
asset_options.armature = self.cfg.asset.armature
|
|
asset_options.thickness = self.cfg.asset.thickness
|
|
asset_options.disable_gravity = self.cfg.asset.disable_gravity
|
|
|
|
self.robot_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options)
|
|
self.num_dof = self.gym.get_asset_dof_count(self.robot_asset)
|
|
self.num_actuated_dof = self.num_actions
|
|
self.num_bodies = self.gym.get_asset_rigid_body_count(self.robot_asset)
|
|
dof_props_asset = self.gym.get_asset_dof_properties(self.robot_asset)
|
|
rigid_shape_props_asset = self.gym.get_asset_rigid_shape_properties(self.robot_asset)
|
|
|
|
# save body names from the asset
|
|
body_names = self.gym.get_asset_rigid_body_names(self.robot_asset)
|
|
self.dof_names = self.gym.get_asset_dof_names(self.robot_asset)
|
|
self.num_bodies = len(body_names)
|
|
self.num_dofs = len(self.dof_names)
|
|
feet_names = [s for s in body_names if self.cfg.asset.foot_name in s]
|
|
penalized_contact_names = []
|
|
for name in self.cfg.asset.penalize_contacts_on:
|
|
penalized_contact_names.extend([s for s in body_names if name in s])
|
|
termination_contact_names = []
|
|
for name in self.cfg.asset.terminate_after_contacts_on:
|
|
termination_contact_names.extend([s for s in body_names if name in s])
|
|
|
|
base_init_state_list = self.cfg.init_state.pos + self.cfg.init_state.rot + self.cfg.init_state.lin_vel + self.cfg.init_state.ang_vel
|
|
self.base_init_state = to_torch(base_init_state_list, device=self.device, requires_grad=False)
|
|
start_pose = gymapi.Transform()
|
|
start_pose.p = gymapi.Vec3(*self.base_init_state[:3])
|
|
|
|
self.env_origins = torch.zeros(self.num_envs, 3, device=self.device, requires_grad=False)
|
|
self.terrain_levels = torch.zeros(self.num_envs, device=self.device, requires_grad=False, dtype=torch.long)
|
|
self.terrain_origins = torch.zeros(self.num_envs, 3, device=self.device, requires_grad=False)
|
|
self.terrain_types = torch.zeros(self.num_envs, device=self.device, requires_grad=False, dtype=torch.long)
|
|
self._call_train_eval(self._get_env_origins, torch.arange(self.num_envs, device=self.device))
|
|
env_lower = gymapi.Vec3(0., 0., 0.)
|
|
env_upper = gymapi.Vec3(0., 0., 0.)
|
|
self.actor_handles = []
|
|
self.imu_sensor_handles = []
|
|
self.envs = []
|
|
|
|
self.default_friction = rigid_shape_props_asset[1].friction
|
|
self.default_restitution = rigid_shape_props_asset[1].restitution
|
|
self._init_custom_buffers__()
|
|
self._call_train_eval(self._randomize_rigid_body_props, torch.arange(self.num_envs, device=self.device))
|
|
self._randomize_gravity()
|
|
|
|
for i in range(self.num_envs):
|
|
# create env instance
|
|
env_handle = self.gym.create_env(self.sim, env_lower, env_upper, int(np.sqrt(self.num_envs)))
|
|
pos = self.env_origins[i].clone()
|
|
pos[0:1] += torch_rand_float(-self.cfg.terrain.x_init_range, self.cfg.terrain.x_init_range, (1, 1),
|
|
device=self.device).squeeze(1)
|
|
pos[1:2] += torch_rand_float(-self.cfg.terrain.y_init_range, self.cfg.terrain.y_init_range, (1, 1),
|
|
device=self.device).squeeze(1)
|
|
start_pose.p = gymapi.Vec3(*pos)
|
|
|
|
rigid_shape_props = self._process_rigid_shape_props(rigid_shape_props_asset, i)
|
|
self.gym.set_asset_rigid_shape_properties(self.robot_asset, rigid_shape_props)
|
|
anymal_handle = self.gym.create_actor(env_handle, self.robot_asset, start_pose, "anymal", i,
|
|
self.cfg.asset.self_collisions, 0)
|
|
dof_props = self._process_dof_props(dof_props_asset, i)
|
|
self.gym.set_actor_dof_properties(env_handle, anymal_handle, dof_props)
|
|
body_props = self.gym.get_actor_rigid_body_properties(env_handle, anymal_handle)
|
|
body_props = self._process_rigid_body_props(body_props, i)
|
|
self.gym.set_actor_rigid_body_properties(env_handle, anymal_handle, body_props, recomputeInertia=True)
|
|
self.envs.append(env_handle)
|
|
self.actor_handles.append(anymal_handle)
|
|
|
|
self.feet_indices = torch.zeros(len(feet_names), dtype=torch.long, device=self.device, requires_grad=False)
|
|
for i in range(len(feet_names)):
|
|
self.feet_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0], self.actor_handles[0],
|
|
feet_names[i])
|
|
|
|
self.penalised_contact_indices = torch.zeros(len(penalized_contact_names), dtype=torch.long, device=self.device,
|
|
requires_grad=False)
|
|
for i in range(len(penalized_contact_names)):
|
|
self.penalised_contact_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0],
|
|
self.actor_handles[0],
|
|
penalized_contact_names[i])
|
|
|
|
self.termination_contact_indices = torch.zeros(len(termination_contact_names), dtype=torch.long,
|
|
device=self.device, requires_grad=False)
|
|
for i in range(len(termination_contact_names)):
|
|
self.termination_contact_indices[i] = self.gym.find_actor_rigid_body_handle(self.envs[0],
|
|
self.actor_handles[0],
|
|
termination_contact_names[i])
|
|
# if recording video, set up camera
|
|
if self.cfg.env.record_video:
|
|
self.camera_props = gymapi.CameraProperties()
|
|
self.camera_props.width = 360
|
|
self.camera_props.height = 240
|
|
self.rendering_camera = self.gym.create_camera_sensor(self.envs[0], self.camera_props)
|
|
self.gym.set_camera_location(self.rendering_camera, self.envs[0], gymapi.Vec3(1.5, 1, 3.0),
|
|
gymapi.Vec3(0, 0, 0))
|
|
if self.eval_cfg is not None:
|
|
self.rendering_camera_eval = self.gym.create_camera_sensor(self.envs[self.num_train_envs],
|
|
self.camera_props)
|
|
self.gym.set_camera_location(self.rendering_camera_eval, self.envs[self.num_train_envs],
|
|
gymapi.Vec3(1.5, 1, 3.0),
|
|
gymapi.Vec3(0, 0, 0))
|
|
self.video_writer = None
|
|
self.video_frames = []
|
|
self.video_frames_eval = []
|
|
self.complete_video_frames = []
|
|
self.complete_video_frames_eval = []
|
|
|
|
def render(self, mode="rgb_array"):
|
|
assert mode == "rgb_array"
|
|
bx, by, bz = self.root_states[0, 0], self.root_states[0, 1], self.root_states[0, 2]
|
|
self.gym.set_camera_location(self.rendering_camera, self.envs[0], gymapi.Vec3(bx, by - 1.0, bz + 1.0),
|
|
gymapi.Vec3(bx, by, bz))
|
|
self.gym.step_graphics(self.sim)
|
|
self.gym.render_all_camera_sensors(self.sim)
|
|
img = self.gym.get_camera_image(self.sim, self.envs[0], self.rendering_camera, gymapi.IMAGE_COLOR)
|
|
w, h = img.shape
|
|
return img.reshape([w, h // 4, 4])
|
|
|
|
def _render_headless(self):
|
|
if self.record_now and self.complete_video_frames is not None and len(self.complete_video_frames) == 0:
|
|
bx, by, bz = self.root_states[0, 0], self.root_states[0, 1], self.root_states[0, 2]
|
|
self.gym.set_camera_location(self.rendering_camera, self.envs[0], gymapi.Vec3(bx, by - 1.0, bz + 1.0),
|
|
gymapi.Vec3(bx, by, bz))
|
|
self.video_frame = self.gym.get_camera_image(self.sim, self.envs[0], self.rendering_camera,
|
|
gymapi.IMAGE_COLOR)
|
|
self.video_frame = self.video_frame.reshape((self.camera_props.height, self.camera_props.width, 4))
|
|
self.video_frames.append(self.video_frame)
|
|
|
|
if self.record_eval_now and self.complete_video_frames_eval is not None and len(
|
|
self.complete_video_frames_eval) == 0:
|
|
if self.eval_cfg is not None:
|
|
bx, by, bz = self.root_states[self.num_train_envs, 0], self.root_states[self.num_train_envs, 1], \
|
|
self.root_states[self.num_train_envs, 2]
|
|
self.gym.set_camera_location(self.rendering_camera_eval, self.envs[self.num_train_envs],
|
|
gymapi.Vec3(bx, by - 1.0, bz + 1.0),
|
|
gymapi.Vec3(bx, by, bz))
|
|
self.video_frame_eval = self.gym.get_camera_image(self.sim, self.envs[self.num_train_envs],
|
|
self.rendering_camera_eval,
|
|
gymapi.IMAGE_COLOR)
|
|
self.video_frame_eval = self.video_frame_eval.reshape(
|
|
(self.camera_props.height, self.camera_props.width, 4))
|
|
self.video_frames_eval.append(self.video_frame_eval)
|
|
|
|
def start_recording(self):
|
|
self.complete_video_frames = None
|
|
self.record_now = True
|
|
|
|
def start_recording_eval(self):
|
|
self.complete_video_frames_eval = None
|
|
self.record_eval_now = True
|
|
|
|
def pause_recording(self):
|
|
self.complete_video_frames = []
|
|
self.video_frames = []
|
|
self.record_now = False
|
|
|
|
def pause_recording_eval(self):
|
|
self.complete_video_frames_eval = []
|
|
self.video_frames_eval = []
|
|
self.record_eval_now = False
|
|
|
|
def get_complete_frames(self):
|
|
if self.complete_video_frames is None:
|
|
return []
|
|
return self.complete_video_frames
|
|
|
|
def get_complete_frames_eval(self):
|
|
if self.complete_video_frames_eval is None:
|
|
return []
|
|
return self.complete_video_frames_eval
|
|
|
|
def _get_env_origins(self, env_ids, cfg):
|
|
""" Sets environment origins. On rough terrain the origins are defined by the terrain platforms.
|
|
Otherwise create a grid.
|
|
"""
|
|
if cfg.terrain.mesh_type in ["heightfield", "trimesh"]:
|
|
self.custom_origins = True
|
|
# put robots at the origins defined by the terrain
|
|
max_init_level = cfg.terrain.max_init_terrain_level
|
|
min_init_level = cfg.terrain.min_init_terrain_level
|
|
if not cfg.terrain.curriculum: max_init_level = cfg.terrain.num_rows - 1
|
|
if not cfg.terrain.curriculum: min_init_level = 0
|
|
if cfg.terrain.center_robots:
|
|
min_terrain_level = cfg.terrain.num_rows // 2 - cfg.terrain.center_span
|
|
max_terrain_level = cfg.terrain.num_rows // 2 + cfg.terrain.center_span - 1
|
|
min_terrain_type = cfg.terrain.num_cols // 2 - cfg.terrain.center_span
|
|
max_terrain_type = cfg.terrain.num_cols // 2 + cfg.terrain.center_span - 1
|
|
self.terrain_levels[env_ids] = torch.randint(min_terrain_level, max_terrain_level + 1, (len(env_ids),),
|
|
device=self.device)
|
|
self.terrain_types[env_ids] = torch.randint(min_terrain_type, max_terrain_type + 1, (len(env_ids),),
|
|
device=self.device)
|
|
else:
|
|
self.terrain_levels[env_ids] = torch.randint(min_init_level, max_init_level + 1, (len(env_ids),),
|
|
device=self.device)
|
|
self.terrain_types[env_ids] = torch.div(torch.arange(len(env_ids), device=self.device),
|
|
(len(env_ids) / cfg.terrain.num_cols), rounding_mode='floor').to(
|
|
torch.long)
|
|
cfg.terrain.max_terrain_level = cfg.terrain.num_rows
|
|
cfg.terrain.terrain_origins = torch.from_numpy(cfg.terrain.env_origins).to(self.device).to(torch.float)
|
|
self.env_origins[env_ids] = cfg.terrain.terrain_origins[
|
|
self.terrain_levels[env_ids], self.terrain_types[env_ids]]
|
|
else:
|
|
self.custom_origins = False
|
|
# create a grid of robots
|
|
num_cols = np.floor(np.sqrt(len(env_ids)))
|
|
num_rows = np.ceil(self.num_envs / num_cols)
|
|
xx, yy = torch.meshgrid(torch.arange(num_rows), torch.arange(num_cols))
|
|
spacing = cfg.env.env_spacing
|
|
self.env_origins[env_ids, 0] = spacing * xx.flatten()[:len(env_ids)]
|
|
self.env_origins[env_ids, 1] = spacing * yy.flatten()[:len(env_ids)]
|
|
self.env_origins[env_ids, 2] = 0.
|
|
|
|
def _parse_cfg(self, cfg):
|
|
self.dt = self.cfg.control.decimation * self.sim_params.dt
|
|
self.obs_scales = self.cfg.obs_scales
|
|
self.reward_scales = vars(self.cfg.reward_scales)
|
|
self.curriculum_thresholds = vars(self.cfg.curriculum_thresholds)
|
|
cfg.command_ranges = vars(cfg.commands)
|
|
if cfg.terrain.mesh_type not in ['heightfield', 'trimesh']:
|
|
cfg.terrain.curriculum = False
|
|
max_episode_length_s = cfg.env.episode_length_s
|
|
cfg.env.max_episode_length = np.ceil(max_episode_length_s / self.dt)
|
|
self.max_episode_length = cfg.env.max_episode_length
|
|
|
|
cfg.domain_rand.push_interval = np.ceil(cfg.domain_rand.push_interval_s / self.dt)
|
|
cfg.domain_rand.rand_interval = np.ceil(cfg.domain_rand.rand_interval_s / self.dt)
|
|
cfg.domain_rand.gravity_rand_interval = np.ceil(cfg.domain_rand.gravity_rand_interval_s / self.dt)
|
|
cfg.domain_rand.gravity_rand_duration = np.ceil(
|
|
cfg.domain_rand.gravity_rand_interval * cfg.domain_rand.gravity_impulse_duration)
|
|
|
|
def _draw_debug_vis(self):
|
|
""" Draws visualizations for dubugging (slows down simulation a lot).
|
|
Default behaviour: draws height measurement points
|
|
"""
|
|
# draw height lines
|
|
if not self.terrain.cfg.measure_heights:
|
|
return
|
|
self.gym.clear_lines(self.viewer)
|
|
self.gym.refresh_rigid_body_state_tensor(self.sim)
|
|
sphere_geom = gymutil.WireframeSphereGeometry(0.02, 4, 4, None, color=(1, 1, 0))
|
|
for i in range(self.num_envs):
|
|
base_pos = (self.root_states[i, :3]).cpu().numpy()
|
|
heights = self.measured_heights[i].cpu().numpy()
|
|
height_points = quat_apply_yaw(self.base_quat[i].repeat(heights.shape[0]),
|
|
self.height_points[i]).cpu().numpy()
|
|
for j in range(heights.shape[0]):
|
|
x = height_points[j, 0] + base_pos[0]
|
|
y = height_points[j, 1] + base_pos[1]
|
|
z = heights[j]
|
|
sphere_pose = gymapi.Transform(gymapi.Vec3(x, y, z), r=None)
|
|
gymutil.draw_lines(sphere_geom, self.gym, self.viewer, self.envs[i], sphere_pose)
|
|
|
|
def _init_height_points(self, env_ids, cfg):
|
|
""" Returns points at which the height measurments are sampled (in base frame)
|
|
|
|
Returns:
|
|
[torch.Tensor]: Tensor of shape (num_envs, self.num_height_points, 3)
|
|
"""
|
|
y = torch.tensor(cfg.terrain.measured_points_y, device=self.device, requires_grad=False)
|
|
x = torch.tensor(cfg.terrain.measured_points_x, device=self.device, requires_grad=False)
|
|
grid_x, grid_y = torch.meshgrid(x, y)
|
|
|
|
cfg.env.num_height_points = grid_x.numel()
|
|
points = torch.zeros(len(env_ids), cfg.env.num_height_points, 3, device=self.device, requires_grad=False)
|
|
points[:, :, 0] = grid_x.flatten()
|
|
points[:, :, 1] = grid_y.flatten()
|
|
return points
|
|
|
|
def _get_heights(self, env_ids, cfg):
|
|
""" Samples heights of the terrain at required points around each robot.
|
|
The points are offset by the base's position and rotated by the base's yaw
|
|
|
|
Args:
|
|
env_ids (List[int], optional): Subset of environments for which to return the heights. Defaults to None.
|
|
|
|
Raises:
|
|
NameError: [description]
|
|
|
|
Returns:
|
|
[type]: [description]
|
|
"""
|
|
if cfg.terrain.mesh_type == 'plane':
|
|
return torch.zeros(len(env_ids), cfg.env.num_height_points, device=self.device, requires_grad=False)
|
|
elif cfg.terrain.mesh_type == 'none':
|
|
raise NameError("Can't measure height with terrain mesh type 'none'")
|
|
|
|
points = quat_apply_yaw(self.base_quat[env_ids].repeat(1, cfg.env.num_height_points),
|
|
self.height_points[env_ids]) + (self.root_states[env_ids, :3]).unsqueeze(1)
|
|
|
|
points += self.terrain.cfg.border_size
|
|
points = (points / self.terrain.cfg.horizontal_scale).long()
|
|
px = points[:, :, 0].view(-1)
|
|
py = points[:, :, 1].view(-1)
|
|
px = torch.clip(px, 0, self.height_samples.shape[0] - 2)
|
|
py = torch.clip(py, 0, self.height_samples.shape[1] - 2)
|
|
|
|
heights1 = self.height_samples[px, py]
|
|
heights2 = self.height_samples[px + 1, py]
|
|
heights3 = self.height_samples[px, py + 1]
|
|
heights = torch.min(heights1, heights2)
|
|
heights = torch.min(heights, heights3)
|
|
|
|
return heights.view(len(env_ids), -1) * self.terrain.cfg.vertical_scale
|