unitree_rl_gym/deploy/deploy_mujoco/deploy_mujoco_navigation.py

179 lines
6.8 KiB
Python
Raw Normal View History

2025-02-17 15:43:33 +08:00
import time
import mujoco.viewer
import mujoco
import numpy as np
from legged_gym import LEGGED_GYM_ROOT_DIR
import torch
import yaml
from icecream import ic
import deploy.deploy_real.math_utils as math_utils
def get_gravity_orientation(quaternion):
qw = quaternion[0]
qx = quaternion[1]
qy = quaternion[2]
qz = quaternion[3]
gravity_orientation = np.zeros(3)
gravity_orientation[0] = 2 * (-qz * qx + qw * qy)
gravity_orientation[1] = -2 * (qz * qy + qw * qx)
gravity_orientation[2] = 1 - 2 * (qw * qw + qz * qz)
return gravity_orientation
def pd_control(target_q, q, kp, target_dq, dq, kd):
"""Calculates torques from position commands"""
return (target_q - q) * kp + (target_dq - dq) * kd
if __name__ == "__main__":
# get config file name from command line
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("config_file", type=str, help="config file name in the config folder")
args = parser.parse_args()
config_file = args.config_file
with open(f"{LEGGED_GYM_ROOT_DIR}/deploy/deploy_mujoco/configs/{config_file}", "r") as f:
config = yaml.load(f, Loader=yaml.FullLoader)
isaaclab_joint_order = config["lab_joint"]
raw_joint_order = config["motor_joint"]
mapping_tensor = torch.zeros((len(raw_joint_order), len(isaaclab_joint_order)))
# Fill the mapping tensor
for b_idx, b_joint in enumerate(raw_joint_order):
if b_joint in isaaclab_joint_order:
a_idx = isaaclab_joint_order.index(b_joint)
mapping_tensor[a_idx, b_idx] = 1.0
policy_path = config["policy_path"].replace("{LEGGED_GYM_ROOT_DIR}", LEGGED_GYM_ROOT_DIR)
xml_path = config["xml_path"].replace("{LEGGED_GYM_ROOT_DIR}", LEGGED_GYM_ROOT_DIR)
simulation_duration = config["simulation_duration"]
simulation_dt = config["simulation_dt"]
control_decimation = config["control_decimation"]
kps = np.array(config["kps"], dtype=np.float32)
kds = np.array(config["kds"], dtype=np.float32)
default_angles = np.array(config["default_angles"], dtype=np.float32)
ang_vel_scale = config["ang_vel_scale"]
dof_pos_scale = config["dof_pos_scale"]
dof_vel_scale = config["dof_vel_scale"]
action_scale = config["action_scale"]
cmd_scale = np.array(config["cmd_scale"], dtype=np.float32)
num_actions = config["num_actions"]
num_obs = config["num_obs"]
cmd = np.array(config["cmd_init"], dtype=np.float32)
target_cmd = np.array(config["cmd_init"], dtype=np.float32)
# define context variables
action = np.zeros(num_actions, dtype=np.float32)
target_dof_pos = default_angles.copy()
obs = np.zeros(num_obs, dtype=np.float32)
counter = 0
# Load robot model
m = mujoco.MjModel.from_xml_path(xml_path)
d = mujoco.MjData(m)
m.opt.timestep = simulation_dt
# load policy
policy = torch.jit.load(policy_path)
with mujoco.viewer.launch_passive(m, d) as viewer:
# Close the viewer automatically after simulation_duration wall-seconds.
start = time.time()
while viewer.is_running() and time.time() - start < simulation_duration:
step_start = time.time()
tau = pd_control(target_dof_pos, d.qpos[7:], kps, np.zeros_like(kds), d.qvel[6:], kds)
d.ctrl[:] = tau
# mj_step can be replaced with code that also evaluates
# a policy and applies a control signal before stepping the physics.
mujoco.mj_step(m, d)
counter += 1
if counter % control_decimation == 0:
# create observation
qj = d.qpos[7:]
dqj = d.qvel[6:]
quat = d.qpos[3:7]
omega = d.qvel[3:6]
position = d.qpos[:3]
# position is given on world frame.
# ic(position)
2025-02-18 17:07:32 +08:00
# ic(cmd)
2025-02-17 15:43:33 +08:00
cmd[0] = target_cmd[0] - position[0]
cmd[1] = target_cmd[1] - position[1]
# cmd[2] -= position[2]
cmd[:3] = math_utils.quat_rotate_inverse(
math_utils.yaw_quat(torch.as_tensor(quat).float().to("cpu")),
torch.as_tensor(cmd[:3]).float().to("cpu")
).float().numpy()
robot_forward_vec_w = math_utils.quat_apply(torch.as_tensor(quat).float().to("cpu"), torch.tensor([1,0,0]).float().to("cpu"))
robot_heading_w = torch.atan2(robot_forward_vec_w[1], robot_forward_vec_w[0])
cmd[3] = math_utils.wrap_to_pi(cmd[3] - robot_heading_w)
ic(cmd)
2025-02-17 15:43:33 +08:00
qj = (qj - default_angles) * dof_pos_scale
dqj = dqj * dof_vel_scale
gravity_orientation = get_gravity_orientation(quat)
omega = omega * ang_vel_scale
period = 0.8
count = counter * simulation_dt
phase = count % period / period
obs[:3] = omega
obs[3:6] = gravity_orientation
obs[6:10] = cmd * cmd_scale
obs[10 : 10 + num_actions] = qj
obs[10 + num_actions : 10 + 2 * num_actions] = dqj
obs[10 + 2 * num_actions : 10 + 3 * num_actions] = action
# obs[9 + 3 * num_actions : 9 + 3 * num_actions + 2] = np.array([sin_phase, cos_phase])
obs_tensor = torch.from_numpy(obs).unsqueeze(0)
obs_tensor[..., 10:39] = obs_tensor[..., 10:39] @ mapping_tensor.transpose(0, 1)
obs_tensor[..., 39:68] = obs_tensor[..., 39:68] @ mapping_tensor.transpose(0, 1)
obs_tensor[..., 68:97] = obs_tensor[..., 68:97] @ mapping_tensor.transpose(0, 1)
# policy inference
action = policy(obs_tensor).detach().numpy().squeeze()
# reordered_actions = action @ mapping_tensor.detach().cpu().numpy()
# applied_action = action @ mapping_tensor.detach().cpu().numpy()
action = action @ mapping_tensor.detach().cpu().numpy()
# ic(
# action
# )
# action = 0.
# transform action to target_dof_pos
# target_dof_pos = action * action_scale + default_angles
target_dof_pos = action * action_scale + default_angles
# raise NotImplementedError
# Pick up changes to the physics state, apply perturbations, update options from GUI.
viewer.sync()
# Rudimentary time keeping, will drift relative to wall clock.
time_until_next_step = m.opt.timestep - (time.time() - step_start)
if time_until_next_step > 0:
time.sleep(time_until_next_step)