2024-03-07 17:46:42 +08:00
|
|
|
def train_go2(headless=True):
|
2024-01-28 17:11:38 +08:00
|
|
|
|
|
|
|
import isaacgym
|
|
|
|
assert isaacgym
|
|
|
|
import torch
|
|
|
|
|
2024-03-07 17:46:42 +08:00
|
|
|
from go2_gym.envs.base.legged_robot_config import Cfg
|
|
|
|
from go2_gym.envs.go2.go2_config import config_go2
|
|
|
|
from go2_gym.envs.go2.velocity_tracking import VelocityTrackingEasyEnv
|
2024-01-28 17:11:38 +08:00
|
|
|
|
|
|
|
from ml_logger import logger
|
|
|
|
|
2024-03-07 17:46:42 +08:00
|
|
|
from go2_gym_learn.ppo_cse import Runner
|
|
|
|
from go2_gym.envs.wrappers.history_wrapper import HistoryWrapper
|
|
|
|
from go2_gym_learn.ppo_cse.actor_critic import AC_Args
|
|
|
|
from go2_gym_learn.ppo_cse.ppo import PPO_Args
|
|
|
|
from go2_gym_learn.ppo_cse import RunnerArgs
|
2024-01-28 17:11:38 +08:00
|
|
|
|
2024-03-07 17:46:42 +08:00
|
|
|
config_go2(Cfg)
|
2024-01-28 17:11:38 +08:00
|
|
|
|
|
|
|
Cfg.commands.num_lin_vel_bins = 30
|
|
|
|
Cfg.commands.num_ang_vel_bins = 30
|
|
|
|
Cfg.curriculum_thresholds.tracking_ang_vel = 0.7
|
|
|
|
Cfg.curriculum_thresholds.tracking_lin_vel = 0.8
|
|
|
|
Cfg.curriculum_thresholds.tracking_contacts_shaped_vel = 0.90
|
|
|
|
Cfg.curriculum_thresholds.tracking_contacts_shaped_force = 0.90
|
|
|
|
|
|
|
|
Cfg.commands.distributional_commands = True
|
|
|
|
|
|
|
|
Cfg.domain_rand.lag_timesteps = 6
|
|
|
|
Cfg.domain_rand.randomize_lag_timesteps = True
|
|
|
|
Cfg.control.control_type = "actuator_net"
|
|
|
|
|
|
|
|
Cfg.domain_rand.randomize_rigids_after_start = False
|
|
|
|
Cfg.env.priv_observe_motion = False
|
|
|
|
Cfg.env.priv_observe_gravity_transformed_motion = False
|
|
|
|
Cfg.domain_rand.randomize_friction_indep = False
|
|
|
|
Cfg.env.priv_observe_friction_indep = False
|
|
|
|
Cfg.domain_rand.randomize_friction = True
|
|
|
|
Cfg.env.priv_observe_friction = True
|
|
|
|
Cfg.domain_rand.friction_range = [0.1, 3.0]
|
|
|
|
Cfg.domain_rand.randomize_restitution = True
|
|
|
|
Cfg.env.priv_observe_restitution = True
|
|
|
|
Cfg.domain_rand.restitution_range = [0.0, 0.4]
|
|
|
|
Cfg.domain_rand.randomize_base_mass = True
|
|
|
|
Cfg.env.priv_observe_base_mass = False
|
|
|
|
Cfg.domain_rand.added_mass_range = [-1.0, 3.0]
|
|
|
|
Cfg.domain_rand.randomize_gravity = True
|
|
|
|
Cfg.domain_rand.gravity_range = [-1.0, 1.0]
|
|
|
|
Cfg.domain_rand.gravity_rand_interval_s = 8.0
|
|
|
|
Cfg.domain_rand.gravity_impulse_duration = 0.99
|
|
|
|
Cfg.env.priv_observe_gravity = False
|
|
|
|
Cfg.domain_rand.randomize_com_displacement = False
|
|
|
|
Cfg.domain_rand.com_displacement_range = [-0.15, 0.15]
|
|
|
|
Cfg.env.priv_observe_com_displacement = False
|
|
|
|
Cfg.domain_rand.randomize_ground_friction = True
|
|
|
|
Cfg.env.priv_observe_ground_friction = False
|
|
|
|
Cfg.env.priv_observe_ground_friction_per_foot = False
|
|
|
|
Cfg.domain_rand.ground_friction_range = [0.0, 0.0]
|
|
|
|
Cfg.domain_rand.randomize_motor_strength = True
|
|
|
|
Cfg.domain_rand.motor_strength_range = [0.9, 1.1]
|
|
|
|
Cfg.env.priv_observe_motor_strength = False
|
|
|
|
Cfg.domain_rand.randomize_motor_offset = True
|
|
|
|
Cfg.domain_rand.motor_offset_range = [-0.02, 0.02]
|
|
|
|
Cfg.env.priv_observe_motor_offset = False
|
|
|
|
Cfg.domain_rand.push_robots = False
|
|
|
|
Cfg.domain_rand.randomize_Kp_factor = False
|
|
|
|
Cfg.env.priv_observe_Kp_factor = False
|
|
|
|
Cfg.domain_rand.randomize_Kd_factor = False
|
|
|
|
Cfg.env.priv_observe_Kd_factor = False
|
|
|
|
Cfg.env.priv_observe_body_velocity = False
|
|
|
|
Cfg.env.priv_observe_body_height = False
|
|
|
|
Cfg.env.priv_observe_desired_contact_states = False
|
|
|
|
Cfg.env.priv_observe_contact_forces = False
|
|
|
|
Cfg.env.priv_observe_foot_displacement = False
|
|
|
|
Cfg.env.priv_observe_gravity_transformed_foot_displacement = False
|
|
|
|
|
|
|
|
Cfg.env.num_privileged_obs = 2
|
|
|
|
Cfg.env.num_observation_history = 30
|
|
|
|
Cfg.reward_scales.feet_contact_forces = 0.0
|
|
|
|
|
|
|
|
Cfg.domain_rand.rand_interval_s = 4
|
|
|
|
Cfg.commands.num_commands = 15
|
|
|
|
Cfg.env.observe_two_prev_actions = True
|
|
|
|
Cfg.env.observe_yaw = False
|
|
|
|
Cfg.env.num_observations = 70
|
|
|
|
Cfg.env.num_scalar_observations = 70
|
|
|
|
Cfg.env.observe_gait_commands = True
|
|
|
|
Cfg.env.observe_timing_parameter = False
|
|
|
|
Cfg.env.observe_clock_inputs = True
|
|
|
|
|
|
|
|
Cfg.domain_rand.tile_height_range = [-0.0, 0.0]
|
|
|
|
Cfg.domain_rand.tile_height_curriculum = False
|
|
|
|
Cfg.domain_rand.tile_height_update_interval = 1000000
|
|
|
|
Cfg.domain_rand.tile_height_curriculum_step = 0.01
|
|
|
|
Cfg.terrain.border_size = 0.0
|
|
|
|
# Default value = "trimesh"
|
|
|
|
Cfg.terrain.mesh_type = "trimesh" # "heightfield" # none, plane, heightfield or trimesh
|
|
|
|
Cfg.terrain.num_cols = 30
|
|
|
|
Cfg.terrain.num_rows = 30
|
|
|
|
Cfg.terrain.terrain_width = 1.0 # 5.0
|
|
|
|
Cfg.terrain.terrain_length = 1.0 # 5.0
|
|
|
|
Cfg.terrain.x_init_range = 0.2
|
|
|
|
Cfg.terrain.y_init_range = 0.2
|
|
|
|
Cfg.terrain.teleport_thresh = 0.3
|
|
|
|
Cfg.terrain.teleport_robots = False
|
|
|
|
Cfg.terrain.center_robots = True
|
|
|
|
Cfg.terrain.center_span = 4
|
|
|
|
Cfg.terrain.horizontal_scale = 0.10
|
|
|
|
Cfg.rewards.use_terminal_foot_height = False
|
|
|
|
Cfg.rewards.use_terminal_body_height = True
|
|
|
|
Cfg.rewards.terminal_body_height = 0.05
|
|
|
|
Cfg.rewards.use_terminal_roll_pitch = True
|
|
|
|
Cfg.rewards.terminal_body_ori = 1.6
|
|
|
|
|
|
|
|
Cfg.commands.resampling_time = 10
|
|
|
|
|
|
|
|
Cfg.reward_scales.feet_slip = -0.04
|
|
|
|
Cfg.reward_scales.action_smoothness_1 = -0.1
|
|
|
|
Cfg.reward_scales.action_smoothness_2 = -0.1
|
|
|
|
Cfg.reward_scales.dof_vel = -1e-4
|
|
|
|
Cfg.reward_scales.dof_pos = -0.0
|
|
|
|
Cfg.reward_scales.jump = 10.0
|
|
|
|
Cfg.reward_scales.base_height = 0.0
|
|
|
|
Cfg.rewards.base_height_target = 0.30
|
|
|
|
Cfg.reward_scales.estimation_bonus = 0.0
|
|
|
|
Cfg.reward_scales.raibert_heuristic = -10.0
|
|
|
|
Cfg.reward_scales.feet_impact_vel = -0.0
|
|
|
|
Cfg.reward_scales.feet_clearance = -0.0
|
|
|
|
Cfg.reward_scales.feet_clearance_cmd = -0.0
|
|
|
|
Cfg.reward_scales.feet_clearance_cmd_linear = -30.0
|
|
|
|
Cfg.reward_scales.orientation = 0.0
|
|
|
|
Cfg.reward_scales.orientation_control = -5.0
|
|
|
|
Cfg.reward_scales.tracking_stance_width = -0.0
|
|
|
|
Cfg.reward_scales.tracking_stance_length = -0.0
|
|
|
|
Cfg.reward_scales.lin_vel_z = -0.02
|
|
|
|
Cfg.reward_scales.ang_vel_xy = -0.001
|
|
|
|
Cfg.reward_scales.feet_air_time = 0.0
|
|
|
|
Cfg.reward_scales.hop_symmetry = 0.0
|
|
|
|
Cfg.rewards.kappa_gait_probs = 0.07
|
|
|
|
Cfg.rewards.gait_force_sigma = 100.
|
|
|
|
Cfg.rewards.gait_vel_sigma = 10.
|
|
|
|
Cfg.reward_scales.tracking_contacts_shaped_force = 4.0
|
|
|
|
Cfg.reward_scales.tracking_contacts_shaped_vel = 4.0
|
|
|
|
Cfg.reward_scales.collision = -5.0
|
|
|
|
|
|
|
|
Cfg.rewards.reward_container_name = "CoRLRewards"
|
|
|
|
Cfg.rewards.only_positive_rewards = False
|
|
|
|
Cfg.rewards.only_positive_rewards_ji22_style = True
|
|
|
|
Cfg.rewards.sigma_rew_neg = 0.02
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Cfg.commands.lin_vel_x = [-1.0, 1.0]
|
|
|
|
Cfg.commands.lin_vel_y = [-0.6, 0.6]
|
|
|
|
Cfg.commands.ang_vel_yaw = [-1.0, 1.0]
|
|
|
|
Cfg.commands.body_height_cmd = [-0.25, 0.15]
|
|
|
|
Cfg.commands.gait_frequency_cmd_range = [2.0, 4.0]
|
|
|
|
Cfg.commands.gait_phase_cmd_range = [0.0, 1.0]
|
|
|
|
Cfg.commands.gait_offset_cmd_range = [0.0, 1.0]
|
|
|
|
Cfg.commands.gait_bound_cmd_range = [0.0, 1.0]
|
|
|
|
Cfg.commands.gait_duration_cmd_range = [0.5, 0.5]
|
|
|
|
Cfg.commands.footswing_height_range = [0.03, 0.35]
|
|
|
|
Cfg.commands.body_pitch_range = [-0.4, 0.4]
|
|
|
|
Cfg.commands.body_roll_range = [-0.0, 0.0]
|
|
|
|
Cfg.commands.stance_width_range = [0.10, 0.45]
|
|
|
|
Cfg.commands.stance_length_range = [0.35, 0.45]
|
|
|
|
|
|
|
|
Cfg.commands.limit_vel_x = [-5.0, 5.0]
|
|
|
|
Cfg.commands.limit_vel_y = [-0.6, 0.6]
|
|
|
|
Cfg.commands.limit_vel_yaw = [-5.0, 5.0]
|
|
|
|
Cfg.commands.limit_body_height = [-0.25, 0.15]
|
|
|
|
Cfg.commands.limit_gait_frequency = [2.0, 4.0]
|
|
|
|
Cfg.commands.limit_gait_phase = [0.0, 1.0]
|
|
|
|
Cfg.commands.limit_gait_offset = [0.0, 1.0]
|
|
|
|
Cfg.commands.limit_gait_bound = [0.0, 1.0]
|
|
|
|
Cfg.commands.limit_gait_duration = [0.5, 0.5]
|
|
|
|
Cfg.commands.limit_footswing_height = [0.03, 0.35]
|
|
|
|
Cfg.commands.limit_body_pitch = [-0.4, 0.4]
|
|
|
|
Cfg.commands.limit_body_roll = [-0.0, 0.0]
|
|
|
|
Cfg.commands.limit_stance_width = [0.10, 0.45]
|
|
|
|
Cfg.commands.limit_stance_length = [0.35, 0.45]
|
|
|
|
|
|
|
|
Cfg.commands.num_bins_vel_x = 21
|
|
|
|
Cfg.commands.num_bins_vel_y = 1
|
|
|
|
Cfg.commands.num_bins_vel_yaw = 21
|
|
|
|
Cfg.commands.num_bins_body_height = 1
|
|
|
|
Cfg.commands.num_bins_gait_frequency = 1
|
|
|
|
Cfg.commands.num_bins_gait_phase = 1
|
|
|
|
Cfg.commands.num_bins_gait_offset = 1
|
|
|
|
Cfg.commands.num_bins_gait_bound = 1
|
|
|
|
Cfg.commands.num_bins_gait_duration = 1
|
|
|
|
Cfg.commands.num_bins_footswing_height = 1
|
|
|
|
Cfg.commands.num_bins_body_roll = 1
|
|
|
|
Cfg.commands.num_bins_body_pitch = 1
|
|
|
|
Cfg.commands.num_bins_stance_width = 1
|
|
|
|
|
|
|
|
Cfg.normalization.friction_range = [0, 1]
|
|
|
|
Cfg.normalization.ground_friction_range = [0, 1]
|
|
|
|
Cfg.terrain.yaw_init_range = 3.14
|
|
|
|
Cfg.normalization.clip_actions = 10.0
|
|
|
|
|
|
|
|
Cfg.commands.exclusive_phase_offset = False
|
|
|
|
Cfg.commands.pacing_offset = False
|
|
|
|
Cfg.commands.binary_phases = True
|
|
|
|
Cfg.commands.gaitwise_curricula = True
|
|
|
|
|
|
|
|
env = VelocityTrackingEasyEnv(sim_device='cuda:0', headless=False, cfg=Cfg)
|
|
|
|
|
|
|
|
# log the experiment parameters
|
|
|
|
logger.log_params(AC_Args=vars(AC_Args), PPO_Args=vars(PPO_Args), RunnerArgs=vars(RunnerArgs),
|
|
|
|
Cfg=vars(Cfg))
|
|
|
|
|
|
|
|
env = HistoryWrapper(env)
|
|
|
|
gpu_id = 0
|
|
|
|
runner = Runner(env, device=f"cuda:{gpu_id}")
|
|
|
|
runner.learn(num_learning_iterations=100000, init_at_random_ep_len=True, eval_freq=100)
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
from pathlib import Path
|
|
|
|
from ml_logger import logger
|
2024-03-07 17:46:42 +08:00
|
|
|
from go2_gym import MINI_GYM_ROOT_DIR
|
2024-01-28 17:11:38 +08:00
|
|
|
|
|
|
|
stem = Path(__file__).stem
|
|
|
|
logger.configure(logger.utcnow(f'gait-conditioned-agility/%Y-%m-%d/{stem}/%H%M%S.%f'),
|
|
|
|
root=Path(f"{MINI_GYM_ROOT_DIR}/runs").resolve(), )
|
|
|
|
logger.log_text("""
|
|
|
|
charts:
|
|
|
|
- yKey: train/episode/rew_total/mean
|
|
|
|
xKey: iterations
|
|
|
|
- yKey: train/episode/rew_tracking_lin_vel/mean
|
|
|
|
xKey: iterations
|
|
|
|
- yKey: train/episode/rew_tracking_contacts_shaped_force/mean
|
|
|
|
xKey: iterations
|
|
|
|
- yKey: train/episode/rew_action_smoothness_1/mean
|
|
|
|
xKey: iterations
|
|
|
|
- yKey: train/episode/rew_action_smoothness_2/mean
|
|
|
|
xKey: iterations
|
|
|
|
- yKey: train/episode/rew_tracking_contacts_shaped_vel/mean
|
|
|
|
xKey: iterations
|
|
|
|
- yKey: train/episode/rew_orientation_control/mean
|
|
|
|
xKey: iterations
|
|
|
|
- yKey: train/episode/rew_dof_pos/mean
|
|
|
|
xKey: iterations
|
|
|
|
- yKey: train/episode/command_area_trot/mean
|
|
|
|
xKey: iterations
|
|
|
|
- yKey: train/episode/max_terrain_height/mean
|
|
|
|
xKey: iterations
|
|
|
|
- type: video
|
|
|
|
glob: "videos/*.mp4"
|
|
|
|
- yKey: adaptation_loss/mean
|
|
|
|
xKey: iterations
|
|
|
|
""", filename=".charts.yml", dedent=True)
|
|
|
|
|
|
|
|
# to see the environment rendering, set headless=False
|
2024-03-07 17:46:42 +08:00
|
|
|
train_go2(headless=False)
|