[update] env and training config for go1

This commit is contained in:
Ziwen Zhuang 2023-11-14 18:58:32 +08:00
parent f771497315
commit 1ab4639f15
28 changed files with 2151 additions and 384 deletions

View File

@ -8,7 +8,7 @@ Authors:
[Sören Schwertfeger](https://robotics.shanghaitech.edu.cn/people/soeren), [Sören Schwertfeger](https://robotics.shanghaitech.edu.cn/people/soeren),
[Chelsea Finn](https://ai.stanford.edu/~cbfinn/), [Chelsea Finn](https://ai.stanford.edu/~cbfinn/),
[Hang Zhao](https://hangzhaomit.github.io/)<br> [Hang Zhao](https://hangzhaomit.github.io/)<br>
Conference on Robot Learning (CoRL) 2023, Oral <br> Conference on Robot Learning (CoRL) 2023, **Oral**, **Best Systems Paper Award Finalist (top 3)** <br>
<p align="center"> <p align="center">
<img src="images/teaser.jpeg" width="80%"/> <img src="images/teaser.jpeg" width="80%"/>
@ -34,7 +34,7 @@ To deploy the trained model on your real robot, please follow the instructions i
If you cannot run the distillation part or all graphics computing goes to GPU 0 dispite you have multiple GPUs and have set the CUDA_VISIBLE_DEVICES, please use docker to isolate each GPU. If you cannot run the distillation part or all graphics computing goes to GPU 0 dispite you have multiple GPUs and have set the CUDA_VISIBLE_DEVICES, please use docker to isolate each GPU.
## To Do (will be done before Nov 2023) ## ## To Do (will be done before Nov 2023) ##
- [ ] Go1 training pipeline in simulation - [x] Go1 training configuration (not from scratch)
- [ ] A1 deployment code - [ ] A1 deployment code
- [x] Go1 deployment code - [x] Go1 deployment code

View File

@ -62,11 +62,27 @@ task_registry.register( "go1_field", LeggedRobotNoisy, Go1FieldCfg(), Go1FieldCf
task_registry.register( "go1_distill", LeggedRobotNoisy, Go1FieldDistillCfg(), Go1FieldDistillCfgPPO()) task_registry.register( "go1_distill", LeggedRobotNoisy, Go1FieldDistillCfg(), Go1FieldDistillCfgPPO())
## The following tasks are for the convinience of opensource ## The following tasks are for the convinience of opensource
from .a1.a1_climb_config import A1ClimbCfg, A1ClimbCfgPPO from .a1.a1_remote_config import A1RemoteCfg, A1RemoteCfgPPO
task_registry.register( "a1_climb", LeggedRobotNoisy, A1ClimbCfg(), A1ClimbCfgPPO() ) task_registry.register( "a1_remote", LeggedRobotNoisy, A1RemoteCfg(), A1RemoteCfgPPO() )
from .a1.a1_jump_config import A1JumpCfg, A1JumpCfgPPO
task_registry.register( "a1_jump", LeggedRobotNoisy, A1JumpCfg(), A1JumpCfgPPO() )
from .a1.a1_down_config import A1DownCfg, A1DownCfgPPO
task_registry.register( "a1_down", LeggedRobotNoisy, A1DownCfg(), A1DownCfgPPO() )
from .a1.a1_leap_config import A1LeapCfg, A1LeapCfgPPO from .a1.a1_leap_config import A1LeapCfg, A1LeapCfgPPO
task_registry.register( "a1_leap", LeggedRobotNoisy, A1LeapCfg(), A1LeapCfgPPO() ) task_registry.register( "a1_leap", LeggedRobotNoisy, A1LeapCfg(), A1LeapCfgPPO() )
from .a1.a1_crawl_config import A1CrawlCfg, A1CrawlCfgPPO from .a1.a1_crawl_config import A1CrawlCfg, A1CrawlCfgPPO
task_registry.register( "a1_crawl", LeggedRobotNoisy, A1CrawlCfg(), A1CrawlCfgPPO() ) task_registry.register( "a1_crawl", LeggedRobotNoisy, A1CrawlCfg(), A1CrawlCfgPPO() )
from .a1.a1_tilt_config import A1TiltCfg, A1TiltCfgPPO from .a1.a1_tilt_config import A1TiltCfg, A1TiltCfgPPO
task_registry.register( "a1_tilt", LeggedRobotNoisy, A1TiltCfg(), A1TiltCfgPPO() ) task_registry.register( "a1_tilt", LeggedRobotNoisy, A1TiltCfg(), A1TiltCfgPPO() )
from .go1.go1_remote_config import Go1RemoteCfg, Go1RemoteCfgPPO
task_registry.register( "go1_remote", LeggedRobotNoisy, Go1RemoteCfg(), Go1RemoteCfgPPO() )
from .go1.go1_jump_config import Go1JumpCfg, Go1JumpCfgPPO
task_registry.register( "go1_jump", LeggedRobotNoisy, Go1JumpCfg(), Go1JumpCfgPPO() )
from .go1.go1_down_config import Go1DownCfg, Go1DownCfgPPO
task_registry.register( "go1_down", LeggedRobotNoisy, Go1DownCfg(), Go1DownCfgPPO() )
from .go1.go1_leap_config import Go1LeapCfg, Go1LeapCfgPPO
task_registry.register( "go1_leap", LeggedRobotNoisy, Go1LeapCfg(), Go1LeapCfgPPO() )
from .go1.go1_crawl_config import Go1CrawlCfg, Go1CrawlCfgPPO
task_registry.register( "go1_crawl", LeggedRobotNoisy, Go1CrawlCfg(), Go1CrawlCfgPPO() )
from .go1.go1_tilt_config import Go1TiltCfg, Go1TiltCfgPPO
task_registry.register( "go1_tilt", LeggedRobotNoisy, Go1TiltCfg(), Go1TiltCfgPPO() )

View File

@ -71,6 +71,14 @@ class A1RoughCfg( LeggedRobotCfg ):
penalize_contacts_on = ["thigh", "calf"] penalize_contacts_on = ["thigh", "calf"]
terminate_after_contacts_on = ["base"] terminate_after_contacts_on = ["base"]
self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter
sdk_dof_range = dict( # copied from a1_const.h from unitree_legged_sdk
Hip_max= 0.802,
Hip_min=-0.802,
Thigh_max= 4.19,
Thigh_min= -1.05,
Calf_max= -0.916,
Calf_min= -2.7,
)
class rewards( LeggedRobotCfg.rewards ): class rewards( LeggedRobotCfg.rewards ):
soft_dof_pos_limit = 0.9 soft_dof_pos_limit = 0.9

View File

@ -0,0 +1,116 @@
from os import path as osp
import numpy as np
from legged_gym.envs.a1.a1_field_config import A1FieldCfg, A1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict
class A1DownCfg( A1FieldCfg ):
#### uncomment this to train non-virtual terrain
class sensor( A1FieldCfg.sensor ):
class proprioception( A1FieldCfg.sensor.proprioception ):
latency_range = [0.04-0.0025, 0.04+0.0075]
#### uncomment the above to train non-virtual terrain
class terrain( A1FieldCfg.terrain ):
max_init_terrain_level = 2
border_size = 5
slope_treshold = 20.
curriculum = True
BarrierTrack_kwargs = merge_dict(A1FieldCfg.terrain.BarrierTrack_kwargs, dict(
options= [
"jump",
],
track_block_length= 1.6,
jump= dict(
height= (0.2, 0.45),
depth= 0.3,
fake_offset= 0.0, # [m] an offset that make the robot easier to get into the obstacle
jump_down_prob= 1., # probability of jumping down use it in non-virtual terrain
),
virtual_terrain= True, # Change this to False for real terrain
no_perlin_threshold= 0.1,
n_obstacles_per_track= 3,
))
TerrainPerlin_kwargs = merge_dict(A1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
zScale= [0.05, 0.15],
))
class commands( A1FieldCfg.commands ):
class ranges( A1FieldCfg.commands.ranges ):
lin_vel_x = [0.5, 1.5]
lin_vel_y = [0.0, 0.0]
ang_vel_yaw = [0., 0.]
class termination( A1FieldCfg.termination ):
# additional factors that determines whether to terminates the episode
termination_terms = [
"roll",
"pitch",
"z_low",
"z_high",
"out_of_track",
]
z_low_kwargs = merge_dict(A1FieldCfg.termination.z_low_kwargs, dict(
threshold= -3.,
))
class domain_rand( A1FieldCfg.domain_rand ):
init_base_pos_range = dict(
x= [0.2, 0.6],
y= [-0.25, 0.25],
)
init_base_rot_range = dict(
roll= [-0.1, 0.1],
pitch= [-0.1, 0.1],
)
push_robots = True
class rewards( A1FieldCfg.rewards ):
class scales:
tracking_ang_vel = 0.1
world_vel_l2norm = -1.3
legs_energy_substeps = -1e-6
alive = 2.
penetrate_depth = -2e-3
penetrate_volume = -2e-3
exceed_dof_pos_limits = -4e-1
exceed_torque_limits_l1norm = -4e-1
feet_contact_forces = -1e-2
torques = -2e-5
lin_pos_y = -0.1
yaw_abs = -0.1
collision = -0.1
down_cond = 0.3
soft_dof_pos_limit = 0.8
max_contact_force = 200.0
class curriculum( A1FieldCfg.curriculum ):
penetrate_volume_threshold_harder = 2000000
penetrate_volume_threshold_easier = 4000000
penetrate_depth_threshold_harder = 200000
penetrate_depth_threshold_easier = 400000
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class A1DownCfgPPO( A1FieldCfgPPO ):
class algorithm( A1FieldCfgPPO.algorithm ):
entropy_coef = 0.0
clip_min_std = 0.2
class runner( A1FieldCfgPPO.runner ):
policy_class_name = "ActorCriticRecurrent"
experiment_name = "field_a1"
resume = True
load_run = "{Your trained walk model directory}"
run_name = "".join(["Skills_",
("down" if A1DownCfg.terrain.BarrierTrack_kwargs["jump"]["jump_down_prob"] > 0. else "jump"),
("_noResume" if not resume else "from" + "_".join(load_run.split("/")[-1].split("_")[:2])),
])
max_iterations = 20000
save_interval = 500

View File

@ -1,4 +1,5 @@
import numpy as np import numpy as np
import os.path as osp
from legged_gym.envs.a1.a1_config import A1RoughCfg, A1RoughCfgPPO from legged_gym.envs.a1.a1_config import A1RoughCfg, A1RoughCfgPPO
class A1FieldCfg( A1RoughCfg ): class A1FieldCfg( A1RoughCfg ):
@ -59,7 +60,7 @@ class A1FieldCfg( A1RoughCfg ):
BarrierTrack_kwargs = dict( BarrierTrack_kwargs = dict(
options= [ options= [
# "climb", # "jump",
# "crawl", # "crawl",
# "tilt", # "tilt",
# "leap", # "leap",
@ -68,11 +69,11 @@ class A1FieldCfg( A1RoughCfg ):
track_block_length= 2., # the x-axis distance from the env origin point track_block_length= 2., # the x-axis distance from the env origin point
wall_thickness= (0.04, 0.2), # [m] wall_thickness= (0.04, 0.2), # [m]
wall_height= -0.05, wall_height= -0.05,
climb= dict( jump= dict(
height= (0.2, 0.6), height= (0.2, 0.6),
depth= (0.1, 0.8), # size along the forward axis depth= (0.1, 0.8), # size along the forward axis
fake_offset= 0.0, # [m] an offset that make the robot easier to get into the obstacle fake_offset= 0.0, # [m] an offset that make the robot easier to get into the obstacle
climb_down_prob= 0.0, jump_down_prob= 0., # probability of jumping down use it in non-virtual terrain
), ),
crawl= dict( crawl= dict(
height= (0.25, 0.5), height= (0.25, 0.5),
@ -97,13 +98,14 @@ class A1FieldCfg( A1RoughCfg ):
virtual_terrain= False, virtual_terrain= False,
draw_virtual_terrain= True, draw_virtual_terrain= True,
engaging_next_threshold= 1.2, engaging_next_threshold= 1.2,
engaging_finish_threshold= 0.,
curriculum_perlin= False, curriculum_perlin= False,
no_perlin_threshold= 0.0, no_perlin_threshold= 0.1,
) )
TerrainPerlin_kwargs = dict( TerrainPerlin_kwargs = dict(
zScale= [0.1, 0.15], zScale= [0.08, 0.15],
# zScale= 0.1, # Use a constant zScale for training a walk policy # zScale= 0.15, # Use a constant zScale for training a walk policy
frequency= 10, frequency= 10,
) )
@ -114,9 +116,6 @@ class A1FieldCfg( A1RoughCfg ):
lin_vel_x = [-1.0, 1.0] lin_vel_x = [-1.0, 1.0]
lin_vel_y = [0.0, 0.0] lin_vel_y = [0.0, 0.0]
ang_vel_yaw = [0., 0.] ang_vel_yaw = [0., 0.]
######## configs for training a walk policy #########
# lin_vel_y = [-1., 1.]
# ang_vel_yaw = [-1., 1.]
class control( A1RoughCfg.control ): class control( A1RoughCfg.control ):
stiffness = {'joint': 50.} stiffness = {'joint': 50.}
@ -125,10 +124,15 @@ class A1FieldCfg( A1RoughCfg ):
torque_limits = 25 # override the urdf torque_limits = 25 # override the urdf
computer_clip_torque = True computer_clip_torque = True
motor_clip_torque = False motor_clip_torque = False
# action_delay = False # set to True to enable action delay in sim
# action_delay_range = [0.002, 0.022] # [s]
# action_delay_resample_time = 5.0 # [s]
class asset( A1RoughCfg.asset ): class asset( A1RoughCfg.asset ):
penalize_contacts_on = ["base", "thigh"] penalize_contacts_on = ["base", "thigh", "calf"]
terminate_after_contacts_on = ["base", "imu"] terminate_after_contacts_on = ["base", "imu"]
front_hip_names = ["FR_hip_joint", "FL_hip_joint"]
rear_hip_names = ["RR_hip_joint", "RL_hip_joint"]
class termination: class termination:
# additional factors that determines whether to terminates the episode # additional factors that determines whether to terminates the episode
@ -145,8 +149,8 @@ class A1FieldCfg( A1RoughCfg ):
tilt_threshold= 1.5, tilt_threshold= 1.5,
) )
pitch_kwargs = dict( pitch_kwargs = dict(
threshold= 1.6, # [rad] # for leap, climb threshold= 1.6, # [rad] # for leap, jump
climb_threshold= 1.6, jump_threshold= 1.6,
leap_threshold= 1.5, leap_threshold= 1.5,
) )
z_low_kwargs = dict( z_low_kwargs = dict(
@ -176,7 +180,7 @@ class A1FieldCfg( A1RoughCfg ):
added_mass_range = [1.0, 3.0] added_mass_range = [1.0, 3.0]
randomize_friction = True randomize_friction = True
friction_range = [0.0, 1.] friction_range = [0., 2.]
init_base_pos_range = dict( init_base_pos_range = dict(
x= [0.2, 0.6], x= [0.2, 0.6],
@ -200,10 +204,29 @@ class A1FieldCfg( A1RoughCfg ):
class normalization( A1RoughCfg.normalization ): class normalization( A1RoughCfg.normalization ):
class obs_scales( A1RoughCfg.normalization.obs_scales ): class obs_scales( A1RoughCfg.normalization.obs_scales ):
forward_depth = 1. forward_depth = 1.
base_pose = [0., 0., 1., 1., 1., 1.] base_pose = [0., 0., 0., 1., 1., 1.]
engaging_block = 1. engaging_block = 1.
""" The following action clip is used for tanh policy activation. """
# clip_actions_method = "hard"
# dof_pos_redundancy = 0.2
# clip_actions_low = []
# clip_actions_high = []
# for sdk_joint_name, sim_joint_name in zip(
# ["Hip", "Thigh", "Calf"] * 4,
# [ # in the order as simulation
# "FL_hip_joint", "FL_thigh_joint", "FL_calf_joint",
# "FR_hip_joint", "FR_thigh_joint", "FR_calf_joint",
# "RL_hip_joint", "RL_thigh_joint", "RL_calf_joint",
# "RR_hip_joint", "RR_thigh_joint", "RR_calf_joint",
# ],
# ): # This setting is only for position control.
# clip_actions_low.append( (A1RoughCfg.asset.sdk_dof_range[sdk_joint_name + "_min"] + dof_pos_redundancy - A1RoughCfg.init_state.default_joint_angles[sim_joint_name]) / a1_action_scale )
# clip_actions_high.append( (A1RoughCfg.asset.sdk_dof_range[sdk_joint_name + "_max"] - dof_pos_redundancy - A1RoughCfg.init_state.default_joint_angles[sim_joint_name]) / a1_action_scale )
# del dof_pos_redundancy, sdk_joint_name, sim_joint_name # This is not intended to be an attribute of normalization
""" The above action clip is used for tanh policy activation. """
class noise( A1RoughCfg.noise ): class noise( A1RoughCfg.noise ):
add_noise = False # disable internal uniform +- 1 noise, and no noise in proprioception
class noise_scales( A1RoughCfg.noise.noise_scales ): class noise_scales( A1RoughCfg.noise.noise_scales ):
forward_depth = 0.1 forward_depth = 0.1
base_pose = 1.0 base_pose = 1.0
@ -244,6 +267,7 @@ class A1FieldCfg( A1RoughCfg ):
no_moveup_when_fall = False no_moveup_when_fall = False
# chosen heuristically, please refer to `LeggedRobotField._get_terrain_curriculum_move` with fixed body_measure_points # chosen heuristically, please refer to `LeggedRobotField._get_terrain_curriculum_move` with fixed body_measure_points
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class A1FieldCfgPPO( A1RoughCfgPPO ): class A1FieldCfgPPO( A1RoughCfgPPO ):
class algorithm( A1RoughCfgPPO.algorithm ): class algorithm( A1RoughCfgPPO.algorithm ):
entropy_coef = 0.01 entropy_coef = 0.01
@ -256,8 +280,9 @@ class A1FieldCfgPPO( A1RoughCfgPPO ):
class runner( A1RoughCfgPPO.runner ): class runner( A1RoughCfgPPO.runner ):
policy_class_name = "ActorCriticRecurrent" policy_class_name = "ActorCriticRecurrent"
experiment_name = "field_a1" experiment_name = "field_a1"
run_name = "".join(["WalkingBase", resume = False
("_pEnergySubsteps" + np.format_float_scientific(-A1FieldCfg.rewards.scales.legs_energy_substeps, precision=1, exp_digits=1, trim="-") if A1FieldCfg.rewards.scales.legs_energy_substeps != 0 else ""),
run_name = "".join(["WalkForward",
("_propDelay{:.2f}-{:.2f}".format( ("_propDelay{:.2f}-{:.2f}".format(
A1FieldCfg.sensor.proprioception.latency_range[0], A1FieldCfg.sensor.proprioception.latency_range[0],
A1FieldCfg.sensor.proprioception.latency_range[1], A1FieldCfg.sensor.proprioception.latency_range[1],

View File

@ -3,12 +3,14 @@ import numpy as np
from legged_gym.envs.a1.a1_field_config import A1FieldCfg, A1FieldCfgPPO from legged_gym.envs.a1.a1_field_config import A1FieldCfg, A1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict from legged_gym.utils.helpers import merge_dict
class A1ClimbCfg( A1FieldCfg ): class A1JumpCfg( A1FieldCfg ):
class init_state( A1FieldCfg.init_state ):
pos = [0., 0., 0.45]
#### uncomment this to train non-virtual terrain #### uncomment this to train non-virtual terrain
# class sensor( A1FieldCfg.sensor ): # class sensor( A1FieldCfg.sensor ):
# class proprioception( A1FieldCfg.sensor.proprioception ): # class proprioception( A1FieldCfg.sensor.proprioception ):
# delay_action_obs = True
# latency_range = [0.04-0.0025, 0.04+0.0075] # latency_range = [0.04-0.0025, 0.04+0.0075]
#### uncomment the above to train non-virtual terrain #### uncomment the above to train non-virtual terrain
@ -20,22 +22,23 @@ class A1ClimbCfg( A1FieldCfg ):
BarrierTrack_kwargs = merge_dict(A1FieldCfg.terrain.BarrierTrack_kwargs, dict( BarrierTrack_kwargs = merge_dict(A1FieldCfg.terrain.BarrierTrack_kwargs, dict(
options= [ options= [
"climb", "jump",
], ],
track_block_length= 1.6, track_block_length= 1.6,
climb= dict( jump= dict(
height= (0.2, 0.6), # use this to train in virtual terrain height= (0.2, 0.6), # use this to train in virtual terrain
# height= (0.1, 0.5), # use this to train in non-virtual terrain # height= (0.1, 0.5), # use this to train in non-virtual terrain
depth= (0.1, 0.2), depth= (0.1, 0.2),
fake_offset= 0.0, # [m] an offset that make the robot easier to get into the obstacle fake_offset= 0.0, # [m] an offset that make the robot easier to get into the obstacle
climb_down_prob= 0., # probability of climbing down use it in non-virtual terrain jump_down_prob= 0., # probability of jumping down use it in non-virtual terrain
), ),
virtual_terrain= False, # Change this to False for real terrain virtual_terrain= False, # Change this to False for real terrain
no_perlin_threshold= 0.06, no_perlin_threshold= 0.12,
n_obstacles_per_track= 3,
)) ))
TerrainPerlin_kwargs = merge_dict(A1FieldCfg.terrain.TerrainPerlin_kwargs, dict( TerrainPerlin_kwargs = merge_dict(A1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
zScale= [0.05, 0.1], zScale= [0.05, 0.15],
)) ))
class commands( A1FieldCfg.commands ): class commands( A1FieldCfg.commands ):
@ -58,10 +61,19 @@ class A1ClimbCfg( A1FieldCfg ):
)) ))
class domain_rand( A1FieldCfg.domain_rand ): class domain_rand( A1FieldCfg.domain_rand ):
class com_range( A1FieldCfg.domain_rand.com_range ):
z = [-0.1, 0.1]
init_base_pos_range = dict( init_base_pos_range = dict(
x= [0.2, 0.6], x= [0.2, 0.6],
y= [-0.25, 0.25], y= [-0.25, 0.25],
) )
init_base_rot_range = dict(
roll= [-0.1, 0.1],
pitch= [-0.1, 0.1],
)
push_robots = False
class rewards( A1FieldCfg.rewards ): class rewards( A1FieldCfg.rewards ):
class scales: class scales:
@ -73,6 +85,8 @@ class A1ClimbCfg( A1FieldCfg ):
penetrate_volume = -1e-2 penetrate_volume = -1e-2
exceed_dof_pos_limits = -1e-1 exceed_dof_pos_limits = -1e-1
exceed_torque_limits_i = -2e-1 exceed_torque_limits_i = -2e-1
soft_dof_pos_limit = 0.8
max_contact_force = 100.0
class curriculum( A1FieldCfg.curriculum ): class curriculum( A1FieldCfg.curriculum ):
penetrate_volume_threshold_harder = 8000 penetrate_volume_threshold_harder = 8000
@ -81,7 +95,8 @@ class A1ClimbCfg( A1FieldCfg ):
penetrate_depth_threshold_easier = 1600 penetrate_depth_threshold_easier = 1600
class A1ClimbCfgPPO( A1FieldCfgPPO ): logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class A1JumpCfgPPO( A1FieldCfgPPO ):
class algorithm( A1FieldCfgPPO.algorithm ): class algorithm( A1FieldCfgPPO.algorithm ):
entropy_coef = 0.0 entropy_coef = 0.0
clip_min_std = 0.2 clip_min_std = 0.2
@ -89,23 +104,15 @@ class A1ClimbCfgPPO( A1FieldCfgPPO ):
class runner( A1FieldCfgPPO.runner ): class runner( A1FieldCfgPPO.runner ):
policy_class_name = "ActorCriticRecurrent" policy_class_name = "ActorCriticRecurrent"
experiment_name = "field_a1" experiment_name = "field_a1"
run_name = "".join(["Skill",
("Multi" if len(A1ClimbCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else (A1ClimbCfg.terrain.BarrierTrack_kwargs["options"][0] if A1ClimbCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")),
("_pEnergySubsteps{:.0e}".format(A1ClimbCfg.rewards.scales.legs_energy_substeps) if A1ClimbCfg.rewards.scales.legs_energy_substeps != -1e-6 else ""),
("_pDofLimit{:.0e}".format(A1ClimbCfg.rewards.scales.exceed_dof_pos_limits) if getattr(A1ClimbCfg.rewards.scales, "exceed_dof_pos_limits") != 0.0 else ""),
("_climbDownProb{:.1f}".format(A1ClimbCfg.terrain.BarrierTrack_kwargs["climb"]["climb_down_prob"]) if A1ClimbCfg.terrain.BarrierTrack_kwargs["climb"].get("climb_down_prob", 0.) > 0. else ""),
("_climbHeight{:.1f}-{:.1f}".format(*A1ClimbCfg.terrain.BarrierTrack_kwargs["climb"]["height"])),
("_propDelay{:.2f}-{:.2f}".format(
A1ClimbCfg.sensor.proprioception.latency_range[0],
A1ClimbCfg.sensor.proprioception.latency_range[1],
) if A1ClimbCfg.sensor.proprioception.delay_action_obs else ""
),
("_virtual" if A1ClimbCfg.terrain.BarrierTrack_kwargs["virtual_terrain"] else ""),
("_trackBlockLen{:.1f}".format(A1ClimbCfg.terrain.BarrierTrack_kwargs["track_block_length"]) if A1ClimbCfg.terrain.BarrierTrack_kwargs["track_block_length"] != 2. else ""),
])
resume = True resume = True
load_run = "{Your traind walking model directory}" load_run = "{Your traind walking model directory}"
load_run = "{Your virtually trained climb model directory}" load_run = "{Your virtually trained jump model directory}"
run_name = "".join(["Skills_",
("jump" if A1JumpCfg.terrain.BarrierTrack_kwargs["jump"]["jump_down_prob"] < 1. else "down"),
("_virtual" if A1JumpCfg.terrain.BarrierTrack_kwargs["virtual_terrain"] else ""),
("_noResume" if not resume else "_from" + "_".join(load_run.split("/")[-1].split("_")[:2])),
])
max_iterations = 20000 max_iterations = 20000
save_interval = 500 save_interval = 500

View File

@ -0,0 +1,82 @@
import numpy as np
import os.path as osp
from legged_gym.envs.a1.a1_field_config import A1FieldCfg, A1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict
class A1RemoteCfg( A1FieldCfg ):
class env( A1FieldCfg.env ):
num_envs = 4096
obs_components = [
"proprioception", # 48
# "height_measurements", # 187
# "forward_depth",
# "base_pose",
# "robot_config",
# "engaging_block",
# "sidewall_distance",
]
privileged_obs_components = [
"proprioception",
# "height_measurements",
# "forward_depth",
"robot_config",
]
use_lin_vel = False
privileged_use_lin_vel = True
class terrain( A1FieldCfg.terrain ):
num_rows = 4
num_cols = 4
selected = "TerrainPerlin"
TerrainPerlin_kwargs = dict(
zScale= 0.15,
frequency= 10,
)
class commands( A1FieldCfg.commands ):
class ranges( A1FieldCfg.commands.ranges ):
lin_vel_x = [-1.0, 1.0]
lin_vel_y = [-1., 1.]
ang_vel_yaw = [-1., 1.]
class domain_rand( A1FieldCfg.domain_rand ):
class com_range( A1FieldCfg.domain_rand.com_range ):
x = [-0.2, 0.2]
max_push_vel_ang = 0.5
push_robots = True
class rewards( A1FieldCfg.rewards ):
class scales:
###### hacker from Field
tracking_ang_vel = 0.5
lin_vel_l2norm = -1.
legs_energy_substeps = -1e-5
alive = 2.
# penalty for hardware safety
exceed_dof_pos_limits = -4e-2
exceed_torque_limits_l1norm = -4e-1
# penalty for walking gait, probably no need
collision = -0.1
orientation = -0.1
feet_contact_forces = -1e-3
soft_dof_pos_limit = 0.5
max_contact_force = 60.0
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class A1RemoteCfgPPO( A1FieldCfgPPO ):
class algorithm( A1FieldCfgPPO.algorithm ):
entropy_coef = 0.01
clip_min_std = 0.05
class runner( A1FieldCfgPPO.runner ):
resume = False
load_run = None
run_name = "".join(["WalkByRemote",
("_rLinTrack{:.1f}".format(A1RemoteCfg.rewards.scales.tracking_lin_vel) if getattr(A1RemoteCfg.rewards.scales, "tracking_lin_vel", 0.) != 0. else ""),
("_pLin{:.1f}".format(-A1RemoteCfg.rewards.scales.lin_vel_l2norm) if getattr(A1RemoteCfg.rewards.scales, "lin_vel_l2norm", 0.) != 0. else ""),
("_rAng{:.1f}".format(A1RemoteCfg.rewards.scales.tracking_ang_vel) if A1RemoteCfg.rewards.scales.tracking_ang_vel != 0.2 else ""),
("_rAlive{:.1f}".format(A1RemoteCfg.rewards.scales.alive) if getattr(A1RemoteCfg.rewards.scales, "alive", 2.) != 2. else ""),
("_pEnergySubsteps{:.0e}".format(A1RemoteCfg.rewards.scales.legs_energy_substeps) if getattr(A1RemoteCfg.rewards.scales, "legs_energy_substeps", -2e-5) != -2e-5 else "_nopEnergy"),
("_noResume" if not resume else "_from" + "_".join(load_run.split("/")[-1].split("_")[:2])),
])

View File

@ -141,6 +141,7 @@ class LeggedRobot(BaseTask):
self.last_actions[:] = self.actions[:] self.last_actions[:] = self.actions[:]
self.last_dof_vel[:] = self.dof_vel[:] self.last_dof_vel[:] = self.dof_vel[:]
self.last_root_vel[:] = self.root_states[:, 7:13] self.last_root_vel[:] = self.root_states[:, 7:13]
self.last_torques[:] = self.torques[:]
if self.viewer and self.enable_viewer_sync and self.debug_viz: if self.viewer and self.enable_viewer_sync and self.debug_viz:
self._draw_debug_vis() self._draw_debug_vis()
@ -343,6 +344,7 @@ class LeggedRobot(BaseTask):
# set small commands to zero # set small commands to zero
self.commands[env_ids, :2] *= (torch.norm(self.commands[env_ids, :2], dim=1) > 0.2).unsqueeze(1) self.commands[env_ids, :2] *= (torch.norm(self.commands[env_ids, :2], dim=1) > 0.2).unsqueeze(1)
self.commands[env_ids, 2] *= (torch.abs(self.commands[env_ids, 2]) > 0.1)
def _compute_torques(self, actions): def _compute_torques(self, actions):
""" Compute torques from actions. """ Compute torques from actions.
@ -387,7 +389,9 @@ class LeggedRobot(BaseTask):
) )
else: else:
self.dof_pos[env_ids] = self.default_dof_pos self.dof_pos[env_ids] = self.default_dof_pos
self.dof_vel[env_ids] = 0. # self.dof_vel[env_ids] = 0. # history init method
dof_vel_range = getattr(self.cfg.domain_rand, "init_dof_vel_range", [-3., 3.])
self.dof_vel[env_ids] = torch.rand_like(self.dof_vel[env_ids]) * abs(dof_vel_range[1] - dof_vel_range[0]) + min(dof_vel_range)
# Each env has multiple actors. So the actor index is not the same as env_id. But robot actor is always the first. # Each env has multiple actors. So the actor index is not the same as env_id. But robot actor is always the first.
dof_idx = env_ids * self.all_root_states.shape[0] / self.num_envs dof_idx = env_ids * self.all_root_states.shape[0] / self.num_envs
@ -414,16 +418,64 @@ class LeggedRobot(BaseTask):
else: else:
self.root_states[env_ids] = self.base_init_state self.root_states[env_ids] = self.base_init_state
self.root_states[env_ids, :3] += self.env_origins[env_ids] self.root_states[env_ids, :3] += self.env_origins[env_ids]
# base rotation (roll and pitch)
if hasattr(self.cfg.domain_rand, "init_base_rot_range"):
base_roll = torch_rand_float(
*self.cfg.domain_rand.init_base_rot_range["roll"],
(len(env_ids), 1),
device=self.device,
)[:, 0]
base_pitch = torch_rand_float(
*self.cfg.domain_rand.init_base_rot_range["pitch"],
(len(env_ids), 1),
device=self.device,
)[:, 0]
base_quat = quat_from_euler_xyz(base_roll, base_pitch, torch.zeros_like(base_roll))
self.root_states[env_ids, 3:7] = base_quat
# base velocities # base velocities
if getattr(self.cfg.domain_rand, "init_base_vel_range", None) is None: if getattr(self.cfg.domain_rand, "init_base_vel_range", None) is None:
base_vel_range = (-0.5, 0.5) base_vel_range = (-0.5, 0.5)
else: else:
base_vel_range = self.cfg.domain_rand.init_base_vel_range base_vel_range = self.cfg.domain_rand.init_base_vel_range
if isinstance(base_vel_range, (tuple, list)):
self.root_states[env_ids, 7:13] = torch_rand_float( self.root_states[env_ids, 7:13] = torch_rand_float(
*base_vel_range, *base_vel_range,
(len(env_ids), 6), (len(env_ids), 6),
device=self.device, device=self.device,
) # [7:10]: lin vel, [10:13]: ang vel ) # [7:10]: lin vel, [10:13]: ang vel
elif isinstance(base_vel_range, dict):
self.root_states[env_ids, 7:8] = torch_rand_float(
*base_vel_range["x"],
(len(env_ids), 1),
device=self.device,
)
self.root_states[env_ids, 8:9] = torch_rand_float(
*base_vel_range["y"],
(len(env_ids), 1),
device=self.device,
)
self.root_states[env_ids, 9:10] = torch_rand_float(
*base_vel_range["z"],
(len(env_ids), 1),
device=self.device,
)
self.root_states[env_ids, 10:11] = torch_rand_float(
*base_vel_range["roll"],
(len(env_ids), 1),
device=self.device,
)
self.root_states[env_ids, 11:12] = torch_rand_float(
*base_vel_range["pitch"],
(len(env_ids), 1),
device=self.device,
)
self.root_states[env_ids, 12:13] = torch_rand_float(
*base_vel_range["yaw"],
(len(env_ids), 1),
device=self.device,
)
else:
raise NameError(f"Unknown base_vel_range type: {type(base_vel_range)}")
# Each env has multiple actors. So the actor index is not the same as env_id. But robot actor is always the first. # Each env has multiple actors. So the actor index is not the same as env_id. But robot actor is always the first.
actor_idx = env_ids * self.all_root_states.shape[0] / self.num_envs actor_idx = env_ids * self.all_root_states.shape[0] / self.num_envs
@ -546,6 +598,7 @@ class LeggedRobot(BaseTask):
self.last_actions = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False) self.last_actions = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False)
self.last_dof_vel = torch.zeros_like(self.dof_vel) self.last_dof_vel = torch.zeros_like(self.dof_vel)
self.last_root_vel = torch.zeros_like(self.root_states[:, 7:13]) self.last_root_vel = torch.zeros_like(self.root_states[:, 7:13])
self.last_torques = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False)
self.commands = torch.zeros(self.num_envs, self.cfg.commands.num_commands, dtype=torch.float, device=self.device, requires_grad=False) # x vel, y vel, yaw vel, heading self.commands = torch.zeros(self.num_envs, self.cfg.commands.num_commands, dtype=torch.float, device=self.device, requires_grad=False) # x vel, y vel, yaw vel, heading
self.commands_scale = torch.tensor([self.obs_scales.lin_vel, self.obs_scales.lin_vel, self.obs_scales.ang_vel], device=self.device, requires_grad=False,) # TODO change this self.commands_scale = torch.tensor([self.obs_scales.lin_vel, self.obs_scales.lin_vel, self.obs_scales.ang_vel], device=self.device, requires_grad=False,) # TODO change this
self.feet_air_time = torch.zeros(self.num_envs, self.feet_indices.shape[0], dtype=torch.float, device=self.device, requires_grad=False) self.feet_air_time = torch.zeros(self.num_envs, self.feet_indices.shape[0], dtype=torch.float, device=self.device, requires_grad=False)
@ -580,8 +633,11 @@ class LeggedRobot(BaseTask):
self.default_dof_pos = self.default_dof_pos.unsqueeze(0) self.default_dof_pos = self.default_dof_pos.unsqueeze(0)
def _reset_buffers(self, env_ids): def _reset_buffers(self, env_ids):
if getattr(self.cfg.init_state, "zero_actions", False):
self.actions[env_ids] = 0.
self.last_actions[env_ids] = 0. self.last_actions[env_ids] = 0.
self.last_dof_vel[env_ids] = 0. self.last_dof_vel[env_ids] = 0.
self.last_torques[env_ids] = 0.
self.feet_air_time[env_ids] = 0. self.feet_air_time[env_ids] = 0.
self.episode_length_buf[env_ids] = 0 self.episode_length_buf[env_ids] = 0
self.reset_buf[env_ids] = 1 self.reset_buf[env_ids] = 1
@ -902,8 +958,13 @@ class LeggedRobot(BaseTask):
# log additional curriculum info # log additional curriculum info
if self.cfg.terrain.curriculum: if self.cfg.terrain.curriculum:
self.extras["episode"]["terrain_level"] = torch.mean(self.terrain_levels.float()) self.extras["episode"]["terrain_level"] = torch.mean(self.terrain_levels.float())
if len(env_ids) > 0:
self.extras["episode"]["terrain_level_max"] = torch.max(self.terrain_levels[env_ids].float())
self.extras["episode"]["terrain_level_min"] = torch.min(self.terrain_levels[env_ids].float())
if self.cfg.commands.curriculum: if self.cfg.commands.curriculum:
self.extras["episode"]["max_command_x"] = self.command_ranges["lin_vel_x"][1] self.extras["episode"]["max_command_x"] = self.command_ranges["lin_vel_x"][1]
# log whether the episode ends by timeout or dead, or by reaching the goal
self.extras["episode"]["timeout_ratio"] = self.time_out_buf.float().sum() / self.reset_buf.float().sum()
# send timeout info to the algorithm # send timeout info to the algorithm
if self.cfg.env.send_timeouts: if self.cfg.env.send_timeouts:
self.extras["time_outs"] = self.time_out_buf self.extras["time_outs"] = self.time_out_buf
@ -930,6 +991,9 @@ class LeggedRobot(BaseTask):
# Penalize torques # Penalize torques
return torch.sum(torch.square(self.torques), dim=1) return torch.sum(torch.square(self.torques), dim=1)
def _reward_delta_torques(self):
return torch.sum(torch.square(self.torques - self.last_torques), dim=1)
def _reward_dof_vel(self): def _reward_dof_vel(self):
# Penalize dof velocities # Penalize dof velocities
return torch.sum(torch.square(self.dof_vel), dim=1) return torch.sum(torch.square(self.dof_vel), dim=1)

View File

@ -7,6 +7,7 @@ import torch
from legged_gym.envs.base.legged_robot import LeggedRobot from legged_gym.envs.base.legged_robot import LeggedRobot
from legged_gym.utils.terrain import get_terrain_cls from legged_gym.utils.terrain import get_terrain_cls
from legged_gym.utils.observation import get_obs_slice
from .legged_robot_config import LeggedRobotCfg from .legged_robot_config import LeggedRobotCfg
class LeggedRobotField(LeggedRobot): class LeggedRobotField(LeggedRobot):
@ -102,10 +103,34 @@ class LeggedRobotField(LeggedRobot):
self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions,
device= self.device, device= self.device,
) )
if isinstance(getattr(self.cfg.normalization, "clip_actions_low", None), (tuple, list)):
self.cfg.normalization.clip_actions_low = torch.tensor(
self.cfg.normalization.clip_actions_low,
device= self.device
)
if isinstance(getattr(self.cfg.normalization, "clip_actions_high", None), (tuple, list)):
self.cfg.normalization.clip_actions_high = torch.tensor(
self.cfg.normalization.clip_actions_high,
device= self.device
)
if getattr(self.cfg.normalization, "clip_actions_method", None) == "tanh": if getattr(self.cfg.normalization, "clip_actions_method", None) == "tanh":
clip_actions = self.cfg.normalization.clip_actions clip_actions = self.cfg.normalization.clip_actions
self.actions = (torch.tanh(actions) * clip_actions).to(self.device) self.actions = (torch.tanh(actions) * clip_actions).to(self.device)
actions_preprocessed = True actions_preprocessed = True
elif getattr(self.cfg.normalization, "clip_actions_method", None) == "hard":
if self.cfg.control.control_type == "P":
actions_low = getattr(
self.cfg.normalization, "clip_actions_low",
self.dof_pos_limits[:, 0] - self.default_dof_pos,
)
actions_high = getattr(
self.cfg.normalization, "clip_actions_high",
self.dof_pos_limits[:, 1] - self.default_dof_pos,
)
self.actions = torch.clip(actions, actions_low, actions_high)
else:
raise NotImplementedError()
actions_preprocessed = True
if getattr(self.cfg.normalization, "clip_actions_delta", None) is not None: if getattr(self.cfg.normalization, "clip_actions_delta", None) is not None:
self.actions = torch.clip( self.actions = torch.clip(
self.actions, self.actions,
@ -226,6 +251,7 @@ class LeggedRobotField(LeggedRobot):
torch.div(pos_x, self.terrain.env_block_length, rounding_mode= "floor") - 1, torch.div(pos_x, self.terrain.env_block_length, rounding_mode= "floor") - 1,
min= 0.0, min= 0.0,
)).cpu() )).cpu()
self.extras["episode"]["max_power_throughout_episode"] = self.max_power_per_timestep[env_ids].max().cpu().item()
return return_ return return_
@ -245,6 +271,11 @@ class LeggedRobotField(LeggedRobot):
min= 0.0, min= 0.0,
)).cpu() )).cpu()
self.max_power_per_timestep = torch.maximum(
self.max_power_per_timestep,
torch.max(torch.sum(self.substep_torques * self.substep_dof_vel, dim= -1), dim= -1)[0],
)
return return_ return return_
def _compute_torques(self, actions): def _compute_torques(self, actions):
@ -322,9 +353,40 @@ class LeggedRobotField(LeggedRobot):
gymapi.IMAGE_COLOR, gymapi.IMAGE_COLOR,
))) )))
if getattr(self.cfg.normalization, "clip_actions_method", None) is None:
print("WARNING: No action clipping method is specified, actions will only be clipped by clip_actions.")
# projected gravity bias (if needed)
if getattr(self.cfg.domain_rand, "randomize_gravity_bias", False):
print("Initializing gravity bias for domain randomization")
# add cross trajectory domain randomization on projected gravity bias
# uniform sample from range
self.gravity_bias = torch.rand(self.num_envs, 3, dtype= torch.float, device= self.device, requires_grad= False)
self.gravity_bias[:, 0] *= self.cfg.domain_rand.gravity_bias_range["x"][1] - self.cfg.domain_rand.gravity_bias_range["x"][0]
self.gravity_bias[:, 0] += self.cfg.domain_rand.gravity_bias_range["x"][0]
self.gravity_bias[:, 1] *= self.cfg.domain_rand.gravity_bias_range["y"][1] - self.cfg.domain_rand.gravity_bias_range["y"][0]
self.gravity_bias[:, 1] += self.cfg.domain_rand.gravity_bias_range["y"][0]
self.gravity_bias[:, 2] *= self.cfg.domain_rand.gravity_bias_range["z"][1] - self.cfg.domain_rand.gravity_bias_range["z"][0]
self.gravity_bias[:, 2] += self.cfg.domain_rand.gravity_bias_range["z"][0]
self.max_power_per_timestep = torch.zeros(self.num_envs, dtype= torch.float32, device= self.device)
def _reset_buffers(self, env_ids): def _reset_buffers(self, env_ids):
return_ = super()._reset_buffers(env_ids) return_ = super()._reset_buffers(env_ids)
if hasattr(self, "velocity_sample_points"): self.velocity_sample_points[env_ids] = 0. if hasattr(self, "velocity_sample_points"): self.velocity_sample_points[env_ids] = 0.
if getattr(self.cfg.domain_rand, "randomize_gravity_bias", False):
assert hasattr(self, "gravity_bias")
self.gravity_bias[env_ids] = torch.rand_like(self.gravity_bias[env_ids])
self.gravity_bias[env_ids, 0] *= self.cfg.domain_rand.gravity_bias_range["x"][1] - self.cfg.domain_rand.gravity_bias_range["x"][0]
self.gravity_bias[env_ids, 0] += self.cfg.domain_rand.gravity_bias_range["x"][0]
self.gravity_bias[env_ids, 1] *= self.cfg.domain_rand.gravity_bias_range["y"][1] - self.cfg.domain_rand.gravity_bias_range["y"][0]
self.gravity_bias[env_ids, 1] += self.cfg.domain_rand.gravity_bias_range["y"][0]
self.gravity_bias[env_ids, 2] *= self.cfg.domain_rand.gravity_bias_range["z"][1] - self.cfg.domain_rand.gravity_bias_range["z"][0]
self.gravity_bias[env_ids, 2] += self.cfg.domain_rand.gravity_bias_range["z"][0]
self.max_power_per_timestep[env_ids] = 0.
return return_ return return_
def _prepare_reward_function(self): def _prepare_reward_function(self):
@ -483,6 +545,16 @@ class LeggedRobotField(LeggedRobot):
self.cfg.env.obs_components, self.cfg.env.obs_components,
privileged= False, privileged= False,
) )
if getattr(self.cfg.domain_rand, "randomize_gravity_bias", False) and "proprioception" in self.cfg.env.obs_components:
# NOTE: The logic order is a bit messed up here when combining using legged_robot_noisy.py
# When simulating the sensor bias/noise and latency, the gravity bias should be added before
# proprioception latency.
# However, considering gravirty bias is randomized across trajectory, the latency simulation
# from legged_robot_noisy.py does not need to be computed after adding the bias.
# So, this computation order looks OK.
assert hasattr(self, "gravity_bias")
proprioception_slice = get_obs_slice(self.obs_segments, "proprioception")
self.obs_buf[:, proprioception_slice[0].start + 6: proprioception_slice[0].start + 9] += self.gravity_bias
if self.add_noise: if self.add_noise:
self.obs_buf += (2 * torch.rand_like(self.obs_buf) - 1) * self.noise_scale_vec self.obs_buf += (2 * torch.rand_like(self.obs_buf) - 1) * self.noise_scale_vec
@ -496,9 +568,13 @@ class LeggedRobotField(LeggedRobot):
if "proprioception" in getattr(self.cfg.env, "privileged_obs_components", []) \ if "proprioception" in getattr(self.cfg.env, "privileged_obs_components", []) \
and getattr(self.cfg.env, "privileged_use_lin_vel", False): and getattr(self.cfg.env, "privileged_use_lin_vel", False):
# NOTE: according to self.get_obs_segment_from_components, "proprioception" observation # NOTE: according to self.get_obs_segment_from_components, "proprioception" observation
# is always the first part of this flattened observation. check super().compute_observations # is always the FIRST part of this flattened observation. check super().compute_observations
# and self.cfg.env.use_lin_vel for the reason of this if branch. # and self.cfg.env.use_lin_vel for the reason of this if branch.
self.privileged_obs_buf[:, :3] = self.base_lin_vel * self.obs_scales.lin_vel self.privileged_obs_buf[:, :3] = self.base_lin_vel * self.obs_scales.lin_vel
if getattr(self.cfg.domain_rand, "randomize_privileged_gravity_bias", False) and "proprioception" in getattr(self.cfg.env, "privileged_obs_components", []):
assert hasattr(self, "gravity_bias")
proprioception_slice = get_obs_slice(self.privileged_obs_segments, "proprioception")
self.privileged_obs_buf[:, proprioception_slice[0].start + 6: proprioception_slice[0].start + 9] += self.gravity_bias
for key in self.sensor_handles[0].keys(): for key in self.sensor_handles[0].keys():
if "camera" in key: if "camera" in key:
@ -558,12 +634,31 @@ class LeggedRobotField(LeggedRobot):
def _create_envs(self): def _create_envs(self):
if self.cfg.domain_rand.randomize_motor: if self.cfg.domain_rand.randomize_motor:
mtr_rng = self.cfg.domain_rand.leg_motor_strength_range
self.motor_strength = torch_rand_float( self.motor_strength = torch_rand_float(
self.cfg.domain_rand.leg_motor_strength_range[0], mtr_rng[0],
self.cfg.domain_rand.leg_motor_strength_range[1], mtr_rng[1],
(self.num_envs, 12), (self.num_envs, self.num_actions),
device=self.device, device=self.device,
) )
return_ = super()._create_envs()
front_hip_names = getattr(self.cfg.asset, "front_hip_names", ["FR_hip_joint", "FL_hip_joint"])
self.front_hip_indices = torch.zeros(len(front_hip_names), dtype=torch.long, device=self.device, requires_grad=False)
for i, name in enumerate(front_hip_names):
self.front_hip_indices[i] = self.dof_names.index(name)
rear_hip_names = getattr(self.cfg.asset, "rear_hip_names", ["RR_hip_joint", "RL_hip_joint"])
self.rear_hip_indices = torch.zeros(len(rear_hip_names), dtype=torch.long, device=self.device, requires_grad=False)
for i, name in enumerate(rear_hip_names):
self.rear_hip_indices[i] = self.dof_names.index(name)
hip_names = front_hip_names + rear_hip_names
self.hip_indices = torch.zeros(len(hip_names), dtype=torch.long, device=self.device, requires_grad=False)
for i, name in enumerate(hip_names):
self.hip_indices[i] = self.dof_names.index(name)
return super()._create_envs() return super()._create_envs()
def _process_rigid_shape_props(self, props, env_id): def _process_rigid_shape_props(self, props, env_id):
@ -703,9 +798,15 @@ class LeggedRobotField(LeggedRobot):
def draw_volume_sample_points(self): def draw_volume_sample_points(self):
sphere_geom = gymutil.WireframeSphereGeometry(0.005, 4, 4, None, color=(1, 0.1, 0)) sphere_geom = gymutil.WireframeSphereGeometry(0.005, 4, 4, None, color=(1, 0.1, 0))
sphere_penetrate_geom = gymutil.WireframeSphereGeometry(0.005, 4, 4, None, color=(0.1, 0.6, 0.6))
if self.cfg.terrain.selected == "BarrierTrack":
penetration_mask = self.terrain.get_penetration_mask(self.volume_sample_points.view(-1, 3)).view(self.num_envs, -1)
for env_idx in range(self.num_envs): for env_idx in range(self.num_envs):
for point_idx in range(self.volume_sample_points.shape[1]): for point_idx in range(self.volume_sample_points.shape[1]):
sphere_pose = gymapi.Transform(gymapi.Vec3(*self.volume_sample_points[env_idx, point_idx]), r= None) sphere_pose = gymapi.Transform(gymapi.Vec3(*self.volume_sample_points[env_idx, point_idx]), r= None)
if penetration_mask[env_idx, point_idx]:
gymutil.draw_lines(sphere_penetrate_geom, self.gym, self.viewer, self.envs[env_idx], sphere_pose)
else:
gymutil.draw_lines(sphere_geom, self.gym, self.viewer, self.envs[env_idx], sphere_pose) gymutil.draw_lines(sphere_geom, self.gym, self.viewer, self.envs[env_idx], sphere_pose)
##### Additional rewards ##### ##### Additional rewards #####
@ -715,6 +816,10 @@ class LeggedRobotField(LeggedRobot):
def _reward_world_vel_l2norm(self): def _reward_world_vel_l2norm(self):
return torch.norm((self.commands[:, :2] - self.root_states[:, 7:9]), dim= 1) return torch.norm((self.commands[:, :2] - self.root_states[:, 7:9]), dim= 1)
def _reward_tracking_world_vel(self):
world_vel_error = torch.sum(torch.square(self.commands[:, :2] - self.root_states[:, 7:9]), dim= 1)
return torch.exp(-world_vel_error/self.cfg.rewards.tracking_sigma)
def _reward_legs_energy(self): def _reward_legs_energy(self):
return torch.sum(torch.square(self.torques * self.dof_vel), dim=1) return torch.sum(torch.square(self.torques * self.dof_vel), dim=1)
@ -730,6 +835,10 @@ class LeggedRobotField(LeggedRobot):
def _reward_alive(self): def _reward_alive(self):
return 1. return 1.
def _reward_dof_error(self):
dof_error = torch.sum(torch.square(self.dof_pos - self.default_dof_pos), dim=1)
return dof_error
def _reward_lin_cmd(self): def _reward_lin_cmd(self):
""" This reward term does not depend on the policy, depends on the command """ """ This reward term does not depend on the policy, depends on the command """
return torch.norm(self.commands[:, :2], dim= 1) return torch.norm(self.commands[:, :2], dim= 1)
@ -767,6 +876,157 @@ class LeggedRobotField(LeggedRobot):
penetration_mask *= torch.norm(self.velocity_sample_points, dim= -1) + 1e-3 penetration_mask *= torch.norm(self.velocity_sample_points, dim= -1) + 1e-3
return torch.sum(penetration_mask, dim= -1) return torch.sum(penetration_mask, dim= -1)
def _reward_tilt_cond(self):
""" Conditioned reward term in terms of whether the robot is engaging the tilt obstacle
Use positive factor to enable rolling angle when incountering tilt obstacle
"""
if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
roll, pitch, yaw = get_euler_xyz(self.root_states[:, 3:7])
pi = torch.acos(torch.zeros(1)).item() * 2 # which is 3.1415927410125732
roll[roll > pi] -= pi * 2 # to range (-pi, pi)
roll[roll < -pi] += pi * 2 # to range (-pi, pi)
if hasattr(self, "volume_sample_points"):
self.refresh_volume_sample_points()
stepping_obstacle_info = self.terrain.get_stepping_obstacle_info(self.volume_sample_points.view(-1, 3))
else:
stepping_obstacle_info = self.terrain.get_stepping_obstacle_info(self.root_states[:, :3])
stepping_obstacle_info = stepping_obstacle_info.view(self.num_envs, -1, stepping_obstacle_info.shape[-1])
# Assuming that each robot will only be in one obstacle or non obstacle.
robot_stepping_obstacle_id = torch.max(stepping_obstacle_info[:, :, 0], dim= -1)[0]
tilting_mask = robot_stepping_obstacle_id == self.terrain.track_options_id_dict["tilt"]
return_ = torch.where(tilting_mask, torch.clip(torch.abs(roll), 0, torch.pi/2), -torch.clip(torch.abs(roll), 0, torch.pi/2))
return return_
def _reward_hip_pos(self):
return torch.sum(torch.square(self.dof_pos[:, self.hip_indices] - self.default_dof_pos[:, self.hip_indices]), dim=1)
def _reward_front_hip_pos(self):
""" Reward the robot to stop moving its front hips """
return torch.sum(torch.square(self.dof_pos[:, self.front_hip_indices] - self.default_dof_pos[:, self.front_hip_indices]), dim=1)
def _reward_rear_hip_pos(self):
""" Reward the robot to stop moving its rear hips """
return torch.sum(torch.square(self.dof_pos[:, self.rear_hip_indices] - self.default_dof_pos[:, self.rear_hip_indices]), dim=1)
def _reward_down_cond(self):
if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0])
self.refresh_volume_sample_points()
engaging_obstacle_info = self.terrain.get_engaging_block_info(
self.root_states[:, :3],
self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3)
)
roll, pitch, yaw = get_euler_xyz(self.root_states[:, 3:7])
pi = torch.acos(torch.zeros(1)).item() * 2 # which is 3.1415927410125732
pitch[pitch > pi] -= pi * 2 # to range (-pi, pi)
pitch[pitch < -pi] += pi * 2 # to range (-pi, pi)
engaging_mask = (engaging_obstacle_info[:, 1 + self.terrain.track_options_id_dict["jump"]] > 0) \
& (engaging_obstacle_info[:, 1 + self.terrain.max_track_options + 2] < 0.)
pitch_err = torch.abs(pitch - 0.2)
return torch.exp(-pitch_err/self.cfg.rewards.tracking_sigma) * engaging_mask # the higher positive factor, the more you want the robot to pitch down 0.2 rad
def _reward_jump_x_vel_cond(self):
if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0])
self.refresh_volume_sample_points()
engaging_obstacle_info = self.terrain.get_engaging_block_info(
self.root_states[:, :3],
self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3)
)
engaging_mask = (engaging_obstacle_info[:, 1 + self.terrain.track_options_id_dict["jump"]] > 0) \
& (engaging_obstacle_info[:, 1 + self.terrain.max_track_options + 2] > 0.) \
& (engaging_obstacle_info[:, 0] > 0) # engaging jump-up, not engaging jump-down, positive distance.
roll, pitch, yaw = get_euler_xyz(self.root_states[:, 3:7])
pi = torch.acos(torch.zeros(1)).item() * 2 # which is 3.1415927410125732
pitch[pitch > pi] -= pi * 2 # to range (-pi, pi)
pitch[pitch < -pi] += pi * 2 # to range (-pi, pi)
pitch_up_mask = pitch < -0.75 # a hack value
return torch.clip(self.base_lin_vel[:, 0], max= 1.5) * engaging_mask * pitch_up_mask
def _reward_sync_legs_cond(self):
""" A hack to force same actuation on both rear legs when jump. """
if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0])
self.refresh_volume_sample_points()
engaging_obstacle_info = self.terrain.get_engaging_block_info(
self.root_states[:, :3],
self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3)
)
engaging_mask = (engaging_obstacle_info[:, 1 + self.terrain.track_options_id_dict["jump"]] > 0) \
& (engaging_obstacle_info[:, 1 + self.terrain.max_track_options + 2] > 0.) \
& (engaging_obstacle_info[:, 0] > 0) # engaging jump-up, not engaging jump-down, positive distance.
rr_legs = torch.clone(self.actions[:, 6:9]) # shoulder, thigh, calf
rl_legs = torch.clone(self.actions[:, 9:12]) # shoulder, thigh, calf
rl_legs[:, 0] *= -1 # flip the sign of shoulder action
return torch.norm(rr_legs - rl_legs, dim= -1) * engaging_mask
def _reward_sync_all_legs_cond(self):
""" A hack to force same actuation on both front/rear legs when jump. """
if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0])
self.refresh_volume_sample_points()
engaging_obstacle_info = self.terrain.get_engaging_block_info(
self.root_states[:, :3],
self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3)
)
engaging_mask = (engaging_obstacle_info[:, 1 + self.terrain.track_options_id_dict["jump"]] > 0) \
& (engaging_obstacle_info[:, 1 + self.terrain.max_track_options + 2] > 0.) \
& (engaging_obstacle_info[:, 0] > 0) # engaging jump-up, not engaging jump-down, positive distance.
right_legs = torch.clone(torch.cat([
self.actions[:, 0:3],
self.actions[:, 6:9],
], dim= -1)) # shoulder, thigh, calf
left_legs = torch.clone(torch.cat([
self.actions[:, 3:6],
self.actions[:, 9:12],
], dim= -1)) # shoulder, thigh, calf
left_legs[:, 0] *= -1 # flip the sign of shoulder action
left_legs[:, 3] *= -1 # flip the sign of shoulder action
return torch.norm(right_legs - left_legs, p= 1, dim= -1) * engaging_mask
def _reward_sync_all_legs(self):
right_legs = torch.clone(torch.cat([
self.actions[:, 0:3],
self.actions[:, 6:9],
], dim= -1)) # shoulder, thigh, calf
left_legs = torch.clone(torch.cat([
self.actions[:, 3:6],
self.actions[:, 9:12],
], dim= -1)) # shoulder, thigh, calf
left_legs[:, 0] *= -1 # flip the sign of shoulder action
left_legs[:, 3] *= -1 # flip the sign of shoulder action
return torch.norm(right_legs - left_legs, p= 1, dim= -1)
def _reward_dof_error_cond(self):
""" Force dof error when not engaging obstacle """
if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0])
self.refresh_volume_sample_points()
engaging_obstacle_info = self.terrain.get_engaging_block_info(
self.root_states[:, :3],
self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3)
)
engaging_mask = (engaging_obstacle_info[:, 1] > 0)
return torch.sum(torch.square(self.dof_pos - self.default_dof_pos), dim=1) * engaging_mask
def _reward_leap_bonous_cond(self):
""" counteract the tracking reward loss during leap"""
if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0])
self.refresh_volume_sample_points()
engaging_obstacle_info = self.terrain.get_engaging_block_info(
self.root_states[:, :3],
self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3)
)
engaging_mask = (engaging_obstacle_info[:, 1 + self.terrain.track_options_id_dict["leap"]] > 0) \
& (-engaging_obstacle_info[:, 1 + self.terrain.max_track_options + 1] < engaging_obstacle_info[:, 0]) \
& (engaging_obstacle_info[:, 0] < 0.) # engaging jump-up, not engaging jump-down, positive distance.
world_vel_error = torch.sum(torch.square(self.commands[:, :2] - self.root_states[:, 7:9]), dim= 1)
return (1 - torch.exp(-world_vel_error/self.cfg.rewards.tracking_sigma)) * engaging_mask # reverse version of tracking reward
##### Some helper functions that override parent class attributes ##### ##### Some helper functions that override parent class attributes #####
@property @property
def all_obs_components(self): def all_obs_components(self):

View File

@ -40,11 +40,24 @@ class LeggedRobotNoisy(LeggedRobotField):
self.actions_scaled = self.actions * self.cfg.control.action_scale self.actions_scaled = self.actions * self.cfg.control.action_scale
control_type = self.cfg.control.control_type control_type = self.cfg.control.control_type
if control_type == "P": if control_type == "P":
self.actions_scaled_torque_clipped = self.clip_position_action_by_torque_limit(self.actions_scaled) actions_scaled_torque_clipped = self.clip_position_action_by_torque_limit(self.actions_scaled)
else: else:
raise NotImplementedError raise NotImplementedError
else: else:
self.actions_scaled_torque_clipped = self.actions * self.cfg.control.action_scale actions_scaled_torque_clipped = self.actions * self.cfg.control.action_scale
if getattr(self.cfg.control, "action_delay", False):
# always put the latest action at the end of the buffer
self.actions_history_buffer = torch.roll(self.actions_history_buffer, shifts= -1, dims= 0)
self.actions_history_buffer[-1] = actions_scaled_torque_clipped
# get the delayed action
self.action_delayed_frames = ((self.current_action_delay / self.dt) + 1).to(int)
self.actions_scaled_torque_clipped = self.actions_history_buffer[
-self.action_delayed_frames,
torch.arange(self.num_envs, device= self.device),
]
else:
self.actions_scaled_torque_clipped = actions_scaled_torque_clipped
return return_ return return_
@ -79,7 +92,10 @@ class LeggedRobotNoisy(LeggedRobotField):
torch.max(torch.abs(self.torques), dim= -1)[0], torch.max(torch.abs(self.torques), dim= -1)[0],
self.max_torques, self.max_torques,
) )
### The set torque limit is usally smaller than the robot dataset
self.torque_exceed_count_substep[(torch.abs(self.torques) > self.torque_limits).any(dim= -1)] += 1 self.torque_exceed_count_substep[(torch.abs(self.torques) > self.torque_limits).any(dim= -1)] += 1
### Hack to check the torque limit exceeding by your own value.
# self.torque_exceed_count_envstep[(torch.abs(self.torques) > 38.).any(dim= -1)] += 1
### count how many times in the episode the robot is out of dof pos limit (summing all dofs) ### count how many times in the episode the robot is out of dof pos limit (summing all dofs)
self.out_of_dof_pos_limit_count_substep += self._reward_dof_pos_limits().int() self.out_of_dof_pos_limit_count_substep += self._reward_dof_pos_limits().int()
@ -108,6 +124,12 @@ class LeggedRobotNoisy(LeggedRobotField):
def _post_physics_step_callback(self): def _post_physics_step_callback(self):
super()._post_physics_step_callback() super()._post_physics_step_callback()
if hasattr(self, "actions_history_buffer"):
resampling_time = getattr(self.cfg.control, "action_delay_resampling_time", self.dt)
resample_env_ids = (self.episode_length_buf % int(resampling_time / self.dt) == 0).nonzero(as_tuple= False).flatten()
if len(resample_env_ids) > 0:
self._resample_action_delay(resample_env_ids)
if hasattr(self, "proprioception_buffer"): if hasattr(self, "proprioception_buffer"):
resampling_time = getattr(self.cfg.sensor.proprioception, "latency_resampling_time", self.dt) resampling_time = getattr(self.cfg.sensor.proprioception, "latency_resampling_time", self.dt)
resample_env_ids = (self.episode_length_buf % int(resampling_time / self.dt) == 0).nonzero(as_tuple= False).flatten() resample_env_ids = (self.episode_length_buf % int(resampling_time / self.dt) == 0).nonzero(as_tuple= False).flatten()
@ -122,6 +144,14 @@ class LeggedRobotNoisy(LeggedRobotField):
self.torque_exceed_count_envstep[(torch.abs(self.substep_torques) > self.torque_limits).any(dim= 1).any(dim= 1)] += 1 self.torque_exceed_count_envstep[(torch.abs(self.substep_torques) > self.torque_limits).any(dim= 1).any(dim= 1)] += 1
def _resample_action_delay(self, env_ids):
self.current_action_delay[env_ids] = torch_rand_float(
self.cfg.control.action_delay_range[0],
self.cfg.control.action_delay_range[1],
(len(env_ids), 1),
device= self.device,
).flatten()
def _resample_proprioception_latency(self, env_ids): def _resample_proprioception_latency(self, env_ids):
self.current_proprioception_latency[env_ids] = torch_rand_float( self.current_proprioception_latency[env_ids] = torch_rand_float(
self.cfg.sensor.proprioception.latency_range[0], self.cfg.sensor.proprioception.latency_range[0],
@ -142,6 +172,27 @@ class LeggedRobotNoisy(LeggedRobotField):
return_ = super()._init_buffers() return_ = super()._init_buffers()
all_obs_components = self.all_obs_components all_obs_components = self.all_obs_components
if getattr(self.cfg.control, "action_delay", False):
assert hasattr(self.cfg.control, "action_delay_range") and hasattr(self.cfg.control, "action_delay_resample_time"), "Please specify action_delay_range and action_delay_resample_time in the config file."
""" Used in pre-physics step """
self.cfg.control.action_history_buffer_length = int((self.cfg.control.action_delay_range[1] + self.dt) / self.dt)
self.actions_history_buffer = torch.zeros(
(
self.cfg.control.action_history_buffer_length,
self.num_envs,
self.num_actions,
),
dtype= torch.float32,
device= self.device,
)
self.current_action_delay = torch_rand_float(
self.cfg.control.action_delay_range[0],
self.cfg.control.action_delay_range[1],
(self.num_envs, 1),
device= self.device,
).flatten()
self.action_delayed_frames = ((self.current_action_delay / self.dt) + 1).to(int)
if "proprioception" in all_obs_components and hasattr(self.cfg.sensor, "proprioception"): if "proprioception" in all_obs_components and hasattr(self.cfg.sensor, "proprioception"):
""" Adding proprioception delay buffer """ """ Adding proprioception delay buffer """
self.cfg.sensor.proprioception.buffer_length = int((self.cfg.sensor.proprioception.latency_range[1] + self.dt) / self.dt) self.cfg.sensor.proprioception.buffer_length = int((self.cfg.sensor.proprioception.latency_range[1] + self.dt) / self.dt)
@ -154,13 +205,13 @@ class LeggedRobotNoisy(LeggedRobotField):
dtype= torch.float32, dtype= torch.float32,
device= self.device, device= self.device,
) )
self.proprioception_delayed_frames = torch.ones((self.num_envs,), device= self.device, dtype= int) * self.cfg.sensor.proprioception.buffer_length
self.current_proprioception_latency = torch_rand_float( self.current_proprioception_latency = torch_rand_float(
self.cfg.sensor.proprioception.latency_range[0], self.cfg.sensor.proprioception.latency_range[0],
self.cfg.sensor.proprioception.latency_range[1], self.cfg.sensor.proprioception.latency_range[1],
(self.num_envs, 1), (self.num_envs, 1),
device= self.device, device= self.device,
).flatten() ).flatten()
self.proprioception_delayed_frames = ((self.current_proprioception_latency / self.dt) + 1).to(int)
if "forward_depth" in all_obs_components and hasattr(self.cfg.sensor, "forward_camera"): if "forward_depth" in all_obs_components and hasattr(self.cfg.sensor, "forward_camera"):
output_resolution = getattr(self.cfg.sensor.forward_camera, "output_resolution", self.cfg.sensor.forward_camera.resolution) output_resolution = getattr(self.cfg.sensor.forward_camera, "output_resolution", self.cfg.sensor.forward_camera.resolution)
@ -220,6 +271,9 @@ class LeggedRobotNoisy(LeggedRobotField):
def _reset_buffers(self, env_ids): def _reset_buffers(self, env_ids):
return_ = super()._reset_buffers(env_ids) return_ = super()._reset_buffers(env_ids)
if hasattr(self, "actions_history_buffer"):
self.actions_history_buffer[:, env_ids] = 0.
self.action_delayed_frames[env_ids] = self.cfg.control.action_history_buffer_length
if hasattr(self, "forward_depth_buffer"): if hasattr(self, "forward_depth_buffer"):
self.forward_depth_buffer[:, env_ids] = 0. self.forward_depth_buffer[:, env_ids] = 0.
self.forward_depth_delayed_frames[env_ids] = self.cfg.sensor.forward_camera.buffer_length self.forward_depth_delayed_frames[env_ids] = self.cfg.sensor.forward_camera.buffer_length
@ -537,11 +591,10 @@ class LeggedRobotNoisy(LeggedRobotField):
-self.proprioception_delayed_frames, -self.proprioception_delayed_frames,
torch.arange(self.num_envs, device= self.device), torch.arange(self.num_envs, device= self.device),
].clone() ].clone()
### NOTE: WARN: ERROR: remove this code in final version, no action delay should be used.
if getattr(self.cfg.sensor.proprioception, "delay_action_obs", False) or getattr(self.cfg.sensor.proprioception, "delay_privileged_action_obs", False):
raise ValueError("LeggedRobotNoisy: No action delay should be used. Please remove these settings")
# The last-action is not delayed. # The last-action is not delayed.
if getattr(self.cfg.sensor.proprioception, "delay_action_obs", False):
not_delayed_mask = torch.randint(0, 1, size= (self.num_envs,), device= self.device).bool()
self.proprioception_output[not_delayed_mask, -12:] = self.proprioception_buffer[-1, not_delayed_mask, -12:]
else:
self.proprioception_output[:, -12:] = self.proprioception_buffer[-1, :, -12:] self.proprioception_output[:, -12:] = self.proprioception_buffer[-1, :, -12:]
self.proprioception_refreshed = True self.proprioception_refreshed = True
if not hasattr(self.cfg.sensor, "proprioception") or privileged: if not hasattr(self.cfg.sensor, "proprioception") or privileged:
@ -573,5 +626,12 @@ class LeggedRobotNoisy(LeggedRobotField):
# sum along decimation axis and dof axis # sum along decimation axis and dof axis
return torch.square(exceeded_torques).sum(dim= 1).sum(dim= 1) return torch.square(exceeded_torques).sum(dim= 1).sum(dim= 1)
def _reward_exceed_torque_limits_l1norm(self):
""" square function for exceeding part """
exceeded_torques = torch.abs(self.substep_torques) - self.torque_limits
exceeded_torques[exceeded_torques < 0.] = 0.
# sum along decimation axis and dof axis
return torch.norm(exceeded_torques, p= 1, dim= -1).sum(dim= 1)
def _reward_exceed_dof_pos_limits(self): def _reward_exceed_dof_pos_limits(self):
return self.substep_exceed_dof_pos_limits.to(torch.float32).sum(dim= -1).mean(dim= -1) return self.substep_exceed_dof_pos_limits.to(torch.float32).sum(dim= -1).mean(dim= -1)

View File

@ -0,0 +1,127 @@
import numpy as np
from os import path as osp
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict
class Go1CrawlCfg( Go1FieldCfg ):
class init_state( Go1FieldCfg.init_state ):
pos = [0., 0., 0.45]
#### uncomment this to train non-virtual terrain
class sensor( Go1FieldCfg.sensor ):
class proprioception( Go1FieldCfg.sensor.proprioception ):
latency_range = [0.04-0.0025, 0.04+0.0075]
#### uncomment the above to train non-virtual terrain
class terrain( Go1FieldCfg.terrain ):
max_init_terrain_level = 2
border_size = 5
slope_treshold = 20.
curriculum = True
BarrierTrack_kwargs = merge_dict(Go1FieldCfg.terrain.BarrierTrack_kwargs, dict(
options= [
"crawl",
],
track_block_length= 1.6,
crawl= dict(
height= (0.28, 0.5),
depth= (0.1, 0.6), # size along the forward axis
wall_height= 0.6,
no_perlin_at_obstacle= False,
),
virtual_terrain= False, # Change this to False for real terrain
))
TerrainPerlin_kwargs = merge_dict(Go1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
zScale= 0.06,
))
class commands( Go1FieldCfg.commands ):
class ranges( Go1FieldCfg.commands.ranges ):
lin_vel_x = [0.3, 0.8]
lin_vel_y = [0.0, 0.0]
ang_vel_yaw = [0., 0.]
class control( Go1FieldCfg.control ):
computer_clip_torque = False
class asset( Go1FieldCfg.asset ):
terminate_after_contacts_on = ["base"]
class termination( Go1FieldCfg.termination ):
# additional factors that determines whether to terminates the episode
termination_terms = [
"roll",
"pitch",
"z_low",
"z_high",
"out_of_track",
]
class domain_rand( Go1FieldCfg.domain_rand ):
init_base_rot_range = dict(
roll= [-0.1, 0.1],
pitch= [-0.1, 0.1],
)
init_base_vel_range = [-0.1, 0.1]
class rewards( Go1FieldCfg.rewards ):
class scales:
# tracking_ang_vel = 0.05
# world_vel_l2norm = -1.
# legs_energy_substeps = -4e-5
# alive = 5.
# penetrate_depth = -6e-2
# penetrate_volume = -6e-2
# exceed_dof_pos_limits = -8e-1
# exceed_torque_limits_l1norm = -8e-1
# lin_pos_y = -0.1
###############################################
tracking_ang_vel = 0.05
tracking_world_vel = 5.
# world_vel_l2norm = -1.
# alive = 2.
legs_energy_substeps = -1e-5
# penetrate_depth = -6e-2 # comment this out if trianing non-virtual terrain
# penetrate_volume = -6e-2 # comment this out if trianing non-virtual terrain
exceed_dof_pos_limits = -8e-1
# exceed_torque_limits_i = -2e-1
exceed_torque_limits_l1norm = -1.
# collision = -0.05
# tilt_cond = 0.1
torques = -1e-5
yaw_abs = -0.1
lin_pos_y = -0.1
soft_dof_pos_limit = 0.7
only_positive_rewards = False
class curriculum( Go1FieldCfg.curriculum ):
penetrate_volume_threshold_harder = 1500
penetrate_volume_threshold_easier = 10000
penetrate_depth_threshold_harder = 10
penetrate_depth_threshold_easier = 400
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class Go1CrawlCfgPPO( Go1FieldCfgPPO ):
class algorithm( Go1FieldCfgPPO.algorithm ):
entropy_coef = 0.0
clip_min_std = 0.1
class runner( Go1FieldCfgPPO.runner ):
resume = True
load_run = "{Your traind walking model directory}"
load_run = "Sep20_03-37-32_SkillopensourcePlaneWalking_pEnergySubsteps1e-5_pTorqueExceedIndicate1e-1_aScale0.5_tClip202025"
load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Sep26_14-30-24_Skills_crawl_propDelay0.04-0.05_pEnergy-2e-5_pDof8e-01_pTorqueL14e-01_rTilt5e-01_pCollision0.2_maxPushAng0.5_kp40_fromSep26_01-38-19")
load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct11_12-19-00_Skills_crawl_propDelay0.04-0.05_pEnergy-1e-5_pDof8e-01_pTorqueL14e-01_pPosY0.1_maxPushAng0.3_kp40_fromOct09_09-58-26")
run_name = "".join(["Skills_",
("Multi" if len(Go1CrawlCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else (Go1CrawlCfg.terrain.BarrierTrack_kwargs["options"][0] if Go1CrawlCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")),
("_pEnergy" + np.format_float_scientific(-Go1CrawlCfg.rewards.scales.legs_energy_substeps, precision=1, exp_digits=1)),
("_pDof" + np.format_float_scientific(-Go1CrawlCfg.rewards.scales.exceed_dof_pos_limits, precision=1, exp_digits=1)),
("_pTorqueL1" + np.format_float_scientific(-Go1CrawlCfg.rewards.scales.exceed_torque_limits_l1norm, precision=1, exp_digits=1)),
("_noComputerClip" if not Go1CrawlCfg.control.computer_clip_torque else ""),
("_noTanh"),
])
max_iterations = 20000
save_interval = 500

View File

@ -0,0 +1,166 @@
from os import path as osp
import numpy as np
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict
class Go1DownCfg( Go1FieldCfg ):
class init_state( Go1FieldCfg.init_state ):
pos = [0., 0., 0.45]
#### uncomment this to train non-virtual terrain
class sensor( Go1FieldCfg.sensor ):
class proprioception( Go1FieldCfg.sensor.proprioception ):
latency_range = [0.04-0.0025, 0.04+0.0075]
#### uncomment the above to train non-virtual terrain
class terrain( Go1FieldCfg.terrain ):
max_init_terrain_level = 2
border_size = 5
slope_treshold = 20.
curriculum = False
BarrierTrack_kwargs = merge_dict(Go1FieldCfg.terrain.BarrierTrack_kwargs, dict(
options= [
"jump",
],
track_block_length= 1.6,
jump= dict(
height= (0.2, 0.45),
depth= 0.3,
fake_offset= 0.0, # [m] an offset that make the robot easier to get into the obstacle
jump_down_prob= 1., # probability of jumping down use it in non-virtual terrain
),
virtual_terrain= True, # Change this to False for real terrain
no_perlin_threshold= 0.6,
randomize_obstacle_order= True,
n_obstacles_per_track= 3,
))
TerrainPerlin_kwargs = merge_dict(Go1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
zScale= [0.04, 0.15],
))
class commands( Go1FieldCfg.commands ):
class ranges( Go1FieldCfg.commands.ranges ):
lin_vel_x = [0.5, 1.]
lin_vel_y = [0.0, 0.0]
ang_vel_yaw = [0., 0.]
class control( Go1FieldCfg.control ):
computer_clip_torque = False
class asset( Go1FieldCfg.asset ):
terminate_after_contacts_on = ["base"]
class termination( Go1FieldCfg.termination ):
# additional factors that determines whether to terminates the episode
termination_terms = [
"roll",
"pitch",
"z_low",
"z_high",
"out_of_track",
]
z_low_kwargs = merge_dict(Go1FieldCfg.termination.z_low_kwargs, dict(
threshold= -3.,
))
class domain_rand( Go1FieldCfg.domain_rand ):
init_base_pos_range = dict(
x= [0.2, 0.6],
y= [-0.25, 0.25],
)
init_base_rot_range = dict(
roll= [-0.1, 0.1],
pitch= [-0.1, 0.1],
)
init_base_vel_range = merge_dict(Go1FieldCfg.domain_rand.init_base_vel_range, dict(
x= [-0.1, 0.5],
))
push_robots = True
class rewards( Go1FieldCfg.rewards ):
class scales:
action_rate = -0.1
collision = -10.0
delta_torques = -1e-07
dof_acc = -1e-07
down_cond = 0.04
exceed_dof_pos_limits = -0.001
exceed_torque_limits_l1norm = -0.2
feet_contact_forces = -0.01
hip_pos = -5.0
legs_energy_substeps = -5e-06
lin_pos_y = -0.3
penetrate_depth = -0.002
penetrate_volume = -0.002
torques = -0.0001
tracking_ang_vel = 0.1
tracking_world_vel = 5.0
yaw_abs = -0.
soft_dof_pos_limit = 0.75
max_contact_force = 200.
only_positive_rewards = False
class noise( Go1FieldCfg.noise ):
add_noise = False
class curriculum( Go1FieldCfg.curriculum ):
penetrate_volume_threshold_harder = 2000000
penetrate_volume_threshold_easier = 4000000
penetrate_depth_threshold_harder = 200000
penetrate_depth_threshold_easier = 400000
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class Go1DownCfgPPO( Go1FieldCfgPPO ):
class algorithm( Go1FieldCfgPPO.algorithm ):
entropy_coef = 0.0
clip_min_std = 0.2
class runner( Go1FieldCfgPPO.runner ):
policy_class_name = "ActorCriticRecurrent"
experiment_name = "field_go1"
resume = True
load_run = "{Your trained climb model directory}"
load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Sep28_14-59-53_Skills_down_pEnergySubsteps-8e-06_pPenD1e-02_pDofLimit-4e-01_pYawAbs_pTorqueL14e-01_pCollision-1e-1_downRedundant0.1_softDof0.8_pushRobot_propDelay0.04-0.05_kp40_kd0.5fromSep27_14-32-13")
load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct14_02-06-47_Skills_down_comXRange-0.2-0.2_pEnergySubsteps-4e-06_rAlive3_pPenD4e-03_pDHarder2.e+05_pushRobot_noTanh_virtualfromOct09_09-49-58")
# load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct25_10-48-54_down_pEnergySubsteps-4e-05_pDofLimit-4e-01_pYawAbs_pTorqueL14e-01_pActRate1.e-1_rDownCond3.e-1_kp40_kd0.5_virtualfromOct14_02-06-47")
# load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct26_08-45-02_Skills_down_pEnergySubsteps-4e-05_rTrackVel4._pZVel1e-01_pAngXYVel5e-02_pDTorques1e-07_rDownCond1.e-1_noTanh_virtualfromOct25_10-48-54")
load_run = "Oct28_07-38-39_Skills_down_pEnergySubsteps-4e-05_rTrackVel5._pHipPos4e-01_rDownCond4.e-2_noTanh_virtualfromOct14_02-06-47"
load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct28_13-08-21_Skills_down_pEnergySubsteps-4e-05_rTrackVel5._pHipPos8e-01_rDownCond4.e-2_withPropNoise_noTanh_virtualfromOct28_07-38-39")
load_run = "Oct29_22-07-51_Skills_down_pEnergySubsteps-4e-05_rTrackVel5._pHipPos8e-01_rDownCond4.e-2_noComputerClip_noPropNoise_noTanh_virtualfromOct28_13-08-21"
load_run = "Oct30_04-23-31_Skills_down_pEnergySubsteps-1e-05_rTrackVel5._pTorqueL14e-01_pHipPos5e+00_rDownCond4.e-2_allowNegativeReward_pushRobot_noComputerClip_noPropNoise_noTanh_virtual_fromOct29_22-07-51"
run_name = "".join(["Skills_",
("down" if Go1DownCfg.terrain.BarrierTrack_kwargs["jump"]["jump_down_prob"] > 0. else "jump"),
("_pEnergySubsteps{:.0e}".format(Go1DownCfg.rewards.scales.legs_energy_substeps) if getattr(Go1DownCfg.rewards.scales, "legs_energy_substeps", 0.) != -1e-6 else ""),
("_rTrackVel" + np.format_float_positional(Go1DownCfg.rewards.scales.tracking_world_vel) if getattr(Go1DownCfg.rewards.scales, "tracking_world_vel", 0.) != 0. else ""),
# ("_pLinVel" + np.format_float_positional(-Go1DownCfg.rewards.scales.world_vel_l2norm) if getattr(Go1DownCfg.rewards.scales, "world_vel_l2norm", 0.) != 0.0 else ""),
# ("_rAlive" + np.format_float_positional(Go1DownCfg.rewards.scales.alive) if getattr(Go1DownCfg.rewards.scales, "alive", 0.) != 0.0 else ""),
# ("_pDofLimit{:.0e}".format(Go1DownCfg.rewards.scales.exceed_dof_pos_limits) if getattr(Go1DownCfg.rewards.scales, "exceed_dof_pos_limits") != 0.0 else ""),
# ("_pYawAbs" if getattr(Go1DownCfg.rewards.scales, "yaw_abs", 0.) != 0.1 else ""),
("_pTorqueL1{:.0e}".format(-Go1DownCfg.rewards.scales.exceed_torque_limits_l1norm) if getattr(Go1DownCfg.rewards.scales, "exceed_torque_limits_l1norm", 0.) != 0 else ""),
# ("_pActRate" + np.format_float_scientific(-Go1DownCfg.rewards.scales.action_rate, precision=1, exp_digits=1) if getattr(Go1DownCfg.rewards.scales, "action_rate", 0.) != 0.0 else ""),
# ("_pZVel" + np.format_float_scientific(-Go1DownCfg.rewards.scales.lin_vel_z, precision=1, trim="-") if getattr(Go1DownCfg.rewards.scales, "lin_vel_z", 0.) != 0. else ""),
# ("_pAngXYVel" + np.format_float_scientific(-Go1DownCfg.rewards.scales.ang_vel_xy, precision=1, trim="-") if getattr(Go1DownCfg.rewards.scales, "ang_vel_xy", 0.) != 0. else ""),
# ("_pDTorques" + np.format_float_scientific(-Go1DownCfg.rewards.scales.delta_torques, precision=1, trim="-") if getattr(Go1DownCfg.rewards.scales, "delta_torques", 0.) != 0. else ""),
# ("_pHipPos" + np.format_float_scientific(-Go1DownCfg.rewards.scales.hip_pos, precision=1, trim="-") if getattr(Go1DownCfg.rewards.scales, "hip_pos", 0.) != 0. else ""),
# ("_rDownCond" + np.format_float_scientific(Go1DownCfg.rewards.scales.down_cond, precision=1, exp_digits=1) if getattr(Go1DownCfg.rewards.scales, "down_cond", 0.) != 0.0 else ""),
("_pCollision{:d}".format(int(Go1DownCfg.rewards.scales.collision)) if getattr(Go1DownCfg.rewards.scales, "collision", 0.) != 0.0 else ""),
# ("_downRedundant{:.1f}".format(Go1DownCfg.terrain.BarrierTrack_kwargs["jump"]["down_forward_length"]) if (Go1DownCfg.terrain.BarrierTrack_kwargs["jump"].get("down_forward_length", 0.) > 0. and Go1DownCfg.terrain.BarrierTrack_kwargs["jump"].get("jump_down_prob", 0.) > 0.) else ""),
("_cmdRange{:.1f}-{:.1f}".format(*Go1DownCfg.commands.ranges.lin_vel_x)),
("_allowNegativeReward" if not Go1DownCfg.rewards.only_positive_rewards else ""),
("_pushRobot" if Go1DownCfg.domain_rand.push_robots else "_noPush"),
("_noComputerClip" if not Go1DownCfg.control.computer_clip_torque else ""),
("_noPropNoise" if not Go1DownCfg.noise.add_noise else "_withPropNoise"),
# ("_kp{:d}".format(int(Go1DownCfg.control.stiffness["joint"])) if Go1DownCfg.control.stiffness["joint"] != 50 else ""),
# ("_kd{:.1f}".format(Go1DownCfg.control.damping["joint"]) if Go1DownCfg.control.damping["joint"] != 1. else ""),
("_noTanh"),
("_virtual" if Go1DownCfg.terrain.BarrierTrack_kwargs["virtual_terrain"] else ""),
("_noResume" if not resume else "_from" + "_".join(load_run.split("/")[-1].split("_")[:2])),
])
max_iterations = 20000
save_interval = 200

View File

@ -1,162 +1,161 @@
import numpy as np import numpy as np
import os import os
from os import path as osp from os import path as osp
from legged_gym.utils.helpers import merge_dict
from legged_gym.envs.a1.a1_field_config import A1FieldCfg, A1FieldCfgPPO from legged_gym.envs.a1.a1_field_config import A1FieldCfg, A1FieldCfgPPO
class Go1FieldCfg( A1FieldCfg ): go1_const_dof_range = dict(
Hip_max= 1.047,
class terrain( A1FieldCfg.terrain ): Hip_min=-1.047,
Thigh_max= 2.966,
num_rows = 20 Thigh_min= -0.663,
num_cols = 50 Calf_max= -0.837,
selected = "BarrierTrack" Calf_min= -2.721,
max_init_terrain_level = 0 # for climb, leap finetune
border_size = 5
slope_treshold = 100.
curriculum = True # for tilt, crawl, climb, leap
# curriculum = False # for walk
horizontal_scale = 0.025 # [m]
pad_unavailable_info = True
BarrierTrack_kwargs = dict(
options= [
# "climb",
# "crawl",
"tilt",
# "leap",
], # each race track will permute all the options
track_width= 1.6, # for climb, crawl, tilt, walk
# track_width= 1.0, # for leap
track_block_length= 2., # the x-axis distance from the env origin point
wall_thickness= (0.04, 0.2), # [m]
wall_height= 0.01, # [m] for climb, crawl, tilt, walk
# wall_height= -0.5, # for leap
climb= dict(
height= (0.2, 0.6),
depth= (0.1, 0.8), # size along the forward axis
fake_offset= 0.0, # [m] an offset that make the robot easier to get into the obstacle
),
crawl= dict(
height= (0.28, 0.38),
depth= (0.1, 0.5), # size along the forward axis
wall_height= 0.6,
no_perlin_at_obstacle= False,
),
tilt= dict(
width= (0.24, 0.32),
depth= (0.4, 1.), # size along the forward axis
opening_angle= 0.0, # [rad] an opening that make the robot easier to get into the obstacle
wall_height= 0.5,
),
leap= dict(
length= (0.2, 1.0),
depth= (0.4, 0.8),
height= 0.25,
),
add_perlin_noise= True,
border_perlin_noise= True,
border_height= 0., # for climb, crawl, tilt, walk
# border_height= -0.5, # for leap
virtual_terrain= False, # for climb, crawl, leap
# virtual_terrain= True, # for tilt
draw_virtual_terrain= True,
engaging_next_threshold= 1.2,
curriculum_perlin= False,
no_perlin_threshold= 0.0, # for crawl, tilt, walk
# no_perlin_threshold= 0.05, # for leap
# no_perlin_threshold= 0.06, # for climb
) )
go1_action_scale = 0.5
class Go1FieldCfg( A1FieldCfg ):
class env( A1FieldCfg.env ):
num_envs = 8192
class init_state( A1FieldCfg.init_state ):
pos = [0., 0., 0.7]
zero_actions = False
class sensor( A1FieldCfg.sensor ):
class proprioception( A1FieldCfg.sensor.proprioception ):
delay_action_obs = False
latency_range = [0.04-0.0025, 0.04+0.0075] # comment this if it is too hard to train.
class terrain( A1FieldCfg.terrain ):
num_rows = 20
num_cols = 80
# curriculum = True # for tilt, crawl, jump, leap
curriculum = False # for walk
pad_unavailable_info = True
BarrierTrack_kwargs = merge_dict(A1FieldCfg.terrain.BarrierTrack_kwargs, dict(
options= [
# "jump",
# "crawl",
# "tilt",
# "leap",
], # each race track will permute all the options
# randomize_obstacle_order= True,
track_width= 1.6,
track_block_length= 2., # the x-axis distance from the env origin point
wall_thickness= (0.04, 0.2), # [m]
wall_height= 0.0,
add_perlin_noise= True,
border_perlin_noise= True,
border_height= 0.,
virtual_terrain= False,
draw_virtual_terrain= True,
engaging_next_threshold= 1.2,
engaging_finish_threshold= 0.,
curriculum_perlin= False,
no_perlin_threshold= 0.1,
))
TerrainPerlin_kwargs = dict( TerrainPerlin_kwargs = dict(
# zScale= 0.1, # for crawl zScale= [0.08, 0.15],
zScale= 0.12, # for tilt
# zScale= [0.05, 0.1], # for climb
# zScale= [0.04, 0.1], # for leap
# zScale= [0.1, 0.15], # for walk
frequency= 10, frequency= 10,
) )
class commands( A1FieldCfg.commands ): class commands( A1FieldCfg.commands ):
class ranges( A1FieldCfg.commands.ranges ): class ranges( A1FieldCfg.commands.ranges ):
# lin_vel_x = [0.0, 1.0] # for walk lin_vel_x = [-1.0, 1.0]
# lin_vel_x = [0.8, 1.5] # for climb
# lin_vel_x = [1.0, 1.5] # for leap
lin_vel_x = [0.3, 0.8] # for tilt, crawl
lin_vel_y = [0.0, 0.0] lin_vel_y = [0.0, 0.0]
ang_vel_yaw = [0., 0.] ang_vel_yaw = [0., 0.]
class control( A1FieldCfg.control ): class control( A1FieldCfg.control ):
stiffness = {'joint': 50.} stiffness = {'joint': 40.}
damping = {'joint': 1.} damping = {'joint': 0.5}
# action_scale = [0.2, 0.4, 0.4] * 4 # for walk action_scale = go1_action_scale
action_scale = 0.5 # for tilt, crawl, climb, leap
# for climb, leap
torque_limits = [20., 20., 25.] * 4 torque_limits = [20., 20., 25.] * 4
computer_clip_torque = True computer_clip_torque = False
motor_clip_torque = False motor_clip_torque = False
class asset( A1FieldCfg.asset ): class asset( A1FieldCfg.asset ):
file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/go1/urdf/go1.urdf" file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/go1/urdf/go1.urdf"
penalize_contacts_on = ["base", "thigh"] sdk_dof_range = go1_const_dof_range
terminate_after_contacts_on = ["base", "imu"] # for climb, leap, tilt, walk no-virtual
class termination:
# additional factors that determines whether to terminates the episode
termination_terms = [
"roll", # for tilt
"pitch",
"z_low",
"z_high",
"out_of_track", # for leap, walk
]
class termination( A1FieldCfg.termination ):
roll_kwargs = dict( roll_kwargs = dict(
threshold= 0.8, # [rad] # for tilt threshold= 1.5,
tilt_threshold= 1.5, # for tilt (condition on engaging block)
) )
pitch_kwargs = dict( pitch_kwargs = dict(
threshold= 1.6, threshold= 1.5,
climb_threshold= 1.6,
leap_threshold= 1.5,
) )
z_low_kwargs = dict(
threshold= 0.08, # [m]
)
z_high_kwargs = dict(
threshold= 1.5, # [m]
)
out_of_track_kwargs = dict(
threshold= 1., # [m]
)
check_obstacle_conditioned_threshold = True
timeout_at_border = True
timeout_at_finished = True
class domain_rand( A1FieldCfg.domain_rand ): class domain_rand( A1FieldCfg.domain_rand ):
randomize_com = True class com_range( A1FieldCfg.domain_rand.com_range ):
class com_range: x = [-0.2, 0.2]
x = [-0.05, 0.15]
y = [-0.1, 0.1]
z = [-0.05, 0.05]
randomize_base_mass = True init_base_pos_range = merge_dict(A1FieldCfg.domain_rand.init_base_pos_range, dict(
added_mass_range = [1.0, 3.0] x= [0.05, 0.6],
))
init_base_rot_range = dict(
roll= [-0.75, 0.75],
pitch= [-0.75, 0.75],
)
# init_base_vel_range = [-1.0, 1.0]
init_base_vel_range = dict(
x= [-0.2, 1.5],
y= [-0.2, 0.2],
z= [-0.2, 0.2],
roll= [-1., 1.],
pitch= [-1., 1.],
yaw= [-1., 1.],
)
init_dof_vel_range = [-5, 5]
class rewards( A1FieldCfg.rewards ): class rewards( A1FieldCfg.rewards ):
class scales: class scales:
tracking_ang_vel = 0.1 tracking_ang_vel = 0.05
world_vel_l2norm = -1. tracking_world_vel = 3.
legs_energy_substeps = -1e-5 # world_vel_l2norm = -2.
exceed_torque_limits_i = -0.1 # alive = 3.
alive = 2. legs_energy_substeps = -2e-5
lin_pos_y = -0.2 # penalty for hardware safety
yaw_abs = -0.5 exceed_dof_pos_limits = -8e-1
penetrate_depth = -5e-3 exceed_torque_limits_l1norm = -8e-1
penetrate_volume = -5e-3 # penalty for walking gait, probably no need
soft_dof_pos_limit = 0.01 lin_vel_z = -1.
ang_vel_xy = -0.05
orientation = -4.
dof_acc = -2.5e-7
collision = -10.
action_rate = -0.1
delta_torques = -1e-7
torques = -1.e-5
yaw_abs = -0.8
lin_pos_y = -0.8
hip_pos = -0.4
dof_error = -0.04
soft_dof_pos_limit = 0.8 # only in training walking
max_contact_force = 200.0
class normalization( A1FieldCfg.normalization ):
dof_pos_redundancy = 0.2
clip_actions_method = "hard"
clip_actions_low = []
clip_actions_high = []
for sdk_joint_name, sim_joint_name in zip(
["Hip", "Thigh", "Calf"] * 4,
[ # in the order as simulation
"FL_hip_joint", "FL_thigh_joint", "FL_calf_joint",
"FR_hip_joint", "FR_thigh_joint", "FR_calf_joint",
"RL_hip_joint", "RL_thigh_joint", "RL_calf_joint",
"RR_hip_joint", "RR_thigh_joint", "RR_calf_joint",
],
):
clip_actions_low.append( (go1_const_dof_range[sdk_joint_name + "_min"] + dof_pos_redundancy - A1FieldCfg.init_state.default_joint_angles[sim_joint_name]) / go1_action_scale )
clip_actions_high.append( (go1_const_dof_range[sdk_joint_name + "_max"] - dof_pos_redundancy - A1FieldCfg.init_state.default_joint_angles[sim_joint_name]) / go1_action_scale )
del dof_pos_redundancy, sdk_joint_name, sim_joint_name
class sim( A1FieldCfg.sim ): class sim( A1FieldCfg.sim ):
body_measure_points = { # transform are related to body frame body_measure_points = { # transform are related to body frame
@ -184,49 +183,24 @@ class Go1FieldCfg( A1FieldCfg ):
), ),
} }
class curriculum:
# chosen heuristically, please refer to `LeggedRobotField._get_terrain_curriculum_move` with fixed body_measure_points
# for crawl (not updated)
penetrate_volume_threshold_harder = 1500
penetrate_volume_threshold_easier = 10000
penetrate_depth_threshold_harder = 10
penetrate_depth_threshold_easier = 400
# for tilt
# penetrate_volume_threshold_harder = 2000
# penetrate_volume_threshold_easier = 10000
# penetrate_depth_threshold_harder = 20
# penetrate_depth_threshold_easier = 300
# for climb
# penetrate_volume_threshold_harder = 6000
# penetrate_volume_threshold_easier = 12000
# penetrate_depth_threshold_harder = 600
# penetrate_depth_threshold_easier = 1600
# for leap
# penetrate_volume_threshold_harder = 9000
# penetrate_volume_threshold_easier = 10000
# penetrate_depth_threshold_harder = 300
# penetrate_depth_threshold_easier = 5000
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs") logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class Go1FieldCfgPPO( A1FieldCfgPPO ): class Go1FieldCfgPPO( A1FieldCfgPPO ):
class algorithm( A1FieldCfgPPO.algorithm ): class algorithm( A1FieldCfgPPO.algorithm ):
entropy_coef = 0.01 # for walk entropy_coef = 0.0
clip_min_std = 1e-12 # for walk clip_min_std = 0.2
class policy( A1FieldCfgPPO.policy ):
mu_activation = None # use action clip method by env
class runner( A1FieldCfgPPO.runner ): class runner( A1FieldCfgPPO.runner ):
experiment_name = "field_go1" experiment_name = "field_go1"
run_name = "".join(["Skills", resume = False
("_all" if len(Go1FieldCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else ("_" + Go1FieldCfg.terrain.BarrierTrack_kwargs["options"][0] if Go1FieldCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")), load_run = None
run_name = "".join(["WalkForward",
("_pEnergySubsteps" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.legs_energy_substeps, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "legs_energy_substeps", 0.0) != 0.0 else ""), ("_pEnergySubsteps" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.legs_energy_substeps, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "legs_energy_substeps", 0.0) != 0.0 else ""),
("_pPenV" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.penetrate_volume, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "penetrate_volume", 0.) < 0. else ""), ("_rTrackVel" + np.format_float_scientific(Go1FieldCfg.rewards.scales.tracking_world_vel, precision=1, exp_digits=1, trim="-") if getattr(Go1FieldCfg.rewards.scales, "tracking_world_vel", 0.0) != 0.0 else ""),
("_pPenD" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.penetrate_depth, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "penetrate_depth", 0.) < 0. else ""), ("_pWorldVel" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.world_vel_l2norm, precision=1, exp_digits=1, trim="-") if getattr(Go1FieldCfg.rewards.scales, "world_vel_l2norm", 0.0) != 0.0 else ""),
("_rAlive{:d}".format(int(Go1FieldCfg.rewards.scales.alive)) if Go1FieldCfg.rewards.scales.alive != 2 else ""),
("_rAngVel{:.2f}".format(Go1FieldCfg.rewards.scales.tracking_ang_vel) if Go1FieldCfg.rewards.scales.tracking_ang_vel != 0.05 else ""),
("_pTorqueExceedIndicate" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.exceed_torque_limits_i, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "exceed_torque_limits_i", 0.) != 0. else ""),
("_pTorqueExceedSquare" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.exceed_torque_limits_square, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "exceed_torque_limits_square", 0.) != 0. else ""),
("_pYaw{:.2f}".format(-Go1FieldCfg.rewards.scales.yaw_abs) if Go1FieldCfg.rewards.scales.yaw_abs != 0. else ""),
("_noComputerTorqueClip" if not Go1FieldCfg.control.computer_clip_torque else ""),
("_virtualTerrain" if Go1FieldCfg.terrain.BarrierTrack_kwargs["virtual_terrain"] else ""),
("_aScale{:d}{:d}{:d}".format( ("_aScale{:d}{:d}{:d}".format(
int(Go1FieldCfg.control.action_scale[0] * 10), int(Go1FieldCfg.control.action_scale[0] * 10),
int(Go1FieldCfg.control.action_scale[1] * 10), int(Go1FieldCfg.control.action_scale[1] * 10),
@ -234,15 +208,8 @@ class Go1FieldCfgPPO( A1FieldCfgPPO ):
) if isinstance(Go1FieldCfg.control.action_scale, (tuple, list)) \ ) if isinstance(Go1FieldCfg.control.action_scale, (tuple, list)) \
else "_aScale{:.1f}".format(Go1FieldCfg.control.action_scale) else "_aScale{:.1f}".format(Go1FieldCfg.control.action_scale)
), ),
("_torqueClip{:.0f}".format(Go1FieldCfg.control.torque_limits) if not isinstance(Go1FieldCfg.control.torque_limits, (tuple, list)) else ( ("_actionClip" + Go1FieldCfg.normalization.clip_actions_method if getattr(Go1FieldCfg.normalization, "clip_actions_method", None) is not None else ""),
"_tClip{:d}{:d}{:d}".format( ("_from" + "_".join(load_run.split("/")[-1].split("_")[:2]) if resume else "_noResume"),
int(Go1FieldCfg.control.torque_limits[0]),
int(Go1FieldCfg.control.torque_limits[1]),
int(Go1FieldCfg.control.torque_limits[2]),
)
)),
]) ])
resume = False max_iterations = 20000
load_run = ""
max_iterations = 10000
save_interval = 500 save_interval = 500

View File

@ -1,6 +1,6 @@
from collections import OrderedDict from collections import OrderedDict
import os import os
import datetime from datetime import datetime
import os.path as osp import os.path as osp
import numpy as np import numpy as np
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO
@ -12,48 +12,74 @@ class Go1FieldDistillCfg( Go1FieldCfg ):
num_envs = 256 num_envs = 256
obs_components = [ obs_components = [
"proprioception", # 48 "proprioception", # 48
# "height_measurements", # 187
"forward_depth", "forward_depth",
] ]
privileged_obs_components = [ privileged_obs_components = [
"proprioception", # 48 "proprioception", # 48
"base_pose", # "height_measurements", # 187
"robot_config", # "forward_depth",
"engaging_block", "base_pose", # 6
"sidewall_distance", "robot_config", # 1 + 3 + 1 + 12
"engaging_block", # 1 + (4 + 1) + 2
"sidewall_distance", # 2
] ]
use_lin_vel = False use_lin_vel = False
privileged_use_lin_vel = True privileged_use_lin_vel = True
# if True privileged_obs will not have noise based on implementations # if True privileged_obs will not have noise based on implementations
privileged_obs_gets_privilege = False # for climb temporarily privileged_obs_gets_privilege = False # for jump temporarily
class init_state( Go1FieldCfg.init_state ): class init_state( Go1FieldCfg.init_state ):
pos = [0.0, 0.0, 0.38] # x,y,z [m] pos = [0.0, 0.0, 0.43] # x,y,z [m]
class sensor( Go1FieldCfg.sensor ): class sensor( Go1FieldCfg.sensor ):
class forward_camera( Go1FieldCfg.sensor.forward_camera ): class forward_camera( Go1FieldCfg.sensor.forward_camera ):
resolution = [int(240/4), int(424/4)] resolution = [int(240/4), int(424/4)]
# position = dict(
# mean= [0.245, 0.0075, 0.072+0.018],
# std= [0.002, 0.002, 0.0005],
# ) # position in base_link ##### small randomization
# rotation = dict(
# lower= [0, 0, 0],
# upper= [0, 0, 0],
# ) # rotation in base_link ##### small randomization
########## new camera extrinsics with 30degree down ####################
position = dict( position = dict(
mean= [0.245, 0.0075, 0.072+0.018], mean= [0.245+0.027, 0.0075, 0.072+0.02],
std= [0.002, 0.002, 0.0005], std= [0.002, 0.002, 0.0002],
) # position in base_link ##### small randomization ) # position in base_link ##### small randomization
rotation = dict( rotation = dict(
lower= [0, 0, 0], lower= [0, 0.5, 0], # positive for pitch down
upper= [0, 0, 0], upper= [0, 0.54, 0],
) # rotation in base_link ##### small randomization ) # rotation in base_link ##### small randomization
########## new camera extrinsics with 30degree down ####################
########## new camera extrinsics with 15degree down ####################
# position = dict(
# mean= [0.245+0.027, 0.0075, 0.072+0.018],
# std= [0.003, 0.002, 0.0005],
# ) # position in base_link ##### small randomization
# rotation = dict(
# lower= [0, 0.24, 0], # positive for pitch down
# upper= [0, 0.28, 0],
# ) # rotation in base_link ##### small randomization
########## new camera extrinsics with 15degree down ####################
resized_resolution= [48, 64] resized_resolution= [48, 64]
output_resolution = [48, 64] output_resolution = [48, 64]
horizontal_fov = [85, 87] # measured around 87 degree horizontal_fov = [85, 87] # measured around 87 degree
# for go1, usb2.0, 480x640, d435i camera # for go1, usb2.0, 480x640, d435i camera
latency_range = [0.25, 0.30] latency_range = [0.25, 0.30] # [s]
# latency_range = [0.28, 0.36] # [s]
latency_resample_time = 5.0 # [s] latency_resample_time = 5.0 # [s]
refresh_duration = 1/10 # [s] for (240, 424 option with onboard script fixed to no more than 20Hz) refresh_duration = 1/10 # [s] for (240, 424 option with onboard script fixed to no more than 20Hz)
# config to simulate stero RGBD camera # config to simulate stero RGBD camera
crop_top_bottom = [0, 0] crop_top_bottom = [0, 0]
crop_left_right = [int(60/4), int(46/4)] crop_left_right = [int(60/4), int(46/4)]
depth_range = [0.0, 1.5] # [m] depth_range = [0.0, 2.0] # [m]
# class proprioception( Go1FieldCfg.sensor.proprioception ): # inherited from A1FieldCfg class proprioception( Go1FieldCfg.sensor.proprioception ): # inherited from A1FieldCfg
delay_action_obs = False
delay_privileged_action_obs = False
class terrain( Go1FieldCfg.terrain ): class terrain( Go1FieldCfg.terrain ):
num_rows = 2 num_rows = 2
@ -67,21 +93,24 @@ class Go1FieldDistillCfg( Go1FieldCfg ):
options= [ options= [
# "tilt", # "tilt",
"crawl", "crawl",
"climb", "jump",
"jump",
"leap", "leap",
], # each race track will permute all the options ], # each race track will permute all the options
one_obstacle_per_track= True, n_obstacles_per_track= 4,
randomize_obstacle_order= True,
track_width= 1.6, track_width= 1.6,
track_block_length= 1.8, # the x-axis distance from the env origin point track_block_length= 2.0, # the x-axis distance from the env origin point
wall_thickness= (0.04, 0.2), # [m] wall_thickness= (0.04, 0.3), # [m]
wall_height= 0.0, # [m] wall_height= (-0.5, 0.0), # [m]
climb= dict( jump= dict(
height= (0.40, 0.45), height= (0.40, 0.45),
depth= (0.5, 1.0), # size along the forward axis depth= (0.01, 0.01), # size along the forward axis
fake_offset= 0.05, # [m] making the climb's height info greater than its physical height. fake_offset= 0.03, # [m] making the jump's height info greater than its physical height.
jump_down_prob= 0.5,
), ),
crawl= dict( crawl= dict(
height= (0.28, 0.38), height= (0.36, 0.5),
depth= (0.1, 0.2), # size along the forward axis depth= (0.1, 0.2), # size along the forward axis
wall_height= 0.6, wall_height= 0.6,
fake_depth= 0.4, # when track block length is 1.8m fake_depth= 0.4, # when track block length is 1.8m
@ -94,11 +123,11 @@ class Go1FieldDistillCfg( Go1FieldCfg ):
), ),
leap= dict( leap= dict(
# length= (0.45, 0.7), # length= (0.45, 0.7),
length= (0.5, 0.7), # for parkour real-world env length= (0.3, 0.74), # for parkour real-world env
depth= (0.5, 0.6), depth= (0.5, 0.6),
height= 0.2, height= 0.2,
fake_offset= 0.2, fake_offset= 0.1,
follow_climb_ratio= 0.5, # when following climb, not drop down to ground suddenly. follow_jump_ratio= 0.5, # when following jump, not drop down to ground suddenly.
), ),
add_perlin_noise= True, add_perlin_noise= True,
border_perlin_noise= True, border_perlin_noise= True,
@ -106,6 +135,7 @@ class Go1FieldDistillCfg( Go1FieldCfg ):
virtual_terrain= False, virtual_terrain= False,
draw_virtual_terrain= True, draw_virtual_terrain= True,
engaging_next_threshold= 1.2, engaging_next_threshold= 1.2,
engaging_finish_threshold= 0.3,
check_skill_combinations= True, check_skill_combinations= True,
curriculum_perlin= False, curriculum_perlin= False,
no_perlin_threshold= 0.04, # for parkour real-world env no_perlin_threshold= 0.04, # for parkour real-world env
@ -113,33 +143,65 @@ class Go1FieldDistillCfg( Go1FieldCfg ):
) )
TerrainPerlin_kwargs = dict( TerrainPerlin_kwargs = dict(
zScale= [0.0, 0.05], # for parkour real-world env zScale= [0.01, 0.1], # for parkour real-world env
frequency= 10, frequency= 10,
) )
class commands( A1FieldDistillCfg.commands ): class commands( A1FieldDistillCfg.commands ):
pass pass
class control( Go1FieldCfg.control ):
computer_clip_torque = False
class asset( Go1FieldCfg.asset ):
terminate_after_contacts_on = ["base"]
class termination( A1FieldDistillCfg.termination ): class termination( A1FieldDistillCfg.termination ):
pass out_of_track_kwargs = dict(
threshold= 1.6,
)
class domain_rand( A1FieldDistillCfg.domain_rand ): class domain_rand( A1FieldDistillCfg.domain_rand ):
randomize_com = True randomize_com = True
randomize_motor = True randomize_motor = True
randomize_friction = True randomize_friction = True
friction_range = [0.0, 0.8] friction_range = [0.2, 2.0]
randomize_base_mass = True randomize_base_mass = True
push_robots = False push_robots = False
init_dof_pos_ratio_range = [0.9, 1.1] init_dof_pos_ratio_range = [0.8, 1.2]
init_base_vel_range = [-0.0, 0.0] init_dof_vel_range = [-2., 2.]
# init_base_vel_range = [-0.0, 0.0] # use super class
init_base_pos_range = dict( init_base_pos_range = dict(
x= [0.4, 0.6], x= [0.4, 0.6],
y= [-0.05, 0.05], y= [-0.05, 0.05],
) )
randomize_gravity_bias = True
randomize_privileged_gravity_bias = False
gravity_bias_range = dict(
x= [-0.12, 0.12],
y= [-0.12, 0.12],
z= [-0.05, 0.05],
)
class rewards( A1FieldDistillCfg.rewards ):
class scales:
pass
class noise( A1FieldDistillCfg.noise ): class noise( A1FieldDistillCfg.noise ):
add_noise = True # This only account for proprioception noise and height measurements noise at most
class noise_scales( A1FieldDistillCfg.noise.noise_scales ): class noise_scales( A1FieldDistillCfg.noise.noise_scales ):
ang_vel = 0.2 # measured in 0.02
dof_pos = 0.0006 # measured in 0.0002
dof_vel = 0.02 # measured in 0.015
gravity = 0.06 # measured in 0.05
# These aspects of obs should not have noise,
# which are not used or set to zero or has its own noise mechanism
height_measurements = 0.0
forward_depth = 0.0 forward_depth = 0.0
base_pose = 0.0
lin_vel = 0.0
class forward_depth: class forward_depth:
stereo_min_distance = 0.12 # when using (240, 424) resolution stereo_min_distance = 0.12 # when using (240, 424) resolution
stereo_far_distance = 2. stereo_far_distance = 2.
@ -164,7 +226,8 @@ class Go1FieldDistillCfg( Go1FieldCfg ):
# override base class attributes # override base class attributes
pass pass
distill_target_ = "tanh" # distill_target_ = "tanh"
distill_target_ = "l1"
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs") logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class Go1FieldDistillCfgPPO( A1FieldDistillCfgPPO ): class Go1FieldDistillCfgPPO( A1FieldDistillCfgPPO ):
class algorithm( A1FieldDistillCfgPPO.algorithm ): class algorithm( A1FieldDistillCfgPPO.algorithm ):
@ -177,10 +240,10 @@ class Go1FieldDistillCfgPPO( A1FieldDistillCfgPPO ):
using_ppo = False using_ppo = False
distill_target = distill_target_ distill_target = distill_target_
buffer_dilation_ratio = 1. buffer_dilation_ratio = 1.
learning_rate = 3e-4 learning_rate = 1.e-4
optimizer_class_name = "AdamW" optimizer_class_name = "AdamW"
teacher_policy_class_name = "ActorCriticFieldMutex" teacher_policy_class_name = "ActorCriticClimbMutex"
teacher_ac_path = None teacher_ac_path = None
class teacher_policy (A1FieldDistillCfgPPO.policy ): class teacher_policy (A1FieldDistillCfgPPO.policy ):
# For loading teacher policy. No need to change for training student # For loading teacher policy. No need to change for training student
@ -195,20 +258,77 @@ class Go1FieldDistillCfgPPO( A1FieldDistillCfgPPO ):
sidewall_distance= (2,), sidewall_distance= (2,),
) )
env_action_scale = Go1FieldCfg.control.action_scale env_action_scale = Go1FieldCfg.control.action_scale
action_smoothing_buffer_len = 3
reset_non_selected = "when_skill"
# reset_non_selected = True
sub_policy_class_name = "ActorCriticRecurrent" sub_policy_class_name = "ActorCriticRecurrent"
sub_policy_paths = [ # must in the order of obstacle ID
logs_root + "/field_go1/{your go1 walking policy}", sub_policy_paths = [
logs_root + "/field_go1/{your go1 tilting policy}", # os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct10_00-03-12_WalkForward_pEnergySubsteps1e-5_maxPushAng0.5_pushInter3.0_noTanh_fromOct09_15-50-14"),
logs_root + "/field_go1/{your go1 crawling policy}", # os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct25_14-43-48_WalkForward_pEnergySubsteps2e-5_rTrackVel3e+0_rAlive3e+0_pTorqueL1norm8e-1_pDofLim8e-1_actionCliphard_fromOct24_09-00-16"),
logs_root + "/field_go1/{your go1 climbing policy}", os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct27_20-22-36_WalkForward_pEnergySubsteps2e-5_rTrackVel3e+0_pYawAbs8e-1_pYPosAbs8e-01_pHipPos8e-01_noPropNoise_noTanh_actionCliphard_fromOct27_16-25-22"),
logs_root + "/field_go1/{your go1 leaping policy}", os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct11_12-24-22_Skills_tilt_propDelay0.04-0.05_pEnergySubsteps1e-5_pPenD2e-3_pDofLimit8e-1_rTilt8e-03_pCollision0.1_PushRobot_kp40_kd0.5_tiltMax0.40fromSep27_13-59-27"),
os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct11_12-19-00_Skills_crawl_propDelay0.04-0.05_pEnergy-1e-5_pDof8e-01_pTorqueL14e-01_pPosY0.1_maxPushAng0.3_kp40_fromOct09_09-58-26"),
# os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct13_09-12-39_258k_Skills_jump_comXRange-0.2-0.2_pEnergySubsteps-4e-06_pushRobot_jumpHeight0.1-0.7_propDelay0.04-0.05_noTanh_fromOct09_10-13-26"),
# os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct16_10-55-09_Skills_jump_comXRange-0.2-0.2_pLinVel1.4_pTorque2e-06_pY0.1_pCollision0.1_pushRobot_jumpHeight0.10-0.45_propDelay0.04-0.05_noTanh_fromOct13_09-12-39"),
# os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct22_15-19-26_Skills_jump_rTrackVel3._pJumpSameLegs0.6_noPContact_pDofAcc1.5e-07_rJumpXVel1.5_noPropNoise_propDelay0.04-0.05_pCollision-6e-01_noPush_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct22_08-47-13"),
# os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct24_07-41-26_Skills_jump_rTrackVel4._pY-2e-01_pJumpSameLegs0.2_noPContact_pDofAcc2.5e-07_rJumpXVel1.5_noPropNoise_propDelay0.04-0.05_pCollision-1e-01_noPush_minStd0.2_noTanh_zeroResetAction_jumpRange0.4-0.5_fromOct20_16-09-47"),
# os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct25_11-36-16_Skills_jump_pEnergySubsteps3e-05_rTrackVel4._pY-3e-01_propDelay0.04-0.05_noPropNoise_noPush_noTanh_zeroResetAction_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct24_07-41-26"),
# os.path.join(logs_root, "field_go1", "Oct27_16-03-48_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._pDofErrCond4e-01_pHipPos5e-01_pCollision3e+00_pSyncSymLegs4e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_13-20-59"),
# os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct27_16-04-45_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._pPenD-6e-03_pDofErrCond6e-01_pHipPos5e-01_pCollision6e+00_pSyncSymLegs4e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_13-20-59"),
os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct28_11-05-56_Skills_jump_pEnergySubsteps2e-06_rTrackVel5._pYaw-1e-01_pHipPos5e+00_pCollision1e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_jumpRange0.4-0.5_allowNegativeReward_fromOct28_08-54-23"),
# os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct09_09-51-58_Skills_leap_propDelay0.04-0.05_pEnergySubsteps-8e-06_pPenD1.e-2_pDofLimit4e-01_pCollision0.5_kp40_kd0.5fromOct05_02-16-22"),
# os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct25_11-39-12_Skills_leap_pEnertySubsteps4.e-5_pActRate2.e-1_pDofLimit8.e-1_pCollision5.e-1_noPropNoise_noTanh_zeroResetAction_actionCliphard_fromOct09_09-51-58"),
os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct28_13-23-39_Skills_leap_pEnertySubsteps2.e-5_rTrackVel2._rAlive2.0_pSyncAllLegs6.e-1_pOrient6.e-1_pHipPos5.e+0_noTanh_actionCliphard_virtual_fromOct28_03-32-35"),
] ]
# jump_down_policy_path = logs_root + "/field_a1_noTanh_oracle/Oct09_09-49-58_Skills_down_pEnergySubsteps-4e-06_rAlive3_pPenD1e-02_pDofLimit-4e-01_pTorqueL14e-01_maxForce200_downRedundant0.2_softDof0.8_pushRobot_kp40_kd0.5fromSep28_14-59-53"
# jump_down_policy_path = os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct14_02-06-47_Skills_down_comXRange-0.2-0.2_pEnergySubsteps-4e-06_rAlive3_pPenD4e-03_pDHarder2.e+05_pushRobot_noTanh_virtualfromOct09_09-49-58")
# jump_down_policy_path = os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct25_10-48-54_down_pEnergySubsteps-4e-05_pDofLimit-4e-01_pYawAbs_pTorqueL14e-01_pActRate1.e-1_rDownCond3.e-1_kp40_kd0.5_virtualfromOct14_02-06-47")
#tested OK, tends to stop
# jump_down_policy_path = os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct25_11-36-31_down_pEnergySubsteps-4e-05_pDofLimit-4e-01_pYawAbs_pTorqueL14e-01_pActRate1.e-1_rDownCond1.e-2_kp40_kd0.5_virtualfromOct14_02-06-47")
# jump_down_policy_path = os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct26_08-45-02_Skills_down_pEnergySubsteps-4e-05_rTrackVel4._pZVel1e-01_pAngXYVel5e-02_pDTorques1e-07_rDownCond1.e-1_noTanh_virtualfromOct25_10-48-54")
jump_down_policy_path = os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct28_13-08-21_Skills_down_pEnergySubsteps-4e-05_rTrackVel5._pHipPos8e-01_rDownCond4.e-2_withPropNoise_noTanh_virtualfromOct28_07-38-39")
# sub_policy_paths = [ # must in the order of obstacle ID
# os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct18_12-37-48_WalkForward_comXRange-0.2-0.2_noDelayActObs_pLinY0.05_noTanh_fromOct10_00-03-12"), # a little pitch up, accetable
# os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct11_12-24-22_Skills_tilt_propDelay0.04-0.05_pEnergySubsteps1e-5_pPenD2e-3_pDofLimit8e-1_rTilt8e-03_pCollision0.1_PushRobot_kp40_kd0.5_tiltMax0.40fromSep27_13-59-27"), # a little pitch up, accetable
# os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct18_12-32-28_Skills_crawl_comXRange-0.2-0.2_pEnergySubsteps-1e-5_pDof8e-01_pTorque1e-5_pTorqueL14e-01_noDelayActObs_noTanh_fromOct11_12-19-00"),
# os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct19_06-29-14_Skills_jump_comXRange-0.2-0.2_pLinVel1._pDofAcc5e-7_pTorque6e-05_jumpHeight0.10-0.46_noDelayActObs_noTanh_minStd0.1_fromOct18_15-57-31"),
# os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct19_01-25-22_Skills_leap_comXRange-0.2-0.2_pTorques2.e-5_pContactForces1.e-2_leapHeight0.1_noDelayActObs_noTanh_virtualfromOct09_09-51-58"),
# ]
# jump_down_policy_path = \
# os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct19_07-01-54_Skills_down_comXRange-0.2-0.2_pXVelL21.3_rDown0.3_pPenD2e-03_pDHarder2e+5_pTorque2e-5_pushRobot_noDelayActObs_noTanh_virtualfromOct18_00-51-27")
# The group of ckpt that used for no Computer Clip and no Motor Clip
sub_policy_paths = [
os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct27_20-22-36_WalkForward_pEnergySubsteps2e-5_rTrackVel3e+0_pYawAbs8e-1_pYPosAbs8e-01_pHipPos8e-01_noPropNoise_noTanh_actionCliphard_fromOct27_16-25-22"),
os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct27_20-22-36_WalkForward_pEnergySubsteps2e-5_rTrackVel3e+0_pYawAbs8e-1_pYPosAbs8e-01_pHipPos8e-01_noPropNoise_noTanh_actionCliphard_fromOct27_16-25-22"),
os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct11_12-19-00_Skills_crawl_propDelay0.04-0.05_pEnergy-1e-5_pDof8e-01_pTorqueL14e-01_pPosY0.1_maxPushAng0.3_kp40_fromOct09_09-58-26"),
# os.path.join(logs_root, "field_go1", "Oct30_03-46-42_Skills_crawl_pEnergy1.e-5_pDof8.e-1_pTorqueL11.e+0_noComputerClip_noTanh"),
# os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct29_22-31-44_Skills_jump_pEnergySubsteps4e-05_rTrackVel4._pY-4e-01_pTorque4e-04_noJumpBonous_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_20-17-31"),
# os.path.join(logs_root, "field_go1", "Oct30_11-11-43_Skills_jump_pEnergySubsteps6e-05_rTrackVel5._pY-8e-01_pTorqueExceed1.2e+00_pTorque4e-04_pDTorques1e-06_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_20-17-31"),
# os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct30_13-00-12_Skills_jump_pEnergySubsteps6e-05_rTrackVel5._pY-4e-01_pTorqueExceed1.8e+00_pTorque4e-04_pDTorques1e-05_propDelay0.04-0.05_noPropNoise_noPush_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_20-17-31"),
### New jump oracle with more spreaded rear hip.
# os.path.join(logs_root, "field_go1_noTanh_oracle", "Nov02_11-07-49_Skills_jump_pEnergySubsteps5e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed1.8e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12"),
os.path.join(logs_root, "field_go1_noTanh_oracle", "Nov02_13-53-48_Skills_jump_pEnergySubsteps5e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed1.5e+00_propDelay0.04-0.05_noPropNoise_pushRobot_minStd0.21_entropy0.01_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12"),
# os.path.join(logs_root, "field_go1", "Oct30_05-08-03_Skills_leap_pEnertySubsteps6.e-6_rTrackVel5._pTorqueExceed4.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_04-10-12"),
os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct30_07-32-10_Skills_leap_pEnertySubsteps6.e-6_rTrackVel5._pTorqueExceed8.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_pDorErr1.5e-1_noPenCurriculum_noCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_05-07-24"),
]
jump_down_policy_path = os.path.join(logs_root, "field_go1_noTanh_oracle",
"Oct30_03-42-56_Skills_down_pEnergySubsteps-1e-05_rTrackVel5._pTorqueL12e-01_pHipPos5e+00_rDownCond4.e-2_allowNegativeReward_pushRobot_noComputerClip_noPropNoise_noTanh_virtualfromOct29_22-07-51",
)
# jump_down_policy_path = os.path.join(logs_root, "field_go1",
# "Oct30_04-23-31_Skills_down_pEnergySubsteps-1e-05_rTrackVel5._pTorqueL14e-01_pHipPos5e+00_rDownCond4.e-2_allowNegativeReward_pushRobot_noComputerClip_noPropNoise_noTanh_virtual_fromOct29_22-07-51",
# )
jump_down_vel = 1.0
cmd_vel_mapping = { cmd_vel_mapping = {
0: 1.0, 0: 1.0,
1: 0.5, 1: 0.6,
2: 0.8, 2: 1.0,
3: 1.2, 3: 1.0,
4: 1.5, 4: 1.5,
} }
@ -218,38 +338,125 @@ class Go1FieldDistillCfgPPO( A1FieldDistillCfgPPO ):
runner_class_name = "TwoStageRunner" runner_class_name = "TwoStageRunner"
class runner( A1FieldDistillCfgPPO.runner ): class runner( A1FieldDistillCfgPPO.runner ):
experiment_name = "distill_go1" experiment_name = "distill_go1"
num_steps_per_env = 32
class pretrain_dataset: class pretrain_dataset:
# data_dir = [ # data_dir = [
# "logs/distill_a1_dagger/" + dir_ \ # "/localdata_ssd/isaac_ziwenz_tmp/distill_go1_dagger/20231021_camPitch0.52_jumpA1Oct16_10-55-09/" + dir_ \
# for dir_ in os.listdir("logs/distill_a1_dagger") # for dir_ in os.listdir("/localdata_ssd/isaac_ziwenz_tmp/distill_go1_dagger/20231021_camPitch0.52_jumpA1Oct16_10-55-09")
# ] # ]
scan_dir = "".join([ scan_dir = "".join([
"logs/distill_go1_dagger/{}_".format(datetime.datetime.now().strftime("%b%d")), "/localdata_ssd/isaac_ziwenz_tmp/distill_go1_dagger/", datetime.now().strftime('%b%d_%H-%M-%S'), "_",
"".join(Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["options"]), "".join(Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["options"]),
"_vDelay{:.2f}-{:.2f}".format( "_vDelay{:.2f}-{:.2f}".format(
Go1FieldDistillCfg.sensor.forward_camera.latency_range[0], Go1FieldDistillCfg.sensor.forward_camera.latency_range[0],
Go1FieldDistillCfg.sensor.forward_camera.latency_range[1], Go1FieldDistillCfg.sensor.forward_camera.latency_range[1],
), ),
"_pDelay{:.2f}-{:.2f}".format(
Go1FieldDistillCfg.sensor.proprioception.latency_range[0],
Go1FieldDistillCfg.sensor.proprioception.latency_range[1],
),
("_camPitch{:.2f}".format(
(Go1FieldDistillCfg.sensor.forward_camera.rotation["lower"][1] + Go1FieldDistillCfg.sensor.forward_camera.rotation["upper"][1]) / 2
)),
("_depthMax{:.1f}".format(Go1FieldDistillCfg.sensor.forward_camera.depth_range[1])),
# ("_zeroResetAction" if Go1FieldDistillCfg.init_state.zero_actions else ""),
# ("_actionClip" + Go1FieldDistillCfg.normalization.clip_actions_method if getattr(Go1FieldDistillCfg.normalization, "clip_actions_method", None) else ""),
("_jumpOffset{:.2f}".format(Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["jump"]["fake_offset"])),
("_noComputerClip" if not Go1FieldDistillCfg.control.computer_clip_torque else ""),
("_randOrder" if Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["options"] else ""),
]) ])
dataset_loops = -1 dataset_loops = -1
random_shuffle_traj_order = True random_shuffle_traj_order = True
keep_latest_ratio = 1.0 keep_latest_ratio = 1.0
keep_latest_n_trajs = 4000 keep_latest_n_trajs = 3000
starting_frame_range = [0, 100] starting_frame_range = [0, 100]
resume = False resume = True
load_run = "" # load_run = "/home/zzw/Data/legged_gym_logs/distill_a1/Jun06_00-15-50_distill_crawljumpleap_vDelay0.20-0.26_jump0.40-0.45_vFps10_leap0.55-0.62_fromJun04_20-18-56_06-3308JumpOracle"
# load_run = "/home/zzw/Data/legged_gym_logs/distill_go1/Jul11_16-21-59_distill_crawljumpleap"
# load_run = "Jul17_08-47-39_distill_jump_vDelay0.25-0.30"
# load_run = "Oct08_02-51-14_distill_crawljumpjumpleap_vDelay0.25-0.30_noTanh_fromJul17_08-47-39"
# load_run = "Oct08_14-51-22_distill_crawljumpjumpleap_vDelay0.25-0.30_noTanh_fromOct08_02-51-14"
load_run = "Oct20_05-35-58_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_jumpA1Oct19_06-29-14_noTanh_fromOct08_14-51-22"
load_run = "Oct21_09-01-45_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_jumpA1Oct16_10-55-09_noTanh_fromOct20_05-35-58"
load_run = "Oct21_16-21-33_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_depthMax2.0_jumpA1Oct16_10-55-09_noTanh_fromOct21_09-01-45"
load_run = "Oct22_02-25-42_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_depthMax2.0_jumpA1Oct16_10-55-09_noTanh_fromOct21_16-21-33"
load_run = "Oct24_15-43-34_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_depthMax2.0_zeroResetAction_actionCliphard_jumpOct24_07-41-26_noTanh_fromOct22_02-25-42"
load_run = "Oct25_17-10-09_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_depthMax2.0_actionCliphard_jumpOct25_11-36-16_downOct25_11-36-31_noTanh_fromOct24_15-43-34"
#### From wrong walk and jump skill policy
# load_run = "Oct26_15-38-28_distill_crawljumpjumpleap_vDelay0.25-0.30_camPitch0.52_depthMax2.0_noPropNoise_gravityBias_actionCliphard_jumpOct25_11-36-16_downOct25_11-36-31_noTanh_fromOct25_17-10-09"
# load_run = "Oct27_06-00-07_distill_crawljumpjumpleap_vDelay0.25-0.30_camPitch0.52_depthMax2.0_noPropNoise_gravityBias_actionCliphard_jumpOct25_11-36-16_downOctOct26_08-45-02_fricMax2.0_noTanh_fromOct26_15-38-28"
load_run = "Oct28_16-16-58_distill_crawljumpjumpleap_vDelay0.25-0.30_camPitch0.52_depthMax2.0_addPropNoise_gravityBias_actionCliphard_jumpOct28_11-05-56_fricMax2.0_noTanh_fromOct25_17-10-09"
######## Continue tuning policy to fix jump and leap.
## This ckpt looks good. But the jump is not good enough.
load_run = "Oct30_16-31-09_distill_crawljumpjumpleap_vDelay0.25-0.30_camPitch0.52_depthMax2.0_addPropNoise_gravityBias_actionCliphard_noComputerClip_oracleResetWhenSkill_fricMax2.0_noTanh_fromOct28_16-16-58"
max_iterations = 80000 max_iterations = 100000
save_interval = 2000 save_interval = 2000
log_interval = 100
run_name = "".join(["distill_", run_name = "".join(["distill_",
"".join(Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["options"]), "".join(Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["options"]),
"_vDelay{:.2f}-{:.2f}".format( "_vDelay{:.2f}-{:.2f}".format(
Go1FieldDistillCfg.sensor.forward_camera.latency_range[0], Go1FieldDistillCfg.sensor.forward_camera.latency_range[0],
Go1FieldDistillCfg.sensor.forward_camera.latency_range[1], Go1FieldDistillCfg.sensor.forward_camera.latency_range[1],
), ),
("_camPitch{:.2f}".format(
(Go1FieldDistillCfg.sensor.forward_camera.rotation["lower"][1] + Go1FieldDistillCfg.sensor.forward_camera.rotation["upper"][1]) / 2
)),
("_depthMax{:.1f}".format(Go1FieldDistillCfg.sensor.forward_camera.depth_range[1])),
# ("_addPropNoise" if Go1FieldDistillCfg.noise.add_noise else "_noPropNoise"),
# ("_gravityBias" if Go1FieldDistillCfg.domain_rand.randomize_gravity_bias else "_noGravityBias"),
# ("_actionClip" + Go1FieldDistillCfg.normalization.clip_actions_method if getattr(Go1FieldDistillCfg.normalization, "clip_actions_method", None) else ""),
# ("_noComputerClip" if not Go1FieldDistillCfg.control.computer_clip_torque else ""),
("_jumpNov02_13-53-48"),
("_leapVel1.5"),
("_jumpOffset{:.2f}".format(Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["jump"]["fake_offset"])),
("_oracleResetWhenSkill"),
("_fricMax{:.1f}".format(Go1FieldDistillCfg.domain_rand.friction_range[1])),
("_noTanh"),
("_from" + "_".join(load_run.split("/")[-1].split("_")[:2]) if resume else "_noResume"),
]) ])
# Checking variants:
"""
1. Faster leap (1.5m/s)
higher offset 0.1 for jump up
2. Longer leap offset 0.2, faster leap (1.5m/s)
higher offset 0.1 for jump up
3. Faster leap (1.5m/s)
higher offset 0.1 for jump up
from older ckpt
4. Faster leap (1.5m/s)
faster jump (1.2m/s) and higher offset 0.1 for jump up
from older ckpt
smaller learning rate 1e-4
---- 20231101 after testing preious ckpts ----
5. Faster leap (1.5m/s)
normal 0.05 offset for jump up, normal (1.0m/s) jump
from older ckpt (Oct28_16-16-58)
same learning rate as Oct30_16-31-09
6. Faster leap (1.5m/s)
slight higher 0.07 offset for jump up, faster (1.2m/s) jump
from older ckpt (Oct28_16-16-58)
lower learning rate as 1e-4
---- 20231102 after testing preious ckpts ----
New jump oracle to test:
* Nov02_11-07-49_Skills_jump_pEnergySubsteps5e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed1.8e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12
Nov02_11-09-03_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed2e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12
Nov02_13-53-13_Skills_jump_pEnergySubsteps5e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed1.5e+00_propDelay0.04-0.05_noPropNoise_pushRobot_minStd0.22_entropy0.01_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12
* Nov02_13-53-48_Skills_jump_pEnergySubsteps5e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed1.5e+00_propDelay0.04-0.05_noPropNoise_pushRobot_minStd0.21_entropy0.01_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12
7. Faster leap (1.5m/s)
lower 0.05 offset for jump up, normal (1.0m/s) jump
using new jump oracle
same older ckpt (Oct28_16-16-58)
same learning rate as Oct30_16-31-09 (1.5e-4)
8. Faster leap (1.5m/s)
lower 0.03 offset for jump up, normal (1.0m/s) jump
using new jump oracle
new ckpt (Oct30_16-31-09)
lower learning rate as 1e-4
"""

View File

@ -0,0 +1,223 @@
from os import path as osp
import numpy as np
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict
class Go1JumpCfg( Go1FieldCfg ):
class init_state( Go1FieldCfg.init_state ):
pos = [0., 0., 0.45]
#### uncomment this to train non-virtual terrain
class sensor( Go1FieldCfg.sensor ):
class proprioception( Go1FieldCfg.sensor.proprioception ):
latency_range = [0.04-0.0025, 0.04+0.0075]
#### uncomment the above to train non-virtual terrain
class terrain( Go1FieldCfg.terrain ):
max_init_terrain_level = 10
border_size = 5
slope_treshold = 20.
curriculum = True
BarrierTrack_kwargs = merge_dict(Go1FieldCfg.terrain.BarrierTrack_kwargs, dict(
options= [
"jump",
],
track_width= 1.6,
track_block_length= 1.6,
wall_thickness= 0.2,
jump= dict(
# height= (0.38, 0.46),
height= (0.2, 0.46),
depth= (0.1, 0.2),
fake_offset= 0.0, # [m] an offset that make the robot easier to get into the obstacle
jump_down_prob= 0., # probability of jumping down use it in non-virtual terrain
),
virtual_terrain= False, # Change this to False for real terrain
no_perlin_threshold= 0.06,
randomize_obstacle_order= True,
n_obstacles_per_track= 3,
))
TerrainPerlin_kwargs = merge_dict(Go1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
zScale= [0.05, 0.12],
))
class commands( Go1FieldCfg.commands ):
class ranges( Go1FieldCfg.commands.ranges ):
lin_vel_x = [0.5, 1.5]
lin_vel_y = [0.0, 0.0]
ang_vel_yaw = [0., 0.]
class control( Go1FieldCfg.control ):
computer_clip_torque = False
class asset( Go1FieldCfg.asset ):
penalize_contacts_on = ["base", "thigh", "calf", "imu"]
terminate_after_contacts_on = ["base"]
class termination( Go1FieldCfg.termination ):
# additional factors that determines whether to terminates the episode
termination_terms = [
"roll",
"pitch",
"z_high",
"out_of_track",
]
timeout_at_finished = True
timeout_at_border = True
class domain_rand( Go1FieldCfg.domain_rand ):
class com_range( Go1FieldCfg.domain_rand.com_range ):
z = [-0.1, 0.1]
init_base_pos_range = dict(
x= [0.2, 0.8],
y= [-0.25, 0.25],
)
init_base_rot_range = dict(
roll= [-0.1, 0.1],
pitch= [-0.1, 0.1],
)
push_robots = True
class rewards( Go1FieldCfg.rewards ):
class scales:
tracking_ang_vel = 0.08
# alive = 10.
tracking_world_vel = 5. ######## 1. always ########
legs_energy_substeps = -6.e-5 # -6e-6
jump_x_vel_cond = 0.1 ######## 1. segment ########
# hip_pos = -5.
front_hip_pos = -5.
rear_hip_pos = -0.000001
penetrate_depth = -6e-3
penetrate_volume = -6e-4
exceed_dof_pos_limits = -8e-4 # -8e-4
exceed_torque_limits_l1norm = -2. # -8e-2 # -6e-2
action_rate = -0.1
delta_torques = -1.e-5
dof_acc = -1e-7
torques = -4e-4 # 2e-3
yaw_abs = -0.1
lin_pos_y = -0.4
collision = -1.
sync_all_legs_cond = -0.3 # -0.6
dof_error = -0.06
only_positive_rewards = False # False
soft_dof_pos_limit = 0.8
max_contact_force = 100.
tracking_sigma = 0.3
class noise( Go1FieldCfg.noise ):
add_noise = False
class curriculum( Go1FieldCfg.curriculum ):
penetrate_volume_threshold_harder = 6000
penetrate_volume_threshold_easier = 12000
penetrate_depth_threshold_harder = 600
penetrate_depth_threshold_easier = 1200
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class Go1JumpCfgPPO( Go1FieldCfgPPO ):
class algorithm( Go1FieldCfgPPO.algorithm ):
entropy_coef = 0.01
clip_min_std = 0.21
class runner( Go1FieldCfgPPO.runner ):
resume = True
load_run = "{Your traind walking model directory}"
# load_run = "Sep20_03-37-32_SkillopensourcePlaneWalking_pEnergySubsteps1e-5_pTorqueExceedIndicate1e-1_aScale0.5_tClip202025"
# load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Sep27_14-32-13_Skills_jump_pEnergySubsteps-8e-06_pDofLimit-8e-01_pTorqueL18e-01_pContact1e-02_pCollision0.4_pushRobot_propDelay0.04-0.05_kp40_kd0.5_fromSep27_03-36-02")
# load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct07_10-24-52_Skills_jump_pEnergy1e-06_pY-1e-02_pCollision-1e-02_noPush_noTanh_jumpRange0.2-0.6_fromSep27_14-32-13")
# load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct19_06-29-14_Skills_jump_comXRange-0.2-0.2_pLinVel1._pDofAcc5e-7_pTorque6e-05_jumpHeight0.10-0.46_noDelayActObs_noTanh_minStd0.1_fromOct18_15-57-31")
# load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct16_10-55-09_Skills_jump_comXRange-0.2-0.2_pLinVel1.4_pTorque2e-06_pY0.1_pCollision0.1_pushRobot_jumpHeight0.10-0.45_propDelay0.04-0.05_noTanh_fromOct13_09-12-39")
######################################
# load_run = "Oct20_16-09-47_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel2._propDelay0.04-0.05_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct19_06-29-14"
# load_run = "Oct21_05-34-37_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel2._noPropNoise_propDelay0.04-0.05_pCollision-5e-01_noPush_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct20_16-09-47"
# load_run = "Oct21_12-30-28_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel2._noPropNoise_propDelay0.04-0.05_pCollision-1e+00_noPush_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct21_05-34-37"
# load_run = "Oct21_14-26-29_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel1.2_noPropNoise_propDelay0.04-0.05_pCollision-2e+00_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct21_12-30-28"
# load_run = "Oct20_16-09-47_275k_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel2._propDelay0.04-0.05_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct19_06-29-14"
# load_run = osp.join(logs_root, "field_go1_before20231030_tuneJump_noTanh", "Oct20_16-09-47_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel2._propDelay0.04-0.05_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct19_06-29-14")
# load_run = "Oct22_02-20-52_Skills_jump_pLinVel1._noPContact_pDofAcc2.5e-07_rJumpXVel1.4_noPropNoise_propDelay0.04-0.05_pCollision-1e+00_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct20_16-09-47"
# load_run = "Oct22_08-47-56_Skills_jump_rTrackVel2._pJumpSameLegs0.8_noPContact_pDofAcc2.5e-07_rJumpXVel1._noPropNoise_propDelay0.04-0.05_pCollision-4e-01_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct22_02-20-52"
# load_run = "Oct22_08-47-13_Skills_jump_rTrackVel2._pJumpSameLegs0.6_noPContact_pDofAcc2.5e-07_rJumpXVel1.2_noPropNoise_propDelay0.04-0.05_pCollision-6e-01_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct22_02-20-52"
# load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct22_15-19-26_Skills_jump_rTrackVel3._pJumpSameLegs0.6_noPContact_pDofAcc1.5e-07_rJumpXVel1.5_noPropNoise_propDelay0.04-0.05_pCollision-6e-01_noPush_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct22_08-47-13")
# load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct24_07-41-26_Skills_jump_rTrackVel4._pY-2e-01_pJumpSameLegs0.2_noPContact_pDofAcc2.5e-07_rJumpXVel1.5_noPropNoise_propDelay0.04-0.05_pCollision-1e-01_noPush_minStd0.2_noTanh_zeroResetAction_jumpRange0.4-0.5_fromOct20_16-09-47")
# load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct25_11-36-16_Skills_jump_pEnergySubsteps3e-05_rTrackVel4._pY-3e-01_propDelay0.04-0.05_noPropNoise_noPush_noTanh_zeroResetAction_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct24_07-41-26")
# load_run = "Oct27_08-20-20_Skills_jump_rTrackVel5._noPPen_pY-3e-01_pSyncSymLegs6e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct25_11-36-16"
# load_run = "Oct27_09-17-23_Skills_jump_rTrackVel5._noPPen_pDofErr6e-02_pDofErrCond6e-01_pSyncSymLegs6e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_08-20-20"
#### Force hip_pos and sync_legs
# load_run = "Oct27_13-20-59_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._pDofErr4e-02_pDofErrCond4e-01_pHipPos5e-01_pCollision5e+00_pSyncSymLegs6e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_09-17-23"
# load_run = "Oct27_13-24-58_Skills_jump_pEnergySubsteps3e-05_rTrackVel5._pDofErr4e-02_pDofErrCond3e-01_pHipPos3e-01_pCollision5e+00_pSyncSymLegs6e-01_propDelay0.04-0.05_withPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_09-17-23"
#### Adding RL gamma for energy penalty
load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct27_16-04-45_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._pPenD-6e-03_pDofErrCond6e-01_pHipPos5e-01_pCollision6e+00_pSyncSymLegs4e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_13-20-59")
#### Wide spreading legs first
# load_run = "Oct28_03-08-02_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._rAlive0.5_pDofErrCond8e-01_pHipPos5e-01_pSyncSymLegs2e-01_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_jumpRange0.4-0.5_fromOct27_16-04-45"
# load_run = "Oct28_07-07-03_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._rAlive1.0_pPenD-1e-03_pDofErrCond8e-01_pHipPos3e+00_pSyncSymLegs2e-01_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_jumpRange0.4-0.5_fromOct27_16-04-45"
load_run = "Oct28_08-54-23_Skills_jump_pEnergySubsteps1e-05_rTrackVel3._rAlive0.5_pYaw-1e-01_pHipPos5e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_jumpRange0.4-0.5_fromOct28_03-08-02"
load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct28_11-05-56_Skills_jump_pEnergySubsteps2e-06_rTrackVel5._pYaw-1e-01_pHipPos5e+00_pCollision1e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_jumpRange0.4-0.5_allowNegativeReward_fromOct28_08-54-23")
#### lowering max torques and energy
load_run = "Oct29_03-21-19_Skills_jump_pEnergySubsteps6e-06_rTrackVel5._pHipPos5e+00_pTorque2e-05_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.4-0.5_allowNegativeReward_fromOct28_11-05-56"
load_run = "Oct29_07-54-42_Skills_jump_pEnergySubsteps1e-05_rTrackVel5._pHipPos5e+00_pTorqueExceed6e-02_pTorque2e-03_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.4-0.5_allowNegativeReward_fromOct29_03-21-19"
load_run = "Oct29_12-18-08_Skills_jump_pEnergySubsteps1e-05_rTrackVel6._pTorqueExceed6e-02_pTorque1e-03_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_07-54-42"
load_run = "Oct29_15-18-02_Skills_jump_pEnergySubsteps1e-05_rTrackVel4._pTorqueExceed3e-02_pTorque1e-03_noJumpBonous_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_12-18-08"
load_run = "Oct29_16-37-44_Skills_jump_pEnergySubsteps1.2e-05_rTrackVel4._pTorqueExceed6e-02_pTorque2e-03_noJumpBonous_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_15-18-02"
load_run = "Oct29_20-17-31_Skills_jump_pEnergySubsteps5e-05_rTrackVel4._pTorqueExceed1e+00_pTorque4e-04_noJumpBonous_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_16-37-44"
load_run = "Oct30_11-11-43_Skills_jump_pEnergySubsteps6e-05_rTrackVel5._pY-8e-01_pTorqueExceed1.2e+00_pTorque4e-04_pDTorques1e-06_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_20-17-31"
load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct30_13-00-12_Skills_jump_pEnergySubsteps6e-05_rTrackVel5._pY-4e-01_pTorqueExceed1.8e+00_pTorque4e-04_pDTorques1e-05_propDelay0.04-0.05_noPropNoise_noPush_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_20-17-31")
run_name = "".join(["Skills_",
("Multi" if len(Go1JumpCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else (Go1JumpCfg.terrain.BarrierTrack_kwargs["options"][0] if Go1JumpCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")),
("_pEnergySubsteps" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.legs_energy_substeps, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "legs_energy_substeps", -2e-5) != -2e-5 else ""),
# ("_rTrackVel" + np.format_float_positional(Go1JumpCfg.rewards.scales.tracking_world_vel) if getattr(Go1JumpCfg.rewards.scales, "tracking_world_vel", 0.) != 0. else ""),
# ("_pLinVel" + np.format_float_positional(-Go1JumpCfg.rewards.scales.world_vel_l2norm) if getattr(Go1JumpCfg.rewards.scales, "world_vel_l2norm", 0.) != 0.0 else ""),
# ("_rAlive{:.1f}".format(Go1JumpCfg.rewards.scales.alive) if getattr(Go1JumpCfg.rewards.scales, "alive", 0.) != 0. else ""),
# ("_pPenD{:.0e}".format(Go1JumpCfg.rewards.scales.penetrate_depth) if getattr(Go1JumpCfg.rewards.scales, "penetrate_depth", 0.) < 0. else ""),
# ("_pYaw{:.0e}".format(Go1JumpCfg.rewards.scales.yaw_abs) if getattr(Go1JumpCfg.rewards.scales, "yaw_abs", 0.) < 0. else ""),
# ("_pY{:.0e}".format(Go1JumpCfg.rewards.scales.lin_pos_y) if getattr(Go1JumpCfg.rewards.scales, "lin_pos_y", 0.) < 0. else ""),
# ("_pDofLimit{:.0e}".format(Go1JumpCfg.rewards.scales.exceed_dof_pos_limits) if getattr(Go1JumpCfg.rewards.scales, "exceed_dof_pos_limits", 0.) < 0. else ""),
# ("_pDofErr" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.dof_error, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "dof_error", 0.) != 0. else ""),
# ("_pDofErrCond" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.dof_error_cond, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "dof_error_cond", 0.) != 0. else ""),
# ("_pHipPos" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.hip_pos, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "hip_pos", 0.) != 0. else ""),
("_pFHipPos" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.front_hip_pos, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "front_hip_pos", 0.) != 0. else ""),
("_pRHipPos" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.rear_hip_pos, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "rear_hip_pos", 0.) != 0. else ""),
# ("_pTorqueExceedI" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.exceed_torque_limits_i, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "exceed_torque_limits_i", 0.) != 0. else ""),
("_pTorqueExceed" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.exceed_torque_limits_l1norm, precision=1, trim="-") if Go1JumpCfg.rewards.scales.exceed_torque_limits_l1norm != -8e-1 else ""),
# ("_pContact" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.feet_contact_forces, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "feet_contact_forces", 0.) != 0. else ""),
# ("_pJumpSameLegs{:.1f}".format(-Go1JumpCfg.rewards.scales.sync_legs_cond) if getattr(Go1JumpCfg.rewards.scales, "sync_legs_cond", 0.) != 0. else ""),
# ("_pDofAcc" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.dof_acc, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "dof_acc", 0.) != 0. else ""),
# ("_pTorque" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.torques, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "torques", 0.) != 0. else ""),
# ("_rJumpXVel" + np.format_float_positional(Go1JumpCfg.rewards.scales.jump_x_vel_cond) if getattr(Go1JumpCfg.rewards.scales, "jump_x_vel_cond", 0.) != 0. else "_noJumpBonous"),
# ("_pCollision" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.collision, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "collision", 0.) < 0. else ""),
# ("_pSyncSymLegs" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.sync_all_legs_cond, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "sync_all_legs_cond", 0.) != 0. else ""),
# ("_pZVel" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.lin_vel_z, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "lin_vel_z", 0.) != 0. else ""),
# ("_pAngXYVel" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.ang_vel_xy, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "ang_vel_xy", 0.) != 0. else ""),
# ("_pDTorques" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.delta_torques, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "delta_torques", 0.) != 0. else ""),
("_propDelay{:.2f}-{:.2f}".format(
Go1JumpCfg.sensor.proprioception.latency_range[0],
Go1JumpCfg.sensor.proprioception.latency_range[1],
)),
# ("_softDof{:.1f}".format(Go1JumpCfg.rewards.soft_dof_pos_limit) if Go1JumpCfg.rewards.soft_dof_pos_limit != 0.9 else ""),
# ("_timeoutFinished" if getattr(Go1JumpCfg.termination, "timeout_at_finished", False) else ""),
# ("_noPitchTerm" if "pitch" not in Go1JumpCfg.termination.termination_terms else ""),
("_noPropNoise" if not Go1JumpCfg.noise.add_noise else "_withPropNoise"),
("_noPush" if not Go1JumpCfg.domain_rand.push_robots else "_pushRobot"),
("_minStd0.21"),
("_entropy0.01"),
# ("_gamma0.999"),
("_noTanh"),
# ("_zeroResetAction" if Go1JumpCfg.init_state.zero_actions else ""),
# ("_actionClip" + Go1JumpCfg.normalization.clip_actions_method if getattr(Go1JumpCfg.normalization, "clip_actions_method", "") != "" else ""),
# ("_noDelayActObs" if not Go1JumpCfg.sensor.proprioception.delay_action_obs else ""),
("_noComputerClip" if not Go1JumpCfg.control.computer_clip_torque else ""),
("_jumpRange{:.1f}-{:.1f}".format(*Go1JumpCfg.terrain.BarrierTrack_kwargs["jump"]["height"]) if Go1JumpCfg.terrain.BarrierTrack_kwargs["jump"]["height"] != (0.1, 0.6) else ""),
# ("_noCurriculum" if not Go1JumpCfg.terrain.curriculum else ""),
# ("_allowNegativeReward" if not Go1JumpCfg.rewards.only_positive_rewards else ""),
("_from" + "_".join(load_run.split("/")[-1].split("_")[:2]) if resume else "_noResume"),
])
max_iterations = 20000
save_interval = 200

View File

@ -0,0 +1,184 @@
from os import path as osp
import numpy as np
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict
class Go1LeapCfg( Go1FieldCfg ):
class init_state( Go1FieldCfg.init_state ):
pos = [0., 0., 0.4]
#### uncomment this to train non-virtual terrain
class sensor( Go1FieldCfg.sensor ):
class proprioception( Go1FieldCfg.sensor.proprioception ):
latency_range = [0.04-0.0025, 0.04+0.0075]
# latency_range = [0.06-0.005, 0.06+0.005]
#### uncomment the above to train non-virtual terrain
class terrain( Go1FieldCfg.terrain ):
max_init_terrain_level = 2
border_size = 5
slope_treshold = 20.
curriculum = False
BarrierTrack_kwargs = merge_dict(Go1FieldCfg.terrain.BarrierTrack_kwargs, dict(
options= [
"leap",
],
leap= dict(
length= (0.3, 0.8),
depth= (0.4, 0.8),
height= 0.15,
),
wall_height= -0.4,
virtual_terrain= True, # Change this to False for real terrain
no_perlin_threshold= 0.06,
))
TerrainPerlin_kwargs = merge_dict(Go1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
zScale= [0.05, 0.12],
))
class commands( Go1FieldCfg.commands ):
class ranges( Go1FieldCfg.commands.ranges ):
lin_vel_x = [1.8, 1.5]
lin_vel_y = [0.0, 0.0]
ang_vel_yaw = [0., 0.]
class control( Go1FieldCfg.control ):
computer_clip_torque = True
class termination( Go1FieldCfg.termination ):
# additional factors that determines whether to terminates the episode
termination_terms = [
"roll",
"pitch",
"z_low",
"z_high",
"out_of_track",
]
# roll_kwargs = merge_dict(Go1FieldCfg.termination.roll_kwargs, dict(
# threshold= 0.7,
# leap_threshold= 0.7,
# ))
# pitch_kwargs = merge_dict(Go1FieldCfg.termination.pitch_kwargs, dict(
# threshold= 0.7,
# leap_threshold= 0.7,
# ))
z_low_kwargs = merge_dict(Go1FieldCfg.termination.z_low_kwargs, dict(
threshold= -0.1,
))
z_high_kwargs = merge_dict(Go1FieldCfg.termination.z_high_kwargs, dict(
threshold= 2.0,
))
class domain_rand( Go1FieldCfg.domain_rand ):
class com_range( Go1FieldCfg.domain_rand.com_range ):
z = [-0.2, 0.2]
init_base_pos_range = dict(
x= [0.2, 0.8],
y= [-0.25, 0.25],
)
init_base_rot_range = dict(
roll= [-0.1, 0.1],
pitch= [-0.1, 0.1],
)
push_robots = False
class rewards( Go1FieldCfg.rewards ):
class scales:
tracking_ang_vel = 0.05
# world_vel_l2norm = -1.
tracking_world_vel = 5.
# leap_bonous_cond = 6.
legs_energy_substeps = -7e-6 # 5e5 spike (not scaled)
# alive = 3.
penetrate_depth = -1e-2 # 8 spike (not scaled)
penetrate_volume = -1e-4 # 100 spike (not scaled)
exceed_dof_pos_limits = -8e-1
exceed_torque_limits_l1norm = -1.
# action_rate = -0.1
delta_torques = -1e-7
dof_acc = -1e-7
torques = -4e-5 # 2000 spike (not scaled)
yaw_abs = -0.2
collision = -1.
lin_pos_y = -0.4
orientation = -0.1 # 0.3 segment (not scaled)
hip_pos = -5. # 0.5 spike (not scaled)
dof_error = -0.15 # 1.2 spike (not scaled)
tracking_sigma = 0.35
only_positive_rewards = False
soft_dof_pos_limit = 0.7
class curriculum( Go1FieldCfg.curriculum ):
# penetrate_volume_threshold_harder = 9000
# penetrate_volume_threshold_easier = 10000
# penetrate_depth_threshold_harder = 300
# penetrate_depth_threshold_easier = 5000
####### use extreme large value to disable penetrate curriculum
penetrate_volume_threshold_harder = 900000
penetrate_volume_threshold_easier = 1000000
penetrate_depth_threshold_harder = 30000
penetrate_depth_threshold_easier = 500000
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class Go1LeapCfgPPO( Go1FieldCfgPPO ):
class runner( Go1FieldCfgPPO.runner ):
resume = True
load_run = "{Your traind walking model directory}"
# load_run = "Sep20_03-37-32_SkillopensourcePlaneWalking_pEnergySubsteps1e-5_pTorqueExceedIndicate1e-1_aScale0.5_tClip202025"
# load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Sep27_14-56-25_Skills_leap_propDelay0.04-0.05_pEnergySubsteps-8e-06_pDofLimit8e-01_pCollision0.1_kp40_kd0.5fromSep27_02-44-48")
load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct09_09-51-58_Skills_leap_propDelay0.04-0.05_pEnergySubsteps-8e-06_pPenD1.e-2_pDofLimit4e-01_pCollision0.5_kp40_kd0.5fromOct05_02-16-22")
load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct25_11-39-12_Skills_leap_pEnertySubsteps4.e-5_pActRate2.e-1_pDofLimit8.e-1_pCollision5.e-1_noPropNoise_noTanh_zeroResetAction_actionCliphard_fromOct09_09-51-58")
load_run = "Oct28_03-32-35_Skills_leap_pEnertySubsteps2.e-5_rTrackVel2._rAlive2.0_pActRate2.e-1_pHipPos5.e+0_noTanh_actionCliphard_virtual_fromOct25_11-39-12"
load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct28_13-23-39_Skills_leap_pEnertySubsteps2.e-5_rTrackVel2._rAlive2.0_pSyncAllLegs6.e-1_pOrient6.e-1_pHipPos5.e+0_noTanh_actionCliphard_virtual_fromOct28_03-32-35")
load_run = "Oct29_04-23-33_Skills_leap_pEnertySubsteps6.e-6_rTrackVel6._pTorques2.e-5_pHipPos5.e+0_pDorErrCond8.e-2_noComputerClip_noTanh_allowNegativeReward_actionCliphard_virtual_fromOct28_13-23-39"
# load_run = "Oct29_08-30-23_Skills_leap_pEnertySubsteps6.e-6_rTrackVel6._pTorqueExceed2.e-1_pPosY2.e-1_pTorques2.e-5_pHipPos5.e+0_noComputerClip_noTanh_allowNegativeReward_actionCliphard_virtual_fromOct29_04-23-33"
# load_run = "Oct29_15-46-30_Skills_leap_pEnertySubsteps6.e-6_rTrackVel6._pTorqueExceed2.e-1_pOrient3.e-1_pTorques2.e-5_noComputerClip_noTanh_allowNegativeReward_actionCliphard_virtual_fromOct29_08-30-23"
load_run = "Oct30_02-07-26_Skills_leap_pEnertySubsteps6.e-6_rTrackVel6._pTorqueExceed1.e-1_pPosY4.e-1_pTorques2.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_allowNegativeReward_actionCliphard_virtual_fromOct29_04-23-33"
load_run = "Oct30_04-10-12_Skills_leap_pEnertySubsteps4.e-6_rTrackVel5._pTorqueExceed3.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_02-07-26"
load_run = "Oct30_05-07-24_Skills_leap_pEnertySubsteps5.e-6_rTrackVel5._pTorqueExceed4.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_04-10-12"
load_run = "Oct30_05-08-03_Skills_leap_pEnertySubsteps6.e-6_rTrackVel5._pTorqueExceed4.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_04-10-12"
load_run = "Oct30_05-08-20_Skills_leap_pEnertySubsteps6.e-6_rTrackVel5._pTorqueExceed5.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_04-10-12"
load_run = "Oct30_07-32-10_Skills_leap_pEnertySubsteps6.e-6_rTrackVel5._pTorqueExceed8.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_pDorErr1.5e-1_noPenCurriculum_noCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_05-07-24"
run_name = "".join(["Skills_",
("Multi" if len(Go1LeapCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else (Go1LeapCfg.terrain.BarrierTrack_kwargs["options"][0] if Go1LeapCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")),
("_pEnertySubsteps" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.legs_energy_substeps, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "legs_energy_substeps", 0.0) != 0.0 else ""),
("_rTrackVel" + np.format_float_positional(Go1LeapCfg.rewards.scales.tracking_world_vel) if getattr(Go1LeapCfg.rewards.scales, "tracking_world_vel", 0.) != 0. else ""),
("_rAlive{:.1f}".format(Go1LeapCfg.rewards.scales.alive) if getattr(Go1LeapCfg.rewards.scales, "alive", 0.) != 0. else ""),
# ("_pActRate" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.action_rate, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "action_rate", 0.) != 0.0 else ""),
# ("_pDofAcc" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.dof_acc, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "dof_acc", 0.) != 0.0 else ""),
# ("_pDofLimit" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.exceed_dof_pos_limits, precision=1, exp_digits=1) if Go1LeapCfg.rewards.scales.exceed_dof_pos_limits != 0.0 else ""),
# ("_pCollision" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.collision, precision=1, exp_digits=1) if Go1LeapCfg.rewards.scales.collision != 0.0 else ""),
# ("_leapBonousCond" if getattr(Go1LeapCfg.rewards.scales, "leap_bonous_cond", 0.) != 0.0 else ""),
("_pTorqueExceed" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.exceed_torque_limits_l1norm, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "exceed_torque_limits_l1norm", 0.) != 0.0 else ""),
# ("_pSyncAllLegs" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.sync_all_legs, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "sync_all_legs", 0.) != 0.0 else ""),
# ("_pOrient" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.orientation, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "orientation", 0.) != 0.0 else ""),
# ("_pPenD" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.penetrate_depth, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "penetrate_depth", 0.) != 0.0 else ""),
("_pPosY" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.lin_pos_y, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "lin_pos_y", 0.) != 0.0 else ""),
# ("_pPenV" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.penetrate_volume, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "penetrate_volume", 0.) != 0.0 else ""),
("_pTorques" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.torques, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "torques", 0.) != 0.0 else ""),
("_pHipPos" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.hip_pos, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "hip_pos", 0.) != 0.0 else ""),
# ("_pDorErrCond" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.dof_error_cond, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "dof_error_cond", 0.) != 0.0 else ""),
("_pDorErr" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.dof_error, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "dof_error", 0.) != 0.0 else ""),
# ("_noPropNoise" if not Go1LeapCfg.noise.add_noise else ""),
("_noPenCurriculum" if Go1LeapCfg.curriculum.penetrate_volume_threshold_harder > 2e5 else ""),
("_noCurriculum" if not Go1LeapCfg.terrain.curriculum else ""),
("_noComputerClip" if not Go1LeapCfg.control.computer_clip_torque else ""),
("_noTanh"),
("_noPush" if not Go1LeapCfg.domain_rand.push_robots else "_pushRobot"),
# ("_zeroResetAction" if Go1LeapCfg.init_state.zero_actions else ""),
("_allowNegativeReward" if not Go1LeapCfg.rewards.only_positive_rewards else ""),
("_actionClip" + Go1LeapCfg.normalization.clip_actions_method if getattr(Go1LeapCfg.normalization, "clip_actions_method", "") != "" else ""),
("_virtual" if Go1LeapCfg.terrain.BarrierTrack_kwargs["virtual_terrain"] else ""),
("_from" + "_".join(load_run.split("/")[-1].split("_")[:2]) if resume else ""),
])
max_iterations = 20000
save_interval = 200

View File

@ -0,0 +1,120 @@
import numpy as np
import os.path as osp
from legged_gym.utils.helpers import merge_dict
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO
class Go1RemoteCfg( Go1FieldCfg ):
class env( Go1FieldCfg.env ):
num_envs = 4096
obs_components = [
"proprioception", # 48
]
privileged_obs_components = [
"proprioception",
"robot_config",
]
use_lin_vel = False
privileged_use_lin_vel = True
class init_state( Go1FieldCfg.init_state ):
pos = [0., 0., 0.42]
class terrain( Go1FieldCfg.terrain ):
num_rows = 6
num_cols = 6
selected = "TerrainPerlin"
TerrainPerlin_kwargs = dict(
zScale= 0.15,
frequency= 10,
)
class commands( Go1FieldCfg.commands ):
class ranges( Go1FieldCfg.commands.ranges ):
lin_vel_x = [-1.0, 1.0]
lin_vel_y = [-1., 1.]
ang_vel_yaw = [-1., 1.]
class domain_rand( Go1FieldCfg.domain_rand ):
init_base_pos_range = dict(
x= [-0.1, 0.1],
y= [-0.1, 0.1],
)
init_base_rot_range = dict(
roll= [-0.4, 0.4],
pitch= [-0.4, 0.4],
)
init_base_vel_range = merge_dict(Go1FieldCfg.domain_rand.init_base_vel_range, dict(
x= [-0.8, 1.5],
y= [-0.8, 0.8],
))
class rewards( Go1FieldCfg.rewards ):
class scales:
###### hacker from Field
tracking_ang_vel = 1.
tracking_lin_vel = 3.
# lin_vel_l2norm = -1.
alive = 1.
legs_energy_substeps = -6e-7
# penalty for hardware safety
# exceed_dof_pos_limits = -4e-2
exceed_torque_limits_l1norm = -1e-2
# penalty for walking gait, probably no need
lin_vel_z = -0.5
ang_vel_xy = -0.05
orientation = -4.
dof_acc = -1.e-7
collision = -10.
action_rate = -0.01
delta_torques = -1e-7
torques = -1.e-5
hip_pos = -0.4
dof_error = -0.04
stand_still = -0.6
only_positive_rewards = False
soft_dof_pos_limit = 0.6
class normalization( Go1FieldCfg.normalization ):
clip_actions_method = "hard"
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class Go1RemoteCfgPPO( Go1FieldCfgPPO ):
class algorithm( Go1FieldCfgPPO.algorithm ):
entropy_coef = 0.01
clip_min_std = 0.02
class runner( Go1FieldCfgPPO.runner ):
resume = True
# load_run = osp.join(logs_root, "field_a1/Sep26_15-33-45_WalkByRemote_pEnergySubsteps2e-5_pTorqueL18e-01_pCollision0.2_propDelay0.04-0.05_aScale0.5_kp40_kd0.5_maxPushAng0.5_noTanh_hardActClip_noResume")
load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Sep28_11-53-30_WalkByRemote_pEnergySubsteps-4e-05_kp40_kd0.5_noTanh_fromSep26_15-33-45")
load_run = "Oct04_16-15-17_WalkByRemote_pEnergySubsteps-1e-04_softDof0.5_pStand0.0_pDofAcc5e-7_kp40_kd0.5_noTanh_fromSep28_11-53-30"
load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct12_16-01-06_WalkByRemote_pLin1.0_pFootForce1.e-02_xComMin-0.2_maxForce200_propDelay0.04-0.05_entropyCoef0.01_noTanh_fromOct11_04-34-39")
load_run = "Oct23_03-33-29_WalkByRemote_pEnergySubsteps-1e-05_rLinTrack3.0_softDof0.6_pStand1.0_kp40_kd0.5_noTanh_fromOct12_16-01-06"
load_run = "Oct23_11-01-47_WalkByRemote_pEnergySubsteps-1e-05_rLinTrack5.0_pCollide1e-1_pOrient1e-1_pStand2e-1_softDof0.6_kp40_kd0.5_noTanh_fromOct23_03-33-29"
load_run = "Oct23_13-14-42_WalkByRemote_pEnergySubsteps1e-05_rLinTrack5.0_pDofAcc2.5e-7_pCollide1e-1_pOrient1e-1_pExceedDof8e-1_pStand4e-1_softDof0.6_noTanh_EntropyCoef0.01_fromOct23_11-01-47"
load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct12_16-00-55_WalkByRemote_pLin1.0_pFootForce1.e-02_xComMin-0.2_maxForce200_propDelay0.05-0.07_entropyCoef0.01_noTanh_fromOct11_04-34-39")
load_run = "Oct24_15-05-23_WalkByRemote_rLinTrack3.0_pActRate1e-1_softDof0.6_noTanh_zeroResetAction_hackReset_EntropyCoef0.01_actionCliphard_fromOct12_16-00-55"
run_name = "".join(["WalkByRemote",
("_pEnergySubsteps{:.0e}".format(-Go1RemoteCfg.rewards.scales.legs_energy_substeps) if getattr(Go1RemoteCfg.rewards.scales, "legs_energy_substeps", 0.) != 0. else ""),
("_rLinTrack{:.1f}".format(Go1RemoteCfg.rewards.scales.tracking_lin_vel) if getattr(Go1RemoteCfg.rewards.scales, "tracking_lin_vel", 0.) != 0. else ""),
("_pLinVelL2{:.1f}".format(-Go1RemoteCfg.rewards.scales.lin_vel_l2norm) if getattr(Go1RemoteCfg.rewards.scales, "lin_vel_l2norm", 0.) != 0. else ""),
("_rAlive{:.1f}".format(Go1RemoteCfg.rewards.scales.alive) if getattr(Go1RemoteCfg.rewards.scales, "alive", 0.) != 0. else ""),
# ("_pDofAcc" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.dof_acc, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "dof_acc", 0.) != 0. else ""),
# ("_pCollide" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.collision, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "collision", 0.) != 0. else ""),
# ("_pOrient" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.orientation, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "orientation", 0.) != 0. else ""),
# ("_pExceedDof" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.exceed_dof_pos_limits, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "exceed_dof_pos_limits", 0.) != 0. else ""),
("_pExceedTorqueL1" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.exceed_torque_limits_l1norm, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "exceed_torque_limits_l1norm", 0.) != 0. else ""),
# ("_pStand" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.stand_still, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "stand_still", 0.) != 0. else ""),
("_pActRate" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.action_rate, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "action_rate", 0.) != 0. else ""),
("_softDof{:.1f}".format(Go1RemoteCfg.rewards.soft_dof_pos_limit) if Go1RemoteCfg.rewards.soft_dof_pos_limit != 0.9 else ""),
# ("_kp{:d}".format(int(Go1RemoteCfg.control.stiffness["joint"])) if Go1RemoteCfg.control.stiffness["joint"] != 50 else ""),
# ("_kd{:.1f}".format(Go1RemoteCfg.control.damping["joint"]) if Go1RemoteCfg.control.damping["joint"] != 1. else ""),
("_noTanh"),
# ("_zeroResetAction" if Go1RemoteCfg.init_state.zero_actions else ""),
("_EntropyCoef0.01"),
("_actionClip" + Go1RemoteCfg.normalization.clip_actions_method if getattr(Go1RemoteCfg.normalization, "clip_actions_method", None) is not None else ""),
("_noResume" if not resume else "_from" + "_".join(load_run.split("/")[-1].split("_")[:2])),
])

View File

@ -0,0 +1,90 @@
from os import path as osp
import numpy as np
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict
class Go1TiltCfg( Go1FieldCfg ):
#### uncomment this to train non-virtual terrain
# class sensor( Go1FieldCfg.sensor ):
# class proprioception( Go1FieldCfg.sensor.proprioception ):
# delay_action_obs = True
# latency_range = [0.04-0.0025, 0.04+0.0075]
#### uncomment the above to train non-virtual terrain
class terrain( Go1FieldCfg.terrain ):
max_init_terrain_level = 2
border_size = 5
slope_treshold = 20.
curriculum = True
BarrierTrack_kwargs = merge_dict(Go1FieldCfg.terrain.BarrierTrack_kwargs, dict(
options= [
"tilt",
],
tilt= dict(
width= (0.24, 0.8),
depth= (0.4, 1.), # size along the forward axis
opening_angle= 0.0, # [rad] an opening that make the robot easier to get into the obstacle
wall_height= 0.5,
),
virtual_terrain= True, # Change this to False for real terrain
no_perlin_threshold= 0.06,
))
TerrainPerlin_kwargs = merge_dict(Go1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
zScale= [0.05, 0.1],
))
class commands( Go1FieldCfg.commands ):
class ranges( Go1FieldCfg.commands.ranges ):
lin_vel_x = [0.3, 0.6]
lin_vel_y = [0.0, 0.0]
ang_vel_yaw = [0., 0.]
class termination( Go1FieldCfg.termination ):
# additional factors that determines whether to terminates the episode
termination_terms = [
"roll",
"pitch",
"z_low",
"z_high",
"out_of_track",
]
roll_kwargs = merge_dict(Go1FieldCfg.termination.roll_kwargs, dict(
threshold= 0.4,
leap_threshold= 0.4,
))
z_high_kwargs = merge_dict(Go1FieldCfg.termination.z_high_kwargs, dict(
threshold= 2.0,
))
class rewards( Go1FieldCfg.rewards ):
class scales:
tracking_ang_vel = 0.05
world_vel_l2norm = -1.
legs_energy_substeps = -1e-6
alive = 2.
penetrate_depth = -4e-3
penetrate_volume = -4e-3
exceed_dof_pos_limits = -1e-1
exceed_torque_limits_i = -1e-1
class curriculum( Go1FieldCfg.curriculum ):
penetrate_volume_threshold_harder = 9000
penetrate_volume_threshold_easier = 10000
penetrate_depth_threshold_harder = 300
penetrate_depth_threshold_easier = 5000
class Go1TiltCfgPPO( Go1FieldCfgPPO ):
class runner( Go1FieldCfgPPO.runner ):
run_name = "".join(["Skill",
("Multi" if len(Go1TiltCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else (Go1TiltCfg.terrain.BarrierTrack_kwargs["options"][0] if Go1TiltCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")),
("_tiltMax{:.1f}".format(Go1TiltCfg.terrain.BarrierTrack_kwargs["tilt"]["width"][1]) if Go1TiltCfg.terrain.BarrierTrack_kwargs["tilt"]["width"][1] > 0.5 else ""),
])
resume = True
load_run = "{Your traind walking model directory}"
load_run = "Sep20_03-37-32_SkillopensourcePlaneWalking_pEnergySubsteps1e-5_pTorqueExceedIndicate1e-1_aScale0.5_tClip202025"
max_iterations = 20000
save_interval = 500

View File

@ -7,13 +7,13 @@ import argparse
def main(args): def main(args):
for data_dir in args.data_dirs: for data_dir in args.data_dirs:
if osp.isfile(osp.join(data_dir, "metadata.json")): if osp.isfile(osp.join(data_dir, "metadata.json")):
shutil.move( shutil.copy2(
osp.join(data_dir, "metadata.json"), osp.join(data_dir, "metadata.json"),
osp.join(osp.dirname(data_dir), data_dir.split("/")[-1] + ".json") osp.join(osp.dirname(data_dir), data_dir.split("/")[-1] + ".json")
) )
print(f"Moved metadata.json to {osp.join(osp.dirname(data_dir), data_dir.split('/')[-1] + '.json')}") print(f"Moved metadata.json to {osp.join(osp.dirname(data_dir), data_dir.split('/')[-1] + '.json')}")
# removing the directory # removing the directory
if osp.isdir(data_dir): if osp.isdir(data_dir) and (not osp.islink(data_dir)):
print("Removing directory: ", data_dir) print("Removing directory: ", data_dir)
if not args.only_files: if not args.only_files:
shutil.rmtree(data_dir) shutil.rmtree(data_dir)

View File

@ -29,36 +29,34 @@ def main(args):
update_class_from_dict(train_cfg, d, strict= True) update_class_from_dict(train_cfg, d, strict= True)
# Some custom settings # Some custom settings
env_cfg.terrain.BarrierTrack_kwargs["options"] = [ # ####### customized option to increase data distribution #######
"tilt", # "tilt",
"crawl", # "crawl",
"climb", # "climb",
"climb", # "climb",
"leap", # "leap",
]
####### customized option to increase data distribution #######
action_sample_std = 0.0 action_sample_std = 0.0
############# some predefined options ############# ############# some predefined options #############
if len(env_cfg.terrain.BarrierTrack_kwargs["options"]) == 1: if len(env_cfg.terrain.BarrierTrack_kwargs["options"]) == 1:
env_cfg.terrain.BarrierTrack_kwargs["n_obstacles_per_track"] = 1
env_cfg.terrain.num_rows = 20; env_cfg.terrain.num_cols = 30 env_cfg.terrain.num_rows = 20; env_cfg.terrain.num_cols = 30
else: # for parkour env else: # for parkour env
# >>> option 1 # >>> option 1
env_cfg.domain_rand.added_mass_range = [1.5, 3.0] env_cfg.terrain.BarrierTrack_kwargs["track_block_length"] = 2.8
env_cfg.terrain.BarrierTrack_kwargs["climb_down_prob"] = 0.45 env_cfg.terrain.BarrierTrack_kwargs["track_width"] = 2.4
env_cfg.terrain.num_rows = 60; env_cfg.terrain.num_cols = 1 env_cfg.terrain.BarrierTrack_kwargs["wall_thickness"] = (0.0, 0.6)
env_cfg.domain_rand.init_base_pos_range["x"] = (0.4, 1.8)
env_cfg.terrain.num_rows = 12; env_cfg.terrain.num_cols = 10
# >>> option 2 # >>> option 2
# env_cfg.terrain.BarrierTrack_kwargs["climb"]["depth"] = (0.1, 0.3) # env_cfg.terrain.BarrierTrack_kwargs["track_block_length"] = 3.
# env_cfg.terrain.BarrierTrack_kwargs["climb"]["climb_down_prob"] = 0.55 # env_cfg.terrain.BarrierTrack_kwargs["track_width"] = 4.0
# env_cfg.terrain.num_rows = 60; env_cfg.terrain.num_cols = 1 # env_cfg.terrain.BarrierTrack_kwargs["wall_height"] = (-0.5, -0.2)
# env_cfg.terrain.BarrierTrack_kwargs["wall_thickness"] = (0.0, 1.4)
# env_cfg.domain_rand.init_base_pos_range["x"] = (1.6, 2.0)
# env_cfg.terrain.num_rows = 16; env_cfg.terrain.num_cols = 5
# >>> option 3 # >>> option 3
# env_cfg.terrain.BarrierTrack_kwargs["track_block_length"] = 1.6
# env_cfg.terrain.BarrierTrack_kwargs["track_width"] = 2.2
# env_cfg.terrain.BarrierTrack_kwargs["wall_height"] = (-0.5, 0.1)
# env_cfg.terrain.BarrierTrack_kwargs["wall_thickness"] = (0.0, 0.5)
# env_cfg.domain_rand.init_base_pos_range["x"] = (0.2, 0.9)
# env_cfg.terrain.BarrierTrack_kwargs["n_obstacles_per_track"] = 1 # env_cfg.terrain.BarrierTrack_kwargs["n_obstacles_per_track"] = 1
# env_cfg.terrain.BarrierTrack_kwargs["track_width"] = 1.6
# env_cfg.terrain.BarrierTrack_kwargs["wall_thickness"] = (0.0, 0.3)
# env_cfg.terrain.BarrierTrack_kwargs["track_block_length"] = 3.0
# env_cfg.terrain.BarrierTrack_kwargs["climb"]["depth"] = (0.5, 1.8)
# env_cfg.terrain.num_rows = 20; env_cfg.terrain.num_cols = 24
# action_sample_std = 0.1 # action_sample_std = 0.1
# env_cfg.terrain.num_rows = 22; env_cfg.terrain.num_cols = 16
pass pass
if (env_cfg.terrain.BarrierTrack_kwargs["options"][0] == "leap") and all(i == env_cfg.terrain.BarrierTrack_kwargs["options"][0] for i in env_cfg.terrain.BarrierTrack_kwargs["options"]): if (env_cfg.terrain.BarrierTrack_kwargs["options"][0] == "leap") and all(i == env_cfg.terrain.BarrierTrack_kwargs["options"][0] for i in env_cfg.terrain.BarrierTrack_kwargs["options"]):
######### For leap, because the platform is usually higher than the ground. ######### For leap, because the platform is usually higher than the ground.

View File

@ -101,7 +101,7 @@ def play(args):
env_cfg.terrain.curriculum = False env_cfg.terrain.curriculum = False
env_cfg.terrain.BarrierTrack_kwargs["options"] = [ env_cfg.terrain.BarrierTrack_kwargs["options"] = [
# "crawl", # "crawl",
"climb", "jump",
# "leap", # "leap",
# "tilt", # "tilt",
] ]
@ -146,7 +146,7 @@ def play(args):
logs_root + "/field_a1_oracle/Jun03_00-33-08_Skills_climb_pEnergySubsteps2e-6_rAlive2_pTorqueExceedIndicate2e-1_propDelay0.04-0.05_noPerlinRate0.2_nSubsteps4_envFreq50.0_aScale0.5", logs_root + "/field_a1_oracle/Jun03_00-33-08_Skills_climb_pEnergySubsteps2e-6_rAlive2_pTorqueExceedIndicate2e-1_propDelay0.04-0.05_noPerlinRate0.2_nSubsteps4_envFreq50.0_aScale0.5",
logs_root + "/field_a1_oracle/Jun04_01-03-59_Skills_leap_pEnergySubsteps2e-6_rAlive2_pPenV4e-3_pPenD4e-3_pPosY0.20_pYaw0.20_pTorqueExceedSquare1e-3_leapH0.2_propDelay0.04-0.05_noPerlinRate0.2_aScale0.5", logs_root + "/field_a1_oracle/Jun04_01-03-59_Skills_leap_pEnergySubsteps2e-6_rAlive2_pPenV4e-3_pPenD4e-3_pPosY0.20_pYaw0.20_pTorqueExceedSquare1e-3_leapH0.2_propDelay0.04-0.05_noPerlinRate0.2_aScale0.5",
] ]
train_cfg.policy.climb_down_policy_path = logs_root + "/field_a1_oracle/Aug30_16-12-14_Skills_climb_climbDownProb0.5_propDelay0.04-0.05" train_cfg.policy.jump_down_policy_path = logs_root + "/field_a1_oracle/Aug30_16-12-14_Skills_climb_climbDownProb0.5_propDelay0.04-0.05"
train_cfg.runner.resume = False train_cfg.runner.resume = False
env_cfg.env.use_lin_vel = True env_cfg.env.use_lin_vel = True
train_cfg.policy.cmd_vel_mapping = { train_cfg.policy.cmd_vel_mapping = {

View File

@ -163,7 +163,7 @@ def update_cfg_from_args(env_cfg, cfg_train, args):
return env_cfg, cfg_train return env_cfg, cfg_train
def get_args(): def get_args(custom_args=[]):
custom_parameters = [ custom_parameters = [
{"name": "--task", "type": str, "default": "anymal_c_flat", "help": "Resume training or start testing from a checkpoint. Overrides config file if provided."}, {"name": "--task", "type": str, "default": "anymal_c_flat", "help": "Resume training or start testing from a checkpoint. Overrides config file if provided."},
{"name": "--resume", "action": "store_true", "default": False, "help": "Resume training from a checkpoint"}, {"name": "--resume", "action": "store_true", "default": False, "help": "Resume training from a checkpoint"},
@ -178,7 +178,7 @@ def get_args():
{"name": "--num_envs", "type": int, "help": "Number of environments to create. Overrides config file if provided."}, {"name": "--num_envs", "type": int, "help": "Number of environments to create. Overrides config file if provided."},
{"name": "--seed", "type": int, "help": "Random seed. Overrides config file if provided."}, {"name": "--seed", "type": int, "help": "Random seed. Overrides config file if provided."},
{"name": "--max_iterations", "type": int, "help": "Maximum number of training iterations. Overrides config file if provided."}, {"name": "--max_iterations", "type": int, "help": "Maximum number of training iterations. Overrides config file if provided."},
] ] + custom_args
# parse arguments # parse arguments
args = gymutil.parse_arguments( args = gymutil.parse_arguments(
description="RL Policy", description="RL Policy",

View File

@ -96,7 +96,9 @@ class Logger:
a.legend() a.legend()
# plot base pitch # plot base pitch
a = axs[0, 2] a = axs[0, 2]
if log["base_pitch"]: a.plot(time, log["base_pitch"], label='measured') if log["base_pitch"]:
a.plot(time, log["base_pitch"], label='measured')
a.plot(time, [-0.75] * len(time), label= 'thresh')
# if log["command_yaw"]: a.plot(time, log["command_yaw"], label='commanded') # if log["command_yaw"]: a.plot(time, log["command_yaw"], label='commanded')
a.set(xlabel='time [s]', ylabel='base ang [rad]', title='Base pitch') a.set(xlabel='time [s]', ylabel='base ang [rad]', title='Base pitch')
a.legend() a.legend()
@ -113,11 +115,15 @@ class Logger:
a.plot(time, forces[:, i], label=f'force {i}') a.plot(time, forces[:, i], label=f'force {i}')
a.set(xlabel='time [s]', ylabel='Forces z [N]', title='Vertical Contact forces') a.set(xlabel='time [s]', ylabel='Forces z [N]', title='Vertical Contact forces')
a.legend() a.legend()
# plot torque/vel curves # # plot torque/vel curves
# a = axs[2, 1]
# if log["dof_vel"]!=[] and log["dof_torque"]!=[]: a.plot(log["dof_vel"], log["dof_torque"], 'x', label='measured')
# a.set(xlabel='Joint vel [rad/s]', ylabel='Joint Torque [Nm]', title='Torque/velocity curves')
# a.legend()
# plot power curves
a = axs[2, 1] a = axs[2, 1]
if log["dof_vel"]!=[] and log["dof_torque"]!=[]: a.plot(log["dof_vel"], log["dof_torque"], 'x', label='measured') if log["power"]!=[]: a.plot(time, log["power"], label='power [W]')
a.set(xlabel='Joint vel [rad/s]', ylabel='Joint Torque [Nm]', title='Torque/velocity curves') a.set(xlabel='time [s]', ylabel='Power [W]', title='Power')
a.legend()
# plot torques # plot torques
a = axs[2, 2] a = axs[2, 2]
if log["dof_torque"]!=[]: a.plot(time, log["dof_torque"], label='measured') if log["dof_torque"]!=[]: a.plot(time, log["dof_torque"], label='measured')
@ -126,20 +132,29 @@ class Logger:
# plot rewards # plot rewards
a = axs[3, 0] a = axs[3, 0]
if log["max_torques"]: a.plot(time, log["max_torques"], label='max_torques') if log["max_torques"]: a.plot(time, log["max_torques"], label='max_torques')
if log["max_torque_motor"]: a.plot(time, log["max_torque_motor"], label='max_torque_motor')
if log["max_torque_leg"]: a.plot(time, log["max_torque_leg"], label='max_torque_leg')
a.set(xlabel='time [s]', ylabel='max_torques [Nm]', title='max_torques') a.set(xlabel='time [s]', ylabel='max_torques [Nm]', title='max_torques')
a.legend() a.legend(fontsize= 5)
# plot customed data # plot customed data
a = axs[3, 1] a = axs[3, 1]
if log["student_action"]: if log["student_action"]:
a.plot(time, log["student_action"], label='s') a.plot(time, log["student_action"], label='s')
a.plot(time, log["teacher_action"], label='t') a.plot(time, log["teacher_action"], label='t')
a.legend() a.legend()
a.set(xlabel='time [s]', ylabel='tanh', title='student/teacher action') a.set(xlabel='time [s]', ylabel='value before step()', title='student/teacher action')
a = axs[3, 2] a = axs[3, 2]
if log["teacher_action"]: a.plot(time, log["reward"], label='rewards')
a.plot(time, [i-j for i, j in zip(log["student_action"], log["teacher_action"])], label='action difference') for i in log["mark"]:
a.set(xlabel='time [s]', ylabel='tanh', title='action difference') if i > 0:
a.legend() a.plot(time, log["mark"], label='user mark')
break
for key in log.keys():
if "reward_removed_" in key:
a.plot(time, log[key], label= key)
a.set(xlabel='time [s]', ylabel='', title='rewards')
# a.set_ylim([-0.12, 0.1])
a.legend(fontsize = 5)
plt.show() plt.show()
def print_rewards(self): def print_rewards(self):

View File

@ -12,7 +12,7 @@ class BarrierTrack:
# default kwargs # default kwargs
track_kwargs = dict( track_kwargs = dict(
options= [ options= [
"climb", "jump",
"crawl", "crawl",
"tilt", "tilt",
], # each race track will permute all the options ], # each race track will permute all the options
@ -22,12 +22,11 @@ class BarrierTrack:
track_block_length= 1.2, # the x-axis distance from the env origin point track_block_length= 1.2, # the x-axis distance from the env origin point
wall_thickness= 0.04, # [m] wall_thickness= 0.04, # [m]
wall_height= 0.5, # [m] wall_height= 0.5, # [m]
climb= dict( jump= dict(
height= 0.3, height= 0.3,
depth= 0.04, # size along the forward axis depth= 0.04, # size along the forward axis
fake_offset= 0.0, # [m] fake offset will make climb's height info greater than its physical height. fake_offset= 0.0, # [m] fake offset will make jump's height info greater than its physical height.
fake_height= 0.0, # [m] offset/height only one of them can be non-zero jump_down_prob= 0.0, # if > 0, will have a chance to jump down from the obstacle
climb_down_prob= 0.0, # if > 0, will have a chance to climb down from the obstacle
), ),
crawl= dict( crawl= dict(
height= 0.32, height= 0.32,
@ -53,16 +52,18 @@ class BarrierTrack:
border_height= 0., # Incase we want the surrounding plane to be lower than the track border_height= 0., # Incase we want the surrounding plane to be lower than the track
virtual_terrain= False, virtual_terrain= False,
draw_virtual_terrain= False, # set True for visualization draw_virtual_terrain= False, # set True for visualization
check_skill_combinations= False, # check if some specific skills are connected, if set. e.g. climb -> leap check_skill_combinations= False, # check if some specific skills are connected, if set. e.g. jump -> leap
engaging_next_threshold= 0., # if > 0, engaging_next is based on this threshold instead of track_block_length/2. Make sure the obstacle is not too long. engaging_next_threshold= 0., # if > 0, engaging_next is based on this threshold instead of track_block_length/2. Make sure the obstacle is not too long.
engaging_finish_threshold= 0., # an obstacle is considered finished only if the last volume point is this amount away from the block origin.
curriculum_perlin= True, # If True, perlin noise scale will be depends on the difficulty if possible. curriculum_perlin= True, # If True, perlin noise scale will be depends on the difficulty if possible.
no_perlin_threshold= 0.02, # If the perlin noise is too small, clip it to zero. no_perlin_threshold= 0.02, # If the perlin noise is too small, clip it to zero.
walk_in_skill_gap= False, # If True, obstacle ID will be walk when the distance to the obstacle does not reach engaging_next_threshold
) )
max_track_options = 4 # ("tilt", "crawl", "climb", "dynamic") at most max_track_options = 4 # ("tilt", "crawl", "jump", "dynamic") at most
track_options_id_dict = { track_options_id_dict = {
"tilt": 1, "tilt": 1,
"crawl": 2, "crawl": 2,
"climb": 3, "jump": 3,
"leap": 4, "leap": 4,
} # track_id are aranged in this order } # track_id are aranged in this order
def __init__(self, cfg, num_robots: int) -> None: def __init__(self, cfg, num_robots: int) -> None:
@ -88,7 +89,7 @@ class BarrierTrack:
# For each track block (n_options + 1 in total), 3 parameters are enabled: # For each track block (n_options + 1 in total), 3 parameters are enabled:
# - track_id: int, starting track is 0, other numbers depends on the options order. # - track_id: int, starting track is 0, other numbers depends on the options order.
# - obstacle_depth: float, # - obstacle_depth: float,
# - obstacle_critical_params: e.g. tilt width, crawl height, climb height # - obstacle_critical_params: e.g. tilt width, crawl height, jump height
# num_rows + 1 incase the robot finish the entire row of tracks # num_rows + 1 incase the robot finish the entire row of tracks
self.track_info_map = torch.zeros( self.track_info_map = torch.zeros(
@ -101,6 +102,11 @@ class BarrierTrack:
dtype= torch.float32, dtype= torch.float32,
device= self.device, device= self.device,
) )
self.block_starting_height_map = torch.zeros(
(self.cfg.num_rows, self.cfg.num_cols, self.n_blocks_per_track),
dtype= torch.float32,
device= self.device,
) # height [m] related to the world coordinate system
def initialize_track(self): def initialize_track(self):
""" All track blocks are defined as follows """ All track blocks are defined as follows
@ -126,7 +132,7 @@ class BarrierTrack:
np.ceil(self.track_kwargs["track_block_length"] / self.cfg.horizontal_scale).astype(int), np.ceil(self.track_kwargs["track_block_length"] / self.cfg.horizontal_scale).astype(int),
np.ceil(self.track_kwargs["track_width"] / self.cfg.horizontal_scale).astype(int), np.ceil(self.track_kwargs["track_width"] / self.cfg.horizontal_scale).astype(int),
) )
self.n_blocks_per_track = (self.track_kwargs["n_obstacles_per_track"] + 1) if self.track_kwargs["randomize_obstacle_order"] else (len(self.track_kwargs["options"]) + 1) self.n_blocks_per_track = (self.track_kwargs["n_obstacles_per_track"] + 1) if (self.track_kwargs["randomize_obstacle_order"] and len(self.track_kwargs["options"]) > 0) else (len(self.track_kwargs["options"]) + 1)
self.track_resolution = ( self.track_resolution = (
np.ceil(self.track_kwargs["track_block_length"] * self.n_blocks_per_track / self.cfg.horizontal_scale).astype(int), np.ceil(self.track_kwargs["track_block_length"] * self.n_blocks_per_track / self.cfg.horizontal_scale).astype(int),
np.ceil(self.track_kwargs["track_width"] / self.cfg.horizontal_scale).astype(int), np.ceil(self.track_kwargs["track_width"] / self.cfg.horizontal_scale).astype(int),
@ -183,7 +189,7 @@ class BarrierTrack:
height_offset_px = 0 height_offset_px = 0
return track_trimesh, track_heightfield, block_info, height_offset_px return track_trimesh, track_heightfield, block_info, height_offset_px
def get_climb_track(self, def get_jump_track(self,
wall_thickness, wall_thickness,
trimesh_template, trimesh_template,
heightfield_template, heightfield_template,
@ -191,25 +197,25 @@ class BarrierTrack:
heightfield_noise= None, heightfield_noise= None,
virtual= False, virtual= False,
): ):
if isinstance(self.track_kwargs["climb"]["depth"], (tuple, list)): if isinstance(self.track_kwargs["jump"]["depth"], (tuple, list)):
if not virtual: if not virtual:
climb_depth = min(*self.track_kwargs["climb"]["depth"]) jump_depth = min(*self.track_kwargs["jump"]["depth"])
else: else:
climb_depth = np.random.uniform(*self.track_kwargs["climb"]["depth"]) jump_depth = np.random.uniform(*self.track_kwargs["jump"]["depth"])
else: else:
climb_depth = self.track_kwargs["climb"]["depth"] jump_depth = self.track_kwargs["jump"]["depth"]
if isinstance(self.track_kwargs["climb"]["height"], (tuple, list)): if isinstance(self.track_kwargs["jump"]["height"], (tuple, list)):
if difficulty is None: if difficulty is None:
climb_height = np.random.uniform(*self.track_kwargs["climb"]["height"]) jump_height = np.random.uniform(*self.track_kwargs["jump"]["height"])
else: else:
climb_height = (1-difficulty) * self.track_kwargs["climb"]["height"][0] + difficulty * self.track_kwargs["climb"]["height"][1] jump_height = (1-difficulty) * self.track_kwargs["jump"]["height"][0] + difficulty * self.track_kwargs["jump"]["height"][1]
else: else:
climb_height = self.track_kwargs["climb"]["height"] jump_height = self.track_kwargs["jump"]["height"]
if self.track_kwargs["climb"].get("climb_down_prob", 0.) > 0.: if self.track_kwargs["jump"].get("jump_down_prob", 0.) > 0.:
if np.random.uniform() < self.track_kwargs["climb"]["climb_down_prob"]: if np.random.uniform() < self.track_kwargs["jump"]["jump_down_prob"]:
climb_height = -climb_height jump_height = -jump_height
depth_px = int(climb_depth / self.cfg.horizontal_scale) depth_px = int(jump_depth / self.cfg.horizontal_scale)
height_value = climb_height / self.cfg.vertical_scale height_value = jump_height / self.cfg.vertical_scale
wall_thickness_px = int(wall_thickness / self.cfg.horizontal_scale) + 1 wall_thickness_px = int(wall_thickness / self.cfg.horizontal_scale) + 1
if not heightfield_noise is None: if not heightfield_noise is None:
@ -223,7 +229,7 @@ class BarrierTrack:
] += height_value ] += height_value
if height_value < 0.: if height_value < 0.:
track_heightfield[ track_heightfield[
depth_px:, (0 if virtual else depth_px):,
max(0, wall_thickness_px-1): min(-1, -wall_thickness_px+1), max(0, wall_thickness_px-1): min(-1, -wall_thickness_px+1),
] += height_value ] += height_value
track_trimesh = convert_heightfield_to_trimesh( track_trimesh = convert_heightfield_to_trimesh(
@ -233,12 +239,15 @@ class BarrierTrack:
self.cfg.slope_treshold, self.cfg.slope_treshold,
) )
assert not ( assert not (
self.track_kwargs["climb"].get("fake_offset", 0.) != 0. and self.track_kwargs["climb"].get("fake_height", 0.) != 0.), \ self.track_kwargs["jump"].get("fake_offset", 0.) != 0. and self.track_kwargs["jump"].get("fake_height", 0.) != 0.), \
"fake_offset and fake_height cannot be both non-zero" "fake_offset and fake_height cannot be both non-zero"
climb_height_ = climb_height + (self.track_kwargs["climb"].get("fake_offset", 0.) if self.track_kwargs["climb"].get("fake_offset", 0.) == 0. else self.track_kwargs["climb"].get("fake_height", 0.)) jump_height_ = jump_height + (
self.track_kwargs["jump"].get("fake_offset", 0.) \
if jump_height > 0. \
else 0.)
block_info = torch.tensor([ block_info = torch.tensor([
climb_depth, jump_depth,
climb_height_, jump_height_,
], dtype= torch.float32, device= self.device) ], dtype= torch.float32, device= self.device)
height_offset_px = height_value if not virtual else min(height_value, 0) height_offset_px = height_value if not virtual else min(height_value, 0)
return track_trimesh, track_heightfield, block_info, height_offset_px return track_trimesh, track_heightfield, block_info, height_offset_px
@ -399,9 +408,9 @@ class BarrierTrack:
track_heightfield = heightfield_template + heightfield_noise track_heightfield = heightfield_template + heightfield_noise
else: else:
track_heightfield = heightfield_template.copy() track_heightfield = heightfield_template.copy()
# There is no difference between virtual/non-virtual environment. start_px = int(self.track_kwargs["leap"].get("fake_offset", 0.) / self.cfg.horizontal_scale) + 1 if not virtual else 1
track_heightfield[ track_heightfield[
1: length_px+1, start_px: length_px+1,
max(0, wall_thickness_px-1): min(-1, -wall_thickness_px+1), max(0, wall_thickness_px-1): min(-1, -wall_thickness_px+1),
] -= depth_value ] -= depth_value
track_trimesh = convert_heightfield_to_trimesh( track_trimesh = convert_heightfield_to_trimesh(
@ -538,13 +547,14 @@ class BarrierTrack:
self.track_info_map[row_idx, col_idx, 0, 0] = 0 self.track_info_map[row_idx, col_idx, 0, 0] = 0
self.track_info_map[row_idx, col_idx, 0, 1:] = block_info self.track_info_map[row_idx, col_idx, 0, 1:] = block_info
self.track_width_map[row_idx, col_idx] = self.env_width - wall_thickness * 2 self.track_width_map[row_idx, col_idx] = self.env_width - wall_thickness * 2
self.block_starting_height_map[row_idx, col_idx, 0] = block_starting_height_px * self.cfg.vertical_scale
block_starting_height_px += height_offset_px block_starting_height_px += height_offset_px
for obstacle_idx, obstacle_selection in enumerate(obstacle_order): for obstacle_idx, obstacle_selection in enumerate(obstacle_order):
obstacle_name = self.track_kwargs["options"][obstacle_selection] obstacle_name = self.track_kwargs["options"][obstacle_selection]
obstacle_id = self.track_options_id_dict[obstacle_name] obstacle_id = self.track_options_id_dict[obstacle_name]
# call method to generate trimesh and heightfield for each track block. # call method to generate trimesh and heightfield for each track block.
# For example get_climb_track, get_tilt_track # For example get_jump_track, get_tilt_track
# using `virtual_track` to create non-collision mesh for collocation method in training. # using `virtual_track` to create non-collision mesh for collocation method in training.
track_trimesh, track_heightfield, block_info, height_offset_px = getattr(self, "get_" + obstacle_name + "_track")( track_trimesh, track_heightfield, block_info, height_offset_px = getattr(self, "get_" + obstacle_name + "_track")(
wall_thickness, wall_thickness,
@ -576,6 +586,7 @@ class BarrierTrack:
) )
self.track_info_map[row_idx, col_idx, obstacle_idx + 1, 0] = obstacle_id self.track_info_map[row_idx, col_idx, obstacle_idx + 1, 0] = obstacle_id
self.track_info_map[row_idx, col_idx, obstacle_idx + 1, 1:] = block_info self.track_info_map[row_idx, col_idx, obstacle_idx + 1, 1:] = block_info
self.block_starting_height_map[row_idx, col_idx, obstacle_idx + 1] = block_starting_height_px * self.cfg.vertical_scale
block_starting_height_px += height_offset_px block_starting_height_px += height_offset_px
return block_starting_height_px return block_starting_height_px
@ -711,7 +722,7 @@ class BarrierTrack:
block_idx = torch.floor(forward_distance / self.env_block_length).to(int) # (N,) block_idx = torch.floor(forward_distance / self.env_block_length).to(int) # (N,)
block_idx_clipped = torch.clip( block_idx_clipped = torch.clip(
block_idx, block_idx,
0., 0,
(self.n_blocks_per_track - 1), (self.n_blocks_per_track - 1),
) )
in_track_mask = (track_idx == track_idx_clipped).all(dim= -1) & (block_idx == block_idx_clipped) in_track_mask = (track_idx == track_idx_clipped).all(dim= -1) & (block_idx == block_idx_clipped)
@ -757,6 +768,7 @@ class BarrierTrack:
else self.env_block_length / 2 else self.env_block_length / 2
min_x_points_offset = torch.min(volume_points_offset[:, :, 0], dim= -1)[0] # (n,) min_x_points_offset = torch.min(volume_points_offset[:, :, 0], dim= -1)[0] # (n,)
engaging_next_block = ((in_block_distance + min_x_points_offset) > engaging_obstacle_depth) \ engaging_next_block = ((in_block_distance + min_x_points_offset) > engaging_obstacle_depth) \
& (in_block_distance > self.track_kwargs.get("engaging_finish_threshold", 0.)) \
& (in_block_distance > engaging_next_distance) & (in_block_distance > engaging_next_distance)
# update the engaging track_idx and block_idx if engaging next # update the engaging track_idx and block_idx if engaging next
@ -790,7 +802,9 @@ class BarrierTrack:
0 0
].to(int) ].to(int)
if self.track_kwargs.get("walk_in_skill_gap", False): if self.track_kwargs.get("walk_in_skill_gap", False):
between_skill_mask = ((in_block_distance + min_x_points_offset) > engaging_obstacle_depth) & (in_block_distance < engaging_next_distance) between_skill_mask = ((in_block_distance + min_x_points_offset) > engaging_obstacle_depth) \
& (in_block_distance > self.track_kwargs.get("engaging_finish_threshold", 0.)) \
& (in_block_distance < engaging_next_distance)
obstacle_id_selection[between_skill_mask] = 0. obstacle_id_selection[between_skill_mask] = 0.
engaging_block_info[between_skill_mask] = 0. engaging_block_info[between_skill_mask] = 0.
engaging_obstacle_onehot[ engaging_obstacle_onehot[
@ -827,18 +841,35 @@ class BarrierTrack:
distance_neg_y, distance_neg_y,
], dim= -1) # (n, 2) ], dim= -1) # (n, 2)
def get_climb_penetration_depths(self, def get_jump_penetration_depths(self,
block_infos, block_infos,
positions_in_block, positions_in_block,
mask_only= False, mask_only= False,
): ):
in_obstacle_mask = positions_in_block[:, 0] <= block_infos[:, 1] in_up_mask = torch.logical_and(
climb_over_mask = positions_in_block[:, 2] > block_infos[:, 2] positions_in_block[:, 0] <= block_infos[:, 1],
penetrated_mask = torch.logical_and(in_obstacle_mask, (torch.logical_not(climb_over_mask))) block_infos[:, 2] > 0.,
)
in_down_mask = torch.logical_and(
positions_in_block[:, 0] <= block_infos[:, 1],
block_infos[:, 2] < 0.,
)
jump_over_mask = torch.logical_and(
positions_in_block[:, 2] > block_infos[:, 2],
positions_in_block[:, 2] > 0, # to avoid the penetration of virtual obstacle in jump down.
) # (n_points,)
penetrated_mask = torch.logical_and(
torch.logical_or(in_up_mask, in_down_mask),
(torch.logical_not(jump_over_mask)),
)
if mask_only: if mask_only:
return penetrated_mask.to(torch.float32) return penetrated_mask.to(torch.float32)
penetration_depths_buffer = torch.zeros_like(penetrated_mask, dtype= torch.float32) penetration_depths_buffer = torch.zeros_like(penetrated_mask, dtype= torch.float32)
penetration_depths_buffer[penetrated_mask] = block_infos[penetrated_mask, 2] - positions_in_block[penetrated_mask, 2] penetrate_up_mask = torch.logical_and(penetrated_mask, in_up_mask)
penetration_depths_buffer[penetrate_up_mask] = block_infos[penetrate_up_mask, 2] - positions_in_block[penetrate_up_mask, 2]
penetrate_down_mask = torch.logical_and(penetrated_mask, in_down_mask)
penetration_depths_buffer[penetrate_down_mask] = 0. - positions_in_block[penetrate_down_mask, 2]
return penetration_depths_buffer return penetration_depths_buffer
def get_tilt_penetration_depths(self, def get_tilt_penetration_depths(self,
@ -906,10 +937,11 @@ class BarrierTrack:
forward_distance = sample_points[:, 0] - self.cfg.border_size - (track_idx[:, 0] * self.env_length) # (N,) w.r.t a track forward_distance = sample_points[:, 0] - self.cfg.border_size - (track_idx[:, 0] * self.env_length) # (N,) w.r.t a track
block_idx = torch.floor(forward_distance / self.env_block_length).to(int) # (N,) block_idx = torch.floor(forward_distance / self.env_block_length).to(int) # (N,)
block_idx[block_idx >= self.track_info_map.shape[2]] = 0. block_idx[block_idx >= self.track_info_map.shape[2]] = 0.
positions_in_block = torch.cat([ positions_in_block = torch.stack([
(forward_distance % self.env_block_length).unsqueeze(-1), forward_distance % self.env_block_length,
sample_points[:, 1:] - self.env_origins_pyt[track_idx_clipped[:, 0], track_idx_clipped[:, 1]][:, 1:], sample_points[:, 1] - self.env_origins_pyt[track_idx_clipped[:, 0], track_idx_clipped[:, 1]][:, 1],
], dim= -1) sample_points[:, 2] - self.block_starting_height_map[track_idx_clipped[:, 0], track_idx_clipped[:, 1], block_idx],
], dim= -1) # (N, 3) related to the origin of the block, not the track.
block_infos = self.track_info_map[track_idx_clipped[:, 0], track_idx_clipped[:, 1], block_idx] # (N, 3) block_infos = self.track_info_map[track_idx_clipped[:, 0], track_idx_clipped[:, 1], block_idx] # (N, 3)
penetration_depths = torch.zeros_like(sample_points[:, 0]) # shape (N,) penetration_depths = torch.zeros_like(sample_points[:, 0]) # shape (N,)
@ -967,23 +999,23 @@ class BarrierTrack:
return passed_depths return passed_depths
######## methods to draw visualization ####################### ######## methods to draw visualization #######################
def draw_virtual_climb_track(self, def draw_virtual_jump_track(self,
block_info, block_info,
xy_origin, block_origin,
): ):
climb_depth = block_info[0] jump_depth = block_info[1]
climb_height = block_info[1] jump_height = block_info[2]
geom = gymutil.WireframeBoxGeometry( geom = gymutil.WireframeBoxGeometry(
climb_depth, jump_depth if jump_height > 0 else self.track_kwargs["jump"].get("down_forward_length", 0.1),
self.env_width, self.env_width,
climb_height, jump_height,
pose= None, pose= None,
color= (0, 0, 1), color= (0, 0, 1),
) )
pose = gymapi.Transform(gymapi.Vec3( pose = gymapi.Transform(gymapi.Vec3(
climb_depth/2 + xy_origin[0], jump_depth/2 + block_origin[0],
self.env_width/2 + xy_origin[1], block_origin[1],
climb_height/2, jump_height/2 + block_origin[2],
), r= None) ), r= None)
gymutil.draw_lines( gymutil.draw_lines(
geom, geom,
@ -995,10 +1027,10 @@ class BarrierTrack:
def draw_virtual_tilt_track(self, def draw_virtual_tilt_track(self,
block_info, block_info,
xy_origin, block_origin,
): ):
tilt_depth = block_info[0] tilt_depth = block_info[1]
tilt_width = block_info[1] tilt_width = block_info[2]
wall_height = self.track_kwargs["tilt"]["wall_height"] wall_height = self.track_kwargs["tilt"]["wall_height"]
geom = gymutil.WireframeBoxGeometry( geom = gymutil.WireframeBoxGeometry(
tilt_depth, tilt_depth,
@ -1009,9 +1041,9 @@ class BarrierTrack:
) )
pose = gymapi.Transform(gymapi.Vec3( pose = gymapi.Transform(gymapi.Vec3(
tilt_depth/2 + xy_origin[0], tilt_depth/2 + block_origin[0],
(self.env_width - tilt_width) / 4 + xy_origin[1], block_origin[1] + (self.env_width + tilt_width) / 4,
wall_height/2, wall_height/2 + block_origin[2],
), r= None) ), r= None)
gymutil.draw_lines( gymutil.draw_lines(
geom, geom,
@ -1021,9 +1053,9 @@ class BarrierTrack:
pose, pose,
) )
pose = gymapi.Transform(gymapi.Vec3( pose = gymapi.Transform(gymapi.Vec3(
tilt_depth/2 + xy_origin[0], tilt_depth/2 + block_origin[0],
self.env_width - (self.env_width - tilt_width) / 4 + xy_origin[1], block_origin[1] - (self.env_width + tilt_width) / 4,
wall_height/2, wall_height/2 + block_origin[2],
), r= None) ), r= None)
gymutil.draw_lines( gymutil.draw_lines(
geom, geom,
@ -1035,11 +1067,11 @@ class BarrierTrack:
def draw_virtual_crawl_track(self, def draw_virtual_crawl_track(self,
block_info, block_info,
xy_origin, block_origin,
): ):
crawl_depth = block_info[0] crawl_depth = block_info[1]
crawl_height = block_info[1] crawl_height = block_info[2]
wall_height = self.track_kwargs["crawl"]["wall_height"] wall_height = self.track_kwargs["crawl"]["wall_height"][1] if isinstance(self.track_kwargs["crawl"]["wall_height"], (list, tuple)) else self.track_kwargs["crawl"]["wall_height"]
geom = gymutil.WireframeBoxGeometry( geom = gymutil.WireframeBoxGeometry(
crawl_depth, crawl_depth,
self.env_width, self.env_width,
@ -1048,9 +1080,9 @@ class BarrierTrack:
color= (0, 0, 1), color= (0, 0, 1),
) )
pose = gymapi.Transform(gymapi.Vec3( pose = gymapi.Transform(gymapi.Vec3(
crawl_depth/2 + xy_origin[0], crawl_depth/2 + block_origin[0],
self.env_width/2 + xy_origin[1], block_origin[1],
wall_height/2 + crawl_height, wall_height/2 + crawl_height + block_origin[2],
), r= None) ), r= None)
gymutil.draw_lines( gymutil.draw_lines(
geom, geom,
@ -1062,11 +1094,11 @@ class BarrierTrack:
def draw_virtual_leap_track(self, def draw_virtual_leap_track(self,
block_info, block_info,
xy_origin, block_origin,
): ):
# virtual/non-virtual terrain looks the same when leaping the gap. # virtual/non-virtual terrain looks the same when leaping the gap.
# but the expected height can be visualized # but the expected height can be visualized
leap_length = block_info[0] leap_length = block_info[1]
expected_height = self.track_kwargs["leap"]["height"] expected_height = self.track_kwargs["leap"]["height"]
geom = gymutil.WireframeBoxGeometry( geom = gymutil.WireframeBoxGeometry(
leap_length, leap_length,
@ -1076,9 +1108,9 @@ class BarrierTrack:
color= (0, 0.5, 0.5), color= (0, 0.5, 0.5),
) )
pose = gymapi.Transform(gymapi.Vec3( pose = gymapi.Transform(gymapi.Vec3(
leap_length/2 + xy_origin[0], leap_length/2 + block_origin[0],
self.env_width/2 + xy_origin[1], block_origin[1],
expected_height/2, expected_height/2 + block_origin[2],
), r= None) ), r= None)
gymutil.draw_lines( gymutil.draw_lines(
geom, geom,
@ -1090,12 +1122,12 @@ class BarrierTrack:
def draw_virtual_track(self, def draw_virtual_track(self,
track_origin_px,
row_idx, row_idx,
col_idx, col_idx,
): ):
difficulties = self.get_difficulty(row_idx, col_idx) difficulties = self.get_difficulty(row_idx, col_idx)
virtual_terrain = difficulties[1] virtual_terrain = difficulties[1]
track_origin = self.env_origins[row_idx, col_idx]
for block_idx in range(1, self.track_info_map.shape[2]): for block_idx in range(1, self.track_info_map.shape[2]):
if virtual_terrain and self.track_kwargs["draw_virtual_terrain"]: if virtual_terrain and self.track_kwargs["draw_virtual_terrain"]:
@ -1104,13 +1136,14 @@ class BarrierTrack:
if v == obstacle_id: if v == obstacle_id:
obstacle_name = k obstacle_name = k
break break
block_info = self.track_info_map[row_idx, col_idx, block_idx, 1:] block_info = self.track_info_map[row_idx, col_idx, block_idx] # (3,)
heightfield_x0 = track_origin_px[0] + self.track_block_resolution[0] * block_idx
heightfield_y0 = track_origin_px[1]
getattr(self, "draw_virtual_" + obstacle_name + "_track")( getattr(self, "draw_virtual_" + obstacle_name + "_track")(
block_info, block_info,
(np.array([heightfield_x0, heightfield_y0]) * self.cfg.horizontal_scale), np.array([
track_origin[0] + self.track_kwargs["track_block_length"] * block_idx,
track_origin[1],
self.block_starting_height_map[row_idx, col_idx, block_idx].cpu().numpy(),
]),
) )
def draw_virtual_terrain(self, viewer): def draw_virtual_terrain(self, viewer):
@ -1119,7 +1152,6 @@ class BarrierTrack:
for row_idx in range(self.cfg.num_rows): for row_idx in range(self.cfg.num_rows):
for col_idx in range(self.cfg.num_cols): for col_idx in range(self.cfg.num_cols):
self.draw_virtual_track( self.draw_virtual_track(
self.track_origins_px[row_idx, col_idx, :2],
row_idx= row_idx, row_idx= row_idx,
col_idx= col_idx, col_idx= col_idx,
) )