[update] env and training config for go1

This commit is contained in:
Ziwen Zhuang 2023-11-14 18:58:32 +08:00
parent f771497315
commit 1ab4639f15
28 changed files with 2151 additions and 384 deletions

View File

@ -8,7 +8,7 @@ Authors:
[Sören Schwertfeger](https://robotics.shanghaitech.edu.cn/people/soeren),
[Chelsea Finn](https://ai.stanford.edu/~cbfinn/),
[Hang Zhao](https://hangzhaomit.github.io/)<br>
Conference on Robot Learning (CoRL) 2023, Oral <br>
Conference on Robot Learning (CoRL) 2023, **Oral**, **Best Systems Paper Award Finalist (top 3)** <br>
<p align="center">
<img src="images/teaser.jpeg" width="80%"/>
@ -34,7 +34,7 @@ To deploy the trained model on your real robot, please follow the instructions i
If you cannot run the distillation part, or all graphics computation goes to GPU 0 despite having multiple GPUs and setting CUDA_VISIBLE_DEVICES, please use Docker to isolate each GPU.
## To Do (will be done before Nov 2023) ##
- [ ] Go1 training pipeline in simulation
- [x] Go1 training configuration (not from scratch)
- [ ] A1 deployment code
- [x] Go1 deployment code

View File

@ -62,11 +62,27 @@ task_registry.register( "go1_field", LeggedRobotNoisy, Go1FieldCfg(), Go1FieldCf
task_registry.register( "go1_distill", LeggedRobotNoisy, Go1FieldDistillCfg(), Go1FieldDistillCfgPPO())
## The following tasks are registered for the convenience of open-sourcing
from .a1.a1_climb_config import A1ClimbCfg, A1ClimbCfgPPO
task_registry.register( "a1_climb", LeggedRobotNoisy, A1ClimbCfg(), A1ClimbCfgPPO() )
from .a1.a1_remote_config import A1RemoteCfg, A1RemoteCfgPPO
task_registry.register( "a1_remote", LeggedRobotNoisy, A1RemoteCfg(), A1RemoteCfgPPO() )
from .a1.a1_jump_config import A1JumpCfg, A1JumpCfgPPO
task_registry.register( "a1_jump", LeggedRobotNoisy, A1JumpCfg(), A1JumpCfgPPO() )
from .a1.a1_down_config import A1DownCfg, A1DownCfgPPO
task_registry.register( "a1_down", LeggedRobotNoisy, A1DownCfg(), A1DownCfgPPO() )
from .a1.a1_leap_config import A1LeapCfg, A1LeapCfgPPO
task_registry.register( "a1_leap", LeggedRobotNoisy, A1LeapCfg(), A1LeapCfgPPO() )
from .a1.a1_crawl_config import A1CrawlCfg, A1CrawlCfgPPO
task_registry.register( "a1_crawl", LeggedRobotNoisy, A1CrawlCfg(), A1CrawlCfgPPO() )
from .a1.a1_tilt_config import A1TiltCfg, A1TiltCfgPPO
task_registry.register( "a1_tilt", LeggedRobotNoisy, A1TiltCfg(), A1TiltCfgPPO() )
from .go1.go1_remote_config import Go1RemoteCfg, Go1RemoteCfgPPO
task_registry.register( "go1_remote", LeggedRobotNoisy, Go1RemoteCfg(), Go1RemoteCfgPPO() )
from .go1.go1_jump_config import Go1JumpCfg, Go1JumpCfgPPO
task_registry.register( "go1_jump", LeggedRobotNoisy, Go1JumpCfg(), Go1JumpCfgPPO() )
from .go1.go1_down_config import Go1DownCfg, Go1DownCfgPPO
task_registry.register( "go1_down", LeggedRobotNoisy, Go1DownCfg(), Go1DownCfgPPO() )
from .go1.go1_leap_config import Go1LeapCfg, Go1LeapCfgPPO
task_registry.register( "go1_leap", LeggedRobotNoisy, Go1LeapCfg(), Go1LeapCfgPPO() )
from .go1.go1_crawl_config import Go1CrawlCfg, Go1CrawlCfgPPO
task_registry.register( "go1_crawl", LeggedRobotNoisy, Go1CrawlCfg(), Go1CrawlCfgPPO() )
from .go1.go1_tilt_config import Go1TiltCfg, Go1TiltCfgPPO
task_registry.register( "go1_tilt", LeggedRobotNoisy, Go1TiltCfg(), Go1TiltCfgPPO() )

View File

@ -71,6 +71,14 @@ class A1RoughCfg( LeggedRobotCfg ):
penalize_contacts_on = ["thigh", "calf"]
terminate_after_contacts_on = ["base"]
self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter
sdk_dof_range = dict( # copied from a1_const.h in unitree_legged_sdk
Hip_max= 0.802,
Hip_min=-0.802,
Thigh_max= 4.19,
Thigh_min= -1.05,
Calf_max= -0.916,
Calf_min= -2.7,
)
class rewards( LeggedRobotCfg.rewards ):
soft_dof_pos_limit = 0.9

View File

@ -0,0 +1,116 @@
from os import path as osp
import numpy as np
from legged_gym.envs.a1.a1_field_config import A1FieldCfg, A1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict
class A1DownCfg( A1FieldCfg ):
#### uncomment this to train non-virtual terrain
class sensor( A1FieldCfg.sensor ):
class proprioception( A1FieldCfg.sensor.proprioception ):
latency_range = [0.04-0.0025, 0.04+0.0075]
#### uncomment the above to train non-virtual terrain
class terrain( A1FieldCfg.terrain ):
max_init_terrain_level = 2
border_size = 5
slope_treshold = 20.
curriculum = True
BarrierTrack_kwargs = merge_dict(A1FieldCfg.terrain.BarrierTrack_kwargs, dict(
options= [
"jump",
],
track_block_length= 1.6,
jump= dict(
height= (0.2, 0.45),
depth= 0.3,
fake_offset= 0.0, # [m] an offset that makes it easier for the robot to engage the obstacle
jump_down_prob= 1., # probability of jumping down; use it in non-virtual terrain
),
virtual_terrain= True, # Change this to False for real terrain
no_perlin_threshold= 0.1,
n_obstacles_per_track= 3,
))
TerrainPerlin_kwargs = merge_dict(A1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
zScale= [0.05, 0.15],
))
class commands( A1FieldCfg.commands ):
class ranges( A1FieldCfg.commands.ranges ):
lin_vel_x = [0.5, 1.5]
lin_vel_y = [0.0, 0.0]
ang_vel_yaw = [0., 0.]
class termination( A1FieldCfg.termination ):
# additional factors that determine whether to terminate the episode
termination_terms = [
"roll",
"pitch",
"z_low",
"z_high",
"out_of_track",
]
z_low_kwargs = merge_dict(A1FieldCfg.termination.z_low_kwargs, dict(
threshold= -3.,
))
class domain_rand( A1FieldCfg.domain_rand ):
init_base_pos_range = dict(
x= [0.2, 0.6],
y= [-0.25, 0.25],
)
init_base_rot_range = dict(
roll= [-0.1, 0.1],
pitch= [-0.1, 0.1],
)
push_robots = True
class rewards( A1FieldCfg.rewards ):
class scales:
tracking_ang_vel = 0.1
world_vel_l2norm = -1.3
legs_energy_substeps = -1e-6
alive = 2.
penetrate_depth = -2e-3
penetrate_volume = -2e-3
exceed_dof_pos_limits = -4e-1
exceed_torque_limits_l1norm = -4e-1
feet_contact_forces = -1e-2
torques = -2e-5
lin_pos_y = -0.1
yaw_abs = -0.1
collision = -0.1
down_cond = 0.3
soft_dof_pos_limit = 0.8
max_contact_force = 200.0
class curriculum( A1FieldCfg.curriculum ):
penetrate_volume_threshold_harder = 2000000
penetrate_volume_threshold_easier = 4000000
penetrate_depth_threshold_harder = 200000
penetrate_depth_threshold_easier = 400000
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class A1DownCfgPPO( A1FieldCfgPPO ):
class algorithm( A1FieldCfgPPO.algorithm ):
entropy_coef = 0.0
clip_min_std = 0.2
class runner( A1FieldCfgPPO.runner ):
policy_class_name = "ActorCriticRecurrent"
experiment_name = "field_a1"
resume = True
load_run = "{Your trained walk model directory}"
run_name = "".join(["Skills_",
("down" if A1DownCfg.terrain.BarrierTrack_kwargs["jump"]["jump_down_prob"] > 0. else "jump"),
("_noResume" if not resume else "from" + "_".join(load_run.split("/")[-1].split("_")[:2])),
])
max_iterations = 20000
save_interval = 500

View File

@ -1,4 +1,5 @@
import numpy as np
import os.path as osp
from legged_gym.envs.a1.a1_config import A1RoughCfg, A1RoughCfgPPO
class A1FieldCfg( A1RoughCfg ):
@ -59,7 +60,7 @@ class A1FieldCfg( A1RoughCfg ):
BarrierTrack_kwargs = dict(
options= [
# "climb",
# "jump",
# "crawl",
# "tilt",
# "leap",
@ -68,11 +69,11 @@ class A1FieldCfg( A1RoughCfg ):
track_block_length= 2., # the x-axis distance from the env origin point
wall_thickness= (0.04, 0.2), # [m]
wall_height= -0.05,
climb= dict(
jump= dict(
height= (0.2, 0.6),
depth= (0.1, 0.8), # size along the forward axis
fake_offset= 0.0, # [m] an offset that makes it easier for the robot to engage the obstacle
climb_down_prob= 0.0,
jump_down_prob= 0., # probability of jumping down; use it in non-virtual terrain
),
crawl= dict(
height= (0.25, 0.5),
@ -97,13 +98,14 @@ class A1FieldCfg( A1RoughCfg ):
virtual_terrain= False,
draw_virtual_terrain= True,
engaging_next_threshold= 1.2,
engaging_finish_threshold= 0.,
curriculum_perlin= False,
no_perlin_threshold= 0.0,
no_perlin_threshold= 0.1,
)
TerrainPerlin_kwargs = dict(
zScale= [0.1, 0.15],
# zScale= 0.1, # Use a constant zScale for training a walk policy
zScale= [0.08, 0.15],
# zScale= 0.15, # Use a constant zScale for training a walk policy
frequency= 10,
)
@ -114,9 +116,6 @@ class A1FieldCfg( A1RoughCfg ):
lin_vel_x = [-1.0, 1.0]
lin_vel_y = [0.0, 0.0]
ang_vel_yaw = [0., 0.]
######## configs for training a walk policy #########
# lin_vel_y = [-1., 1.]
# ang_vel_yaw = [-1., 1.]
class control( A1RoughCfg.control ):
stiffness = {'joint': 50.}
@ -125,10 +124,15 @@ class A1FieldCfg( A1RoughCfg ):
torque_limits = 25 # override the urdf
computer_clip_torque = True
motor_clip_torque = False
# action_delay = False # set to True to enable action delay in sim
# action_delay_range = [0.002, 0.022] # [s]
# action_delay_resample_time = 5.0 # [s]
class asset( A1RoughCfg.asset ):
penalize_contacts_on = ["base", "thigh"]
penalize_contacts_on = ["base", "thigh", "calf"]
terminate_after_contacts_on = ["base", "imu"]
front_hip_names = ["FR_hip_joint", "FL_hip_joint"]
rear_hip_names = ["RR_hip_joint", "RL_hip_joint"]
class termination:
# additional factors that determine whether to terminate the episode
@ -145,8 +149,8 @@ class A1FieldCfg( A1RoughCfg ):
tilt_threshold= 1.5,
)
pitch_kwargs = dict(
threshold= 1.6, # [rad] # for leap, climb
climb_threshold= 1.6,
threshold= 1.6, # [rad] # for leap, jump
jump_threshold= 1.6,
leap_threshold= 1.5,
)
z_low_kwargs = dict(
@ -176,7 +180,7 @@ class A1FieldCfg( A1RoughCfg ):
added_mass_range = [1.0, 3.0]
randomize_friction = True
friction_range = [0.0, 1.]
friction_range = [0., 2.]
init_base_pos_range = dict(
x= [0.2, 0.6],
@ -200,10 +204,29 @@ class A1FieldCfg( A1RoughCfg ):
class normalization( A1RoughCfg.normalization ):
class obs_scales( A1RoughCfg.normalization.obs_scales ):
forward_depth = 1.
base_pose = [0., 0., 1., 1., 1., 1.]
base_pose = [0., 0., 0., 1., 1., 1.]
engaging_block = 1.
""" The following action clip is used for tanh policy activation. """
# clip_actions_method = "hard"
# dof_pos_redundancy = 0.2
# clip_actions_low = []
# clip_actions_high = []
# for sdk_joint_name, sim_joint_name in zip(
# ["Hip", "Thigh", "Calf"] * 4,
# [ # in the order as simulation
# "FL_hip_joint", "FL_thigh_joint", "FL_calf_joint",
# "FR_hip_joint", "FR_thigh_joint", "FR_calf_joint",
# "RL_hip_joint", "RL_thigh_joint", "RL_calf_joint",
# "RR_hip_joint", "RR_thigh_joint", "RR_calf_joint",
# ],
# ): # This setting is only for position control.
# clip_actions_low.append( (A1RoughCfg.asset.sdk_dof_range[sdk_joint_name + "_min"] + dof_pos_redundancy - A1RoughCfg.init_state.default_joint_angles[sim_joint_name]) / a1_action_scale )
# clip_actions_high.append( (A1RoughCfg.asset.sdk_dof_range[sdk_joint_name + "_max"] - dof_pos_redundancy - A1RoughCfg.init_state.default_joint_angles[sim_joint_name]) / a1_action_scale )
# del dof_pos_redundancy, sdk_joint_name, sim_joint_name # This is not intended to be an attribute of normalization
""" The above action clip is used for tanh policy activation. """
class noise( A1RoughCfg.noise ):
add_noise = False # disable internal uniform +- 1 noise, and no noise in proprioception
class noise_scales( A1RoughCfg.noise.noise_scales ):
forward_depth = 0.1
base_pose = 1.0
@ -244,6 +267,7 @@ class A1FieldCfg( A1RoughCfg ):
no_moveup_when_fall = False
# chosen heuristically, please refer to `LeggedRobotField._get_terrain_curriculum_move` with fixed body_measure_points
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class A1FieldCfgPPO( A1RoughCfgPPO ):
class algorithm( A1RoughCfgPPO.algorithm ):
entropy_coef = 0.01
@ -256,8 +280,9 @@ class A1FieldCfgPPO( A1RoughCfgPPO ):
class runner( A1RoughCfgPPO.runner ):
policy_class_name = "ActorCriticRecurrent"
experiment_name = "field_a1"
run_name = "".join(["WalkingBase",
("_pEnergySubsteps" + np.format_float_scientific(-A1FieldCfg.rewards.scales.legs_energy_substeps, precision=1, exp_digits=1, trim="-") if A1FieldCfg.rewards.scales.legs_energy_substeps != 0 else ""),
resume = False
run_name = "".join(["WalkForward",
("_propDelay{:.2f}-{:.2f}".format(
A1FieldCfg.sensor.proprioception.latency_range[0],
A1FieldCfg.sensor.proprioception.latency_range[1],

View File

@ -3,12 +3,14 @@ import numpy as np
from legged_gym.envs.a1.a1_field_config import A1FieldCfg, A1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict
class A1ClimbCfg( A1FieldCfg ):
class A1JumpCfg( A1FieldCfg ):
class init_state( A1FieldCfg.init_state ):
pos = [0., 0., 0.45]
#### uncomment this to train non-virtual terrain
# class sensor( A1FieldCfg.sensor ):
# class proprioception( A1FieldCfg.sensor.proprioception ):
# delay_action_obs = True
# latency_range = [0.04-0.0025, 0.04+0.0075]
#### uncomment the above to train non-virtual terrain
@ -20,22 +22,23 @@ class A1ClimbCfg( A1FieldCfg ):
BarrierTrack_kwargs = merge_dict(A1FieldCfg.terrain.BarrierTrack_kwargs, dict(
options= [
"climb",
"jump",
],
track_block_length= 1.6,
climb= dict(
jump= dict(
height= (0.2, 0.6), # use this to train in virtual terrain
# height= (0.1, 0.5), # use this to train in non-virtual terrain
depth= (0.1, 0.2),
fake_offset= 0.0, # [m] an offset that makes it easier for the robot to engage the obstacle
climb_down_prob= 0., # probability of climbing down use it in non-virtual terrain
jump_down_prob= 0., # probability of jumping down; use it in non-virtual terrain
),
virtual_terrain= False, # Change this to False for real terrain
no_perlin_threshold= 0.06,
no_perlin_threshold= 0.12,
n_obstacles_per_track= 3,
))
TerrainPerlin_kwargs = merge_dict(A1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
zScale= [0.05, 0.1],
zScale= [0.05, 0.15],
))
class commands( A1FieldCfg.commands ):
@ -58,10 +61,19 @@ class A1ClimbCfg( A1FieldCfg ):
))
class domain_rand( A1FieldCfg.domain_rand ):
class com_range( A1FieldCfg.domain_rand.com_range ):
z = [-0.1, 0.1]
init_base_pos_range = dict(
x= [0.2, 0.6],
y= [-0.25, 0.25],
)
init_base_rot_range = dict(
roll= [-0.1, 0.1],
pitch= [-0.1, 0.1],
)
push_robots = False
class rewards( A1FieldCfg.rewards ):
class scales:
@ -73,6 +85,8 @@ class A1ClimbCfg( A1FieldCfg ):
penetrate_volume = -1e-2
exceed_dof_pos_limits = -1e-1
exceed_torque_limits_i = -2e-1
soft_dof_pos_limit = 0.8
max_contact_force = 100.0
class curriculum( A1FieldCfg.curriculum ):
penetrate_volume_threshold_harder = 8000
@ -81,7 +95,8 @@ class A1ClimbCfg( A1FieldCfg ):
penetrate_depth_threshold_easier = 1600
class A1ClimbCfgPPO( A1FieldCfgPPO ):
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class A1JumpCfgPPO( A1FieldCfgPPO ):
class algorithm( A1FieldCfgPPO.algorithm ):
entropy_coef = 0.0
clip_min_std = 0.2
@ -89,23 +104,15 @@ class A1ClimbCfgPPO( A1FieldCfgPPO ):
class runner( A1FieldCfgPPO.runner ):
policy_class_name = "ActorCriticRecurrent"
experiment_name = "field_a1"
run_name = "".join(["Skill",
("Multi" if len(A1ClimbCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else (A1ClimbCfg.terrain.BarrierTrack_kwargs["options"][0] if A1ClimbCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")),
("_pEnergySubsteps{:.0e}".format(A1ClimbCfg.rewards.scales.legs_energy_substeps) if A1ClimbCfg.rewards.scales.legs_energy_substeps != -1e-6 else ""),
("_pDofLimit{:.0e}".format(A1ClimbCfg.rewards.scales.exceed_dof_pos_limits) if getattr(A1ClimbCfg.rewards.scales, "exceed_dof_pos_limits") != 0.0 else ""),
("_climbDownProb{:.1f}".format(A1ClimbCfg.terrain.BarrierTrack_kwargs["climb"]["climb_down_prob"]) if A1ClimbCfg.terrain.BarrierTrack_kwargs["climb"].get("climb_down_prob", 0.) > 0. else ""),
("_climbHeight{:.1f}-{:.1f}".format(*A1ClimbCfg.terrain.BarrierTrack_kwargs["climb"]["height"])),
("_propDelay{:.2f}-{:.2f}".format(
A1ClimbCfg.sensor.proprioception.latency_range[0],
A1ClimbCfg.sensor.proprioception.latency_range[1],
) if A1ClimbCfg.sensor.proprioception.delay_action_obs else ""
),
("_virtual" if A1ClimbCfg.terrain.BarrierTrack_kwargs["virtual_terrain"] else ""),
("_trackBlockLen{:.1f}".format(A1ClimbCfg.terrain.BarrierTrack_kwargs["track_block_length"]) if A1ClimbCfg.terrain.BarrierTrack_kwargs["track_block_length"] != 2. else ""),
])
resume = True
load_run = "{Your traind walking model directory}"
load_run = "{Your virtually trained climb model directory}"
load_run = "{Your virtually trained jump model directory}"
run_name = "".join(["Skills_",
("jump" if A1JumpCfg.terrain.BarrierTrack_kwargs["jump"]["jump_down_prob"] < 1. else "down"),
("_virtual" if A1JumpCfg.terrain.BarrierTrack_kwargs["virtual_terrain"] else ""),
("_noResume" if not resume else "_from" + "_".join(load_run.split("/")[-1].split("_")[:2])),
])
max_iterations = 20000
save_interval = 500

View File

@ -0,0 +1,82 @@
import numpy as np
import os.path as osp
from legged_gym.envs.a1.a1_field_config import A1FieldCfg, A1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict
class A1RemoteCfg( A1FieldCfg ):
class env( A1FieldCfg.env ):
num_envs = 4096
obs_components = [
"proprioception", # 48
# "height_measurements", # 187
# "forward_depth",
# "base_pose",
# "robot_config",
# "engaging_block",
# "sidewall_distance",
]
privileged_obs_components = [
"proprioception",
# "height_measurements",
# "forward_depth",
"robot_config",
]
use_lin_vel = False
privileged_use_lin_vel = True
class terrain( A1FieldCfg.terrain ):
num_rows = 4
num_cols = 4
selected = "TerrainPerlin"
TerrainPerlin_kwargs = dict(
zScale= 0.15,
frequency= 10,
)
class commands( A1FieldCfg.commands ):
class ranges( A1FieldCfg.commands.ranges ):
lin_vel_x = [-1.0, 1.0]
lin_vel_y = [-1., 1.]
ang_vel_yaw = [-1., 1.]
class domain_rand( A1FieldCfg.domain_rand ):
class com_range( A1FieldCfg.domain_rand.com_range ):
x = [-0.2, 0.2]
max_push_vel_ang = 0.5
push_robots = True
class rewards( A1FieldCfg.rewards ):
class scales:
###### hacked from the Field config
tracking_ang_vel = 0.5
lin_vel_l2norm = -1.
legs_energy_substeps = -1e-5
alive = 2.
# penalty for hardware safety
exceed_dof_pos_limits = -4e-2
exceed_torque_limits_l1norm = -4e-1
# penalties for walking gait, probably not needed
collision = -0.1
orientation = -0.1
feet_contact_forces = -1e-3
soft_dof_pos_limit = 0.5
max_contact_force = 60.0
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class A1RemoteCfgPPO( A1FieldCfgPPO ):
class algorithm( A1FieldCfgPPO.algorithm ):
entropy_coef = 0.01
clip_min_std = 0.05
class runner( A1FieldCfgPPO.runner ):
resume = False
load_run = None
run_name = "".join(["WalkByRemote",
("_rLinTrack{:.1f}".format(A1RemoteCfg.rewards.scales.tracking_lin_vel) if getattr(A1RemoteCfg.rewards.scales, "tracking_lin_vel", 0.) != 0. else ""),
("_pLin{:.1f}".format(-A1RemoteCfg.rewards.scales.lin_vel_l2norm) if getattr(A1RemoteCfg.rewards.scales, "lin_vel_l2norm", 0.) != 0. else ""),
("_rAng{:.1f}".format(A1RemoteCfg.rewards.scales.tracking_ang_vel) if A1RemoteCfg.rewards.scales.tracking_ang_vel != 0.2 else ""),
("_rAlive{:.1f}".format(A1RemoteCfg.rewards.scales.alive) if getattr(A1RemoteCfg.rewards.scales, "alive", 2.) != 2. else ""),
("_pEnergySubsteps{:.0e}".format(A1RemoteCfg.rewards.scales.legs_energy_substeps) if getattr(A1RemoteCfg.rewards.scales, "legs_energy_substeps", -2e-5) != -2e-5 else "_nopEnergy"),
("_noResume" if not resume else "_from" + "_".join(load_run.split("/")[-1].split("_")[:2])),
])

View File

@ -141,6 +141,7 @@ class LeggedRobot(BaseTask):
self.last_actions[:] = self.actions[:]
self.last_dof_vel[:] = self.dof_vel[:]
self.last_root_vel[:] = self.root_states[:, 7:13]
self.last_torques[:] = self.torques[:]
if self.viewer and self.enable_viewer_sync and self.debug_viz:
self._draw_debug_vis()
@ -343,6 +344,7 @@ class LeggedRobot(BaseTask):
# set small commands to zero
self.commands[env_ids, :2] *= (torch.norm(self.commands[env_ids, :2], dim=1) > 0.2).unsqueeze(1)
self.commands[env_ids, 2] *= (torch.abs(self.commands[env_ids, 2]) > 0.1)
def _compute_torques(self, actions):
""" Compute torques from actions.
@ -387,7 +389,9 @@ class LeggedRobot(BaseTask):
)
else:
self.dof_pos[env_ids] = self.default_dof_pos
self.dof_vel[env_ids] = 0.
# self.dof_vel[env_ids] = 0. # history init method
dof_vel_range = getattr(self.cfg.domain_rand, "init_dof_vel_range", [-3., 3.])
self.dof_vel[env_ids] = torch.rand_like(self.dof_vel[env_ids]) * abs(dof_vel_range[1] - dof_vel_range[0]) + min(dof_vel_range)
# Each env has multiple actors. So the actor index is not the same as env_id. But robot actor is always the first.
dof_idx = env_ids * self.all_root_states.shape[0] / self.num_envs
@ -414,16 +418,64 @@ class LeggedRobot(BaseTask):
else:
self.root_states[env_ids] = self.base_init_state
self.root_states[env_ids, :3] += self.env_origins[env_ids]
# base rotation (roll and pitch)
if hasattr(self.cfg.domain_rand, "init_base_rot_range"):
base_roll = torch_rand_float(
*self.cfg.domain_rand.init_base_rot_range["roll"],
(len(env_ids), 1),
device=self.device,
)[:, 0]
base_pitch = torch_rand_float(
*self.cfg.domain_rand.init_base_rot_range["pitch"],
(len(env_ids), 1),
device=self.device,
)[:, 0]
base_quat = quat_from_euler_xyz(base_roll, base_pitch, torch.zeros_like(base_roll))
self.root_states[env_ids, 3:7] = base_quat
# base velocities
if getattr(self.cfg.domain_rand, "init_base_vel_range", None) is None:
base_vel_range = (-0.5, 0.5)
else:
base_vel_range = self.cfg.domain_rand.init_base_vel_range
self.root_states[env_ids, 7:13] = torch_rand_float(
*base_vel_range,
(len(env_ids), 6),
device=self.device,
) # [7:10]: lin vel, [10:13]: ang vel
if isinstance(base_vel_range, (tuple, list)):
self.root_states[env_ids, 7:13] = torch_rand_float(
*base_vel_range,
(len(env_ids), 6),
device=self.device,
) # [7:10]: lin vel, [10:13]: ang vel
elif isinstance(base_vel_range, dict):
self.root_states[env_ids, 7:8] = torch_rand_float(
*base_vel_range["x"],
(len(env_ids), 1),
device=self.device,
)
self.root_states[env_ids, 8:9] = torch_rand_float(
*base_vel_range["y"],
(len(env_ids), 1),
device=self.device,
)
self.root_states[env_ids, 9:10] = torch_rand_float(
*base_vel_range["z"],
(len(env_ids), 1),
device=self.device,
)
self.root_states[env_ids, 10:11] = torch_rand_float(
*base_vel_range["roll"],
(len(env_ids), 1),
device=self.device,
)
self.root_states[env_ids, 11:12] = torch_rand_float(
*base_vel_range["pitch"],
(len(env_ids), 1),
device=self.device,
)
self.root_states[env_ids, 12:13] = torch_rand_float(
*base_vel_range["yaw"],
(len(env_ids), 1),
device=self.device,
)
else:
raise NameError(f"Unknown base_vel_range type: {type(base_vel_range)}")
# Each env has multiple actors. So the actor index is not the same as env_id. But robot actor is always the first.
actor_idx = env_ids * self.all_root_states.shape[0] / self.num_envs
@ -546,6 +598,7 @@ class LeggedRobot(BaseTask):
self.last_actions = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False)
self.last_dof_vel = torch.zeros_like(self.dof_vel)
self.last_root_vel = torch.zeros_like(self.root_states[:, 7:13])
self.last_torques = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False)
self.commands = torch.zeros(self.num_envs, self.cfg.commands.num_commands, dtype=torch.float, device=self.device, requires_grad=False) # x vel, y vel, yaw vel, heading
self.commands_scale = torch.tensor([self.obs_scales.lin_vel, self.obs_scales.lin_vel, self.obs_scales.ang_vel], device=self.device, requires_grad=False,) # TODO change this
self.feet_air_time = torch.zeros(self.num_envs, self.feet_indices.shape[0], dtype=torch.float, device=self.device, requires_grad=False)
@ -580,8 +633,11 @@ class LeggedRobot(BaseTask):
self.default_dof_pos = self.default_dof_pos.unsqueeze(0)
def _reset_buffers(self, env_ids):
if getattr(self.cfg.init_state, "zero_actions", False):
self.actions[env_ids] = 0.
self.last_actions[env_ids] = 0.
self.last_dof_vel[env_ids] = 0.
self.last_torques[env_ids] = 0.
self.feet_air_time[env_ids] = 0.
self.episode_length_buf[env_ids] = 0
self.reset_buf[env_ids] = 1
@ -902,8 +958,13 @@ class LeggedRobot(BaseTask):
# log additional curriculum info
if self.cfg.terrain.curriculum:
self.extras["episode"]["terrain_level"] = torch.mean(self.terrain_levels.float())
if len(env_ids) > 0:
self.extras["episode"]["terrain_level_max"] = torch.max(self.terrain_levels[env_ids].float())
self.extras["episode"]["terrain_level_min"] = torch.min(self.terrain_levels[env_ids].float())
if self.cfg.commands.curriculum:
self.extras["episode"]["max_command_x"] = self.command_ranges["lin_vel_x"][1]
# log whether the episode ends by timeout or dead, or by reaching the goal
self.extras["episode"]["timeout_ratio"] = self.time_out_buf.float().sum() / self.reset_buf.float().sum()
# send timeout info to the algorithm
if self.cfg.env.send_timeouts:
self.extras["time_outs"] = self.time_out_buf
@ -930,6 +991,9 @@ class LeggedRobot(BaseTask):
# Penalize torques
return torch.sum(torch.square(self.torques), dim=1)
def _reward_delta_torques(self):
return torch.sum(torch.square(self.torques - self.last_torques), dim=1)
def _reward_dof_vel(self):
# Penalize dof velocities
return torch.sum(torch.square(self.dof_vel), dim=1)

View File

@ -7,6 +7,7 @@ import torch
from legged_gym.envs.base.legged_robot import LeggedRobot
from legged_gym.utils.terrain import get_terrain_cls
from legged_gym.utils.observation import get_obs_slice
from .legged_robot_config import LeggedRobotCfg
class LeggedRobotField(LeggedRobot):
@ -102,10 +103,34 @@ class LeggedRobotField(LeggedRobot):
self.cfg.normalization.clip_actions,
device= self.device,
)
if isinstance(getattr(self.cfg.normalization, "clip_actions_low", None), (tuple, list)):
self.cfg.normalization.clip_actions_low = torch.tensor(
self.cfg.normalization.clip_actions_low,
device= self.device
)
if isinstance(getattr(self.cfg.normalization, "clip_actions_high", None), (tuple, list)):
self.cfg.normalization.clip_actions_high = torch.tensor(
self.cfg.normalization.clip_actions_high,
device= self.device
)
if getattr(self.cfg.normalization, "clip_actions_method", None) == "tanh":
clip_actions = self.cfg.normalization.clip_actions
self.actions = (torch.tanh(actions) * clip_actions).to(self.device)
actions_preprocessed = True
elif getattr(self.cfg.normalization, "clip_actions_method", None) == "hard":
if self.cfg.control.control_type == "P":
actions_low = getattr(
self.cfg.normalization, "clip_actions_low",
self.dof_pos_limits[:, 0] - self.default_dof_pos,
)
actions_high = getattr(
self.cfg.normalization, "clip_actions_high",
self.dof_pos_limits[:, 1] - self.default_dof_pos,
)
self.actions = torch.clip(actions, actions_low, actions_high)
else:
raise NotImplementedError()
actions_preprocessed = True
if getattr(self.cfg.normalization, "clip_actions_delta", None) is not None:
self.actions = torch.clip(
self.actions,
@ -226,6 +251,7 @@ class LeggedRobotField(LeggedRobot):
torch.div(pos_x, self.terrain.env_block_length, rounding_mode= "floor") - 1,
min= 0.0,
)).cpu()
self.extras["episode"]["max_power_throughout_episode"] = self.max_power_per_timestep[env_ids].max().cpu().item()
return return_
@ -245,6 +271,11 @@ class LeggedRobotField(LeggedRobot):
min= 0.0,
)).cpu()
self.max_power_per_timestep = torch.maximum(
self.max_power_per_timestep,
torch.max(torch.sum(self.substep_torques * self.substep_dof_vel, dim= -1), dim= -1)[0],
)
return return_
def _compute_torques(self, actions):
@ -322,9 +353,40 @@ class LeggedRobotField(LeggedRobot):
gymapi.IMAGE_COLOR,
)))
if getattr(self.cfg.normalization, "clip_actions_method", None) is None:
print("WARNING: No action clipping method is specified, actions will only be clipped by clip_actions.")
# projected gravity bias (if needed)
if getattr(self.cfg.domain_rand, "randomize_gravity_bias", False):
print("Initializing gravity bias for domain randomization")
# add cross trajectory domain randomization on projected gravity bias
# uniform sample from range
self.gravity_bias = torch.rand(self.num_envs, 3, dtype= torch.float, device= self.device, requires_grad= False)
self.gravity_bias[:, 0] *= self.cfg.domain_rand.gravity_bias_range["x"][1] - self.cfg.domain_rand.gravity_bias_range["x"][0]
self.gravity_bias[:, 0] += self.cfg.domain_rand.gravity_bias_range["x"][0]
self.gravity_bias[:, 1] *= self.cfg.domain_rand.gravity_bias_range["y"][1] - self.cfg.domain_rand.gravity_bias_range["y"][0]
self.gravity_bias[:, 1] += self.cfg.domain_rand.gravity_bias_range["y"][0]
self.gravity_bias[:, 2] *= self.cfg.domain_rand.gravity_bias_range["z"][1] - self.cfg.domain_rand.gravity_bias_range["z"][0]
self.gravity_bias[:, 2] += self.cfg.domain_rand.gravity_bias_range["z"][0]
self.max_power_per_timestep = torch.zeros(self.num_envs, dtype= torch.float32, device= self.device)
def _reset_buffers(self, env_ids):
return_ = super()._reset_buffers(env_ids)
if hasattr(self, "velocity_sample_points"): self.velocity_sample_points[env_ids] = 0.
if getattr(self.cfg.domain_rand, "randomize_gravity_bias", False):
assert hasattr(self, "gravity_bias")
self.gravity_bias[env_ids] = torch.rand_like(self.gravity_bias[env_ids])
self.gravity_bias[env_ids, 0] *= self.cfg.domain_rand.gravity_bias_range["x"][1] - self.cfg.domain_rand.gravity_bias_range["x"][0]
self.gravity_bias[env_ids, 0] += self.cfg.domain_rand.gravity_bias_range["x"][0]
self.gravity_bias[env_ids, 1] *= self.cfg.domain_rand.gravity_bias_range["y"][1] - self.cfg.domain_rand.gravity_bias_range["y"][0]
self.gravity_bias[env_ids, 1] += self.cfg.domain_rand.gravity_bias_range["y"][0]
self.gravity_bias[env_ids, 2] *= self.cfg.domain_rand.gravity_bias_range["z"][1] - self.cfg.domain_rand.gravity_bias_range["z"][0]
self.gravity_bias[env_ids, 2] += self.cfg.domain_rand.gravity_bias_range["z"][0]
self.max_power_per_timestep[env_ids] = 0.
return return_
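A domain_rand config fragment that would activate the gravity-bias randomization above might look like the following (attribute names are taken from the code above; values are hypothetical):

# Hypothetical values; randomize_gravity_bias and gravity_bias_range are read by the code above.
randomize_gravity_bias = True
# randomize_privileged_gravity_bias = True  # also bias the privileged proprioception observation
gravity_bias_range = dict(
    x= [-0.1, 0.1],
    y= [-0.1, 0.1],
    z= [-0.05, 0.05],
)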
def _prepare_reward_function(self):
@ -483,6 +545,16 @@ class LeggedRobotField(LeggedRobot):
self.cfg.env.obs_components,
privileged= False,
)
if getattr(self.cfg.domain_rand, "randomize_gravity_bias", False) and "proprioception" in self.cfg.env.obs_components:
# NOTE: The logic order is a bit messed up here when combined with legged_robot_noisy.py.
# When simulating sensor bias/noise and latency, the gravity bias should be added before
# the proprioception latency.
# However, since the gravity bias is randomized across the trajectory, the latency simulation
# from legged_robot_noisy.py does not need to be computed after adding the bias.
# So, this computation order is OK.
assert hasattr(self, "gravity_bias")
proprioception_slice = get_obs_slice(self.obs_segments, "proprioception")
self.obs_buf[:, proprioception_slice[0].start + 6: proprioception_slice[0].start + 9] += self.gravity_bias
if self.add_noise:
self.obs_buf += (2 * torch.rand_like(self.obs_buf) - 1) * self.noise_scale_vec
@ -496,9 +568,13 @@ class LeggedRobotField(LeggedRobot):
if "proprioception" in getattr(self.cfg.env, "privileged_obs_components", []) \
and getattr(self.cfg.env, "privileged_use_lin_vel", False):
# NOTE: according to self.get_obs_segment_from_components, "proprioception" observation
# is always the first part of this flattened observation. check super().compute_observations
# is always the FIRST part of this flattened observation. check super().compute_observations
# and self.cfg.env.use_lin_vel for the reason of this if branch.
self.privileged_obs_buf[:, :3] = self.base_lin_vel * self.obs_scales.lin_vel
if getattr(self.cfg.domain_rand, "randomize_privileged_gravity_bias", False) and "proprioception" in getattr(self.cfg.env, "privileged_obs_components", []):
assert hasattr(self, "gravity_bias")
proprioception_slice = get_obs_slice(self.privileged_obs_segments, "proprioception")
self.privileged_obs_buf[:, proprioception_slice[0].start + 6: proprioception_slice[0].start + 9] += self.gravity_bias
for key in self.sensor_handles[0].keys():
if "camera" in key:
@ -558,12 +634,31 @@ class LeggedRobotField(LeggedRobot):
def _create_envs(self):
if self.cfg.domain_rand.randomize_motor:
mtr_rng = self.cfg.domain_rand.leg_motor_strength_range
self.motor_strength = torch_rand_float(
self.cfg.domain_rand.leg_motor_strength_range[0],
self.cfg.domain_rand.leg_motor_strength_range[1],
(self.num_envs, 12),
mtr_rng[0],
mtr_rng[1],
(self.num_envs, self.num_actions),
device=self.device,
)
return_ = super()._create_envs()
front_hip_names = getattr(self.cfg.asset, "front_hip_names", ["FR_hip_joint", "FL_hip_joint"])
self.front_hip_indices = torch.zeros(len(front_hip_names), dtype=torch.long, device=self.device, requires_grad=False)
for i, name in enumerate(front_hip_names):
self.front_hip_indices[i] = self.dof_names.index(name)
rear_hip_names = getattr(self.cfg.asset, "rear_hip_names", ["RR_hip_joint", "RL_hip_joint"])
self.rear_hip_indices = torch.zeros(len(rear_hip_names), dtype=torch.long, device=self.device, requires_grad=False)
for i, name in enumerate(rear_hip_names):
self.rear_hip_indices[i] = self.dof_names.index(name)
hip_names = front_hip_names + rear_hip_names
self.hip_indices = torch.zeros(len(hip_names), dtype=torch.long, device=self.device, requires_grad=False)
for i, name in enumerate(hip_names):
self.hip_indices[i] = self.dof_names.index(name)
return super()._create_envs()
def _process_rigid_shape_props(self, props, env_id):
@ -703,10 +798,16 @@ class LeggedRobotField(LeggedRobot):
def draw_volume_sample_points(self):
sphere_geom = gymutil.WireframeSphereGeometry(0.005, 4, 4, None, color=(1, 0.1, 0))
sphere_penetrate_geom = gymutil.WireframeSphereGeometry(0.005, 4, 4, None, color=(0.1, 0.6, 0.6))
if self.cfg.terrain.selected == "BarrierTrack":
penetration_mask = self.terrain.get_penetration_mask(self.volume_sample_points.view(-1, 3)).view(self.num_envs, -1)
for env_idx in range(self.num_envs):
for point_idx in range(self.volume_sample_points.shape[1]):
sphere_pose = gymapi.Transform(gymapi.Vec3(*self.volume_sample_points[env_idx, point_idx]), r= None)
gymutil.draw_lines(sphere_geom, self.gym, self.viewer, self.envs[env_idx], sphere_pose)
if penetration_mask[env_idx, point_idx]:
gymutil.draw_lines(sphere_penetrate_geom, self.gym, self.viewer, self.envs[env_idx], sphere_pose)
else:
gymutil.draw_lines(sphere_geom, self.gym, self.viewer, self.envs[env_idx], sphere_pose)
##### Additional rewards #####
def _reward_lin_vel_l2norm(self):
@ -715,6 +816,10 @@ class LeggedRobotField(LeggedRobot):
def _reward_world_vel_l2norm(self):
return torch.norm((self.commands[:, :2] - self.root_states[:, 7:9]), dim= 1)
def _reward_tracking_world_vel(self):
world_vel_error = torch.sum(torch.square(self.commands[:, :2] - self.root_states[:, 7:9]), dim= 1)
return torch.exp(-world_vel_error/self.cfg.rewards.tracking_sigma)
def _reward_legs_energy(self):
return torch.sum(torch.square(self.torques * self.dof_vel), dim=1)
@ -730,6 +835,10 @@ class LeggedRobotField(LeggedRobot):
def _reward_alive(self):
return 1.
def _reward_dof_error(self):
dof_error = torch.sum(torch.square(self.dof_pos - self.default_dof_pos), dim=1)
return dof_error
def _reward_lin_cmd(self):
""" This reward term does not depend on the policy, depends on the command """
return torch.norm(self.commands[:, :2], dim= 1)
@ -767,6 +876,157 @@ class LeggedRobotField(LeggedRobot):
penetration_mask *= torch.norm(self.velocity_sample_points, dim= -1) + 1e-3
return torch.sum(penetration_mask, dim= -1)
def _reward_tilt_cond(self):
""" Conditioned reward term in terms of whether the robot is engaging the tilt obstacle
Use a positive factor to encourage a rolling angle when encountering the tilt obstacle
"""
if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
roll, pitch, yaw = get_euler_xyz(self.root_states[:, 3:7])
pi = torch.acos(torch.zeros(1)).item() * 2 # which is 3.1415927410125732
roll[roll > pi] -= pi * 2 # to range (-pi, pi)
roll[roll < -pi] += pi * 2 # to range (-pi, pi)
if hasattr(self, "volume_sample_points"):
self.refresh_volume_sample_points()
stepping_obstacle_info = self.terrain.get_stepping_obstacle_info(self.volume_sample_points.view(-1, 3))
else:
stepping_obstacle_info = self.terrain.get_stepping_obstacle_info(self.root_states[:, :3])
stepping_obstacle_info = stepping_obstacle_info.view(self.num_envs, -1, stepping_obstacle_info.shape[-1])
# Assuming that each robot engages at most one obstacle (or none) at a time.
robot_stepping_obstacle_id = torch.max(stepping_obstacle_info[:, :, 0], dim= -1)[0]
tilting_mask = robot_stepping_obstacle_id == self.terrain.track_options_id_dict["tilt"]
return_ = torch.where(tilting_mask, torch.clip(torch.abs(roll), 0, torch.pi/2), -torch.clip(torch.abs(roll), 0, torch.pi/2))
return return_
def _reward_hip_pos(self):
return torch.sum(torch.square(self.dof_pos[:, self.hip_indices] - self.default_dof_pos[:, self.hip_indices]), dim=1)
def _reward_front_hip_pos(self):
""" Reward the robot to stop moving its front hips """
return torch.sum(torch.square(self.dof_pos[:, self.front_hip_indices] - self.default_dof_pos[:, self.front_hip_indices]), dim=1)
def _reward_rear_hip_pos(self):
""" Reward the robot to stop moving its rear hips """
return torch.sum(torch.square(self.dof_pos[:, self.rear_hip_indices] - self.default_dof_pos[:, self.rear_hip_indices]), dim=1)
def _reward_down_cond(self):
if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0])
self.refresh_volume_sample_points()
engaging_obstacle_info = self.terrain.get_engaging_block_info(
self.root_states[:, :3],
self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3)
)
roll, pitch, yaw = get_euler_xyz(self.root_states[:, 3:7])
pi = torch.acos(torch.zeros(1)).item() * 2 # which is 3.1415927410125732
pitch[pitch > pi] -= pi * 2 # to range (-pi, pi)
pitch[pitch < -pi] += pi * 2 # to range (-pi, pi)
engaging_mask = (engaging_obstacle_info[:, 1 + self.terrain.track_options_id_dict["jump"]] > 0) \
& (engaging_obstacle_info[:, 1 + self.terrain.max_track_options + 2] < 0.)
pitch_err = torch.abs(pitch - 0.2)
return torch.exp(-pitch_err/self.cfg.rewards.tracking_sigma) * engaging_mask # the higher the positive factor, the more the robot is encouraged to pitch down by 0.2 rad
def _reward_jump_x_vel_cond(self):
if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0])
self.refresh_volume_sample_points()
engaging_obstacle_info = self.terrain.get_engaging_block_info(
self.root_states[:, :3],
self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3)
)
engaging_mask = (engaging_obstacle_info[:, 1 + self.terrain.track_options_id_dict["jump"]] > 0) \
& (engaging_obstacle_info[:, 1 + self.terrain.max_track_options + 2] > 0.) \
& (engaging_obstacle_info[:, 0] > 0) # engaging jump-up, not engaging jump-down, positive distance.
roll, pitch, yaw = get_euler_xyz(self.root_states[:, 3:7])
pi = torch.acos(torch.zeros(1)).item() * 2 # which is 3.1415927410125732
pitch[pitch > pi] -= pi * 2 # to range (-pi, pi)
pitch[pitch < -pi] += pi * 2 # to range (-pi, pi)
pitch_up_mask = pitch < -0.75 # a hack value
return torch.clip(self.base_lin_vel[:, 0], max= 1.5) * engaging_mask * pitch_up_mask
def _reward_sync_legs_cond(self):
""" A hack to force same actuation on both rear legs when jump. """
if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0])
self.refresh_volume_sample_points()
engaging_obstacle_info = self.terrain.get_engaging_block_info(
self.root_states[:, :3],
self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3)
)
engaging_mask = (engaging_obstacle_info[:, 1 + self.terrain.track_options_id_dict["jump"]] > 0) \
& (engaging_obstacle_info[:, 1 + self.terrain.max_track_options + 2] > 0.) \
& (engaging_obstacle_info[:, 0] > 0) # engaging jump-up, not engaging jump-down, positive distance.
rr_legs = torch.clone(self.actions[:, 6:9]) # shoulder, thigh, calf
rl_legs = torch.clone(self.actions[:, 9:12]) # shoulder, thigh, calf
rl_legs[:, 0] *= -1 # flip the sign of shoulder action
return torch.norm(rr_legs - rl_legs, dim= -1) * engaging_mask
def _reward_sync_all_legs_cond(self):
""" A hack to force same actuation on both front/rear legs when jump. """
if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0])
self.refresh_volume_sample_points()
engaging_obstacle_info = self.terrain.get_engaging_block_info(
self.root_states[:, :3],
self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3)
)
engaging_mask = (engaging_obstacle_info[:, 1 + self.terrain.track_options_id_dict["jump"]] > 0) \
& (engaging_obstacle_info[:, 1 + self.terrain.max_track_options + 2] > 0.) \
& (engaging_obstacle_info[:, 0] > 0) # engaging jump-up, not engaging jump-down, positive distance.
right_legs = torch.clone(torch.cat([
self.actions[:, 0:3],
self.actions[:, 6:9],
], dim= -1)) # shoulder, thigh, calf
left_legs = torch.clone(torch.cat([
self.actions[:, 3:6],
self.actions[:, 9:12],
], dim= -1)) # shoulder, thigh, calf
left_legs[:, 0] *= -1 # flip the sign of shoulder action
left_legs[:, 3] *= -1 # flip the sign of shoulder action
return torch.norm(right_legs - left_legs, p= 1, dim= -1) * engaging_mask
def _reward_sync_all_legs(self):
right_legs = torch.clone(torch.cat([
self.actions[:, 0:3],
self.actions[:, 6:9],
], dim= -1)) # shoulder, thigh, calf
left_legs = torch.clone(torch.cat([
self.actions[:, 3:6],
self.actions[:, 9:12],
], dim= -1)) # shoulder, thigh, calf
left_legs[:, 0] *= -1 # flip the sign of shoulder action
left_legs[:, 3] *= -1 # flip the sign of shoulder action
return torch.norm(right_legs - left_legs, p= 1, dim= -1)
def _reward_dof_error_cond(self):
""" Force dof error when not engaging obstacle """
if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0])
self.refresh_volume_sample_points()
engaging_obstacle_info = self.terrain.get_engaging_block_info(
self.root_states[:, :3],
self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3)
)
engaging_mask = (engaging_obstacle_info[:, 1] > 0)
return torch.sum(torch.square(self.dof_pos - self.default_dof_pos), dim=1) * engaging_mask
def _reward_leap_bonous_cond(self):
""" counteract the tracking reward loss during leap"""
if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0])
self.refresh_volume_sample_points()
engaging_obstacle_info = self.terrain.get_engaging_block_info(
self.root_states[:, :3],
self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3)
)
engaging_mask = (engaging_obstacle_info[:, 1 + self.terrain.track_options_id_dict["leap"]] > 0) \
& (-engaging_obstacle_info[:, 1 + self.terrain.max_track_options + 1] < engaging_obstacle_info[:, 0]) \
& (engaging_obstacle_info[:, 0] < 0.) # engaging jump-up, not engaging jump-down, positive distance.
world_vel_error = torch.sum(torch.square(self.commands[:, :2] - self.root_states[:, 7:9]), dim= 1)
return (1 - torch.exp(-world_vel_error/self.cfg.rewards.tracking_sigma)) * engaging_mask # reverse version of tracking reward
##### Some helper functions that override parent class attributes #####
@property
def all_obs_components(self):

View File

@ -40,11 +40,24 @@ class LeggedRobotNoisy(LeggedRobotField):
self.actions_scaled = self.actions * self.cfg.control.action_scale
control_type = self.cfg.control.control_type
if control_type == "P":
self.actions_scaled_torque_clipped = self.clip_position_action_by_torque_limit(self.actions_scaled)
actions_scaled_torque_clipped = self.clip_position_action_by_torque_limit(self.actions_scaled)
else:
raise NotImplementedError
else:
self.actions_scaled_torque_clipped = self.actions * self.cfg.control.action_scale
actions_scaled_torque_clipped = self.actions * self.cfg.control.action_scale
if getattr(self.cfg.control, "action_delay", False):
# always put the latest action at the end of the buffer
self.actions_history_buffer = torch.roll(self.actions_history_buffer, shifts= -1, dims= 0)
self.actions_history_buffer[-1] = actions_scaled_torque_clipped
# get the delayed action
self.action_delayed_frames = ((self.current_action_delay / self.dt) + 1).to(int)
self.actions_scaled_torque_clipped = self.actions_history_buffer[
-self.action_delayed_frames,
torch.arange(self.num_envs, device= self.device),
]
else:
self.actions_scaled_torque_clipped = actions_scaled_torque_clipped
return return_
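To enable this path, the control config needs the action-delay attributes referenced above; a hedged example following the commented-out values in a1_field_config.py:

# Hypothetical control-config values; names follow the code above.
action_delay = True
action_delay_range = [0.002, 0.022]   # [s] per-env delay, sampled uniformly
action_delay_resample_time = 5.0      # [s] how often the per-env delay is resampled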
@ -79,7 +92,10 @@ class LeggedRobotNoisy(LeggedRobotField):
torch.max(torch.abs(self.torques), dim= -1)[0],
self.max_torques,
)
### The configured torque limit is usually smaller than the value in the robot datasheet
self.torque_exceed_count_substep[(torch.abs(self.torques) > self.torque_limits).any(dim= -1)] += 1
### Hack to check torque limit violations against a value of your own.
# self.torque_exceed_count_envstep[(torch.abs(self.torques) > 38.).any(dim= -1)] += 1
### count how many times in the episode the robot is out of dof pos limit (summing all dofs)
self.out_of_dof_pos_limit_count_substep += self._reward_dof_pos_limits().int()
@ -108,6 +124,12 @@ class LeggedRobotNoisy(LeggedRobotField):
def _post_physics_step_callback(self):
super()._post_physics_step_callback()
if hasattr(self, "actions_history_buffer"):
resampling_time = getattr(self.cfg.control, "action_delay_resampling_time", self.dt)
resample_env_ids = (self.episode_length_buf % int(resampling_time / self.dt) == 0).nonzero(as_tuple= False).flatten()
if len(resample_env_ids) > 0:
self._resample_action_delay(resample_env_ids)
if hasattr(self, "proprioception_buffer"):
resampling_time = getattr(self.cfg.sensor.proprioception, "latency_resampling_time", self.dt)
resample_env_ids = (self.episode_length_buf % int(resampling_time / self.dt) == 0).nonzero(as_tuple= False).flatten()
@ -122,6 +144,14 @@ class LeggedRobotNoisy(LeggedRobotField):
self.torque_exceed_count_envstep[(torch.abs(self.substep_torques) > self.torque_limits).any(dim= 1).any(dim= 1)] += 1
def _resample_action_delay(self, env_ids):
self.current_action_delay[env_ids] = torch_rand_float(
self.cfg.control.action_delay_range[0],
self.cfg.control.action_delay_range[1],
(len(env_ids), 1),
device= self.device,
).flatten()
def _resample_proprioception_latency(self, env_ids):
self.current_proprioception_latency[env_ids] = torch_rand_float(
self.cfg.sensor.proprioception.latency_range[0],
@ -142,6 +172,27 @@ class LeggedRobotNoisy(LeggedRobotField):
return_ = super()._init_buffers()
all_obs_components = self.all_obs_components
if getattr(self.cfg.control, "action_delay", False):
assert hasattr(self.cfg.control, "action_delay_range") and hasattr(self.cfg.control, "action_delay_resample_time"), "Please specify action_delay_range and action_delay_resample_time in the config file."
""" Used in pre-physics step """
self.cfg.control.action_history_buffer_length = int((self.cfg.control.action_delay_range[1] + self.dt) / self.dt)
self.actions_history_buffer = torch.zeros(
(
self.cfg.control.action_history_buffer_length,
self.num_envs,
self.num_actions,
),
dtype= torch.float32,
device= self.device,
)
self.current_action_delay = torch_rand_float(
self.cfg.control.action_delay_range[0],
self.cfg.control.action_delay_range[1],
(self.num_envs, 1),
device= self.device,
).flatten()
self.action_delayed_frames = ((self.current_action_delay / self.dt) + 1).to(int)
if "proprioception" in all_obs_components and hasattr(self.cfg.sensor, "proprioception"):
""" Adding proprioception delay buffer """
self.cfg.sensor.proprioception.buffer_length = int((self.cfg.sensor.proprioception.latency_range[1] + self.dt) / self.dt)
@ -154,13 +205,13 @@ class LeggedRobotNoisy(LeggedRobotField):
dtype= torch.float32,
device= self.device,
)
self.proprioception_delayed_frames = torch.ones((self.num_envs,), device= self.device, dtype= int) * self.cfg.sensor.proprioception.buffer_length
self.current_proprioception_latency = torch_rand_float(
self.cfg.sensor.proprioception.latency_range[0],
self.cfg.sensor.proprioception.latency_range[1],
(self.num_envs, 1),
device= self.device,
).flatten()
self.proprioception_delayed_frames = ((self.current_proprioception_latency / self.dt) + 1).to(int)
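The matching sensor config for this buffer could look like this (the latency_range values follow the skill configs above; the resampling-interval name follows _post_physics_step_callback, and its value here is hypothetical):

# Hypothetical proprioception sensor settings; read by the buffer initialization above.
latency_range = [0.04 - 0.0025, 0.04 + 0.0075]   # [s] per-env latency, sampled uniformly
latency_resampling_time = 5.0                    # [s] how often the per-env latency is resampled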
if "forward_depth" in all_obs_components and hasattr(self.cfg.sensor, "forward_camera"):
output_resolution = getattr(self.cfg.sensor.forward_camera, "output_resolution", self.cfg.sensor.forward_camera.resolution)
@ -220,6 +271,9 @@ class LeggedRobotNoisy(LeggedRobotField):
def _reset_buffers(self, env_ids):
return_ = super()._reset_buffers(env_ids)
if hasattr(self, "actions_history_buffer"):
self.actions_history_buffer[:, env_ids] = 0.
self.action_delayed_frames[env_ids] = self.cfg.control.action_history_buffer_length
if hasattr(self, "forward_depth_buffer"):
self.forward_depth_buffer[:, env_ids] = 0.
self.forward_depth_delayed_frames[env_ids] = self.cfg.sensor.forward_camera.buffer_length
@ -537,12 +591,11 @@ class LeggedRobotNoisy(LeggedRobotField):
-self.proprioception_delayed_frames,
torch.arange(self.num_envs, device= self.device),
].clone()
### NOTE: WARN: ERROR: remove this code in final version, no action delay should be used.
if getattr(self.cfg.sensor.proprioception, "delay_action_obs", False) or getattr(self.cfg.sensor.proprioception, "delay_privileged_action_obs", False):
raise ValueError("LeggedRobotNoisy: No action delay should be used. Please remove these settings")
# The last-action is not delayed.
if getattr(self.cfg.sensor.proprioception, "delay_action_obs", False):
not_delayed_mask = torch.randint(0, 1, size= (self.num_envs,), device= self.device).bool()
self.proprioception_output[not_delayed_mask, -12:] = self.proprioception_buffer[-1, not_delayed_mask, -12:]
else:
self.proprioception_output[:, -12:] = self.proprioception_buffer[-1, :, -12:]
self.proprioception_output[:, -12:] = self.proprioception_buffer[-1, :, -12:]
self.proprioception_refreshed = True
if not hasattr(self.cfg.sensor, "proprioception") or privileged:
return super()._get_proprioception_obs(privileged)
@ -573,5 +626,12 @@ class LeggedRobotNoisy(LeggedRobotField):
# sum along decimation axis and dof axis
return torch.square(exceeded_torques).sum(dim= 1).sum(dim= 1)
def _reward_exceed_torque_limits_l1norm(self):
""" square function for exceeding part """
exceeded_torques = torch.abs(self.substep_torques) - self.torque_limits
exceeded_torques[exceeded_torques < 0.] = 0.
# sum along decimation axis and dof axis
return torch.norm(exceeded_torques, p= 1, dim= -1).sum(dim= 1)
def _reward_exceed_dof_pos_limits(self):
return self.substep_exceed_dof_pos_limits.to(torch.float32).sum(dim= -1).mean(dim= -1)

View File

@ -0,0 +1,127 @@
import numpy as np
from os import path as osp
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict
class Go1CrawlCfg( Go1FieldCfg ):
class init_state( Go1FieldCfg.init_state ):
pos = [0., 0., 0.45]
#### uncomment this to train non-virtual terrain
class sensor( Go1FieldCfg.sensor ):
class proprioception( Go1FieldCfg.sensor.proprioception ):
latency_range = [0.04-0.0025, 0.04+0.0075]
#### uncomment the above to train non-virtual terrain
class terrain( Go1FieldCfg.terrain ):
max_init_terrain_level = 2
border_size = 5
slope_treshold = 20.
curriculum = True
BarrierTrack_kwargs = merge_dict(Go1FieldCfg.terrain.BarrierTrack_kwargs, dict(
options= [
"crawl",
],
track_block_length= 1.6,
crawl= dict(
height= (0.28, 0.5),
depth= (0.1, 0.6), # size along the forward axis
wall_height= 0.6,
no_perlin_at_obstacle= False,
),
virtual_terrain= False, # Change this to False for real terrain
))
TerrainPerlin_kwargs = merge_dict(Go1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
zScale= 0.06,
))
class commands( Go1FieldCfg.commands ):
class ranges( Go1FieldCfg.commands.ranges ):
lin_vel_x = [0.3, 0.8]
lin_vel_y = [0.0, 0.0]
ang_vel_yaw = [0., 0.]
class control( Go1FieldCfg.control ):
computer_clip_torque = False
class asset( Go1FieldCfg.asset ):
terminate_after_contacts_on = ["base"]
class termination( Go1FieldCfg.termination ):
# additional factors that determine whether to terminate the episode
termination_terms = [
"roll",
"pitch",
"z_low",
"z_high",
"out_of_track",
]
class domain_rand( Go1FieldCfg.domain_rand ):
init_base_rot_range = dict(
roll= [-0.1, 0.1],
pitch= [-0.1, 0.1],
)
init_base_vel_range = [-0.1, 0.1]
class rewards( Go1FieldCfg.rewards ):
class scales:
# tracking_ang_vel = 0.05
# world_vel_l2norm = -1.
# legs_energy_substeps = -4e-5
# alive = 5.
# penetrate_depth = -6e-2
# penetrate_volume = -6e-2
# exceed_dof_pos_limits = -8e-1
# exceed_torque_limits_l1norm = -8e-1
# lin_pos_y = -0.1
###############################################
tracking_ang_vel = 0.05
tracking_world_vel = 5.
# world_vel_l2norm = -1.
# alive = 2.
legs_energy_substeps = -1e-5
# penetrate_depth = -6e-2 # comment this out if training non-virtual terrain
# penetrate_volume = -6e-2 # comment this out if training non-virtual terrain
exceed_dof_pos_limits = -8e-1
# exceed_torque_limits_i = -2e-1
exceed_torque_limits_l1norm = -1.
# collision = -0.05
# tilt_cond = 0.1
torques = -1e-5
yaw_abs = -0.1
lin_pos_y = -0.1
soft_dof_pos_limit = 0.7
only_positive_rewards = False
class curriculum( Go1FieldCfg.curriculum ):
penetrate_volume_threshold_harder = 1500
penetrate_volume_threshold_easier = 10000
penetrate_depth_threshold_harder = 10
penetrate_depth_threshold_easier = 400
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class Go1CrawlCfgPPO( Go1FieldCfgPPO ):
class algorithm( Go1FieldCfgPPO.algorithm ):
entropy_coef = 0.0
clip_min_std = 0.1
class runner( Go1FieldCfgPPO.runner ):
resume = True
load_run = "{Your traind walking model directory}"
load_run = "Sep20_03-37-32_SkillopensourcePlaneWalking_pEnergySubsteps1e-5_pTorqueExceedIndicate1e-1_aScale0.5_tClip202025"
load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Sep26_14-30-24_Skills_crawl_propDelay0.04-0.05_pEnergy-2e-5_pDof8e-01_pTorqueL14e-01_rTilt5e-01_pCollision0.2_maxPushAng0.5_kp40_fromSep26_01-38-19")
load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct11_12-19-00_Skills_crawl_propDelay0.04-0.05_pEnergy-1e-5_pDof8e-01_pTorqueL14e-01_pPosY0.1_maxPushAng0.3_kp40_fromOct09_09-58-26")
run_name = "".join(["Skills_",
("Multi" if len(Go1CrawlCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else (Go1CrawlCfg.terrain.BarrierTrack_kwargs["options"][0] if Go1CrawlCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")),
("_pEnergy" + np.format_float_scientific(-Go1CrawlCfg.rewards.scales.legs_energy_substeps, precision=1, exp_digits=1)),
("_pDof" + np.format_float_scientific(-Go1CrawlCfg.rewards.scales.exceed_dof_pos_limits, precision=1, exp_digits=1)),
("_pTorqueL1" + np.format_float_scientific(-Go1CrawlCfg.rewards.scales.exceed_torque_limits_l1norm, precision=1, exp_digits=1)),
("_noComputerClip" if not Go1CrawlCfg.control.computer_clip_torque else ""),
("_noTanh"),
])
max_iterations = 20000
save_interval = 500

View File

@ -0,0 +1,166 @@
from os import path as osp
import numpy as np
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict
class Go1DownCfg( Go1FieldCfg ):
class init_state( Go1FieldCfg.init_state ):
pos = [0., 0., 0.45]
#### uncomment this to train non-virtual terrain
class sensor( Go1FieldCfg.sensor ):
class proprioception( Go1FieldCfg.sensor.proprioception ):
latency_range = [0.04-0.0025, 0.04+0.0075]
#### uncomment the above to train non-virtual terrain
class terrain( Go1FieldCfg.terrain ):
max_init_terrain_level = 2
border_size = 5
slope_treshold = 20.
curriculum = False
BarrierTrack_kwargs = merge_dict(Go1FieldCfg.terrain.BarrierTrack_kwargs, dict(
options= [
"jump",
],
track_block_length= 1.6,
jump= dict(
height= (0.2, 0.45),
depth= 0.3,
fake_offset= 0.0, # [m] an offset that make the robot easier to get into the obstacle
jump_down_prob= 1., # probability of jumping down use it in non-virtual terrain
),
virtual_terrain= True, # Change this to False for real terrain
no_perlin_threshold= 0.6,
randomize_obstacle_order= True,
n_obstacles_per_track= 3,
))
TerrainPerlin_kwargs = merge_dict(Go1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
zScale= [0.04, 0.15],
))
class commands( Go1FieldCfg.commands ):
class ranges( Go1FieldCfg.commands.ranges ):
lin_vel_x = [0.5, 1.]
lin_vel_y = [0.0, 0.0]
ang_vel_yaw = [0., 0.]
class control( Go1FieldCfg.control ):
computer_clip_torque = False
class asset( Go1FieldCfg.asset ):
terminate_after_contacts_on = ["base"]
class termination( Go1FieldCfg.termination ):
# additional factors that determine whether to terminate the episode
termination_terms = [
"roll",
"pitch",
"z_low",
"z_high",
"out_of_track",
]
z_low_kwargs = merge_dict(Go1FieldCfg.termination.z_low_kwargs, dict(
threshold= -3.,
))
class domain_rand( Go1FieldCfg.domain_rand ):
init_base_pos_range = dict(
x= [0.2, 0.6],
y= [-0.25, 0.25],
)
init_base_rot_range = dict(
roll= [-0.1, 0.1],
pitch= [-0.1, 0.1],
)
init_base_vel_range = merge_dict(Go1FieldCfg.domain_rand.init_base_vel_range, dict(
x= [-0.1, 0.5],
))
push_robots = True
class rewards( Go1FieldCfg.rewards ):
class scales:
action_rate = -0.1
collision = -10.0
delta_torques = -1e-07
dof_acc = -1e-07
down_cond = 0.04
exceed_dof_pos_limits = -0.001
exceed_torque_limits_l1norm = -0.2
feet_contact_forces = -0.01
hip_pos = -5.0
legs_energy_substeps = -5e-06
lin_pos_y = -0.3
penetrate_depth = -0.002
penetrate_volume = -0.002
torques = -0.0001
tracking_ang_vel = 0.1
tracking_world_vel = 5.0
yaw_abs = -0.
soft_dof_pos_limit = 0.75
max_contact_force = 200.
only_positive_rewards = False
class noise( Go1FieldCfg.noise ):
add_noise = False
class curriculum( Go1FieldCfg.curriculum ):
penetrate_volume_threshold_harder = 2000000
penetrate_volume_threshold_easier = 4000000
penetrate_depth_threshold_harder = 200000
penetrate_depth_threshold_easier = 400000
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
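# Hedged sketch (not part of this repo): `merge_dict` is imported from
# legged_gym.utils.helpers and its implementation is not shown in this diff.
# It is assumed to behave like a copy-and-override of the parent kwargs, so only
# the keys listed in the override dict differ from the inherited
# Go1FieldCfg.terrain.BarrierTrack_kwargs. A minimal stand-in with that assumed
# behaviour, for reference only:
def _merge_dict_sketch(base: dict, override: dict) -> dict:
    merged = dict(base)  # shallow copy of the parent kwargs
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = _merge_dict_sketch(merged[key], value)  # assumed recursion into nested dicts
        else:
            merged[key] = value  # override or add the key
    return merged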
class Go1DownCfgPPO( Go1FieldCfgPPO ):
class algorithm( Go1FieldCfgPPO.algorithm ):
entropy_coef = 0.0
clip_min_std = 0.2
class runner( Go1FieldCfgPPO.runner ):
policy_class_name = "ActorCriticRecurrent"
experiment_name = "field_go1"
resume = True
load_run = "{Your trained climb model directory}"
load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Sep28_14-59-53_Skills_down_pEnergySubsteps-8e-06_pPenD1e-02_pDofLimit-4e-01_pYawAbs_pTorqueL14e-01_pCollision-1e-1_downRedundant0.1_softDof0.8_pushRobot_propDelay0.04-0.05_kp40_kd0.5fromSep27_14-32-13")
load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct14_02-06-47_Skills_down_comXRange-0.2-0.2_pEnergySubsteps-4e-06_rAlive3_pPenD4e-03_pDHarder2.e+05_pushRobot_noTanh_virtualfromOct09_09-49-58")
# load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct25_10-48-54_down_pEnergySubsteps-4e-05_pDofLimit-4e-01_pYawAbs_pTorqueL14e-01_pActRate1.e-1_rDownCond3.e-1_kp40_kd0.5_virtualfromOct14_02-06-47")
# load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct26_08-45-02_Skills_down_pEnergySubsteps-4e-05_rTrackVel4._pZVel1e-01_pAngXYVel5e-02_pDTorques1e-07_rDownCond1.e-1_noTanh_virtualfromOct25_10-48-54")
load_run = "Oct28_07-38-39_Skills_down_pEnergySubsteps-4e-05_rTrackVel5._pHipPos4e-01_rDownCond4.e-2_noTanh_virtualfromOct14_02-06-47"
load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct28_13-08-21_Skills_down_pEnergySubsteps-4e-05_rTrackVel5._pHipPos8e-01_rDownCond4.e-2_withPropNoise_noTanh_virtualfromOct28_07-38-39")
load_run = "Oct29_22-07-51_Skills_down_pEnergySubsteps-4e-05_rTrackVel5._pHipPos8e-01_rDownCond4.e-2_noComputerClip_noPropNoise_noTanh_virtualfromOct28_13-08-21"
load_run = "Oct30_04-23-31_Skills_down_pEnergySubsteps-1e-05_rTrackVel5._pTorqueL14e-01_pHipPos5e+00_rDownCond4.e-2_allowNegativeReward_pushRobot_noComputerClip_noPropNoise_noTanh_virtual_fromOct29_22-07-51"
run_name = "".join(["Skills_",
("down" if Go1DownCfg.terrain.BarrierTrack_kwargs["jump"]["jump_down_prob"] > 0. else "jump"),
("_pEnergySubsteps{:.0e}".format(Go1DownCfg.rewards.scales.legs_energy_substeps) if getattr(Go1DownCfg.rewards.scales, "legs_energy_substeps", 0.) != -1e-6 else ""),
("_rTrackVel" + np.format_float_positional(Go1DownCfg.rewards.scales.tracking_world_vel) if getattr(Go1DownCfg.rewards.scales, "tracking_world_vel", 0.) != 0. else ""),
# ("_pLinVel" + np.format_float_positional(-Go1DownCfg.rewards.scales.world_vel_l2norm) if getattr(Go1DownCfg.rewards.scales, "world_vel_l2norm", 0.) != 0.0 else ""),
# ("_rAlive" + np.format_float_positional(Go1DownCfg.rewards.scales.alive) if getattr(Go1DownCfg.rewards.scales, "alive", 0.) != 0.0 else ""),
# ("_pDofLimit{:.0e}".format(Go1DownCfg.rewards.scales.exceed_dof_pos_limits) if getattr(Go1DownCfg.rewards.scales, "exceed_dof_pos_limits") != 0.0 else ""),
# ("_pYawAbs" if getattr(Go1DownCfg.rewards.scales, "yaw_abs", 0.) != 0.1 else ""),
("_pTorqueL1{:.0e}".format(-Go1DownCfg.rewards.scales.exceed_torque_limits_l1norm) if getattr(Go1DownCfg.rewards.scales, "exceed_torque_limits_l1norm", 0.) != 0 else ""),
# ("_pActRate" + np.format_float_scientific(-Go1DownCfg.rewards.scales.action_rate, precision=1, exp_digits=1) if getattr(Go1DownCfg.rewards.scales, "action_rate", 0.) != 0.0 else ""),
# ("_pZVel" + np.format_float_scientific(-Go1DownCfg.rewards.scales.lin_vel_z, precision=1, trim="-") if getattr(Go1DownCfg.rewards.scales, "lin_vel_z", 0.) != 0. else ""),
# ("_pAngXYVel" + np.format_float_scientific(-Go1DownCfg.rewards.scales.ang_vel_xy, precision=1, trim="-") if getattr(Go1DownCfg.rewards.scales, "ang_vel_xy", 0.) != 0. else ""),
# ("_pDTorques" + np.format_float_scientific(-Go1DownCfg.rewards.scales.delta_torques, precision=1, trim="-") if getattr(Go1DownCfg.rewards.scales, "delta_torques", 0.) != 0. else ""),
# ("_pHipPos" + np.format_float_scientific(-Go1DownCfg.rewards.scales.hip_pos, precision=1, trim="-") if getattr(Go1DownCfg.rewards.scales, "hip_pos", 0.) != 0. else ""),
# ("_rDownCond" + np.format_float_scientific(Go1DownCfg.rewards.scales.down_cond, precision=1, exp_digits=1) if getattr(Go1DownCfg.rewards.scales, "down_cond", 0.) != 0.0 else ""),
("_pCollision{:d}".format(int(Go1DownCfg.rewards.scales.collision)) if getattr(Go1DownCfg.rewards.scales, "collision", 0.) != 0.0 else ""),
# ("_downRedundant{:.1f}".format(Go1DownCfg.terrain.BarrierTrack_kwargs["jump"]["down_forward_length"]) if (Go1DownCfg.terrain.BarrierTrack_kwargs["jump"].get("down_forward_length", 0.) > 0. and Go1DownCfg.terrain.BarrierTrack_kwargs["jump"].get("jump_down_prob", 0.) > 0.) else ""),
("_cmdRange{:.1f}-{:.1f}".format(*Go1DownCfg.commands.ranges.lin_vel_x)),
("_allowNegativeReward" if not Go1DownCfg.rewards.only_positive_rewards else ""),
("_pushRobot" if Go1DownCfg.domain_rand.push_robots else "_noPush"),
("_noComputerClip" if not Go1DownCfg.control.computer_clip_torque else ""),
("_noPropNoise" if not Go1DownCfg.noise.add_noise else "_withPropNoise"),
# ("_kp{:d}".format(int(Go1DownCfg.control.stiffness["joint"])) if Go1DownCfg.control.stiffness["joint"] != 50 else ""),
# ("_kd{:.1f}".format(Go1DownCfg.control.damping["joint"]) if Go1DownCfg.control.damping["joint"] != 1. else ""),
("_noTanh"),
("_virtual" if Go1DownCfg.terrain.BarrierTrack_kwargs["virtual_terrain"] else ""),
("_noResume" if not resume else "_from" + "_".join(load_run.split("/")[-1].split("_")[:2])),
])
max_iterations = 20000
save_interval = 200
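# Illustration (not original code): the "_from..." suffix appended to run_name
# above is derived from load_run. For example, with
#   load_run = ".../Oct30_04-23-31_Skills_down_..."
# the expression "_from" + "_".join(load_run.split("/")[-1].split("_")[:2])
# evaluates to "_fromOct30_04-23-31", i.e. only the date-time prefix of the
# resumed checkpoint is carried over into the new run name.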


@ -1,162 +1,161 @@
import numpy as np
import os
from os import path as osp
from legged_gym.utils.helpers import merge_dict
from legged_gym.envs.a1.a1_field_config import A1FieldCfg, A1FieldCfgPPO
go1_const_dof_range = dict(
Hip_max= 1.047,
Hip_min=-1.047,
Thigh_max= 2.966,
Thigh_min= -0.663,
Calf_max= -0.837,
Calf_min= -2.721,
)
go1_action_scale = 0.5
class Go1FieldCfg( A1FieldCfg ):
class env( A1FieldCfg.env ):
num_envs = 8192
class init_state( A1FieldCfg.init_state ):
pos = [0., 0., 0.7]
zero_actions = False
class sensor( A1FieldCfg.sensor ):
class proprioception( A1FieldCfg.sensor.proprioception ):
delay_action_obs = False
latency_range = [0.04-0.0025, 0.04+0.0075] # comment this if it is too hard to train.
class terrain( A1FieldCfg.terrain ):
num_rows = 20
num_cols = 50
selected = "BarrierTrack"
max_init_terrain_level = 0 # for climb, leap finetune
border_size = 5
slope_treshold = 100.
num_cols = 80
curriculum = True # for tilt, crawl, climb, leap
# curriculum = False # for walk
horizontal_scale = 0.025 # [m]
# curriculum = True # for tilt, crawl, jump, leap
curriculum = False # for walk
pad_unavailable_info = True
BarrierTrack_kwargs = dict(
BarrierTrack_kwargs = merge_dict(A1FieldCfg.terrain.BarrierTrack_kwargs, dict(
options= [
# "climb",
# "jump",
# "crawl",
"tilt",
# "tilt",
# "leap",
], # each race track will permute all the options
track_width= 1.6, # for climb, crawl, tilt, walk
# track_width= 1.0, # for leap
# randomize_obstacle_order= True,
track_width= 1.6,
track_block_length= 2., # the x-axis distance from the env origin point
wall_thickness= (0.04, 0.2), # [m]
wall_height= 0.01, # [m] for climb, crawl, tilt, walk
# wall_height= -0.5, # for leap
climb= dict(
height= (0.2, 0.6),
depth= (0.1, 0.8), # size along the forward axis
fake_offset= 0.0, # [m] an offset that makes the robot easier to get into the obstacle
),
crawl= dict(
height= (0.28, 0.38),
depth= (0.1, 0.5), # size along the forward axis
wall_height= 0.6,
no_perlin_at_obstacle= False,
),
tilt= dict(
width= (0.24, 0.32),
depth= (0.4, 1.), # size along the forward axis
opening_angle= 0.0, # [rad] an opening that makes the robot easier to get into the obstacle
wall_height= 0.5,
),
leap= dict(
length= (0.2, 1.0),
depth= (0.4, 0.8),
height= 0.25,
),
wall_height= 0.0,
add_perlin_noise= True,
border_perlin_noise= True,
border_height= 0., # for climb, crawl, tilt, walk
# border_height= -0.5, # for leap
virtual_terrain= False, # for climb, crawl, leap
# virtual_terrain= True, # for tilt
border_height= 0.,
virtual_terrain= False,
draw_virtual_terrain= True,
engaging_next_threshold= 1.2,
engaging_finish_threshold= 0.,
curriculum_perlin= False,
no_perlin_threshold= 0.0, # for crawl, tilt, walk
# no_perlin_threshold= 0.05, # for leap
# no_perlin_threshold= 0.06, # for climb
)
no_perlin_threshold= 0.1,
))
TerrainPerlin_kwargs = dict(
# zScale= 0.1, # for crawl
zScale= 0.12, # for tilt
# zScale= [0.05, 0.1], # for climb
# zScale= [0.04, 0.1], # for leap
# zScale= [0.1, 0.15], # for walk
zScale= [0.08, 0.15],
frequency= 10,
)
class commands( A1FieldCfg.commands ):
class ranges( A1FieldCfg.commands.ranges ):
# lin_vel_x = [0.0, 1.0] # for walk
# lin_vel_x = [0.8, 1.5] # for climb
# lin_vel_x = [1.0, 1.5] # for leap
lin_vel_x = [0.3, 0.8] # for tilt, crawl
lin_vel_x = [-1.0, 1.0]
lin_vel_y = [0.0, 0.0]
ang_vel_yaw = [0., 0.]
class control( A1FieldCfg.control ):
stiffness = {'joint': 50.}
damping = {'joint': 1.}
# action_scale = [0.2, 0.4, 0.4] * 4 # for walk
action_scale = 0.5 # for tilt, crawl, climb, leap
# for climb, leap
stiffness = {'joint': 40.}
damping = {'joint': 0.5}
action_scale = go1_action_scale
torque_limits = [20., 20., 25.] * 4
computer_clip_torque = True
computer_clip_torque = False
motor_clip_torque = False
class asset( A1FieldCfg.asset ):
file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/go1/urdf/go1.urdf"
penalize_contacts_on = ["base", "thigh"]
terminate_after_contacts_on = ["base", "imu"] # for climb, leap, tilt, walk no-virtual
class termination:
# additional factors that determine whether to terminate the episode
termination_terms = [
"roll", # for tilt
"pitch",
"z_low",
"z_high",
"out_of_track", # for leap, walk
]
sdk_dof_range = go1_const_dof_range
class termination( A1FieldCfg.termination ):
roll_kwargs = dict(
threshold= 0.8, # [rad] # for tilt
tilt_threshold= 1.5, # for tilt (condition on engaging block)
threshold= 1.5,
)
pitch_kwargs = dict(
threshold= 1.6,
climb_threshold= 1.6,
leap_threshold= 1.5,
threshold= 1.5,
)
z_low_kwargs = dict(
threshold= 0.08, # [m]
)
z_high_kwargs = dict(
threshold= 1.5, # [m]
)
out_of_track_kwargs = dict(
threshold= 1., # [m]
)
check_obstacle_conditioned_threshold = True
timeout_at_border = True
timeout_at_finished = True
class domain_rand( A1FieldCfg.domain_rand ):
randomize_com = True
class com_range:
x = [-0.05, 0.15]
y = [-0.1, 0.1]
z = [-0.05, 0.05]
randomize_base_mass = True
added_mass_range = [1.0, 3.0]
class com_range( A1FieldCfg.domain_rand.com_range ):
x = [-0.2, 0.2]
init_base_pos_range = merge_dict(A1FieldCfg.domain_rand.init_base_pos_range, dict(
x= [0.05, 0.6],
))
init_base_rot_range = dict(
roll= [-0.75, 0.75],
pitch= [-0.75, 0.75],
)
# init_base_vel_range = [-1.0, 1.0]
init_base_vel_range = dict(
x= [-0.2, 1.5],
y= [-0.2, 0.2],
z= [-0.2, 0.2],
roll= [-1., 1.],
pitch= [-1., 1.],
yaw= [-1., 1.],
)
init_dof_vel_range = [-5, 5]
class rewards( A1FieldCfg.rewards ):
class scales:
tracking_ang_vel = 0.1
world_vel_l2norm = -1.
legs_energy_substeps = -1e-5
exceed_torque_limits_i = -0.1
alive = 2.
lin_pos_y = -0.2
yaw_abs = -0.5
penetrate_depth = -5e-3
penetrate_volume = -5e-3
soft_dof_pos_limit = 0.01
tracking_ang_vel = 0.05
tracking_world_vel = 3.
# world_vel_l2norm = -2.
# alive = 3.
legs_energy_substeps = -2e-5
# penalty for hardware safety
exceed_dof_pos_limits = -8e-1
exceed_torque_limits_l1norm = -8e-1
# penalty for walking gait, probably no need
lin_vel_z = -1.
ang_vel_xy = -0.05
orientation = -4.
dof_acc = -2.5e-7
collision = -10.
action_rate = -0.1
delta_torques = -1e-7
torques = -1.e-5
yaw_abs = -0.8
lin_pos_y = -0.8
hip_pos = -0.4
dof_error = -0.04
soft_dof_pos_limit = 0.8 # only in training walking
max_contact_force = 200.0
class normalization( A1FieldCfg.normalization ):
dof_pos_redundancy = 0.2
clip_actions_method = "hard"
clip_actions_low = []
clip_actions_high = []
for sdk_joint_name, sim_joint_name in zip(
["Hip", "Thigh", "Calf"] * 4,
[ # in the order as simulation
"FL_hip_joint", "FL_thigh_joint", "FL_calf_joint",
"FR_hip_joint", "FR_thigh_joint", "FR_calf_joint",
"RL_hip_joint", "RL_thigh_joint", "RL_calf_joint",
"RR_hip_joint", "RR_thigh_joint", "RR_calf_joint",
],
):
clip_actions_low.append( (go1_const_dof_range[sdk_joint_name + "_min"] + dof_pos_redundancy - A1FieldCfg.init_state.default_joint_angles[sim_joint_name]) / go1_action_scale )
clip_actions_high.append( (go1_const_dof_range[sdk_joint_name + "_max"] - dof_pos_redundancy - A1FieldCfg.init_state.default_joint_angles[sim_joint_name]) / go1_action_scale )
del dof_pos_redundancy, sdk_joint_name, sim_joint_name
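# Worked example of the clip bounds computed above (illustrative numbers only;
# the actual default_joint_angles come from A1FieldCfg.init_state and are not
# shown in this diff). For a thigh joint with a default angle of 0.8 rad:
#   low  = (Thigh_min + 0.2 - 0.8) / 0.5 = (-0.663 + 0.2 - 0.8) / 0.5 = -2.526
#   high = (Thigh_max - 0.2 - 0.8) / 0.5 = ( 2.966 - 0.2 - 0.8) / 0.5 =  3.932
# i.e. actions are clipped so that action_scale * action + default_angle stays
# inside the SDK joint range minus the 0.2 rad redundancy margin.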
class sim( A1FieldCfg.sim ):
body_measure_points = { # transforms are relative to the body frame
@ -184,49 +183,24 @@ class Go1FieldCfg( A1FieldCfg ):
),
}
class curriculum:
# chosen heuristically, please refer to `LeggedRobotField._get_terrain_curriculum_move` with fixed body_measure_points
# for crawl (not updated)
penetrate_volume_threshold_harder = 1500
penetrate_volume_threshold_easier = 10000
penetrate_depth_threshold_harder = 10
penetrate_depth_threshold_easier = 400
# for tilt
# penetrate_volume_threshold_harder = 2000
# penetrate_volume_threshold_easier = 10000
# penetrate_depth_threshold_harder = 20
# penetrate_depth_threshold_easier = 300
# for climb
# penetrate_volume_threshold_harder = 6000
# penetrate_volume_threshold_easier = 12000
# penetrate_depth_threshold_harder = 600
# penetrate_depth_threshold_easier = 1600
# for leap
# penetrate_volume_threshold_harder = 9000
# penetrate_volume_threshold_easier = 10000
# penetrate_depth_threshold_harder = 300
# penetrate_depth_threshold_easier = 5000
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class Go1FieldCfgPPO( A1FieldCfgPPO ):
class algorithm( A1FieldCfgPPO.algorithm ):
entropy_coef = 0.01 # for walk
clip_min_std = 1e-12 # for walk
entropy_coef = 0.0
clip_min_std = 0.2
class policy( A1FieldCfgPPO.policy ):
mu_activation = None # use action clip method by env
class runner( A1FieldCfgPPO.runner ):
experiment_name = "field_go1"
run_name = "".join(["Skills",
("_all" if len(Go1FieldCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else ("_" + Go1FieldCfg.terrain.BarrierTrack_kwargs["options"][0] if Go1FieldCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")),
resume = False
load_run = None
run_name = "".join(["WalkForward",
("_pEnergySubsteps" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.legs_energy_substeps, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "legs_energy_substeps", 0.0) != 0.0 else ""),
("_pPenV" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.penetrate_volume, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "penetrate_volume", 0.) < 0. else ""),
("_pPenD" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.penetrate_depth, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "penetrate_depth", 0.) < 0. else ""),
("_rAlive{:d}".format(int(Go1FieldCfg.rewards.scales.alive)) if Go1FieldCfg.rewards.scales.alive != 2 else ""),
("_rAngVel{:.2f}".format(Go1FieldCfg.rewards.scales.tracking_ang_vel) if Go1FieldCfg.rewards.scales.tracking_ang_vel != 0.05 else ""),
("_pTorqueExceedIndicate" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.exceed_torque_limits_i, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "exceed_torque_limits_i", 0.) != 0. else ""),
("_pTorqueExceedSquare" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.exceed_torque_limits_square, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "exceed_torque_limits_square", 0.) != 0. else ""),
("_pYaw{:.2f}".format(-Go1FieldCfg.rewards.scales.yaw_abs) if Go1FieldCfg.rewards.scales.yaw_abs != 0. else ""),
("_noComputerTorqueClip" if not Go1FieldCfg.control.computer_clip_torque else ""),
("_virtualTerrain" if Go1FieldCfg.terrain.BarrierTrack_kwargs["virtual_terrain"] else ""),
("_rTrackVel" + np.format_float_scientific(Go1FieldCfg.rewards.scales.tracking_world_vel, precision=1, exp_digits=1, trim="-") if getattr(Go1FieldCfg.rewards.scales, "tracking_world_vel", 0.0) != 0.0 else ""),
("_pWorldVel" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.world_vel_l2norm, precision=1, exp_digits=1, trim="-") if getattr(Go1FieldCfg.rewards.scales, "world_vel_l2norm", 0.0) != 0.0 else ""),
("_aScale{:d}{:d}{:d}".format(
int(Go1FieldCfg.control.action_scale[0] * 10),
int(Go1FieldCfg.control.action_scale[1] * 10),
@ -234,15 +208,8 @@ class Go1FieldCfgPPO( A1FieldCfgPPO ):
) if isinstance(Go1FieldCfg.control.action_scale, (tuple, list)) \
else "_aScale{:.1f}".format(Go1FieldCfg.control.action_scale)
),
("_torqueClip{:.0f}".format(Go1FieldCfg.control.torque_limits) if not isinstance(Go1FieldCfg.control.torque_limits, (tuple, list)) else (
"_tClip{:d}{:d}{:d}".format(
int(Go1FieldCfg.control.torque_limits[0]),
int(Go1FieldCfg.control.torque_limits[1]),
int(Go1FieldCfg.control.torque_limits[2]),
)
)),
("_actionClip" + Go1FieldCfg.normalization.clip_actions_method if getattr(Go1FieldCfg.normalization, "clip_actions_method", None) is not None else ""),
("_from" + "_".join(load_run.split("/")[-1].split("_")[:2]) if resume else "_noResume"),
])
resume = False
load_run = ""
max_iterations = 10000
max_iterations = 20000
save_interval = 500
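# Usage sketch (assumption, not shown in this diff): a Cfg/CfgPPO pair like this
# is normally consumed through legged_gym's task_registry, roughly:
#   env, env_cfg = task_registry.make_env(name="go1_field", args=args)
#   runner, train_cfg = task_registry.make_alg_runner(env=env, name="go1_field", args=args)
#   runner.learn(num_learning_iterations=train_cfg.runner.max_iterations)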


@ -1,6 +1,6 @@
from collections import OrderedDict
import os
import datetime
from datetime import datetime
import os.path as osp
import numpy as np
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO
@ -12,48 +12,74 @@ class Go1FieldDistillCfg( Go1FieldCfg ):
num_envs = 256
obs_components = [
"proprioception", # 48
# "height_measurements", # 187
"forward_depth",
]
privileged_obs_components = [
"proprioception", # 48
"base_pose",
"robot_config",
"engaging_block",
"sidewall_distance",
# "height_measurements", # 187
# "forward_depth",
"base_pose", # 6
"robot_config", # 1 + 3 + 1 + 12
"engaging_block", # 1 + (4 + 1) + 2
"sidewall_distance", # 2
]
use_lin_vel = False
privileged_use_lin_vel = True
# if True, privileged_obs will not have noise (based on the current implementation)
privileged_obs_gets_privilege = False # for climb temporarily
privileged_obs_gets_privilege = False # for jump temporarily
class init_state( Go1FieldCfg.init_state ):
pos = [0.0, 0.0, 0.38] # x,y,z [m]
pos = [0.0, 0.0, 0.43] # x,y,z [m]
class sensor( Go1FieldCfg.sensor ):
class forward_camera( Go1FieldCfg.sensor.forward_camera ):
resolution = [int(240/4), int(424/4)]
# position = dict(
# mean= [0.245, 0.0075, 0.072+0.018],
# std= [0.002, 0.002, 0.0005],
# ) # position in base_link ##### small randomization
# rotation = dict(
# lower= [0, 0, 0],
# upper= [0, 0, 0],
# ) # rotation in base_link ##### small randomization
########## new camera extrinsics with 30degree down ####################
position = dict(
mean= [0.245, 0.0075, 0.072+0.018],
std= [0.002, 0.002, 0.0005],
mean= [0.245+0.027, 0.0075, 0.072+0.02],
std= [0.002, 0.002, 0.0002],
) # position in base_link ##### small randomization
rotation = dict(
lower= [0, 0, 0],
upper= [0, 0, 0],
lower= [0, 0.5, 0], # positive for pitch down
upper= [0, 0.54, 0],
) # rotation in base_link ##### small randomization
########## new camera extrinsics with 30degree down ####################
########## new camera extrinsics with 15degree down ####################
# position = dict(
# mean= [0.245+0.027, 0.0075, 0.072+0.018],
# std= [0.003, 0.002, 0.0005],
# ) # position in base_link ##### small randomization
# rotation = dict(
# lower= [0, 0.24, 0], # positive for pitch down
# upper= [0, 0.28, 0],
# ) # rotation in base_link ##### small randomization
########## new camera extrinsics with 15degree down ####################
resized_resolution= [48, 64]
output_resolution = [48, 64]
horizontal_fov = [85, 87] # measured at around 87 degrees
# for go1, usb2.0, 480x640, d435i camera
latency_range = [0.25, 0.30]
latency_range = [0.25, 0.30] # [s]
# latency_range = [0.28, 0.36] # [s]
latency_resample_time = 5.0 # [s]
refresh_duration = 1/10 # [s] for the (240, 424) option, with the onboard script fixed to no more than 20Hz
# config to simulate the stereo RGBD camera
crop_top_bottom = [0, 0]
crop_left_right = [int(60/4), int(46/4)]
depth_range = [0.0, 1.5] # [m]
depth_range = [0.0, 2.0] # [m]
# class proprioception( Go1FieldCfg.sensor.proprioception ): # inherited from A1FieldCfg
class proprioception( Go1FieldCfg.sensor.proprioception ): # inherited from A1FieldCfg
delay_action_obs = False
delay_privileged_action_obs = False
class terrain( Go1FieldCfg.terrain ):
num_rows = 2
@ -67,21 +93,24 @@ class Go1FieldDistillCfg( Go1FieldCfg ):
options= [
# "tilt",
"crawl",
"climb",
"jump",
"jump",
"leap",
], # each race track will permute all the options
one_obstacle_per_track= True,
n_obstacles_per_track= 4,
randomize_obstacle_order= True,
track_width= 1.6,
track_block_length= 1.8, # the x-axis distance from the env origin point
wall_thickness= (0.04, 0.2), # [m]
wall_height= 0.0, # [m]
climb= dict(
track_block_length= 2.0, # the x-axis distance from the env origin point
wall_thickness= (0.04, 0.3), # [m]
wall_height= (-0.5, 0.0), # [m]
jump= dict(
height= (0.40, 0.45),
depth= (0.5, 1.0), # size along the forward axis
fake_offset= 0.05, # [m] making the climb's height info greater than its physical height.
depth= (0.01, 0.01), # size along the forward axis
fake_offset= 0.03, # [m] making the jump's height info greater than its physical height.
jump_down_prob= 0.5,
),
crawl= dict(
height= (0.28, 0.38),
height= (0.36, 0.5),
depth= (0.1, 0.2), # size along the forward axis
wall_height= 0.6,
fake_depth= 0.4, # when track block length is 1.8m
@ -94,11 +123,11 @@ class Go1FieldDistillCfg( Go1FieldCfg ):
),
leap= dict(
# length= (0.45, 0.7),
length= (0.5, 0.7), # for parkour real-world env
length= (0.3, 0.74), # for parkour real-world env
depth= (0.5, 0.6),
height= 0.2,
fake_offset= 0.2,
follow_climb_ratio= 0.5, # when following a climb, do not drop down to the ground suddenly.
fake_offset= 0.1,
follow_jump_ratio= 0.5, # when following a jump, do not drop down to the ground suddenly.
),
add_perlin_noise= True,
border_perlin_noise= True,
@ -106,6 +135,7 @@ class Go1FieldDistillCfg( Go1FieldCfg ):
virtual_terrain= False,
draw_virtual_terrain= True,
engaging_next_threshold= 1.2,
engaging_finish_threshold= 0.3,
check_skill_combinations= True,
curriculum_perlin= False,
no_perlin_threshold= 0.04, # for parkour real-world env
@ -113,33 +143,65 @@ class Go1FieldDistillCfg( Go1FieldCfg ):
)
TerrainPerlin_kwargs = dict(
zScale= [0.0, 0.05], # for parkour real-world env
zScale= [0.01, 0.1], # for parkour real-world env
frequency= 10,
)
class commands( A1FieldDistillCfg.commands ):
pass
class control( Go1FieldCfg.control ):
computer_clip_torque = False
class asset( Go1FieldCfg.asset ):
terminate_after_contacts_on = ["base"]
class termination( A1FieldDistillCfg.termination ):
pass
out_of_track_kwargs = dict(
threshold= 1.6,
)
class domain_rand( A1FieldDistillCfg.domain_rand ):
randomize_com = True
randomize_motor = True
randomize_friction = True
friction_range = [0.0, 0.8]
friction_range = [0.2, 2.0]
randomize_base_mass = True
push_robots = False
init_dof_pos_ratio_range = [0.9, 1.1]
init_base_vel_range = [-0.0, 0.0]
init_dof_pos_ratio_range = [0.8, 1.2]
init_dof_vel_range = [-2., 2.]
# init_base_vel_range = [-0.0, 0.0] # use super class
init_base_pos_range = dict(
x= [0.4, 0.6],
y= [-0.05, 0.05],
)
randomize_gravity_bias = True
randomize_privileged_gravity_bias = False
gravity_bias_range = dict(
x= [-0.12, 0.12],
y= [-0.12, 0.12],
z= [-0.05, 0.05],
)
class rewards( A1FieldDistillCfg.rewards ):
class scales:
pass
class noise( A1FieldDistillCfg.noise ):
add_noise = True # This only accounts for proprioception noise and height-measurement noise at most
class noise_scales( A1FieldDistillCfg.noise.noise_scales ):
ang_vel = 0.2 # measured in 0.02
dof_pos = 0.0006 # measured in 0.0002
dof_vel = 0.02 # measured in 0.015
gravity = 0.06 # measured in 0.05
# These aspects of obs should not have noise:
# they are either not used, set to zero, or have their own noise mechanism
height_measurements = 0.0
forward_depth = 0.0
base_pose = 0.0
lin_vel = 0.0
class forward_depth:
stereo_min_distance = 0.12 # when using (240, 424) resolution
stereo_far_distance = 2.
@ -164,7 +226,8 @@ class Go1FieldDistillCfg( Go1FieldCfg ):
# override base class attributes
pass
distill_target_ = "tanh"
# distill_target_ = "tanh"
distill_target_ = "l1"
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class Go1FieldDistillCfgPPO( A1FieldDistillCfgPPO ):
class algorithm( A1FieldDistillCfgPPO.algorithm ):
@ -177,10 +240,10 @@ class Go1FieldDistillCfgPPO( A1FieldDistillCfgPPO ):
using_ppo = False
distill_target = distill_target_
buffer_dilation_ratio = 1.
learning_rate = 3e-4
learning_rate = 1.e-4
optimizer_class_name = "AdamW"
teacher_policy_class_name = "ActorCriticFieldMutex"
teacher_policy_class_name = "ActorCriticClimbMutex"
teacher_ac_path = None
class teacher_policy (A1FieldDistillCfgPPO.policy ):
# For loading teacher policy. No need to change for training student
@ -195,20 +258,77 @@ class Go1FieldDistillCfgPPO( A1FieldDistillCfgPPO ):
sidewall_distance= (2,),
)
env_action_scale = Go1FieldCfg.control.action_scale
action_smoothing_buffer_len = 3
reset_non_selected = "when_skill"
# reset_non_selected = True
sub_policy_class_name = "ActorCriticRecurrent"
sub_policy_paths = [ # must be in the order of obstacle ID
logs_root + "/field_go1/{your go1 walking policy}",
logs_root + "/field_go1/{your go1 tilting policy}",
logs_root + "/field_go1/{your go1 crawling policy}",
logs_root + "/field_go1/{your go1 climbing policy}",
logs_root + "/field_go1/{your go1 leaping policy}",
sub_policy_paths = [
# os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct10_00-03-12_WalkForward_pEnergySubsteps1e-5_maxPushAng0.5_pushInter3.0_noTanh_fromOct09_15-50-14"),
# os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct25_14-43-48_WalkForward_pEnergySubsteps2e-5_rTrackVel3e+0_rAlive3e+0_pTorqueL1norm8e-1_pDofLim8e-1_actionCliphard_fromOct24_09-00-16"),
os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct27_20-22-36_WalkForward_pEnergySubsteps2e-5_rTrackVel3e+0_pYawAbs8e-1_pYPosAbs8e-01_pHipPos8e-01_noPropNoise_noTanh_actionCliphard_fromOct27_16-25-22"),
os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct11_12-24-22_Skills_tilt_propDelay0.04-0.05_pEnergySubsteps1e-5_pPenD2e-3_pDofLimit8e-1_rTilt8e-03_pCollision0.1_PushRobot_kp40_kd0.5_tiltMax0.40fromSep27_13-59-27"),
os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct11_12-19-00_Skills_crawl_propDelay0.04-0.05_pEnergy-1e-5_pDof8e-01_pTorqueL14e-01_pPosY0.1_maxPushAng0.3_kp40_fromOct09_09-58-26"),
# os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct13_09-12-39_258k_Skills_jump_comXRange-0.2-0.2_pEnergySubsteps-4e-06_pushRobot_jumpHeight0.1-0.7_propDelay0.04-0.05_noTanh_fromOct09_10-13-26"),
# os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct16_10-55-09_Skills_jump_comXRange-0.2-0.2_pLinVel1.4_pTorque2e-06_pY0.1_pCollision0.1_pushRobot_jumpHeight0.10-0.45_propDelay0.04-0.05_noTanh_fromOct13_09-12-39"),
# os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct22_15-19-26_Skills_jump_rTrackVel3._pJumpSameLegs0.6_noPContact_pDofAcc1.5e-07_rJumpXVel1.5_noPropNoise_propDelay0.04-0.05_pCollision-6e-01_noPush_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct22_08-47-13"),
# os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct24_07-41-26_Skills_jump_rTrackVel4._pY-2e-01_pJumpSameLegs0.2_noPContact_pDofAcc2.5e-07_rJumpXVel1.5_noPropNoise_propDelay0.04-0.05_pCollision-1e-01_noPush_minStd0.2_noTanh_zeroResetAction_jumpRange0.4-0.5_fromOct20_16-09-47"),
# os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct25_11-36-16_Skills_jump_pEnergySubsteps3e-05_rTrackVel4._pY-3e-01_propDelay0.04-0.05_noPropNoise_noPush_noTanh_zeroResetAction_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct24_07-41-26"),
# os.path.join(logs_root, "field_go1", "Oct27_16-03-48_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._pDofErrCond4e-01_pHipPos5e-01_pCollision3e+00_pSyncSymLegs4e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_13-20-59"),
# os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct27_16-04-45_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._pPenD-6e-03_pDofErrCond6e-01_pHipPos5e-01_pCollision6e+00_pSyncSymLegs4e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_13-20-59"),
os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct28_11-05-56_Skills_jump_pEnergySubsteps2e-06_rTrackVel5._pYaw-1e-01_pHipPos5e+00_pCollision1e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_jumpRange0.4-0.5_allowNegativeReward_fromOct28_08-54-23"),
# os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct09_09-51-58_Skills_leap_propDelay0.04-0.05_pEnergySubsteps-8e-06_pPenD1.e-2_pDofLimit4e-01_pCollision0.5_kp40_kd0.5fromOct05_02-16-22"),
# os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct25_11-39-12_Skills_leap_pEnertySubsteps4.e-5_pActRate2.e-1_pDofLimit8.e-1_pCollision5.e-1_noPropNoise_noTanh_zeroResetAction_actionCliphard_fromOct09_09-51-58"),
os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct28_13-23-39_Skills_leap_pEnertySubsteps2.e-5_rTrackVel2._rAlive2.0_pSyncAllLegs6.e-1_pOrient6.e-1_pHipPos5.e+0_noTanh_actionCliphard_virtual_fromOct28_03-32-35"),
]
# jump_down_policy_path = logs_root + "/field_a1_noTanh_oracle/Oct09_09-49-58_Skills_down_pEnergySubsteps-4e-06_rAlive3_pPenD1e-02_pDofLimit-4e-01_pTorqueL14e-01_maxForce200_downRedundant0.2_softDof0.8_pushRobot_kp40_kd0.5fromSep28_14-59-53"
# jump_down_policy_path = os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct14_02-06-47_Skills_down_comXRange-0.2-0.2_pEnergySubsteps-4e-06_rAlive3_pPenD4e-03_pDHarder2.e+05_pushRobot_noTanh_virtualfromOct09_09-49-58")
# jump_down_policy_path = os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct25_10-48-54_down_pEnergySubsteps-4e-05_pDofLimit-4e-01_pYawAbs_pTorqueL14e-01_pActRate1.e-1_rDownCond3.e-1_kp40_kd0.5_virtualfromOct14_02-06-47")
# tested OK, tends to stop
# jump_down_policy_path = os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct25_11-36-31_down_pEnergySubsteps-4e-05_pDofLimit-4e-01_pYawAbs_pTorqueL14e-01_pActRate1.e-1_rDownCond1.e-2_kp40_kd0.5_virtualfromOct14_02-06-47")
# jump_down_policy_path = os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct26_08-45-02_Skills_down_pEnergySubsteps-4e-05_rTrackVel4._pZVel1e-01_pAngXYVel5e-02_pDTorques1e-07_rDownCond1.e-1_noTanh_virtualfromOct25_10-48-54")
jump_down_policy_path = os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct28_13-08-21_Skills_down_pEnergySubsteps-4e-05_rTrackVel5._pHipPos8e-01_rDownCond4.e-2_withPropNoise_noTanh_virtualfromOct28_07-38-39")
# sub_policy_paths = [ # must in the order of obstacle ID
# os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct18_12-37-48_WalkForward_comXRange-0.2-0.2_noDelayActObs_pLinY0.05_noTanh_fromOct10_00-03-12"), # a little pitch up, accetable
# os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct11_12-24-22_Skills_tilt_propDelay0.04-0.05_pEnergySubsteps1e-5_pPenD2e-3_pDofLimit8e-1_rTilt8e-03_pCollision0.1_PushRobot_kp40_kd0.5_tiltMax0.40fromSep27_13-59-27"), # a little pitch up, accetable
# os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct18_12-32-28_Skills_crawl_comXRange-0.2-0.2_pEnergySubsteps-1e-5_pDof8e-01_pTorque1e-5_pTorqueL14e-01_noDelayActObs_noTanh_fromOct11_12-19-00"),
# os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct19_06-29-14_Skills_jump_comXRange-0.2-0.2_pLinVel1._pDofAcc5e-7_pTorque6e-05_jumpHeight0.10-0.46_noDelayActObs_noTanh_minStd0.1_fromOct18_15-57-31"),
# os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct19_01-25-22_Skills_leap_comXRange-0.2-0.2_pTorques2.e-5_pContactForces1.e-2_leapHeight0.1_noDelayActObs_noTanh_virtualfromOct09_09-51-58"),
# ]
# jump_down_policy_path = \
# os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct19_07-01-54_Skills_down_comXRange-0.2-0.2_pXVelL21.3_rDown0.3_pPenD2e-03_pDHarder2e+5_pTorque2e-5_pushRobot_noDelayActObs_noTanh_virtualfromOct18_00-51-27")
# The group of ckpts used for the no-computer-clip and no-motor-clip setting
sub_policy_paths = [
os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct27_20-22-36_WalkForward_pEnergySubsteps2e-5_rTrackVel3e+0_pYawAbs8e-1_pYPosAbs8e-01_pHipPos8e-01_noPropNoise_noTanh_actionCliphard_fromOct27_16-25-22"),
os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct27_20-22-36_WalkForward_pEnergySubsteps2e-5_rTrackVel3e+0_pYawAbs8e-1_pYPosAbs8e-01_pHipPos8e-01_noPropNoise_noTanh_actionCliphard_fromOct27_16-25-22"),
os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct11_12-19-00_Skills_crawl_propDelay0.04-0.05_pEnergy-1e-5_pDof8e-01_pTorqueL14e-01_pPosY0.1_maxPushAng0.3_kp40_fromOct09_09-58-26"),
# os.path.join(logs_root, "field_go1", "Oct30_03-46-42_Skills_crawl_pEnergy1.e-5_pDof8.e-1_pTorqueL11.e+0_noComputerClip_noTanh"),
# os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct29_22-31-44_Skills_jump_pEnergySubsteps4e-05_rTrackVel4._pY-4e-01_pTorque4e-04_noJumpBonous_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_20-17-31"),
# os.path.join(logs_root, "field_go1", "Oct30_11-11-43_Skills_jump_pEnergySubsteps6e-05_rTrackVel5._pY-8e-01_pTorqueExceed1.2e+00_pTorque4e-04_pDTorques1e-06_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_20-17-31"),
# os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct30_13-00-12_Skills_jump_pEnergySubsteps6e-05_rTrackVel5._pY-4e-01_pTorqueExceed1.8e+00_pTorque4e-04_pDTorques1e-05_propDelay0.04-0.05_noPropNoise_noPush_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_20-17-31"),
### New jump oracle with more widely spread rear hips.
# os.path.join(logs_root, "field_go1_noTanh_oracle", "Nov02_11-07-49_Skills_jump_pEnergySubsteps5e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed1.8e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12"),
os.path.join(logs_root, "field_go1_noTanh_oracle", "Nov02_13-53-48_Skills_jump_pEnergySubsteps5e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed1.5e+00_propDelay0.04-0.05_noPropNoise_pushRobot_minStd0.21_entropy0.01_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12"),
# os.path.join(logs_root, "field_go1", "Oct30_05-08-03_Skills_leap_pEnertySubsteps6.e-6_rTrackVel5._pTorqueExceed4.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_04-10-12"),
os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct30_07-32-10_Skills_leap_pEnertySubsteps6.e-6_rTrackVel5._pTorqueExceed8.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_pDorErr1.5e-1_noPenCurriculum_noCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_05-07-24"),
]
jump_down_policy_path = os.path.join(logs_root, "field_go1_noTanh_oracle",
"Oct30_03-42-56_Skills_down_pEnergySubsteps-1e-05_rTrackVel5._pTorqueL12e-01_pHipPos5e+00_rDownCond4.e-2_allowNegativeReward_pushRobot_noComputerClip_noPropNoise_noTanh_virtualfromOct29_22-07-51",
)
# jump_down_policy_path = os.path.join(logs_root, "field_go1",
# "Oct30_04-23-31_Skills_down_pEnergySubsteps-1e-05_rTrackVel5._pTorqueL14e-01_pHipPos5e+00_rDownCond4.e-2_allowNegativeReward_pushRobot_noComputerClip_noPropNoise_noTanh_virtual_fromOct29_22-07-51",
# )
jump_down_vel = 1.0
cmd_vel_mapping = {
0: 1.0,
1: 0.5,
2: 0.8,
3: 1.2,
1: 0.6,
2: 1.0,
3: 1.0,
4: 1.5,
}
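# Interpretation (not stated explicitly here): the keys appear to index the
# sub-policies in the same order as sub_policy_paths (obstacle IDs 0-4), and the
# values look like the commanded forward velocity in m/s for each skill; e.g.
# entry 4 -> 1.5 matches the "_leapVel1.5" tag composed into run_name below.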
@ -218,38 +338,125 @@ class Go1FieldDistillCfgPPO( A1FieldDistillCfgPPO ):
runner_class_name = "TwoStageRunner"
class runner( A1FieldDistillCfgPPO.runner ):
experiment_name = "distill_go1"
num_steps_per_env = 32
class pretrain_dataset:
# data_dir = [
# "logs/distill_a1_dagger/" + dir_ \
# for dir_ in os.listdir("logs/distill_a1_dagger")
# "/localdata_ssd/isaac_ziwenz_tmp/distill_go1_dagger/20231021_camPitch0.52_jumpA1Oct16_10-55-09/" + dir_ \
# for dir_ in os.listdir("/localdata_ssd/isaac_ziwenz_tmp/distill_go1_dagger/20231021_camPitch0.52_jumpA1Oct16_10-55-09")
# ]
scan_dir = "".join([
"logs/distill_go1_dagger/{}_".format(datetime.datetime.now().strftime("%b%d")),
"/localdata_ssd/isaac_ziwenz_tmp/distill_go1_dagger/", datetime.now().strftime('%b%d_%H-%M-%S'), "_",
"".join(Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["options"]),
"_vDelay{:.2f}-{:.2f}".format(
Go1FieldDistillCfg.sensor.forward_camera.latency_range[0],
Go1FieldDistillCfg.sensor.forward_camera.latency_range[1],
),
"_pDelay{:.2f}-{:.2f}".format(
Go1FieldDistillCfg.sensor.proprioception.latency_range[0],
Go1FieldDistillCfg.sensor.proprioception.latency_range[1],
),
("_camPitch{:.2f}".format(
(Go1FieldDistillCfg.sensor.forward_camera.rotation["lower"][1] + Go1FieldDistillCfg.sensor.forward_camera.rotation["upper"][1]) / 2
)),
("_depthMax{:.1f}".format(Go1FieldDistillCfg.sensor.forward_camera.depth_range[1])),
# ("_zeroResetAction" if Go1FieldDistillCfg.init_state.zero_actions else ""),
# ("_actionClip" + Go1FieldDistillCfg.normalization.clip_actions_method if getattr(Go1FieldDistillCfg.normalization, "clip_actions_method", None) else ""),
("_jumpOffset{:.2f}".format(Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["jump"]["fake_offset"])),
("_noComputerClip" if not Go1FieldDistillCfg.control.computer_clip_torque else ""),
("_randOrder" if Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["options"] else ""),
])
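# Note on the import change above: with `from datetime import datetime`,
# datetime.now().strftime('%b%d_%H-%M-%S') yields e.g. "Nov14_18-58-32", so each
# DAgger collection run writes to its own freshly timestamped scan_dir.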
dataset_loops = -1
random_shuffle_traj_order = True
keep_latest_ratio = 1.0
keep_latest_n_trajs = 4000
keep_latest_n_trajs = 3000
starting_frame_range = [0, 100]
resume = False
load_run = ""
resume = True
# load_run = "/home/zzw/Data/legged_gym_logs/distill_a1/Jun06_00-15-50_distill_crawljumpleap_vDelay0.20-0.26_jump0.40-0.45_vFps10_leap0.55-0.62_fromJun04_20-18-56_06-3308JumpOracle"
# load_run = "/home/zzw/Data/legged_gym_logs/distill_go1/Jul11_16-21-59_distill_crawljumpleap"
# load_run = "Jul17_08-47-39_distill_jump_vDelay0.25-0.30"
# load_run = "Oct08_02-51-14_distill_crawljumpjumpleap_vDelay0.25-0.30_noTanh_fromJul17_08-47-39"
# load_run = "Oct08_14-51-22_distill_crawljumpjumpleap_vDelay0.25-0.30_noTanh_fromOct08_02-51-14"
load_run = "Oct20_05-35-58_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_jumpA1Oct19_06-29-14_noTanh_fromOct08_14-51-22"
load_run = "Oct21_09-01-45_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_jumpA1Oct16_10-55-09_noTanh_fromOct20_05-35-58"
load_run = "Oct21_16-21-33_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_depthMax2.0_jumpA1Oct16_10-55-09_noTanh_fromOct21_09-01-45"
load_run = "Oct22_02-25-42_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_depthMax2.0_jumpA1Oct16_10-55-09_noTanh_fromOct21_16-21-33"
load_run = "Oct24_15-43-34_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_depthMax2.0_zeroResetAction_actionCliphard_jumpOct24_07-41-26_noTanh_fromOct22_02-25-42"
load_run = "Oct25_17-10-09_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_depthMax2.0_actionCliphard_jumpOct25_11-36-16_downOct25_11-36-31_noTanh_fromOct24_15-43-34"
#### From the wrong walk and jump skill policies
# load_run = "Oct26_15-38-28_distill_crawljumpjumpleap_vDelay0.25-0.30_camPitch0.52_depthMax2.0_noPropNoise_gravityBias_actionCliphard_jumpOct25_11-36-16_downOct25_11-36-31_noTanh_fromOct25_17-10-09"
# load_run = "Oct27_06-00-07_distill_crawljumpjumpleap_vDelay0.25-0.30_camPitch0.52_depthMax2.0_noPropNoise_gravityBias_actionCliphard_jumpOct25_11-36-16_downOctOct26_08-45-02_fricMax2.0_noTanh_fromOct26_15-38-28"
load_run = "Oct28_16-16-58_distill_crawljumpjumpleap_vDelay0.25-0.30_camPitch0.52_depthMax2.0_addPropNoise_gravityBias_actionCliphard_jumpOct28_11-05-56_fricMax2.0_noTanh_fromOct25_17-10-09"
######## Continue tuning policy to fix jump and leap.
## This ckpt looks good. But the jump is not good enough.
load_run = "Oct30_16-31-09_distill_crawljumpjumpleap_vDelay0.25-0.30_camPitch0.52_depthMax2.0_addPropNoise_gravityBias_actionCliphard_noComputerClip_oracleResetWhenSkill_fricMax2.0_noTanh_fromOct28_16-16-58"
max_iterations = 80000
max_iterations = 100000
save_interval = 2000
log_interval = 100
run_name = "".join(["distill_",
"".join(Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["options"]),
"_vDelay{:.2f}-{:.2f}".format(
Go1FieldDistillCfg.sensor.forward_camera.latency_range[0],
Go1FieldDistillCfg.sensor.forward_camera.latency_range[1],
),
("_camPitch{:.2f}".format(
(Go1FieldDistillCfg.sensor.forward_camera.rotation["lower"][1] + Go1FieldDistillCfg.sensor.forward_camera.rotation["upper"][1]) / 2
)),
("_depthMax{:.1f}".format(Go1FieldDistillCfg.sensor.forward_camera.depth_range[1])),
# ("_addPropNoise" if Go1FieldDistillCfg.noise.add_noise else "_noPropNoise"),
# ("_gravityBias" if Go1FieldDistillCfg.domain_rand.randomize_gravity_bias else "_noGravityBias"),
# ("_actionClip" + Go1FieldDistillCfg.normalization.clip_actions_method if getattr(Go1FieldDistillCfg.normalization, "clip_actions_method", None) else ""),
# ("_noComputerClip" if not Go1FieldDistillCfg.control.computer_clip_torque else ""),
("_jumpNov02_13-53-48"),
("_leapVel1.5"),
("_jumpOffset{:.2f}".format(Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["jump"]["fake_offset"])),
("_oracleResetWhenSkill"),
("_fricMax{:.1f}".format(Go1FieldDistillCfg.domain_rand.friction_range[1])),
("_noTanh"),
("_from" + "_".join(load_run.split("/")[-1].split("_")[:2]) if resume else "_noResume"),
])
# Checking variants:
"""
1. Faster leap (1.5m/s)
higher offset 0.1 for jump up
2. Longer leap offset 0.2, faster leap (1.5m/s)
higher offset 0.1 for jump up
3. Faster leap (1.5m/s)
higher offset 0.1 for jump up
from older ckpt
4. Faster leap (1.5m/s)
faster jump (1.2m/s) and higher offset 0.1 for jump up
from older ckpt
smaller learning rate 1e-4
---- 20231101 after testing previous ckpts ----
5. Faster leap (1.5m/s)
normal 0.05 offset for jump up, normal (1.0m/s) jump
from older ckpt (Oct28_16-16-58)
same learning rate as Oct30_16-31-09
6. Faster leap (1.5m/s)
slight higher 0.07 offset for jump up, faster (1.2m/s) jump
from older ckpt (Oct28_16-16-58)
lower learning rate as 1e-4
---- 20231102 after testing previous ckpts ----
New jump oracle to test:
* Nov02_11-07-49_Skills_jump_pEnergySubsteps5e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed1.8e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12
Nov02_11-09-03_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed2e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12
Nov02_13-53-13_Skills_jump_pEnergySubsteps5e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed1.5e+00_propDelay0.04-0.05_noPropNoise_pushRobot_minStd0.22_entropy0.01_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12
* Nov02_13-53-48_Skills_jump_pEnergySubsteps5e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed1.5e+00_propDelay0.04-0.05_noPropNoise_pushRobot_minStd0.21_entropy0.01_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12
7. Faster leap (1.5m/s)
lower 0.05 offset for jump up, normal (1.0m/s) jump
using new jump oracle
same older ckpt (Oct28_16-16-58)
same learning rate as Oct30_16-31-09 (1.5e-4)
8. Faster leap (1.5m/s)
lower 0.03 offset for jump up, normal (1.0m/s) jump
using new jump oracle
new ckpt (Oct30_16-31-09)
lower learning rate as 1e-4
"""


@ -0,0 +1,223 @@
from os import path as osp
import numpy as np
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict
class Go1JumpCfg( Go1FieldCfg ):
class init_state( Go1FieldCfg.init_state ):
pos = [0., 0., 0.45]
#### uncomment this to train non-virtual terrain
class sensor( Go1FieldCfg.sensor ):
class proprioception( Go1FieldCfg.sensor.proprioception ):
latency_range = [0.04-0.0025, 0.04+0.0075]
#### uncomment the above to train non-virtual terrain
class terrain( Go1FieldCfg.terrain ):
max_init_terrain_level = 10
border_size = 5
slope_treshold = 20.
curriculum = True
BarrierTrack_kwargs = merge_dict(Go1FieldCfg.terrain.BarrierTrack_kwargs, dict(
options= [
"jump",
],
track_width= 1.6,
track_block_length= 1.6,
wall_thickness= 0.2,
jump= dict(
# height= (0.38, 0.46),
height= (0.2, 0.46),
depth= (0.1, 0.2),
fake_offset= 0.0, # [m] an offset that makes the robot easier to get into the obstacle
jump_down_prob= 0., # probability of jumping down; use it in non-virtual terrain
),
virtual_terrain= False, # Change this to False for real terrain
no_perlin_threshold= 0.06,
randomize_obstacle_order= True,
n_obstacles_per_track= 3,
))
TerrainPerlin_kwargs = merge_dict(Go1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
zScale= [0.05, 0.12],
))
class commands( Go1FieldCfg.commands ):
class ranges( Go1FieldCfg.commands.ranges ):
lin_vel_x = [0.5, 1.5]
lin_vel_y = [0.0, 0.0]
ang_vel_yaw = [0., 0.]
class control( Go1FieldCfg.control ):
computer_clip_torque = False
class asset( Go1FieldCfg.asset ):
penalize_contacts_on = ["base", "thigh", "calf", "imu"]
terminate_after_contacts_on = ["base"]
class termination( Go1FieldCfg.termination ):
# additional factors that determine whether to terminate the episode
termination_terms = [
"roll",
"pitch",
"z_high",
"out_of_track",
]
timeout_at_finished = True
timeout_at_border = True
class domain_rand( Go1FieldCfg.domain_rand ):
class com_range( Go1FieldCfg.domain_rand.com_range ):
z = [-0.1, 0.1]
init_base_pos_range = dict(
x= [0.2, 0.8],
y= [-0.25, 0.25],
)
init_base_rot_range = dict(
roll= [-0.1, 0.1],
pitch= [-0.1, 0.1],
)
push_robots = True
class rewards( Go1FieldCfg.rewards ):
class scales:
tracking_ang_vel = 0.08
# alive = 10.
tracking_world_vel = 5. ######## 1. always ########
legs_energy_substeps = -6.e-5 # -6e-6
jump_x_vel_cond = 0.1 ######## 1. segment ########
# hip_pos = -5.
front_hip_pos = -5.
rear_hip_pos = -0.000001
penetrate_depth = -6e-3
penetrate_volume = -6e-4
exceed_dof_pos_limits = -8e-4 # -8e-4
exceed_torque_limits_l1norm = -2. # -8e-2 # -6e-2
action_rate = -0.1
delta_torques = -1.e-5
dof_acc = -1e-7
torques = -4e-4 # 2e-3
yaw_abs = -0.1
lin_pos_y = -0.4
collision = -1.
sync_all_legs_cond = -0.3 # -0.6
dof_error = -0.06
only_positive_rewards = False # False
soft_dof_pos_limit = 0.8
max_contact_force = 100.
tracking_sigma = 0.3
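# Hedged note: legged_gym-style tracking rewards are typically
# exp(-squared_error / tracking_sigma), so with tracking_sigma = 0.3 a world
# velocity error of 0.3 m/s would still earn exp(-0.09 / 0.3) ~= 0.74 of the
# full tracking_world_vel reward (assuming this env keeps that convention).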
class noise( Go1FieldCfg.noise ):
add_noise = False
class curriculum( Go1FieldCfg.curriculum ):
penetrate_volume_threshold_harder = 6000
penetrate_volume_threshold_easier = 12000
penetrate_depth_threshold_harder = 600
penetrate_depth_threshold_easier = 1200
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class Go1JumpCfgPPO( Go1FieldCfgPPO ):
class algorithm( Go1FieldCfgPPO.algorithm ):
entropy_coef = 0.01
clip_min_std = 0.21
class runner( Go1FieldCfgPPO.runner ):
resume = True
load_run = "{Your traind walking model directory}"
# load_run = "Sep20_03-37-32_SkillopensourcePlaneWalking_pEnergySubsteps1e-5_pTorqueExceedIndicate1e-1_aScale0.5_tClip202025"
# load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Sep27_14-32-13_Skills_jump_pEnergySubsteps-8e-06_pDofLimit-8e-01_pTorqueL18e-01_pContact1e-02_pCollision0.4_pushRobot_propDelay0.04-0.05_kp40_kd0.5_fromSep27_03-36-02")
# load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct07_10-24-52_Skills_jump_pEnergy1e-06_pY-1e-02_pCollision-1e-02_noPush_noTanh_jumpRange0.2-0.6_fromSep27_14-32-13")
# load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct19_06-29-14_Skills_jump_comXRange-0.2-0.2_pLinVel1._pDofAcc5e-7_pTorque6e-05_jumpHeight0.10-0.46_noDelayActObs_noTanh_minStd0.1_fromOct18_15-57-31")
# load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct16_10-55-09_Skills_jump_comXRange-0.2-0.2_pLinVel1.4_pTorque2e-06_pY0.1_pCollision0.1_pushRobot_jumpHeight0.10-0.45_propDelay0.04-0.05_noTanh_fromOct13_09-12-39")
######################################
# load_run = "Oct20_16-09-47_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel2._propDelay0.04-0.05_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct19_06-29-14"
# load_run = "Oct21_05-34-37_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel2._noPropNoise_propDelay0.04-0.05_pCollision-5e-01_noPush_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct20_16-09-47"
# load_run = "Oct21_12-30-28_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel2._noPropNoise_propDelay0.04-0.05_pCollision-1e+00_noPush_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct21_05-34-37"
# load_run = "Oct21_14-26-29_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel1.2_noPropNoise_propDelay0.04-0.05_pCollision-2e+00_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct21_12-30-28"
# load_run = "Oct20_16-09-47_275k_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel2._propDelay0.04-0.05_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct19_06-29-14"
# load_run = osp.join(logs_root, "field_go1_before20231030_tuneJump_noTanh", "Oct20_16-09-47_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel2._propDelay0.04-0.05_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct19_06-29-14")
# load_run = "Oct22_02-20-52_Skills_jump_pLinVel1._noPContact_pDofAcc2.5e-07_rJumpXVel1.4_noPropNoise_propDelay0.04-0.05_pCollision-1e+00_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct20_16-09-47"
# load_run = "Oct22_08-47-56_Skills_jump_rTrackVel2._pJumpSameLegs0.8_noPContact_pDofAcc2.5e-07_rJumpXVel1._noPropNoise_propDelay0.04-0.05_pCollision-4e-01_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct22_02-20-52"
# load_run = "Oct22_08-47-13_Skills_jump_rTrackVel2._pJumpSameLegs0.6_noPContact_pDofAcc2.5e-07_rJumpXVel1.2_noPropNoise_propDelay0.04-0.05_pCollision-6e-01_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct22_02-20-52"
# load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct22_15-19-26_Skills_jump_rTrackVel3._pJumpSameLegs0.6_noPContact_pDofAcc1.5e-07_rJumpXVel1.5_noPropNoise_propDelay0.04-0.05_pCollision-6e-01_noPush_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct22_08-47-13")
# load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct24_07-41-26_Skills_jump_rTrackVel4._pY-2e-01_pJumpSameLegs0.2_noPContact_pDofAcc2.5e-07_rJumpXVel1.5_noPropNoise_propDelay0.04-0.05_pCollision-1e-01_noPush_minStd0.2_noTanh_zeroResetAction_jumpRange0.4-0.5_fromOct20_16-09-47")
# load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct25_11-36-16_Skills_jump_pEnergySubsteps3e-05_rTrackVel4._pY-3e-01_propDelay0.04-0.05_noPropNoise_noPush_noTanh_zeroResetAction_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct24_07-41-26")
# load_run = "Oct27_08-20-20_Skills_jump_rTrackVel5._noPPen_pY-3e-01_pSyncSymLegs6e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct25_11-36-16"
# load_run = "Oct27_09-17-23_Skills_jump_rTrackVel5._noPPen_pDofErr6e-02_pDofErrCond6e-01_pSyncSymLegs6e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_08-20-20"
#### Force hip_pos and sync_legs
# load_run = "Oct27_13-20-59_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._pDofErr4e-02_pDofErrCond4e-01_pHipPos5e-01_pCollision5e+00_pSyncSymLegs6e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_09-17-23"
# load_run = "Oct27_13-24-58_Skills_jump_pEnergySubsteps3e-05_rTrackVel5._pDofErr4e-02_pDofErrCond3e-01_pHipPos3e-01_pCollision5e+00_pSyncSymLegs6e-01_propDelay0.04-0.05_withPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_09-17-23"
#### Adding RL gamma for energy penalty
load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct27_16-04-45_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._pPenD-6e-03_pDofErrCond6e-01_pHipPos5e-01_pCollision6e+00_pSyncSymLegs4e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_13-20-59")
#### Wide spreading legs first
# load_run = "Oct28_03-08-02_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._rAlive0.5_pDofErrCond8e-01_pHipPos5e-01_pSyncSymLegs2e-01_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_jumpRange0.4-0.5_fromOct27_16-04-45"
# load_run = "Oct28_07-07-03_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._rAlive1.0_pPenD-1e-03_pDofErrCond8e-01_pHipPos3e+00_pSyncSymLegs2e-01_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_jumpRange0.4-0.5_fromOct27_16-04-45"
load_run = "Oct28_08-54-23_Skills_jump_pEnergySubsteps1e-05_rTrackVel3._rAlive0.5_pYaw-1e-01_pHipPos5e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_jumpRange0.4-0.5_fromOct28_03-08-02"
load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct28_11-05-56_Skills_jump_pEnergySubsteps2e-06_rTrackVel5._pYaw-1e-01_pHipPos5e+00_pCollision1e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_jumpRange0.4-0.5_allowNegativeReward_fromOct28_08-54-23")
#### lowering max torques and energy
load_run = "Oct29_03-21-19_Skills_jump_pEnergySubsteps6e-06_rTrackVel5._pHipPos5e+00_pTorque2e-05_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.4-0.5_allowNegativeReward_fromOct28_11-05-56"
load_run = "Oct29_07-54-42_Skills_jump_pEnergySubsteps1e-05_rTrackVel5._pHipPos5e+00_pTorqueExceed6e-02_pTorque2e-03_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.4-0.5_allowNegativeReward_fromOct29_03-21-19"
load_run = "Oct29_12-18-08_Skills_jump_pEnergySubsteps1e-05_rTrackVel6._pTorqueExceed6e-02_pTorque1e-03_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_07-54-42"
load_run = "Oct29_15-18-02_Skills_jump_pEnergySubsteps1e-05_rTrackVel4._pTorqueExceed3e-02_pTorque1e-03_noJumpBonous_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_12-18-08"
load_run = "Oct29_16-37-44_Skills_jump_pEnergySubsteps1.2e-05_rTrackVel4._pTorqueExceed6e-02_pTorque2e-03_noJumpBonous_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_15-18-02"
load_run = "Oct29_20-17-31_Skills_jump_pEnergySubsteps5e-05_rTrackVel4._pTorqueExceed1e+00_pTorque4e-04_noJumpBonous_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_16-37-44"
load_run = "Oct30_11-11-43_Skills_jump_pEnergySubsteps6e-05_rTrackVel5._pY-8e-01_pTorqueExceed1.2e+00_pTorque4e-04_pDTorques1e-06_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_20-17-31"
load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct30_13-00-12_Skills_jump_pEnergySubsteps6e-05_rTrackVel5._pY-4e-01_pTorqueExceed1.8e+00_pTorque4e-04_pDTorques1e-05_propDelay0.04-0.05_noPropNoise_noPush_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_20-17-31")
run_name = "".join(["Skills_",
("Multi" if len(Go1JumpCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else (Go1JumpCfg.terrain.BarrierTrack_kwargs["options"][0] if Go1JumpCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")),
("_pEnergySubsteps" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.legs_energy_substeps, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "legs_energy_substeps", -2e-5) != -2e-5 else ""),
# ("_rTrackVel" + np.format_float_positional(Go1JumpCfg.rewards.scales.tracking_world_vel) if getattr(Go1JumpCfg.rewards.scales, "tracking_world_vel", 0.) != 0. else ""),
# ("_pLinVel" + np.format_float_positional(-Go1JumpCfg.rewards.scales.world_vel_l2norm) if getattr(Go1JumpCfg.rewards.scales, "world_vel_l2norm", 0.) != 0.0 else ""),
# ("_rAlive{:.1f}".format(Go1JumpCfg.rewards.scales.alive) if getattr(Go1JumpCfg.rewards.scales, "alive", 0.) != 0. else ""),
# ("_pPenD{:.0e}".format(Go1JumpCfg.rewards.scales.penetrate_depth) if getattr(Go1JumpCfg.rewards.scales, "penetrate_depth", 0.) < 0. else ""),
# ("_pYaw{:.0e}".format(Go1JumpCfg.rewards.scales.yaw_abs) if getattr(Go1JumpCfg.rewards.scales, "yaw_abs", 0.) < 0. else ""),
# ("_pY{:.0e}".format(Go1JumpCfg.rewards.scales.lin_pos_y) if getattr(Go1JumpCfg.rewards.scales, "lin_pos_y", 0.) < 0. else ""),
# ("_pDofLimit{:.0e}".format(Go1JumpCfg.rewards.scales.exceed_dof_pos_limits) if getattr(Go1JumpCfg.rewards.scales, "exceed_dof_pos_limits", 0.) < 0. else ""),
# ("_pDofErr" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.dof_error, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "dof_error", 0.) != 0. else ""),
# ("_pDofErrCond" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.dof_error_cond, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "dof_error_cond", 0.) != 0. else ""),
# ("_pHipPos" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.hip_pos, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "hip_pos", 0.) != 0. else ""),
("_pFHipPos" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.front_hip_pos, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "front_hip_pos", 0.) != 0. else ""),
("_pRHipPos" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.rear_hip_pos, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "rear_hip_pos", 0.) != 0. else ""),
# ("_pTorqueExceedI" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.exceed_torque_limits_i, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "exceed_torque_limits_i", 0.) != 0. else ""),
("_pTorqueExceed" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.exceed_torque_limits_l1norm, precision=1, trim="-") if Go1JumpCfg.rewards.scales.exceed_torque_limits_l1norm != -8e-1 else ""),
# ("_pContact" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.feet_contact_forces, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "feet_contact_forces", 0.) != 0. else ""),
# ("_pJumpSameLegs{:.1f}".format(-Go1JumpCfg.rewards.scales.sync_legs_cond) if getattr(Go1JumpCfg.rewards.scales, "sync_legs_cond", 0.) != 0. else ""),
# ("_pDofAcc" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.dof_acc, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "dof_acc", 0.) != 0. else ""),
# ("_pTorque" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.torques, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "torques", 0.) != 0. else ""),
# ("_rJumpXVel" + np.format_float_positional(Go1JumpCfg.rewards.scales.jump_x_vel_cond) if getattr(Go1JumpCfg.rewards.scales, "jump_x_vel_cond", 0.) != 0. else "_noJumpBonous"),
# ("_pCollision" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.collision, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "collision", 0.) < 0. else ""),
# ("_pSyncSymLegs" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.sync_all_legs_cond, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "sync_all_legs_cond", 0.) != 0. else ""),
# ("_pZVel" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.lin_vel_z, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "lin_vel_z", 0.) != 0. else ""),
# ("_pAngXYVel" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.ang_vel_xy, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "ang_vel_xy", 0.) != 0. else ""),
# ("_pDTorques" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.delta_torques, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "delta_torques", 0.) != 0. else ""),
("_propDelay{:.2f}-{:.2f}".format(
Go1JumpCfg.sensor.proprioception.latency_range[0],
Go1JumpCfg.sensor.proprioception.latency_range[1],
)),
# ("_softDof{:.1f}".format(Go1JumpCfg.rewards.soft_dof_pos_limit) if Go1JumpCfg.rewards.soft_dof_pos_limit != 0.9 else ""),
# ("_timeoutFinished" if getattr(Go1JumpCfg.termination, "timeout_at_finished", False) else ""),
# ("_noPitchTerm" if "pitch" not in Go1JumpCfg.termination.termination_terms else ""),
("_noPropNoise" if not Go1JumpCfg.noise.add_noise else "_withPropNoise"),
("_noPush" if not Go1JumpCfg.domain_rand.push_robots else "_pushRobot"),
("_minStd0.21"),
("_entropy0.01"),
# ("_gamma0.999"),
("_noTanh"),
# ("_zeroResetAction" if Go1JumpCfg.init_state.zero_actions else ""),
# ("_actionClip" + Go1JumpCfg.normalization.clip_actions_method if getattr(Go1JumpCfg.normalization, "clip_actions_method", "") != "" else ""),
# ("_noDelayActObs" if not Go1JumpCfg.sensor.proprioception.delay_action_obs else ""),
("_noComputerClip" if not Go1JumpCfg.control.computer_clip_torque else ""),
("_jumpRange{:.1f}-{:.1f}".format(*Go1JumpCfg.terrain.BarrierTrack_kwargs["jump"]["height"]) if Go1JumpCfg.terrain.BarrierTrack_kwargs["jump"]["height"] != (0.1, 0.6) else ""),
# ("_noCurriculum" if not Go1JumpCfg.terrain.curriculum else ""),
# ("_allowNegativeReward" if not Go1JumpCfg.rewards.only_positive_rewards else ""),
("_from" + "_".join(load_run.split("/")[-1].split("_")[:2]) if resume else "_noResume"),
])
max_iterations = 20000
save_interval = 200
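# Illustrative sketch (not part of the original config) of how the conditional
# fragments above compose an experiment name. The scale values below are
# hypothetical placeholders, chosen only to show the formatting output.
import numpy as np
legs_energy_substeps = -6e-05   # hypothetical scale; default assumed to be -2e-5
hip_pos = -5.0                  # hypothetical scale
example_run_name = "".join([
    "Skills_jump",
    ("_pEnergySubsteps" + np.format_float_scientific(-legs_energy_substeps, precision=1, trim="-")
     if legs_energy_substeps != -2e-5 else ""),   # tag appears only when the scale is non-default
    ("_pHipPos" + np.format_float_scientific(-hip_pos, precision=1, trim="-")
     if hip_pos != 0. else ""),
])
# -> e.g. "Skills_jump_pEnergySubsteps6e-05_pHipPos5e+00"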

View File

@ -0,0 +1,184 @@
from os import path as osp
import numpy as np
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict
class Go1LeapCfg( Go1FieldCfg ):
class init_state( Go1FieldCfg.init_state ):
pos = [0., 0., 0.4]
#### uncomment this to train non-virtual terrain
class sensor( Go1FieldCfg.sensor ):
class proprioception( Go1FieldCfg.sensor.proprioception ):
latency_range = [0.04-0.0025, 0.04+0.0075]
# latency_range = [0.06-0.005, 0.06+0.005]
#### uncomment the above to train non-virtual terrain
class terrain( Go1FieldCfg.terrain ):
max_init_terrain_level = 2
border_size = 5
slope_treshold = 20.
curriculum = False
BarrierTrack_kwargs = merge_dict(Go1FieldCfg.terrain.BarrierTrack_kwargs, dict(
options= [
"leap",
],
leap= dict(
length= (0.3, 0.8),
depth= (0.4, 0.8),
height= 0.15,
),
wall_height= -0.4,
virtual_terrain= True, # Change this to False for real terrain
no_perlin_threshold= 0.06,
))
TerrainPerlin_kwargs = merge_dict(Go1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
zScale= [0.05, 0.12],
))
class commands( Go1FieldCfg.commands ):
class ranges( Go1FieldCfg.commands.ranges ):
lin_vel_x = [1.5, 1.8]
lin_vel_y = [0.0, 0.0]
ang_vel_yaw = [0., 0.]
class control( Go1FieldCfg.control ):
computer_clip_torque = True
class termination( Go1FieldCfg.termination ):
# additional factors that determine whether to terminate the episode
termination_terms = [
"roll",
"pitch",
"z_low",
"z_high",
"out_of_track",
]
# roll_kwargs = merge_dict(Go1FieldCfg.termination.roll_kwargs, dict(
# threshold= 0.7,
# leap_threshold= 0.7,
# ))
# pitch_kwargs = merge_dict(Go1FieldCfg.termination.pitch_kwargs, dict(
# threshold= 0.7,
# leap_threshold= 0.7,
# ))
z_low_kwargs = merge_dict(Go1FieldCfg.termination.z_low_kwargs, dict(
threshold= -0.1,
))
z_high_kwargs = merge_dict(Go1FieldCfg.termination.z_high_kwargs, dict(
threshold= 2.0,
))
class domain_rand( Go1FieldCfg.domain_rand ):
class com_range( Go1FieldCfg.domain_rand.com_range ):
z = [-0.2, 0.2]
init_base_pos_range = dict(
x= [0.2, 0.8],
y= [-0.25, 0.25],
)
init_base_rot_range = dict(
roll= [-0.1, 0.1],
pitch= [-0.1, 0.1],
)
push_robots = False
class rewards( Go1FieldCfg.rewards ):
class scales:
tracking_ang_vel = 0.05
# world_vel_l2norm = -1.
tracking_world_vel = 5.
# leap_bonous_cond = 6.
legs_energy_substeps = -7e-6 # 5e5 spike (not scaled)
# alive = 3.
penetrate_depth = -1e-2 # 8 spike (not scaled)
penetrate_volume = -1e-4 # 100 spike (not scaled)
exceed_dof_pos_limits = -8e-1
exceed_torque_limits_l1norm = -1.
# action_rate = -0.1
delta_torques = -1e-7
dof_acc = -1e-7
torques = -4e-5 # 2000 spike (not scaled)
yaw_abs = -0.2
collision = -1.
lin_pos_y = -0.4
orientation = -0.1 # 0.3 segment (not scaled)
hip_pos = -5. # 0.5 spike (not scaled)
dof_error = -0.15 # 1.2 spike (not scaled)
tracking_sigma = 0.35
only_positive_rewards = False
soft_dof_pos_limit = 0.7
class curriculum( Go1FieldCfg.curriculum ):
# penetrate_volume_threshold_harder = 9000
# penetrate_volume_threshold_easier = 10000
# penetrate_depth_threshold_harder = 300
# penetrate_depth_threshold_easier = 5000
####### use extremely large values to disable the penetration curriculum
penetrate_volume_threshold_harder = 900000
penetrate_volume_threshold_easier = 1000000
penetrate_depth_threshold_harder = 30000
penetrate_depth_threshold_easier = 500000
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class Go1LeapCfgPPO( Go1FieldCfgPPO ):
class runner( Go1FieldCfgPPO.runner ):
resume = True
load_run = "{Your traind walking model directory}"
# load_run = "Sep20_03-37-32_SkillopensourcePlaneWalking_pEnergySubsteps1e-5_pTorqueExceedIndicate1e-1_aScale0.5_tClip202025"
# load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Sep27_14-56-25_Skills_leap_propDelay0.04-0.05_pEnergySubsteps-8e-06_pDofLimit8e-01_pCollision0.1_kp40_kd0.5fromSep27_02-44-48")
load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct09_09-51-58_Skills_leap_propDelay0.04-0.05_pEnergySubsteps-8e-06_pPenD1.e-2_pDofLimit4e-01_pCollision0.5_kp40_kd0.5fromOct05_02-16-22")
load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct25_11-39-12_Skills_leap_pEnertySubsteps4.e-5_pActRate2.e-1_pDofLimit8.e-1_pCollision5.e-1_noPropNoise_noTanh_zeroResetAction_actionCliphard_fromOct09_09-51-58")
load_run = "Oct28_03-32-35_Skills_leap_pEnertySubsteps2.e-5_rTrackVel2._rAlive2.0_pActRate2.e-1_pHipPos5.e+0_noTanh_actionCliphard_virtual_fromOct25_11-39-12"
load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct28_13-23-39_Skills_leap_pEnertySubsteps2.e-5_rTrackVel2._rAlive2.0_pSyncAllLegs6.e-1_pOrient6.e-1_pHipPos5.e+0_noTanh_actionCliphard_virtual_fromOct28_03-32-35")
load_run = "Oct29_04-23-33_Skills_leap_pEnertySubsteps6.e-6_rTrackVel6._pTorques2.e-5_pHipPos5.e+0_pDorErrCond8.e-2_noComputerClip_noTanh_allowNegativeReward_actionCliphard_virtual_fromOct28_13-23-39"
# load_run = "Oct29_08-30-23_Skills_leap_pEnertySubsteps6.e-6_rTrackVel6._pTorqueExceed2.e-1_pPosY2.e-1_pTorques2.e-5_pHipPos5.e+0_noComputerClip_noTanh_allowNegativeReward_actionCliphard_virtual_fromOct29_04-23-33"
# load_run = "Oct29_15-46-30_Skills_leap_pEnertySubsteps6.e-6_rTrackVel6._pTorqueExceed2.e-1_pOrient3.e-1_pTorques2.e-5_noComputerClip_noTanh_allowNegativeReward_actionCliphard_virtual_fromOct29_08-30-23"
load_run = "Oct30_02-07-26_Skills_leap_pEnertySubsteps6.e-6_rTrackVel6._pTorqueExceed1.e-1_pPosY4.e-1_pTorques2.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_allowNegativeReward_actionCliphard_virtual_fromOct29_04-23-33"
load_run = "Oct30_04-10-12_Skills_leap_pEnertySubsteps4.e-6_rTrackVel5._pTorqueExceed3.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_02-07-26"
load_run = "Oct30_05-07-24_Skills_leap_pEnertySubsteps5.e-6_rTrackVel5._pTorqueExceed4.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_04-10-12"
load_run = "Oct30_05-08-03_Skills_leap_pEnertySubsteps6.e-6_rTrackVel5._pTorqueExceed4.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_04-10-12"
load_run = "Oct30_05-08-20_Skills_leap_pEnertySubsteps6.e-6_rTrackVel5._pTorqueExceed5.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_04-10-12"
load_run = "Oct30_07-32-10_Skills_leap_pEnertySubsteps6.e-6_rTrackVel5._pTorqueExceed8.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_pDorErr1.5e-1_noPenCurriculum_noCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_05-07-24"
run_name = "".join(["Skills_",
("Multi" if len(Go1LeapCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else (Go1LeapCfg.terrain.BarrierTrack_kwargs["options"][0] if Go1LeapCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")),
("_pEnertySubsteps" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.legs_energy_substeps, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "legs_energy_substeps", 0.0) != 0.0 else ""),
("_rTrackVel" + np.format_float_positional(Go1LeapCfg.rewards.scales.tracking_world_vel) if getattr(Go1LeapCfg.rewards.scales, "tracking_world_vel", 0.) != 0. else ""),
("_rAlive{:.1f}".format(Go1LeapCfg.rewards.scales.alive) if getattr(Go1LeapCfg.rewards.scales, "alive", 0.) != 0. else ""),
# ("_pActRate" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.action_rate, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "action_rate", 0.) != 0.0 else ""),
# ("_pDofAcc" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.dof_acc, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "dof_acc", 0.) != 0.0 else ""),
# ("_pDofLimit" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.exceed_dof_pos_limits, precision=1, exp_digits=1) if Go1LeapCfg.rewards.scales.exceed_dof_pos_limits != 0.0 else ""),
# ("_pCollision" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.collision, precision=1, exp_digits=1) if Go1LeapCfg.rewards.scales.collision != 0.0 else ""),
# ("_leapBonousCond" if getattr(Go1LeapCfg.rewards.scales, "leap_bonous_cond", 0.) != 0.0 else ""),
("_pTorqueExceed" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.exceed_torque_limits_l1norm, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "exceed_torque_limits_l1norm", 0.) != 0.0 else ""),
# ("_pSyncAllLegs" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.sync_all_legs, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "sync_all_legs", 0.) != 0.0 else ""),
# ("_pOrient" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.orientation, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "orientation", 0.) != 0.0 else ""),
# ("_pPenD" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.penetrate_depth, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "penetrate_depth", 0.) != 0.0 else ""),
("_pPosY" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.lin_pos_y, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "lin_pos_y", 0.) != 0.0 else ""),
# ("_pPenV" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.penetrate_volume, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "penetrate_volume", 0.) != 0.0 else ""),
("_pTorques" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.torques, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "torques", 0.) != 0.0 else ""),
("_pHipPos" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.hip_pos, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "hip_pos", 0.) != 0.0 else ""),
# ("_pDorErrCond" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.dof_error_cond, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "dof_error_cond", 0.) != 0.0 else ""),
("_pDorErr" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.dof_error, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "dof_error", 0.) != 0.0 else ""),
# ("_noPropNoise" if not Go1LeapCfg.noise.add_noise else ""),
("_noPenCurriculum" if Go1LeapCfg.curriculum.penetrate_volume_threshold_harder > 2e5 else ""),
("_noCurriculum" if not Go1LeapCfg.terrain.curriculum else ""),
("_noComputerClip" if not Go1LeapCfg.control.computer_clip_torque else ""),
("_noTanh"),
("_noPush" if not Go1LeapCfg.domain_rand.push_robots else "_pushRobot"),
# ("_zeroResetAction" if Go1LeapCfg.init_state.zero_actions else ""),
("_allowNegativeReward" if not Go1LeapCfg.rewards.only_positive_rewards else ""),
("_actionClip" + Go1LeapCfg.normalization.clip_actions_method if getattr(Go1LeapCfg.normalization, "clip_actions_method", "") != "" else ""),
("_virtual" if Go1LeapCfg.terrain.BarrierTrack_kwargs["virtual_terrain"] else ""),
("_from" + "_".join(load_run.split("/")[-1].split("_")[:2]) if resume else ""),
])
max_iterations = 20000
save_interval = 200
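# Rough sketch of the assumed behavior of legged_gym.utils.helpers.merge_dict (an
# assumption, not its actual source): the configs above use it to override only the
# listed keys of the parent BarrierTrack_kwargs / TerrainPerlin_kwargs and inherit the rest.
def merge_dict_sketch(base: dict, update: dict) -> dict:
    merged = dict(base)                       # shallow copy of the parent kwargs
    for key, value in update.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = merge_dict_sketch(merged[key], value)   # recurse into nested dicts
        else:
            merged[key] = value               # plain override
    return merged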

View File

@ -0,0 +1,120 @@
import numpy as np
import os.path as osp
from legged_gym.utils.helpers import merge_dict
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO
class Go1RemoteCfg( Go1FieldCfg ):
class env( Go1FieldCfg.env ):
num_envs = 4096
obs_components = [
"proprioception", # 48
]
privileged_obs_components = [
"proprioception",
"robot_config",
]
use_lin_vel = False
privileged_use_lin_vel = True
class init_state( Go1FieldCfg.init_state ):
pos = [0., 0., 0.42]
class terrain( Go1FieldCfg.terrain ):
num_rows = 6
num_cols = 6
selected = "TerrainPerlin"
TerrainPerlin_kwargs = dict(
zScale= 0.15,
frequency= 10,
)
class commands( Go1FieldCfg.commands ):
class ranges( Go1FieldCfg.commands.ranges ):
lin_vel_x = [-1.0, 1.0]
lin_vel_y = [-1., 1.]
ang_vel_yaw = [-1., 1.]
class domain_rand( Go1FieldCfg.domain_rand ):
init_base_pos_range = dict(
x= [-0.1, 0.1],
y= [-0.1, 0.1],
)
init_base_rot_range = dict(
roll= [-0.4, 0.4],
pitch= [-0.4, 0.4],
)
init_base_vel_range = merge_dict(Go1FieldCfg.domain_rand.init_base_vel_range, dict(
x= [-0.8, 1.5],
y= [-0.8, 0.8],
))
class rewards( Go1FieldCfg.rewards ):
class scales:
###### hacked from the Field config
tracking_ang_vel = 1.
tracking_lin_vel = 3.
# lin_vel_l2norm = -1.
alive = 1.
legs_energy_substeps = -6e-7
# penalty for hardware safety
# exceed_dof_pos_limits = -4e-2
exceed_torque_limits_l1norm = -1e-2
# penalties for walking gait, probably not needed
lin_vel_z = -0.5
ang_vel_xy = -0.05
orientation = -4.
dof_acc = -1.e-7
collision = -10.
action_rate = -0.01
delta_torques = -1e-7
torques = -1.e-5
hip_pos = -0.4
dof_error = -0.04
stand_still = -0.6
only_positive_rewards = False
soft_dof_pos_limit = 0.6
class normalization( Go1FieldCfg.normalization ):
clip_actions_method = "hard"
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class Go1RemoteCfgPPO( Go1FieldCfgPPO ):
class algorithm( Go1FieldCfgPPO.algorithm ):
entropy_coef = 0.01
clip_min_std = 0.02
class runner( Go1FieldCfgPPO.runner ):
resume = True
# load_run = osp.join(logs_root, "field_a1/Sep26_15-33-45_WalkByRemote_pEnergySubsteps2e-5_pTorqueL18e-01_pCollision0.2_propDelay0.04-0.05_aScale0.5_kp40_kd0.5_maxPushAng0.5_noTanh_hardActClip_noResume")
load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Sep28_11-53-30_WalkByRemote_pEnergySubsteps-4e-05_kp40_kd0.5_noTanh_fromSep26_15-33-45")
load_run = "Oct04_16-15-17_WalkByRemote_pEnergySubsteps-1e-04_softDof0.5_pStand0.0_pDofAcc5e-7_kp40_kd0.5_noTanh_fromSep28_11-53-30"
load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct12_16-01-06_WalkByRemote_pLin1.0_pFootForce1.e-02_xComMin-0.2_maxForce200_propDelay0.04-0.05_entropyCoef0.01_noTanh_fromOct11_04-34-39")
load_run = "Oct23_03-33-29_WalkByRemote_pEnergySubsteps-1e-05_rLinTrack3.0_softDof0.6_pStand1.0_kp40_kd0.5_noTanh_fromOct12_16-01-06"
load_run = "Oct23_11-01-47_WalkByRemote_pEnergySubsteps-1e-05_rLinTrack5.0_pCollide1e-1_pOrient1e-1_pStand2e-1_softDof0.6_kp40_kd0.5_noTanh_fromOct23_03-33-29"
load_run = "Oct23_13-14-42_WalkByRemote_pEnergySubsteps1e-05_rLinTrack5.0_pDofAcc2.5e-7_pCollide1e-1_pOrient1e-1_pExceedDof8e-1_pStand4e-1_softDof0.6_noTanh_EntropyCoef0.01_fromOct23_11-01-47"
load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct12_16-00-55_WalkByRemote_pLin1.0_pFootForce1.e-02_xComMin-0.2_maxForce200_propDelay0.05-0.07_entropyCoef0.01_noTanh_fromOct11_04-34-39")
load_run = "Oct24_15-05-23_WalkByRemote_rLinTrack3.0_pActRate1e-1_softDof0.6_noTanh_zeroResetAction_hackReset_EntropyCoef0.01_actionCliphard_fromOct12_16-00-55"
run_name = "".join(["WalkByRemote",
("_pEnergySubsteps{:.0e}".format(-Go1RemoteCfg.rewards.scales.legs_energy_substeps) if getattr(Go1RemoteCfg.rewards.scales, "legs_energy_substeps", 0.) != 0. else ""),
("_rLinTrack{:.1f}".format(Go1RemoteCfg.rewards.scales.tracking_lin_vel) if getattr(Go1RemoteCfg.rewards.scales, "tracking_lin_vel", 0.) != 0. else ""),
("_pLinVelL2{:.1f}".format(-Go1RemoteCfg.rewards.scales.lin_vel_l2norm) if getattr(Go1RemoteCfg.rewards.scales, "lin_vel_l2norm", 0.) != 0. else ""),
("_rAlive{:.1f}".format(Go1RemoteCfg.rewards.scales.alive) if getattr(Go1RemoteCfg.rewards.scales, "alive", 0.) != 0. else ""),
# ("_pDofAcc" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.dof_acc, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "dof_acc", 0.) != 0. else ""),
# ("_pCollide" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.collision, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "collision", 0.) != 0. else ""),
# ("_pOrient" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.orientation, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "orientation", 0.) != 0. else ""),
# ("_pExceedDof" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.exceed_dof_pos_limits, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "exceed_dof_pos_limits", 0.) != 0. else ""),
("_pExceedTorqueL1" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.exceed_torque_limits_l1norm, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "exceed_torque_limits_l1norm", 0.) != 0. else ""),
# ("_pStand" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.stand_still, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "stand_still", 0.) != 0. else ""),
("_pActRate" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.action_rate, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "action_rate", 0.) != 0. else ""),
("_softDof{:.1f}".format(Go1RemoteCfg.rewards.soft_dof_pos_limit) if Go1RemoteCfg.rewards.soft_dof_pos_limit != 0.9 else ""),
# ("_kp{:d}".format(int(Go1RemoteCfg.control.stiffness["joint"])) if Go1RemoteCfg.control.stiffness["joint"] != 50 else ""),
# ("_kd{:.1f}".format(Go1RemoteCfg.control.damping["joint"]) if Go1RemoteCfg.control.damping["joint"] != 1. else ""),
("_noTanh"),
# ("_zeroResetAction" if Go1RemoteCfg.init_state.zero_actions else ""),
("_EntropyCoef0.01"),
("_actionClip" + Go1RemoteCfg.normalization.clip_actions_method if getattr(Go1RemoteCfg.normalization, "clip_actions_method", None) is not None else ""),
("_noResume" if not resume else "_from" + "_".join(load_run.split("/")[-1].split("_")[:2])),
])
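# Sketch of the legged_gym-style tracking term that tracking_lin_vel = 3. scales
# (assuming the usual exp(-squared-error / sigma) form; not copied from this repo).
import torch

def tracking_lin_vel_sketch(commands: torch.Tensor, base_lin_vel: torch.Tensor,
                            tracking_sigma: float = 0.25) -> torch.Tensor:
    # squared xy velocity tracking error per env, mapped into (0, 1]
    lin_vel_error = torch.sum(torch.square(commands[:, :2] - base_lin_vel[:, :2]), dim=1)
    return torch.exp(-lin_vel_error / tracking_sigma)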

View File

@ -0,0 +1,90 @@
from os import path as osp
import numpy as np
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict
class Go1TiltCfg( Go1FieldCfg ):
#### uncomment this to train non-virtual terrain
# class sensor( Go1FieldCfg.sensor ):
# class proprioception( Go1FieldCfg.sensor.proprioception ):
# delay_action_obs = True
# latency_range = [0.04-0.0025, 0.04+0.0075]
#### uncomment the above to train non-virtual terrain
class terrain( Go1FieldCfg.terrain ):
max_init_terrain_level = 2
border_size = 5
slope_treshold = 20.
curriculum = True
BarrierTrack_kwargs = merge_dict(Go1FieldCfg.terrain.BarrierTrack_kwargs, dict(
options= [
"tilt",
],
tilt= dict(
width= (0.24, 0.8),
depth= (0.4, 1.), # size along the forward axis
opening_angle= 0.0, # [rad] an opening that makes it easier for the robot to get into the obstacle
wall_height= 0.5,
),
virtual_terrain= True, # Change this to False for real terrain
no_perlin_threshold= 0.06,
))
TerrainPerlin_kwargs = merge_dict(Go1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
zScale= [0.05, 0.1],
))
class commands( Go1FieldCfg.commands ):
class ranges( Go1FieldCfg.commands.ranges ):
lin_vel_x = [0.3, 0.6]
lin_vel_y = [0.0, 0.0]
ang_vel_yaw = [0., 0.]
class termination( Go1FieldCfg.termination ):
# additional factors that determine whether to terminate the episode
termination_terms = [
"roll",
"pitch",
"z_low",
"z_high",
"out_of_track",
]
roll_kwargs = merge_dict(Go1FieldCfg.termination.roll_kwargs, dict(
threshold= 0.4,
leap_threshold= 0.4,
))
z_high_kwargs = merge_dict(Go1FieldCfg.termination.z_high_kwargs, dict(
threshold= 2.0,
))
class rewards( Go1FieldCfg.rewards ):
class scales:
tracking_ang_vel = 0.05
world_vel_l2norm = -1.
legs_energy_substeps = -1e-6
alive = 2.
penetrate_depth = -4e-3
penetrate_volume = -4e-3
exceed_dof_pos_limits = -1e-1
exceed_torque_limits_i = -1e-1
class curriculum( Go1FieldCfg.curriculum ):
penetrate_volume_threshold_harder = 9000
penetrate_volume_threshold_easier = 10000
penetrate_depth_threshold_harder = 300
penetrate_depth_threshold_easier = 5000
class Go1TiltCfgPPO( Go1FieldCfgPPO ):
class runner( Go1FieldCfgPPO.runner ):
run_name = "".join(["Skill",
("Multi" if len(Go1TiltCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else (Go1TiltCfg.terrain.BarrierTrack_kwargs["options"][0] if Go1TiltCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")),
("_tiltMax{:.1f}".format(Go1TiltCfg.terrain.BarrierTrack_kwargs["tilt"]["width"][1]) if Go1TiltCfg.terrain.BarrierTrack_kwargs["tilt"]["width"][1] > 0.5 else ""),
])
resume = True
load_run = "{Your traind walking model directory}"
load_run = "Sep20_03-37-32_SkillopensourcePlaneWalking_pEnergySubsteps1e-5_pTorqueExceedIndicate1e-1_aScale0.5_tClip202025"
max_iterations = 20000
save_interval = 500
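# Sketch of how a ranged obstacle parameter such as tilt width = (0.24, 0.8) is
# presumably resolved, following the pattern of get_jump_track later in this commit:
# linear interpolation with the per-track difficulty when a difficulty is given,
# uniform sampling otherwise. Names here are illustrative, not the terrain code's own.
import numpy as np

def resolve_ranged_param(param_range, difficulty=None):
    low, high = param_range
    if difficulty is None:
        return np.random.uniform(low, high)            # no curriculum: random draw
    return (1 - difficulty) * low + difficulty * high  # curriculum: interpolate with difficulty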

View File

@ -7,13 +7,13 @@ import argparse
def main(args):
for data_dir in args.data_dirs:
if osp.isfile(osp.join(data_dir, "metadata.json")):
shutil.move(
shutil.copy2(
osp.join(data_dir, "metadata.json"),
osp.join(osp.dirname(data_dir), data_dir.split("/")[-1] + ".json")
)
print(f"Moved metadata.json to {osp.join(osp.dirname(data_dir), data_dir.split('/')[-1] + '.json')}")
# removing the directory
if osp.isdir(data_dir):
if osp.isdir(data_dir) and (not osp.islink(data_dir)):
print("Removing directory: ", data_dir)
if not args.only_files:
shutil.rmtree(data_dir)
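# Net behavior of this script after the change, restated as a standalone sketch
# (function and argument names here are illustrative, not the script's own):
# metadata.json is copied (copy2 preserves timestamps) next to the data directory
# as "<dirname>.json", and only real directories (not symlinks) are removed.
import os.path as osp
import shutil

def archive_metadata_sketch(data_dir: str, remove_dir: bool = True):
    meta = osp.join(data_dir, "metadata.json")
    if osp.isfile(meta):
        shutil.copy2(meta, osp.join(osp.dirname(data_dir), osp.basename(data_dir) + ".json"))
    if remove_dir and osp.isdir(data_dir) and not osp.islink(data_dir):
        shutil.rmtree(data_dir)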

View File

@ -29,36 +29,34 @@ def main(args):
update_class_from_dict(train_cfg, d, strict= True)
# Some custom settings
env_cfg.terrain.BarrierTrack_kwargs["options"] = [
"tilt", # "tilt",
"crawl", # "crawl",
"climb", # "climb",
"climb", # "climb",
"leap", # "leap",
]
####### customized option to increase data distribution #######
# ####### customized option to increase data distribution #######
action_sample_std = 0.0
############# some predefined options #############
if len(env_cfg.terrain.BarrierTrack_kwargs["options"]) == 1:
env_cfg.terrain.BarrierTrack_kwargs["n_obstacles_per_track"] = 1
env_cfg.terrain.num_rows = 20; env_cfg.terrain.num_cols = 30
else: # for parkour env
# >>> option 1
env_cfg.domain_rand.added_mass_range = [1.5, 3.0]
env_cfg.terrain.BarrierTrack_kwargs["climb_down_prob"] = 0.45
env_cfg.terrain.num_rows = 60; env_cfg.terrain.num_cols = 1
env_cfg.terrain.BarrierTrack_kwargs["track_block_length"] = 2.8
env_cfg.terrain.BarrierTrack_kwargs["track_width"] = 2.4
env_cfg.terrain.BarrierTrack_kwargs["wall_thickness"] = (0.0, 0.6)
env_cfg.domain_rand.init_base_pos_range["x"] = (0.4, 1.8)
env_cfg.terrain.num_rows = 12; env_cfg.terrain.num_cols = 10
# >>> option 2
# env_cfg.terrain.BarrierTrack_kwargs["climb"]["depth"] = (0.1, 0.3)
# env_cfg.terrain.BarrierTrack_kwargs["climb"]["climb_down_prob"] = 0.55
# env_cfg.terrain.num_rows = 60; env_cfg.terrain.num_cols = 1
# env_cfg.terrain.BarrierTrack_kwargs["track_block_length"] = 3.
# env_cfg.terrain.BarrierTrack_kwargs["track_width"] = 4.0
# env_cfg.terrain.BarrierTrack_kwargs["wall_height"] = (-0.5, -0.2)
# env_cfg.terrain.BarrierTrack_kwargs["wall_thickness"] = (0.0, 1.4)
# env_cfg.domain_rand.init_base_pos_range["x"] = (1.6, 2.0)
# env_cfg.terrain.num_rows = 16; env_cfg.terrain.num_cols = 5
# >>> option 3
# env_cfg.terrain.BarrierTrack_kwargs["track_block_length"] = 1.6
# env_cfg.terrain.BarrierTrack_kwargs["track_width"] = 2.2
# env_cfg.terrain.BarrierTrack_kwargs["wall_height"] = (-0.5, 0.1)
# env_cfg.terrain.BarrierTrack_kwargs["wall_thickness"] = (0.0, 0.5)
# env_cfg.domain_rand.init_base_pos_range["x"] = (0.2, 0.9)
# env_cfg.terrain.BarrierTrack_kwargs["n_obstacles_per_track"] = 1
# env_cfg.terrain.BarrierTrack_kwargs["track_width"] = 1.6
# env_cfg.terrain.BarrierTrack_kwargs["wall_thickness"] = (0.0, 0.3)
# env_cfg.terrain.BarrierTrack_kwargs["track_block_length"] = 3.0
# env_cfg.terrain.BarrierTrack_kwargs["climb"]["depth"] = (0.5, 1.8)
# env_cfg.terrain.num_rows = 20; env_cfg.terrain.num_cols = 24
# action_sample_std = 0.1
# env_cfg.terrain.num_rows = 22; env_cfg.terrain.num_cols = 16
pass
if (env_cfg.terrain.BarrierTrack_kwargs["options"][0] == "leap") and all(i == env_cfg.terrain.BarrierTrack_kwargs["options"][0] for i in env_cfg.terrain.BarrierTrack_kwargs["options"]):
######### For leap, because the platform is usually higher than the ground.

View File

@ -101,7 +101,7 @@ def play(args):
env_cfg.terrain.curriculum = False
env_cfg.terrain.BarrierTrack_kwargs["options"] = [
# "crawl",
"climb",
"jump",
# "leap",
# "tilt",
]
@ -146,7 +146,7 @@ def play(args):
logs_root + "/field_a1_oracle/Jun03_00-33-08_Skills_climb_pEnergySubsteps2e-6_rAlive2_pTorqueExceedIndicate2e-1_propDelay0.04-0.05_noPerlinRate0.2_nSubsteps4_envFreq50.0_aScale0.5",
logs_root + "/field_a1_oracle/Jun04_01-03-59_Skills_leap_pEnergySubsteps2e-6_rAlive2_pPenV4e-3_pPenD4e-3_pPosY0.20_pYaw0.20_pTorqueExceedSquare1e-3_leapH0.2_propDelay0.04-0.05_noPerlinRate0.2_aScale0.5",
]
train_cfg.policy.climb_down_policy_path = logs_root + "/field_a1_oracle/Aug30_16-12-14_Skills_climb_climbDownProb0.5_propDelay0.04-0.05"
train_cfg.policy.jump_down_policy_path = logs_root + "/field_a1_oracle/Aug30_16-12-14_Skills_climb_climbDownProb0.5_propDelay0.04-0.05"
train_cfg.runner.resume = False
env_cfg.env.use_lin_vel = True
train_cfg.policy.cmd_vel_mapping = {

View File

@ -163,7 +163,7 @@ def update_cfg_from_args(env_cfg, cfg_train, args):
return env_cfg, cfg_train
def get_args():
def get_args(custom_args=[]):
custom_parameters = [
{"name": "--task", "type": str, "default": "anymal_c_flat", "help": "Resume training or start testing from a checkpoint. Overrides config file if provided."},
{"name": "--resume", "action": "store_true", "default": False, "help": "Resume training from a checkpoint"},
@ -178,7 +178,7 @@ def get_args():
{"name": "--num_envs", "type": int, "help": "Number of environments to create. Overrides config file if provided."},
{"name": "--seed", "type": int, "help": "Random seed. Overrides config file if provided."},
{"name": "--max_iterations", "type": int, "help": "Maximum number of training iterations. Overrides config file if provided."},
]
] + custom_args
# parse arguments
args = gymutil.parse_arguments(
description="RL Policy",

View File

@ -96,7 +96,9 @@ class Logger:
a.legend()
# plot base pitch
a = axs[0, 2]
if log["base_pitch"]: a.plot(time, log["base_pitch"], label='measured')
if log["base_pitch"]:
a.plot(time, log["base_pitch"], label='measured')
a.plot(time, [-0.75] * len(time), label= 'thresh')
# if log["command_yaw"]: a.plot(time, log["command_yaw"], label='commanded')
a.set(xlabel='time [s]', ylabel='base ang [rad]', title='Base pitch')
a.legend()
@ -113,11 +115,15 @@ class Logger:
a.plot(time, forces[:, i], label=f'force {i}')
a.set(xlabel='time [s]', ylabel='Forces z [N]', title='Vertical Contact forces')
a.legend()
# plot torque/vel curves
# # plot torque/vel curves
# a = axs[2, 1]
# if log["dof_vel"]!=[] and log["dof_torque"]!=[]: a.plot(log["dof_vel"], log["dof_torque"], 'x', label='measured')
# a.set(xlabel='Joint vel [rad/s]', ylabel='Joint Torque [Nm]', title='Torque/velocity curves')
# a.legend()
# plot power curves
a = axs[2, 1]
if log["dof_vel"]!=[] and log["dof_torque"]!=[]: a.plot(log["dof_vel"], log["dof_torque"], 'x', label='measured')
a.set(xlabel='Joint vel [rad/s]', ylabel='Joint Torque [Nm]', title='Torque/velocity curves')
a.legend()
if log["power"]!=[]: a.plot(time, log["power"], label='power [W]')
a.set(xlabel='time [s]', ylabel='Power [W]', title='Power')
# plot torques
a = axs[2, 2]
if log["dof_torque"]!=[]: a.plot(time, log["dof_torque"], label='measured')
@ -126,20 +132,29 @@ class Logger:
# plot rewards
a = axs[3, 0]
if log["max_torques"]: a.plot(time, log["max_torques"], label='max_torques')
if log["max_torque_motor"]: a.plot(time, log["max_torque_motor"], label='max_torque_motor')
if log["max_torque_leg"]: a.plot(time, log["max_torque_leg"], label='max_torque_leg')
a.set(xlabel='time [s]', ylabel='max_torques [Nm]', title='max_torques')
a.legend()
a.legend(fontsize= 5)
# plot custom data
a = axs[3, 1]
if log["student_action"]:
a.plot(time, log["student_action"], label='s')
a.plot(time, log["teacher_action"], label='t')
a.legend()
a.set(xlabel='time [s]', ylabel='tanh', title='student/teacher action')
a.set(xlabel='time [s]', ylabel='value before step()', title='student/teacher action')
a = axs[3, 2]
if log["teacher_action"]:
a.plot(time, [i-j for i, j in zip(log["student_action"], log["teacher_action"])], label='action difference')
a.set(xlabel='time [s]', ylabel='tanh', title='action difference')
a.legend()
a.plot(time, log["reward"], label='rewards')
for i in log["mark"]:
if i > 0:
a.plot(time, log["mark"], label='user mark')
break
for key in log.keys():
if "reward_removed_" in key:
a.plot(time, log[key], label= key)
a.set(xlabel='time [s]', ylabel='', title='rewards')
# a.set_ylim([-0.12, 0.1])
a.legend(fontsize = 5)
plt.show()
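# Sketch (an assumption, not this repo's logging code) of how the new "power"
# panel above could be fed each step, using quantities the Logger already receives.
import torch

def log_power_sketch(logger, torques: torch.Tensor, dof_vel: torch.Tensor):
    # instantaneous mechanical power [W], summed over all joints of one robot
    power = float(torch.sum(torques * dof_vel))
    logger.log_states({"power": power})   # assumes a legged_gym-style Logger.log_states API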
def print_rewards(self):

View File

@ -12,7 +12,7 @@ class BarrierTrack:
# default kwargs
track_kwargs = dict(
options= [
"climb",
"jump",
"crawl",
"tilt",
], # each race track will permute all the options
@ -22,12 +22,11 @@ class BarrierTrack:
track_block_length= 1.2, # the x-axis distance from the env origin point
wall_thickness= 0.04, # [m]
wall_height= 0.5, # [m]
climb= dict(
jump= dict(
height= 0.3,
depth= 0.04, # size along the forward axis
fake_offset= 0.0, # [m] fake offset will make climb's height info greater than its physical height.
fake_height= 0.0, # [m] offset/height only one of them can be non-zero
climb_down_prob= 0.0, # if > 0, will have a chance to climb down from the obstacle
fake_offset= 0.0, # [m] fake offset will make jump's height info greater than its physical height.
jump_down_prob= 0.0, # if > 0, will have a chance to jump down from the obstacle
),
crawl= dict(
height= 0.32,
@ -53,16 +52,18 @@ class BarrierTrack:
border_height= 0., # In case we want the surrounding plane to be lower than the track
virtual_terrain= False,
draw_virtual_terrain= False, # set True for visualization
check_skill_combinations= False, # check if some specific skills are connected, if set. e.g. climb -> leap
check_skill_combinations= False, # check if some specific skills are connected, if set. e.g. jump -> leap
engaging_next_threshold= 0., # if > 0, engaging_next is based on this threshold instead of track_block_length/2. Make sure the obstacle is not too long.
engaging_finish_threshold= 0., # an obstacle is considered finished only if the last volume point is this amount away from the block origin.
curriculum_perlin= True, # If True, the perlin noise scale will depend on the difficulty if possible.
no_perlin_threshold= 0.02, # If the perlin noise is too small, clip it to zero.
walk_in_skill_gap= False, # If True, the obstacle ID will be walk when the distance to the obstacle has not reached engaging_next_threshold
)
max_track_options = 4 # ("tilt", "crawl", "climb", "dynamic") at most
max_track_options = 4 # ("tilt", "crawl", "jump", "dynamic") at most
track_options_id_dict = {
"tilt": 1,
"crawl": 2,
"climb": 3,
"jump": 3,
"leap": 4,
} # track_ids are arranged in this order
def __init__(self, cfg, num_robots: int) -> None:
@ -88,7 +89,7 @@ class BarrierTrack:
# For each track block (n_options + 1 in total), 3 parameters are enabled:
# - track_id: int, starting track is 0, other numbers depends on the options order.
# - obstacle_depth: float,
# - obstacle_critical_params: e.g. tilt width, crawl height, climb height
# - obstacle_critical_params: e.g. tilt width, crawl height, jump height
# num_rows + 1 in case the robot finishes the entire row of tracks
self.track_info_map = torch.zeros(
@ -101,6 +102,11 @@ class BarrierTrack:
dtype= torch.float32,
device= self.device,
)
self.block_starting_height_map = torch.zeros(
(self.cfg.num_rows, self.cfg.num_cols, self.n_blocks_per_track),
dtype= torch.float32,
device= self.device,
) # height [m] relative to the world coordinate system
def initialize_track(self):
""" All track blocks are defined as follows
@ -126,7 +132,7 @@ class BarrierTrack:
np.ceil(self.track_kwargs["track_block_length"] / self.cfg.horizontal_scale).astype(int),
np.ceil(self.track_kwargs["track_width"] / self.cfg.horizontal_scale).astype(int),
)
self.n_blocks_per_track = (self.track_kwargs["n_obstacles_per_track"] + 1) if self.track_kwargs["randomize_obstacle_order"] else (len(self.track_kwargs["options"]) + 1)
self.n_blocks_per_track = (self.track_kwargs["n_obstacles_per_track"] + 1) if (self.track_kwargs["randomize_obstacle_order"] and len(self.track_kwargs["options"]) > 0) else (len(self.track_kwargs["options"]) + 1)
self.track_resolution = (
np.ceil(self.track_kwargs["track_block_length"] * self.n_blocks_per_track / self.cfg.horizontal_scale).astype(int),
np.ceil(self.track_kwargs["track_width"] / self.cfg.horizontal_scale).astype(int),
@ -183,7 +189,7 @@ class BarrierTrack:
height_offset_px = 0
return track_trimesh, track_heightfield, block_info, height_offset_px
def get_climb_track(self,
def get_jump_track(self,
wall_thickness,
trimesh_template,
heightfield_template,
@ -191,25 +197,25 @@ class BarrierTrack:
heightfield_noise= None,
virtual= False,
):
if isinstance(self.track_kwargs["climb"]["depth"], (tuple, list)):
if isinstance(self.track_kwargs["jump"]["depth"], (tuple, list)):
if not virtual:
climb_depth = min(*self.track_kwargs["climb"]["depth"])
jump_depth = min(*self.track_kwargs["jump"]["depth"])
else:
climb_depth = np.random.uniform(*self.track_kwargs["climb"]["depth"])
jump_depth = np.random.uniform(*self.track_kwargs["jump"]["depth"])
else:
climb_depth = self.track_kwargs["climb"]["depth"]
if isinstance(self.track_kwargs["climb"]["height"], (tuple, list)):
jump_depth = self.track_kwargs["jump"]["depth"]
if isinstance(self.track_kwargs["jump"]["height"], (tuple, list)):
if difficulty is None:
climb_height = np.random.uniform(*self.track_kwargs["climb"]["height"])
jump_height = np.random.uniform(*self.track_kwargs["jump"]["height"])
else:
climb_height = (1-difficulty) * self.track_kwargs["climb"]["height"][0] + difficulty * self.track_kwargs["climb"]["height"][1]
jump_height = (1-difficulty) * self.track_kwargs["jump"]["height"][0] + difficulty * self.track_kwargs["jump"]["height"][1]
else:
climb_height = self.track_kwargs["climb"]["height"]
if self.track_kwargs["climb"].get("climb_down_prob", 0.) > 0.:
if np.random.uniform() < self.track_kwargs["climb"]["climb_down_prob"]:
climb_height = -climb_height
depth_px = int(climb_depth / self.cfg.horizontal_scale)
height_value = climb_height / self.cfg.vertical_scale
jump_height = self.track_kwargs["jump"]["height"]
if self.track_kwargs["jump"].get("jump_down_prob", 0.) > 0.:
if np.random.uniform() < self.track_kwargs["jump"]["jump_down_prob"]:
jump_height = -jump_height
depth_px = int(jump_depth / self.cfg.horizontal_scale)
height_value = jump_height / self.cfg.vertical_scale
wall_thickness_px = int(wall_thickness / self.cfg.horizontal_scale) + 1
if not heightfield_noise is None:
@ -223,7 +229,7 @@ class BarrierTrack:
] += height_value
if height_value < 0.:
track_heightfield[
depth_px:,
(0 if virtual else depth_px):,
max(0, wall_thickness_px-1): min(-1, -wall_thickness_px+1),
] += height_value
track_trimesh = convert_heightfield_to_trimesh(
@ -233,12 +239,15 @@ class BarrierTrack:
self.cfg.slope_treshold,
)
assert not (
self.track_kwargs["climb"].get("fake_offset", 0.) != 0. and self.track_kwargs["climb"].get("fake_height", 0.) != 0.), \
self.track_kwargs["jump"].get("fake_offset", 0.) != 0. and self.track_kwargs["jump"].get("fake_height", 0.) != 0.), \
"fake_offset and fake_height cannot be both non-zero"
climb_height_ = climb_height + (self.track_kwargs["climb"].get("fake_offset", 0.) if self.track_kwargs["climb"].get("fake_offset", 0.) == 0. else self.track_kwargs["climb"].get("fake_height", 0.))
jump_height_ = jump_height + (
self.track_kwargs["jump"].get("fake_offset", 0.) \
if jump_height > 0. \
else 0.)
block_info = torch.tensor([
climb_depth,
climb_height_,
jump_depth,
jump_height_,
], dtype= torch.float32, device= self.device)
height_offset_px = height_value if not virtual else min(height_value, 0)
return track_trimesh, track_heightfield, block_info, height_offset_px
@ -399,9 +408,9 @@ class BarrierTrack:
track_heightfield = heightfield_template + heightfield_noise
else:
track_heightfield = heightfield_template.copy()
# There is no difference between virtual/non-virtual environment.
start_px = int(self.track_kwargs["leap"].get("fake_offset", 0.) / self.cfg.horizontal_scale) + 1 if not virtual else 1
track_heightfield[
1: length_px+1,
start_px: length_px+1,
max(0, wall_thickness_px-1): min(-1, -wall_thickness_px+1),
] -= depth_value
track_trimesh = convert_heightfield_to_trimesh(
@ -538,13 +547,14 @@ class BarrierTrack:
self.track_info_map[row_idx, col_idx, 0, 0] = 0
self.track_info_map[row_idx, col_idx, 0, 1:] = block_info
self.track_width_map[row_idx, col_idx] = self.env_width - wall_thickness * 2
self.block_starting_height_map[row_idx, col_idx, 0] = block_starting_height_px * self.cfg.vertical_scale
block_starting_height_px += height_offset_px
for obstacle_idx, obstacle_selection in enumerate(obstacle_order):
obstacle_name = self.track_kwargs["options"][obstacle_selection]
obstacle_id = self.track_options_id_dict[obstacle_name]
# call method to generate trimesh and heightfield for each track block.
# For example get_climb_track, get_tilt_track
# For example get_jump_track, get_tilt_track
# using `virtual_track` to create non-collision mesh for collocation method in training.
track_trimesh, track_heightfield, block_info, height_offset_px = getattr(self, "get_" + obstacle_name + "_track")(
wall_thickness,
@ -576,6 +586,7 @@ class BarrierTrack:
)
self.track_info_map[row_idx, col_idx, obstacle_idx + 1, 0] = obstacle_id
self.track_info_map[row_idx, col_idx, obstacle_idx + 1, 1:] = block_info
self.block_starting_height_map[row_idx, col_idx, obstacle_idx + 1] = block_starting_height_px * self.cfg.vertical_scale
block_starting_height_px += height_offset_px
return block_starting_height_px
@ -711,7 +722,7 @@ class BarrierTrack:
block_idx = torch.floor(forward_distance / self.env_block_length).to(int) # (N,)
block_idx_clipped = torch.clip(
block_idx,
0.,
0,
(self.n_blocks_per_track - 1),
)
in_track_mask = (track_idx == track_idx_clipped).all(dim= -1) & (block_idx == block_idx_clipped)
@ -757,6 +768,7 @@ class BarrierTrack:
else self.env_block_length / 2
min_x_points_offset = torch.min(volume_points_offset[:, :, 0], dim= -1)[0] # (n,)
engaging_next_block = ((in_block_distance + min_x_points_offset) > engaging_obstacle_depth) \
& (in_block_distance > self.track_kwargs.get("engaging_finish_threshold", 0.)) \
& (in_block_distance > engaging_next_distance)
# update the engaging track_idx and block_idx if engaging next
@ -790,7 +802,9 @@ class BarrierTrack:
0
].to(int)
if self.track_kwargs.get("walk_in_skill_gap", False):
between_skill_mask = ((in_block_distance + min_x_points_offset) > engaging_obstacle_depth) & (in_block_distance < engaging_next_distance)
between_skill_mask = ((in_block_distance + min_x_points_offset) > engaging_obstacle_depth) \
& (in_block_distance > self.track_kwargs.get("engaging_finish_threshold", 0.)) \
& (in_block_distance < engaging_next_distance)
obstacle_id_selection[between_skill_mask] = 0.
engaging_block_info[between_skill_mask] = 0.
engaging_obstacle_onehot[
@ -827,18 +841,35 @@ class BarrierTrack:
distance_neg_y,
], dim= -1) # (n, 2)
def get_climb_penetration_depths(self,
def get_jump_penetration_depths(self,
block_infos,
positions_in_block,
mask_only= False,
):
in_obstacle_mask = positions_in_block[:, 0] <= block_infos[:, 1]
climb_over_mask = positions_in_block[:, 2] > block_infos[:, 2]
penetrated_mask = torch.logical_and(in_obstacle_mask, (torch.logical_not(climb_over_mask)))
in_up_mask = torch.logical_and(
positions_in_block[:, 0] <= block_infos[:, 1],
block_infos[:, 2] > 0.,
)
in_down_mask = torch.logical_and(
positions_in_block[:, 0] <= block_infos[:, 1],
block_infos[:, 2] < 0.,
)
jump_over_mask = torch.logical_and(
positions_in_block[:, 2] > block_infos[:, 2],
positions_in_block[:, 2] > 0, # to avoid penetration of the virtual obstacle when jumping down.
) # (n_points,)
penetrated_mask = torch.logical_and(
torch.logical_or(in_up_mask, in_down_mask),
(torch.logical_not(jump_over_mask)),
)
if mask_only:
return penetrated_mask.to(torch.float32)
penetration_depths_buffer = torch.zeros_like(penetrated_mask, dtype= torch.float32)
penetration_depths_buffer[penetrated_mask] = block_infos[penetrated_mask, 2] - positions_in_block[penetrated_mask, 2]
penetrate_up_mask = torch.logical_and(penetrated_mask, in_up_mask)
penetration_depths_buffer[penetrate_up_mask] = block_infos[penetrate_up_mask, 2] - positions_in_block[penetrate_up_mask, 2]
penetrate_down_mask = torch.logical_and(penetrated_mask, in_down_mask)
penetration_depths_buffer[penetrate_down_mask] = 0. - positions_in_block[penetrate_down_mask, 2]
return penetration_depths_buffer
def get_tilt_penetration_depths(self,
@ -906,10 +937,11 @@ class BarrierTrack:
forward_distance = sample_points[:, 0] - self.cfg.border_size - (track_idx[:, 0] * self.env_length) # (N,) w.r.t a track
block_idx = torch.floor(forward_distance / self.env_block_length).to(int) # (N,)
block_idx[block_idx >= self.track_info_map.shape[2]] = 0.
positions_in_block = torch.cat([
(forward_distance % self.env_block_length).unsqueeze(-1),
sample_points[:, 1:] - self.env_origins_pyt[track_idx_clipped[:, 0], track_idx_clipped[:, 1]][:, 1:],
], dim= -1)
positions_in_block = torch.stack([
forward_distance % self.env_block_length,
sample_points[:, 1] - self.env_origins_pyt[track_idx_clipped[:, 0], track_idx_clipped[:, 1]][:, 1],
sample_points[:, 2] - self.block_starting_height_map[track_idx_clipped[:, 0], track_idx_clipped[:, 1], block_idx],
], dim= -1) # (N, 3) relative to the origin of the block, not the track.
block_infos = self.track_info_map[track_idx_clipped[:, 0], track_idx_clipped[:, 1], block_idx] # (N, 3)
penetration_depths = torch.zeros_like(sample_points[:, 0]) # shape (N,)
@ -967,23 +999,23 @@ class BarrierTrack:
return passed_depths
######## methods to draw visualization #######################
def draw_virtual_climb_track(self,
def draw_virtual_jump_track(self,
block_info,
xy_origin,
block_origin,
):
climb_depth = block_info[0]
climb_height = block_info[1]
jump_depth = block_info[1]
jump_height = block_info[2]
geom = gymutil.WireframeBoxGeometry(
climb_depth,
jump_depth if jump_height > 0 else self.track_kwargs["jump"].get("down_forward_length", 0.1),
self.env_width,
climb_height,
jump_height,
pose= None,
color= (0, 0, 1),
)
pose = gymapi.Transform(gymapi.Vec3(
climb_depth/2 + xy_origin[0],
self.env_width/2 + xy_origin[1],
climb_height/2,
jump_depth/2 + block_origin[0],
block_origin[1],
jump_height/2 + block_origin[2],
), r= None)
gymutil.draw_lines(
geom,
@ -995,10 +1027,10 @@ class BarrierTrack:
def draw_virtual_tilt_track(self,
block_info,
xy_origin,
block_origin,
):
tilt_depth = block_info[0]
tilt_width = block_info[1]
tilt_depth = block_info[1]
tilt_width = block_info[2]
wall_height = self.track_kwargs["tilt"]["wall_height"]
geom = gymutil.WireframeBoxGeometry(
tilt_depth,
@ -1009,9 +1041,9 @@ class BarrierTrack:
)
pose = gymapi.Transform(gymapi.Vec3(
tilt_depth/2 + xy_origin[0],
(self.env_width - tilt_width) / 4 + xy_origin[1],
wall_height/2,
tilt_depth/2 + block_origin[0],
block_origin[1] + (self.env_width + tilt_width) / 4,
wall_height/2 + block_origin[2],
), r= None)
gymutil.draw_lines(
geom,
@ -1021,9 +1053,9 @@ class BarrierTrack:
pose,
)
pose = gymapi.Transform(gymapi.Vec3(
tilt_depth/2 + xy_origin[0],
self.env_width - (self.env_width - tilt_width) / 4 + xy_origin[1],
wall_height/2,
tilt_depth/2 + block_origin[0],
block_origin[1] - (self.env_width + tilt_width) / 4,
wall_height/2 + block_origin[2],
), r= None)
gymutil.draw_lines(
geom,
@ -1035,11 +1067,11 @@ class BarrierTrack:
def draw_virtual_crawl_track(self,
block_info,
xy_origin,
block_origin,
):
crawl_depth = block_info[0]
crawl_height = block_info[1]
wall_height = self.track_kwargs["crawl"]["wall_height"]
crawl_depth = block_info[1]
crawl_height = block_info[2]
wall_height = self.track_kwargs["crawl"]["wall_height"][1] if isinstance(self.track_kwargs["crawl"]["wall_height"], (list, tuple)) else self.track_kwargs["crawl"]["wall_height"]
geom = gymutil.WireframeBoxGeometry(
crawl_depth,
self.env_width,
@ -1048,9 +1080,9 @@ class BarrierTrack:
color= (0, 0, 1),
)
pose = gymapi.Transform(gymapi.Vec3(
crawl_depth/2 + xy_origin[0],
self.env_width/2 + xy_origin[1],
wall_height/2 + crawl_height,
crawl_depth/2 + block_origin[0],
block_origin[1],
wall_height/2 + crawl_height + block_origin[2],
), r= None)
gymutil.draw_lines(
geom,
@ -1062,11 +1094,11 @@ class BarrierTrack:
def draw_virtual_leap_track(self,
block_info,
xy_origin,
block_origin,
):
# virtual/non-virtual terrain looks the same when leaping the gap.
# but the expected height can be visualized
leap_length = block_info[0]
leap_length = block_info[1]
expected_height = self.track_kwargs["leap"]["height"]
geom = gymutil.WireframeBoxGeometry(
leap_length,
@ -1076,9 +1108,9 @@ class BarrierTrack:
color= (0, 0.5, 0.5),
)
pose = gymapi.Transform(gymapi.Vec3(
leap_length/2 + xy_origin[0],
self.env_width/2 + xy_origin[1],
expected_height/2,
leap_length/2 + block_origin[0],
block_origin[1],
expected_height/2 + block_origin[2],
), r= None)
gymutil.draw_lines(
geom,
@ -1090,12 +1122,12 @@ class BarrierTrack:
def draw_virtual_track(self,
track_origin_px,
row_idx,
col_idx,
):
difficulties = self.get_difficulty(row_idx, col_idx)
virtual_terrain = difficulties[1]
track_origin = self.env_origins[row_idx, col_idx]
for block_idx in range(1, self.track_info_map.shape[2]):
if virtual_terrain and self.track_kwargs["draw_virtual_terrain"]:
@ -1104,13 +1136,14 @@ class BarrierTrack:
if v == obstacle_id:
obstacle_name = k
break
block_info = self.track_info_map[row_idx, col_idx, block_idx, 1:]
heightfield_x0 = track_origin_px[0] + self.track_block_resolution[0] * block_idx
heightfield_y0 = track_origin_px[1]
block_info = self.track_info_map[row_idx, col_idx, block_idx] # (3,)
getattr(self, "draw_virtual_" + obstacle_name + "_track")(
block_info,
(np.array([heightfield_x0, heightfield_y0]) * self.cfg.horizontal_scale),
np.array([
track_origin[0] + self.track_kwargs["track_block_length"] * block_idx,
track_origin[1],
self.block_starting_height_map[row_idx, col_idx, block_idx].cpu().numpy(),
]),
)
def draw_virtual_terrain(self, viewer):
@ -1119,7 +1152,6 @@ class BarrierTrack:
for row_idx in range(self.cfg.num_rows):
for col_idx in range(self.cfg.num_cols):
self.draw_virtual_track(
self.track_origins_px[row_idx, col_idx, :2],
row_idx= row_idx,
col_idx= col_idx,
)
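# Recap, as a standalone sketch (not part of the commit), of the block-relative
# coordinate convention introduced above: sample points are expressed relative to
# the block origin rather than the track, with the new block_starting_height_map
# supplying the vertical offset. Names below are illustrative.
import torch

def to_block_frame_sketch(sample_points, env_origin, block_start_height, env_block_length):
    # sample_points: (N, 3) points already shifted into the track frame on x
    x_in_block = sample_points[:, 0] % env_block_length      # distance into the current block
    y_in_block = sample_points[:, 1] - env_origin[1]         # lateral offset from the track center
    z_in_block = sample_points[:, 2] - block_start_height    # height above the block floor
    return torch.stack([x_in_block, y_in_block, z_in_block], dim=-1)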