[update] env and training config for go1
parent f771497315, commit 1ab4639f15
@@ -8,7 +8,7 @@ Authors:
 [Sören Schwertfeger](https://robotics.shanghaitech.edu.cn/people/soeren),
 [Chelsea Finn](https://ai.stanford.edu/~cbfinn/),
 [Hang Zhao](https://hangzhaomit.github.io/)<br>
-Conference on Robot Learning (CoRL) 2023, Oral <br>
+Conference on Robot Learning (CoRL) 2023, **Oral**, **Best Systems Paper Award Finalist (top 3)** <br>
 
 <p align="center">
   <img src="images/teaser.jpeg" width="80%"/>
@@ -34,7 +34,7 @@ To deploy the trained model on your real robot, please follow the instructions i
 If you cannot run the distillation part, or if all graphics computation goes to GPU 0 despite having multiple GPUs and setting CUDA_VISIBLE_DEVICES, please use docker to isolate each GPU.
 
 ## To Do (will be done before Nov 2023) ##
 - [ ] Go1 training pipeline in simulation
 - [x] Go1 training configuration (not from scratch)
 - [ ] A1 deployment code
 - [x] Go1 deployment code
 
@@ -62,11 +62,27 @@ task_registry.register( "go1_field", LeggedRobotNoisy, Go1FieldCfg(), Go1FieldCf
 task_registry.register( "go1_distill", LeggedRobotNoisy, Go1FieldDistillCfg(), Go1FieldDistillCfgPPO())
 
 ## The following tasks are for the convenience of open-sourcing
-from .a1.a1_climb_config import A1ClimbCfg, A1ClimbCfgPPO
-task_registry.register( "a1_climb", LeggedRobotNoisy, A1ClimbCfg(), A1ClimbCfgPPO() )
+from .a1.a1_remote_config import A1RemoteCfg, A1RemoteCfgPPO
+task_registry.register( "a1_remote", LeggedRobotNoisy, A1RemoteCfg(), A1RemoteCfgPPO() )
+from .a1.a1_jump_config import A1JumpCfg, A1JumpCfgPPO
+task_registry.register( "a1_jump", LeggedRobotNoisy, A1JumpCfg(), A1JumpCfgPPO() )
+from .a1.a1_down_config import A1DownCfg, A1DownCfgPPO
+task_registry.register( "a1_down", LeggedRobotNoisy, A1DownCfg(), A1DownCfgPPO() )
 from .a1.a1_leap_config import A1LeapCfg, A1LeapCfgPPO
 task_registry.register( "a1_leap", LeggedRobotNoisy, A1LeapCfg(), A1LeapCfgPPO() )
 from .a1.a1_crawl_config import A1CrawlCfg, A1CrawlCfgPPO
 task_registry.register( "a1_crawl", LeggedRobotNoisy, A1CrawlCfg(), A1CrawlCfgPPO() )
 from .a1.a1_tilt_config import A1TiltCfg, A1TiltCfgPPO
 task_registry.register( "a1_tilt", LeggedRobotNoisy, A1TiltCfg(), A1TiltCfgPPO() )
+from .go1.go1_remote_config import Go1RemoteCfg, Go1RemoteCfgPPO
+task_registry.register( "go1_remote", LeggedRobotNoisy, Go1RemoteCfg(), Go1RemoteCfgPPO() )
+from .go1.go1_jump_config import Go1JumpCfg, Go1JumpCfgPPO
+task_registry.register( "go1_jump", LeggedRobotNoisy, Go1JumpCfg(), Go1JumpCfgPPO() )
+from .go1.go1_down_config import Go1DownCfg, Go1DownCfgPPO
+task_registry.register( "go1_down", LeggedRobotNoisy, Go1DownCfg(), Go1DownCfgPPO() )
+from .go1.go1_leap_config import Go1LeapCfg, Go1LeapCfgPPO
+task_registry.register( "go1_leap", LeggedRobotNoisy, Go1LeapCfg(), Go1LeapCfgPPO() )
+from .go1.go1_crawl_config import Go1CrawlCfg, Go1CrawlCfgPPO
+task_registry.register( "go1_crawl", LeggedRobotNoisy, Go1CrawlCfg(), Go1CrawlCfgPPO() )
+from .go1.go1_tilt_config import Go1TiltCfg, Go1TiltCfgPPO
+task_registry.register( "go1_tilt", LeggedRobotNoisy, Go1TiltCfg(), Go1TiltCfgPPO() )
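The registry above only declares name-to-config bindings; a training script still has to build the env and runner from a registered name. A minimal usage sketch, assuming the standard legged_gym entry points (`make_env` / `make_alg_runner`), which this fork appears to follow; verify names against this repo's own scripts:

```python
# Hypothetical usage sketch: consuming a task name registered above.
import isaacgym  # must be imported before torch in Isaac Gym projects
from legged_gym.utils import get_args, task_registry

args = get_args()          # parses --task, --headless, --num_envs, ...
args.task = "go1_jump"     # any name registered above
env, env_cfg = task_registry.make_env(name=args.task, args=args)
runner, train_cfg = task_registry.make_alg_runner(env=env, name=args.task, args=args)
runner.learn(num_learning_iterations=train_cfg.runner.max_iterations)
```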
@@ -71,6 +71,14 @@ class A1RoughCfg( LeggedRobotCfg ):
         penalize_contacts_on = ["thigh", "calf"]
         terminate_after_contacts_on = ["base"]
         self_collisions = 1 # 1 to disable, 0 to enable... bitwise filter
+        sdk_dof_range = dict( # copied from a1_const.h in unitree_legged_sdk
+            Hip_max= 0.802,
+            Hip_min= -0.802,
+            Thigh_max= 4.19,
+            Thigh_min= -1.05,
+            Calf_max= -0.916,
+            Calf_min= -2.7,
+        )
 
     class rewards( LeggedRobotCfg.rewards ):
         soft_dof_pos_limit = 0.9
@@ -0,0 +1,116 @@
+from os import path as osp
+import numpy as np
+from legged_gym.envs.a1.a1_field_config import A1FieldCfg, A1FieldCfgPPO
+from legged_gym.utils.helpers import merge_dict
+
+class A1DownCfg( A1FieldCfg ):
+
+    #### uncomment this to train non-virtual terrain
+    class sensor( A1FieldCfg.sensor ):
+        class proprioception( A1FieldCfg.sensor.proprioception ):
+            latency_range = [0.04-0.0025, 0.04+0.0075]
+    #### uncomment the above to train non-virtual terrain
+
+    class terrain( A1FieldCfg.terrain ):
+        max_init_terrain_level = 2
+        border_size = 5
+        slope_treshold = 20.
+        curriculum = True
+
+        BarrierTrack_kwargs = merge_dict(A1FieldCfg.terrain.BarrierTrack_kwargs, dict(
+            options= [
+                "jump",
+            ],
+            track_block_length= 1.6,
+            jump= dict(
+                height= (0.2, 0.45),
+                depth= 0.3,
+                fake_offset= 0.0, # [m] an offset that makes it easier for the robot to get onto the obstacle
+                jump_down_prob= 1., # probability of jumping down; use it in non-virtual terrain
+            ),
+            virtual_terrain= True, # Change this to False for real terrain
+            no_perlin_threshold= 0.1,
+            n_obstacles_per_track= 3,
+        ))
+
+        TerrainPerlin_kwargs = merge_dict(A1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
+            zScale= [0.05, 0.15],
+        ))
+
+    class commands( A1FieldCfg.commands ):
+        class ranges( A1FieldCfg.commands.ranges ):
+            lin_vel_x = [0.5, 1.5]
+            lin_vel_y = [0.0, 0.0]
+            ang_vel_yaw = [0., 0.]
+
+    class termination( A1FieldCfg.termination ):
+        # additional factors that determine whether to terminate the episode
+        termination_terms = [
+            "roll",
+            "pitch",
+            "z_low",
+            "z_high",
+            "out_of_track",
+        ]
+        z_low_kwargs = merge_dict(A1FieldCfg.termination.z_low_kwargs, dict(
+            threshold= -3.,
+        ))
+
+    class domain_rand( A1FieldCfg.domain_rand ):
+        init_base_pos_range = dict(
+            x= [0.2, 0.6],
+            y= [-0.25, 0.25],
+        )
+        init_base_rot_range = dict(
+            roll= [-0.1, 0.1],
+            pitch= [-0.1, 0.1],
+        )
+
+        push_robots = True
+
+    class rewards( A1FieldCfg.rewards ):
+        class scales:
+            tracking_ang_vel = 0.1
+            world_vel_l2norm = -1.3
+            legs_energy_substeps = -1e-6
+            alive = 2.
+            penetrate_depth = -2e-3
+            penetrate_volume = -2e-3
+            exceed_dof_pos_limits = -4e-1
+            exceed_torque_limits_l1norm = -4e-1
+            feet_contact_forces = -1e-2
+            torques = -2e-5
+            lin_pos_y = -0.1
+            yaw_abs = -0.1
+            collision = -0.1
+            down_cond = 0.3
+        soft_dof_pos_limit = 0.8
+        max_contact_force = 200.0
+
+    class curriculum( A1FieldCfg.curriculum ):
+        penetrate_volume_threshold_harder = 2000000
+        penetrate_volume_threshold_easier = 4000000
+        penetrate_depth_threshold_harder = 200000
+        penetrate_depth_threshold_easier = 400000
+
+
+logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
+class A1DownCfgPPO( A1FieldCfgPPO ):
+    class algorithm( A1FieldCfgPPO.algorithm ):
+        entropy_coef = 0.0
+        clip_min_std = 0.2
+
+    class runner( A1FieldCfgPPO.runner ):
+        policy_class_name = "ActorCriticRecurrent"
+        experiment_name = "field_a1"
+        resume = True
+        load_run = "{Your trained walk model directory}"
+
+        run_name = "".join(["Skills_",
+            ("down" if A1DownCfg.terrain.BarrierTrack_kwargs["jump"]["jump_down_prob"] > 0. else "jump"),
+            ("_noResume" if not resume else "from" + "_".join(load_run.split("/")[-1].split("_")[:2])),
+        ])
+        max_iterations = 20000
+        save_interval = 500
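The skill configs in this commit lean heavily on `merge_dict` to derive per-skill terrain kwargs from the shared field config. A minimal sketch of the behavior this usage pattern assumes (the real helper lives in `legged_gym.utils.helpers`; this is an illustration, not its actual implementation):

```python
def merge_dict(base: dict, override: dict) -> dict:
    """ Return a copy of `base` with keys from `override` replacing or
    extending it, leaving `base` untouched so that parent configs
    (e.g. A1FieldCfg.terrain.BarrierTrack_kwargs) are not mutated. """
    merged = dict(base)        # shallow copy of the parent kwargs
    merged.update(override)    # child config wins on conflicting keys
    return merged

# e.g. the A1DownCfg terrain above keeps every BarrierTrack_kwargs entry
# from A1FieldCfg and swaps in options=["jump"], track_block_length=1.6, ...
```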
@@ -1,4 +1,5 @@
 import numpy as np
+import os.path as osp
 from legged_gym.envs.a1.a1_config import A1RoughCfg, A1RoughCfgPPO
 
 class A1FieldCfg( A1RoughCfg ):
@@ -59,7 +60,7 @@ class A1FieldCfg( A1RoughCfg ):
 
         BarrierTrack_kwargs = dict(
             options= [
-                # "climb",
+                # "jump",
                 # "crawl",
                 # "tilt",
                 # "leap",
@@ -68,11 +69,11 @@ class A1FieldCfg( A1RoughCfg ):
             track_block_length= 2., # the x-axis distance from the env origin point
             wall_thickness= (0.04, 0.2), # [m]
             wall_height= -0.05,
-            climb= dict(
+            jump= dict(
                 height= (0.2, 0.6),
                 depth= (0.1, 0.8), # size along the forward axis
                 fake_offset= 0.0, # [m] an offset that makes it easier for the robot to get onto the obstacle
-                climb_down_prob= 0.0,
+                jump_down_prob= 0., # probability of jumping down; use it in non-virtual terrain
             ),
             crawl= dict(
                 height= (0.25, 0.5),
@@ -97,13 +98,14 @@ class A1FieldCfg( A1RoughCfg ):
             virtual_terrain= False,
             draw_virtual_terrain= True,
             engaging_next_threshold= 1.2,
+            engaging_finish_threshold= 0.,
             curriculum_perlin= False,
-            no_perlin_threshold= 0.0,
+            no_perlin_threshold= 0.1,
         )
 
         TerrainPerlin_kwargs = dict(
-            zScale= [0.1, 0.15],
-            # zScale= 0.1, # Use a constant zScale for training a walk policy
+            zScale= [0.08, 0.15],
+            # zScale= 0.15, # Use a constant zScale for training a walk policy
             frequency= 10,
         )
 
@@ -114,9 +116,6 @@ class A1FieldCfg( A1RoughCfg ):
             lin_vel_x = [-1.0, 1.0]
             lin_vel_y = [0.0, 0.0]
             ang_vel_yaw = [0., 0.]
-            ######## configs for training a walk policy #########
-            # lin_vel_y = [-1., 1.]
-            # ang_vel_yaw = [-1., 1.]
 
     class control( A1RoughCfg.control ):
         stiffness = {'joint': 50.}
@@ -125,10 +124,15 @@ class A1FieldCfg( A1RoughCfg ):
         torque_limits = 25 # override the urdf
+        computer_clip_torque = True
+        motor_clip_torque = False
         # action_delay = False # set to True to enable action delay in sim
         # action_delay_range = [0.002, 0.022] # [s]
         # action_delay_resample_time = 5.0 # [s]
 
     class asset( A1RoughCfg.asset ):
-        penalize_contacts_on = ["base", "thigh"]
+        penalize_contacts_on = ["base", "thigh", "calf"]
+        terminate_after_contacts_on = ["base", "imu"]
+        front_hip_names = ["FR_hip_joint", "FL_hip_joint"]
+        rear_hip_names = ["RR_hip_joint", "RL_hip_joint"]
 
     class termination:
         # additional factors that determine whether to terminate the episode
@@ -145,8 +149,8 @@ class A1FieldCfg( A1RoughCfg ):
             tilt_threshold= 1.5,
         )
         pitch_kwargs = dict(
-            threshold= 1.6, # [rad] # for leap, climb
-            climb_threshold= 1.6,
+            threshold= 1.6, # [rad] # for leap, jump
+            jump_threshold= 1.6,
             leap_threshold= 1.5,
         )
         z_low_kwargs = dict(
@@ -176,7 +180,7 @@ class A1FieldCfg( A1RoughCfg ):
         added_mass_range = [1.0, 3.0]
 
         randomize_friction = True
-        friction_range = [0.0, 1.]
+        friction_range = [0., 2.]
 
         init_base_pos_range = dict(
             x= [0.2, 0.6],
@@ -200,10 +204,29 @@ class A1FieldCfg( A1RoughCfg ):
     class normalization( A1RoughCfg.normalization ):
         class obs_scales( A1RoughCfg.normalization.obs_scales ):
             forward_depth = 1.
-            base_pose = [0., 0., 1., 1., 1., 1.]
+            base_pose = [0., 0., 0., 1., 1., 1.]
             engaging_block = 1.
+        """ The following action clip is used for tanh policy activation. """
+        # clip_actions_method = "hard"
+        # dof_pos_redundancy = 0.2
+        # clip_actions_low = []
+        # clip_actions_high = []
+        # for sdk_joint_name, sim_joint_name in zip(
+        #     ["Hip", "Thigh", "Calf"] * 4,
+        #     [ # in the same order as in simulation
+        #         "FL_hip_joint", "FL_thigh_joint", "FL_calf_joint",
+        #         "FR_hip_joint", "FR_thigh_joint", "FR_calf_joint",
+        #         "RL_hip_joint", "RL_thigh_joint", "RL_calf_joint",
+        #         "RR_hip_joint", "RR_thigh_joint", "RR_calf_joint",
+        #     ],
+        # ): # This setting is only for position control.
+        #     clip_actions_low.append( (A1RoughCfg.asset.sdk_dof_range[sdk_joint_name + "_min"] + dof_pos_redundancy - A1RoughCfg.init_state.default_joint_angles[sim_joint_name]) / a1_action_scale )
+        #     clip_actions_high.append( (A1RoughCfg.asset.sdk_dof_range[sdk_joint_name + "_max"] - dof_pos_redundancy - A1RoughCfg.init_state.default_joint_angles[sim_joint_name]) / a1_action_scale )
+        # del dof_pos_redundancy, sdk_joint_name, sim_joint_name # not intended to be an attribute of normalization
+        """ The above action clip is used for tanh policy activation. """
 
     class noise( A1RoughCfg.noise ):
         add_noise = False # disable internal uniform +-1 noise; no noise in proprioception
         class noise_scales( A1RoughCfg.noise.noise_scales ):
             forward_depth = 0.1
             base_pose = 1.0
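The commented-out block above turns the `sdk_dof_range` limits (added to a1_config.py earlier in this diff) into per-joint action clip bounds for position control. A standalone sketch of that arithmetic, using an assumed action scale of 0.25 and an assumed default thigh angle of 0.8 rad (both hypothetical; check your own config for the real values):

```python
# Hypothetical worked example of the commented clip-bound computation.
sdk_dof_range = dict(Thigh_max=4.19, Thigh_min=-1.05)  # from a1_const.h
default_thigh = 0.8          # assumed default_joint_angles["FL_thigh_joint"]
action_scale = 0.25          # assumed control.action_scale
dof_pos_redundancy = 0.2     # safety margin [rad]

clip_low = (sdk_dof_range["Thigh_min"] + dof_pos_redundancy - default_thigh) / action_scale
clip_high = (sdk_dof_range["Thigh_max"] - dof_pos_redundancy - default_thigh) / action_scale
print(clip_low, clip_high)   # -6.6, 12.76: raw action range mapped to safe dof targets
```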
@@ -244,6 +267,7 @@ class A1FieldCfg( A1RoughCfg ):
     no_moveup_when_fall = False
     # chosen heuristically, please refer to `LeggedRobotField._get_terrain_curriculum_move` with fixed body_measure_points
 
+logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
 class A1FieldCfgPPO( A1RoughCfgPPO ):
     class algorithm( A1RoughCfgPPO.algorithm ):
         entropy_coef = 0.01
@@ -256,8 +280,9 @@ class A1FieldCfgPPO( A1RoughCfgPPO ):
     class runner( A1RoughCfgPPO.runner ):
         policy_class_name = "ActorCriticRecurrent"
         experiment_name = "field_a1"
-        run_name = "".join(["WalkingBase",
-            ("_pEnergySubsteps" + np.format_float_scientific(-A1FieldCfg.rewards.scales.legs_energy_substeps, precision=1, exp_digits=1, trim="-") if A1FieldCfg.rewards.scales.legs_energy_substeps != 0 else ""),
         resume = False
 
+        run_name = "".join(["WalkForward",
+            ("_propDelay{:.2f}-{:.2f}".format(
+                A1FieldCfg.sensor.proprioception.latency_range[0],
+                A1FieldCfg.sensor.proprioception.latency_range[1],
@@ -3,12 +3,14 @@ import numpy as np
 from legged_gym.envs.a1.a1_field_config import A1FieldCfg, A1FieldCfgPPO
 from legged_gym.utils.helpers import merge_dict
 
-class A1ClimbCfg( A1FieldCfg ):
+class A1JumpCfg( A1FieldCfg ):
+
+    class init_state( A1FieldCfg.init_state ):
+        pos = [0., 0., 0.45]
 
     #### uncomment this to train non-virtual terrain
     # class sensor( A1FieldCfg.sensor ):
     #     class proprioception( A1FieldCfg.sensor.proprioception ):
-    #         delay_action_obs = True
     #         latency_range = [0.04-0.0025, 0.04+0.0075]
     #### uncomment the above to train non-virtual terrain
@@ -20,22 +22,23 @@ class A1ClimbCfg( A1FieldCfg ):
 
         BarrierTrack_kwargs = merge_dict(A1FieldCfg.terrain.BarrierTrack_kwargs, dict(
             options= [
-                "climb",
+                "jump",
             ],
             track_block_length= 1.6,
-            climb= dict(
+            jump= dict(
                 height= (0.2, 0.6), # use this to train in virtual terrain
                 # height= (0.1, 0.5), # use this to train in non-virtual terrain
                 depth= (0.1, 0.2),
                 fake_offset= 0.0, # [m] an offset that makes it easier for the robot to get onto the obstacle
-                climb_down_prob= 0., # probability of climbing down; use it in non-virtual terrain
+                jump_down_prob= 0., # probability of jumping down; use it in non-virtual terrain
             ),
             virtual_terrain= False, # Change this to False for real terrain
-            no_perlin_threshold= 0.06,
+            no_perlin_threshold= 0.12,
+            n_obstacles_per_track= 3,
         ))
 
         TerrainPerlin_kwargs = merge_dict(A1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
-            zScale= [0.05, 0.1],
+            zScale= [0.05, 0.15],
         ))
 
     class commands( A1FieldCfg.commands ):
@@ -58,10 +61,19 @@ class A1ClimbCfg( A1FieldCfg ):
         ))
 
     class domain_rand( A1FieldCfg.domain_rand ):
         class com_range( A1FieldCfg.domain_rand.com_range ):
             z = [-0.1, 0.1]
 
+        init_base_pos_range = dict(
+            x= [0.2, 0.6],
+            y= [-0.25, 0.25],
+        )
+        init_base_rot_range = dict(
+            roll= [-0.1, 0.1],
+            pitch= [-0.1, 0.1],
+        )
+
+        push_robots = False
 
     class rewards( A1FieldCfg.rewards ):
         class scales:
@@ -73,6 +85,8 @@ class A1ClimbCfg( A1FieldCfg ):
             penetrate_volume = -1e-2
+            exceed_dof_pos_limits = -1e-1
+            exceed_torque_limits_i = -2e-1
         soft_dof_pos_limit = 0.8
         max_contact_force = 100.0
 
     class curriculum( A1FieldCfg.curriculum ):
         penetrate_volume_threshold_harder = 8000
@@ -81,7 +95,8 @@ class A1ClimbCfg( A1FieldCfg ):
         penetrate_depth_threshold_easier = 1600
 
 
-class A1ClimbCfgPPO( A1FieldCfgPPO ):
+logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
+class A1JumpCfgPPO( A1FieldCfgPPO ):
     class algorithm( A1FieldCfgPPO.algorithm ):
         entropy_coef = 0.0
         clip_min_std = 0.2
@@ -89,23 +104,15 @@ class A1ClimbCfgPPO( A1FieldCfgPPO ):
     class runner( A1FieldCfgPPO.runner ):
         policy_class_name = "ActorCriticRecurrent"
         experiment_name = "field_a1"
-        run_name = "".join(["Skill",
-            ("Multi" if len(A1ClimbCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else (A1ClimbCfg.terrain.BarrierTrack_kwargs["options"][0] if A1ClimbCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")),
-            ("_pEnergySubsteps{:.0e}".format(A1ClimbCfg.rewards.scales.legs_energy_substeps) if A1ClimbCfg.rewards.scales.legs_energy_substeps != -1e-6 else ""),
-            ("_pDofLimit{:.0e}".format(A1ClimbCfg.rewards.scales.exceed_dof_pos_limits) if getattr(A1ClimbCfg.rewards.scales, "exceed_dof_pos_limits") != 0.0 else ""),
-            ("_climbDownProb{:.1f}".format(A1ClimbCfg.terrain.BarrierTrack_kwargs["climb"]["climb_down_prob"]) if A1ClimbCfg.terrain.BarrierTrack_kwargs["climb"].get("climb_down_prob", 0.) > 0. else ""),
-            ("_climbHeight{:.1f}-{:.1f}".format(*A1ClimbCfg.terrain.BarrierTrack_kwargs["climb"]["height"])),
-            ("_propDelay{:.2f}-{:.2f}".format(
-                A1ClimbCfg.sensor.proprioception.latency_range[0],
-                A1ClimbCfg.sensor.proprioception.latency_range[1],
-            ) if A1ClimbCfg.sensor.proprioception.delay_action_obs else ""
-            ),
-            ("_virtual" if A1ClimbCfg.terrain.BarrierTrack_kwargs["virtual_terrain"] else ""),
-            ("_trackBlockLen{:.1f}".format(A1ClimbCfg.terrain.BarrierTrack_kwargs["track_block_length"]) if A1ClimbCfg.terrain.BarrierTrack_kwargs["track_block_length"] != 2. else ""),
-        ])
         resume = True
         load_run = "{Your trained walking model directory}"
-        load_run = "{Your virtually trained climb model directory}"
+        load_run = "{Your virtually trained jump model directory}"
 
+        run_name = "".join(["Skills_",
+            ("jump" if A1JumpCfg.terrain.BarrierTrack_kwargs["jump"]["jump_down_prob"] < 1. else "down"),
+            ("_virtual" if A1JumpCfg.terrain.BarrierTrack_kwargs["virtual_terrain"] else ""),
+            ("_noResume" if not resume else "_from" + "_".join(load_run.split("/")[-1].split("_")[:2])),
+        ])
         max_iterations = 20000
         save_interval = 500
@@ -0,0 +1,82 @@
+import numpy as np
+import os.path as osp
+from legged_gym.envs.a1.a1_field_config import A1FieldCfg, A1FieldCfgPPO
+from legged_gym.utils.helpers import merge_dict
+
+class A1RemoteCfg( A1FieldCfg ):
+    class env( A1FieldCfg.env ):
+        num_envs = 4096
+        obs_components = [
+            "proprioception", # 48
+            # "height_measurements", # 187
+            # "forward_depth",
+            # "base_pose",
+            # "robot_config",
+            # "engaging_block",
+            # "sidewall_distance",
+        ]
+        privileged_obs_components = [
+            "proprioception",
+            # "height_measurements",
+            # "forward_depth",
+            "robot_config",
+        ]
+        use_lin_vel = False
+        privileged_use_lin_vel = True
+
+    class terrain( A1FieldCfg.terrain ):
+        num_rows = 4
+        num_cols = 4
+        selected = "TerrainPerlin"
+        TerrainPerlin_kwargs = dict(
+            zScale= 0.15,
+            frequency= 10,
+        )
+
+    class commands( A1FieldCfg.commands ):
+        class ranges( A1FieldCfg.commands.ranges ):
+            lin_vel_x = [-1.0, 1.0]
+            lin_vel_y = [-1., 1.]
+            ang_vel_yaw = [-1., 1.]
+
+    class domain_rand( A1FieldCfg.domain_rand ):
+        class com_range( A1FieldCfg.domain_rand.com_range ):
+            x = [-0.2, 0.2]
+        max_push_vel_ang = 0.5
+        push_robots = True
+
+    class rewards( A1FieldCfg.rewards ):
+        class scales:
+            ###### hacked from the Field config
+            tracking_ang_vel = 0.5
+            lin_vel_l2norm = -1.
+            legs_energy_substeps = -1e-5
+            alive = 2.
+            # penalty for hardware safety
+            exceed_dof_pos_limits = -4e-2
+            exceed_torque_limits_l1norm = -4e-1
+            # penalty for walking gait, probably no need
+            collision = -0.1
+            orientation = -0.1
+            feet_contact_forces = -1e-3
+        soft_dof_pos_limit = 0.5
+        max_contact_force = 60.0
+
+logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
+class A1RemoteCfgPPO( A1FieldCfgPPO ):
+    class algorithm( A1FieldCfgPPO.algorithm ):
+        entropy_coef = 0.01
+        clip_min_std = 0.05
+
+    class runner( A1FieldCfgPPO.runner ):
+        resume = False
+        load_run = None
+
+        run_name = "".join(["WalkByRemote",
+            ("_rLinTrack{:.1f}".format(A1RemoteCfg.rewards.scales.tracking_lin_vel) if getattr(A1RemoteCfg.rewards.scales, "tracking_lin_vel", 0.) != 0. else ""),
+            ("_pLin{:.1f}".format(-A1RemoteCfg.rewards.scales.lin_vel_l2norm) if getattr(A1RemoteCfg.rewards.scales, "lin_vel_l2norm", 0.) != 0. else ""),
+            ("_rAng{:.1f}".format(A1RemoteCfg.rewards.scales.tracking_ang_vel) if A1RemoteCfg.rewards.scales.tracking_ang_vel != 0.2 else ""),
+            ("_rAlive{:.1f}".format(A1RemoteCfg.rewards.scales.alive) if getattr(A1RemoteCfg.rewards.scales, "alive", 2.) != 2. else ""),
+            ("_pEnergySubsteps{:.0e}".format(A1RemoteCfg.rewards.scales.legs_energy_substeps) if getattr(A1RemoteCfg.rewards.scales, "legs_energy_substeps", -2e-5) != -2e-5 else "_nopEnergy"),
+            ("_noResume" if not resume else "_from" + "_".join(load_run.split("/")[-1].split("_")[:2])),
+        ])
@@ -141,6 +141,7 @@ class LeggedRobot(BaseTask):
         self.last_actions[:] = self.actions[:]
         self.last_dof_vel[:] = self.dof_vel[:]
         self.last_root_vel[:] = self.root_states[:, 7:13]
+        self.last_torques[:] = self.torques[:]
 
         if self.viewer and self.enable_viewer_sync and self.debug_viz:
             self._draw_debug_vis()
@@ -343,6 +344,7 @@ class LeggedRobot(BaseTask):
 
         # set small commands to zero
         self.commands[env_ids, :2] *= (torch.norm(self.commands[env_ids, :2], dim=1) > 0.2).unsqueeze(1)
+        self.commands[env_ids, 2] *= (torch.abs(self.commands[env_ids, 2]) > 0.1)
 
     def _compute_torques(self, actions):
         """ Compute torques from actions.
@@ -387,7 +389,9 @@ class LeggedRobot(BaseTask):
             )
         else:
             self.dof_pos[env_ids] = self.default_dof_pos
-            self.dof_vel[env_ids] = 0.
+        # self.dof_vel[env_ids] = 0. # history init method
+        dof_vel_range = getattr(self.cfg.domain_rand, "init_dof_vel_range", [-3., 3.])
+        self.dof_vel[env_ids] = torch.rand_like(self.dof_vel[env_ids]) * abs(dof_vel_range[1] - dof_vel_range[0]) + min(dof_vel_range)
 
         # Each env has multiple actors, so the actor index is not the same as env_id; but the robot actor is always the first.
         dof_idx = env_ids * self.all_root_states.shape[0] / self.num_envs
@@ -414,16 +418,64 @@ class LeggedRobot(BaseTask):
         else:
             self.root_states[env_ids] = self.base_init_state
             self.root_states[env_ids, :3] += self.env_origins[env_ids]
+        # base rotation (roll and pitch)
+        if hasattr(self.cfg.domain_rand, "init_base_rot_range"):
+            base_roll = torch_rand_float(
+                *self.cfg.domain_rand.init_base_rot_range["roll"],
+                (len(env_ids), 1),
+                device=self.device,
+            )[:, 0]
+            base_pitch = torch_rand_float(
+                *self.cfg.domain_rand.init_base_rot_range["pitch"],
+                (len(env_ids), 1),
+                device=self.device,
+            )[:, 0]
+            base_quat = quat_from_euler_xyz(base_roll, base_pitch, torch.zeros_like(base_roll))
+            self.root_states[env_ids, 3:7] = base_quat
         # base velocities
         if getattr(self.cfg.domain_rand, "init_base_vel_range", None) is None:
             base_vel_range = (-0.5, 0.5)
         else:
             base_vel_range = self.cfg.domain_rand.init_base_vel_range
-        self.root_states[env_ids, 7:13] = torch_rand_float(
-            *base_vel_range,
-            (len(env_ids), 6),
-            device=self.device,
-        ) # [7:10]: lin vel, [10:13]: ang vel
+        if isinstance(base_vel_range, (tuple, list)):
+            self.root_states[env_ids, 7:13] = torch_rand_float(
+                *base_vel_range,
+                (len(env_ids), 6),
+                device=self.device,
+            ) # [7:10]: lin vel, [10:13]: ang vel
+        elif isinstance(base_vel_range, dict):
+            self.root_states[env_ids, 7:8] = torch_rand_float(
+                *base_vel_range["x"],
+                (len(env_ids), 1),
+                device=self.device,
+            )
+            self.root_states[env_ids, 8:9] = torch_rand_float(
+                *base_vel_range["y"],
+                (len(env_ids), 1),
+                device=self.device,
+            )
+            self.root_states[env_ids, 9:10] = torch_rand_float(
+                *base_vel_range["z"],
+                (len(env_ids), 1),
+                device=self.device,
+            )
+            self.root_states[env_ids, 10:11] = torch_rand_float(
+                *base_vel_range["roll"],
+                (len(env_ids), 1),
+                device=self.device,
+            )
+            self.root_states[env_ids, 11:12] = torch_rand_float(
+                *base_vel_range["pitch"],
+                (len(env_ids), 1),
+                device=self.device,
+            )
+            self.root_states[env_ids, 12:13] = torch_rand_float(
+                *base_vel_range["yaw"],
+                (len(env_ids), 1),
+                device=self.device,
+            )
+        else:
+            raise NameError(f"Unknown base_vel_range type: {type(base_vel_range)}")
 
         # Each env has multiple actors, so the actor index is not the same as env_id; but the robot actor is always the first.
         actor_idx = env_ids * self.all_root_states.shape[0] / self.num_envs
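The branching above accepts `init_base_vel_range` either as a flat (min, max) pair applied to all six root-velocity components or as a per-component dict. A sketch of the two config forms this code expects (values are illustrative only):

```python
# Two alternative config forms accepted by the reset logic above
# (illustrative values; set one or the other on cfg.domain_rand):

# form 1: a single (min, max) range applied to all six components
# [lin_x, lin_y, lin_z, ang_roll, ang_pitch, ang_yaw] of the root velocity
init_base_vel_range = [-0.5, 0.5]

# form 2: a per-component dict, sampled channel by channel
init_base_vel_range = dict(
    x= [-0.5, 1.0],  # e.g. bias the initial forward speed
    y= [-0.3, 0.3],
    z= [-0.3, 0.3],
    roll= [-0.5, 0.5],
    pitch= [-0.5, 0.5],
    yaw= [-0.5, 0.5],
)
```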
@@ -546,6 +598,7 @@ class LeggedRobot(BaseTask):
         self.last_actions = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False)
         self.last_dof_vel = torch.zeros_like(self.dof_vel)
         self.last_root_vel = torch.zeros_like(self.root_states[:, 7:13])
+        self.last_torques = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False)
         self.commands = torch.zeros(self.num_envs, self.cfg.commands.num_commands, dtype=torch.float, device=self.device, requires_grad=False) # x vel, y vel, yaw vel, heading
         self.commands_scale = torch.tensor([self.obs_scales.lin_vel, self.obs_scales.lin_vel, self.obs_scales.ang_vel], device=self.device, requires_grad=False,) # TODO change this
         self.feet_air_time = torch.zeros(self.num_envs, self.feet_indices.shape[0], dtype=torch.float, device=self.device, requires_grad=False)
@@ -580,8 +633,11 @@ class LeggedRobot(BaseTask):
         self.default_dof_pos = self.default_dof_pos.unsqueeze(0)
 
     def _reset_buffers(self, env_ids):
+        if getattr(self.cfg.init_state, "zero_actions", False):
+            self.actions[env_ids] = 0.
         self.last_actions[env_ids] = 0.
         self.last_dof_vel[env_ids] = 0.
+        self.last_torques[env_ids] = 0.
         self.feet_air_time[env_ids] = 0.
         self.episode_length_buf[env_ids] = 0
         self.reset_buf[env_ids] = 1
@@ -902,8 +958,13 @@ class LeggedRobot(BaseTask):
         # log additional curriculum info
         if self.cfg.terrain.curriculum:
             self.extras["episode"]["terrain_level"] = torch.mean(self.terrain_levels.float())
+            if len(env_ids) > 0:
+                self.extras["episode"]["terrain_level_max"] = torch.max(self.terrain_levels[env_ids].float())
+                self.extras["episode"]["terrain_level_min"] = torch.min(self.terrain_levels[env_ids].float())
         if self.cfg.commands.curriculum:
             self.extras["episode"]["max_command_x"] = self.command_ranges["lin_vel_x"][1]
+        # log whether the episode ends by timeout or death, or by reaching the goal
+        self.extras["episode"]["timeout_ratio"] = self.time_out_buf.float().sum() / self.reset_buf.float().sum()
         # send timeout info to the algorithm
         if self.cfg.env.send_timeouts:
             self.extras["time_outs"] = self.time_out_buf
@@ -930,6 +991,9 @@ class LeggedRobot(BaseTask):
         # Penalize torques
         return torch.sum(torch.square(self.torques), dim=1)
 
+    def _reward_delta_torques(self):
+        return torch.sum(torch.square(self.torques - self.last_torques), dim=1)
+
     def _reward_dof_vel(self):
         # Penalize dof velocities
         return torch.sum(torch.square(self.dof_vel), dim=1)
@@ -7,6 +7,7 @@ import torch
 
 from legged_gym.envs.base.legged_robot import LeggedRobot
 from legged_gym.utils.terrain import get_terrain_cls
+from legged_gym.utils.observation import get_obs_slice
 from .legged_robot_config import LeggedRobotCfg
 
 class LeggedRobotField(LeggedRobot):
@@ -102,10 +103,34 @@ class LeggedRobotField(LeggedRobot):
                 self.cfg.normalization.clip_actions,
                 device= self.device,
             )
+        if isinstance(getattr(self.cfg.normalization, "clip_actions_low", None), (tuple, list)):
+            self.cfg.normalization.clip_actions_low = torch.tensor(
+                self.cfg.normalization.clip_actions_low,
+                device= self.device,
+            )
+        if isinstance(getattr(self.cfg.normalization, "clip_actions_high", None), (tuple, list)):
+            self.cfg.normalization.clip_actions_high = torch.tensor(
+                self.cfg.normalization.clip_actions_high,
+                device= self.device,
+            )
         if getattr(self.cfg.normalization, "clip_actions_method", None) == "tanh":
             clip_actions = self.cfg.normalization.clip_actions
             self.actions = (torch.tanh(actions) * clip_actions).to(self.device)
             actions_preprocessed = True
+        elif getattr(self.cfg.normalization, "clip_actions_method", None) == "hard":
+            if self.cfg.control.control_type == "P":
+                actions_low = getattr(
+                    self.cfg.normalization, "clip_actions_low",
+                    self.dof_pos_limits[:, 0] - self.default_dof_pos,
+                )
+                actions_high = getattr(
+                    self.cfg.normalization, "clip_actions_high",
+                    self.dof_pos_limits[:, 1] - self.default_dof_pos,
+                )
+                self.actions = torch.clip(actions, actions_low, actions_high)
+            else:
+                raise NotImplementedError()
+            actions_preprocessed = True
         if getattr(self.cfg.normalization, "clip_actions_delta", None) is not None:
             self.actions = torch.clip(
                 self.actions,
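In the `tanh` branch above, the policy output is squashed before scaling, so actions can never leave plus/minus clip_actions regardless of network output, while the `hard` branch clamps position targets against the dof limits instead. A tiny numeric sketch of the tanh case (the clip value is illustrative):

```python
import torch

clip_actions = 2.0  # illustrative cfg.normalization.clip_actions value
raw = torch.tensor([-10.0, -1.0, 0.0, 1.0, 10.0])  # unbounded policy outputs
clipped = torch.tanh(raw) * clip_actions
print(clipped)  # ~[-2.0000, -1.5232, 0.0000, 1.5232, 2.0000]: smooth saturation at +/-2
```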
@@ -226,6 +251,7 @@ class LeggedRobotField(LeggedRobot):
                 torch.div(pos_x, self.terrain.env_block_length, rounding_mode= "floor") - 1,
                 min= 0.0,
             )).cpu()
+        self.extras["episode"]["max_power_throughout_episode"] = self.max_power_per_timestep[env_ids].max().cpu().item()
 
         return return_
 
@@ -245,6 +271,11 @@ class LeggedRobotField(LeggedRobot):
                 min= 0.0,
             )).cpu()
 
+        self.max_power_per_timestep = torch.maximum(
+            self.max_power_per_timestep,
+            torch.max(torch.sum(self.substep_torques * self.substep_dof_vel, dim= -1), dim= -1)[0],
+        )
+
         return return_
 
     def _compute_torques(self, actions):
@@ -322,9 +353,40 @@ class LeggedRobotField(LeggedRobot):
                 gymapi.IMAGE_COLOR,
             )))
 
+        if getattr(self.cfg.normalization, "clip_actions_method", None) is None:
+            print("WARNING: No action clipping method is specified, actions will only be clipped by clip_actions.")
+
+        # projected gravity bias (if needed)
+        if getattr(self.cfg.domain_rand, "randomize_gravity_bias", False):
+            print("Initializing gravity bias for domain randomization")
+            # add cross-trajectory domain randomization on the projected gravity bias
+            # uniform sample from range
+            self.gravity_bias = torch.rand(self.num_envs, 3, dtype= torch.float, device= self.device, requires_grad= False)
+            self.gravity_bias[:, 0] *= self.cfg.domain_rand.gravity_bias_range["x"][1] - self.cfg.domain_rand.gravity_bias_range["x"][0]
+            self.gravity_bias[:, 0] += self.cfg.domain_rand.gravity_bias_range["x"][0]
+            self.gravity_bias[:, 1] *= self.cfg.domain_rand.gravity_bias_range["y"][1] - self.cfg.domain_rand.gravity_bias_range["y"][0]
+            self.gravity_bias[:, 1] += self.cfg.domain_rand.gravity_bias_range["y"][0]
+            self.gravity_bias[:, 2] *= self.cfg.domain_rand.gravity_bias_range["z"][1] - self.cfg.domain_rand.gravity_bias_range["z"][0]
+            self.gravity_bias[:, 2] += self.cfg.domain_rand.gravity_bias_range["z"][0]
+
+        self.max_power_per_timestep = torch.zeros(self.num_envs, dtype= torch.float32, device= self.device)
+
     def _reset_buffers(self, env_ids):
         return_ = super()._reset_buffers(env_ids)
         if hasattr(self, "velocity_sample_points"): self.velocity_sample_points[env_ids] = 0.
+
+        if getattr(self.cfg.domain_rand, "randomize_gravity_bias", False):
+            assert hasattr(self, "gravity_bias")
+            self.gravity_bias[env_ids] = torch.rand_like(self.gravity_bias[env_ids])
+            self.gravity_bias[env_ids, 0] *= self.cfg.domain_rand.gravity_bias_range["x"][1] - self.cfg.domain_rand.gravity_bias_range["x"][0]
+            self.gravity_bias[env_ids, 0] += self.cfg.domain_rand.gravity_bias_range["x"][0]
+            self.gravity_bias[env_ids, 1] *= self.cfg.domain_rand.gravity_bias_range["y"][1] - self.cfg.domain_rand.gravity_bias_range["y"][0]
+            self.gravity_bias[env_ids, 1] += self.cfg.domain_rand.gravity_bias_range["y"][0]
+            self.gravity_bias[env_ids, 2] *= self.cfg.domain_rand.gravity_bias_range["z"][1] - self.cfg.domain_rand.gravity_bias_range["z"][0]
+            self.gravity_bias[env_ids, 2] += self.cfg.domain_rand.gravity_bias_range["z"][0]
+
+        self.max_power_per_timestep[env_ids] = 0.
+
         return return_
 
     def _prepare_reward_function(self):
@@ -483,6 +545,16 @@ class LeggedRobotField(LeggedRobot):
             self.cfg.env.obs_components,
             privileged= False,
         )
+        if getattr(self.cfg.domain_rand, "randomize_gravity_bias", False) and "proprioception" in self.cfg.env.obs_components:
+            # NOTE: The logic order is a bit messed up here when combined with legged_robot_noisy.py.
+            # When simulating sensor bias/noise and latency, the gravity bias should be added before
+            # the proprioception latency.
+            # However, since the gravity bias is randomized across trajectories, the latency simulation
+            # in legged_robot_noisy.py does not need to be computed after adding the bias.
+            # So, this computation order looks OK.
+            assert hasattr(self, "gravity_bias")
+            proprioception_slice = get_obs_slice(self.obs_segments, "proprioception")
+            self.obs_buf[:, proprioception_slice[0].start + 6: proprioception_slice[0].start + 9] += self.gravity_bias
         if self.add_noise:
             self.obs_buf += (2 * torch.rand_like(self.obs_buf) - 1) * self.noise_scale_vec
 
@@ -496,9 +568,13 @@ class LeggedRobotField(LeggedRobot):
         if "proprioception" in getattr(self.cfg.env, "privileged_obs_components", []) \
             and getattr(self.cfg.env, "privileged_use_lin_vel", False):
             # NOTE: according to self.get_obs_segment_from_components, the "proprioception" observation
-            # is always the first part of this flattened observation. Check super().compute_observations
+            # is always the FIRST part of this flattened observation. Check super().compute_observations
             # and self.cfg.env.use_lin_vel for the reason for this if branch.
             self.privileged_obs_buf[:, :3] = self.base_lin_vel * self.obs_scales.lin_vel
+        if getattr(self.cfg.domain_rand, "randomize_privileged_gravity_bias", False) and "proprioception" in getattr(self.cfg.env, "privileged_obs_components", []):
+            assert hasattr(self, "gravity_bias")
+            proprioception_slice = get_obs_slice(self.privileged_obs_segments, "proprioception")
+            self.privileged_obs_buf[:, proprioception_slice[0].start + 6: proprioception_slice[0].start + 9] += self.gravity_bias
 
         for key in self.sensor_handles[0].keys():
             if "camera" in key:
@@ -558,12 +634,31 @@ class LeggedRobotField(LeggedRobot):
 
     def _create_envs(self):
         if self.cfg.domain_rand.randomize_motor:
+            mtr_rng = self.cfg.domain_rand.leg_motor_strength_range
             self.motor_strength = torch_rand_float(
-                self.cfg.domain_rand.leg_motor_strength_range[0],
-                self.cfg.domain_rand.leg_motor_strength_range[1],
-                (self.num_envs, 12),
+                mtr_rng[0],
+                mtr_rng[1],
+                (self.num_envs, self.num_actions),
                 device=self.device,
             )
 
         return_ = super()._create_envs()
 
+        front_hip_names = getattr(self.cfg.asset, "front_hip_names", ["FR_hip_joint", "FL_hip_joint"])
+        self.front_hip_indices = torch.zeros(len(front_hip_names), dtype=torch.long, device=self.device, requires_grad=False)
+        for i, name in enumerate(front_hip_names):
+            self.front_hip_indices[i] = self.dof_names.index(name)
+
+        rear_hip_names = getattr(self.cfg.asset, "rear_hip_names", ["RR_hip_joint", "RL_hip_joint"])
+        self.rear_hip_indices = torch.zeros(len(rear_hip_names), dtype=torch.long, device=self.device, requires_grad=False)
+        for i, name in enumerate(rear_hip_names):
+            self.rear_hip_indices[i] = self.dof_names.index(name)
+
+        hip_names = front_hip_names + rear_hip_names
+        self.hip_indices = torch.zeros(len(hip_names), dtype=torch.long, device=self.device, requires_grad=False)
+        for i, name in enumerate(hip_names):
+            self.hip_indices[i] = self.dof_names.index(name)
+
         return super()._create_envs()
 
     def _process_rigid_shape_props(self, props, env_id):
@@ -703,10 +798,16 @@ class LeggedRobotField(LeggedRobot):
 
     def draw_volume_sample_points(self):
         sphere_geom = gymutil.WireframeSphereGeometry(0.005, 4, 4, None, color=(1, 0.1, 0))
+        sphere_penetrate_geom = gymutil.WireframeSphereGeometry(0.005, 4, 4, None, color=(0.1, 0.6, 0.6))
+        if self.cfg.terrain.selected == "BarrierTrack":
+            penetration_mask = self.terrain.get_penetration_mask(self.volume_sample_points.view(-1, 3)).view(self.num_envs, -1)
         for env_idx in range(self.num_envs):
             for point_idx in range(self.volume_sample_points.shape[1]):
                 sphere_pose = gymapi.Transform(gymapi.Vec3(*self.volume_sample_points[env_idx, point_idx]), r= None)
-                gymutil.draw_lines(sphere_geom, self.gym, self.viewer, self.envs[env_idx], sphere_pose)
+                if penetration_mask[env_idx, point_idx]:
+                    gymutil.draw_lines(sphere_penetrate_geom, self.gym, self.viewer, self.envs[env_idx], sphere_pose)
+                else:
+                    gymutil.draw_lines(sphere_geom, self.gym, self.viewer, self.envs[env_idx], sphere_pose)
 
     ##### Additional rewards #####
     def _reward_lin_vel_l2norm(self):
@@ -715,6 +816,10 @@ class LeggedRobotField(LeggedRobot):
     def _reward_world_vel_l2norm(self):
         return torch.norm((self.commands[:, :2] - self.root_states[:, 7:9]), dim= 1)
 
+    def _reward_tracking_world_vel(self):
+        world_vel_error = torch.sum(torch.square(self.commands[:, :2] - self.root_states[:, 7:9]), dim= 1)
+        return torch.exp(-world_vel_error/self.cfg.rewards.tracking_sigma)
+
     def _reward_legs_energy(self):
         return torch.sum(torch.square(self.torques * self.dof_vel), dim=1)
@@ -730,6 +835,10 @@ class LeggedRobotField(LeggedRobot):
     def _reward_alive(self):
         return 1.
 
+    def _reward_dof_error(self):
+        dof_error = torch.sum(torch.square(self.dof_pos - self.default_dof_pos), dim=1)
+        return dof_error
+
     def _reward_lin_cmd(self):
         """ This reward term does not depend on the policy; it depends on the command """
         return torch.norm(self.commands[:, :2], dim= 1)
@@ -767,6 +876,157 @@ class LeggedRobotField(LeggedRobot):
         penetration_mask *= torch.norm(self.velocity_sample_points, dim= -1) + 1e-3
         return torch.sum(penetration_mask, dim= -1)
 
+    def _reward_tilt_cond(self):
+        """ Conditioned reward term based on whether the robot is engaging the tilt obstacle.
+        Use a positive factor to allow a rolling angle when encountering the tilt obstacle.
+        """
+        if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
+        roll, pitch, yaw = get_euler_xyz(self.root_states[:, 3:7])
+        pi = torch.acos(torch.zeros(1)).item() * 2 # which is 3.1415927410125732
+        roll[roll > pi] -= pi * 2 # to range (-pi, pi)
+        roll[roll < -pi] += pi * 2 # to range (-pi, pi)
+        if hasattr(self, "volume_sample_points"):
+            self.refresh_volume_sample_points()
+            stepping_obstacle_info = self.terrain.get_stepping_obstacle_info(self.volume_sample_points.view(-1, 3))
+        else:
+            stepping_obstacle_info = self.terrain.get_stepping_obstacle_info(self.root_states[:, :3])
+        stepping_obstacle_info = stepping_obstacle_info.view(self.num_envs, -1, stepping_obstacle_info.shape[-1])
+        # Assuming that each robot will only be in one obstacle (or none).
+        robot_stepping_obstacle_id = torch.max(stepping_obstacle_info[:, :, 0], dim= -1)[0]
+        tilting_mask = robot_stepping_obstacle_id == self.terrain.track_options_id_dict["tilt"]
+        return_ = torch.where(tilting_mask, torch.clip(torch.abs(roll), 0, torch.pi/2), -torch.clip(torch.abs(roll), 0, torch.pi/2))
+        return return_
+
+    def _reward_hip_pos(self):
+        return torch.sum(torch.square(self.dof_pos[:, self.hip_indices] - self.default_dof_pos[:, self.hip_indices]), dim=1)
+
+    def _reward_front_hip_pos(self):
+        """ Reward the robot for not moving its front hips """
+        return torch.sum(torch.square(self.dof_pos[:, self.front_hip_indices] - self.default_dof_pos[:, self.front_hip_indices]), dim=1)
+
+    def _reward_rear_hip_pos(self):
+        """ Reward the robot for not moving its rear hips """
+        return torch.sum(torch.square(self.dof_pos[:, self.rear_hip_indices] - self.default_dof_pos[:, self.rear_hip_indices]), dim=1)
+
+    def _reward_down_cond(self):
+        if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
+        if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0])
+        self.refresh_volume_sample_points()
+        engaging_obstacle_info = self.terrain.get_engaging_block_info(
+            self.root_states[:, :3],
+            self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3)
+        )
+        roll, pitch, yaw = get_euler_xyz(self.root_states[:, 3:7])
+        pi = torch.acos(torch.zeros(1)).item() * 2 # which is 3.1415927410125732
+        pitch[pitch > pi] -= pi * 2 # to range (-pi, pi)
+        pitch[pitch < -pi] += pi * 2 # to range (-pi, pi)
+        engaging_mask = (engaging_obstacle_info[:, 1 + self.terrain.track_options_id_dict["jump"]] > 0) \
+            & (engaging_obstacle_info[:, 1 + self.terrain.max_track_options + 2] < 0.)
+        pitch_err = torch.abs(pitch - 0.2)
+        return torch.exp(-pitch_err/self.cfg.rewards.tracking_sigma) * engaging_mask # the higher the positive factor, the more the robot is encouraged to pitch down 0.2 rad
+
+    def _reward_jump_x_vel_cond(self):
+        if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
+        if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0])
+        self.refresh_volume_sample_points()
+        engaging_obstacle_info = self.terrain.get_engaging_block_info(
+            self.root_states[:, :3],
+            self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3)
+        )
+        engaging_mask = (engaging_obstacle_info[:, 1 + self.terrain.track_options_id_dict["jump"]] > 0) \
+            & (engaging_obstacle_info[:, 1 + self.terrain.max_track_options + 2] > 0.) \
+            & (engaging_obstacle_info[:, 0] > 0) # engaging jump-up, not jump-down, positive distance.
+        roll, pitch, yaw = get_euler_xyz(self.root_states[:, 3:7])
+        pi = torch.acos(torch.zeros(1)).item() * 2 # which is 3.1415927410125732
+        pitch[pitch > pi] -= pi * 2 # to range (-pi, pi)
+        pitch[pitch < -pi] += pi * 2 # to range (-pi, pi)
+        pitch_up_mask = pitch < -0.75 # a hack value
+
+        return torch.clip(self.base_lin_vel[:, 0], max= 1.5) * engaging_mask * pitch_up_mask
+
+    def _reward_sync_legs_cond(self):
+        """ A hack to force the same actuation on both rear legs when jumping. """
+        if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
+        if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0])
+        self.refresh_volume_sample_points()
+        engaging_obstacle_info = self.terrain.get_engaging_block_info(
+            self.root_states[:, :3],
+            self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3)
+        )
+        engaging_mask = (engaging_obstacle_info[:, 1 + self.terrain.track_options_id_dict["jump"]] > 0) \
+            & (engaging_obstacle_info[:, 1 + self.terrain.max_track_options + 2] > 0.) \
+            & (engaging_obstacle_info[:, 0] > 0) # engaging jump-up, not jump-down, positive distance.
+        rr_legs = torch.clone(self.actions[:, 6:9]) # shoulder, thigh, calf
+        rl_legs = torch.clone(self.actions[:, 9:12]) # shoulder, thigh, calf
+        rl_legs[:, 0] *= -1 # flip the sign of the shoulder action
+        return torch.norm(rr_legs - rl_legs, dim= -1) * engaging_mask
+
+    def _reward_sync_all_legs_cond(self):
+        """ A hack to force the same actuation on both front/rear legs when jumping. """
+        if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
+        if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0])
+        self.refresh_volume_sample_points()
+        engaging_obstacle_info = self.terrain.get_engaging_block_info(
+            self.root_states[:, :3],
+            self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3)
+        )
+        engaging_mask = (engaging_obstacle_info[:, 1 + self.terrain.track_options_id_dict["jump"]] > 0) \
+            & (engaging_obstacle_info[:, 1 + self.terrain.max_track_options + 2] > 0.) \
+            & (engaging_obstacle_info[:, 0] > 0) # engaging jump-up, not jump-down, positive distance.
+        right_legs = torch.clone(torch.cat([
+            self.actions[:, 0:3],
+            self.actions[:, 6:9],
+        ], dim= -1)) # shoulder, thigh, calf
+        left_legs = torch.clone(torch.cat([
+            self.actions[:, 3:6],
+            self.actions[:, 9:12],
+        ], dim= -1)) # shoulder, thigh, calf
+        left_legs[:, 0] *= -1 # flip the sign of the shoulder action
+        left_legs[:, 3] *= -1 # flip the sign of the shoulder action
+        return torch.norm(right_legs - left_legs, p= 1, dim= -1) * engaging_mask
+
+    def _reward_sync_all_legs(self):
+        right_legs = torch.clone(torch.cat([
+            self.actions[:, 0:3],
+            self.actions[:, 6:9],
+        ], dim= -1)) # shoulder, thigh, calf
+        left_legs = torch.clone(torch.cat([
+            self.actions[:, 3:6],
+            self.actions[:, 9:12],
+        ], dim= -1)) # shoulder, thigh, calf
+        left_legs[:, 0] *= -1 # flip the sign of the shoulder action
+        left_legs[:, 3] *= -1 # flip the sign of the shoulder action
+        return torch.norm(right_legs - left_legs, p= 1, dim= -1)
+
+    def _reward_dof_error_cond(self):
+        """ Force dof error when not engaging an obstacle """
+        if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
+        if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0])
+        self.refresh_volume_sample_points()
+        engaging_obstacle_info = self.terrain.get_engaging_block_info(
+            self.root_states[:, :3],
+            self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3)
+        )
+        engaging_mask = (engaging_obstacle_info[:, 1] > 0)
+        return torch.sum(torch.square(self.dof_pos - self.default_dof_pos), dim=1) * engaging_mask
+
+    def _reward_leap_bonous_cond(self):
+        """ counteract the tracking reward loss during the leap """
+        if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0])
+        if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0])
+        self.refresh_volume_sample_points()
+        engaging_obstacle_info = self.terrain.get_engaging_block_info(
+            self.root_states[:, :3],
+            self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3)
+        )
+        engaging_mask = (engaging_obstacle_info[:, 1 + self.terrain.track_options_id_dict["leap"]] > 0) \
+            & (-engaging_obstacle_info[:, 1 + self.terrain.max_track_options + 1] < engaging_obstacle_info[:, 0]) \
+            & (engaging_obstacle_info[:, 0] < 0.) # engaging the leap obstacle, inside the gap.
+
+        world_vel_error = torch.sum(torch.square(self.commands[:, :2] - self.root_states[:, 7:9]), dim= 1)
+        return (1 - torch.exp(-world_vel_error/self.cfg.rewards.tracking_sigma)) * engaging_mask # reverse version of the tracking reward
+
     ##### Some helper functions that override parent class attributes #####
     @property
     def all_obs_components(self):
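Several of the reward terms above repeat the same three-line pattern to wrap angles returned by `get_euler_xyz` (range [0, 2π)) into (-π, π). A small helper capturing that pattern (a refactoring sketch, not code from the repo):

```python
import torch

def wrap_to_pi(angles: torch.Tensor) -> torch.Tensor:
    """ Map angles from [0, 2*pi) (as returned by get_euler_xyz) into (-pi, pi). """
    angles = angles.clone()
    angles[angles > torch.pi] -= 2 * torch.pi
    angles[angles < -torch.pi] += 2 * torch.pi
    return angles

# e.g. roll = wrap_to_pi(get_euler_xyz(self.root_states[:, 3:7])[0])
```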
@@ -40,11 +40,24 @@ class LeggedRobotNoisy(LeggedRobotField):
             self.actions_scaled = self.actions * self.cfg.control.action_scale
             control_type = self.cfg.control.control_type
             if control_type == "P":
-                self.actions_scaled_torque_clipped = self.clip_position_action_by_torque_limit(self.actions_scaled)
+                actions_scaled_torque_clipped = self.clip_position_action_by_torque_limit(self.actions_scaled)
             else:
                 raise NotImplementedError
         else:
-            self.actions_scaled_torque_clipped = self.actions * self.cfg.control.action_scale
+            actions_scaled_torque_clipped = self.actions * self.cfg.control.action_scale
+
+        if getattr(self.cfg.control, "action_delay", False):
+            # always put the latest action at the end of the buffer
+            self.actions_history_buffer = torch.roll(self.actions_history_buffer, shifts= -1, dims= 0)
+            self.actions_history_buffer[-1] = actions_scaled_torque_clipped
+            # get the delayed action
+            self.action_delayed_frames = ((self.current_action_delay / self.dt) + 1).to(int)
+            self.actions_scaled_torque_clipped = self.actions_history_buffer[
+                -self.action_delayed_frames,
+                torch.arange(self.num_envs, device= self.device),
+            ]
+        else:
+            self.actions_scaled_torque_clipped = actions_scaled_torque_clipped
 
         return return_
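The delayed-action lookup above indexes the rolling history buffer `action_delayed_frames` steps from its end. A quick numeric check of that arithmetic (the dt value is illustrative):

```python
# With a 0.02 s control dt and a sampled delay of 0.03 s:
dt = 0.02
current_action_delay = 0.03
action_delayed_frames = int(current_action_delay / dt) + 1   # = 2
# buffer[-1] holds the newest action, so buffer[-2] is applied,
# i.e. the policy output from one control step ago (~0.02-0.04 s of latency).
```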
@@ -79,7 +92,10 @@ class LeggedRobotNoisy(LeggedRobotField):
             torch.max(torch.abs(self.torques), dim= -1)[0],
             self.max_torques,
         )
+        ### The configured torque limit is usually smaller than the one in the robot's datasheet
+        self.torque_exceed_count_substep[(torch.abs(self.torques) > self.torque_limits).any(dim= -1)] += 1
+        ### Hack to check torque limit exceedance against your own value.
+        # self.torque_exceed_count_envstep[(torch.abs(self.torques) > 38.).any(dim= -1)] += 1
 
         ### count how many times in the episode the robot is out of dof pos limit (summing all dofs)
         self.out_of_dof_pos_limit_count_substep += self._reward_dof_pos_limits().int()
@@ -108,6 +124,12 @@ class LeggedRobotNoisy(LeggedRobotField):
     def _post_physics_step_callback(self):
         super()._post_physics_step_callback()
 
+        if hasattr(self, "actions_history_buffer"):
+            resampling_time = getattr(self.cfg.control, "action_delay_resampling_time", self.dt)
+            resample_env_ids = (self.episode_length_buf % int(resampling_time / self.dt) == 0).nonzero(as_tuple= False).flatten()
+            if len(resample_env_ids) > 0:
+                self._resample_action_delay(resample_env_ids)
+
         if hasattr(self, "proprioception_buffer"):
             resampling_time = getattr(self.cfg.sensor.proprioception, "latency_resampling_time", self.dt)
             resample_env_ids = (self.episode_length_buf % int(resampling_time / self.dt) == 0).nonzero(as_tuple= False).flatten()
@@ -122,6 +144,14 @@ class LeggedRobotNoisy(LeggedRobotField):
 
         self.torque_exceed_count_envstep[(torch.abs(self.substep_torques) > self.torque_limits).any(dim= 1).any(dim= 1)] += 1
 
+    def _resample_action_delay(self, env_ids):
+        self.current_action_delay[env_ids] = torch_rand_float(
+            self.cfg.control.action_delay_range[0],
+            self.cfg.control.action_delay_range[1],
+            (len(env_ids), 1),
+            device= self.device,
+        ).flatten()
+
     def _resample_proprioception_latency(self, env_ids):
         self.current_proprioception_latency[env_ids] = torch_rand_float(
             self.cfg.sensor.proprioception.latency_range[0],
@@ -142,6 +172,27 @@ class LeggedRobotNoisy(LeggedRobotField):
         return_ = super()._init_buffers()
         all_obs_components = self.all_obs_components
 
+        if getattr(self.cfg.control, "action_delay", False):
+            assert hasattr(self.cfg.control, "action_delay_range") and hasattr(self.cfg.control, "action_delay_resample_time"), "Please specify action_delay_range and action_delay_resample_time in the config file."
+            """ Used in pre-physics step """
+            self.cfg.control.action_history_buffer_length = int((self.cfg.control.action_delay_range[1] + self.dt) / self.dt)
+            self.actions_history_buffer = torch.zeros(
+                (
+                    self.cfg.control.action_history_buffer_length,
+                    self.num_envs,
+                    self.num_actions,
+                ),
+                dtype= torch.float32,
+                device= self.device,
+            )
+            self.current_action_delay = torch_rand_float(
+                self.cfg.control.action_delay_range[0],
+                self.cfg.control.action_delay_range[1],
+                (self.num_envs, 1),
+                device= self.device,
+            ).flatten()
+            self.action_delayed_frames = ((self.current_action_delay / self.dt) + 1).to(int)
+
         if "proprioception" in all_obs_components and hasattr(self.cfg.sensor, "proprioception"):
             """ Adding proprioception delay buffer """
             self.cfg.sensor.proprioception.buffer_length = int((self.cfg.sensor.proprioception.latency_range[1] + self.dt) / self.dt)
@@ -154,13 +205,13 @@ class LeggedRobotNoisy(LeggedRobotField):
                 dtype= torch.float32,
                 device= self.device,
             )
-            self.proprioception_delayed_frames = torch.ones((self.num_envs,), device= self.device, dtype= int) * self.cfg.sensor.proprioception.buffer_length
             self.current_proprioception_latency = torch_rand_float(
                 self.cfg.sensor.proprioception.latency_range[0],
                 self.cfg.sensor.proprioception.latency_range[1],
                 (self.num_envs, 1),
                 device= self.device,
             ).flatten()
+            self.proprioception_delayed_frames = ((self.current_proprioception_latency / self.dt) + 1).to(int)
 
         if "forward_depth" in all_obs_components and hasattr(self.cfg.sensor, "forward_camera"):
             output_resolution = getattr(self.cfg.sensor.forward_camera, "output_resolution", self.cfg.sensor.forward_camera.resolution)
@@ -220,6 +271,9 @@ class LeggedRobotNoisy(LeggedRobotField):
 
     def _reset_buffers(self, env_ids):
         return_ = super()._reset_buffers(env_ids)
+        if hasattr(self, "actions_history_buffer"):
+            self.actions_history_buffer[:, env_ids] = 0.
+            self.action_delayed_frames[env_ids] = self.cfg.control.action_history_buffer_length
         if hasattr(self, "forward_depth_buffer"):
             self.forward_depth_buffer[:, env_ids] = 0.
             self.forward_depth_delayed_frames[env_ids] = self.cfg.sensor.forward_camera.buffer_length
@ -537,12 +591,11 @@ class LeggedRobotNoisy(LeggedRobotField):
|
|||
-self.proprioception_delayed_frames,
|
||||
torch.arange(self.num_envs, device= self.device),
|
||||
].clone()
|
||||
### NOTE: WARN: ERROR: remove this code in final version, no action delay should be used.
|
||||
if getattr(self.cfg.sensor.proprioception, "delay_action_obs", False) or getattr(self.cfg.sensor.proprioception, "delay_privileged_action_obs", False):
|
||||
raise ValueError("LeggedRobotNoisy: No action delay should be used. Please remove these settings")
|
||||
# The last-action is not delayed.
|
||||
if getattr(self.cfg.sensor.proprioception, "delay_action_obs", False):
|
||||
not_delayed_mask = torch.randint(0, 1, size= (self.num_envs,), device= self.device).bool()
|
||||
self.proprioception_output[not_delayed_mask, -12:] = self.proprioception_buffer[-1, not_delayed_mask, -12:]
|
||||
else:
|
||||
self.proprioception_output[:, -12:] = self.proprioception_buffer[-1, :, -12:]
|
||||
self.proprioception_output[:, -12:] = self.proprioception_buffer[-1, :, -12:]
|
||||
self.proprioception_refreshed = True
|
||||
if not hasattr(self.cfg.sensor, "proprioception") or privileged:
|
||||
return super()._get_proprioception_obs(privileged)
|
||||
|
@ -573,5 +626,12 @@ class LeggedRobotNoisy(LeggedRobotField):
|
|||
# sum along decimation axis and dof axis
|
||||
return torch.square(exceeded_torques).sum(dim= 1).sum(dim= 1)
|
||||
|
||||
def _reward_exceed_torque_limits_l1norm(self):
|
||||
""" square function for exceeding part """
|
||||
exceeded_torques = torch.abs(self.substep_torques) - self.torque_limits
|
||||
exceeded_torques[exceeded_torques < 0.] = 0.
|
||||
# sum along decimation axis and dof axis
|
||||
return torch.norm(exceeded_torques, p= 1, dim= -1).sum(dim= 1)
|
||||
|
||||
def _reward_exceed_dof_pos_limits(self):
|
||||
return self.substep_exceed_dof_pos_limits.to(torch.float32).sum(dim= -1).mean(dim= -1)
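For readers checking the reductions: with substep_torques assumed to be (num_envs, decimation, num_dofs), the L1-norm variant collapses the dof axis first and then the decimation axis, yielding one penalty per env. A small self-contained check (illustrative, not repository code):

import torch

# Dummy shapes assumed from the comments above: (num_envs, decimation, num_dofs)
num_envs, decimation, num_dofs = 4, 4, 12
substep_torques = torch.randn(num_envs, decimation, num_dofs) * 30.
torque_limits = torch.full((num_dofs,), 20.)  # broadcasts over the last axis

exceeded = torch.abs(substep_torques) - torque_limits
exceeded[exceeded < 0.] = 0.
penalty = torch.norm(exceeded, p= 1, dim= -1).sum(dim= 1)
assert penalty.shape == (num_envs,)  # one scalar penalty per environment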
@ -0,0 +1,127 @@

import numpy as np
from os import path as osp
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict

class Go1CrawlCfg( Go1FieldCfg ):
    class init_state( Go1FieldCfg.init_state ):
        pos = [0., 0., 0.45]

    #### uncomment this to train non-virtual terrain
    class sensor( Go1FieldCfg.sensor ):
        class proprioception( Go1FieldCfg.sensor.proprioception ):
            latency_range = [0.04-0.0025, 0.04+0.0075]
    #### uncomment the above to train non-virtual terrain

    class terrain( Go1FieldCfg.terrain ):
        max_init_terrain_level = 2
        border_size = 5
        slope_treshold = 20.
        curriculum = True

        BarrierTrack_kwargs = merge_dict(Go1FieldCfg.terrain.BarrierTrack_kwargs, dict(
            options= [
                "crawl",
            ],
            track_block_length= 1.6,
            crawl= dict(
                height= (0.28, 0.5),
                depth= (0.1, 0.6), # size along the forward axis
                wall_height= 0.6,
                no_perlin_at_obstacle= False,
            ),
            virtual_terrain= False, # Change this to False for real terrain
        ))
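These task configs lean on merge_dict so that a skill config can override a few BarrierTrack_kwargs entries while inheriting the rest from Go1FieldCfg. The helper itself lives in legged_gym/utils/helpers.py and is not part of this diff; a minimal sketch of the behavior the configs above rely on (an assumption, not the actual implementation):

import copy

def merge_dict(base: dict, update: dict) -> dict:
    # Return a copy of `base` with `update` merged in; nested dicts are merged
    # recursively, so a child config only re-declares the keys it changes.
    merged = copy.deepcopy(base)
    for key, value in update.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = merge_dict(merged[key], value)
        else:
            merged[key] = value
    return merged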
        TerrainPerlin_kwargs = merge_dict(Go1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
            zScale= 0.06,
        ))

    class commands( Go1FieldCfg.commands ):
        class ranges( Go1FieldCfg.commands.ranges ):
            lin_vel_x = [0.3, 0.8]
            lin_vel_y = [0.0, 0.0]
            ang_vel_yaw = [0., 0.]

    class control( Go1FieldCfg.control ):
        computer_clip_torque = False

    class asset( Go1FieldCfg.asset ):
        terminate_after_contacts_on = ["base"]

    class termination( Go1FieldCfg.termination ):
        # additional factors that determine whether to terminate the episode
        termination_terms = [
            "roll",
            "pitch",
            "z_low",
            "z_high",
            "out_of_track",
        ]

    class domain_rand( Go1FieldCfg.domain_rand ):
        init_base_rot_range = dict(
            roll= [-0.1, 0.1],
            pitch= [-0.1, 0.1],
        )
        init_base_vel_range = [-0.1, 0.1]

    class rewards( Go1FieldCfg.rewards ):
        class scales:
            # tracking_ang_vel = 0.05
            # world_vel_l2norm = -1.
            # legs_energy_substeps = -4e-5
            # alive = 5.
            # penetrate_depth = -6e-2
            # penetrate_volume = -6e-2
            # exceed_dof_pos_limits = -8e-1
            # exceed_torque_limits_l1norm = -8e-1
            # lin_pos_y = -0.1
            ###############################################
            tracking_ang_vel = 0.05
            tracking_world_vel = 5.
            # world_vel_l2norm = -1.
            # alive = 2.
            legs_energy_substeps = -1e-5
            # penetrate_depth = -6e-2 # comment this out if training non-virtual terrain
            # penetrate_volume = -6e-2 # comment this out if training non-virtual terrain
            exceed_dof_pos_limits = -8e-1
            # exceed_torque_limits_i = -2e-1
            exceed_torque_limits_l1norm = -1.
            # collision = -0.05
            # tilt_cond = 0.1
            torques = -1e-5
            yaw_abs = -0.1
            lin_pos_y = -0.1
        soft_dof_pos_limit = 0.7
        only_positive_rewards = False

    class curriculum( Go1FieldCfg.curriculum ):
        penetrate_volume_threshold_harder = 1500
        penetrate_volume_threshold_easier = 10000
        penetrate_depth_threshold_harder = 10
        penetrate_depth_threshold_easier = 400

logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class Go1CrawlCfgPPO( Go1FieldCfgPPO ):
    class algorithm( Go1FieldCfgPPO.algorithm ):
        entropy_coef = 0.0
        clip_min_std = 0.1

    class runner( Go1FieldCfgPPO.runner ):
        resume = True
        load_run = "{Your trained walking model directory}"
        load_run = "Sep20_03-37-32_SkillopensourcePlaneWalking_pEnergySubsteps1e-5_pTorqueExceedIndicate1e-1_aScale0.5_tClip202025"
        load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Sep26_14-30-24_Skills_crawl_propDelay0.04-0.05_pEnergy-2e-5_pDof8e-01_pTorqueL14e-01_rTilt5e-01_pCollision0.2_maxPushAng0.5_kp40_fromSep26_01-38-19")
        load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct11_12-19-00_Skills_crawl_propDelay0.04-0.05_pEnergy-1e-5_pDof8e-01_pTorqueL14e-01_pPosY0.1_maxPushAng0.3_kp40_fromOct09_09-58-26")

        run_name = "".join(["Skills_",
            ("Multi" if len(Go1CrawlCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else (Go1CrawlCfg.terrain.BarrierTrack_kwargs["options"][0] if Go1CrawlCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")),
            ("_pEnergy" + np.format_float_scientific(-Go1CrawlCfg.rewards.scales.legs_energy_substeps, precision=1, exp_digits=1)),
            ("_pDof" + np.format_float_scientific(-Go1CrawlCfg.rewards.scales.exceed_dof_pos_limits, precision=1, exp_digits=1)),
            ("_pTorqueL1" + np.format_float_scientific(-Go1CrawlCfg.rewards.scales.exceed_torque_limits_l1norm, precision=1, exp_digits=1)),
            ("_noComputerClip" if not Go1CrawlCfg.control.computer_clip_torque else ""),
            ("_noTanh"),
        ])
        max_iterations = 20000
        save_interval = 500
@ -0,0 +1,166 @@

from os import path as osp
import numpy as np
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict

class Go1DownCfg( Go1FieldCfg ):
    class init_state( Go1FieldCfg.init_state ):
        pos = [0., 0., 0.45]

    #### uncomment this to train non-virtual terrain
    class sensor( Go1FieldCfg.sensor ):
        class proprioception( Go1FieldCfg.sensor.proprioception ):
            latency_range = [0.04-0.0025, 0.04+0.0075]
    #### uncomment the above to train non-virtual terrain

    class terrain( Go1FieldCfg.terrain ):
        max_init_terrain_level = 2
        border_size = 5
        slope_treshold = 20.
        curriculum = False

        BarrierTrack_kwargs = merge_dict(Go1FieldCfg.terrain.BarrierTrack_kwargs, dict(
            options= [
                "jump",
            ],
            track_block_length= 1.6,
            jump= dict(
                height= (0.2, 0.45),
                depth= 0.3,
                fake_offset= 0.0, # [m] an offset that makes it easier for the robot to engage the obstacle
                jump_down_prob= 1., # probability of jumping down; use it in non-virtual terrain
            ),
            virtual_terrain= True, # Change this to False for real terrain
            no_perlin_threshold= 0.6,
            randomize_obstacle_order= True,
            n_obstacles_per_track= 3,
        ))

        TerrainPerlin_kwargs = merge_dict(Go1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
            zScale= [0.04, 0.15],
        ))

    class commands( Go1FieldCfg.commands ):
        class ranges( Go1FieldCfg.commands.ranges ):
            lin_vel_x = [0.5, 1.]
            lin_vel_y = [0.0, 0.0]
            ang_vel_yaw = [0., 0.]

    class control( Go1FieldCfg.control ):
        computer_clip_torque = False

    class asset( Go1FieldCfg.asset ):
        terminate_after_contacts_on = ["base"]

    class termination( Go1FieldCfg.termination ):
        # additional factors that determine whether to terminate the episode
        termination_terms = [
            "roll",
            "pitch",
            "z_low",
            "z_high",
            "out_of_track",
        ]
        z_low_kwargs = merge_dict(Go1FieldCfg.termination.z_low_kwargs, dict(
            threshold= -3.,
        ))

    class domain_rand( Go1FieldCfg.domain_rand ):
        init_base_pos_range = dict(
            x= [0.2, 0.6],
            y= [-0.25, 0.25],
        )
        init_base_rot_range = dict(
            roll= [-0.1, 0.1],
            pitch= [-0.1, 0.1],
        )
        init_base_vel_range = merge_dict(Go1FieldCfg.domain_rand.init_base_vel_range, dict(
            x= [-0.1, 0.5],
        ))

        push_robots = True

    class rewards( Go1FieldCfg.rewards ):
        class scales:
            action_rate = -0.1
            collision = -10.0
            delta_torques = -1e-07
            dof_acc = -1e-07
            down_cond = 0.04
            exceed_dof_pos_limits = -0.001
            exceed_torque_limits_l1norm = -0.2
            feet_contact_forces = -0.01
            hip_pos = -5.0
            legs_energy_substeps = -5e-06
            lin_pos_y = -0.3
            penetrate_depth = -0.002
            penetrate_volume = -0.002
            torques = -0.0001
            tracking_ang_vel = 0.1
            tracking_world_vel = 5.0
            yaw_abs = -0.
        soft_dof_pos_limit = 0.75
        max_contact_force = 200.
        only_positive_rewards = False

    class noise( Go1FieldCfg.noise ):
        add_noise = False

    class curriculum( Go1FieldCfg.curriculum ):
        penetrate_volume_threshold_harder = 2000000
        penetrate_volume_threshold_easier = 4000000
        penetrate_depth_threshold_harder = 200000
        penetrate_depth_threshold_easier = 400000


logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class Go1DownCfgPPO( Go1FieldCfgPPO ):
    class algorithm( Go1FieldCfgPPO.algorithm ):
        entropy_coef = 0.0
        clip_min_std = 0.2

    class runner( Go1FieldCfgPPO.runner ):
        policy_class_name = "ActorCriticRecurrent"
        experiment_name = "field_go1"
        resume = True
        load_run = "{Your trained climb model directory}"
        load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Sep28_14-59-53_Skills_down_pEnergySubsteps-8e-06_pPenD1e-02_pDofLimit-4e-01_pYawAbs_pTorqueL14e-01_pCollision-1e-1_downRedundant0.1_softDof0.8_pushRobot_propDelay0.04-0.05_kp40_kd0.5fromSep27_14-32-13")
        load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct14_02-06-47_Skills_down_comXRange-0.2-0.2_pEnergySubsteps-4e-06_rAlive3_pPenD4e-03_pDHarder2.e+05_pushRobot_noTanh_virtualfromOct09_09-49-58")
        # load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct25_10-48-54_down_pEnergySubsteps-4e-05_pDofLimit-4e-01_pYawAbs_pTorqueL14e-01_pActRate1.e-1_rDownCond3.e-1_kp40_kd0.5_virtualfromOct14_02-06-47")
        # load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct26_08-45-02_Skills_down_pEnergySubsteps-4e-05_rTrackVel4._pZVel1e-01_pAngXYVel5e-02_pDTorques1e-07_rDownCond1.e-1_noTanh_virtualfromOct25_10-48-54")
        load_run = "Oct28_07-38-39_Skills_down_pEnergySubsteps-4e-05_rTrackVel5._pHipPos4e-01_rDownCond4.e-2_noTanh_virtualfromOct14_02-06-47"
        load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct28_13-08-21_Skills_down_pEnergySubsteps-4e-05_rTrackVel5._pHipPos8e-01_rDownCond4.e-2_withPropNoise_noTanh_virtualfromOct28_07-38-39")
        load_run = "Oct29_22-07-51_Skills_down_pEnergySubsteps-4e-05_rTrackVel5._pHipPos8e-01_rDownCond4.e-2_noComputerClip_noPropNoise_noTanh_virtualfromOct28_13-08-21"
        load_run = "Oct30_04-23-31_Skills_down_pEnergySubsteps-1e-05_rTrackVel5._pTorqueL14e-01_pHipPos5e+00_rDownCond4.e-2_allowNegativeReward_pushRobot_noComputerClip_noPropNoise_noTanh_virtual_fromOct29_22-07-51"

        run_name = "".join(["Skills_",
            ("down" if Go1DownCfg.terrain.BarrierTrack_kwargs["jump"]["jump_down_prob"] > 0. else "jump"),
            ("_pEnergySubsteps{:.0e}".format(Go1DownCfg.rewards.scales.legs_energy_substeps) if getattr(Go1DownCfg.rewards.scales, "legs_energy_substeps", 0.) != -1e-6 else ""),
            ("_rTrackVel" + np.format_float_positional(Go1DownCfg.rewards.scales.tracking_world_vel) if getattr(Go1DownCfg.rewards.scales, "tracking_world_vel", 0.) != 0. else ""),
            # ("_pLinVel" + np.format_float_positional(-Go1DownCfg.rewards.scales.world_vel_l2norm) if getattr(Go1DownCfg.rewards.scales, "world_vel_l2norm", 0.) != 0.0 else ""),
            # ("_rAlive" + np.format_float_positional(Go1DownCfg.rewards.scales.alive) if getattr(Go1DownCfg.rewards.scales, "alive", 0.) != 0.0 else ""),
            # ("_pDofLimit{:.0e}".format(Go1DownCfg.rewards.scales.exceed_dof_pos_limits) if getattr(Go1DownCfg.rewards.scales, "exceed_dof_pos_limits") != 0.0 else ""),
            # ("_pYawAbs" if getattr(Go1DownCfg.rewards.scales, "yaw_abs", 0.) != 0.1 else ""),
            ("_pTorqueL1{:.0e}".format(-Go1DownCfg.rewards.scales.exceed_torque_limits_l1norm) if getattr(Go1DownCfg.rewards.scales, "exceed_torque_limits_l1norm", 0.) != 0 else ""),
            # ("_pActRate" + np.format_float_scientific(-Go1DownCfg.rewards.scales.action_rate, precision=1, exp_digits=1) if getattr(Go1DownCfg.rewards.scales, "action_rate", 0.) != 0.0 else ""),
            # ("_pZVel" + np.format_float_scientific(-Go1DownCfg.rewards.scales.lin_vel_z, precision=1, trim="-") if getattr(Go1DownCfg.rewards.scales, "lin_vel_z", 0.) != 0. else ""),
            # ("_pAngXYVel" + np.format_float_scientific(-Go1DownCfg.rewards.scales.ang_vel_xy, precision=1, trim="-") if getattr(Go1DownCfg.rewards.scales, "ang_vel_xy", 0.) != 0. else ""),
            # ("_pDTorques" + np.format_float_scientific(-Go1DownCfg.rewards.scales.delta_torques, precision=1, trim="-") if getattr(Go1DownCfg.rewards.scales, "delta_torques", 0.) != 0. else ""),
            # ("_pHipPos" + np.format_float_scientific(-Go1DownCfg.rewards.scales.hip_pos, precision=1, trim="-") if getattr(Go1DownCfg.rewards.scales, "hip_pos", 0.) != 0. else ""),
            # ("_rDownCond" + np.format_float_scientific(Go1DownCfg.rewards.scales.down_cond, precision=1, exp_digits=1) if getattr(Go1DownCfg.rewards.scales, "down_cond", 0.) != 0.0 else ""),
            ("_pCollision{:d}".format(int(Go1DownCfg.rewards.scales.collision)) if getattr(Go1DownCfg.rewards.scales, "collision", 0.) != 0.0 else ""),
            # ("_downRedundant{:.1f}".format(Go1DownCfg.terrain.BarrierTrack_kwargs["jump"]["down_forward_length"]) if (Go1DownCfg.terrain.BarrierTrack_kwargs["jump"].get("down_forward_length", 0.) > 0. and Go1DownCfg.terrain.BarrierTrack_kwargs["jump"].get("jump_down_prob", 0.) > 0.) else ""),
            ("_cmdRange{:.1f}-{:.1f}".format(*Go1DownCfg.commands.ranges.lin_vel_x)),
            ("_allowNegativeReward" if not Go1DownCfg.rewards.only_positive_rewards else ""),
            ("_pushRobot" if Go1DownCfg.domain_rand.push_robots else "_noPush"),
            ("_noComputerClip" if not Go1DownCfg.control.computer_clip_torque else ""),
            ("_noPropNoise" if not Go1DownCfg.noise.add_noise else "_withPropNoise"),
            # ("_kp{:d}".format(int(Go1DownCfg.control.stiffness["joint"])) if Go1DownCfg.control.stiffness["joint"] != 50 else ""),
            # ("_kd{:.1f}".format(Go1DownCfg.control.damping["joint"]) if Go1DownCfg.control.damping["joint"] != 1. else ""),
            ("_noTanh"),
            ("_virtual" if Go1DownCfg.terrain.BarrierTrack_kwargs["virtual_terrain"] else ""),
            ("_noResume" if not resume else "_from" + "_".join(load_run.split("/")[-1].split("_")[:2])),
        ])
        max_iterations = 20000
        save_interval = 200
@ -1,162 +1,161 @@

import numpy as np
import os
from os import path as osp
from legged_gym.utils.helpers import merge_dict
from legged_gym.envs.a1.a1_field_config import A1FieldCfg, A1FieldCfgPPO

go1_const_dof_range = dict(
    Hip_max= 1.047,
    Hip_min= -1.047,
    Thigh_max= 2.966,
    Thigh_min= -0.663,
    Calf_max= -0.837,
    Calf_min= -2.721,
)

go1_action_scale = 0.5

class Go1FieldCfg( A1FieldCfg ):
    class env( A1FieldCfg.env ):
        num_envs = 8192

    class init_state( A1FieldCfg.init_state ):
        pos = [0., 0., 0.7]
        zero_actions = False

    class sensor( A1FieldCfg.sensor ):
        class proprioception( A1FieldCfg.sensor.proprioception ):
            delay_action_obs = False
            latency_range = [0.04-0.0025, 0.04+0.0075] # comment this if it is too hard to train.

    class terrain( A1FieldCfg.terrain ):
        num_rows = 20
        num_cols = 50
        selected = "BarrierTrack"
        max_init_terrain_level = 0 # for climb, leap finetune
        border_size = 5
        slope_treshold = 100.
        num_cols = 80

        curriculum = True # for tilt, crawl, climb, leap
        # curriculum = False # for walk
        horizontal_scale = 0.025 # [m]
        # curriculum = True # for tilt, crawl, jump, leap
        curriculum = False # for walk
        pad_unavailable_info = True

        BarrierTrack_kwargs = dict(
        BarrierTrack_kwargs = merge_dict(A1FieldCfg.terrain.BarrierTrack_kwargs, dict(
            options= [
                # "climb",
                # "jump",
                # "crawl",
                "tilt",
                # "tilt",
                # "leap",
            ], # each race track will permute all the options
            track_width= 1.6, # for climb, crawl, tilt, walk
            # track_width= 1.0, # for leap
            # randomize_obstacle_order= True,
            track_width= 1.6,
            track_block_length= 2., # the x-axis distance from the env origin point
            wall_thickness= (0.04, 0.2), # [m]
            wall_height= 0.01, # [m] for climb, crawl, tilt, walk
            # wall_height= -0.5, # for leap
            climb= dict(
                height= (0.2, 0.6),
                depth= (0.1, 0.8), # size along the forward axis
                fake_offset= 0.0, # [m] an offset that makes it easier for the robot to engage the obstacle
            ),
            crawl= dict(
                height= (0.28, 0.38),
                depth= (0.1, 0.5), # size along the forward axis
                wall_height= 0.6,
                no_perlin_at_obstacle= False,
            ),
            tilt= dict(
                width= (0.24, 0.32),
                depth= (0.4, 1.), # size along the forward axis
                opening_angle= 0.0, # [rad] an opening that makes it easier for the robot to get into the obstacle
                wall_height= 0.5,
            ),
            leap= dict(
                length= (0.2, 1.0),
                depth= (0.4, 0.8),
                height= 0.25,
            ),
            wall_height= 0.0,
            add_perlin_noise= True,
            border_perlin_noise= True,
            border_height= 0., # for climb, crawl, tilt, walk
            # border_height= -0.5, # for leap
            virtual_terrain= False, # for climb, crawl, leap
            # virtual_terrain= True, # for tilt
            border_height= 0.,
            virtual_terrain= False,
            draw_virtual_terrain= True,
            engaging_next_threshold= 1.2,
            engaging_finish_threshold= 0.,
            curriculum_perlin= False,
            no_perlin_threshold= 0.0, # for crawl, tilt, walk
            # no_perlin_threshold= 0.05, # for leap
            # no_perlin_threshold= 0.06, # for climb
        )
            no_perlin_threshold= 0.1,
        ))

        TerrainPerlin_kwargs = dict(
            # zScale= 0.1, # for crawl
            zScale= 0.12, # for tilt
            # zScale= [0.05, 0.1], # for climb
            # zScale= [0.04, 0.1], # for leap
            # zScale= [0.1, 0.15], # for walk
            zScale= [0.08, 0.15],
            frequency= 10,
        )
    class commands( A1FieldCfg.commands ):
        class ranges( A1FieldCfg.commands.ranges ):
            # lin_vel_x = [0.0, 1.0] # for walk
            # lin_vel_x = [0.8, 1.5] # for climb
            # lin_vel_x = [1.0, 1.5] # for leap
            lin_vel_x = [0.3, 0.8] # for tilt, crawl
            lin_vel_x = [-1.0, 1.0]
            lin_vel_y = [0.0, 0.0]
            ang_vel_yaw = [0., 0.]

    class control( A1FieldCfg.control ):
        stiffness = {'joint': 50.}
        damping = {'joint': 1.}
        # action_scale = [0.2, 0.4, 0.4] * 4 # for walk
        action_scale = 0.5 # for tilt, crawl, climb, leap
        # for climb, leap
        stiffness = {'joint': 40.}
        damping = {'joint': 0.5}
        action_scale = go1_action_scale
        torque_limits = [20., 20., 25.] * 4
        computer_clip_torque = True
        computer_clip_torque = False
        motor_clip_torque = False

    class asset( A1FieldCfg.asset ):
        file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/go1/urdf/go1.urdf"
        penalize_contacts_on = ["base", "thigh"]
        terminate_after_contacts_on = ["base", "imu"] # for climb, leap, tilt, walk no-virtual

    class termination:
        # additional factors that determine whether to terminate the episode
        termination_terms = [
            "roll", # for tilt
            "pitch",
            "z_low",
            "z_high",
            "out_of_track", # for leap, walk
        ]
        sdk_dof_range = go1_const_dof_range

    class termination( A1FieldCfg.termination ):
        roll_kwargs = dict(
            threshold= 0.8, # [rad] # for tilt
            tilt_threshold= 1.5, # for tilt (condition on engaging block)
            threshold= 1.5,
        )
        pitch_kwargs = dict(
            threshold= 1.6,
            climb_threshold= 1.6,
            leap_threshold= 1.5,
            threshold= 1.5,
        )
        z_low_kwargs = dict(
            threshold= 0.08, # [m]
        )
        z_high_kwargs = dict(
            threshold= 1.5, # [m]
        )
        out_of_track_kwargs = dict(
            threshold= 1., # [m]
        )

        check_obstacle_conditioned_threshold = True
        timeout_at_border = True
        timeout_at_finished = True

    class domain_rand( A1FieldCfg.domain_rand ):
        randomize_com = True
        class com_range:
            x = [-0.05, 0.15]
            y = [-0.1, 0.1]
            z = [-0.05, 0.05]

        randomize_base_mass = True
        added_mass_range = [1.0, 3.0]
        class com_range( A1FieldCfg.domain_rand.com_range ):
            x = [-0.2, 0.2]

        init_base_pos_range = merge_dict(A1FieldCfg.domain_rand.init_base_pos_range, dict(
            x= [0.05, 0.6],
        ))
        init_base_rot_range = dict(
            roll= [-0.75, 0.75],
            pitch= [-0.75, 0.75],
        )
        # init_base_vel_range = [-1.0, 1.0]
        init_base_vel_range = dict(
            x= [-0.2, 1.5],
            y= [-0.2, 0.2],
            z= [-0.2, 0.2],
            roll= [-1., 1.],
            pitch= [-1., 1.],
            yaw= [-1., 1.],
        )
        init_dof_vel_range = [-5, 5]

    class rewards( A1FieldCfg.rewards ):
        class scales:
            tracking_ang_vel = 0.1
            world_vel_l2norm = -1.
            legs_energy_substeps = -1e-5
            exceed_torque_limits_i = -0.1
            alive = 2.
            lin_pos_y = -0.2
            yaw_abs = -0.5
            penetrate_depth = -5e-3
            penetrate_volume = -5e-3
        soft_dof_pos_limit = 0.01
            tracking_ang_vel = 0.05
            tracking_world_vel = 3.
            # world_vel_l2norm = -2.
            # alive = 3.
            legs_energy_substeps = -2e-5
            # penalty for hardware safety
            exceed_dof_pos_limits = -8e-1
            exceed_torque_limits_l1norm = -8e-1
            # penalty for walking gait, probably no need
            lin_vel_z = -1.
            ang_vel_xy = -0.05
            orientation = -4.
            dof_acc = -2.5e-7
            collision = -10.
            action_rate = -0.1
            delta_torques = -1e-7
            torques = -1.e-5
            yaw_abs = -0.8
            lin_pos_y = -0.8
            hip_pos = -0.4
            dof_error = -0.04
        soft_dof_pos_limit = 0.8 # only in training walking
        max_contact_force = 200.0

    class normalization( A1FieldCfg.normalization ):
        dof_pos_redundancy = 0.2
        clip_actions_method = "hard"
        clip_actions_low = []
        clip_actions_high = []
        for sdk_joint_name, sim_joint_name in zip(
            ["Hip", "Thigh", "Calf"] * 4,
            [ # in the same order as the simulation
                "FL_hip_joint", "FL_thigh_joint", "FL_calf_joint",
                "FR_hip_joint", "FR_thigh_joint", "FR_calf_joint",
                "RL_hip_joint", "RL_thigh_joint", "RL_calf_joint",
                "RR_hip_joint", "RR_thigh_joint", "RR_calf_joint",
            ],
        ):
            clip_actions_low.append( (go1_const_dof_range[sdk_joint_name + "_min"] + dof_pos_redundancy - A1FieldCfg.init_state.default_joint_angles[sim_joint_name]) / go1_action_scale )
            clip_actions_high.append( (go1_const_dof_range[sdk_joint_name + "_max"] - dof_pos_redundancy - A1FieldCfg.init_state.default_joint_angles[sim_joint_name]) / go1_action_scale )
        del dof_pos_redundancy, sdk_joint_name, sim_joint_name
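The loop above is the whole story of the "hard" action clip: each bound is the SDK joint limit, pulled in by dof_pos_redundancy and re-expressed in action space by subtracting the default joint angle and dividing by the action scale. A worked number (illustrative only; the default joint angles live in A1FieldCfg, which this diff does not show, so the 1.0 rad below is a made-up stand-in):

# Hypothetical default angle for a thigh joint (not taken from this diff):
thigh_default = 1.0
dof_pos_redundancy = 0.2
go1_action_scale = 0.5
thigh_max = 2.966  # go1_const_dof_range["Thigh_max"]

# Largest allowed action keeps the target dof_pos_redundancy inside the SDK limit:
clip_high = (thigh_max - dof_pos_redundancy - thigh_default) / go1_action_scale
print(clip_high)  # (2.966 - 0.2 - 1.0) / 0.5 = 3.532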
    class sim( A1FieldCfg.sim ):
        body_measure_points = { # transforms are relative to the body frame

@ -184,49 +183,24 @@ class Go1FieldCfg( A1FieldCfg ):

            ),
        }

    class curriculum:
        # chosen heuristically, please refer to `LeggedRobotField._get_terrain_curriculum_move` with fixed body_measure_points
        # for crawl (not updated)
        penetrate_volume_threshold_harder = 1500
        penetrate_volume_threshold_easier = 10000
        penetrate_depth_threshold_harder = 10
        penetrate_depth_threshold_easier = 400
        # for tilt
        # penetrate_volume_threshold_harder = 2000
        # penetrate_volume_threshold_easier = 10000
        # penetrate_depth_threshold_harder = 20
        # penetrate_depth_threshold_easier = 300
        # for climb
        # penetrate_volume_threshold_harder = 6000
        # penetrate_volume_threshold_easier = 12000
        # penetrate_depth_threshold_harder = 600
        # penetrate_depth_threshold_easier = 1600
        # for leap
        # penetrate_volume_threshold_harder = 9000
        # penetrate_volume_threshold_easier = 10000
        # penetrate_depth_threshold_harder = 300
        # penetrate_depth_threshold_easier = 5000

logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class Go1FieldCfgPPO( A1FieldCfgPPO ):
    class algorithm( A1FieldCfgPPO.algorithm ):
        entropy_coef = 0.01 # for walk
        clip_min_std = 1e-12 # for walk
        entropy_coef = 0.0
        clip_min_std = 0.2

    class policy( A1FieldCfgPPO.policy ):
        mu_activation = None # use the action-clip method provided by the env

    class runner( A1FieldCfgPPO.runner ):
        experiment_name = "field_go1"
        run_name = "".join(["Skills",
            ("_all" if len(Go1FieldCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else ("_" + Go1FieldCfg.terrain.BarrierTrack_kwargs["options"][0] if Go1FieldCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")),
        resume = False
        load_run = None

        run_name = "".join(["WalkForward",
            ("_pEnergySubsteps" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.legs_energy_substeps, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "legs_energy_substeps", 0.0) != 0.0 else ""),
            ("_pPenV" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.penetrate_volume, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "penetrate_volume", 0.) < 0. else ""),
            ("_pPenD" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.penetrate_depth, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "penetrate_depth", 0.) < 0. else ""),
            ("_rAlive{:d}".format(int(Go1FieldCfg.rewards.scales.alive)) if Go1FieldCfg.rewards.scales.alive != 2 else ""),
            ("_rAngVel{:.2f}".format(Go1FieldCfg.rewards.scales.tracking_ang_vel) if Go1FieldCfg.rewards.scales.tracking_ang_vel != 0.05 else ""),
            ("_pTorqueExceedIndicate" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.exceed_torque_limits_i, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "exceed_torque_limits_i", 0.) != 0. else ""),
            ("_pTorqueExceedSquare" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.exceed_torque_limits_square, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "exceed_torque_limits_square", 0.) != 0. else ""),
            ("_pYaw{:.2f}".format(-Go1FieldCfg.rewards.scales.yaw_abs) if Go1FieldCfg.rewards.scales.yaw_abs != 0. else ""),
            ("_noComputerTorqueClip" if not Go1FieldCfg.control.computer_clip_torque else ""),
            ("_virtualTerrain" if Go1FieldCfg.terrain.BarrierTrack_kwargs["virtual_terrain"] else ""),
            ("_rTrackVel" + np.format_float_scientific(Go1FieldCfg.rewards.scales.tracking_world_vel, precision=1, exp_digits=1, trim="-") if getattr(Go1FieldCfg.rewards.scales, "tracking_world_vel", 0.0) != 0.0 else ""),
            ("_pWorldVel" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.world_vel_l2norm, precision=1, exp_digits=1, trim="-") if getattr(Go1FieldCfg.rewards.scales, "world_vel_l2norm", 0.0) != 0.0 else ""),
            ("_aScale{:d}{:d}{:d}".format(
                int(Go1FieldCfg.control.action_scale[0] * 10),
                int(Go1FieldCfg.control.action_scale[1] * 10),

@ -234,15 +208,8 @@ class Go1FieldCfgPPO( A1FieldCfgPPO ):

            ) if isinstance(Go1FieldCfg.control.action_scale, (tuple, list)) \
                else "_aScale{:.1f}".format(Go1FieldCfg.control.action_scale)
            ),
            ("_torqueClip{:.0f}".format(Go1FieldCfg.control.torque_limits) if not isinstance(Go1FieldCfg.control.torque_limits, (tuple, list)) else (
                "_tClip{:d}{:d}{:d}".format(
                    int(Go1FieldCfg.control.torque_limits[0]),
                    int(Go1FieldCfg.control.torque_limits[1]),
                    int(Go1FieldCfg.control.torque_limits[2]),
                )
            )),
            ("_actionClip" + Go1FieldCfg.normalization.clip_actions_method if getattr(Go1FieldCfg.normalization, "clip_actions_method", None) is not None else ""),
            ("_from" + "_".join(load_run.split("/")[-1].split("_")[:2]) if resume else "_noResume"),
        ])
        resume = False
        load_run = ""
        max_iterations = 10000
        max_iterations = 20000
        save_interval = 500
@ -1,6 +1,6 @@

from collections import OrderedDict
import os
import datetime
from datetime import datetime
import os.path as osp
import numpy as np
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO

@ -12,48 +12,74 @@ class Go1FieldDistillCfg( Go1FieldCfg ):

        num_envs = 256
        obs_components = [
            "proprioception", # 48
            # "height_measurements", # 187
            "forward_depth",
        ]
        privileged_obs_components = [
            "proprioception", # 48
            "base_pose",
            "robot_config",
            "engaging_block",
            "sidewall_distance",
            # "height_measurements", # 187
            # "forward_depth",
            "base_pose", # 6
            "robot_config", # 1 + 3 + 1 + 12
            "engaging_block", # 1 + (4 + 1) + 2
            "sidewall_distance", # 2
        ]
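Taking the per-component sizes annotated above at face value, the privileged observation is 81-dimensional; a quick sanity check (assuming the comments are the authoritative dimensions):

privileged_dims = {
    "proprioception": 48,
    "base_pose": 6,
    "robot_config": 1 + 3 + 1 + 12,     # 17
    "engaging_block": 1 + (4 + 1) + 2,  # 8
    "sidewall_distance": 2,
}
assert sum(privileged_dims.values()) == 81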
        use_lin_vel = False
        privileged_use_lin_vel = True
        # if True, privileged_obs will not have noise, based on the implementation
        privileged_obs_gets_privilege = False # for climb temporarily
        privileged_obs_gets_privilege = False # for jump temporarily

    class init_state( Go1FieldCfg.init_state ):
        pos = [0.0, 0.0, 0.38] # x,y,z [m]
        pos = [0.0, 0.0, 0.43] # x,y,z [m]

    class sensor( Go1FieldCfg.sensor ):
        class forward_camera( Go1FieldCfg.sensor.forward_camera ):
            resolution = [int(240/4), int(424/4)]
            # position = dict(
            #     mean= [0.245, 0.0075, 0.072+0.018],
            #     std= [0.002, 0.002, 0.0005],
            # ) # position in base_link ##### small randomization
            # rotation = dict(
            #     lower= [0, 0, 0],
            #     upper= [0, 0, 0],
            # ) # rotation in base_link ##### small randomization
            ########## new camera extrinsics with 30 degrees pitch down ####################
            position = dict(
                mean= [0.245, 0.0075, 0.072+0.018],
                std= [0.002, 0.002, 0.0005],
                mean= [0.245+0.027, 0.0075, 0.072+0.02],
                std= [0.002, 0.002, 0.0002],
            ) # position in base_link ##### small randomization
            rotation = dict(
                lower= [0, 0, 0],
                upper= [0, 0, 0],
                lower= [0, 0.5, 0], # positive for pitch down
                upper= [0, 0.54, 0],
            ) # rotation in base_link ##### small randomization
            ########## new camera extrinsics with 30 degrees pitch down ####################
            ########## new camera extrinsics with 15 degrees pitch down ####################
            # position = dict(
            #     mean= [0.245+0.027, 0.0075, 0.072+0.018],
            #     std= [0.003, 0.002, 0.0005],
            # ) # position in base_link ##### small randomization
            # rotation = dict(
            #     lower= [0, 0.24, 0], # positive for pitch down
            #     upper= [0, 0.28, 0],
            # ) # rotation in base_link ##### small randomization
            ########## new camera extrinsics with 15 degrees pitch down ####################
            resized_resolution= [48, 64]
            output_resolution = [48, 64]
            horizontal_fov = [85, 87] # measured at around 87 degrees
            # for go1, usb2.0, 480x640, d435i camera
            latency_range = [0.25, 0.30]
            latency_range = [0.25, 0.30] # [s]
            # latency_range = [0.28, 0.36] # [s]
            latency_resample_time = 5.0 # [s]
            refresh_duration = 1/10 # [s] for the (240, 424) option with the onboard script fixed to no more than 20Hz

            # config to simulate a stereo RGBD camera
            crop_top_bottom = [0, 0]
            crop_left_right = [int(60/4), int(46/4)]
            depth_range = [0.0, 1.5] # [m]
            depth_range = [0.0, 2.0] # [m]
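The latency settings above imply a FIFO of rendered frames deep enough to cover latency_range[1], with each env reading the frame that is `latency` seconds old and the latency itself resampled every latency_resample_time seconds. A minimal sketch of that read path (assumed from the config; not the repository's exact code):

import torch

dt = 0.02                     # assumed control step [s]
latency_range = [0.25, 0.30]  # [s], as configured above
buffer_length = int((latency_range[1] + dt) / dt)

num_envs, h, w = 4, 48, 64
depth_buffer = torch.zeros(buffer_length, num_envs, h, w)

def read_delayed_frame(depth_buffer, current_latency):
    # delayed_frames >= 1; index -1 is the newest rendered frame
    delayed_frames = ((current_latency / dt) + 1).to(int)
    return depth_buffer[-delayed_frames, torch.arange(depth_buffer.shape[1])]

frames = read_delayed_frame(depth_buffer, torch.full((num_envs,), 0.27))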
        # class proprioception( Go1FieldCfg.sensor.proprioception ): # inherited from A1FieldCfg
        class proprioception( Go1FieldCfg.sensor.proprioception ): # inherited from A1FieldCfg
            delay_action_obs = False
            delay_privileged_action_obs = False

    class terrain( Go1FieldCfg.terrain ):
        num_rows = 2

@ -67,21 +93,24 @@ class Go1FieldDistillCfg( Go1FieldCfg ):

            options= [
                # "tilt",
                "crawl",
                "climb",
                "jump",
                "jump",
                "leap",
            ], # each race track will permute all the options
            one_obstacle_per_track= True,
            n_obstacles_per_track= 4,
            randomize_obstacle_order= True,
            track_width= 1.6,
            track_block_length= 1.8, # the x-axis distance from the env origin point
            wall_thickness= (0.04, 0.2), # [m]
            wall_height= 0.0, # [m]
            climb= dict(
            track_block_length= 2.0, # the x-axis distance from the env origin point
            wall_thickness= (0.04, 0.3), # [m]
            wall_height= (-0.5, 0.0), # [m]
            jump= dict(
                height= (0.40, 0.45),
                depth= (0.5, 1.0), # size along the forward axis
                fake_offset= 0.05, # [m] makes the climb's height info greater than its physical height.
                depth= (0.01, 0.01), # size along the forward axis
                fake_offset= 0.03, # [m] makes the jump's height info greater than its physical height.
                jump_down_prob= 0.5,
            ),
            crawl= dict(
                height= (0.28, 0.38),
                height= (0.36, 0.5),
                depth= (0.1, 0.2), # size along the forward axis
                wall_height= 0.6,
                fake_depth= 0.4, # when track block length is 1.8m

@ -94,11 +123,11 @@ class Go1FieldDistillCfg( Go1FieldCfg ):

            ),
            leap= dict(
                # length= (0.45, 0.7),
                length= (0.5, 0.7), # for parkour real-world env
                length= (0.3, 0.74), # for parkour real-world env
                depth= (0.5, 0.6),
                height= 0.2,
                fake_offset= 0.2,
                follow_climb_ratio= 0.5, # when following a climb, do not drop down to the ground suddenly.
                fake_offset= 0.1,
                follow_jump_ratio= 0.5, # when following a jump, do not drop down to the ground suddenly.
            ),
            add_perlin_noise= True,
            border_perlin_noise= True,

@ -106,6 +135,7 @@ class Go1FieldDistillCfg( Go1FieldCfg ):

            virtual_terrain= False,
            draw_virtual_terrain= True,
            engaging_next_threshold= 1.2,
            engaging_finish_threshold= 0.3,
            check_skill_combinations= True,
            curriculum_perlin= False,
            no_perlin_threshold= 0.04, # for parkour real-world env

@ -113,33 +143,65 @@ class Go1FieldDistillCfg( Go1FieldCfg ):

        )

        TerrainPerlin_kwargs = dict(
            zScale= [0.0, 0.05], # for parkour real-world env
            zScale= [0.01, 0.1], # for parkour real-world env
            frequency= 10,
        )

    class commands( A1FieldDistillCfg.commands ):
        pass

    class control( Go1FieldCfg.control ):
        computer_clip_torque = False

    class asset( Go1FieldCfg.asset ):
        terminate_after_contacts_on = ["base"]

    class termination( A1FieldDistillCfg.termination ):
        pass
        out_of_track_kwargs = dict(
            threshold= 1.6,
        )

    class domain_rand( A1FieldDistillCfg.domain_rand ):
        randomize_com = True
        randomize_motor = True
        randomize_friction = True
        friction_range = [0.0, 0.8]
        friction_range = [0.2, 2.0]
        randomize_base_mass = True
        push_robots = False
        init_dof_pos_ratio_range = [0.9, 1.1]
        init_base_vel_range = [-0.0, 0.0]
        init_dof_pos_ratio_range = [0.8, 1.2]
        init_dof_vel_range = [-2., 2.]
        # init_base_vel_range = [-0.0, 0.0] # use super class
        init_base_pos_range = dict(
            x= [0.4, 0.6],
            y= [-0.05, 0.05],
        )
        randomize_gravity_bias = True
        randomize_privileged_gravity_bias = False
        gravity_bias_range = dict(
            x= [-0.12, 0.12],
            y= [-0.12, 0.12],
            z= [-0.05, 0.05],
        )

    class rewards( A1FieldDistillCfg.rewards ):
        class scales:
            pass
    class noise( A1FieldDistillCfg.noise ):
        add_noise = True # This only accounts for proprioception noise and height-measurement noise at most
        class noise_scales( A1FieldDistillCfg.noise.noise_scales ):
            ang_vel = 0.2 # measured at 0.02
            dof_pos = 0.0006 # measured at 0.0002
            dof_vel = 0.02 # measured at 0.015
            gravity = 0.06 # measured at 0.05

            # These parts of the obs should not have noise:
            # they are either unused, set to zero, or have their own noise mechanism
            height_measurements = 0.0
            forward_depth = 0.0
            base_pose = 0.0
            lin_vel = 0.0

        class forward_depth:
            stereo_min_distance = 0.12 # when using (240, 424) resolution
            stereo_far_distance = 2.

@ -164,7 +226,8 @@ class Go1FieldDistillCfg( Go1FieldCfg ):

            # override base class attributes
            pass

distill_target_ = "tanh"
# distill_target_ = "tanh"
distill_target_ = "l1"
logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class Go1FieldDistillCfgPPO( A1FieldDistillCfgPPO ):
    class algorithm( A1FieldDistillCfgPPO.algorithm ):

@ -177,10 +240,10 @@ class Go1FieldDistillCfgPPO( A1FieldDistillCfgPPO ):

        using_ppo = False
        distill_target = distill_target_
        buffer_dilation_ratio = 1.
        learning_rate = 3e-4
        learning_rate = 1.e-4
        optimizer_class_name = "AdamW"

        teacher_policy_class_name = "ActorCriticFieldMutex"
        teacher_policy_class_name = "ActorCriticClimbMutex"
        teacher_ac_path = None
        class teacher_policy( A1FieldDistillCfgPPO.policy ):
            # For loading the teacher policy. No need to change for training the student

@ -195,20 +258,77 @@ class Go1FieldDistillCfgPPO( A1FieldDistillCfgPPO ):

                sidewall_distance= (2,),
            )
            env_action_scale = Go1FieldCfg.control.action_scale
            action_smoothing_buffer_len = 3
            reset_non_selected = "when_skill"
            # reset_non_selected = True

            sub_policy_class_name = "ActorCriticRecurrent"
            sub_policy_paths = [ # must be in the order of obstacle ID
                logs_root + "/field_go1/{your go1 walking policy}",
                logs_root + "/field_go1/{your go1 tilting policy}",
                logs_root + "/field_go1/{your go1 crawling policy}",
                logs_root + "/field_go1/{your go1 climbing policy}",
                logs_root + "/field_go1/{your go1 leaping policy}",

            sub_policy_paths = [
                # os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct10_00-03-12_WalkForward_pEnergySubsteps1e-5_maxPushAng0.5_pushInter3.0_noTanh_fromOct09_15-50-14"),
                # os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct25_14-43-48_WalkForward_pEnergySubsteps2e-5_rTrackVel3e+0_rAlive3e+0_pTorqueL1norm8e-1_pDofLim8e-1_actionCliphard_fromOct24_09-00-16"),
                os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct27_20-22-36_WalkForward_pEnergySubsteps2e-5_rTrackVel3e+0_pYawAbs8e-1_pYPosAbs8e-01_pHipPos8e-01_noPropNoise_noTanh_actionCliphard_fromOct27_16-25-22"),
                os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct11_12-24-22_Skills_tilt_propDelay0.04-0.05_pEnergySubsteps1e-5_pPenD2e-3_pDofLimit8e-1_rTilt8e-03_pCollision0.1_PushRobot_kp40_kd0.5_tiltMax0.40fromSep27_13-59-27"),
                os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct11_12-19-00_Skills_crawl_propDelay0.04-0.05_pEnergy-1e-5_pDof8e-01_pTorqueL14e-01_pPosY0.1_maxPushAng0.3_kp40_fromOct09_09-58-26"),
                # os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct13_09-12-39_258k_Skills_jump_comXRange-0.2-0.2_pEnergySubsteps-4e-06_pushRobot_jumpHeight0.1-0.7_propDelay0.04-0.05_noTanh_fromOct09_10-13-26"),
                # os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct16_10-55-09_Skills_jump_comXRange-0.2-0.2_pLinVel1.4_pTorque2e-06_pY0.1_pCollision0.1_pushRobot_jumpHeight0.10-0.45_propDelay0.04-0.05_noTanh_fromOct13_09-12-39"),
                # os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct22_15-19-26_Skills_jump_rTrackVel3._pJumpSameLegs0.6_noPContact_pDofAcc1.5e-07_rJumpXVel1.5_noPropNoise_propDelay0.04-0.05_pCollision-6e-01_noPush_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct22_08-47-13"),
                # os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct24_07-41-26_Skills_jump_rTrackVel4._pY-2e-01_pJumpSameLegs0.2_noPContact_pDofAcc2.5e-07_rJumpXVel1.5_noPropNoise_propDelay0.04-0.05_pCollision-1e-01_noPush_minStd0.2_noTanh_zeroResetAction_jumpRange0.4-0.5_fromOct20_16-09-47"),
                # os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct25_11-36-16_Skills_jump_pEnergySubsteps3e-05_rTrackVel4._pY-3e-01_propDelay0.04-0.05_noPropNoise_noPush_noTanh_zeroResetAction_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct24_07-41-26"),
                # os.path.join(logs_root, "field_go1", "Oct27_16-03-48_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._pDofErrCond4e-01_pHipPos5e-01_pCollision3e+00_pSyncSymLegs4e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_13-20-59"),
                # os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct27_16-04-45_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._pPenD-6e-03_pDofErrCond6e-01_pHipPos5e-01_pCollision6e+00_pSyncSymLegs4e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_13-20-59"),
                os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct28_11-05-56_Skills_jump_pEnergySubsteps2e-06_rTrackVel5._pYaw-1e-01_pHipPos5e+00_pCollision1e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_jumpRange0.4-0.5_allowNegativeReward_fromOct28_08-54-23"),
                # os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct09_09-51-58_Skills_leap_propDelay0.04-0.05_pEnergySubsteps-8e-06_pPenD1.e-2_pDofLimit4e-01_pCollision0.5_kp40_kd0.5fromOct05_02-16-22"),
                # os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct25_11-39-12_Skills_leap_pEnertySubsteps4.e-5_pActRate2.e-1_pDofLimit8.e-1_pCollision5.e-1_noPropNoise_noTanh_zeroResetAction_actionCliphard_fromOct09_09-51-58"),
                os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct28_13-23-39_Skills_leap_pEnertySubsteps2.e-5_rTrackVel2._rAlive2.0_pSyncAllLegs6.e-1_pOrient6.e-1_pHipPos5.e+0_noTanh_actionCliphard_virtual_fromOct28_03-32-35"),
            ]
            # jump_down_policy_path = logs_root + "/field_a1_noTanh_oracle/Oct09_09-49-58_Skills_down_pEnergySubsteps-4e-06_rAlive3_pPenD1e-02_pDofLimit-4e-01_pTorqueL14e-01_maxForce200_downRedundant0.2_softDof0.8_pushRobot_kp40_kd0.5fromSep28_14-59-53"
            # jump_down_policy_path = os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct14_02-06-47_Skills_down_comXRange-0.2-0.2_pEnergySubsteps-4e-06_rAlive3_pPenD4e-03_pDHarder2.e+05_pushRobot_noTanh_virtualfromOct09_09-49-58")
            # jump_down_policy_path = os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct25_10-48-54_down_pEnergySubsteps-4e-05_pDofLimit-4e-01_pYawAbs_pTorqueL14e-01_pActRate1.e-1_rDownCond3.e-1_kp40_kd0.5_virtualfromOct14_02-06-47")
            # tested OK, but tends to stop
            # jump_down_policy_path = os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct25_11-36-31_down_pEnergySubsteps-4e-05_pDofLimit-4e-01_pYawAbs_pTorqueL14e-01_pActRate1.e-1_rDownCond1.e-2_kp40_kd0.5_virtualfromOct14_02-06-47")
            # jump_down_policy_path = os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct26_08-45-02_Skills_down_pEnergySubsteps-4e-05_rTrackVel4._pZVel1e-01_pAngXYVel5e-02_pDTorques1e-07_rDownCond1.e-1_noTanh_virtualfromOct25_10-48-54")
            jump_down_policy_path = os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct28_13-08-21_Skills_down_pEnergySubsteps-4e-05_rTrackVel5._pHipPos8e-01_rDownCond4.e-2_withPropNoise_noTanh_virtualfromOct28_07-38-39")

            # sub_policy_paths = [ # must be in the order of obstacle ID
            #     os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct18_12-37-48_WalkForward_comXRange-0.2-0.2_noDelayActObs_pLinY0.05_noTanh_fromOct10_00-03-12"), # a little pitch up, acceptable
            #     os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct11_12-24-22_Skills_tilt_propDelay0.04-0.05_pEnergySubsteps1e-5_pPenD2e-3_pDofLimit8e-1_rTilt8e-03_pCollision0.1_PushRobot_kp40_kd0.5_tiltMax0.40fromSep27_13-59-27"), # a little pitch up, acceptable
            #     os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct18_12-32-28_Skills_crawl_comXRange-0.2-0.2_pEnergySubsteps-1e-5_pDof8e-01_pTorque1e-5_pTorqueL14e-01_noDelayActObs_noTanh_fromOct11_12-19-00"),
            #     os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct19_06-29-14_Skills_jump_comXRange-0.2-0.2_pLinVel1._pDofAcc5e-7_pTorque6e-05_jumpHeight0.10-0.46_noDelayActObs_noTanh_minStd0.1_fromOct18_15-57-31"),
            #     os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct19_01-25-22_Skills_leap_comXRange-0.2-0.2_pTorques2.e-5_pContactForces1.e-2_leapHeight0.1_noDelayActObs_noTanh_virtualfromOct09_09-51-58"),
            # ]
            # jump_down_policy_path = \
            #     os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct19_07-01-54_Skills_down_comXRange-0.2-0.2_pXVelL21.3_rDown0.3_pPenD2e-03_pDHarder2e+5_pTorque2e-5_pushRobot_noDelayActObs_noTanh_virtualfromOct18_00-51-27")

            # The group of ckpts used for no Computer Clip and no Motor Clip
            sub_policy_paths = [
                os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct27_20-22-36_WalkForward_pEnergySubsteps2e-5_rTrackVel3e+0_pYawAbs8e-1_pYPosAbs8e-01_pHipPos8e-01_noPropNoise_noTanh_actionCliphard_fromOct27_16-25-22"),
                os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct27_20-22-36_WalkForward_pEnergySubsteps2e-5_rTrackVel3e+0_pYawAbs8e-1_pYPosAbs8e-01_pHipPos8e-01_noPropNoise_noTanh_actionCliphard_fromOct27_16-25-22"),
                os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct11_12-19-00_Skills_crawl_propDelay0.04-0.05_pEnergy-1e-5_pDof8e-01_pTorqueL14e-01_pPosY0.1_maxPushAng0.3_kp40_fromOct09_09-58-26"),
                # os.path.join(logs_root, "field_go1", "Oct30_03-46-42_Skills_crawl_pEnergy1.e-5_pDof8.e-1_pTorqueL11.e+0_noComputerClip_noTanh"),
                # os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct29_22-31-44_Skills_jump_pEnergySubsteps4e-05_rTrackVel4._pY-4e-01_pTorque4e-04_noJumpBonous_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_20-17-31"),
                # os.path.join(logs_root, "field_go1", "Oct30_11-11-43_Skills_jump_pEnergySubsteps6e-05_rTrackVel5._pY-8e-01_pTorqueExceed1.2e+00_pTorque4e-04_pDTorques1e-06_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_20-17-31"),
                # os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct30_13-00-12_Skills_jump_pEnergySubsteps6e-05_rTrackVel5._pY-4e-01_pTorqueExceed1.8e+00_pTorque4e-04_pDTorques1e-05_propDelay0.04-0.05_noPropNoise_noPush_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_20-17-31"),
                ### New jump oracle with more spread rear hips.
                # os.path.join(logs_root, "field_go1_noTanh_oracle", "Nov02_11-07-49_Skills_jump_pEnergySubsteps5e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed1.8e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12"),
                os.path.join(logs_root, "field_go1_noTanh_oracle", "Nov02_13-53-48_Skills_jump_pEnergySubsteps5e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed1.5e+00_propDelay0.04-0.05_noPropNoise_pushRobot_minStd0.21_entropy0.01_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12"),
                # os.path.join(logs_root, "field_go1", "Oct30_05-08-03_Skills_leap_pEnertySubsteps6.e-6_rTrackVel5._pTorqueExceed4.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_04-10-12"),
                os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct30_07-32-10_Skills_leap_pEnertySubsteps6.e-6_rTrackVel5._pTorqueExceed8.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_pDorErr1.5e-1_noPenCurriculum_noCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_05-07-24"),
            ]
            jump_down_policy_path = os.path.join(logs_root, "field_go1_noTanh_oracle",
                "Oct30_03-42-56_Skills_down_pEnergySubsteps-1e-05_rTrackVel5._pTorqueL12e-01_pHipPos5e+00_rDownCond4.e-2_allowNegativeReward_pushRobot_noComputerClip_noPropNoise_noTanh_virtualfromOct29_22-07-51",
            )
            # jump_down_policy_path = os.path.join(logs_root, "field_go1",
            #     "Oct30_04-23-31_Skills_down_pEnergySubsteps-1e-05_rTrackVel5._pTorqueL14e-01_pHipPos5e+00_rDownCond4.e-2_allowNegativeReward_pushRobot_noComputerClip_noPropNoise_noTanh_virtual_fromOct29_22-07-51",
            # )

            jump_down_vel = 1.0
            cmd_vel_mapping = {
                0: 1.0,
                1: 0.5,
                2: 0.8,
                3: 1.2,
                1: 0.6,
                2: 1.0,
                3: 1.0,
                4: 1.5,
            }
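Note that cmd_vel_mapping is written as a dict literal with repeated keys, so Python keeps only the later value for each key (1 resolves to 0.6, not 0.5). A sketch of how a mutex-style teacher could consume it (illustrative; the real lookup lives in the ActorCritic*Mutex classes, which this diff does not show):

def command_velocity_for(obstacle_id: int, cmd_vel_mapping: dict) -> float:
    # obstacle_id indexes the currently engaged skill; fall back to 1.0 m/s
    return cmd_vel_mapping.get(obstacle_id, 1.0)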
|
||||
|
||||
|
@ -218,38 +338,125 @@ class Go1FieldDistillCfgPPO( A1FieldDistillCfgPPO ):
|
|||
runner_class_name = "TwoStageRunner"
|
||||
class runner( A1FieldDistillCfgPPO.runner ):
|
||||
experiment_name = "distill_go1"
|
||||
num_steps_per_env = 32
|
||||
|
||||
class pretrain_dataset:
|
||||
# data_dir = [
|
||||
# "logs/distill_a1_dagger/" + dir_ \
|
||||
# for dir_ in os.listdir("logs/distill_a1_dagger")
|
||||
# "/localdata_ssd/isaac_ziwenz_tmp/distill_go1_dagger/20231021_camPitch0.52_jumpA1Oct16_10-55-09/" + dir_ \
|
||||
# for dir_ in os.listdir("/localdata_ssd/isaac_ziwenz_tmp/distill_go1_dagger/20231021_camPitch0.52_jumpA1Oct16_10-55-09")
|
||||
# ]
|
||||
scan_dir = "".join([
|
||||
"logs/distill_go1_dagger/{}_".format(datetime.datetime.now().strftime("%b%d")),
|
||||
"/localdata_ssd/isaac_ziwenz_tmp/distill_go1_dagger/", datetime.now().strftime('%b%d_%H-%M-%S'), "_",
|
||||
"".join(Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["options"]),
|
||||
"_vDelay{:.2f}-{:.2f}".format(
|
||||
Go1FieldDistillCfg.sensor.forward_camera.latency_range[0],
|
||||
Go1FieldDistillCfg.sensor.forward_camera.latency_range[1],
|
||||
),
|
||||
"_pDelay{:.2f}-{:.2f}".format(
|
||||
Go1FieldDistillCfg.sensor.proprioception.latency_range[0],
|
||||
Go1FieldDistillCfg.sensor.proprioception.latency_range[1],
|
||||
),
|
||||
("_camPitch{:.2f}".format(
|
||||
(Go1FieldDistillCfg.sensor.forward_camera.rotation["lower"][1] + Go1FieldDistillCfg.sensor.forward_camera.rotation["upper"][1]) / 2
|
||||
)),
|
||||
("_depthMax{:.1f}".format(Go1FieldDistillCfg.sensor.forward_camera.depth_range[1])),
|
||||
# ("_zeroResetAction" if Go1FieldDistillCfg.init_state.zero_actions else ""),
|
||||
# ("_actionClip" + Go1FieldDistillCfg.normalization.clip_actions_method if getattr(Go1FieldDistillCfg.normalization, "clip_actions_method", None) else ""),
|
||||
("_jumpOffset{:.2f}".format(Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["jump"]["fake_offset"])),
|
||||
("_noComputerClip" if not Go1FieldDistillCfg.control.computer_clip_torque else ""),
|
||||
("_randOrder" if Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["options"] else ""),
|
||||
])
|
||||
        dataset_loops = -1
        random_shuffle_traj_order = True
        keep_latest_ratio = 1.0
        keep_latest_n_trajs = 4000
        keep_latest_n_trajs = 3000
        starting_frame_range = [0, 100]

        resume = False
        load_run = ""
        resume = True
        # load_run = "/home/zzw/Data/legged_gym_logs/distill_a1/Jun06_00-15-50_distill_crawljumpleap_vDelay0.20-0.26_jump0.40-0.45_vFps10_leap0.55-0.62_fromJun04_20-18-56_06-3308JumpOracle"
        # load_run = "/home/zzw/Data/legged_gym_logs/distill_go1/Jul11_16-21-59_distill_crawljumpleap"
        # load_run = "Jul17_08-47-39_distill_jump_vDelay0.25-0.30"
        # load_run = "Oct08_02-51-14_distill_crawljumpjumpleap_vDelay0.25-0.30_noTanh_fromJul17_08-47-39"
        # load_run = "Oct08_14-51-22_distill_crawljumpjumpleap_vDelay0.25-0.30_noTanh_fromOct08_02-51-14"
        load_run = "Oct20_05-35-58_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_jumpA1Oct19_06-29-14_noTanh_fromOct08_14-51-22"
        load_run = "Oct21_09-01-45_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_jumpA1Oct16_10-55-09_noTanh_fromOct20_05-35-58"
        load_run = "Oct21_16-21-33_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_depthMax2.0_jumpA1Oct16_10-55-09_noTanh_fromOct21_09-01-45"
        load_run = "Oct22_02-25-42_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_depthMax2.0_jumpA1Oct16_10-55-09_noTanh_fromOct21_16-21-33"
        load_run = "Oct24_15-43-34_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_depthMax2.0_zeroResetAction_actionCliphard_jumpOct24_07-41-26_noTanh_fromOct22_02-25-42"
        load_run = "Oct25_17-10-09_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_depthMax2.0_actionCliphard_jumpOct25_11-36-16_downOct25_11-36-31_noTanh_fromOct24_15-43-34"
        #### From wrong walk and jump skill policy
        # load_run = "Oct26_15-38-28_distill_crawljumpjumpleap_vDelay0.25-0.30_camPitch0.52_depthMax2.0_noPropNoise_gravityBias_actionCliphard_jumpOct25_11-36-16_downOct25_11-36-31_noTanh_fromOct25_17-10-09"
        # load_run = "Oct27_06-00-07_distill_crawljumpjumpleap_vDelay0.25-0.30_camPitch0.52_depthMax2.0_noPropNoise_gravityBias_actionCliphard_jumpOct25_11-36-16_downOctOct26_08-45-02_fricMax2.0_noTanh_fromOct26_15-38-28"
        load_run = "Oct28_16-16-58_distill_crawljumpjumpleap_vDelay0.25-0.30_camPitch0.52_depthMax2.0_addPropNoise_gravityBias_actionCliphard_jumpOct28_11-05-56_fricMax2.0_noTanh_fromOct25_17-10-09"
        ######## Continue tuning policy to fix jump and leap.
        ## This ckpt looks good. But the jump is not good enough.
        load_run = "Oct30_16-31-09_distill_crawljumpjumpleap_vDelay0.25-0.30_camPitch0.52_depthMax2.0_addPropNoise_gravityBias_actionCliphard_noComputerClip_oracleResetWhenSkill_fricMax2.0_noTanh_fromOct28_16-16-58"

        max_iterations = 80000
        max_iterations = 100000
        save_interval = 2000
        log_interval = 100
        run_name = "".join(["distill_",
            "".join(Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["options"]),
            "_vDelay{:.2f}-{:.2f}".format(
                Go1FieldDistillCfg.sensor.forward_camera.latency_range[0],
                Go1FieldDistillCfg.sensor.forward_camera.latency_range[1],
            ),
            ("_camPitch{:.2f}".format(
                (Go1FieldDistillCfg.sensor.forward_camera.rotation["lower"][1] + Go1FieldDistillCfg.sensor.forward_camera.rotation["upper"][1]) / 2
            )),
            ("_depthMax{:.1f}".format(Go1FieldDistillCfg.sensor.forward_camera.depth_range[1])),
            # ("_addPropNoise" if Go1FieldDistillCfg.noise.add_noise else "_noPropNoise"),
            # ("_gravityBias" if Go1FieldDistillCfg.domain_rand.randomize_gravity_bias else "_noGravityBias"),
            # ("_actionClip" + Go1FieldDistillCfg.normalization.clip_actions_method if getattr(Go1FieldDistillCfg.normalization, "clip_actions_method", None) else ""),
            # ("_noComputerClip" if not Go1FieldDistillCfg.control.computer_clip_torque else ""),
            ("_jumpNov02_13-53-48"),
            ("_leapVel1.5"),
            ("_jumpOffset{:.2f}".format(Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["jump"]["fake_offset"])),
            ("_oracleResetWhenSkill"),
            ("_fricMax{:.1f}".format(Go1FieldDistillCfg.domain_rand.friction_range[1])),
            ("_noTanh"),
            ("_from" + "_".join(load_run.split("/")[-1].split("_")[:2]) if resume else "_noResume"),
        ])

# Checking variants:
"""
1. Faster leap (1.5m/s)
    higher offset 0.1 for jump up
2. Longer leap offset 0.2, faster leap (1.5m/s)
    higher offset 0.1 for jump up
3. Faster leap (1.5m/s)
    higher offset 0.1 for jump up
    from older ckpt
4. Faster leap (1.5m/s)
    faster jump (1.2m/s) and higher offset 0.1 for jump up
    from older ckpt
    smaller learning rate 1e-4
---- 20231101 after testing previous ckpts ----
5. Faster leap (1.5m/s)
    normal 0.05 offset for jump up, normal (1.0m/s) jump
    from older ckpt (Oct28_16-16-58)
    same learning rate as Oct30_16-31-09
6. Faster leap (1.5m/s)
    slightly higher 0.07 offset for jump up, faster (1.2m/s) jump
    from older ckpt (Oct28_16-16-58)
    lower learning rate of 1e-4
---- 20231102 after testing previous ckpts ----
New jump oracle to test:
* Nov02_11-07-49_Skills_jump_pEnergySubsteps5e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed1.8e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12
  Nov02_11-09-03_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed2e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12
  Nov02_13-53-13_Skills_jump_pEnergySubsteps5e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed1.5e+00_propDelay0.04-0.05_noPropNoise_pushRobot_minStd0.22_entropy0.01_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12
* Nov02_13-53-48_Skills_jump_pEnergySubsteps5e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed1.5e+00_propDelay0.04-0.05_noPropNoise_pushRobot_minStd0.21_entropy0.01_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12
7. Faster leap (1.5m/s)
    lower 0.05 offset for jump up, normal (1.0m/s) jump
    using new jump oracle
    same older ckpt (Oct28_16-16-58)
    same learning rate as Oct30_16-31-09 (1.5e-4)
8. Faster leap (1.5m/s)
    lower 0.03 offset for jump up, normal (1.0m/s) jump
    using new jump oracle
    new ckpt (Oct30_16-31-09)
    lower learning rate of 1e-4
"""
@ -0,0 +1,223 @@
from os import path as osp
import numpy as np
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict

class Go1JumpCfg( Go1FieldCfg ):

    class init_state( Go1FieldCfg.init_state ):
        pos = [0., 0., 0.45]

    #### uncomment this to train non-virtual terrain
    class sensor( Go1FieldCfg.sensor ):
        class proprioception( Go1FieldCfg.sensor.proprioception ):
            latency_range = [0.04-0.0025, 0.04+0.0075]
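            # i.e. proprioception delayed by 0.0375 to 0.0475 s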
    #### uncomment the above to train non-virtual terrain

    class terrain( Go1FieldCfg.terrain ):
        max_init_terrain_level = 10
        border_size = 5
        slope_treshold = 20.
        curriculum = True

        BarrierTrack_kwargs = merge_dict(Go1FieldCfg.terrain.BarrierTrack_kwargs, dict(
            options= [
                "jump",
            ],
            track_width= 1.6,
            track_block_length= 1.6,
            wall_thickness= 0.2,
            jump= dict(
                # height= (0.38, 0.46),
                height= (0.2, 0.46),
                depth= (0.1, 0.2),
                fake_offset= 0.0, # [m] an offset that makes it easier for the robot to get onto the obstacle
                jump_down_prob= 0., # probability of jumping down; use it in non-virtual terrain
            ),
            virtual_terrain= False, # Change this to False for real terrain
            no_perlin_threshold= 0.06,
            randomize_obstacle_order= True,
            n_obstacles_per_track= 3,
        ))

        TerrainPerlin_kwargs = merge_dict(Go1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
            zScale= [0.05, 0.12],
        ))
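        # merge_dict (legged_gym.utils.helpers in this fork) returns the first dict with
        # the keys of the second applied on top; a minimal sketch of the assumed
        # semantics (illustrative, not the repo's actual implementation):
        #   def merge_dict(base: dict, update: dict) -> dict:
        #       merged = dict(base)
        #       for k, v in update.items():
        #           merged[k] = merge_dict(merged[k], v) if isinstance(v, dict) and isinstance(merged.get(k), dict) else v
        #       return merged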

    class commands( Go1FieldCfg.commands ):
        class ranges( Go1FieldCfg.commands.ranges ):
            lin_vel_x = [0.5, 1.5]
            lin_vel_y = [0.0, 0.0]
            ang_vel_yaw = [0., 0.]

    class control( Go1FieldCfg.control ):
        computer_clip_torque = False

    class asset( Go1FieldCfg.asset ):
        penalize_contacts_on = ["base", "thigh", "calf", "imu"]
        terminate_after_contacts_on = ["base"]

    class termination( Go1FieldCfg.termination ):
        # additional factors that determine whether to terminate the episode
        termination_terms = [
            "roll",
            "pitch",
            "z_high",
            "out_of_track",
        ]
        timeout_at_finished = True
        timeout_at_border = True

    class domain_rand( Go1FieldCfg.domain_rand ):
        class com_range( Go1FieldCfg.domain_rand.com_range ):
            z = [-0.1, 0.1]

        init_base_pos_range = dict(
            x= [0.2, 0.8],
            y= [-0.25, 0.25],
        )
        init_base_rot_range = dict(
            roll= [-0.1, 0.1],
            pitch= [-0.1, 0.1],
        )

        push_robots = True

    class rewards( Go1FieldCfg.rewards ):
        class scales:
            tracking_ang_vel = 0.08
            # alive = 10.
            tracking_world_vel = 5. ######## 1. always ########
            legs_energy_substeps = -6.e-5 # -6e-6
            jump_x_vel_cond = 0.1 ######## 1. segment ########
            # hip_pos = -5.
            front_hip_pos = -5.
            rear_hip_pos = -0.000001
            penetrate_depth = -6e-3
            penetrate_volume = -6e-4
            exceed_dof_pos_limits = -8e-4 # -8e-4
            exceed_torque_limits_l1norm = -2. # -8e-2 # -6e-2
            action_rate = -0.1
            delta_torques = -1.e-5
            dof_acc = -1e-7
            torques = -4e-4 # 2e-3
            yaw_abs = -0.1
            lin_pos_y = -0.4
            collision = -1.
            sync_all_legs_cond = -0.3 # -0.6
            dof_error = -0.06
        only_positive_rewards = False # False
        soft_dof_pos_limit = 0.8
        max_contact_force = 100.
        tracking_sigma = 0.3

    class noise( Go1FieldCfg.noise ):
        add_noise = False

    class curriculum( Go1FieldCfg.curriculum ):
        penetrate_volume_threshold_harder = 6000
        penetrate_volume_threshold_easier = 12000
        penetrate_depth_threshold_harder = 600
        penetrate_depth_threshold_easier = 1200

logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
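# Assuming this file lives at legged_gym/envs/go1/ inside the repo, the four nested
# dirname() calls walk up to the repository root, so logs_root resolves to <repo>/logs.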
class Go1JumpCfgPPO( Go1FieldCfgPPO ):
    class algorithm( Go1FieldCfgPPO.algorithm ):
        entropy_coef = 0.01
        clip_min_std = 0.21

    class runner( Go1FieldCfgPPO.runner ):
        resume = True
load_run = "{Your traind walking model directory}"
|
||||
# load_run = "Sep20_03-37-32_SkillopensourcePlaneWalking_pEnergySubsteps1e-5_pTorqueExceedIndicate1e-1_aScale0.5_tClip202025"
|
||||
# load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Sep27_14-32-13_Skills_jump_pEnergySubsteps-8e-06_pDofLimit-8e-01_pTorqueL18e-01_pContact1e-02_pCollision0.4_pushRobot_propDelay0.04-0.05_kp40_kd0.5_fromSep27_03-36-02")
|
||||
# load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct07_10-24-52_Skills_jump_pEnergy1e-06_pY-1e-02_pCollision-1e-02_noPush_noTanh_jumpRange0.2-0.6_fromSep27_14-32-13")
|
||||
# load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct19_06-29-14_Skills_jump_comXRange-0.2-0.2_pLinVel1._pDofAcc5e-7_pTorque6e-05_jumpHeight0.10-0.46_noDelayActObs_noTanh_minStd0.1_fromOct18_15-57-31")
|
||||
# load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct16_10-55-09_Skills_jump_comXRange-0.2-0.2_pLinVel1.4_pTorque2e-06_pY0.1_pCollision0.1_pushRobot_jumpHeight0.10-0.45_propDelay0.04-0.05_noTanh_fromOct13_09-12-39")
|
||||
######################################
|
||||
# load_run = "Oct20_16-09-47_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel2._propDelay0.04-0.05_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct19_06-29-14"
|
||||
# load_run = "Oct21_05-34-37_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel2._noPropNoise_propDelay0.04-0.05_pCollision-5e-01_noPush_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct20_16-09-47"
|
||||
# load_run = "Oct21_12-30-28_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel2._noPropNoise_propDelay0.04-0.05_pCollision-1e+00_noPush_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct21_05-34-37"
|
||||
# load_run = "Oct21_14-26-29_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel1.2_noPropNoise_propDelay0.04-0.05_pCollision-2e+00_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct21_12-30-28"
|
||||
# load_run = "Oct20_16-09-47_275k_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel2._propDelay0.04-0.05_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct19_06-29-14"
|
||||
# load_run = osp.join(logs_root, "field_go1_before20231030_tuneJump_noTanh", "Oct20_16-09-47_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel2._propDelay0.04-0.05_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct19_06-29-14")
|
||||
# load_run = "Oct22_02-20-52_Skills_jump_pLinVel1._noPContact_pDofAcc2.5e-07_rJumpXVel1.4_noPropNoise_propDelay0.04-0.05_pCollision-1e+00_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct20_16-09-47"
|
||||
# load_run = "Oct22_08-47-56_Skills_jump_rTrackVel2._pJumpSameLegs0.8_noPContact_pDofAcc2.5e-07_rJumpXVel1._noPropNoise_propDelay0.04-0.05_pCollision-4e-01_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct22_02-20-52"
|
||||
# load_run = "Oct22_08-47-13_Skills_jump_rTrackVel2._pJumpSameLegs0.6_noPContact_pDofAcc2.5e-07_rJumpXVel1.2_noPropNoise_propDelay0.04-0.05_pCollision-6e-01_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct22_02-20-52"
|
||||
# load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct22_15-19-26_Skills_jump_rTrackVel3._pJumpSameLegs0.6_noPContact_pDofAcc1.5e-07_rJumpXVel1.5_noPropNoise_propDelay0.04-0.05_pCollision-6e-01_noPush_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct22_08-47-13")
|
||||
# load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct24_07-41-26_Skills_jump_rTrackVel4._pY-2e-01_pJumpSameLegs0.2_noPContact_pDofAcc2.5e-07_rJumpXVel1.5_noPropNoise_propDelay0.04-0.05_pCollision-1e-01_noPush_minStd0.2_noTanh_zeroResetAction_jumpRange0.4-0.5_fromOct20_16-09-47")
|
||||
# load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct25_11-36-16_Skills_jump_pEnergySubsteps3e-05_rTrackVel4._pY-3e-01_propDelay0.04-0.05_noPropNoise_noPush_noTanh_zeroResetAction_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct24_07-41-26")
|
||||
# load_run = "Oct27_08-20-20_Skills_jump_rTrackVel5._noPPen_pY-3e-01_pSyncSymLegs6e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct25_11-36-16"
|
||||
# load_run = "Oct27_09-17-23_Skills_jump_rTrackVel5._noPPen_pDofErr6e-02_pDofErrCond6e-01_pSyncSymLegs6e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_08-20-20"
|
||||
#### Force hip_pos and sync_legs
|
||||
# load_run = "Oct27_13-20-59_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._pDofErr4e-02_pDofErrCond4e-01_pHipPos5e-01_pCollision5e+00_pSyncSymLegs6e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_09-17-23"
|
||||
# load_run = "Oct27_13-24-58_Skills_jump_pEnergySubsteps3e-05_rTrackVel5._pDofErr4e-02_pDofErrCond3e-01_pHipPos3e-01_pCollision5e+00_pSyncSymLegs6e-01_propDelay0.04-0.05_withPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_09-17-23"
|
||||
#### Adding RL gamma for energy penalty
|
||||
load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct27_16-04-45_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._pPenD-6e-03_pDofErrCond6e-01_pHipPos5e-01_pCollision6e+00_pSyncSymLegs4e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_13-20-59")
|
||||
#### Wide spreading legs first
|
||||
# load_run = "Oct28_03-08-02_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._rAlive0.5_pDofErrCond8e-01_pHipPos5e-01_pSyncSymLegs2e-01_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_jumpRange0.4-0.5_fromOct27_16-04-45"
|
||||
# load_run = "Oct28_07-07-03_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._rAlive1.0_pPenD-1e-03_pDofErrCond8e-01_pHipPos3e+00_pSyncSymLegs2e-01_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_jumpRange0.4-0.5_fromOct27_16-04-45"
|
||||
load_run = "Oct28_08-54-23_Skills_jump_pEnergySubsteps1e-05_rTrackVel3._rAlive0.5_pYaw-1e-01_pHipPos5e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_jumpRange0.4-0.5_fromOct28_03-08-02"
|
||||
load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct28_11-05-56_Skills_jump_pEnergySubsteps2e-06_rTrackVel5._pYaw-1e-01_pHipPos5e+00_pCollision1e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_jumpRange0.4-0.5_allowNegativeReward_fromOct28_08-54-23")
|
||||
#### lowering max torques and energy
|
||||
load_run = "Oct29_03-21-19_Skills_jump_pEnergySubsteps6e-06_rTrackVel5._pHipPos5e+00_pTorque2e-05_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.4-0.5_allowNegativeReward_fromOct28_11-05-56"
|
||||
load_run = "Oct29_07-54-42_Skills_jump_pEnergySubsteps1e-05_rTrackVel5._pHipPos5e+00_pTorqueExceed6e-02_pTorque2e-03_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.4-0.5_allowNegativeReward_fromOct29_03-21-19"
|
||||
load_run = "Oct29_12-18-08_Skills_jump_pEnergySubsteps1e-05_rTrackVel6._pTorqueExceed6e-02_pTorque1e-03_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_07-54-42"
|
||||
load_run = "Oct29_15-18-02_Skills_jump_pEnergySubsteps1e-05_rTrackVel4._pTorqueExceed3e-02_pTorque1e-03_noJumpBonous_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_12-18-08"
|
||||
load_run = "Oct29_16-37-44_Skills_jump_pEnergySubsteps1.2e-05_rTrackVel4._pTorqueExceed6e-02_pTorque2e-03_noJumpBonous_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_15-18-02"
|
||||
load_run = "Oct29_20-17-31_Skills_jump_pEnergySubsteps5e-05_rTrackVel4._pTorqueExceed1e+00_pTorque4e-04_noJumpBonous_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_16-37-44"
|
||||
load_run = "Oct30_11-11-43_Skills_jump_pEnergySubsteps6e-05_rTrackVel5._pY-8e-01_pTorqueExceed1.2e+00_pTorque4e-04_pDTorques1e-06_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_20-17-31"
|
||||
load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct30_13-00-12_Skills_jump_pEnergySubsteps6e-05_rTrackVel5._pY-4e-01_pTorqueExceed1.8e+00_pTorque4e-04_pDTorques1e-05_propDelay0.04-0.05_noPropNoise_noPush_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_20-17-31")
|
||||
|
||||
run_name = "".join(["Skills_",
|
||||
("Multi" if len(Go1JumpCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else (Go1JumpCfg.terrain.BarrierTrack_kwargs["options"][0] if Go1JumpCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")),
|
||||
("_pEnergySubsteps" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.legs_energy_substeps, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "legs_energy_substeps", -2e-5) != -2e-5 else ""),
|
||||
# ("_rTrackVel" + np.format_float_positional(Go1JumpCfg.rewards.scales.tracking_world_vel) if getattr(Go1JumpCfg.rewards.scales, "tracking_world_vel", 0.) != 0. else ""),
|
||||
# ("_pLinVel" + np.format_float_positional(-Go1JumpCfg.rewards.scales.world_vel_l2norm) if getattr(Go1JumpCfg.rewards.scales, "world_vel_l2norm", 0.) != 0.0 else ""),
|
||||
# ("_rAlive{:.1f}".format(Go1JumpCfg.rewards.scales.alive) if getattr(Go1JumpCfg.rewards.scales, "alive", 0.) != 0. else ""),
|
||||
# ("_pPenD{:.0e}".format(Go1JumpCfg.rewards.scales.penetrate_depth) if getattr(Go1JumpCfg.rewards.scales, "penetrate_depth", 0.) < 0. else ""),
|
||||
# ("_pYaw{:.0e}".format(Go1JumpCfg.rewards.scales.yaw_abs) if getattr(Go1JumpCfg.rewards.scales, "yaw_abs", 0.) < 0. else ""),
|
||||
# ("_pY{:.0e}".format(Go1JumpCfg.rewards.scales.lin_pos_y) if getattr(Go1JumpCfg.rewards.scales, "lin_pos_y", 0.) < 0. else ""),
|
||||
# ("_pDofLimit{:.0e}".format(Go1JumpCfg.rewards.scales.exceed_dof_pos_limits) if getattr(Go1JumpCfg.rewards.scales, "exceed_dof_pos_limits", 0.) < 0. else ""),
|
||||
# ("_pDofErr" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.dof_error, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "dof_error", 0.) != 0. else ""),
|
||||
# ("_pDofErrCond" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.dof_error_cond, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "dof_error_cond", 0.) != 0. else ""),
|
||||
# ("_pHipPos" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.hip_pos, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "hip_pos", 0.) != 0. else ""),
|
||||
("_pFHipPos" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.front_hip_pos, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "front_hip_pos", 0.) != 0. else ""),
|
||||
("_pRHipPos" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.rear_hip_pos, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "rear_hip_pos", 0.) != 0. else ""),
|
||||
# ("_pTorqueExceedI" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.exceed_torque_limits_i, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "exceed_torque_limits_i", 0.) != 0. else ""),
|
||||
("_pTorqueExceed" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.exceed_torque_limits_l1norm, precision=1, trim="-") if Go1JumpCfg.rewards.scales.exceed_torque_limits_l1norm != -8e-1 else ""),
|
||||
# ("_pContact" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.feet_contact_forces, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "feet_contact_forces", 0.) != 0. else ""),
|
||||
# ("_pJumpSameLegs{:.1f}".format(-Go1JumpCfg.rewards.scales.sync_legs_cond) if getattr(Go1JumpCfg.rewards.scales, "sync_legs_cond", 0.) != 0. else ""),
|
||||
# ("_pDofAcc" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.dof_acc, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "dof_acc", 0.) != 0. else ""),
|
||||
# ("_pTorque" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.torques, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "torques", 0.) != 0. else ""),
|
||||
# ("_rJumpXVel" + np.format_float_positional(Go1JumpCfg.rewards.scales.jump_x_vel_cond) if getattr(Go1JumpCfg.rewards.scales, "jump_x_vel_cond", 0.) != 0. else "_noJumpBonous"),
|
||||
# ("_pCollision" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.collision, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "collision", 0.) < 0. else ""),
|
||||
# ("_pSyncSymLegs" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.sync_all_legs_cond, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "sync_all_legs_cond", 0.) != 0. else ""),
|
||||
# ("_pZVel" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.lin_vel_z, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "lin_vel_z", 0.) != 0. else ""),
|
||||
# ("_pAngXYVel" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.ang_vel_xy, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "ang_vel_xy", 0.) != 0. else ""),
|
||||
# ("_pDTorques" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.delta_torques, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "delta_torques", 0.) != 0. else ""),
|
||||
("_propDelay{:.2f}-{:.2f}".format(
|
||||
Go1JumpCfg.sensor.proprioception.latency_range[0],
|
||||
Go1JumpCfg.sensor.proprioception.latency_range[1],
|
||||
)),
|
||||
# ("_softDof{:.1f}".format(Go1JumpCfg.rewards.soft_dof_pos_limit) if Go1JumpCfg.rewards.soft_dof_pos_limit != 0.9 else ""),
|
||||
# ("_timeoutFinished" if getattr(Go1JumpCfg.termination, "timeout_at_finished", False) else ""),
|
||||
# ("_noPitchTerm" if "pitch" not in Go1JumpCfg.termination.termination_terms else ""),
|
||||
("_noPropNoise" if not Go1JumpCfg.noise.add_noise else "_withPropNoise"),
|
||||
("_noPush" if not Go1JumpCfg.domain_rand.push_robots else "_pushRobot"),
|
||||
("_minStd0.21"),
|
||||
("_entropy0.01"),
|
||||
# ("_gamma0.999"),
|
||||
("_noTanh"),
|
||||
# ("_zeroResetAction" if Go1JumpCfg.init_state.zero_actions else ""),
|
||||
# ("_actionClip" + Go1JumpCfg.normalization.clip_actions_method if getattr(Go1JumpCfg.normalization, "clip_actions_method", "") != "" else ""),
|
||||
# ("_noDelayActObs" if not Go1JumpCfg.sensor.proprioception.delay_action_obs else ""),
|
||||
("_noComputerClip" if not Go1JumpCfg.control.computer_clip_torque else ""),
|
||||
("_jumpRange{:.1f}-{:.1f}".format(*Go1JumpCfg.terrain.BarrierTrack_kwargs["jump"]["height"]) if Go1JumpCfg.terrain.BarrierTrack_kwargs["jump"]["height"] != (0.1, 0.6) else ""),
|
||||
# ("_noCurriculum" if not Go1JumpCfg.terrain.curriculum else ""),
|
||||
# ("_allowNegativeReward" if not Go1JumpCfg.rewards.only_positive_rewards else ""),
|
||||
("_from" + "_".join(load_run.split("/")[-1].split("_")[:2]) if resume else "_noResume"),
|
||||
])
|
||||
        max_iterations = 20000
        save_interval = 200
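A note on the run_name convention used by these runner classes: each active reward scale
and feature toggle contributes a short tag, so a log-directory name like those in the
load_run lists above encodes the whole training recipe. The numpy calls doing the
formatting behave as follows (standard numpy API; the values are just examples):

    import numpy as np
    np.format_float_scientific(6e-05, precision=1, trim="-")      # '6e-05'  (jump-style tags)
    np.format_float_scientific(6e-06, precision=1, exp_digits=1)  # '6.e-6'  (leap-style tags)
    np.format_float_positional(5.0)                               # '5.'     (e.g. "rTrackVel5.")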
@ -0,0 +1,184 @@
from os import path as osp
import numpy as np
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict

class Go1LeapCfg( Go1FieldCfg ):

    class init_state( Go1FieldCfg.init_state ):
        pos = [0., 0., 0.4]

    #### uncomment this to train non-virtual terrain
    class sensor( Go1FieldCfg.sensor ):
        class proprioception( Go1FieldCfg.sensor.proprioception ):
            latency_range = [0.04-0.0025, 0.04+0.0075]
            # latency_range = [0.06-0.005, 0.06+0.005]
    #### uncomment the above to train non-virtual terrain

    class terrain( Go1FieldCfg.terrain ):
        max_init_terrain_level = 2
        border_size = 5
        slope_treshold = 20.
        curriculum = False

        BarrierTrack_kwargs = merge_dict(Go1FieldCfg.terrain.BarrierTrack_kwargs, dict(
            options= [
                "leap",
            ],
            leap= dict(
                length= (0.3, 0.8),
                depth= (0.4, 0.8),
                height= 0.15,
            ),
            wall_height= -0.4,
            virtual_terrain= True, # Change this to False for real terrain
            no_perlin_threshold= 0.06,
        ))

        TerrainPerlin_kwargs = merge_dict(Go1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
            zScale= [0.05, 0.12],
        ))

    class commands( Go1FieldCfg.commands ):
        class ranges( Go1FieldCfg.commands.ranges ):
            lin_vel_x = [1.5, 1.8]
            lin_vel_y = [0.0, 0.0]
            ang_vel_yaw = [0., 0.]

    class control( Go1FieldCfg.control ):
        computer_clip_torque = True

    class termination( Go1FieldCfg.termination ):
        # additional factors that determine whether to terminate the episode
        termination_terms = [
            "roll",
            "pitch",
            "z_low",
            "z_high",
            "out_of_track",
        ]
        # roll_kwargs = merge_dict(Go1FieldCfg.termination.roll_kwargs, dict(
        #     threshold= 0.7,
        #     leap_threshold= 0.7,
        # ))
        # pitch_kwargs = merge_dict(Go1FieldCfg.termination.pitch_kwargs, dict(
        #     threshold= 0.7,
        #     leap_threshold= 0.7,
        # ))
        z_low_kwargs = merge_dict(Go1FieldCfg.termination.z_low_kwargs, dict(
            threshold= -0.1,
        ))
        z_high_kwargs = merge_dict(Go1FieldCfg.termination.z_high_kwargs, dict(
            threshold= 2.0,
        ))

    class domain_rand( Go1FieldCfg.domain_rand ):
        class com_range( Go1FieldCfg.domain_rand.com_range ):
            z = [-0.2, 0.2]

        init_base_pos_range = dict(
            x= [0.2, 0.8],
            y= [-0.25, 0.25],
        )
        init_base_rot_range = dict(
            roll= [-0.1, 0.1],
            pitch= [-0.1, 0.1],
        )

        push_robots = False

    class rewards( Go1FieldCfg.rewards ):
        class scales:
            tracking_ang_vel = 0.05
            # world_vel_l2norm = -1.
            tracking_world_vel = 5.
            # leap_bonous_cond = 6.
            legs_energy_substeps = -7e-6 # 5e5 spike (not scaled)
            # alive = 3.
            penetrate_depth = -1e-2 # 8 spike (not scaled)
            penetrate_volume = -1e-4 # 100 spike (not scaled)
            exceed_dof_pos_limits = -8e-1
            exceed_torque_limits_l1norm = -1.
            # action_rate = -0.1
            delta_torques = -1e-7
            dof_acc = -1e-7
            torques = -4e-5 # 2000 spike (not scaled)
            yaw_abs = -0.2
            collision = -1.
            lin_pos_y = -0.4
            orientation = -0.1 # 0.3 segment (not scaled)
            hip_pos = -5. # 0.5 spike (not scaled)
            dof_error = -0.15 # 1.2 spike (not scaled)
        tracking_sigma = 0.35
        only_positive_rewards = False
        soft_dof_pos_limit = 0.7

    class curriculum( Go1FieldCfg.curriculum ):
        # penetrate_volume_threshold_harder = 9000
        # penetrate_volume_threshold_easier = 10000
        # penetrate_depth_threshold_harder = 300
        # penetrate_depth_threshold_easier = 5000
        ####### use extremely large values to disable the penetrate curriculum
        penetrate_volume_threshold_harder = 900000
        penetrate_volume_threshold_easier = 1000000
        penetrate_depth_threshold_harder = 30000
        penetrate_depth_threshold_easier = 500000

logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class Go1LeapCfgPPO( Go1FieldCfgPPO ):

    class runner( Go1FieldCfgPPO.runner ):
        resume = True
load_run = "{Your traind walking model directory}"
|
||||
# load_run = "Sep20_03-37-32_SkillopensourcePlaneWalking_pEnergySubsteps1e-5_pTorqueExceedIndicate1e-1_aScale0.5_tClip202025"
|
||||
# load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Sep27_14-56-25_Skills_leap_propDelay0.04-0.05_pEnergySubsteps-8e-06_pDofLimit8e-01_pCollision0.1_kp40_kd0.5fromSep27_02-44-48")
|
||||
load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct09_09-51-58_Skills_leap_propDelay0.04-0.05_pEnergySubsteps-8e-06_pPenD1.e-2_pDofLimit4e-01_pCollision0.5_kp40_kd0.5fromOct05_02-16-22")
|
||||
load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct25_11-39-12_Skills_leap_pEnertySubsteps4.e-5_pActRate2.e-1_pDofLimit8.e-1_pCollision5.e-1_noPropNoise_noTanh_zeroResetAction_actionCliphard_fromOct09_09-51-58")
|
||||
load_run = "Oct28_03-32-35_Skills_leap_pEnertySubsteps2.e-5_rTrackVel2._rAlive2.0_pActRate2.e-1_pHipPos5.e+0_noTanh_actionCliphard_virtual_fromOct25_11-39-12"
|
||||
load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct28_13-23-39_Skills_leap_pEnertySubsteps2.e-5_rTrackVel2._rAlive2.0_pSyncAllLegs6.e-1_pOrient6.e-1_pHipPos5.e+0_noTanh_actionCliphard_virtual_fromOct28_03-32-35")
|
||||
load_run = "Oct29_04-23-33_Skills_leap_pEnertySubsteps6.e-6_rTrackVel6._pTorques2.e-5_pHipPos5.e+0_pDorErrCond8.e-2_noComputerClip_noTanh_allowNegativeReward_actionCliphard_virtual_fromOct28_13-23-39"
|
||||
# load_run = "Oct29_08-30-23_Skills_leap_pEnertySubsteps6.e-6_rTrackVel6._pTorqueExceed2.e-1_pPosY2.e-1_pTorques2.e-5_pHipPos5.e+0_noComputerClip_noTanh_allowNegativeReward_actionCliphard_virtual_fromOct29_04-23-33"
|
||||
# load_run = "Oct29_15-46-30_Skills_leap_pEnertySubsteps6.e-6_rTrackVel6._pTorqueExceed2.e-1_pOrient3.e-1_pTorques2.e-5_noComputerClip_noTanh_allowNegativeReward_actionCliphard_virtual_fromOct29_08-30-23"
|
||||
load_run = "Oct30_02-07-26_Skills_leap_pEnertySubsteps6.e-6_rTrackVel6._pTorqueExceed1.e-1_pPosY4.e-1_pTorques2.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_allowNegativeReward_actionCliphard_virtual_fromOct29_04-23-33"
|
||||
load_run = "Oct30_04-10-12_Skills_leap_pEnertySubsteps4.e-6_rTrackVel5._pTorqueExceed3.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_02-07-26"
|
||||
load_run = "Oct30_05-07-24_Skills_leap_pEnertySubsteps5.e-6_rTrackVel5._pTorqueExceed4.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_04-10-12"
|
||||
|
||||
load_run = "Oct30_05-08-03_Skills_leap_pEnertySubsteps6.e-6_rTrackVel5._pTorqueExceed4.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_04-10-12"
|
||||
load_run = "Oct30_05-08-20_Skills_leap_pEnertySubsteps6.e-6_rTrackVel5._pTorqueExceed5.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_04-10-12"
|
||||
load_run = "Oct30_07-32-10_Skills_leap_pEnertySubsteps6.e-6_rTrackVel5._pTorqueExceed8.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_pDorErr1.5e-1_noPenCurriculum_noCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_05-07-24"
|
||||
|
||||
run_name = "".join(["Skills_",
|
||||
("Multi" if len(Go1LeapCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else (Go1LeapCfg.terrain.BarrierTrack_kwargs["options"][0] if Go1LeapCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")),
|
||||
("_pEnertySubsteps" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.legs_energy_substeps, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "legs_energy_substeps", 0.0) != 0.0 else ""),
|
||||
("_rTrackVel" + np.format_float_positional(Go1LeapCfg.rewards.scales.tracking_world_vel) if getattr(Go1LeapCfg.rewards.scales, "tracking_world_vel", 0.) != 0. else ""),
|
||||
("_rAlive{:.1f}".format(Go1LeapCfg.rewards.scales.alive) if getattr(Go1LeapCfg.rewards.scales, "alive", 0.) != 0. else ""),
|
||||
# ("_pActRate" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.action_rate, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "action_rate", 0.) != 0.0 else ""),
|
||||
# ("_pDofAcc" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.dof_acc, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "dof_acc", 0.) != 0.0 else ""),
|
||||
# ("_pDofLimit" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.exceed_dof_pos_limits, precision=1, exp_digits=1) if Go1LeapCfg.rewards.scales.exceed_dof_pos_limits != 0.0 else ""),
|
||||
# ("_pCollision" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.collision, precision=1, exp_digits=1) if Go1LeapCfg.rewards.scales.collision != 0.0 else ""),
|
||||
# ("_leapBonousCond" if getattr(Go1LeapCfg.rewards.scales, "leap_bonous_cond", 0.) != 0.0 else ""),
|
||||
("_pTorqueExceed" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.exceed_torque_limits_l1norm, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "exceed_torque_limits_l1norm", 0.) != 0.0 else ""),
|
||||
# ("_pSyncAllLegs" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.sync_all_legs, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "sync_all_legs", 0.) != 0.0 else ""),
|
||||
# ("_pOrient" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.orientation, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "orientation", 0.) != 0.0 else ""),
|
||||
# ("_pPenD" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.penetrate_depth, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "penetrate_depth", 0.) != 0.0 else ""),
|
||||
("_pPosY" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.lin_pos_y, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "lin_pos_y", 0.) != 0.0 else ""),
|
||||
# ("_pPenV" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.penetrate_volume, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "penetrate_volume", 0.) != 0.0 else ""),
|
||||
("_pTorques" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.torques, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "torques", 0.) != 0.0 else ""),
|
||||
("_pHipPos" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.hip_pos, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "hip_pos", 0.) != 0.0 else ""),
|
||||
# ("_pDorErrCond" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.dof_error_cond, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "dof_error_cond", 0.) != 0.0 else ""),
|
||||
("_pDorErr" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.dof_error, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "dof_error", 0.) != 0.0 else ""),
|
||||
# ("_noPropNoise" if not Go1LeapCfg.noise.add_noise else ""),
|
||||
("_noPenCurriculum" if Go1LeapCfg.curriculum.penetrate_volume_threshold_harder > 2e5 else ""),
|
||||
("_noCurriculum" if not Go1LeapCfg.terrain.curriculum else ""),
|
||||
("_noComputerClip" if not Go1LeapCfg.control.computer_clip_torque else ""),
|
||||
("_noTanh"),
|
||||
("_noPush" if not Go1LeapCfg.domain_rand.push_robots else "_pushRobot"),
|
||||
# ("_zeroResetAction" if Go1LeapCfg.init_state.zero_actions else ""),
|
||||
("_allowNegativeReward" if not Go1LeapCfg.rewards.only_positive_rewards else ""),
|
||||
("_actionClip" + Go1LeapCfg.normalization.clip_actions_method if getattr(Go1LeapCfg.normalization, "clip_actions_method", "") != "" else ""),
|
||||
("_virtual" if Go1LeapCfg.terrain.BarrierTrack_kwargs["virtual_terrain"] else ""),
|
||||
("_from" + "_".join(load_run.split("/")[-1].split("_")[:2]) if resume else ""),
|
||||
])
|
||||
max_iterations = 20000
|
||||
save_interval = 200
|
||||
@ -0,0 +1,120 @@
import numpy as np
import os.path as osp
from legged_gym.utils.helpers import merge_dict
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO

class Go1RemoteCfg( Go1FieldCfg ):
    class env( Go1FieldCfg.env ):
        num_envs = 4096
        obs_components = [
            "proprioception", # 48
        ]
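        # The 48 dims presumably follow the standard legged_gym proprioception layout
        # (assumption, not verified here): base lin vel (3) + base ang vel (3) +
        # projected gravity (3) + commands (3) + dof pos (12) + dof vel (12) + last actions (12) = 48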
        privileged_obs_components = [
            "proprioception",
            "robot_config",
        ]
        use_lin_vel = False
        privileged_use_lin_vel = True

    class init_state( Go1FieldCfg.init_state ):
        pos = [0., 0., 0.42]

    class terrain( Go1FieldCfg.terrain ):
        num_rows = 6
        num_cols = 6
        selected = "TerrainPerlin"
        TerrainPerlin_kwargs = dict(
            zScale= 0.15,
            frequency= 10,
        )

    class commands( Go1FieldCfg.commands ):
        class ranges( Go1FieldCfg.commands.ranges ):
            lin_vel_x = [-1.0, 1.0]
            lin_vel_y = [-1., 1.]
            ang_vel_yaw = [-1., 1.]

    class domain_rand( Go1FieldCfg.domain_rand ):
        init_base_pos_range = dict(
            x= [-0.1, 0.1],
            y= [-0.1, 0.1],
        )
        init_base_rot_range = dict(
            roll= [-0.4, 0.4],
            pitch= [-0.4, 0.4],
        )
        init_base_vel_range = merge_dict(Go1FieldCfg.domain_rand.init_base_vel_range, dict(
            x= [-0.8, 1.5],
            y= [-0.8, 0.8],
        ))

    class rewards( Go1FieldCfg.rewards ):
        class scales:
            ###### hacked over from the Field config
            tracking_ang_vel = 1.
            tracking_lin_vel = 3.
            # lin_vel_l2norm = -1.
            alive = 1.
            legs_energy_substeps = -6e-7
            # penalty for hardware safety
            # exceed_dof_pos_limits = -4e-2
            exceed_torque_limits_l1norm = -1e-2
            # penalty for walking gait, probably no need
            lin_vel_z = -0.5
            ang_vel_xy = -0.05
            orientation = -4.
            dof_acc = -1.e-7
            collision = -10.
            action_rate = -0.01
            delta_torques = -1e-7
            torques = -1.e-5
            hip_pos = -0.4
            dof_error = -0.04
            stand_still = -0.6
        only_positive_rewards = False
        soft_dof_pos_limit = 0.6

    class normalization( Go1FieldCfg.normalization ):
        clip_actions_method = "hard"
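        # "hard" is consumed by the env's action clipping; a sketch of the assumed
        # behavior (illustrative only -- the repo's actual logic may differ):
        #   if clip_actions_method == "hard":
        #       actions = torch.clamp(actions, -clip_actions, clip_actions)
        #   else:  # the tanh-squash alternative hinted at by the "_noTanh" run tags
        #       actions = torch.tanh(actions) * clip_actions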

logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs")
class Go1RemoteCfgPPO( Go1FieldCfgPPO ):
    class algorithm( Go1FieldCfgPPO.algorithm ):
        entropy_coef = 0.01
        clip_min_std = 0.02

    class runner( Go1FieldCfgPPO.runner ):
        resume = True
        # load_run = osp.join(logs_root, "field_a1/Sep26_15-33-45_WalkByRemote_pEnergySubsteps2e-5_pTorqueL18e-01_pCollision0.2_propDelay0.04-0.05_aScale0.5_kp40_kd0.5_maxPushAng0.5_noTanh_hardActClip_noResume")
        load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Sep28_11-53-30_WalkByRemote_pEnergySubsteps-4e-05_kp40_kd0.5_noTanh_fromSep26_15-33-45")
        load_run = "Oct04_16-15-17_WalkByRemote_pEnergySubsteps-1e-04_softDof0.5_pStand0.0_pDofAcc5e-7_kp40_kd0.5_noTanh_fromSep28_11-53-30"
        load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct12_16-01-06_WalkByRemote_pLin1.0_pFootForce1.e-02_xComMin-0.2_maxForce200_propDelay0.04-0.05_entropyCoef0.01_noTanh_fromOct11_04-34-39")
        load_run = "Oct23_03-33-29_WalkByRemote_pEnergySubsteps-1e-05_rLinTrack3.0_softDof0.6_pStand1.0_kp40_kd0.5_noTanh_fromOct12_16-01-06"
        load_run = "Oct23_11-01-47_WalkByRemote_pEnergySubsteps-1e-05_rLinTrack5.0_pCollide1e-1_pOrient1e-1_pStand2e-1_softDof0.6_kp40_kd0.5_noTanh_fromOct23_03-33-29"
        load_run = "Oct23_13-14-42_WalkByRemote_pEnergySubsteps1e-05_rLinTrack5.0_pDofAcc2.5e-7_pCollide1e-1_pOrient1e-1_pExceedDof8e-1_pStand4e-1_softDof0.6_noTanh_EntropyCoef0.01_fromOct23_11-01-47"
        load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct12_16-00-55_WalkByRemote_pLin1.0_pFootForce1.e-02_xComMin-0.2_maxForce200_propDelay0.05-0.07_entropyCoef0.01_noTanh_fromOct11_04-34-39")
        load_run = "Oct24_15-05-23_WalkByRemote_rLinTrack3.0_pActRate1e-1_softDof0.6_noTanh_zeroResetAction_hackReset_EntropyCoef0.01_actionCliphard_fromOct12_16-00-55"

        run_name = "".join(["WalkByRemote",
            ("_pEnergySubsteps{:.0e}".format(-Go1RemoteCfg.rewards.scales.legs_energy_substeps) if getattr(Go1RemoteCfg.rewards.scales, "legs_energy_substeps", 0.) != 0. else ""),
            ("_rLinTrack{:.1f}".format(Go1RemoteCfg.rewards.scales.tracking_lin_vel) if getattr(Go1RemoteCfg.rewards.scales, "tracking_lin_vel", 0.) != 0. else ""),
            ("_pLinVelL2{:.1f}".format(-Go1RemoteCfg.rewards.scales.lin_vel_l2norm) if getattr(Go1RemoteCfg.rewards.scales, "lin_vel_l2norm", 0.) != 0. else ""),
            ("_rAlive{:.1f}".format(Go1RemoteCfg.rewards.scales.alive) if getattr(Go1RemoteCfg.rewards.scales, "alive", 0.) != 0. else ""),
            # ("_pDofAcc" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.dof_acc, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "dof_acc", 0.) != 0. else ""),
            # ("_pCollide" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.collision, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "collision", 0.) != 0. else ""),
            # ("_pOrient" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.orientation, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "orientation", 0.) != 0. else ""),
            # ("_pExceedDof" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.exceed_dof_pos_limits, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "exceed_dof_pos_limits", 0.) != 0. else ""),
            ("_pExceedTorqueL1" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.exceed_torque_limits_l1norm, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "exceed_torque_limits_l1norm", 0.) != 0. else ""),
            # ("_pStand" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.stand_still, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "stand_still", 0.) != 0. else ""),
            ("_pActRate" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.action_rate, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "action_rate", 0.) != 0. else ""),
            ("_softDof{:.1f}".format(Go1RemoteCfg.rewards.soft_dof_pos_limit) if Go1RemoteCfg.rewards.soft_dof_pos_limit != 0.9 else ""),
            # ("_kp{:d}".format(int(Go1RemoteCfg.control.stiffness["joint"])) if Go1RemoteCfg.control.stiffness["joint"] != 50 else ""),
            # ("_kd{:.1f}".format(Go1RemoteCfg.control.damping["joint"]) if Go1RemoteCfg.control.damping["joint"] != 1. else ""),
            ("_noTanh"),
            # ("_zeroResetAction" if Go1RemoteCfg.init_state.zero_actions else ""),
            ("_EntropyCoef0.01"),
            ("_actionClip" + Go1RemoteCfg.normalization.clip_actions_method if getattr(Go1RemoteCfg.normalization, "clip_actions_method", None) is not None else ""),
            ("_noResume" if not resume else "_from" + "_".join(load_run.split("/")[-1].split("_")[:2])),
        ])

@ -0,0 +1,90 @@
from os import path as osp
import numpy as np
from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO
from legged_gym.utils.helpers import merge_dict

class Go1TiltCfg( Go1FieldCfg ):

    #### uncomment this to train non-virtual terrain
    # class sensor( Go1FieldCfg.sensor ):
    #     class proprioception( Go1FieldCfg.sensor.proprioception ):
    #         delay_action_obs = True
    #         latency_range = [0.04-0.0025, 0.04+0.0075]
    #### uncomment the above to train non-virtual terrain

    class terrain( Go1FieldCfg.terrain ):
        max_init_terrain_level = 2
        border_size = 5
        slope_treshold = 20.
        curriculum = True

        BarrierTrack_kwargs = merge_dict(Go1FieldCfg.terrain.BarrierTrack_kwargs, dict(
            options= [
                "tilt",
            ],
            tilt= dict(
                width= (0.24, 0.8),
                depth= (0.4, 1.), # size along the forward axis
                opening_angle= 0.0, # [rad] an opening that makes it easier for the robot to get into the obstacle
                wall_height= 0.5,
            ),
            virtual_terrain= True, # Change this to False for real terrain
            no_perlin_threshold= 0.06,
        ))

        TerrainPerlin_kwargs = merge_dict(Go1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
            zScale= [0.05, 0.1],
        ))

    class commands( Go1FieldCfg.commands ):
        class ranges( Go1FieldCfg.commands.ranges ):
            lin_vel_x = [0.3, 0.6]
            lin_vel_y = [0.0, 0.0]
            ang_vel_yaw = [0., 0.]

    class termination( Go1FieldCfg.termination ):
        # additional factors that determine whether to terminate the episode
        termination_terms = [
            "roll",
            "pitch",
            "z_low",
            "z_high",
            "out_of_track",
        ]
        roll_kwargs = merge_dict(Go1FieldCfg.termination.roll_kwargs, dict(
            threshold= 0.4,
            leap_threshold= 0.4,
        ))
        z_high_kwargs = merge_dict(Go1FieldCfg.termination.z_high_kwargs, dict(
            threshold= 2.0,
        ))

    class rewards( Go1FieldCfg.rewards ):
        class scales:
            tracking_ang_vel = 0.05
            world_vel_l2norm = -1.
            legs_energy_substeps = -1e-6
            alive = 2.
            penetrate_depth = -4e-3
            penetrate_volume = -4e-3
            exceed_dof_pos_limits = -1e-1
            exceed_torque_limits_i = -1e-1

    class curriculum( Go1FieldCfg.curriculum ):
        penetrate_volume_threshold_harder = 9000
        penetrate_volume_threshold_easier = 10000
        penetrate_depth_threshold_harder = 300
        penetrate_depth_threshold_easier = 5000

class Go1TiltCfgPPO( Go1FieldCfgPPO ):
    class runner( Go1FieldCfgPPO.runner ):
        run_name = "".join(["Skill",
            ("Multi" if len(Go1TiltCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else (Go1TiltCfg.terrain.BarrierTrack_kwargs["options"][0] if Go1TiltCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")),
            ("_tiltMax{:.1f}".format(Go1TiltCfg.terrain.BarrierTrack_kwargs["tilt"]["width"][1]) if Go1TiltCfg.terrain.BarrierTrack_kwargs["tilt"]["width"][1] > 0.5 else ""),
        ])
        resume = True
load_run = "{Your traind walking model directory}"
|
||||
load_run = "Sep20_03-37-32_SkillopensourcePlaneWalking_pEnergySubsteps1e-5_pTorqueExceedIndicate1e-1_aScale0.5_tClip202025"
|
||||
max_iterations = 20000
|
||||
save_interval = 500
|
||||
@ -7,13 +7,13 @@ import argparse
def main(args):
    for data_dir in args.data_dirs:
        if osp.isfile(osp.join(data_dir, "metadata.json")):
-            shutil.move(
+            shutil.copy2(
                osp.join(data_dir, "metadata.json"),
                osp.join(osp.dirname(data_dir), data_dir.split("/")[-1] + ".json")
            )
            print(f"Moved metadata.json to {osp.join(osp.dirname(data_dir), data_dir.split('/')[-1] + '.json')}")
        # removing the directory
-        if osp.isdir(data_dir):
+        if osp.isdir(data_dir) and (not osp.islink(data_dir)):
            print("Removing directory: ", data_dir)
            if not args.only_files:
                shutil.rmtree(data_dir)
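Since shutil.copy2 preserves file metadata and leaves the source in place, metadata.json
now survives inside the directory whenever the later rmtree is skipped (assuming
--only_files is the boolean flag its name suggests), whereas the previous shutil.move
removed it from the original location immediately.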
@ -29,36 +29,34 @@ def main(args):
    update_class_from_dict(train_cfg, d, strict= True)

    # Some custom settings
    env_cfg.terrain.BarrierTrack_kwargs["options"] = [
        "tilt", # "tilt",
        "crawl", # "crawl",
        "climb", # "climb",
        "climb", # "climb",
        "leap", # "leap",
    ]
    ####### customized option to increase data distribution #######
    # ####### customized option to increase data distribution #######
    action_sample_std = 0.0
    ############# some predefined options #############
    if len(env_cfg.terrain.BarrierTrack_kwargs["options"]) == 1:
        env_cfg.terrain.BarrierTrack_kwargs["n_obstacles_per_track"] = 1
        env_cfg.terrain.num_rows = 20; env_cfg.terrain.num_cols = 30
    else: # for parkour env
        # >>> option 1
        env_cfg.domain_rand.added_mass_range = [1.5, 3.0]
        env_cfg.terrain.BarrierTrack_kwargs["climb_down_prob"] = 0.45
        env_cfg.terrain.num_rows = 60; env_cfg.terrain.num_cols = 1
        env_cfg.terrain.BarrierTrack_kwargs["track_block_length"] = 2.8
        env_cfg.terrain.BarrierTrack_kwargs["track_width"] = 2.4
        env_cfg.terrain.BarrierTrack_kwargs["wall_thickness"] = (0.0, 0.6)
        env_cfg.domain_rand.init_base_pos_range["x"] = (0.4, 1.8)
        env_cfg.terrain.num_rows = 12; env_cfg.terrain.num_cols = 10
        # >>> option 2
        # env_cfg.terrain.BarrierTrack_kwargs["climb"]["depth"] = (0.1, 0.3)
        # env_cfg.terrain.BarrierTrack_kwargs["climb"]["climb_down_prob"] = 0.55
        # env_cfg.terrain.num_rows = 60; env_cfg.terrain.num_cols = 1
        # env_cfg.terrain.BarrierTrack_kwargs["track_block_length"] = 3.
        # env_cfg.terrain.BarrierTrack_kwargs["track_width"] = 4.0
        # env_cfg.terrain.BarrierTrack_kwargs["wall_height"] = (-0.5, -0.2)
        # env_cfg.terrain.BarrierTrack_kwargs["wall_thickness"] = (0.0, 1.4)
        # env_cfg.domain_rand.init_base_pos_range["x"] = (1.6, 2.0)
        # env_cfg.terrain.num_rows = 16; env_cfg.terrain.num_cols = 5
        # >>> option 3
        # env_cfg.terrain.BarrierTrack_kwargs["track_block_length"] = 1.6
        # env_cfg.terrain.BarrierTrack_kwargs["track_width"] = 2.2
        # env_cfg.terrain.BarrierTrack_kwargs["wall_height"] = (-0.5, 0.1)
        # env_cfg.terrain.BarrierTrack_kwargs["wall_thickness"] = (0.0, 0.5)
        # env_cfg.domain_rand.init_base_pos_range["x"] = (0.2, 0.9)
        # env_cfg.terrain.BarrierTrack_kwargs["n_obstacles_per_track"] = 1
        # env_cfg.terrain.BarrierTrack_kwargs["track_width"] = 1.6
        # env_cfg.terrain.BarrierTrack_kwargs["wall_thickness"] = (0.0, 0.3)
        # env_cfg.terrain.BarrierTrack_kwargs["track_block_length"] = 3.0
        # env_cfg.terrain.BarrierTrack_kwargs["climb"]["depth"] = (0.5, 1.8)
        # env_cfg.terrain.num_rows = 20; env_cfg.terrain.num_cols = 24
        # action_sample_std = 0.1
        # env_cfg.terrain.num_rows = 22; env_cfg.terrain.num_cols = 16
        pass
    if (env_cfg.terrain.BarrierTrack_kwargs["options"][0] == "leap") and all(i == env_cfg.terrain.BarrierTrack_kwargs["options"][0] for i in env_cfg.terrain.BarrierTrack_kwargs["options"]):
        ######### For leap, because the platform is usually higher than the ground.
@ -101,7 +101,7 @@ def play(args):
    env_cfg.terrain.curriculum = False
    env_cfg.terrain.BarrierTrack_kwargs["options"] = [
        # "crawl",
-        "climb",
+        "jump",
        # "leap",
        # "tilt",
    ]
@ -146,7 +146,7 @@ def play(args):
logs_root + "/field_a1_oracle/Jun03_00-33-08_Skills_climb_pEnergySubsteps2e-6_rAlive2_pTorqueExceedIndicate2e-1_propDelay0.04-0.05_noPerlinRate0.2_nSubsteps4_envFreq50.0_aScale0.5",
|
||||
logs_root + "/field_a1_oracle/Jun04_01-03-59_Skills_leap_pEnergySubsteps2e-6_rAlive2_pPenV4e-3_pPenD4e-3_pPosY0.20_pYaw0.20_pTorqueExceedSquare1e-3_leapH0.2_propDelay0.04-0.05_noPerlinRate0.2_aScale0.5",
|
||||
]
|
||||
train_cfg.policy.climb_down_policy_path = logs_root + "/field_a1_oracle/Aug30_16-12-14_Skills_climb_climbDownProb0.5_propDelay0.04-0.05"
|
||||
train_cfg.policy.jump_down_policy_path = logs_root + "/field_a1_oracle/Aug30_16-12-14_Skills_climb_climbDownProb0.5_propDelay0.04-0.05"
|
||||
train_cfg.runner.resume = False
|
||||
env_cfg.env.use_lin_vel = True
|
||||
train_cfg.policy.cmd_vel_mapping = {
|
||||
|
|
|
@ -163,7 +163,7 @@ def update_cfg_from_args(env_cfg, cfg_train, args):

    return env_cfg, cfg_train

-def get_args():
+def get_args(custom_args=[]):
    custom_parameters = [
        {"name": "--task", "type": str, "default": "anymal_c_flat", "help": "Resume training or start testing from a checkpoint. Overrides config file if provided."},
        {"name": "--resume", "action": "store_true", "default": False, "help": "Resume training from a checkpoint"},
@ -178,7 +178,7 @@ def get_args():
{"name": "--num_envs", "type": int, "help": "Number of environments to create. Overrides config file if provided."},
|
||||
{"name": "--seed", "type": int, "help": "Random seed. Overrides config file if provided."},
|
||||
{"name": "--max_iterations", "type": int, "help": "Maximum number of training iterations. Overrides config file if provided."},
|
||||
]
|
||||
] + custom_args
|
||||
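    # Hypothetical caller-side use of the new custom_args hook ("--action_std" is an
    # invented example flag, following the dict schema above):
    #   args = get_args(custom_args=[
    #       {"name": "--action_std", "type": float, "default": 0.0,
    #        "help": "Std of the noise added to sampled actions."},
    #   ])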
    # parse arguments
    args = gymutil.parse_arguments(
        description="RL Policy",

@ -96,7 +96,9 @@ class Logger:
        a.legend()
        # plot base pitch
        a = axs[0, 2]
-        if log["base_pitch"]: a.plot(time, log["base_pitch"], label='measured')
+        if log["base_pitch"]:
+            a.plot(time, log["base_pitch"], label='measured')
+            a.plot(time, [-0.75] * len(time), label= 'thresh')
        # if log["command_yaw"]: a.plot(time, log["command_yaw"], label='commanded')
        a.set(xlabel='time [s]', ylabel='base ang [rad]', title='Base pitch')
        a.legend()
@ -113,11 +115,15 @@ class Logger:
            a.plot(time, forces[:, i], label=f'force {i}')
        a.set(xlabel='time [s]', ylabel='Forces z [N]', title='Vertical Contact forces')
        a.legend()
-        # plot torque/vel curves
+        # # plot torque/vel curves
+        # a = axs[2, 1]
+        # if log["dof_vel"]!=[] and log["dof_torque"]!=[]: a.plot(log["dof_vel"], log["dof_torque"], 'x', label='measured')
+        # a.set(xlabel='Joint vel [rad/s]', ylabel='Joint Torque [Nm]', title='Torque/velocity curves')
+        # a.legend()
+        # plot power curves
        a = axs[2, 1]
-        if log["dof_vel"]!=[] and log["dof_torque"]!=[]: a.plot(log["dof_vel"], log["dof_torque"], 'x', label='measured')
-        a.set(xlabel='Joint vel [rad/s]', ylabel='Joint Torque [Nm]', title='Torque/velocity curves')
-        a.legend()
+        if log["power"]!=[]: a.plot(time, log["power"], label='power [W]')
+        a.set(xlabel='time [s]', ylabel='Power [W]', title='Power')
        # plot torques
        a = axs[2, 2]
        if log["dof_torque"]!=[]: a.plot(time, log["dof_torque"], label='measured')
@ -126,20 +132,29 @@ class Logger:
        # plot rewards
        a = axs[3, 0]
        if log["max_torques"]: a.plot(time, log["max_torques"], label='max_torques')
        if log["max_torque_motor"]: a.plot(time, log["max_torque_motor"], label='max_torque_motor')
        if log["max_torque_leg"]: a.plot(time, log["max_torque_leg"], label='max_torque_leg')
        a.set(xlabel='time [s]', ylabel='max_torques [Nm]', title='max_torques')
-        a.legend()
+        a.legend(fontsize= 5)
        # plot custom data
        a = axs[3, 1]
        if log["student_action"]:
            a.plot(time, log["student_action"], label='s')
            a.plot(time, log["teacher_action"], label='t')
            a.legend()
-            a.set(xlabel='time [s]', ylabel='tanh', title='student/teacher action')
+            a.set(xlabel='time [s]', ylabel='value before step()', title='student/teacher action')
        a = axs[3, 2]
        if log["teacher_action"]:
            a.plot(time, [i-j for i, j in zip(log["student_action"], log["teacher_action"])], label='action difference')
            a.set(xlabel='time [s]', ylabel='tanh', title='action difference')
            a.legend()
        a.plot(time, log["reward"], label='rewards')
        for i in log["mark"]:
            if i > 0:
                a.plot(time, log["mark"], label='user mark')
                break
        for key in log.keys():
            if "reward_removed_" in key:
                a.plot(time, log[key], label= key)
        a.set(xlabel='time [s]', ylabel='', title='rewards')
        # a.set_ylim([-0.12, 0.1])
        a.legend(fontsize = 5)
        plt.show()

    def print_rewards(self):

@ -12,7 +12,7 @@ class BarrierTrack:
    # default kwargs
    track_kwargs = dict(
        options= [
-            "climb",
+            "jump",
            "crawl",
            "tilt",
        ], # each race track will permute all the options
@ -22,12 +22,11 @@ class BarrierTrack:
        track_block_length= 1.2, # the x-axis distance from the env origin point
        wall_thickness= 0.04, # [m]
        wall_height= 0.5, # [m]
-        climb= dict(
+        jump= dict(
            height= 0.3,
            depth= 0.04, # size along the forward axis
-            fake_offset= 0.0, # [m] fake offset will make climb's height info greater than its physical height.
-            fake_height= 0.0, # [m] offset/height only one of them can be non-zero
-            climb_down_prob= 0.0, # if > 0, will have a chance to climb down from the obstacle
+            fake_offset= 0.0, # [m] fake offset will make jump's height info greater than its physical height.
+            jump_down_prob= 0.0, # if > 0, will have a chance to jump down from the obstacle
        ),
        crawl= dict(
            height= 0.32,
@ -53,16 +52,18 @@ class BarrierTrack:
border_height= 0., # Incase we want the surrounding plane to be lower than the track
|
||||
virtual_terrain= False,
|
||||
draw_virtual_terrain= False, # set True for visualization
|
||||
check_skill_combinations= False, # check if some specific skills are connected, if set. e.g. climb -> leap
|
||||
check_skill_combinations= False, # check if some specific skills are connected, if set. e.g. jump -> leap
|
||||
engaging_next_threshold= 0., # if > 0, engaging_next is based on this threshold instead of track_block_length/2. Make sure the obstacle is not too long.
|
||||
engaging_finish_threshold= 0., # an obstacle is considered finished only if the last volume point is this amount away from the block origin.
|
||||
curriculum_perlin= True, # If True, perlin noise scale will be depends on the difficulty if possible.
|
||||
no_perlin_threshold= 0.02, # If the perlin noise is too small, clip it to zero.
|
||||
walk_in_skill_gap= False, # If True, obstacle ID will be walk when the distance to the obstacle does not reach engaging_next_threshold
|
||||
)
|
||||
max_track_options = 4 # ("tilt", "crawl", "climb", "dynamic") at most
|
||||
max_track_options = 4 # ("tilt", "crawl", "jump", "dynamic") at most
|
||||
track_options_id_dict = {
|
||||
"tilt": 1,
|
||||
"crawl": 2,
|
||||
"climb": 3,
|
||||
"jump": 3,
|
||||
"leap": 4,
|
||||
} # track_id are aranged in this order
|
||||
def __init__(self, cfg, num_robots: int) -> None:
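A hypothetical sketch of how `track_options_id_dict` can back a per-block obstacle one-hot, with slot 0 reserved for plain walking; `obstacle_onehot` is not a function from the commit, only an illustration of the id layout. Note that "climb" was renamed to "jump" but keeps id 3, so old and new configs map to the same slot.

```python
import torch

track_options_id_dict = {"tilt": 1, "crawl": 2, "jump": 3, "leap": 4}
max_track_options = 4

def obstacle_onehot(names):
    # map skill names to ids, then scatter into a (n, max_track_options + 1) one-hot
    ids = torch.tensor([track_options_id_dict[n] for n in names])
    onehot = torch.zeros(len(names), max_track_options + 1)
    onehot[torch.arange(len(names)), ids] = 1.
    return onehot

print(obstacle_onehot(["jump", "crawl"]))
# tensor([[0., 0., 0., 1., 0.],
#         [0., 0., 1., 0., 0.]])
```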

@ -88,7 +89,7 @@ class BarrierTrack:
# For each track block (n_options + 1 in total), 3 parameters are enabled:
# - track_id: int, starting track is 0, other numbers depend on the options order.
# - obstacle_depth: float,
# - obstacle_critical_params: e.g. tilt width, crawl height, climb height
# - obstacle_critical_params: e.g. tilt width, crawl height, jump height

# num_rows + 1 in case the robot finishes the entire row of tracks
self.track_info_map = torch.zeros(

@ -101,6 +102,11 @@ class BarrierTrack:
dtype= torch.float32,
device= self.device,
)
self.block_starting_height_map = torch.zeros(
(self.cfg.num_rows, self.cfg.num_cols, self.n_blocks_per_track),
dtype= torch.float32,
device= self.device,
) # height [m] relative to the world coordinate system

def initialize_track(self):
""" All track blocks are defined as follows

@ -126,7 +132,7 @@ class BarrierTrack:
np.ceil(self.track_kwargs["track_block_length"] / self.cfg.horizontal_scale).astype(int),
np.ceil(self.track_kwargs["track_width"] / self.cfg.horizontal_scale).astype(int),
)
self.n_blocks_per_track = (self.track_kwargs["n_obstacles_per_track"] + 1) if self.track_kwargs["randomize_obstacle_order"] else (len(self.track_kwargs["options"]) + 1)
self.n_blocks_per_track = (self.track_kwargs["n_obstacles_per_track"] + 1) if (self.track_kwargs["randomize_obstacle_order"] and len(self.track_kwargs["options"]) > 0) else (len(self.track_kwargs["options"]) + 1)
self.track_resolution = (
np.ceil(self.track_kwargs["track_block_length"] * self.n_blocks_per_track / self.cfg.horizontal_scale).astype(int),
np.ceil(self.track_kwargs["track_width"] / self.cfg.horizontal_scale).astype(int),
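A worked example of the block-count and resolution math above, with illustrative numbers (the lengths, scale, and option list here are assumptions, not values from the commit):

```python
import numpy as np

track_block_length = 1.2   # [m]
track_width = 1.6          # [m]
horizontal_scale = 0.025   # [m/px]
options = ["jump", "crawl", "tilt", "leap"]
randomize_obstacle_order = True
n_obstacles_per_track = 2

# mirrors the fixed condition in the diff: only shorten the track when there
# are options to randomize over
n_blocks_per_track = (n_obstacles_per_track + 1) \
    if (randomize_obstacle_order and len(options) > 0) else (len(options) + 1)
track_resolution = (
    np.ceil(track_block_length * n_blocks_per_track / horizontal_scale).astype(int),
    np.ceil(track_width / horizontal_scale).astype(int),
)
print(n_blocks_per_track, track_resolution)  # 3 (144, 64)
```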

@ -183,7 +189,7 @@ class BarrierTrack:
height_offset_px = 0
return track_trimesh, track_heightfield, block_info, height_offset_px

def get_climb_track(self,
def get_jump_track(self,
wall_thickness,
trimesh_template,
heightfield_template,

@ -191,25 +197,25 @@ class BarrierTrack:
heightfield_noise= None,
virtual= False,
):
if isinstance(self.track_kwargs["climb"]["depth"], (tuple, list)):
if isinstance(self.track_kwargs["jump"]["depth"], (tuple, list)):
if not virtual:
climb_depth = min(*self.track_kwargs["climb"]["depth"])
jump_depth = min(*self.track_kwargs["jump"]["depth"])
else:
climb_depth = np.random.uniform(*self.track_kwargs["climb"]["depth"])
jump_depth = np.random.uniform(*self.track_kwargs["jump"]["depth"])
else:
climb_depth = self.track_kwargs["climb"]["depth"]
if isinstance(self.track_kwargs["climb"]["height"], (tuple, list)):
jump_depth = self.track_kwargs["jump"]["depth"]
if isinstance(self.track_kwargs["jump"]["height"], (tuple, list)):
if difficulty is None:
climb_height = np.random.uniform(*self.track_kwargs["climb"]["height"])
jump_height = np.random.uniform(*self.track_kwargs["jump"]["height"])
else:
climb_height = (1-difficulty) * self.track_kwargs["climb"]["height"][0] + difficulty * self.track_kwargs["climb"]["height"][1]
jump_height = (1-difficulty) * self.track_kwargs["jump"]["height"][0] + difficulty * self.track_kwargs["jump"]["height"][1]
else:
climb_height = self.track_kwargs["climb"]["height"]
if self.track_kwargs["climb"].get("climb_down_prob", 0.) > 0.:
if np.random.uniform() < self.track_kwargs["climb"]["climb_down_prob"]:
climb_height = -climb_height
depth_px = int(climb_depth / self.cfg.horizontal_scale)
height_value = climb_height / self.cfg.vertical_scale
jump_height = self.track_kwargs["jump"]["height"]
if self.track_kwargs["jump"].get("jump_down_prob", 0.) > 0.:
if np.random.uniform() < self.track_kwargs["jump"]["jump_down_prob"]:
jump_height = -jump_height
depth_px = int(jump_depth / self.cfg.horizontal_scale)
height_value = jump_height / self.cfg.vertical_scale
wall_thickness_px = int(wall_thickness / self.cfg.horizontal_scale) + 1
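The difficulty-conditioned height above is a plain linear interpolation over the configured range. A small sketch (function name and numbers are illustrative assumptions):

```python
def jump_height_from_difficulty(height_range, difficulty):
    # difficulty in [0, 1]: 0 picks the low end, 1 the high end of the range
    low, high = height_range
    return (1 - difficulty) * low + difficulty * high

print(jump_height_from_difficulty((0.2, 0.45), 0.0))  # 0.2  (easiest)
print(jump_height_from_difficulty((0.2, 0.45), 1.0))  # 0.45 (hardest)
```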

if not heightfield_noise is None:

@ -223,7 +229,7 @@ class BarrierTrack:
] += height_value
if height_value < 0.:
track_heightfield[
depth_px:,
(0 if virtual else depth_px):,
max(0, wall_thickness_px-1): min(-1, -wall_thickness_px+1),
] += height_value
track_trimesh = convert_heightfield_to_trimesh(

@ -233,12 +239,15 @@ class BarrierTrack:
self.cfg.slope_treshold,
)
assert not (
self.track_kwargs["climb"].get("fake_offset", 0.) != 0. and self.track_kwargs["climb"].get("fake_height", 0.) != 0.), \
self.track_kwargs["jump"].get("fake_offset", 0.) != 0. and self.track_kwargs["jump"].get("fake_height", 0.) != 0.), \
"fake_offset and fake_height cannot be both non-zero"
climb_height_ = climb_height + (self.track_kwargs["climb"].get("fake_offset", 0.) if self.track_kwargs["climb"].get("fake_offset", 0.) == 0. else self.track_kwargs["climb"].get("fake_height", 0.))
jump_height_ = jump_height + (
self.track_kwargs["jump"].get("fake_offset", 0.) \
if jump_height > 0. \
else 0.)
block_info = torch.tensor([
climb_depth,
climb_height_,
jump_depth,
jump_height_,
], dtype= torch.float32, device= self.device)
height_offset_px = height_value if not virtual else min(height_value, 0)
return track_trimesh, track_heightfield, block_info, height_offset_px
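A sketch of the per-block info layout implied above (the numbers are illustrative assumptions): `fake_offset` only inflates the height reported through `block_info`, not the physical mesh, and the tensor is stored next to the obstacle id.

```python
import torch

jump_depth, jump_height, fake_offset = 0.2, 0.3, 0.05
# fake_offset is only applied when jumping up, mirroring the diff
jump_height_ = jump_height + (fake_offset if jump_height > 0. else 0.)
block_info = torch.tensor([jump_depth, jump_height_], dtype=torch.float32)
# stored as track_info_map[row, col, block, 1:] = block_info, with
# track_info_map[row, col, block, 0] = obstacle_id
print(block_info)  # tensor([0.2000, 0.3500])
```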

@ -399,9 +408,9 @@ class BarrierTrack:
track_heightfield = heightfield_template + heightfield_noise
else:
track_heightfield = heightfield_template.copy()
# There is no difference between virtual/non-virtual environment.
start_px = int(self.track_kwargs["leap"].get("fake_offset", 0.) / self.cfg.horizontal_scale) + 1 if not virtual else 1
track_heightfield[
1: length_px+1,
start_px: length_px+1,
max(0, wall_thickness_px-1): min(-1, -wall_thickness_px+1),
] -= depth_value
track_trimesh = convert_heightfield_to_trimesh(

@ -538,13 +547,14 @@ class BarrierTrack:
self.track_info_map[row_idx, col_idx, 0, 0] = 0
self.track_info_map[row_idx, col_idx, 0, 1:] = block_info
self.track_width_map[row_idx, col_idx] = self.env_width - wall_thickness * 2
self.block_starting_height_map[row_idx, col_idx, 0] = block_starting_height_px * self.cfg.vertical_scale
block_starting_height_px += height_offset_px

for obstacle_idx, obstacle_selection in enumerate(obstacle_order):
obstacle_name = self.track_kwargs["options"][obstacle_selection]
obstacle_id = self.track_options_id_dict[obstacle_name]
# call method to generate trimesh and heightfield for each track block.
# For example get_climb_track, get_tilt_track
# For example get_jump_track, get_tilt_track
# using `virtual_track` to create a non-collision mesh for the collocation method in training.
track_trimesh, track_heightfield, block_info, height_offset_px = getattr(self, "get_" + obstacle_name + "_track")(
wall_thickness,

@ -576,6 +586,7 @@ class BarrierTrack:
)
self.track_info_map[row_idx, col_idx, obstacle_idx + 1, 0] = obstacle_id
self.track_info_map[row_idx, col_idx, obstacle_idx + 1, 1:] = block_info
self.block_starting_height_map[row_idx, col_idx, obstacle_idx + 1] = block_starting_height_px * self.cfg.vertical_scale
block_starting_height_px += height_offset_px

return block_starting_height_px

@ -711,7 +722,7 @@ class BarrierTrack:
block_idx = torch.floor(forward_distance / self.env_block_length).to(int) # (N,)
block_idx_clipped = torch.clip(
block_idx,
0.,
0,
(self.n_blocks_per_track - 1),
)
in_track_mask = (track_idx == track_idx_clipped).all(dim= -1) & (block_idx == block_idx_clipped)
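The hunk above changes the lower clip bound from `0.` to `0`. A plausible motivation (an assumption, not stated in the commit) is dtype hygiene: integer bounds keep the clipped tensor integral and safe to use as an index, whereas float bounds can promote the result or be rejected, depending on the PyTorch version.

```python
import torch

# Illustrative values only; block_idx_clipped is later used for tensor indexing.
block_idx = torch.floor(torch.tensor([3.7, -0.2]) / 1.2).to(int)  # int64 indices
block_idx_clipped = torch.clip(block_idx, 0, 3)  # integer bounds keep int64
print(block_idx_clipped.dtype)  # torch.int64
```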

@ -757,6 +768,7 @@ class BarrierTrack:
else self.env_block_length / 2
min_x_points_offset = torch.min(volume_points_offset[:, :, 0], dim= -1)[0] # (n,)
engaging_next_block = ((in_block_distance + min_x_points_offset) > engaging_obstacle_depth) \
& (in_block_distance > self.track_kwargs.get("engaging_finish_threshold", 0.)) \
& (in_block_distance > engaging_next_distance)

# update the engaging track_idx and block_idx if engaging next

@ -790,7 +802,9 @@ class BarrierTrack:
0
].to(int)
if self.track_kwargs.get("walk_in_skill_gap", False):
between_skill_mask = ((in_block_distance + min_x_points_offset) > engaging_obstacle_depth) & (in_block_distance < engaging_next_distance)
between_skill_mask = ((in_block_distance + min_x_points_offset) > engaging_obstacle_depth) \
& (in_block_distance > self.track_kwargs.get("engaging_finish_threshold", 0.)) \
& (in_block_distance < engaging_next_distance)
obstacle_id_selection[between_skill_mask] = 0.
engaging_block_info[between_skill_mask] = 0.
engaging_obstacle_onehot[

@ -827,18 +841,35 @@ class BarrierTrack:
distance_neg_y,
], dim= -1) # (n, 2)

def get_climb_penetration_depths(self,
def get_jump_penetration_depths(self,
block_infos,
positions_in_block,
mask_only= False,
):
in_obstacle_mask = positions_in_block[:, 0] <= block_infos[:, 1]
climb_over_mask = positions_in_block[:, 2] > block_infos[:, 2]
penetrated_mask = torch.logical_and(in_obstacle_mask, (torch.logical_not(climb_over_mask)))
in_up_mask = torch.logical_and(
positions_in_block[:, 0] <= block_infos[:, 1],
block_infos[:, 2] > 0.,
)
in_down_mask = torch.logical_and(
positions_in_block[:, 0] <= block_infos[:, 1],
block_infos[:, 2] < 0.,
)
jump_over_mask = torch.logical_and(
positions_in_block[:, 2] > block_infos[:, 2],
positions_in_block[:, 2] > 0, # to avoid penetration of the virtual obstacle when jumping down.
) # (n_points,)

penetrated_mask = torch.logical_and(
torch.logical_or(in_up_mask, in_down_mask),
(torch.logical_not(jump_over_mask)),
)
if mask_only:
return penetrated_mask.to(torch.float32)
penetration_depths_buffer = torch.zeros_like(penetrated_mask, dtype= torch.float32)
penetration_depths_buffer[penetrated_mask] = block_infos[penetrated_mask, 2] - positions_in_block[penetrated_mask, 2]
penetrate_up_mask = torch.logical_and(penetrated_mask, in_up_mask)
penetration_depths_buffer[penetrate_up_mask] = block_infos[penetrate_up_mask, 2] - positions_in_block[penetrate_up_mask, 2]
penetrate_down_mask = torch.logical_and(penetrated_mask, in_down_mask)
penetration_depths_buffer[penetrate_down_mask] = 0. - positions_in_block[penetrate_down_mask, 2]
return penetration_depths_buffer
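A toy check of the up/down cases in the new logic, following the indexing above where `block_infos[:, 1]` is the obstacle depth and `block_infos[:, 2]` the signed height (the concrete values are illustrative assumptions):

```python
import torch

block_infos = torch.tensor([[3., 0.2,  0.3],    # jump up, 0.3 m step
                            [3., 0.2, -0.3]])   # jump down, 0.3 m drop
positions_in_block = torch.tensor([[0.1, 0., 0.1],    # inside the step, below its top
                                   [0.1, 0., -0.1]])  # inside the drop region

in_up_mask = (positions_in_block[:, 0] <= block_infos[:, 1]) & (block_infos[:, 2] > 0.)
in_down_mask = (positions_in_block[:, 0] <= block_infos[:, 1]) & (block_infos[:, 2] < 0.)
jump_over_mask = (positions_in_block[:, 2] > block_infos[:, 2]) & (positions_in_block[:, 2] > 0)
penetrated = (in_up_mask | in_down_mask) & ~jump_over_mask

depths = torch.zeros_like(penetrated, dtype=torch.float32)
up = penetrated & in_up_mask
depths[up] = block_infos[up, 2] - positions_in_block[up, 2]      # distance below the step top
down = penetrated & in_down_mask
depths[down] = 0. - positions_in_block[down, 2]                  # distance below the old floor
print(depths)  # tensor([0.2000, 0.1000])
```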

def get_tilt_penetration_depths(self,

@ -906,10 +937,11 @@ class BarrierTrack:
forward_distance = sample_points[:, 0] - self.cfg.border_size - (track_idx[:, 0] * self.env_length) # (N,) w.r.t. a track
block_idx = torch.floor(forward_distance / self.env_block_length).to(int) # (N,)
block_idx[block_idx >= self.track_info_map.shape[2]] = 0.
positions_in_block = torch.cat([
(forward_distance % self.env_block_length).unsqueeze(-1),
sample_points[:, 1:] - self.env_origins_pyt[track_idx_clipped[:, 0], track_idx_clipped[:, 1]][:, 1:],
], dim= -1)
positions_in_block = torch.stack([
forward_distance % self.env_block_length,
sample_points[:, 1] - self.env_origins_pyt[track_idx_clipped[:, 0], track_idx_clipped[:, 1]][:, 1],
sample_points[:, 2] - self.block_starting_height_map[track_idx_clipped[:, 0], track_idx_clipped[:, 1], block_idx],
], dim= -1) # (N, 3) relative to the origin of the block, not the track.
block_infos = self.track_info_map[track_idx_clipped[:, 0], track_idx_clipped[:, 1], block_idx] # (N, 3)
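A scalar sketch of the world-to-block-local transform above (the lengths and heights are illustrative assumptions; in the diff the height comes from the new `block_starting_height_map` lookup):

```python
import torch

env_block_length = 1.2
forward_distance = torch.tensor([2.9])          # along the track, w.r.t. the track origin
block_idx = torch.floor(forward_distance / env_block_length).to(int)  # block 2
x_in_block = forward_distance % env_block_length                      # 0.5 m into block 2
block_start_height = torch.tensor([0.3])        # block_starting_height_map lookup
z_world = torch.tensor([0.45])
z_in_block = z_world - block_start_height       # height relative to the block start
print(block_idx.item(), round(x_in_block.item(), 2), round(z_in_block.item(), 2))
# 2 0.5 0.15
```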

penetration_depths = torch.zeros_like(sample_points[:, 0]) # shape (N,)

@ -967,23 +999,23 @@ class BarrierTrack:
return passed_depths

######## methods to draw visualization #######################
def draw_virtual_climb_track(self,
def draw_virtual_jump_track(self,
block_info,
xy_origin,
block_origin,
):
climb_depth = block_info[0]
climb_height = block_info[1]
jump_depth = block_info[1]
jump_height = block_info[2]
geom = gymutil.WireframeBoxGeometry(
climb_depth,
jump_depth if jump_height > 0 else self.track_kwargs["jump"].get("down_forward_length", 0.1),
self.env_width,
climb_height,
jump_height,
pose= None,
color= (0, 0, 1),
)
pose = gymapi.Transform(gymapi.Vec3(
climb_depth/2 + xy_origin[0],
self.env_width/2 + xy_origin[1],
climb_height/2,
jump_depth/2 + block_origin[0],
block_origin[1],
jump_height/2 + block_origin[2],
), r= None)
gymutil.draw_lines(
geom,

@ -995,10 +1027,10 @@ class BarrierTrack:

def draw_virtual_tilt_track(self,
block_info,
xy_origin,
block_origin,
):
tilt_depth = block_info[0]
tilt_width = block_info[1]
tilt_depth = block_info[1]
tilt_width = block_info[2]
wall_height = self.track_kwargs["tilt"]["wall_height"]
geom = gymutil.WireframeBoxGeometry(
tilt_depth,

@ -1009,9 +1041,9 @@ class BarrierTrack:
)

pose = gymapi.Transform(gymapi.Vec3(
tilt_depth/2 + xy_origin[0],
(self.env_width - tilt_width) / 4 + xy_origin[1],
wall_height/2,
tilt_depth/2 + block_origin[0],
block_origin[1] + (self.env_width + tilt_width) / 4,
wall_height/2 + block_origin[2],
), r= None)
gymutil.draw_lines(
geom,

@ -1021,9 +1053,9 @@ class BarrierTrack:
pose,
)
pose = gymapi.Transform(gymapi.Vec3(
tilt_depth/2 + xy_origin[0],
self.env_width - (self.env_width - tilt_width) / 4 + xy_origin[1],
wall_height/2,
tilt_depth/2 + block_origin[0],
block_origin[1] - (self.env_width + tilt_width) / 4,
wall_height/2 + block_origin[2],
), r= None)
gymutil.draw_lines(
geom,

@ -1035,11 +1067,11 @@ class BarrierTrack:

def draw_virtual_crawl_track(self,
block_info,
xy_origin,
block_origin,
):
crawl_depth = block_info[0]
crawl_height = block_info[1]
wall_height = self.track_kwargs["crawl"]["wall_height"]
crawl_depth = block_info[1]
crawl_height = block_info[2]
wall_height = self.track_kwargs["crawl"]["wall_height"][1] if isinstance(self.track_kwargs["crawl"]["wall_height"], (list, tuple)) else self.track_kwargs["crawl"]["wall_height"]
geom = gymutil.WireframeBoxGeometry(
crawl_depth,
self.env_width,

@ -1048,9 +1080,9 @@ class BarrierTrack:
color= (0, 0, 1),
)
pose = gymapi.Transform(gymapi.Vec3(
crawl_depth/2 + xy_origin[0],
self.env_width/2 + xy_origin[1],
wall_height/2 + crawl_height,
crawl_depth/2 + block_origin[0],
block_origin[1],
wall_height/2 + crawl_height + block_origin[2],
), r= None)
gymutil.draw_lines(
geom,

@ -1062,11 +1094,11 @@ class BarrierTrack:

def draw_virtual_leap_track(self,
block_info,
xy_origin,
block_origin,
):
# virtual/non-virtual terrain looks the same when leaping the gap,
# but the expected height can be visualized.
leap_length = block_info[0]
leap_length = block_info[1]
expected_height = self.track_kwargs["leap"]["height"]
geom = gymutil.WireframeBoxGeometry(
leap_length,

@ -1076,9 +1108,9 @@ class BarrierTrack:
color= (0, 0.5, 0.5),
)
pose = gymapi.Transform(gymapi.Vec3(
leap_length/2 + xy_origin[0],
self.env_width/2 + xy_origin[1],
expected_height/2,
leap_length/2 + block_origin[0],
block_origin[1],
expected_height/2 + block_origin[2],
), r= None)
gymutil.draw_lines(
geom,

@ -1090,12 +1122,12 @@ class BarrierTrack:


def draw_virtual_track(self,
track_origin_px,
row_idx,
col_idx,
):
difficulties = self.get_difficulty(row_idx, col_idx)
virtual_terrain = difficulties[1]
track_origin = self.env_origins[row_idx, col_idx]

for block_idx in range(1, self.track_info_map.shape[2]):
if virtual_terrain and self.track_kwargs["draw_virtual_terrain"]:

@ -1104,13 +1136,14 @@ class BarrierTrack:
if v == obstacle_id:
obstacle_name = k
break
block_info = self.track_info_map[row_idx, col_idx, block_idx, 1:]

heightfield_x0 = track_origin_px[0] + self.track_block_resolution[0] * block_idx
heightfield_y0 = track_origin_px[1]
block_info = self.track_info_map[row_idx, col_idx, block_idx] # (3,)
getattr(self, "draw_virtual_" + obstacle_name + "_track")(
block_info,
(np.array([heightfield_x0, heightfield_y0]) * self.cfg.horizontal_scale),
np.array([
track_origin[0] + self.track_kwargs["track_block_length"] * block_idx,
track_origin[1],
self.block_starting_height_map[row_idx, col_idx, block_idx].cpu().numpy(),
]),
)

def draw_virtual_terrain(self, viewer):

@ -1119,7 +1152,6 @@ class BarrierTrack:
for row_idx in range(self.cfg.num_rows):
for col_idx in range(self.cfg.num_cols):
self.draw_virtual_track(
self.track_origins_px[row_idx, col_idx, :2],
row_idx= row_idx,
col_idx= col_idx,
)