diff --git a/README.md b/README.md
index 3e8c93e..1a049fe 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ Authors: [Sören Schwertfeger](https://robotics.shanghaitech.edu.cn/people/soeren), [Chelsea Finn](https://ai.stanford.edu/~cbfinn/), [Hang Zhao](https://hangzhaomit.github.io/)
-Conference on Robot Learning (CoRL) 2023, Oral
+Conference on Robot Learning (CoRL) 2023, **Oral**, **Best Systems Paper Award Finalist (top 3)**

@@ -34,7 +34,7 @@ To deploy the trained model on your real robot, please follow the instructions i If you cannot run the distillation part or all graphics computing goes to GPU 0 dispite you have multiple GPUs and have set the CUDA_VISIBLE_DEVICES, please use docker to isolate each GPU. ## To Do (will be done before Nov 2023) ## -- [ ] Go1 training pipeline in simulation +- [x] Go1 training configuration (not from scratch) - [ ] A1 deployment code - [x] Go1 deployment code diff --git a/legged_gym/legged_gym/envs/__init__.py b/legged_gym/legged_gym/envs/__init__.py index 34cfa7b..d1625c9 100644 --- a/legged_gym/legged_gym/envs/__init__.py +++ b/legged_gym/legged_gym/envs/__init__.py @@ -62,11 +62,27 @@ task_registry.register( "go1_field", LeggedRobotNoisy, Go1FieldCfg(), Go1FieldCf task_registry.register( "go1_distill", LeggedRobotNoisy, Go1FieldDistillCfg(), Go1FieldDistillCfgPPO()) ## The following tasks are for the convinience of opensource -from .a1.a1_climb_config import A1ClimbCfg, A1ClimbCfgPPO -task_registry.register( "a1_climb", LeggedRobotNoisy, A1ClimbCfg(), A1ClimbCfgPPO() ) +from .a1.a1_remote_config import A1RemoteCfg, A1RemoteCfgPPO +task_registry.register( "a1_remote", LeggedRobotNoisy, A1RemoteCfg(), A1RemoteCfgPPO() ) +from .a1.a1_jump_config import A1JumpCfg, A1JumpCfgPPO +task_registry.register( "a1_jump", LeggedRobotNoisy, A1JumpCfg(), A1JumpCfgPPO() ) +from .a1.a1_down_config import A1DownCfg, A1DownCfgPPO +task_registry.register( "a1_down", LeggedRobotNoisy, A1DownCfg(), A1DownCfgPPO() ) from .a1.a1_leap_config import A1LeapCfg, A1LeapCfgPPO task_registry.register( "a1_leap", LeggedRobotNoisy, A1LeapCfg(), A1LeapCfgPPO() ) from .a1.a1_crawl_config import A1CrawlCfg, A1CrawlCfgPPO task_registry.register( "a1_crawl", LeggedRobotNoisy, A1CrawlCfg(), A1CrawlCfgPPO() ) from .a1.a1_tilt_config import A1TiltCfg, A1TiltCfgPPO task_registry.register( "a1_tilt", LeggedRobotNoisy, A1TiltCfg(), A1TiltCfgPPO() ) +from .go1.go1_remote_config import Go1RemoteCfg, Go1RemoteCfgPPO +task_registry.register( "go1_remote", LeggedRobotNoisy, Go1RemoteCfg(), Go1RemoteCfgPPO() ) +from .go1.go1_jump_config import Go1JumpCfg, Go1JumpCfgPPO +task_registry.register( "go1_jump", LeggedRobotNoisy, Go1JumpCfg(), Go1JumpCfgPPO() ) +from .go1.go1_down_config import Go1DownCfg, Go1DownCfgPPO +task_registry.register( "go1_down", LeggedRobotNoisy, Go1DownCfg(), Go1DownCfgPPO() ) +from .go1.go1_leap_config import Go1LeapCfg, Go1LeapCfgPPO +task_registry.register( "go1_leap", LeggedRobotNoisy, Go1LeapCfg(), Go1LeapCfgPPO() ) +from .go1.go1_crawl_config import Go1CrawlCfg, Go1CrawlCfgPPO +task_registry.register( "go1_crawl", LeggedRobotNoisy, Go1CrawlCfg(), Go1CrawlCfgPPO() ) +from .go1.go1_tilt_config import Go1TiltCfg, Go1TiltCfgPPO +task_registry.register( "go1_tilt", LeggedRobotNoisy, Go1TiltCfg(), Go1TiltCfgPPO() ) diff --git a/legged_gym/legged_gym/envs/a1/a1_config.py b/legged_gym/legged_gym/envs/a1/a1_config.py index f3a6e28..2c8703b 100644 --- a/legged_gym/legged_gym/envs/a1/a1_config.py +++ b/legged_gym/legged_gym/envs/a1/a1_config.py @@ -71,6 +71,14 @@ class A1RoughCfg( LeggedRobotCfg ): penalize_contacts_on = ["thigh", "calf"] terminate_after_contacts_on = ["base"] self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter + sdk_dof_range = dict( # copied from a1_const.h from unitree_legged_sdk + Hip_max= 0.802, + Hip_min=-0.802, + Thigh_max= 4.19, + Thigh_min= -1.05, + Calf_max= -0.916, + Calf_min= -2.7, + ) class rewards( LeggedRobotCfg.rewards ): soft_dof_pos_limit = 0.9 diff --git 
a/legged_gym/legged_gym/envs/a1/a1_down_config.py b/legged_gym/legged_gym/envs/a1/a1_down_config.py new file mode 100644 index 0000000..c671cb3 --- /dev/null +++ b/legged_gym/legged_gym/envs/a1/a1_down_config.py @@ -0,0 +1,116 @@ +from os import path as osp +import numpy as np +from legged_gym.envs.a1.a1_field_config import A1FieldCfg, A1FieldCfgPPO +from legged_gym.utils.helpers import merge_dict + +class A1DownCfg( A1FieldCfg ): + + #### uncomment this to train non-virtual terrain + class sensor( A1FieldCfg.sensor ): + class proprioception( A1FieldCfg.sensor.proprioception ): + latency_range = [0.04-0.0025, 0.04+0.0075] + #### uncomment the above to train non-virtual terrain + + class terrain( A1FieldCfg.terrain ): + max_init_terrain_level = 2 + border_size = 5 + slope_treshold = 20. + curriculum = True + + BarrierTrack_kwargs = merge_dict(A1FieldCfg.terrain.BarrierTrack_kwargs, dict( + options= [ + "jump", + ], + track_block_length= 1.6, + jump= dict( + height= (0.2, 0.45), + depth= 0.3, + fake_offset= 0.0, # [m] an offset that make the robot easier to get into the obstacle + jump_down_prob= 1., # probability of jumping down use it in non-virtual terrain + ), + virtual_terrain= True, # Change this to False for real terrain + no_perlin_threshold= 0.1, + n_obstacles_per_track= 3, + )) + + TerrainPerlin_kwargs = merge_dict(A1FieldCfg.terrain.TerrainPerlin_kwargs, dict( + zScale= [0.05, 0.15], + )) + + class commands( A1FieldCfg.commands ): + class ranges( A1FieldCfg.commands.ranges ): + lin_vel_x = [0.5, 1.5] + lin_vel_y = [0.0, 0.0] + ang_vel_yaw = [0., 0.] + + class termination( A1FieldCfg.termination ): + # additional factors that determines whether to terminates the episode + termination_terms = [ + "roll", + "pitch", + "z_low", + "z_high", + "out_of_track", + ] + z_low_kwargs = merge_dict(A1FieldCfg.termination.z_low_kwargs, dict( + threshold= -3., + )) + + class domain_rand( A1FieldCfg.domain_rand ): + init_base_pos_range = dict( + x= [0.2, 0.6], + y= [-0.25, 0.25], + ) + init_base_rot_range = dict( + roll= [-0.1, 0.1], + pitch= [-0.1, 0.1], + ) + + push_robots = True + + class rewards( A1FieldCfg.rewards ): + class scales: + tracking_ang_vel = 0.1 + world_vel_l2norm = -1.3 + legs_energy_substeps = -1e-6 + alive = 2. + penetrate_depth = -2e-3 + penetrate_volume = -2e-3 + exceed_dof_pos_limits = -4e-1 + exceed_torque_limits_l1norm = -4e-1 + feet_contact_forces = -1e-2 + torques = -2e-5 + lin_pos_y = -0.1 + yaw_abs = -0.1 + collision = -0.1 + down_cond = 0.3 + soft_dof_pos_limit = 0.8 + max_contact_force = 200.0 + + class curriculum( A1FieldCfg.curriculum ): + penetrate_volume_threshold_harder = 2000000 + penetrate_volume_threshold_easier = 4000000 + penetrate_depth_threshold_harder = 200000 + penetrate_depth_threshold_easier = 400000 + + +logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs") +class A1DownCfgPPO( A1FieldCfgPPO ): + class algorithm( A1FieldCfgPPO.algorithm ): + entropy_coef = 0.0 + clip_min_std = 0.2 + + class runner( A1FieldCfgPPO.runner ): + policy_class_name = "ActorCriticRecurrent" + experiment_name = "field_a1" + resume = True + load_run = "{Your trained walk model directory}" + + run_name = "".join(["Skills_", + ("down" if A1DownCfg.terrain.BarrierTrack_kwargs["jump"]["jump_down_prob"] > 0. 
else "jump"), + ("_noResume" if not resume else "from" + "_".join(load_run.split("/")[-1].split("_")[:2])), + ]) + max_iterations = 20000 + save_interval = 500 + + \ No newline at end of file diff --git a/legged_gym/legged_gym/envs/a1/a1_field_config.py b/legged_gym/legged_gym/envs/a1/a1_field_config.py index a3e5039..057e792 100644 --- a/legged_gym/legged_gym/envs/a1/a1_field_config.py +++ b/legged_gym/legged_gym/envs/a1/a1_field_config.py @@ -1,4 +1,5 @@ import numpy as np +import os.path as osp from legged_gym.envs.a1.a1_config import A1RoughCfg, A1RoughCfgPPO class A1FieldCfg( A1RoughCfg ): @@ -59,7 +60,7 @@ class A1FieldCfg( A1RoughCfg ): BarrierTrack_kwargs = dict( options= [ - # "climb", + # "jump", # "crawl", # "tilt", # "leap", @@ -68,11 +69,11 @@ class A1FieldCfg( A1RoughCfg ): track_block_length= 2., # the x-axis distance from the env origin point wall_thickness= (0.04, 0.2), # [m] wall_height= -0.05, - climb= dict( + jump= dict( height= (0.2, 0.6), depth= (0.1, 0.8), # size along the forward axis fake_offset= 0.0, # [m] an offset that make the robot easier to get into the obstacle - climb_down_prob= 0.0, + jump_down_prob= 0., # probability of jumping down use it in non-virtual terrain ), crawl= dict( height= (0.25, 0.5), @@ -97,13 +98,14 @@ class A1FieldCfg( A1RoughCfg ): virtual_terrain= False, draw_virtual_terrain= True, engaging_next_threshold= 1.2, + engaging_finish_threshold= 0., curriculum_perlin= False, - no_perlin_threshold= 0.0, + no_perlin_threshold= 0.1, ) TerrainPerlin_kwargs = dict( - zScale= [0.1, 0.15], - # zScale= 0.1, # Use a constant zScale for training a walk policy + zScale= [0.08, 0.15], + # zScale= 0.15, # Use a constant zScale for training a walk policy frequency= 10, ) @@ -114,9 +116,6 @@ class A1FieldCfg( A1RoughCfg ): lin_vel_x = [-1.0, 1.0] lin_vel_y = [0.0, 0.0] ang_vel_yaw = [0., 0.] - ######## configs for training a walk policy ######### - # lin_vel_y = [-1., 1.] - # ang_vel_yaw = [-1., 1.] class control( A1RoughCfg.control ): stiffness = {'joint': 50.} @@ -125,10 +124,15 @@ class A1FieldCfg( A1RoughCfg ): torque_limits = 25 # override the urdf computer_clip_torque = True motor_clip_torque = False + # action_delay = False # set to True to enable action delay in sim + # action_delay_range = [0.002, 0.022] # [s] + # action_delay_resample_time = 5.0 # [s] class asset( A1RoughCfg.asset ): - penalize_contacts_on = ["base", "thigh"] + penalize_contacts_on = ["base", "thigh", "calf"] terminate_after_contacts_on = ["base", "imu"] + front_hip_names = ["FR_hip_joint", "FL_hip_joint"] + rear_hip_names = ["RR_hip_joint", "RL_hip_joint"] class termination: # additional factors that determines whether to terminates the episode @@ -145,8 +149,8 @@ class A1FieldCfg( A1RoughCfg ): tilt_threshold= 1.5, ) pitch_kwargs = dict( - threshold= 1.6, # [rad] # for leap, climb - climb_threshold= 1.6, + threshold= 1.6, # [rad] # for leap, jump + jump_threshold= 1.6, leap_threshold= 1.5, ) z_low_kwargs = dict( @@ -176,7 +180,7 @@ class A1FieldCfg( A1RoughCfg ): added_mass_range = [1.0, 3.0] randomize_friction = True - friction_range = [0.0, 1.] + friction_range = [0., 2.] init_base_pos_range = dict( x= [0.2, 0.6], @@ -200,10 +204,29 @@ class A1FieldCfg( A1RoughCfg ): class normalization( A1RoughCfg.normalization ): class obs_scales( A1RoughCfg.normalization.obs_scales ): forward_depth = 1. - base_pose = [0., 0., 1., 1., 1., 1.] + base_pose = [0., 0., 0., 1., 1., 1.] engaging_block = 1. + """ The following action clip is used for tanh policy activation. 
""" + # clip_actions_method = "hard" + # dof_pos_redundancy = 0.2 + # clip_actions_low = [] + # clip_actions_high = [] + # for sdk_joint_name, sim_joint_name in zip( + # ["Hip", "Thigh", "Calf"] * 4, + # [ # in the order as simulation + # "FL_hip_joint", "FL_thigh_joint", "FL_calf_joint", + # "FR_hip_joint", "FR_thigh_joint", "FR_calf_joint", + # "RL_hip_joint", "RL_thigh_joint", "RL_calf_joint", + # "RR_hip_joint", "RR_thigh_joint", "RR_calf_joint", + # ], + # ): # This setting is only for position control. + # clip_actions_low.append( (A1RoughCfg.asset.sdk_dof_range[sdk_joint_name + "_min"] + dof_pos_redundancy - A1RoughCfg.init_state.default_joint_angles[sim_joint_name]) / a1_action_scale ) + # clip_actions_high.append( (A1RoughCfg.asset.sdk_dof_range[sdk_joint_name + "_max"] - dof_pos_redundancy - A1RoughCfg.init_state.default_joint_angles[sim_joint_name]) / a1_action_scale ) + # del dof_pos_redundancy, sdk_joint_name, sim_joint_name # This is not intended to be an attribute of normalization + """ The above action clip is used for tanh policy activation. """ class noise( A1RoughCfg.noise ): + add_noise = False # disable internal uniform +- 1 noise, and no noise in proprioception class noise_scales( A1RoughCfg.noise.noise_scales ): forward_depth = 0.1 base_pose = 1.0 @@ -244,6 +267,7 @@ class A1FieldCfg( A1RoughCfg ): no_moveup_when_fall = False # chosen heuristically, please refer to `LeggedRobotField._get_terrain_curriculum_move` with fixed body_measure_points +logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs") class A1FieldCfgPPO( A1RoughCfgPPO ): class algorithm( A1RoughCfgPPO.algorithm ): entropy_coef = 0.01 @@ -256,8 +280,9 @@ class A1FieldCfgPPO( A1RoughCfgPPO ): class runner( A1RoughCfgPPO.runner ): policy_class_name = "ActorCriticRecurrent" experiment_name = "field_a1" - run_name = "".join(["WalkingBase", - ("_pEnergySubsteps" + np.format_float_scientific(-A1FieldCfg.rewards.scales.legs_energy_substeps, precision=1, exp_digits=1, trim="-") if A1FieldCfg.rewards.scales.legs_energy_substeps != 0 else ""), + resume = False + + run_name = "".join(["WalkForward", ("_propDelay{:.2f}-{:.2f}".format( A1FieldCfg.sensor.proprioception.latency_range[0], A1FieldCfg.sensor.proprioception.latency_range[1], diff --git a/legged_gym/legged_gym/envs/a1/a1_climb_config.py b/legged_gym/legged_gym/envs/a1/a1_jump_config.py similarity index 62% rename from legged_gym/legged_gym/envs/a1/a1_climb_config.py rename to legged_gym/legged_gym/envs/a1/a1_jump_config.py index 92efff4..fd09c1c 100644 --- a/legged_gym/legged_gym/envs/a1/a1_climb_config.py +++ b/legged_gym/legged_gym/envs/a1/a1_jump_config.py @@ -3,12 +3,14 @@ import numpy as np from legged_gym.envs.a1.a1_field_config import A1FieldCfg, A1FieldCfgPPO from legged_gym.utils.helpers import merge_dict -class A1ClimbCfg( A1FieldCfg ): +class A1JumpCfg( A1FieldCfg ): + + class init_state( A1FieldCfg.init_state ): + pos = [0., 0., 0.45] #### uncomment this to train non-virtual terrain # class sensor( A1FieldCfg.sensor ): # class proprioception( A1FieldCfg.sensor.proprioception ): - # delay_action_obs = True # latency_range = [0.04-0.0025, 0.04+0.0075] #### uncomment the above to train non-virtual terrain @@ -20,22 +22,23 @@ class A1ClimbCfg( A1FieldCfg ): BarrierTrack_kwargs = merge_dict(A1FieldCfg.terrain.BarrierTrack_kwargs, dict( options= [ - "climb", + "jump", ], track_block_length= 1.6, - climb= dict( + jump= dict( height= (0.2, 0.6), # use this to train in virtual terrain # height= (0.1, 0.5), 
# use this to train in non-virtual terrain depth= (0.1, 0.2), fake_offset= 0.0, # [m] an offset that make the robot easier to get into the obstacle - climb_down_prob= 0., # probability of climbing down use it in non-virtual terrain + jump_down_prob= 0., # probability of jumping down use it in non-virtual terrain ), virtual_terrain= False, # Change this to False for real terrain - no_perlin_threshold= 0.06, + no_perlin_threshold= 0.12, + n_obstacles_per_track= 3, )) TerrainPerlin_kwargs = merge_dict(A1FieldCfg.terrain.TerrainPerlin_kwargs, dict( - zScale= [0.05, 0.1], + zScale= [0.05, 0.15], )) class commands( A1FieldCfg.commands ): @@ -58,10 +61,19 @@ class A1ClimbCfg( A1FieldCfg ): )) class domain_rand( A1FieldCfg.domain_rand ): + class com_range( A1FieldCfg.domain_rand.com_range ): + z = [-0.1, 0.1] + init_base_pos_range = dict( x= [0.2, 0.6], y= [-0.25, 0.25], ) + init_base_rot_range = dict( + roll= [-0.1, 0.1], + pitch= [-0.1, 0.1], + ) + + push_robots = False class rewards( A1FieldCfg.rewards ): class scales: @@ -73,6 +85,8 @@ class A1ClimbCfg( A1FieldCfg ): penetrate_volume = -1e-2 exceed_dof_pos_limits = -1e-1 exceed_torque_limits_i = -2e-1 + soft_dof_pos_limit = 0.8 + max_contact_force = 100.0 class curriculum( A1FieldCfg.curriculum ): penetrate_volume_threshold_harder = 8000 @@ -81,7 +95,8 @@ class A1ClimbCfg( A1FieldCfg ): penetrate_depth_threshold_easier = 1600 -class A1ClimbCfgPPO( A1FieldCfgPPO ): +logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs") +class A1JumpCfgPPO( A1FieldCfgPPO ): class algorithm( A1FieldCfgPPO.algorithm ): entropy_coef = 0.0 clip_min_std = 0.2 @@ -89,23 +104,15 @@ class A1ClimbCfgPPO( A1FieldCfgPPO ): class runner( A1FieldCfgPPO.runner ): policy_class_name = "ActorCriticRecurrent" experiment_name = "field_a1" - run_name = "".join(["Skill", - ("Multi" if len(A1ClimbCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else (A1ClimbCfg.terrain.BarrierTrack_kwargs["options"][0] if A1ClimbCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")), - ("_pEnergySubsteps{:.0e}".format(A1ClimbCfg.rewards.scales.legs_energy_substeps) if A1ClimbCfg.rewards.scales.legs_energy_substeps != -1e-6 else ""), - ("_pDofLimit{:.0e}".format(A1ClimbCfg.rewards.scales.exceed_dof_pos_limits) if getattr(A1ClimbCfg.rewards.scales, "exceed_dof_pos_limits") != 0.0 else ""), - ("_climbDownProb{:.1f}".format(A1ClimbCfg.terrain.BarrierTrack_kwargs["climb"]["climb_down_prob"]) if A1ClimbCfg.terrain.BarrierTrack_kwargs["climb"].get("climb_down_prob", 0.) > 0. else ""), - ("_climbHeight{:.1f}-{:.1f}".format(*A1ClimbCfg.terrain.BarrierTrack_kwargs["climb"]["height"])), - ("_propDelay{:.2f}-{:.2f}".format( - A1ClimbCfg.sensor.proprioception.latency_range[0], - A1ClimbCfg.sensor.proprioception.latency_range[1], - ) if A1ClimbCfg.sensor.proprioception.delay_action_obs else "" - ), - ("_virtual" if A1ClimbCfg.terrain.BarrierTrack_kwargs["virtual_terrain"] else ""), - ("_trackBlockLen{:.1f}".format(A1ClimbCfg.terrain.BarrierTrack_kwargs["track_block_length"]) if A1ClimbCfg.terrain.BarrierTrack_kwargs["track_block_length"] != 2. else ""), - ]) resume = True load_run = "{Your traind walking model directory}" - load_run = "{Your virtually trained climb model directory}" + load_run = "{Your virtually trained jump model directory}" + + run_name = "".join(["Skills_", + ("jump" if A1JumpCfg.terrain.BarrierTrack_kwargs["jump"]["jump_down_prob"] < 1. 
else "down"), + ("_virtual" if A1JumpCfg.terrain.BarrierTrack_kwargs["virtual_terrain"] else ""), + ("_noResume" if not resume else "_from" + "_".join(load_run.split("/")[-1].split("_")[:2])), + ]) max_iterations = 20000 save_interval = 500 \ No newline at end of file diff --git a/legged_gym/legged_gym/envs/a1/a1_remote_config.py b/legged_gym/legged_gym/envs/a1/a1_remote_config.py new file mode 100644 index 0000000..60f3cd4 --- /dev/null +++ b/legged_gym/legged_gym/envs/a1/a1_remote_config.py @@ -0,0 +1,82 @@ +import numpy as np +import os.path as osp +from legged_gym.envs.a1.a1_field_config import A1FieldCfg, A1FieldCfgPPO +from legged_gym.utils.helpers import merge_dict + +class A1RemoteCfg( A1FieldCfg ): + class env( A1FieldCfg.env ): + num_envs = 4096 + obs_components = [ + "proprioception", # 48 + # "height_measurements", # 187 + # "forward_depth", + # "base_pose", + # "robot_config", + # "engaging_block", + # "sidewall_distance", + ] + privileged_obs_components = [ + "proprioception", + # "height_measurements", + # "forward_depth", + "robot_config", + ] + use_lin_vel = False + privileged_use_lin_vel = True + + class terrain( A1FieldCfg.terrain ): + num_rows = 4 + num_cols = 4 + selected = "TerrainPerlin" + TerrainPerlin_kwargs = dict( + zScale= 0.15, + frequency= 10, + ) + + class commands( A1FieldCfg.commands ): + class ranges( A1FieldCfg.commands.ranges ): + lin_vel_x = [-1.0, 1.0] + lin_vel_y = [-1., 1.] + ang_vel_yaw = [-1., 1.] + + class domain_rand( A1FieldCfg.domain_rand ): + class com_range( A1FieldCfg.domain_rand.com_range ): + x = [-0.2, 0.2] + max_push_vel_ang = 0.5 + push_robots = True + + class rewards( A1FieldCfg.rewards ): + class scales: + ###### hacker from Field + tracking_ang_vel = 0.5 + lin_vel_l2norm = -1. + legs_energy_substeps = -1e-5 + alive = 2. + # penalty for hardware safety + exceed_dof_pos_limits = -4e-2 + exceed_torque_limits_l1norm = -4e-1 + # penalty for walking gait, probably no need + collision = -0.1 + orientation = -0.1 + feet_contact_forces = -1e-3 + soft_dof_pos_limit = 0.5 + max_contact_force = 60.0 + +logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs") +class A1RemoteCfgPPO( A1FieldCfgPPO ): + class algorithm( A1FieldCfgPPO.algorithm ): + entropy_coef = 0.01 + clip_min_std = 0.05 + + class runner( A1FieldCfgPPO.runner ): + resume = False + load_run = None + + run_name = "".join(["WalkByRemote", + ("_rLinTrack{:.1f}".format(A1RemoteCfg.rewards.scales.tracking_lin_vel) if getattr(A1RemoteCfg.rewards.scales, "tracking_lin_vel", 0.) != 0. else ""), + ("_pLin{:.1f}".format(-A1RemoteCfg.rewards.scales.lin_vel_l2norm) if getattr(A1RemoteCfg.rewards.scales, "lin_vel_l2norm", 0.) != 0. else ""), + ("_rAng{:.1f}".format(A1RemoteCfg.rewards.scales.tracking_ang_vel) if A1RemoteCfg.rewards.scales.tracking_ang_vel != 0.2 else ""), + ("_rAlive{:.1f}".format(A1RemoteCfg.rewards.scales.alive) if getattr(A1RemoteCfg.rewards.scales, "alive", 2.) != 2. 
else ""), + ("_pEnergySubsteps{:.0e}".format(A1RemoteCfg.rewards.scales.legs_energy_substeps) if getattr(A1RemoteCfg.rewards.scales, "legs_energy_substeps", -2e-5) != -2e-5 else "_nopEnergy"), + ("_noResume" if not resume else "_from" + "_".join(load_run.split("/")[-1].split("_")[:2])), + ]) \ No newline at end of file diff --git a/legged_gym/legged_gym/envs/base/legged_robot.py b/legged_gym/legged_gym/envs/base/legged_robot.py index a767bda..0513206 100644 --- a/legged_gym/legged_gym/envs/base/legged_robot.py +++ b/legged_gym/legged_gym/envs/base/legged_robot.py @@ -141,6 +141,7 @@ class LeggedRobot(BaseTask): self.last_actions[:] = self.actions[:] self.last_dof_vel[:] = self.dof_vel[:] self.last_root_vel[:] = self.root_states[:, 7:13] + self.last_torques[:] = self.torques[:] if self.viewer and self.enable_viewer_sync and self.debug_viz: self._draw_debug_vis() @@ -343,6 +344,7 @@ class LeggedRobot(BaseTask): # set small commands to zero self.commands[env_ids, :2] *= (torch.norm(self.commands[env_ids, :2], dim=1) > 0.2).unsqueeze(1) + self.commands[env_ids, 2] *= (torch.abs(self.commands[env_ids, 2]) > 0.1) def _compute_torques(self, actions): """ Compute torques from actions. @@ -387,7 +389,9 @@ class LeggedRobot(BaseTask): ) else: self.dof_pos[env_ids] = self.default_dof_pos - self.dof_vel[env_ids] = 0. + # self.dof_vel[env_ids] = 0. # history init method + dof_vel_range = getattr(self.cfg.domain_rand, "init_dof_vel_range", [-3., 3.]) + self.dof_vel[env_ids] = torch.rand_like(self.dof_vel[env_ids]) * abs(dof_vel_range[1] - dof_vel_range[0]) + min(dof_vel_range) # Each env has multiple actors. So the actor index is not the same as env_id. But robot actor is always the first. dof_idx = env_ids * self.all_root_states.shape[0] / self.num_envs @@ -414,16 +418,64 @@ class LeggedRobot(BaseTask): else: self.root_states[env_ids] = self.base_init_state self.root_states[env_ids, :3] += self.env_origins[env_ids] + # base rotation (roll and pitch) + if hasattr(self.cfg.domain_rand, "init_base_rot_range"): + base_roll = torch_rand_float( + *self.cfg.domain_rand.init_base_rot_range["roll"], + (len(env_ids), 1), + device=self.device, + )[:, 0] + base_pitch = torch_rand_float( + *self.cfg.domain_rand.init_base_rot_range["pitch"], + (len(env_ids), 1), + device=self.device, + )[:, 0] + base_quat = quat_from_euler_xyz(base_roll, base_pitch, torch.zeros_like(base_roll)) + self.root_states[env_ids, 3:7] = base_quat # base velocities if getattr(self.cfg.domain_rand, "init_base_vel_range", None) is None: base_vel_range = (-0.5, 0.5) else: base_vel_range = self.cfg.domain_rand.init_base_vel_range - self.root_states[env_ids, 7:13] = torch_rand_float( - *base_vel_range, - (len(env_ids), 6), - device=self.device, - ) # [7:10]: lin vel, [10:13]: ang vel + if isinstance(base_vel_range, (tuple, list)): + self.root_states[env_ids, 7:13] = torch_rand_float( + *base_vel_range, + (len(env_ids), 6), + device=self.device, + ) # [7:10]: lin vel, [10:13]: ang vel + elif isinstance(base_vel_range, dict): + self.root_states[env_ids, 7:8] = torch_rand_float( + *base_vel_range["x"], + (len(env_ids), 1), + device=self.device, + ) + self.root_states[env_ids, 8:9] = torch_rand_float( + *base_vel_range["y"], + (len(env_ids), 1), + device=self.device, + ) + self.root_states[env_ids, 9:10] = torch_rand_float( + *base_vel_range["z"], + (len(env_ids), 1), + device=self.device, + ) + self.root_states[env_ids, 10:11] = torch_rand_float( + *base_vel_range["roll"], + (len(env_ids), 1), + device=self.device, + ) + self.root_states[env_ids, 
11:12] = torch_rand_float( + *base_vel_range["pitch"], + (len(env_ids), 1), + device=self.device, + ) + self.root_states[env_ids, 12:13] = torch_rand_float( + *base_vel_range["yaw"], + (len(env_ids), 1), + device=self.device, + ) + else: + raise NameError(f"Unknown base_vel_range type: {type(base_vel_range)}") # Each env has multiple actors. So the actor index is not the same as env_id. But robot actor is always the first. actor_idx = env_ids * self.all_root_states.shape[0] / self.num_envs @@ -546,6 +598,7 @@ class LeggedRobot(BaseTask): self.last_actions = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False) self.last_dof_vel = torch.zeros_like(self.dof_vel) self.last_root_vel = torch.zeros_like(self.root_states[:, 7:13]) + self.last_torques = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False) self.commands = torch.zeros(self.num_envs, self.cfg.commands.num_commands, dtype=torch.float, device=self.device, requires_grad=False) # x vel, y vel, yaw vel, heading self.commands_scale = torch.tensor([self.obs_scales.lin_vel, self.obs_scales.lin_vel, self.obs_scales.ang_vel], device=self.device, requires_grad=False,) # TODO change this self.feet_air_time = torch.zeros(self.num_envs, self.feet_indices.shape[0], dtype=torch.float, device=self.device, requires_grad=False) @@ -580,8 +633,11 @@ class LeggedRobot(BaseTask): self.default_dof_pos = self.default_dof_pos.unsqueeze(0) def _reset_buffers(self, env_ids): + if getattr(self.cfg.init_state, "zero_actions", False): + self.actions[env_ids] = 0. self.last_actions[env_ids] = 0. self.last_dof_vel[env_ids] = 0. + self.last_torques[env_ids] = 0. self.feet_air_time[env_ids] = 0. self.episode_length_buf[env_ids] = 0 self.reset_buf[env_ids] = 1 @@ -902,8 +958,13 @@ class LeggedRobot(BaseTask): # log additional curriculum info if self.cfg.terrain.curriculum: self.extras["episode"]["terrain_level"] = torch.mean(self.terrain_levels.float()) + if len(env_ids) > 0: + self.extras["episode"]["terrain_level_max"] = torch.max(self.terrain_levels[env_ids].float()) + self.extras["episode"]["terrain_level_min"] = torch.min(self.terrain_levels[env_ids].float()) if self.cfg.commands.curriculum: self.extras["episode"]["max_command_x"] = self.command_ranges["lin_vel_x"][1] + # log whether the episode ends by timeout or dead, or by reaching the goal + self.extras["episode"]["timeout_ratio"] = self.time_out_buf.float().sum() / self.reset_buf.float().sum() # send timeout info to the algorithm if self.cfg.env.send_timeouts: self.extras["time_outs"] = self.time_out_buf @@ -930,6 +991,9 @@ class LeggedRobot(BaseTask): # Penalize torques return torch.sum(torch.square(self.torques), dim=1) + def _reward_delta_torques(self): + return torch.sum(torch.square(self.torques - self.last_torques), dim=1) + def _reward_dof_vel(self): # Penalize dof velocities return torch.sum(torch.square(self.dof_vel), dim=1) diff --git a/legged_gym/legged_gym/envs/base/legged_robot_field.py b/legged_gym/legged_gym/envs/base/legged_robot_field.py index a4c2228..0f408ae 100644 --- a/legged_gym/legged_gym/envs/base/legged_robot_field.py +++ b/legged_gym/legged_gym/envs/base/legged_robot_field.py @@ -7,6 +7,7 @@ import torch from legged_gym.envs.base.legged_robot import LeggedRobot from legged_gym.utils.terrain import get_terrain_cls +from legged_gym.utils.observation import get_obs_slice from .legged_robot_config import LeggedRobotCfg class LeggedRobotField(LeggedRobot): @@ -102,10 +103,34 @@ class 
LeggedRobotField(LeggedRobot): self.cfg.normalization.clip_actions, device= self.device, ) + if isinstance(getattr(self.cfg.normalization, "clip_actions_low", None), (tuple, list)): + self.cfg.normalization.clip_actions_low = torch.tensor( + self.cfg.normalization.clip_actions_low, + device= self.device + ) + if isinstance(getattr(self.cfg.normalization, "clip_actions_high", None), (tuple, list)): + self.cfg.normalization.clip_actions_high = torch.tensor( + self.cfg.normalization.clip_actions_high, + device= self.device + ) if getattr(self.cfg.normalization, "clip_actions_method", None) == "tanh": clip_actions = self.cfg.normalization.clip_actions self.actions = (torch.tanh(actions) * clip_actions).to(self.device) actions_preprocessed = True + elif getattr(self.cfg.normalization, "clip_actions_method", None) == "hard": + if self.cfg.control.control_type == "P": + actions_low = getattr( + self.cfg.normalization, "clip_actions_low", + self.dof_pos_limits[:, 0] - self.default_dof_pos, + ) + actions_high = getattr( + self.cfg.normalization, "clip_actions_high", + self.dof_pos_limits[:, 1] - self.default_dof_pos, + ) + self.actions = torch.clip(actions, actions_low, actions_high) + else: + raise NotImplementedError() + actions_preprocessed = True if getattr(self.cfg.normalization, "clip_actions_delta", None) is not None: self.actions = torch.clip( self.actions, @@ -226,6 +251,7 @@ class LeggedRobotField(LeggedRobot): torch.div(pos_x, self.terrain.env_block_length, rounding_mode= "floor") - 1, min= 0.0, )).cpu() + self.extras["episode"]["max_power_throughout_episode"] = self.max_power_per_timestep[env_ids].max().cpu().item() return return_ @@ -245,6 +271,11 @@ class LeggedRobotField(LeggedRobot): min= 0.0, )).cpu() + self.max_power_per_timestep = torch.maximum( + self.max_power_per_timestep, + torch.max(torch.sum(self.substep_torques * self.substep_dof_vel, dim= -1), dim= -1)[0], + ) + return return_ def _compute_torques(self, actions): @@ -322,9 +353,40 @@ class LeggedRobotField(LeggedRobot): gymapi.IMAGE_COLOR, ))) + if getattr(self.cfg.normalization, "clip_actions_method", None) is None: + print("WARNING: No action clipping method is specified, actions will only be clipped by clip_actions.") + + # projected gravity bias (if needed) + if getattr(self.cfg.domain_rand, "randomize_gravity_bias", False): + print("Initializing gravity bias for domain randomization") + # add cross trajectory domain randomization on projected gravity bias + # uniform sample from range + self.gravity_bias = torch.rand(self.num_envs, 3, dtype= torch.float, device= self.device, requires_grad= False) + self.gravity_bias[:, 0] *= self.cfg.domain_rand.gravity_bias_range["x"][1] - self.cfg.domain_rand.gravity_bias_range["x"][0] + self.gravity_bias[:, 0] += self.cfg.domain_rand.gravity_bias_range["x"][0] + self.gravity_bias[:, 1] *= self.cfg.domain_rand.gravity_bias_range["y"][1] - self.cfg.domain_rand.gravity_bias_range["y"][0] + self.gravity_bias[:, 1] += self.cfg.domain_rand.gravity_bias_range["y"][0] + self.gravity_bias[:, 2] *= self.cfg.domain_rand.gravity_bias_range["z"][1] - self.cfg.domain_rand.gravity_bias_range["z"][0] + self.gravity_bias[:, 2] += self.cfg.domain_rand.gravity_bias_range["z"][0] + + self.max_power_per_timestep = torch.zeros(self.num_envs, dtype= torch.float32, device= self.device) + def _reset_buffers(self, env_ids): return_ = super()._reset_buffers(env_ids) if hasattr(self, "velocity_sample_points"): self.velocity_sample_points[env_ids] = 0. 
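For reference, the `clip_actions_method == "hard"` branch above is meant to pair with the `sdk_dof_range` table added to `a1_config.py` and the commented-out bound computation in `a1_field_config.py`. A minimal standalone sketch of that derivation follows; the default joint angles, `action_scale`, and `dof_pos_redundancy` values here are illustrative assumptions, not values taken from this patch.

```python
import torch

# Joint limits as added to A1RoughCfg.asset.sdk_dof_range in this patch (from a1_const.h).
sdk_dof_range = dict(
    Hip_max=0.802, Hip_min=-0.802,
    Thigh_max=4.19, Thigh_min=-1.05,
    Calf_max=-0.916, Calf_min=-2.7,
)
# Assumed for illustration only; the real values live in
# A1RoughCfg.init_state.default_joint_angles and A1RoughCfg.control.action_scale.
default_joint_angles = {"hip": 0.0, "thigh": 0.9, "calf": -1.8}
action_scale = 0.5
dof_pos_redundancy = 0.2  # safety margin kept inside the SDK limits

clip_actions_low, clip_actions_high = [], []
for sdk_name, sim_name in zip(["Hip", "Thigh", "Calf"], ["hip", "thigh", "calf"]):
    # action * action_scale + default_angle must stay inside [min + margin, max - margin]
    clip_actions_low.append(
        (sdk_dof_range[sdk_name + "_min"] + dof_pos_redundancy - default_joint_angles[sim_name]) / action_scale)
    clip_actions_high.append(
        (sdk_dof_range[sdk_name + "_max"] - dof_pos_redundancy - default_joint_angles[sim_name]) / action_scale)

# The "hard" clipping branch then clamps the raw policy output with these bounds.
raw_actions = torch.randn(4, 3)  # e.g. 4 envs, one leg's hip/thigh/calf actions
clipped = torch.clip(raw_actions, torch.tensor(clip_actions_low), torch.tensor(clip_actions_high))
print(clipped)
```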
+ + if getattr(self.cfg.domain_rand, "randomize_gravity_bias", False): + assert hasattr(self, "gravity_bias") + self.gravity_bias[env_ids] = torch.rand_like(self.gravity_bias[env_ids]) + self.gravity_bias[env_ids, 0] *= self.cfg.domain_rand.gravity_bias_range["x"][1] - self.cfg.domain_rand.gravity_bias_range["x"][0] + self.gravity_bias[env_ids, 0] += self.cfg.domain_rand.gravity_bias_range["x"][0] + self.gravity_bias[env_ids, 1] *= self.cfg.domain_rand.gravity_bias_range["y"][1] - self.cfg.domain_rand.gravity_bias_range["y"][0] + self.gravity_bias[env_ids, 1] += self.cfg.domain_rand.gravity_bias_range["y"][0] + self.gravity_bias[env_ids, 2] *= self.cfg.domain_rand.gravity_bias_range["z"][1] - self.cfg.domain_rand.gravity_bias_range["z"][0] + self.gravity_bias[env_ids, 2] += self.cfg.domain_rand.gravity_bias_range["z"][0] + + self.max_power_per_timestep[env_ids] = 0. + return return_ def _prepare_reward_function(self): @@ -483,6 +545,16 @@ class LeggedRobotField(LeggedRobot): self.cfg.env.obs_components, privileged= False, ) + if getattr(self.cfg.domain_rand, "randomize_gravity_bias", False) and "proprioception" in self.cfg.env.obs_components: + # NOTE: The logic order is a bit messed up here when combined with legged_robot_noisy.py + # When simulating the sensor bias/noise and latency, the gravity bias should be added before + # proprioception latency. + # However, considering gravity bias is randomized across trajectory, the latency simulation + # from legged_robot_noisy.py does not need to be computed after adding the bias. + # So, this computation order looks OK. + assert hasattr(self, "gravity_bias") + proprioception_slice = get_obs_slice(self.obs_segments, "proprioception") + self.obs_buf[:, proprioception_slice[0].start + 6: proprioception_slice[0].start + 9] += self.gravity_bias if self.add_noise: self.obs_buf += (2 * torch.rand_like(self.obs_buf) - 1) * self.noise_scale_vec @@ -496,9 +568,13 @@ class LeggedRobotField(LeggedRobot): if "proprioception" in getattr(self.cfg.env, "privileged_obs_components", []) \ and getattr(self.cfg.env, "privileged_use_lin_vel", False): # NOTE: according to self.get_obs_segment_from_components, "proprioception" observation - # is always the first part of this flattened observation. check super().compute_observations + # is always the FIRST part of this flattened observation. check super().compute_observations + # and self.cfg.env.use_lin_vel for the reason of this if branch.
self.privileged_obs_buf[:, :3] = self.base_lin_vel * self.obs_scales.lin_vel + if getattr(self.cfg.domain_rand, "randomize_privileged_gravity_bias", False) and "proprioception" in getattr(self.cfg.env, "privileged_obs_components", []): + assert hasattr(self, "gravity_bias") + proprioception_slice = get_obs_slice(self.privileged_obs_segments, "proprioception") + self.privileged_obs_buf[:, proprioception_slice[0].start + 6: proprioception_slice[0].start + 9] += self.gravity_bias for key in self.sensor_handles[0].keys(): if "camera" in key: @@ -558,12 +634,31 @@ class LeggedRobotField(LeggedRobot): def _create_envs(self): if self.cfg.domain_rand.randomize_motor: + mtr_rng = self.cfg.domain_rand.leg_motor_strength_range self.motor_strength = torch_rand_float( - self.cfg.domain_rand.leg_motor_strength_range[0], - self.cfg.domain_rand.leg_motor_strength_range[1], - (self.num_envs, 12), + mtr_rng[0], + mtr_rng[1], + (self.num_envs, self.num_actions), device=self.device, ) + + return_ = super()._create_envs() + + front_hip_names = getattr(self.cfg.asset, "front_hip_names", ["FR_hip_joint", "FL_hip_joint"]) + self.front_hip_indices = torch.zeros(len(front_hip_names), dtype=torch.long, device=self.device, requires_grad=False) + for i, name in enumerate(front_hip_names): + self.front_hip_indices[i] = self.dof_names.index(name) + + rear_hip_names = getattr(self.cfg.asset, "rear_hip_names", ["RR_hip_joint", "RL_hip_joint"]) + self.rear_hip_indices = torch.zeros(len(rear_hip_names), dtype=torch.long, device=self.device, requires_grad=False) + for i, name in enumerate(rear_hip_names): + self.rear_hip_indices[i] = self.dof_names.index(name) + + hip_names = front_hip_names + rear_hip_names + self.hip_indices = torch.zeros(len(hip_names), dtype=torch.long, device=self.device, requires_grad=False) + for i, name in enumerate(hip_names): + self.hip_indices[i] = self.dof_names.index(name) + return super()._create_envs() def _process_rigid_shape_props(self, props, env_id): @@ -703,10 +798,16 @@ class LeggedRobotField(LeggedRobot): def draw_volume_sample_points(self): sphere_geom = gymutil.WireframeSphereGeometry(0.005, 4, 4, None, color=(1, 0.1, 0)) + sphere_penetrate_geom = gymutil.WireframeSphereGeometry(0.005, 4, 4, None, color=(0.1, 0.6, 0.6)) + if self.cfg.terrain.selected == "BarrierTrack": + penetration_mask = self.terrain.get_penetration_mask(self.volume_sample_points.view(-1, 3)).view(self.num_envs, -1) for env_idx in range(self.num_envs): for point_idx in range(self.volume_sample_points.shape[1]): sphere_pose = gymapi.Transform(gymapi.Vec3(*self.volume_sample_points[env_idx, point_idx]), r= None) - gymutil.draw_lines(sphere_geom, self.gym, self.viewer, self.envs[env_idx], sphere_pose) + if penetration_mask[env_idx, point_idx]: + gymutil.draw_lines(sphere_penetrate_geom, self.gym, self.viewer, self.envs[env_idx], sphere_pose) + else: + gymutil.draw_lines(sphere_geom, self.gym, self.viewer, self.envs[env_idx], sphere_pose) ##### Additional rewards ##### def _reward_lin_vel_l2norm(self): @@ -715,6 +816,10 @@ class LeggedRobotField(LeggedRobot): def _reward_world_vel_l2norm(self): return torch.norm((self.commands[:, :2] - self.root_states[:, 7:9]), dim= 1) + def _reward_tracking_world_vel(self): + world_vel_error = torch.sum(torch.square(self.commands[:, :2] - self.root_states[:, 7:9]), dim= 1) + return torch.exp(-world_vel_error/self.cfg.rewards.tracking_sigma) + def _reward_legs_energy(self): return torch.sum(torch.square(self.torques * self.dof_vel), dim=1) @@ -730,6 +835,10 @@ class 
LeggedRobotField(LeggedRobot): def _reward_alive(self): return 1. + def _reward_dof_error(self): + dof_error = torch.sum(torch.square(self.dof_pos - self.default_dof_pos), dim=1) + return dof_error + def _reward_lin_cmd(self): """ This reward term does not depend on the policy, depends on the command """ return torch.norm(self.commands[:, :2], dim= 1) @@ -767,6 +876,157 @@ class LeggedRobotField(LeggedRobot): penetration_mask *= torch.norm(self.velocity_sample_points, dim= -1) + 1e-3 return torch.sum(penetration_mask, dim= -1) + def _reward_tilt_cond(self): + """ Conditioned reward term in terms of whether the robot is engaging the tilt obstacle + Use positive factor to enable rolling angle when incountering tilt obstacle + """ + if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0]) + roll, pitch, yaw = get_euler_xyz(self.root_states[:, 3:7]) + pi = torch.acos(torch.zeros(1)).item() * 2 # which is 3.1415927410125732 + roll[roll > pi] -= pi * 2 # to range (-pi, pi) + roll[roll < -pi] += pi * 2 # to range (-pi, pi) + if hasattr(self, "volume_sample_points"): + self.refresh_volume_sample_points() + stepping_obstacle_info = self.terrain.get_stepping_obstacle_info(self.volume_sample_points.view(-1, 3)) + else: + stepping_obstacle_info = self.terrain.get_stepping_obstacle_info(self.root_states[:, :3]) + stepping_obstacle_info = stepping_obstacle_info.view(self.num_envs, -1, stepping_obstacle_info.shape[-1]) + # Assuming that each robot will only be in one obstacle or non obstacle. + robot_stepping_obstacle_id = torch.max(stepping_obstacle_info[:, :, 0], dim= -1)[0] + tilting_mask = robot_stepping_obstacle_id == self.terrain.track_options_id_dict["tilt"] + return_ = torch.where(tilting_mask, torch.clip(torch.abs(roll), 0, torch.pi/2), -torch.clip(torch.abs(roll), 0, torch.pi/2)) + return return_ + + def _reward_hip_pos(self): + return torch.sum(torch.square(self.dof_pos[:, self.hip_indices] - self.default_dof_pos[:, self.hip_indices]), dim=1) + + def _reward_front_hip_pos(self): + """ Reward the robot to stop moving its front hips """ + return torch.sum(torch.square(self.dof_pos[:, self.front_hip_indices] - self.default_dof_pos[:, self.front_hip_indices]), dim=1) + + def _reward_rear_hip_pos(self): + """ Reward the robot to stop moving its rear hips """ + return torch.sum(torch.square(self.dof_pos[:, self.rear_hip_indices] - self.default_dof_pos[:, self.rear_hip_indices]), dim=1) + + def _reward_down_cond(self): + if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0]) + if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0]) + self.refresh_volume_sample_points() + engaging_obstacle_info = self.terrain.get_engaging_block_info( + self.root_states[:, :3], + self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3) + ) + roll, pitch, yaw = get_euler_xyz(self.root_states[:, 3:7]) + pi = torch.acos(torch.zeros(1)).item() * 2 # which is 3.1415927410125732 + pitch[pitch > pi] -= pi * 2 # to range (-pi, pi) + pitch[pitch < -pi] += pi * 2 # to range (-pi, pi) + engaging_mask = (engaging_obstacle_info[:, 1 + self.terrain.track_options_id_dict["jump"]] > 0) \ + & (engaging_obstacle_info[:, 1 + self.terrain.max_track_options + 2] < 0.) 
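For reference, `_reward_down_cond` and the other `*_cond` terms in this hunk share one pattern: compute an error, squash it with `exp(-err / tracking_sigma)`, and gate it with a boolean mask that is non-zero only while the robot engages the relevant obstacle block. A minimal self-contained sketch of that pattern, using toy tensors rather than the BarrierTrack terrain API (the `tracking_sigma` value is a placeholder):

```python
import torch

def masked_tracking_reward(error, engaging_mask, tracking_sigma=0.25):
    """Bounded tracking-style reward, paid only where engaging_mask is True."""
    return torch.exp(-error / tracking_sigma) * engaging_mask

# Toy example: encourage pitching down about 0.2 rad, but only for engaged envs.
pitch = torch.tensor([0.20, 0.00, -0.30, 0.25])
engaging = torch.tensor([True, True, False, True])
reward = masked_tracking_reward(torch.abs(pitch - 0.2), engaging)
print(reward)  # envs outside the obstacle get exactly 0
```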
+ pitch_err = torch.abs(pitch - 0.2) + return torch.exp(-pitch_err/self.cfg.rewards.tracking_sigma) * engaging_mask # the higher positive factor, the more you want the robot to pitch down 0.2 rad + + def _reward_jump_x_vel_cond(self): + if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0]) + if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0]) + self.refresh_volume_sample_points() + engaging_obstacle_info = self.terrain.get_engaging_block_info( + self.root_states[:, :3], + self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3) + ) + engaging_mask = (engaging_obstacle_info[:, 1 + self.terrain.track_options_id_dict["jump"]] > 0) \ + & (engaging_obstacle_info[:, 1 + self.terrain.max_track_options + 2] > 0.) \ + & (engaging_obstacle_info[:, 0] > 0) # engaging jump-up, not engaging jump-down, positive distance. + roll, pitch, yaw = get_euler_xyz(self.root_states[:, 3:7]) + pi = torch.acos(torch.zeros(1)).item() * 2 # which is 3.1415927410125732 + pitch[pitch > pi] -= pi * 2 # to range (-pi, pi) + pitch[pitch < -pi] += pi * 2 # to range (-pi, pi) + pitch_up_mask = pitch < -0.75 # a hack value + + return torch.clip(self.base_lin_vel[:, 0], max= 1.5) * engaging_mask * pitch_up_mask + + def _reward_sync_legs_cond(self): + """ A hack to force same actuation on both rear legs when jump. """ + if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0]) + if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0]) + self.refresh_volume_sample_points() + engaging_obstacle_info = self.terrain.get_engaging_block_info( + self.root_states[:, :3], + self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3) + ) + engaging_mask = (engaging_obstacle_info[:, 1 + self.terrain.track_options_id_dict["jump"]] > 0) \ + & (engaging_obstacle_info[:, 1 + self.terrain.max_track_options + 2] > 0.) \ + & (engaging_obstacle_info[:, 0] > 0) # engaging jump-up, not engaging jump-down, positive distance. + rr_legs = torch.clone(self.actions[:, 6:9]) # shoulder, thigh, calf + rl_legs = torch.clone(self.actions[:, 9:12]) # shoulder, thigh, calf + rl_legs[:, 0] *= -1 # flip the sign of shoulder action + return torch.norm(rr_legs - rl_legs, dim= -1) * engaging_mask + + def _reward_sync_all_legs_cond(self): + """ A hack to force same actuation on both front/rear legs when jump. """ + if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0]) + if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0]) + self.refresh_volume_sample_points() + engaging_obstacle_info = self.terrain.get_engaging_block_info( + self.root_states[:, :3], + self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3) + ) + engaging_mask = (engaging_obstacle_info[:, 1 + self.terrain.track_options_id_dict["jump"]] > 0) \ + & (engaging_obstacle_info[:, 1 + self.terrain.max_track_options + 2] > 0.) \ + & (engaging_obstacle_info[:, 0] > 0) # engaging jump-up, not engaging jump-down, positive distance. 
+ right_legs = torch.clone(torch.cat([ + self.actions[:, 0:3], + self.actions[:, 6:9], + ], dim= -1)) # shoulder, thigh, calf + left_legs = torch.clone(torch.cat([ + self.actions[:, 3:6], + self.actions[:, 9:12], + ], dim= -1)) # shoulder, thigh, calf + left_legs[:, 0] *= -1 # flip the sign of shoulder action + left_legs[:, 3] *= -1 # flip the sign of shoulder action + return torch.norm(right_legs - left_legs, p= 1, dim= -1) * engaging_mask + + def _reward_sync_all_legs(self): + right_legs = torch.clone(torch.cat([ + self.actions[:, 0:3], + self.actions[:, 6:9], + ], dim= -1)) # shoulder, thigh, calf + left_legs = torch.clone(torch.cat([ + self.actions[:, 3:6], + self.actions[:, 9:12], + ], dim= -1)) # shoulder, thigh, calf + left_legs[:, 0] *= -1 # flip the sign of shoulder action + left_legs[:, 3] *= -1 # flip the sign of shoulder action + return torch.norm(right_legs - left_legs, p= 1, dim= -1) + + def _reward_dof_error_cond(self): + """ Force dof error when not engaging obstacle """ + if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0]) + if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0]) + self.refresh_volume_sample_points() + engaging_obstacle_info = self.terrain.get_engaging_block_info( + self.root_states[:, :3], + self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3) + ) + engaging_mask = (engaging_obstacle_info[:, 1] > 0) + return torch.sum(torch.square(self.dof_pos - self.default_dof_pos), dim=1) * engaging_mask + + def _reward_leap_bonous_cond(self): + """ counteract the tracking reward loss during leap""" + if not self.check_BarrierTrack_terrain(): return torch.zeros_like(self.root_states[:, 0]) + if not hasattr(self, "volume_sample_points"): return torch.zeros_like(self.root_states[:, 0]) + self.refresh_volume_sample_points() + engaging_obstacle_info = self.terrain.get_engaging_block_info( + self.root_states[:, :3], + self.volume_sample_points - self.root_states[:, :3].unsqueeze(-2), # (n_envs, n_points, 3) + ) + engaging_mask = (engaging_obstacle_info[:, 1 + self.terrain.track_options_id_dict["leap"]] > 0) \ + & (-engaging_obstacle_info[:, 1 + self.terrain.max_track_options + 1] < engaging_obstacle_info[:, 0]) \ + & (engaging_obstacle_info[:, 0] < 0.) # engaging jump-up, not engaging jump-down, positive distance. 
+ + world_vel_error = torch.sum(torch.square(self.commands[:, :2] - self.root_states[:, 7:9]), dim= 1) + return (1 - torch.exp(-world_vel_error/self.cfg.rewards.tracking_sigma)) * engaging_mask # reverse version of tracking reward + + ##### Some helper functions that override parent class attributes ##### @property def all_obs_components(self): diff --git a/legged_gym/legged_gym/envs/base/legged_robot_noisy.py b/legged_gym/legged_gym/envs/base/legged_robot_noisy.py index 200cc20..12cbdaf 100644 --- a/legged_gym/legged_gym/envs/base/legged_robot_noisy.py +++ b/legged_gym/legged_gym/envs/base/legged_robot_noisy.py @@ -40,11 +40,24 @@ class LeggedRobotNoisy(LeggedRobotField): self.actions_scaled = self.actions * self.cfg.control.action_scale control_type = self.cfg.control.control_type if control_type == "P": - self.actions_scaled_torque_clipped = self.clip_position_action_by_torque_limit(self.actions_scaled) + actions_scaled_torque_clipped = self.clip_position_action_by_torque_limit(self.actions_scaled) else: raise NotImplementedError else: - self.actions_scaled_torque_clipped = self.actions * self.cfg.control.action_scale + actions_scaled_torque_clipped = self.actions * self.cfg.control.action_scale + + if getattr(self.cfg.control, "action_delay", False): + # always put the latest action at the end of the buffer + self.actions_history_buffer = torch.roll(self.actions_history_buffer, shifts= -1, dims= 0) + self.actions_history_buffer[-1] = actions_scaled_torque_clipped + # get the delayed action + self.action_delayed_frames = ((self.current_action_delay / self.dt) + 1).to(int) + self.actions_scaled_torque_clipped = self.actions_history_buffer[ + -self.action_delayed_frames, + torch.arange(self.num_envs, device= self.device), + ] + else: + self.actions_scaled_torque_clipped = actions_scaled_torque_clipped return return_ @@ -79,7 +92,10 @@ class LeggedRobotNoisy(LeggedRobotField): torch.max(torch.abs(self.torques), dim= -1)[0], self.max_torques, ) + ### The set torque limit is usally smaller than the robot dataset self.torque_exceed_count_substep[(torch.abs(self.torques) > self.torque_limits).any(dim= -1)] += 1 + ### Hack to check the torque limit exceeding by your own value. 
+ # self.torque_exceed_count_envstep[(torch.abs(self.torques) > 38.).any(dim= -1)] += 1 ### count how many times in the episode the robot is out of dof pos limit (summing all dofs) self.out_of_dof_pos_limit_count_substep += self._reward_dof_pos_limits().int() @@ -108,6 +124,12 @@ class LeggedRobotNoisy(LeggedRobotField): def _post_physics_step_callback(self): super()._post_physics_step_callback() + if hasattr(self, "actions_history_buffer"): + resampling_time = getattr(self.cfg.control, "action_delay_resampling_time", self.dt) + resample_env_ids = (self.episode_length_buf % int(resampling_time / self.dt) == 0).nonzero(as_tuple= False).flatten() + if len(resample_env_ids) > 0: + self._resample_action_delay(resample_env_ids) + if hasattr(self, "proprioception_buffer"): resampling_time = getattr(self.cfg.sensor.proprioception, "latency_resampling_time", self.dt) resample_env_ids = (self.episode_length_buf % int(resampling_time / self.dt) == 0).nonzero(as_tuple= False).flatten() @@ -122,6 +144,14 @@ class LeggedRobotNoisy(LeggedRobotField): self.torque_exceed_count_envstep[(torch.abs(self.substep_torques) > self.torque_limits).any(dim= 1).any(dim= 1)] += 1 + def _resample_action_delay(self, env_ids): + self.current_action_delay[env_ids] = torch_rand_float( + self.cfg.control.action_delay_range[0], + self.cfg.control.action_delay_range[1], + (len(env_ids), 1), + device= self.device, + ).flatten() + def _resample_proprioception_latency(self, env_ids): self.current_proprioception_latency[env_ids] = torch_rand_float( self.cfg.sensor.proprioception.latency_range[0], @@ -142,6 +172,27 @@ class LeggedRobotNoisy(LeggedRobotField): return_ = super()._init_buffers() all_obs_components = self.all_obs_components + if getattr(self.cfg.control, "action_delay", False): + assert hasattr(self.cfg.control, "action_delay_range") and hasattr(self.cfg.control, "action_delay_resample_time"), "Please specify action_delay_range and action_delay_resample_time in the config file." 
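For reference, the action-delay machinery configured here works as follows: `actions_history_buffer` has shape `(history_length, num_envs, num_actions)`, is rolled once per policy step in `pre_physics_step`, and each env reads back the entry that is `delay / dt + 1` frames old. Below is a toy reproduction of that indexing outside the simulator; `dt` is an assumed value, and the delay range matches the commented-out `action_delay_range` in `a1_field_config.py`.

```python
import torch

dt = 0.02                          # assumed control step, for illustration only
action_delay_range = (0.002, 0.022)
num_envs, num_actions = 4, 12

history_length = int((action_delay_range[1] + dt) / dt)      # same formula as in the patch
buf = torch.zeros(history_length, num_envs, num_actions)
delay = torch.empty(num_envs).uniform_(*action_delay_range)  # per-env latency
delayed_frames = ((delay / dt) + 1).to(int)                   # 1 -> newest action, 2 -> one step old

for step in range(3):
    new_action = torch.full((num_envs, num_actions), float(step))
    buf = torch.roll(buf, shifts=-1, dims=0)    # drop the oldest slot
    buf[-1] = new_action                        # newest action always sits at the end
    applied = buf[-delayed_frames, torch.arange(num_envs)]  # per-env delayed action
    print(step, applied[:, 0])
```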
+ """ Used in pre-physics step """ + self.cfg.control.action_history_buffer_length = int((self.cfg.control.action_delay_range[1] + self.dt) / self.dt) + self.actions_history_buffer = torch.zeros( + ( + self.cfg.control.action_history_buffer_length, + self.num_envs, + self.num_actions, + ), + dtype= torch.float32, + device= self.device, + ) + self.current_action_delay = torch_rand_float( + self.cfg.control.action_delay_range[0], + self.cfg.control.action_delay_range[1], + (self.num_envs, 1), + device= self.device, + ).flatten() + self.action_delayed_frames = ((self.current_action_delay / self.dt) + 1).to(int) + if "proprioception" in all_obs_components and hasattr(self.cfg.sensor, "proprioception"): """ Adding proprioception delay buffer """ self.cfg.sensor.proprioception.buffer_length = int((self.cfg.sensor.proprioception.latency_range[1] + self.dt) / self.dt) @@ -154,13 +205,13 @@ class LeggedRobotNoisy(LeggedRobotField): dtype= torch.float32, device= self.device, ) - self.proprioception_delayed_frames = torch.ones((self.num_envs,), device= self.device, dtype= int) * self.cfg.sensor.proprioception.buffer_length self.current_proprioception_latency = torch_rand_float( self.cfg.sensor.proprioception.latency_range[0], self.cfg.sensor.proprioception.latency_range[1], (self.num_envs, 1), device= self.device, ).flatten() + self.proprioception_delayed_frames = ((self.current_proprioception_latency / self.dt) + 1).to(int) if "forward_depth" in all_obs_components and hasattr(self.cfg.sensor, "forward_camera"): output_resolution = getattr(self.cfg.sensor.forward_camera, "output_resolution", self.cfg.sensor.forward_camera.resolution) @@ -220,6 +271,9 @@ class LeggedRobotNoisy(LeggedRobotField): def _reset_buffers(self, env_ids): return_ = super()._reset_buffers(env_ids) + if hasattr(self, "actions_history_buffer"): + self.actions_history_buffer[:, env_ids] = 0. + self.action_delayed_frames[env_ids] = self.cfg.control.action_history_buffer_length if hasattr(self, "forward_depth_buffer"): self.forward_depth_buffer[:, env_ids] = 0. self.forward_depth_delayed_frames[env_ids] = self.cfg.sensor.forward_camera.buffer_length @@ -537,12 +591,11 @@ class LeggedRobotNoisy(LeggedRobotField): -self.proprioception_delayed_frames, torch.arange(self.num_envs, device= self.device), ].clone() + ### NOTE: WARN: ERROR: remove this code in final version, no action delay should be used. + if getattr(self.cfg.sensor.proprioception, "delay_action_obs", False) or getattr(self.cfg.sensor.proprioception, "delay_privileged_action_obs", False): + raise ValueError("LeggedRobotNoisy: No action delay should be used. Please remove these settings") # The last-action is not delayed. 
- if getattr(self.cfg.sensor.proprioception, "delay_action_obs", False): - not_delayed_mask = torch.randint(0, 1, size= (self.num_envs,), device= self.device).bool() - self.proprioception_output[not_delayed_mask, -12:] = self.proprioception_buffer[-1, not_delayed_mask, -12:] - else: - self.proprioception_output[:, -12:] = self.proprioception_buffer[-1, :, -12:] + self.proprioception_output[:, -12:] = self.proprioception_buffer[-1, :, -12:] self.proprioception_refreshed = True if not hasattr(self.cfg.sensor, "proprioception") or privileged: return super()._get_proprioception_obs(privileged) @@ -573,5 +626,12 @@ class LeggedRobotNoisy(LeggedRobotField): # sum along decimation axis and dof axis return torch.square(exceeded_torques).sum(dim= 1).sum(dim= 1) + def _reward_exceed_torque_limits_l1norm(self): + """ square function for exceeding part """ + exceeded_torques = torch.abs(self.substep_torques) - self.torque_limits + exceeded_torques[exceeded_torques < 0.] = 0. + # sum along decimation axis and dof axis + return torch.norm(exceeded_torques, p= 1, dim= -1).sum(dim= 1) + def _reward_exceed_dof_pos_limits(self): return self.substep_exceed_dof_pos_limits.to(torch.float32).sum(dim= -1).mean(dim= -1) diff --git a/legged_gym/legged_gym/envs/go1/go1_crawl_config.py b/legged_gym/legged_gym/envs/go1/go1_crawl_config.py new file mode 100644 index 0000000..fb04a55 --- /dev/null +++ b/legged_gym/legged_gym/envs/go1/go1_crawl_config.py @@ -0,0 +1,127 @@ +import numpy as np +from os import path as osp +from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO +from legged_gym.utils.helpers import merge_dict + +class Go1CrawlCfg( Go1FieldCfg ): + class init_state( Go1FieldCfg.init_state ): + pos = [0., 0., 0.45] + + #### uncomment this to train non-virtual terrain + class sensor( Go1FieldCfg.sensor ): + class proprioception( Go1FieldCfg.sensor.proprioception ): + latency_range = [0.04-0.0025, 0.04+0.0075] + #### uncomment the above to train non-virtual terrain + + class terrain( Go1FieldCfg.terrain ): + max_init_terrain_level = 2 + border_size = 5 + slope_treshold = 20. + curriculum = True + + BarrierTrack_kwargs = merge_dict(Go1FieldCfg.terrain.BarrierTrack_kwargs, dict( + options= [ + "crawl", + ], + track_block_length= 1.6, + crawl= dict( + height= (0.28, 0.5), + depth= (0.1, 0.6), # size along the forward axis + wall_height= 0.6, + no_perlin_at_obstacle= False, + ), + virtual_terrain= False, # Change this to False for real terrain + )) + + TerrainPerlin_kwargs = merge_dict(Go1FieldCfg.terrain.TerrainPerlin_kwargs, dict( + zScale= 0.06, + )) + + class commands( Go1FieldCfg.commands ): + class ranges( Go1FieldCfg.commands.ranges ): + lin_vel_x = [0.3, 0.8] + lin_vel_y = [0.0, 0.0] + ang_vel_yaw = [0., 0.] + + class control( Go1FieldCfg.control ): + computer_clip_torque = False + + class asset( Go1FieldCfg.asset ): + terminate_after_contacts_on = ["base"] + + class termination( Go1FieldCfg.termination ): + # additional factors that determines whether to terminates the episode + termination_terms = [ + "roll", + "pitch", + "z_low", + "z_high", + "out_of_track", + ] + + class domain_rand( Go1FieldCfg.domain_rand ): + init_base_rot_range = dict( + roll= [-0.1, 0.1], + pitch= [-0.1, 0.1], + ) + init_base_vel_range = [-0.1, 0.1] + + class rewards( Go1FieldCfg.rewards ): + class scales: + # tracking_ang_vel = 0.05 + # world_vel_l2norm = -1. + # legs_energy_substeps = -4e-5 + # alive = 5. 
+ # penetrate_depth = -6e-2 + # penetrate_volume = -6e-2 + # exceed_dof_pos_limits = -8e-1 + # exceed_torque_limits_l1norm = -8e-1 + # lin_pos_y = -0.1 + ############################################### + tracking_ang_vel = 0.05 + tracking_world_vel = 5. + # world_vel_l2norm = -1. + # alive = 2. + legs_energy_substeps = -1e-5 + # penetrate_depth = -6e-2 # comment this out if trianing non-virtual terrain + # penetrate_volume = -6e-2 # comment this out if trianing non-virtual terrain + exceed_dof_pos_limits = -8e-1 + # exceed_torque_limits_i = -2e-1 + exceed_torque_limits_l1norm = -1. + # collision = -0.05 + # tilt_cond = 0.1 + torques = -1e-5 + yaw_abs = -0.1 + lin_pos_y = -0.1 + soft_dof_pos_limit = 0.7 + only_positive_rewards = False + + class curriculum( Go1FieldCfg.curriculum ): + penetrate_volume_threshold_harder = 1500 + penetrate_volume_threshold_easier = 10000 + penetrate_depth_threshold_harder = 10 + penetrate_depth_threshold_easier = 400 + +logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs") +class Go1CrawlCfgPPO( Go1FieldCfgPPO ): + class algorithm( Go1FieldCfgPPO.algorithm ): + entropy_coef = 0.0 + clip_min_std = 0.1 + + class runner( Go1FieldCfgPPO.runner ): + resume = True + load_run = "{Your traind walking model directory}" + load_run = "Sep20_03-37-32_SkillopensourcePlaneWalking_pEnergySubsteps1e-5_pTorqueExceedIndicate1e-1_aScale0.5_tClip202025" + load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Sep26_14-30-24_Skills_crawl_propDelay0.04-0.05_pEnergy-2e-5_pDof8e-01_pTorqueL14e-01_rTilt5e-01_pCollision0.2_maxPushAng0.5_kp40_fromSep26_01-38-19") + load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct11_12-19-00_Skills_crawl_propDelay0.04-0.05_pEnergy-1e-5_pDof8e-01_pTorqueL14e-01_pPosY0.1_maxPushAng0.3_kp40_fromOct09_09-58-26") + + run_name = "".join(["Skills_", + ("Multi" if len(Go1CrawlCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else (Go1CrawlCfg.terrain.BarrierTrack_kwargs["options"][0] if Go1CrawlCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")), + ("_pEnergy" + np.format_float_scientific(-Go1CrawlCfg.rewards.scales.legs_energy_substeps, precision=1, exp_digits=1)), + ("_pDof" + np.format_float_scientific(-Go1CrawlCfg.rewards.scales.exceed_dof_pos_limits, precision=1, exp_digits=1)), + ("_pTorqueL1" + np.format_float_scientific(-Go1CrawlCfg.rewards.scales.exceed_torque_limits_l1norm, precision=1, exp_digits=1)), + ("_noComputerClip" if not Go1CrawlCfg.control.computer_clip_torque else ""), + ("_noTanh"), + ]) + max_iterations = 20000 + save_interval = 500 \ No newline at end of file diff --git a/legged_gym/legged_gym/envs/go1/go1_down_config.py b/legged_gym/legged_gym/envs/go1/go1_down_config.py new file mode 100644 index 0000000..b2b7219 --- /dev/null +++ b/legged_gym/legged_gym/envs/go1/go1_down_config.py @@ -0,0 +1,166 @@ +from os import path as osp +import numpy as np +from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO +from legged_gym.utils.helpers import merge_dict + +class Go1DownCfg( Go1FieldCfg ): + class init_state( Go1FieldCfg.init_state ): + pos = [0., 0., 0.45] + + #### uncomment this to train non-virtual terrain + class sensor( Go1FieldCfg.sensor ): + class proprioception( Go1FieldCfg.sensor.proprioception ): + latency_range = [0.04-0.0025, 0.04+0.0075] + #### uncomment the above to train non-virtual terrain + + class terrain( Go1FieldCfg.terrain ): + max_init_terrain_level = 2 + border_size = 5 + slope_treshold = 20. 
+ curriculum = False + + BarrierTrack_kwargs = merge_dict(Go1FieldCfg.terrain.BarrierTrack_kwargs, dict( + options= [ + "jump", + ], + track_block_length= 1.6, + jump= dict( + height= (0.2, 0.45), + depth= 0.3, + fake_offset= 0.0, # [m] an offset that makes it easier for the robot to get into the obstacle + jump_down_prob= 1., # probability of jumping down; use it in non-virtual terrain + ), + virtual_terrain= True, # Change this to False for real terrain + no_perlin_threshold= 0.6, + randomize_obstacle_order= True, + n_obstacles_per_track= 3, + )) + + TerrainPerlin_kwargs = merge_dict(Go1FieldCfg.terrain.TerrainPerlin_kwargs, dict( + zScale= [0.04, 0.15], + )) + + class commands( Go1FieldCfg.commands ): + class ranges( Go1FieldCfg.commands.ranges ): + lin_vel_x = [0.5, 1.] + lin_vel_y = [0.0, 0.0] + ang_vel_yaw = [0., 0.] + + class control( Go1FieldCfg.control ): + computer_clip_torque = False + + class asset( Go1FieldCfg.asset ): + terminate_after_contacts_on = ["base"] + + class termination( Go1FieldCfg.termination ): + # additional factors that determine whether to terminate the episode + termination_terms = [ + "roll", + "pitch", + "z_low", + "z_high", + "out_of_track", + ] + z_low_kwargs = merge_dict(Go1FieldCfg.termination.z_low_kwargs, dict( + threshold= -3., + )) + + class domain_rand( Go1FieldCfg.domain_rand ): + init_base_pos_range = dict( + x= [0.2, 0.6], + y= [-0.25, 0.25], + ) + init_base_rot_range = dict( + roll= [-0.1, 0.1], + pitch= [-0.1, 0.1], + ) + init_base_vel_range = merge_dict(Go1FieldCfg.domain_rand.init_base_vel_range, dict( + x= [-0.1, 0.5], + )) + + push_robots = True + + class rewards( Go1FieldCfg.rewards ): + class scales: + action_rate = -0.1 + collision = -10.0 + delta_torques = -1e-07 + dof_acc = -1e-07 + down_cond = 0.04 + exceed_dof_pos_limits = -0.001 + exceed_torque_limits_l1norm = -0.2 + feet_contact_forces = -0.01 + hip_pos = -5.0 + legs_energy_substeps = -5e-06 + lin_pos_y = -0.3 + penetrate_depth = -0.002 + penetrate_volume = -0.002 + torques = -0.0001 + tracking_ang_vel = 0.1 + tracking_world_vel = 5.0 + yaw_abs = -0. + soft_dof_pos_limit = 0.75 + max_contact_force = 200.
+ only_positive_rewards = False + + class noise( Go1FieldCfg.noise ): + add_noise = False + + class curriculum( Go1FieldCfg.curriculum ): + penetrate_volume_threshold_harder = 2000000 + penetrate_volume_threshold_easier = 4000000 + penetrate_depth_threshold_harder = 200000 + penetrate_depth_threshold_easier = 400000 + + +logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs") +class Go1DownCfgPPO( Go1FieldCfgPPO ): + class algorithm( Go1FieldCfgPPO.algorithm ): + entropy_coef = 0.0 + clip_min_std = 0.2 + + class runner( Go1FieldCfgPPO.runner ): + policy_class_name = "ActorCriticRecurrent" + experiment_name = "field_go1" + resume = True + load_run = "{Your trained climb model directory}" + load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Sep28_14-59-53_Skills_down_pEnergySubsteps-8e-06_pPenD1e-02_pDofLimit-4e-01_pYawAbs_pTorqueL14e-01_pCollision-1e-1_downRedundant0.1_softDof0.8_pushRobot_propDelay0.04-0.05_kp40_kd0.5fromSep27_14-32-13") + load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct14_02-06-47_Skills_down_comXRange-0.2-0.2_pEnergySubsteps-4e-06_rAlive3_pPenD4e-03_pDHarder2.e+05_pushRobot_noTanh_virtualfromOct09_09-49-58") + # load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct25_10-48-54_down_pEnergySubsteps-4e-05_pDofLimit-4e-01_pYawAbs_pTorqueL14e-01_pActRate1.e-1_rDownCond3.e-1_kp40_kd0.5_virtualfromOct14_02-06-47") + # load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct26_08-45-02_Skills_down_pEnergySubsteps-4e-05_rTrackVel4._pZVel1e-01_pAngXYVel5e-02_pDTorques1e-07_rDownCond1.e-1_noTanh_virtualfromOct25_10-48-54") + load_run = "Oct28_07-38-39_Skills_down_pEnergySubsteps-4e-05_rTrackVel5._pHipPos4e-01_rDownCond4.e-2_noTanh_virtualfromOct14_02-06-47" + load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct28_13-08-21_Skills_down_pEnergySubsteps-4e-05_rTrackVel5._pHipPos8e-01_rDownCond4.e-2_withPropNoise_noTanh_virtualfromOct28_07-38-39") + load_run = "Oct29_22-07-51_Skills_down_pEnergySubsteps-4e-05_rTrackVel5._pHipPos8e-01_rDownCond4.e-2_noComputerClip_noPropNoise_noTanh_virtualfromOct28_13-08-21" + load_run = "Oct30_04-23-31_Skills_down_pEnergySubsteps-1e-05_rTrackVel5._pTorqueL14e-01_pHipPos5e+00_rDownCond4.e-2_allowNegativeReward_pushRobot_noComputerClip_noPropNoise_noTanh_virtual_fromOct29_22-07-51" + + run_name = "".join(["Skills_", + ("down" if Go1DownCfg.terrain.BarrierTrack_kwargs["jump"]["jump_down_prob"] > 0. else "jump"), + ("_pEnergySubsteps{:.0e}".format(Go1DownCfg.rewards.scales.legs_energy_substeps) if getattr(Go1DownCfg.rewards.scales, "legs_energy_substeps", 0.) != -1e-6 else ""), + ("_rTrackVel" + np.format_float_positional(Go1DownCfg.rewards.scales.tracking_world_vel) if getattr(Go1DownCfg.rewards.scales, "tracking_world_vel", 0.) != 0. else ""), + # ("_pLinVel" + np.format_float_positional(-Go1DownCfg.rewards.scales.world_vel_l2norm) if getattr(Go1DownCfg.rewards.scales, "world_vel_l2norm", 0.) != 0.0 else ""), + # ("_rAlive" + np.format_float_positional(Go1DownCfg.rewards.scales.alive) if getattr(Go1DownCfg.rewards.scales, "alive", 0.) != 0.0 else ""), + # ("_pDofLimit{:.0e}".format(Go1DownCfg.rewards.scales.exceed_dof_pos_limits) if getattr(Go1DownCfg.rewards.scales, "exceed_dof_pos_limits") != 0.0 else ""), + # ("_pYawAbs" if getattr(Go1DownCfg.rewards.scales, "yaw_abs", 0.) != 0.1 else ""), + ("_pTorqueL1{:.0e}".format(-Go1DownCfg.rewards.scales.exceed_torque_limits_l1norm) if getattr(Go1DownCfg.rewards.scales, "exceed_torque_limits_l1norm", 0.) 
!= 0 else ""), + # ("_pActRate" + np.format_float_scientific(-Go1DownCfg.rewards.scales.action_rate, precision=1, exp_digits=1) if getattr(Go1DownCfg.rewards.scales, "action_rate", 0.) != 0.0 else ""), + # ("_pZVel" + np.format_float_scientific(-Go1DownCfg.rewards.scales.lin_vel_z, precision=1, trim="-") if getattr(Go1DownCfg.rewards.scales, "lin_vel_z", 0.) != 0. else ""), + # ("_pAngXYVel" + np.format_float_scientific(-Go1DownCfg.rewards.scales.ang_vel_xy, precision=1, trim="-") if getattr(Go1DownCfg.rewards.scales, "ang_vel_xy", 0.) != 0. else ""), + # ("_pDTorques" + np.format_float_scientific(-Go1DownCfg.rewards.scales.delta_torques, precision=1, trim="-") if getattr(Go1DownCfg.rewards.scales, "delta_torques", 0.) != 0. else ""), + # ("_pHipPos" + np.format_float_scientific(-Go1DownCfg.rewards.scales.hip_pos, precision=1, trim="-") if getattr(Go1DownCfg.rewards.scales, "hip_pos", 0.) != 0. else ""), + # ("_rDownCond" + np.format_float_scientific(Go1DownCfg.rewards.scales.down_cond, precision=1, exp_digits=1) if getattr(Go1DownCfg.rewards.scales, "down_cond", 0.) != 0.0 else ""), + ("_pCollision{:d}".format(int(Go1DownCfg.rewards.scales.collision)) if getattr(Go1DownCfg.rewards.scales, "collision", 0.) != 0.0 else ""), + # ("_downRedundant{:.1f}".format(Go1DownCfg.terrain.BarrierTrack_kwargs["jump"]["down_forward_length"]) if (Go1DownCfg.terrain.BarrierTrack_kwargs["jump"].get("down_forward_length", 0.) > 0. and Go1DownCfg.terrain.BarrierTrack_kwargs["jump"].get("jump_down_prob", 0.) > 0.) else ""), + ("_cmdRange{:.1f}-{:.1f}".format(*Go1DownCfg.commands.ranges.lin_vel_x)), + ("_allowNegativeReward" if not Go1DownCfg.rewards.only_positive_rewards else ""), + ("_pushRobot" if Go1DownCfg.domain_rand.push_robots else "_noPush"), + ("_noComputerClip" if not Go1DownCfg.control.computer_clip_torque else ""), + ("_noPropNoise" if not Go1DownCfg.noise.add_noise else "_withPropNoise"), + # ("_kp{:d}".format(int(Go1DownCfg.control.stiffness["joint"])) if Go1DownCfg.control.stiffness["joint"] != 50 else ""), + # ("_kd{:.1f}".format(Go1DownCfg.control.damping["joint"]) if Go1DownCfg.control.damping["joint"] != 1. else ""), + ("_noTanh"), + ("_virtual" if Go1DownCfg.terrain.BarrierTrack_kwargs["virtual_terrain"] else ""), + ("_noResume" if not resume else "_from" + "_".join(load_run.split("/")[-1].split("_")[:2])), + ]) + max_iterations = 20000 + save_interval = 200 + diff --git a/legged_gym/legged_gym/envs/go1/go1_field_config.py b/legged_gym/legged_gym/envs/go1/go1_field_config.py index 4ee3375..b89fd0f 100644 --- a/legged_gym/legged_gym/envs/go1/go1_field_config.py +++ b/legged_gym/legged_gym/envs/go1/go1_field_config.py @@ -1,162 +1,161 @@ import numpy as np import os from os import path as osp +from legged_gym.utils.helpers import merge_dict from legged_gym.envs.a1.a1_field_config import A1FieldCfg, A1FieldCfgPPO +go1_const_dof_range = dict( + Hip_max= 1.047, + Hip_min=-1.047, + Thigh_max= 2.966, + Thigh_min= -0.663, + Calf_max= -0.837, + Calf_min= -2.721, +) + +go1_action_scale = 0.5 + class Go1FieldCfg( A1FieldCfg ): + class env( A1FieldCfg.env ): + num_envs = 8192 + + class init_state( A1FieldCfg.init_state ): + pos = [0., 0., 0.7] + zero_actions = False + + class sensor( A1FieldCfg.sensor ): + class proprioception( A1FieldCfg.sensor.proprioception ): + delay_action_obs = False + latency_range = [0.04-0.0025, 0.04+0.0075] # comment this if it is too hard to train. 
class terrain( A1FieldCfg.terrain ): - num_rows = 20 - num_cols = 50 - selected = "BarrierTrack" - max_init_terrain_level = 0 # for climb, leap finetune - border_size = 5 - slope_treshold = 100. + num_cols = 80 - curriculum = True # for tilt, crawl, climb, leap - # curriculum = False # for walk - horizontal_scale = 0.025 # [m] + # curriculum = True # for tilt, crawl, jump, leap + curriculum = False # for walk pad_unavailable_info = True - BarrierTrack_kwargs = dict( + BarrierTrack_kwargs = merge_dict(A1FieldCfg.terrain.BarrierTrack_kwargs, dict( options= [ - # "climb", + # "jump", # "crawl", - "tilt", + # "tilt", # "leap", ], # each race track will permute all the options - track_width= 1.6, # for climb, crawl, tilt, walk - # track_width= 1.0, # for leap + # randomize_obstacle_order= True, + track_width= 1.6, track_block_length= 2., # the x-axis distance from the env origin point wall_thickness= (0.04, 0.2), # [m] - wall_height= 0.01, # [m] for climb, crawl, tilt, walk - # wall_height= -0.5, # for leap - climb= dict( - height= (0.2, 0.6), - depth= (0.1, 0.8), # size along the forward axis - fake_offset= 0.0, # [m] an offset that make the robot easier to get into the obstacle - ), - crawl= dict( - height= (0.28, 0.38), - depth= (0.1, 0.5), # size along the forward axis - wall_height= 0.6, - no_perlin_at_obstacle= False, - ), - tilt= dict( - width= (0.24, 0.32), - depth= (0.4, 1.), # size along the forward axis - opening_angle= 0.0, # [rad] an opening that make the robot easier to get into the obstacle - wall_height= 0.5, - ), - leap= dict( - length= (0.2, 1.0), - depth= (0.4, 0.8), - height= 0.25, - ), + wall_height= 0.0, add_perlin_noise= True, border_perlin_noise= True, - border_height= 0., # for climb, crawl, tilt, walk - # border_height= -0.5, # for leap - virtual_terrain= False, # for climb, crawl, leap - # virtual_terrain= True, # for tilt + border_height= 0., + virtual_terrain= False, draw_virtual_terrain= True, engaging_next_threshold= 1.2, + engaging_finish_threshold= 0., curriculum_perlin= False, - no_perlin_threshold= 0.0, # for crawl, tilt, walk - # no_perlin_threshold= 0.05, # for leap - # no_perlin_threshold= 0.06, # for climb - ) + no_perlin_threshold= 0.1, + )) TerrainPerlin_kwargs = dict( - # zScale= 0.1, # for crawl - zScale= 0.12, # for tilt - # zScale= [0.05, 0.1], # for climb - # zScale= [0.04, 0.1], # for leap - # zScale= [0.1, 0.15], # for walk + zScale= [0.08, 0.15], frequency= 10, ) class commands( A1FieldCfg.commands ): class ranges( A1FieldCfg.commands.ranges ): - # lin_vel_x = [0.0, 1.0] # for walk - # lin_vel_x = [0.8, 1.5] # for climb - # lin_vel_x = [1.0, 1.5] # for leap - lin_vel_x = [0.3, 0.8] # for tilt, crawl + lin_vel_x = [-1.0, 1.0] lin_vel_y = [0.0, 0.0] ang_vel_yaw = [0., 0.] class control( A1FieldCfg.control ): - stiffness = {'joint': 50.} - damping = {'joint': 1.} - # action_scale = [0.2, 0.4, 0.4] * 4 # for walk - action_scale = 0.5 # for tilt, crawl, climb, leap - # for climb, leap + stiffness = {'joint': 40.} + damping = {'joint': 0.5} + action_scale = go1_action_scale torque_limits = [20., 20., 25.] 
* 4 - computer_clip_torque = True + computer_clip_torque = False motor_clip_torque = False class asset( A1FieldCfg.asset ): file = "{LEGGED_GYM_ROOT_DIR}/resources/robots/go1/urdf/go1.urdf" - penalize_contacts_on = ["base", "thigh"] - terminate_after_contacts_on = ["base", "imu"] # for climb, leap, tilt, walk no-virtual - - class termination: - # additional factors that determines whether to terminates the episode - termination_terms = [ - "roll", # for tilt - "pitch", - "z_low", - "z_high", - "out_of_track", # for leap, walk - ] + sdk_dof_range = go1_const_dof_range + class termination( A1FieldCfg.termination ): roll_kwargs = dict( - threshold= 0.8, # [rad] # for tilt - tilt_threshold= 1.5, # for tilt (condition on engaging block) + threshold= 1.5, ) pitch_kwargs = dict( - threshold= 1.6, - climb_threshold= 1.6, - leap_threshold= 1.5, + threshold= 1.5, ) - z_low_kwargs = dict( - threshold= 0.08, # [m] - ) - z_high_kwargs = dict( - threshold= 1.5, # [m] - ) - out_of_track_kwargs = dict( - threshold= 1., # [m] - ) - - check_obstacle_conditioned_threshold = True - timeout_at_border = True - timeout_at_finished = True class domain_rand( A1FieldCfg.domain_rand ): - randomize_com = True - class com_range: - x = [-0.05, 0.15] - y = [-0.1, 0.1] - z = [-0.05, 0.05] - - randomize_base_mass = True - added_mass_range = [1.0, 3.0] + class com_range( A1FieldCfg.domain_rand.com_range ): + x = [-0.2, 0.2] + + init_base_pos_range = merge_dict(A1FieldCfg.domain_rand.init_base_pos_range, dict( + x= [0.05, 0.6], + )) + init_base_rot_range = dict( + roll= [-0.75, 0.75], + pitch= [-0.75, 0.75], + ) + # init_base_vel_range = [-1.0, 1.0] + init_base_vel_range = dict( + x= [-0.2, 1.5], + y= [-0.2, 0.2], + z= [-0.2, 0.2], + roll= [-1., 1.], + pitch= [-1., 1.], + yaw= [-1., 1.], + ) + init_dof_vel_range = [-5, 5] class rewards( A1FieldCfg.rewards ): class scales: - tracking_ang_vel = 0.1 - world_vel_l2norm = -1. - legs_energy_substeps = -1e-5 - exceed_torque_limits_i = -0.1 - alive = 2. - lin_pos_y = -0.2 - yaw_abs = -0.5 - penetrate_depth = -5e-3 - penetrate_volume = -5e-3 - soft_dof_pos_limit = 0.01 + tracking_ang_vel = 0.05 + tracking_world_vel = 3. + # world_vel_l2norm = -2. + # alive = 3. + legs_energy_substeps = -2e-5 + # penalty for hardware safety + exceed_dof_pos_limits = -8e-1 + exceed_torque_limits_l1norm = -8e-1 + # penalty for walking gait, probably no need + lin_vel_z = -1. + ang_vel_xy = -0.05 + orientation = -4. + dof_acc = -2.5e-7 + collision = -10. 
+ action_rate = -0.1 + delta_torques = -1e-7 + torques = -1.e-5 + yaw_abs = -0.8 + lin_pos_y = -0.8 + hip_pos = -0.4 + dof_error = -0.04 + soft_dof_pos_limit = 0.8 # only in training walking + max_contact_force = 200.0 + + class normalization( A1FieldCfg.normalization ): + dof_pos_redundancy = 0.2 + clip_actions_method = "hard" + clip_actions_low = [] + clip_actions_high = [] + for sdk_joint_name, sim_joint_name in zip( + ["Hip", "Thigh", "Calf"] * 4, + [ # in the order as simulation + "FL_hip_joint", "FL_thigh_joint", "FL_calf_joint", + "FR_hip_joint", "FR_thigh_joint", "FR_calf_joint", + "RL_hip_joint", "RL_thigh_joint", "RL_calf_joint", + "RR_hip_joint", "RR_thigh_joint", "RR_calf_joint", + ], + ): + clip_actions_low.append( (go1_const_dof_range[sdk_joint_name + "_min"] + dof_pos_redundancy - A1FieldCfg.init_state.default_joint_angles[sim_joint_name]) / go1_action_scale ) + clip_actions_high.append( (go1_const_dof_range[sdk_joint_name + "_max"] - dof_pos_redundancy - A1FieldCfg.init_state.default_joint_angles[sim_joint_name]) / go1_action_scale ) + del dof_pos_redundancy, sdk_joint_name, sim_joint_name class sim( A1FieldCfg.sim ): body_measure_points = { # transform are related to body frame @@ -184,49 +183,24 @@ class Go1FieldCfg( A1FieldCfg ): ), } - class curriculum: - # chosen heuristically, please refer to `LeggedRobotField._get_terrain_curriculum_move` with fixed body_measure_points - # for crawl (not updated) - penetrate_volume_threshold_harder = 1500 - penetrate_volume_threshold_easier = 10000 - penetrate_depth_threshold_harder = 10 - penetrate_depth_threshold_easier = 400 - # for tilt - # penetrate_volume_threshold_harder = 2000 - # penetrate_volume_threshold_easier = 10000 - # penetrate_depth_threshold_harder = 20 - # penetrate_depth_threshold_easier = 300 - # for climb - # penetrate_volume_threshold_harder = 6000 - # penetrate_volume_threshold_easier = 12000 - # penetrate_depth_threshold_harder = 600 - # penetrate_depth_threshold_easier = 1600 - # for leap - # penetrate_volume_threshold_harder = 9000 - # penetrate_volume_threshold_easier = 10000 - # penetrate_depth_threshold_harder = 300 - # penetrate_depth_threshold_easier = 5000 - logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs") class Go1FieldCfgPPO( A1FieldCfgPPO ): class algorithm( A1FieldCfgPPO.algorithm ): - entropy_coef = 0.01 # for walk - clip_min_std = 1e-12 # for walk + entropy_coef = 0.0 + clip_min_std = 0.2 + + class policy( A1FieldCfgPPO.policy ): + mu_activation = None # use action clip method by env class runner( A1FieldCfgPPO.runner ): experiment_name = "field_go1" - run_name = "".join(["Skills", - ("_all" if len(Go1FieldCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else ("_" + Go1FieldCfg.terrain.BarrierTrack_kwargs["options"][0] if Go1FieldCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")), + resume = False + load_run = None + + run_name = "".join(["WalkForward", ("_pEnergySubsteps" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.legs_energy_substeps, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "legs_energy_substeps", 0.0) != 0.0 else ""), - ("_pPenV" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.penetrate_volume, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "penetrate_volume", 0.) < 0. else ""), - ("_pPenD" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.penetrate_depth, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "penetrate_depth", 0.) < 0. 
else ""), - ("_rAlive{:d}".format(int(Go1FieldCfg.rewards.scales.alive)) if Go1FieldCfg.rewards.scales.alive != 2 else ""), - ("_rAngVel{:.2f}".format(Go1FieldCfg.rewards.scales.tracking_ang_vel) if Go1FieldCfg.rewards.scales.tracking_ang_vel != 0.05 else ""), - ("_pTorqueExceedIndicate" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.exceed_torque_limits_i, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "exceed_torque_limits_i", 0.) != 0. else ""), - ("_pTorqueExceedSquare" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.exceed_torque_limits_square, trim= "-", exp_digits= 1) if getattr(Go1FieldCfg.rewards.scales, "exceed_torque_limits_square", 0.) != 0. else ""), - ("_pYaw{:.2f}".format(-Go1FieldCfg.rewards.scales.yaw_abs) if Go1FieldCfg.rewards.scales.yaw_abs != 0. else ""), - ("_noComputerTorqueClip" if not Go1FieldCfg.control.computer_clip_torque else ""), - ("_virtualTerrain" if Go1FieldCfg.terrain.BarrierTrack_kwargs["virtual_terrain"] else ""), + ("_rTrackVel" + np.format_float_scientific(Go1FieldCfg.rewards.scales.tracking_world_vel, precision=1, exp_digits=1, trim="-") if getattr(Go1FieldCfg.rewards.scales, "tracking_world_vel", 0.0) != 0.0 else ""), + ("_pWorldVel" + np.format_float_scientific(-Go1FieldCfg.rewards.scales.world_vel_l2norm, precision=1, exp_digits=1, trim="-") if getattr(Go1FieldCfg.rewards.scales, "world_vel_l2norm", 0.0) != 0.0 else ""), ("_aScale{:d}{:d}{:d}".format( int(Go1FieldCfg.control.action_scale[0] * 10), int(Go1FieldCfg.control.action_scale[1] * 10), @@ -234,15 +208,8 @@ class Go1FieldCfgPPO( A1FieldCfgPPO ): ) if isinstance(Go1FieldCfg.control.action_scale, (tuple, list)) \ else "_aScale{:.1f}".format(Go1FieldCfg.control.action_scale) ), - ("_torqueClip{:.0f}".format(Go1FieldCfg.control.torque_limits) if not isinstance(Go1FieldCfg.control.torque_limits, (tuple, list)) else ( - "_tClip{:d}{:d}{:d}".format( - int(Go1FieldCfg.control.torque_limits[0]), - int(Go1FieldCfg.control.torque_limits[1]), - int(Go1FieldCfg.control.torque_limits[2]), - ) - )), + ("_actionClip" + Go1FieldCfg.normalization.clip_actions_method if getattr(Go1FieldCfg.normalization, "clip_actions_method", None) is not None else ""), + ("_from" + "_".join(load_run.split("/")[-1].split("_")[:2]) if resume else "_noResume"), ]) - resume = False - load_run = "" - max_iterations = 10000 + max_iterations = 20000 save_interval = 500 diff --git a/legged_gym/legged_gym/envs/go1/go1_field_distill_config.py b/legged_gym/legged_gym/envs/go1/go1_field_distill_config.py index 9d8210a..80aa8ba 100644 --- a/legged_gym/legged_gym/envs/go1/go1_field_distill_config.py +++ b/legged_gym/legged_gym/envs/go1/go1_field_distill_config.py @@ -1,6 +1,6 @@ from collections import OrderedDict import os -import datetime +from datetime import datetime import os.path as osp import numpy as np from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO @@ -12,48 +12,74 @@ class Go1FieldDistillCfg( Go1FieldCfg ): num_envs = 256 obs_components = [ "proprioception", # 48 + # "height_measurements", # 187 "forward_depth", ] privileged_obs_components = [ "proprioception", # 48 - "base_pose", - "robot_config", - "engaging_block", - "sidewall_distance", + # "height_measurements", # 187 + # "forward_depth", + "base_pose", # 6 + "robot_config", # 1 + 3 + 1 + 12 + "engaging_block", # 1 + (4 + 1) + 2 + "sidewall_distance", # 2 ] use_lin_vel = False privileged_use_lin_vel = True # if True privileged_obs will not have noise based on implementations - privileged_obs_gets_privilege 
= False # for climb temporarily + privileged_obs_gets_privilege = False # for jump temporarily class init_state( Go1FieldCfg.init_state ): - pos = [0.0, 0.0, 0.38] # x,y,z [m] + pos = [0.0, 0.0, 0.43] # x,y,z [m] class sensor( Go1FieldCfg.sensor ): class forward_camera( Go1FieldCfg.sensor.forward_camera ): resolution = [int(240/4), int(424/4)] + # position = dict( + # mean= [0.245, 0.0075, 0.072+0.018], + # std= [0.002, 0.002, 0.0005], + # ) # position in base_link ##### small randomization + # rotation = dict( + # lower= [0, 0, 0], + # upper= [0, 0, 0], + # ) # rotation in base_link ##### small randomization + ########## new camera extrinsics with 30degree down #################### position = dict( - mean= [0.245, 0.0075, 0.072+0.018], - std= [0.002, 0.002, 0.0005], + mean= [0.245+0.027, 0.0075, 0.072+0.02], + std= [0.002, 0.002, 0.0002], ) # position in base_link ##### small randomization rotation = dict( - lower= [0, 0, 0], - upper= [0, 0, 0], + lower= [0, 0.5, 0], # positive for pitch down + upper= [0, 0.54, 0], ) # rotation in base_link ##### small randomization + ########## new camera extrinsics with 30degree down #################### + ########## new camera extrinsics with 15degree down #################### + # position = dict( + # mean= [0.245+0.027, 0.0075, 0.072+0.018], + # std= [0.003, 0.002, 0.0005], + # ) # position in base_link ##### small randomization + # rotation = dict( + # lower= [0, 0.24, 0], # positive for pitch down + # upper= [0, 0.28, 0], + # ) # rotation in base_link ##### small randomization + ########## new camera extrinsics with 15degree down #################### resized_resolution= [48, 64] output_resolution = [48, 64] horizontal_fov = [85, 87] # measured around 87 degree # for go1, usb2.0, 480x640, d435i camera - latency_range = [0.25, 0.30] + latency_range = [0.25, 0.30] # [s] + # latency_range = [0.28, 0.36] # [s] latency_resample_time = 5.0 # [s] refresh_duration = 1/10 # [s] for (240, 424 option with onboard script fixed to no more than 20Hz) # config to simulate stero RGBD camera crop_top_bottom = [0, 0] crop_left_right = [int(60/4), int(46/4)] - depth_range = [0.0, 1.5] # [m] + depth_range = [0.0, 2.0] # [m] - # class proprioception( Go1FieldCfg.sensor.proprioception ): # inherited from A1FieldCfg + class proprioception( Go1FieldCfg.sensor.proprioception ): # inherited from A1FieldCfg + delay_action_obs = False + delay_privileged_action_obs = False class terrain( Go1FieldCfg.terrain ): num_rows = 2 @@ -67,21 +93,24 @@ class Go1FieldDistillCfg( Go1FieldCfg ): options= [ # "tilt", "crawl", - "climb", + "jump", + "jump", "leap", ], # each race track will permute all the options - one_obstacle_per_track= True, + n_obstacles_per_track= 4, + randomize_obstacle_order= True, track_width= 1.6, - track_block_length= 1.8, # the x-axis distance from the env origin point - wall_thickness= (0.04, 0.2), # [m] - wall_height= 0.0, # [m] - climb= dict( + track_block_length= 2.0, # the x-axis distance from the env origin point + wall_thickness= (0.04, 0.3), # [m] + wall_height= (-0.5, 0.0), # [m] + jump= dict( height= (0.40, 0.45), - depth= (0.5, 1.0), # size along the forward axis - fake_offset= 0.05, # [m] making the climb's height info greater than its physical height. + depth= (0.01, 0.01), # size along the forward axis + fake_offset= 0.03, # [m] making the jump's height info greater than its physical height. 
+ jump_down_prob= 0.5, ), crawl= dict( - height= (0.28, 0.38), + height= (0.36, 0.5), depth= (0.1, 0.2), # size along the forward axis wall_height= 0.6, fake_depth= 0.4, # when track block length is 1.8m @@ -94,11 +123,11 @@ class Go1FieldDistillCfg( Go1FieldCfg ): ), leap= dict( # length= (0.45, 0.7), - length= (0.5, 0.7), # for parkour real-world env + length= (0.3, 0.74), # for parkour real-world env depth= (0.5, 0.6), height= 0.2, - fake_offset= 0.2, - follow_climb_ratio= 0.5, # when following climb, not drop down to ground suddenly. + fake_offset= 0.1, + follow_jump_ratio= 0.5, # when following jump, not drop down to ground suddenly. ), add_perlin_noise= True, border_perlin_noise= True, @@ -106,6 +135,7 @@ class Go1FieldDistillCfg( Go1FieldCfg ): virtual_terrain= False, draw_virtual_terrain= True, engaging_next_threshold= 1.2, + engaging_finish_threshold= 0.3, check_skill_combinations= True, curriculum_perlin= False, no_perlin_threshold= 0.04, # for parkour real-world env @@ -113,33 +143,65 @@ class Go1FieldDistillCfg( Go1FieldCfg ): ) TerrainPerlin_kwargs = dict( - zScale= [0.0, 0.05], # for parkour real-world env + zScale= [0.01, 0.1], # for parkour real-world env frequency= 10, ) class commands( A1FieldDistillCfg.commands ): pass + class control( Go1FieldCfg.control ): + computer_clip_torque = False + + class asset( Go1FieldCfg.asset ): + terminate_after_contacts_on = ["base"] + class termination( A1FieldDistillCfg.termination ): - pass + out_of_track_kwargs = dict( + threshold= 1.6, + ) class domain_rand( A1FieldDistillCfg.domain_rand ): randomize_com = True randomize_motor = True randomize_friction = True - friction_range = [0.0, 0.8] + friction_range = [0.2, 2.0] randomize_base_mass = True push_robots = False - init_dof_pos_ratio_range = [0.9, 1.1] - init_base_vel_range = [-0.0, 0.0] + init_dof_pos_ratio_range = [0.8, 1.2] + init_dof_vel_range = [-2., 2.] + # init_base_vel_range = [-0.0, 0.0] # use super class init_base_pos_range = dict( x= [0.4, 0.6], y= [-0.05, 0.05], ) + randomize_gravity_bias = True + randomize_privileged_gravity_bias = False + gravity_bias_range = dict( + x= [-0.12, 0.12], + y= [-0.12, 0.12], + z= [-0.05, 0.05], + ) + + class rewards( A1FieldDistillCfg.rewards ): + class scales: + pass class noise( A1FieldDistillCfg.noise ): + add_noise = True # This only account for proprioception noise and height measurements noise at most class noise_scales( A1FieldDistillCfg.noise.noise_scales ): + ang_vel = 0.2 # measured in 0.02 + dof_pos = 0.0006 # measured in 0.0002 + dof_vel = 0.02 # measured in 0.015 + gravity = 0.06 # measured in 0.05 + + # These aspects of obs should not have noise, + # which are not used or set to zero or has its own noise mechanism + height_measurements = 0.0 forward_depth = 0.0 + base_pose = 0.0 + lin_vel = 0.0 + class forward_depth: stereo_min_distance = 0.12 # when using (240, 424) resolution stereo_far_distance = 2. @@ -164,7 +226,8 @@ class Go1FieldDistillCfg( Go1FieldCfg ): # override base class attributes pass -distill_target_ = "tanh" +# distill_target_ = "tanh" +distill_target_ = "l1" logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs") class Go1FieldDistillCfgPPO( A1FieldDistillCfgPPO ): class algorithm( A1FieldDistillCfgPPO.algorithm ): @@ -177,10 +240,10 @@ class Go1FieldDistillCfgPPO( A1FieldDistillCfgPPO ): using_ppo = False distill_target = distill_target_ buffer_dilation_ratio = 1. 
- learning_rate = 3e-4 + learning_rate = 1.e-4 optimizer_class_name = "AdamW" - teacher_policy_class_name = "ActorCriticFieldMutex" + teacher_policy_class_name = "ActorCriticClimbMutex" teacher_ac_path = None class teacher_policy (A1FieldDistillCfgPPO.policy ): # For loading teacher policy. No need to change for training student @@ -195,20 +258,77 @@ class Go1FieldDistillCfgPPO( A1FieldDistillCfgPPO ): sidewall_distance= (2,), ) env_action_scale = Go1FieldCfg.control.action_scale + action_smoothing_buffer_len = 3 + reset_non_selected = "when_skill" + # reset_non_selected = True + sub_policy_class_name = "ActorCriticRecurrent" - sub_policy_paths = [ # must in the order of obstacle ID - logs_root + "/field_go1/{your go1 walking policy}", - logs_root + "/field_go1/{your go1 tilting policy}", - logs_root + "/field_go1/{your go1 crawling policy}", - logs_root + "/field_go1/{your go1 climbing policy}", - logs_root + "/field_go1/{your go1 leaping policy}", + + sub_policy_paths = [ + # os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct10_00-03-12_WalkForward_pEnergySubsteps1e-5_maxPushAng0.5_pushInter3.0_noTanh_fromOct09_15-50-14"), + # os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct25_14-43-48_WalkForward_pEnergySubsteps2e-5_rTrackVel3e+0_rAlive3e+0_pTorqueL1norm8e-1_pDofLim8e-1_actionCliphard_fromOct24_09-00-16"), + os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct27_20-22-36_WalkForward_pEnergySubsteps2e-5_rTrackVel3e+0_pYawAbs8e-1_pYPosAbs8e-01_pHipPos8e-01_noPropNoise_noTanh_actionCliphard_fromOct27_16-25-22"), + os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct11_12-24-22_Skills_tilt_propDelay0.04-0.05_pEnergySubsteps1e-5_pPenD2e-3_pDofLimit8e-1_rTilt8e-03_pCollision0.1_PushRobot_kp40_kd0.5_tiltMax0.40fromSep27_13-59-27"), + os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct11_12-19-00_Skills_crawl_propDelay0.04-0.05_pEnergy-1e-5_pDof8e-01_pTorqueL14e-01_pPosY0.1_maxPushAng0.3_kp40_fromOct09_09-58-26"), + # os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct13_09-12-39_258k_Skills_jump_comXRange-0.2-0.2_pEnergySubsteps-4e-06_pushRobot_jumpHeight0.1-0.7_propDelay0.04-0.05_noTanh_fromOct09_10-13-26"), + # os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct16_10-55-09_Skills_jump_comXRange-0.2-0.2_pLinVel1.4_pTorque2e-06_pY0.1_pCollision0.1_pushRobot_jumpHeight0.10-0.45_propDelay0.04-0.05_noTanh_fromOct13_09-12-39"), + # os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct22_15-19-26_Skills_jump_rTrackVel3._pJumpSameLegs0.6_noPContact_pDofAcc1.5e-07_rJumpXVel1.5_noPropNoise_propDelay0.04-0.05_pCollision-6e-01_noPush_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct22_08-47-13"), + # os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct24_07-41-26_Skills_jump_rTrackVel4._pY-2e-01_pJumpSameLegs0.2_noPContact_pDofAcc2.5e-07_rJumpXVel1.5_noPropNoise_propDelay0.04-0.05_pCollision-1e-01_noPush_minStd0.2_noTanh_zeroResetAction_jumpRange0.4-0.5_fromOct20_16-09-47"), + # os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct25_11-36-16_Skills_jump_pEnergySubsteps3e-05_rTrackVel4._pY-3e-01_propDelay0.04-0.05_noPropNoise_noPush_noTanh_zeroResetAction_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct24_07-41-26"), + # os.path.join(logs_root, "field_go1", "Oct27_16-03-48_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._pDofErrCond4e-01_pHipPos5e-01_pCollision3e+00_pSyncSymLegs4e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_13-20-59"), + # os.path.join(logs_root, "field_go1_noTanh_oracle", 
"Oct27_16-04-45_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._pPenD-6e-03_pDofErrCond6e-01_pHipPos5e-01_pCollision6e+00_pSyncSymLegs4e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_13-20-59"), + os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct28_11-05-56_Skills_jump_pEnergySubsteps2e-06_rTrackVel5._pYaw-1e-01_pHipPos5e+00_pCollision1e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_jumpRange0.4-0.5_allowNegativeReward_fromOct28_08-54-23"), + # os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct09_09-51-58_Skills_leap_propDelay0.04-0.05_pEnergySubsteps-8e-06_pPenD1.e-2_pDofLimit4e-01_pCollision0.5_kp40_kd0.5fromOct05_02-16-22"), + # os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct25_11-39-12_Skills_leap_pEnertySubsteps4.e-5_pActRate2.e-1_pDofLimit8.e-1_pCollision5.e-1_noPropNoise_noTanh_zeroResetAction_actionCliphard_fromOct09_09-51-58"), + os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct28_13-23-39_Skills_leap_pEnertySubsteps2.e-5_rTrackVel2._rAlive2.0_pSyncAllLegs6.e-1_pOrient6.e-1_pHipPos5.e+0_noTanh_actionCliphard_virtual_fromOct28_03-32-35"), ] + # jump_down_policy_path = logs_root + "/field_a1_noTanh_oracle/Oct09_09-49-58_Skills_down_pEnergySubsteps-4e-06_rAlive3_pPenD1e-02_pDofLimit-4e-01_pTorqueL14e-01_maxForce200_downRedundant0.2_softDof0.8_pushRobot_kp40_kd0.5fromSep28_14-59-53" + # jump_down_policy_path = os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct14_02-06-47_Skills_down_comXRange-0.2-0.2_pEnergySubsteps-4e-06_rAlive3_pPenD4e-03_pDHarder2.e+05_pushRobot_noTanh_virtualfromOct09_09-49-58") + # jump_down_policy_path = os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct25_10-48-54_down_pEnergySubsteps-4e-05_pDofLimit-4e-01_pYawAbs_pTorqueL14e-01_pActRate1.e-1_rDownCond3.e-1_kp40_kd0.5_virtualfromOct14_02-06-47") + #tested OK, tends to stop + # jump_down_policy_path = os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct25_11-36-31_down_pEnergySubsteps-4e-05_pDofLimit-4e-01_pYawAbs_pTorqueL14e-01_pActRate1.e-1_rDownCond1.e-2_kp40_kd0.5_virtualfromOct14_02-06-47") + # jump_down_policy_path = os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct26_08-45-02_Skills_down_pEnergySubsteps-4e-05_rTrackVel4._pZVel1e-01_pAngXYVel5e-02_pDTorques1e-07_rDownCond1.e-1_noTanh_virtualfromOct25_10-48-54") + jump_down_policy_path = os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct28_13-08-21_Skills_down_pEnergySubsteps-4e-05_rTrackVel5._pHipPos8e-01_rDownCond4.e-2_withPropNoise_noTanh_virtualfromOct28_07-38-39") + + # sub_policy_paths = [ # must in the order of obstacle ID + # os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct18_12-37-48_WalkForward_comXRange-0.2-0.2_noDelayActObs_pLinY0.05_noTanh_fromOct10_00-03-12"), # a little pitch up, accetable + # os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct11_12-24-22_Skills_tilt_propDelay0.04-0.05_pEnergySubsteps1e-5_pPenD2e-3_pDofLimit8e-1_rTilt8e-03_pCollision0.1_PushRobot_kp40_kd0.5_tiltMax0.40fromSep27_13-59-27"), # a little pitch up, accetable + # os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct18_12-32-28_Skills_crawl_comXRange-0.2-0.2_pEnergySubsteps-1e-5_pDof8e-01_pTorque1e-5_pTorqueL14e-01_noDelayActObs_noTanh_fromOct11_12-19-00"), + # os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct19_06-29-14_Skills_jump_comXRange-0.2-0.2_pLinVel1._pDofAcc5e-7_pTorque6e-05_jumpHeight0.10-0.46_noDelayActObs_noTanh_minStd0.1_fromOct18_15-57-31"), + # os.path.join(logs_root, "field_a1_noTanh_oracle", 
"Oct19_01-25-22_Skills_leap_comXRange-0.2-0.2_pTorques2.e-5_pContactForces1.e-2_leapHeight0.1_noDelayActObs_noTanh_virtualfromOct09_09-51-58"), + # ] + # jump_down_policy_path = \ + # os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct19_07-01-54_Skills_down_comXRange-0.2-0.2_pXVelL21.3_rDown0.3_pPenD2e-03_pDHarder2e+5_pTorque2e-5_pushRobot_noDelayActObs_noTanh_virtualfromOct18_00-51-27") + + # The group of ckpt that used for no Computer Clip and no Motor Clip + sub_policy_paths = [ + os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct27_20-22-36_WalkForward_pEnergySubsteps2e-5_rTrackVel3e+0_pYawAbs8e-1_pYPosAbs8e-01_pHipPos8e-01_noPropNoise_noTanh_actionCliphard_fromOct27_16-25-22"), + os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct27_20-22-36_WalkForward_pEnergySubsteps2e-5_rTrackVel3e+0_pYawAbs8e-1_pYPosAbs8e-01_pHipPos8e-01_noPropNoise_noTanh_actionCliphard_fromOct27_16-25-22"), + os.path.join(logs_root, "field_a1_noTanh_oracle", "Oct11_12-19-00_Skills_crawl_propDelay0.04-0.05_pEnergy-1e-5_pDof8e-01_pTorqueL14e-01_pPosY0.1_maxPushAng0.3_kp40_fromOct09_09-58-26"), + # os.path.join(logs_root, "field_go1", "Oct30_03-46-42_Skills_crawl_pEnergy1.e-5_pDof8.e-1_pTorqueL11.e+0_noComputerClip_noTanh"), + # os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct29_22-31-44_Skills_jump_pEnergySubsteps4e-05_rTrackVel4._pY-4e-01_pTorque4e-04_noJumpBonous_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_20-17-31"), + # os.path.join(logs_root, "field_go1", "Oct30_11-11-43_Skills_jump_pEnergySubsteps6e-05_rTrackVel5._pY-8e-01_pTorqueExceed1.2e+00_pTorque4e-04_pDTorques1e-06_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_20-17-31"), + # os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct30_13-00-12_Skills_jump_pEnergySubsteps6e-05_rTrackVel5._pY-4e-01_pTorqueExceed1.8e+00_pTorque4e-04_pDTorques1e-05_propDelay0.04-0.05_noPropNoise_noPush_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_20-17-31"), + ### New jump oracle with more spreaded rear hip. 
+ # os.path.join(logs_root, "field_go1_noTanh_oracle", "Nov02_11-07-49_Skills_jump_pEnergySubsteps5e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed1.8e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12"), + os.path.join(logs_root, "field_go1_noTanh_oracle", "Nov02_13-53-48_Skills_jump_pEnergySubsteps5e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed1.5e+00_propDelay0.04-0.05_noPropNoise_pushRobot_minStd0.21_entropy0.01_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12"), + # os.path.join(logs_root, "field_go1", "Oct30_05-08-03_Skills_leap_pEnertySubsteps6.e-6_rTrackVel5._pTorqueExceed4.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_04-10-12"), + os.path.join(logs_root, "field_go1_noTanh_oracle", "Oct30_07-32-10_Skills_leap_pEnertySubsteps6.e-6_rTrackVel5._pTorqueExceed8.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_pDorErr1.5e-1_noPenCurriculum_noCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_05-07-24"), + ] + jump_down_policy_path = os.path.join(logs_root, "field_go1_noTanh_oracle", + "Oct30_03-42-56_Skills_down_pEnergySubsteps-1e-05_rTrackVel5._pTorqueL12e-01_pHipPos5e+00_rDownCond4.e-2_allowNegativeReward_pushRobot_noComputerClip_noPropNoise_noTanh_virtualfromOct29_22-07-51", + ) + # jump_down_policy_path = os.path.join(logs_root, "field_go1", + # "Oct30_04-23-31_Skills_down_pEnergySubsteps-1e-05_rTrackVel5._pTorqueL14e-01_pHipPos5e+00_rDownCond4.e-2_allowNegativeReward_pushRobot_noComputerClip_noPropNoise_noTanh_virtual_fromOct29_22-07-51", + # ) + + jump_down_vel = 1.0 cmd_vel_mapping = { 0: 1.0, - 1: 0.5, - 2: 0.8, - 3: 1.2, + 1: 0.6, + 2: 1.0, + 3: 1.0, 4: 1.5, } @@ -218,38 +338,125 @@ class Go1FieldDistillCfgPPO( A1FieldDistillCfgPPO ): runner_class_name = "TwoStageRunner" class runner( A1FieldDistillCfgPPO.runner ): experiment_name = "distill_go1" + num_steps_per_env = 32 class pretrain_dataset: # data_dir = [ - # "logs/distill_a1_dagger/" + dir_ \ - # for dir_ in os.listdir("logs/distill_a1_dagger") + # "/localdata_ssd/isaac_ziwenz_tmp/distill_go1_dagger/20231021_camPitch0.52_jumpA1Oct16_10-55-09/" + dir_ \ + # for dir_ in os.listdir("/localdata_ssd/isaac_ziwenz_tmp/distill_go1_dagger/20231021_camPitch0.52_jumpA1Oct16_10-55-09") # ] scan_dir = "".join([ - "logs/distill_go1_dagger/{}_".format(datetime.datetime.now().strftime("%b%d")), + "/localdata_ssd/isaac_ziwenz_tmp/distill_go1_dagger/", datetime.now().strftime('%b%d_%H-%M-%S'), "_", "".join(Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["options"]), "_vDelay{:.2f}-{:.2f}".format( Go1FieldDistillCfg.sensor.forward_camera.latency_range[0], Go1FieldDistillCfg.sensor.forward_camera.latency_range[1], ), + "_pDelay{:.2f}-{:.2f}".format( + Go1FieldDistillCfg.sensor.proprioception.latency_range[0], + Go1FieldDistillCfg.sensor.proprioception.latency_range[1], + ), + ("_camPitch{:.2f}".format( + (Go1FieldDistillCfg.sensor.forward_camera.rotation["lower"][1] + Go1FieldDistillCfg.sensor.forward_camera.rotation["upper"][1]) / 2 + )), + ("_depthMax{:.1f}".format(Go1FieldDistillCfg.sensor.forward_camera.depth_range[1])), + # ("_zeroResetAction" if Go1FieldDistillCfg.init_state.zero_actions else ""), + # ("_actionClip" + Go1FieldDistillCfg.normalization.clip_actions_method if getattr(Go1FieldDistillCfg.normalization, "clip_actions_method", None) else ""), + 
("_jumpOffset{:.2f}".format(Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["jump"]["fake_offset"])), + ("_noComputerClip" if not Go1FieldDistillCfg.control.computer_clip_torque else ""), + ("_randOrder" if Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["options"] else ""), ]) dataset_loops = -1 random_shuffle_traj_order = True keep_latest_ratio = 1.0 - keep_latest_n_trajs = 4000 + keep_latest_n_trajs = 3000 starting_frame_range = [0, 100] - resume = False - load_run = "" + resume = True + # load_run = "/home/zzw/Data/legged_gym_logs/distill_a1/Jun06_00-15-50_distill_crawljumpleap_vDelay0.20-0.26_jump0.40-0.45_vFps10_leap0.55-0.62_fromJun04_20-18-56_06-3308JumpOracle" + # load_run = "/home/zzw/Data/legged_gym_logs/distill_go1/Jul11_16-21-59_distill_crawljumpleap" + # load_run = "Jul17_08-47-39_distill_jump_vDelay0.25-0.30" + # load_run = "Oct08_02-51-14_distill_crawljumpjumpleap_vDelay0.25-0.30_noTanh_fromJul17_08-47-39" + # load_run = "Oct08_14-51-22_distill_crawljumpjumpleap_vDelay0.25-0.30_noTanh_fromOct08_02-51-14" + load_run = "Oct20_05-35-58_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_jumpA1Oct19_06-29-14_noTanh_fromOct08_14-51-22" + load_run = "Oct21_09-01-45_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_jumpA1Oct16_10-55-09_noTanh_fromOct20_05-35-58" + load_run = "Oct21_16-21-33_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_depthMax2.0_jumpA1Oct16_10-55-09_noTanh_fromOct21_09-01-45" + load_run = "Oct22_02-25-42_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_depthMax2.0_jumpA1Oct16_10-55-09_noTanh_fromOct21_16-21-33" + load_run = "Oct24_15-43-34_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_depthMax2.0_zeroResetAction_actionCliphard_jumpOct24_07-41-26_noTanh_fromOct22_02-25-42" + load_run = "Oct25_17-10-09_distill_crawljumpjumpleap_withDown_vDelay0.25-0.30_camPitch0.52_depthMax2.0_actionCliphard_jumpOct25_11-36-16_downOct25_11-36-31_noTanh_fromOct24_15-43-34" + #### From wrong walk and jump skill policy + # load_run = "Oct26_15-38-28_distill_crawljumpjumpleap_vDelay0.25-0.30_camPitch0.52_depthMax2.0_noPropNoise_gravityBias_actionCliphard_jumpOct25_11-36-16_downOct25_11-36-31_noTanh_fromOct25_17-10-09" + # load_run = "Oct27_06-00-07_distill_crawljumpjumpleap_vDelay0.25-0.30_camPitch0.52_depthMax2.0_noPropNoise_gravityBias_actionCliphard_jumpOct25_11-36-16_downOctOct26_08-45-02_fricMax2.0_noTanh_fromOct26_15-38-28" + load_run = "Oct28_16-16-58_distill_crawljumpjumpleap_vDelay0.25-0.30_camPitch0.52_depthMax2.0_addPropNoise_gravityBias_actionCliphard_jumpOct28_11-05-56_fricMax2.0_noTanh_fromOct25_17-10-09" + ######## Continue tuning policy to fix jump and leap. + ## This ckpt looks good. But the jump is not good enough. 
+ load_run = "Oct30_16-31-09_distill_crawljumpjumpleap_vDelay0.25-0.30_camPitch0.52_depthMax2.0_addPropNoise_gravityBias_actionCliphard_noComputerClip_oracleResetWhenSkill_fricMax2.0_noTanh_fromOct28_16-16-58" - max_iterations = 80000 + max_iterations = 100000 save_interval = 2000 + log_interval = 100 run_name = "".join(["distill_", "".join(Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["options"]), "_vDelay{:.2f}-{:.2f}".format( Go1FieldDistillCfg.sensor.forward_camera.latency_range[0], Go1FieldDistillCfg.sensor.forward_camera.latency_range[1], ), + ("_camPitch{:.2f}".format( + (Go1FieldDistillCfg.sensor.forward_camera.rotation["lower"][1] + Go1FieldDistillCfg.sensor.forward_camera.rotation["upper"][1]) / 2 + )), + ("_depthMax{:.1f}".format(Go1FieldDistillCfg.sensor.forward_camera.depth_range[1])), + # ("_addPropNoise" if Go1FieldDistillCfg.noise.add_noise else "_noPropNoise"), + # ("_gravityBias" if Go1FieldDistillCfg.domain_rand.randomize_gravity_bias else "_noGravityBias"), + # ("_actionClip" + Go1FieldDistillCfg.normalization.clip_actions_method if getattr(Go1FieldDistillCfg.normalization, "clip_actions_method", None) else ""), + # ("_noComputerClip" if not Go1FieldDistillCfg.control.computer_clip_torque else ""), + ("_jumpNov02_13-53-48"), + ("_leapVel1.5"), + ("_jumpOffset{:.2f}".format(Go1FieldDistillCfg.terrain.BarrierTrack_kwargs["jump"]["fake_offset"])), + ("_oracleResetWhenSkill"), + ("_fricMax{:.1f}".format(Go1FieldDistillCfg.domain_rand.friction_range[1])), + ("_noTanh"), + ("_from" + "_".join(load_run.split("/")[-1].split("_")[:2]) if resume else "_noResume"), ]) + # Checking variants: + """ + 1. Faster leap (1.5m/s) + higher offset 0.1 for jump up + 2. Longer leap offset 0.2, faster leap (1.5m/s) + higher offset 0.1 for jump up + 3. Faster leap (1.5m/s) + higher offset 0.1 for jump up + from older ckpt + 4. Faster leap (1.5m/s) + faster jump (1.2m/s) and higher offset 0.1 for jump up + from older ckpt + smaller learning rate 1e-4 + ---- 20231101 after testing preious ckpts ---- + 5. Faster leap (1.5m/s) + normal 0.05 offset for jump up, normal (1.0m/s) jump + from older ckpt (Oct28_16-16-58) + same learning rate as Oct30_16-31-09 + 6. Faster leap (1.5m/s) + slight higher 0.07 offset for jump up, faster (1.2m/s) jump + from older ckpt (Oct28_16-16-58) + lower learning rate as 1e-4 + ---- 20231102 after testing preious ckpts ---- + New jump oracle to test: + * Nov02_11-07-49_Skills_jump_pEnergySubsteps5e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed1.8e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12 + Nov02_11-09-03_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed2e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12 + Nov02_13-53-13_Skills_jump_pEnergySubsteps5e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed1.5e+00_propDelay0.04-0.05_noPropNoise_pushRobot_minStd0.22_entropy0.01_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12 + * Nov02_13-53-48_Skills_jump_pEnergySubsteps5e-05_rTrackVel5._pFHipPos5e+00_pTorqueExceed1.5e+00_propDelay0.04-0.05_noPropNoise_pushRobot_minStd0.21_entropy0.01_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct30_13-00-12 + 7. 
Faster leap (1.5m/s) + lower 0.05 offset for jump up, normal (1.0m/s) jump + using new jump oracle + same older ckpt (Oct28_16-16-58) + same learning rate as Oct30_16-31-09 (1.5e-4) + 8. Faster leap (1.5m/s) + lower 0.03 offset for jump up, normal (1.0m/s) jump + using new jump oracle + new ckpt (Oct30_16-31-09) + lower learning rate as 1e-4 + """ + diff --git a/legged_gym/legged_gym/envs/go1/go1_jump_config.py b/legged_gym/legged_gym/envs/go1/go1_jump_config.py new file mode 100644 index 0000000..db4d522 --- /dev/null +++ b/legged_gym/legged_gym/envs/go1/go1_jump_config.py @@ -0,0 +1,223 @@ +from os import path as osp +import numpy as np +from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO +from legged_gym.utils.helpers import merge_dict + +class Go1JumpCfg( Go1FieldCfg ): + + class init_state( Go1FieldCfg.init_state ): + pos = [0., 0., 0.45] + + #### uncomment this to train non-virtual terrain + class sensor( Go1FieldCfg.sensor ): + class proprioception( Go1FieldCfg.sensor.proprioception ): + latency_range = [0.04-0.0025, 0.04+0.0075] + #### uncomment the above to train non-virtual terrain + + class terrain( Go1FieldCfg.terrain ): + max_init_terrain_level = 10 + border_size = 5 + slope_treshold = 20. + curriculum = True + + BarrierTrack_kwargs = merge_dict(Go1FieldCfg.terrain.BarrierTrack_kwargs, dict( + options= [ + "jump", + ], + track_width= 1.6, + track_block_length= 1.6, + wall_thickness= 0.2, + jump= dict( + # height= (0.38, 0.46), + height= (0.2, 0.46), + depth= (0.1, 0.2), + fake_offset= 0.0, # [m] an offset that make the robot easier to get into the obstacle + jump_down_prob= 0., # probability of jumping down use it in non-virtual terrain + ), + virtual_terrain= False, # Change this to False for real terrain + no_perlin_threshold= 0.06, + randomize_obstacle_order= True, + n_obstacles_per_track= 3, + )) + + TerrainPerlin_kwargs = merge_dict(Go1FieldCfg.terrain.TerrainPerlin_kwargs, dict( + zScale= [0.05, 0.12], + )) + + class commands( Go1FieldCfg.commands ): + class ranges( Go1FieldCfg.commands.ranges ): + lin_vel_x = [0.5, 1.5] + lin_vel_y = [0.0, 0.0] + ang_vel_yaw = [0., 0.] + + class control( Go1FieldCfg.control ): + computer_clip_torque = False + + class asset( Go1FieldCfg.asset ): + penalize_contacts_on = ["base", "thigh", "calf", "imu"] + terminate_after_contacts_on = ["base"] + + class termination( Go1FieldCfg.termination ): + # additional factors that determines whether to terminates the episode + termination_terms = [ + "roll", + "pitch", + "z_high", + "out_of_track", + ] + timeout_at_finished = True + timeout_at_border = True + + class domain_rand( Go1FieldCfg.domain_rand ): + class com_range( Go1FieldCfg.domain_rand.com_range ): + z = [-0.1, 0.1] + + init_base_pos_range = dict( + x= [0.2, 0.8], + y= [-0.25, 0.25], + ) + init_base_rot_range = dict( + roll= [-0.1, 0.1], + pitch= [-0.1, 0.1], + ) + + push_robots = True + + class rewards( Go1FieldCfg.rewards ): + class scales: + tracking_ang_vel = 0.08 + # alive = 10. + tracking_world_vel = 5. ######## 1. always ######## + legs_energy_substeps = -6.e-5 # -6e-6 + jump_x_vel_cond = 0.1 ######## 1. segment ######## + # hip_pos = -5. + front_hip_pos = -5. + rear_hip_pos = -0.000001 + penetrate_depth = -6e-3 + penetrate_volume = -6e-4 + exceed_dof_pos_limits = -8e-4 # -8e-4 + exceed_torque_limits_l1norm = -2. # -8e-2 # -6e-2 + action_rate = -0.1 + delta_torques = -1.e-5 + dof_acc = -1e-7 + torques = -4e-4 # 2e-3 + yaw_abs = -0.1 + lin_pos_y = -0.4 + collision = -1. 
+ sync_all_legs_cond = -0.3 # -0.6 + dof_error = -0.06 + only_positive_rewards = False # False + soft_dof_pos_limit = 0.8 + max_contact_force = 100. + tracking_sigma = 0.3 + + class noise( Go1FieldCfg.noise ): + add_noise = False + + class curriculum( Go1FieldCfg.curriculum ): + penetrate_volume_threshold_harder = 6000 + penetrate_volume_threshold_easier = 12000 + penetrate_depth_threshold_harder = 600 + penetrate_depth_threshold_easier = 1200 + +logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs") +class Go1JumpCfgPPO( Go1FieldCfgPPO ): + class algorithm( Go1FieldCfgPPO.algorithm ): + entropy_coef = 0.01 + clip_min_std = 0.21 + + class runner( Go1FieldCfgPPO.runner ): + resume = True + load_run = "{Your traind walking model directory}" + # load_run = "Sep20_03-37-32_SkillopensourcePlaneWalking_pEnergySubsteps1e-5_pTorqueExceedIndicate1e-1_aScale0.5_tClip202025" + # load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Sep27_14-32-13_Skills_jump_pEnergySubsteps-8e-06_pDofLimit-8e-01_pTorqueL18e-01_pContact1e-02_pCollision0.4_pushRobot_propDelay0.04-0.05_kp40_kd0.5_fromSep27_03-36-02") + # load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct07_10-24-52_Skills_jump_pEnergy1e-06_pY-1e-02_pCollision-1e-02_noPush_noTanh_jumpRange0.2-0.6_fromSep27_14-32-13") + # load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct19_06-29-14_Skills_jump_comXRange-0.2-0.2_pLinVel1._pDofAcc5e-7_pTorque6e-05_jumpHeight0.10-0.46_noDelayActObs_noTanh_minStd0.1_fromOct18_15-57-31") + # load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct16_10-55-09_Skills_jump_comXRange-0.2-0.2_pLinVel1.4_pTorque2e-06_pY0.1_pCollision0.1_pushRobot_jumpHeight0.10-0.45_propDelay0.04-0.05_noTanh_fromOct13_09-12-39") + ###################################### + # load_run = "Oct20_16-09-47_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel2._propDelay0.04-0.05_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct19_06-29-14" + # load_run = "Oct21_05-34-37_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel2._noPropNoise_propDelay0.04-0.05_pCollision-5e-01_noPush_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct20_16-09-47" + # load_run = "Oct21_12-30-28_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel2._noPropNoise_propDelay0.04-0.05_pCollision-1e+00_noPush_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct21_05-34-37" + # load_run = "Oct21_14-26-29_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel1.2_noPropNoise_propDelay0.04-0.05_pCollision-2e+00_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct21_12-30-28" + # load_run = "Oct20_16-09-47_275k_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel2._propDelay0.04-0.05_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct19_06-29-14" + # load_run = osp.join(logs_root, "field_go1_before20231030_tuneJump_noTanh", "Oct20_16-09-47_Skills_jump_pLinVel1._pDofLimit-8e-03_pDofAcc2.5e-07_pTorque1e-05_rJumpXVel2._propDelay0.04-0.05_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct19_06-29-14") + # load_run = "Oct22_02-20-52_Skills_jump_pLinVel1._noPContact_pDofAcc2.5e-07_rJumpXVel1.4_noPropNoise_propDelay0.04-0.05_pCollision-1e+00_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct20_16-09-47" + # load_run = "Oct22_08-47-56_Skills_jump_rTrackVel2._pJumpSameLegs0.8_noPContact_pDofAcc2.5e-07_rJumpXVel1._noPropNoise_propDelay0.04-0.05_pCollision-4e-01_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct22_02-20-52" + # 
load_run = "Oct22_08-47-13_Skills_jump_rTrackVel2._pJumpSameLegs0.6_noPContact_pDofAcc2.5e-07_rJumpXVel1.2_noPropNoise_propDelay0.04-0.05_pCollision-6e-01_pushRobot_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct22_02-20-52" + # load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct22_15-19-26_Skills_jump_rTrackVel3._pJumpSameLegs0.6_noPContact_pDofAcc1.5e-07_rJumpXVel1.5_noPropNoise_propDelay0.04-0.05_pCollision-6e-01_noPush_minStd0.2_noTanh_jumpRange0.4-0.5_fromOct22_08-47-13") + # load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct24_07-41-26_Skills_jump_rTrackVel4._pY-2e-01_pJumpSameLegs0.2_noPContact_pDofAcc2.5e-07_rJumpXVel1.5_noPropNoise_propDelay0.04-0.05_pCollision-1e-01_noPush_minStd0.2_noTanh_zeroResetAction_jumpRange0.4-0.5_fromOct20_16-09-47") + # load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct25_11-36-16_Skills_jump_pEnergySubsteps3e-05_rTrackVel4._pY-3e-01_propDelay0.04-0.05_noPropNoise_noPush_noTanh_zeroResetAction_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct24_07-41-26") + # load_run = "Oct27_08-20-20_Skills_jump_rTrackVel5._noPPen_pY-3e-01_pSyncSymLegs6e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct25_11-36-16" + # load_run = "Oct27_09-17-23_Skills_jump_rTrackVel5._noPPen_pDofErr6e-02_pDofErrCond6e-01_pSyncSymLegs6e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_08-20-20" + #### Force hip_pos and sync_legs + # load_run = "Oct27_13-20-59_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._pDofErr4e-02_pDofErrCond4e-01_pHipPos5e-01_pCollision5e+00_pSyncSymLegs6e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_09-17-23" + # load_run = "Oct27_13-24-58_Skills_jump_pEnergySubsteps3e-05_rTrackVel5._pDofErr4e-02_pDofErrCond3e-01_pHipPos3e-01_pCollision5e+00_pSyncSymLegs6e-01_propDelay0.04-0.05_withPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_09-17-23" + #### Adding RL gamma for energy penalty + load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct27_16-04-45_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._pPenD-6e-03_pDofErrCond6e-01_pHipPos5e-01_pCollision6e+00_pSyncSymLegs4e-01_propDelay0.04-0.05_noPropNoise_pushRobot_noTanh_actionCliphard_noDelayActObs_jumpRange0.4-0.5_fromOct27_13-20-59") + #### Wide spreading legs first + # load_run = "Oct28_03-08-02_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._rAlive0.5_pDofErrCond8e-01_pHipPos5e-01_pSyncSymLegs2e-01_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_jumpRange0.4-0.5_fromOct27_16-04-45" + # load_run = "Oct28_07-07-03_Skills_jump_pEnergySubsteps4e-05_rTrackVel5._rAlive1.0_pPenD-1e-03_pDofErrCond8e-01_pHipPos3e+00_pSyncSymLegs2e-01_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_jumpRange0.4-0.5_fromOct27_16-04-45" + load_run = "Oct28_08-54-23_Skills_jump_pEnergySubsteps1e-05_rTrackVel3._rAlive0.5_pYaw-1e-01_pHipPos5e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_jumpRange0.4-0.5_fromOct28_03-08-02" + load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct28_11-05-56_Skills_jump_pEnergySubsteps2e-06_rTrackVel5._pYaw-1e-01_pHipPos5e+00_pCollision1e+00_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_jumpRange0.4-0.5_allowNegativeReward_fromOct28_08-54-23") + #### lowering max torques and energy + load_run = 
"Oct29_03-21-19_Skills_jump_pEnergySubsteps6e-06_rTrackVel5._pHipPos5e+00_pTorque2e-05_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.4-0.5_allowNegativeReward_fromOct28_11-05-56" + load_run = "Oct29_07-54-42_Skills_jump_pEnergySubsteps1e-05_rTrackVel5._pHipPos5e+00_pTorqueExceed6e-02_pTorque2e-03_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.4-0.5_allowNegativeReward_fromOct29_03-21-19" + load_run = "Oct29_12-18-08_Skills_jump_pEnergySubsteps1e-05_rTrackVel6._pTorqueExceed6e-02_pTorque1e-03_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_07-54-42" + load_run = "Oct29_15-18-02_Skills_jump_pEnergySubsteps1e-05_rTrackVel4._pTorqueExceed3e-02_pTorque1e-03_noJumpBonous_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_12-18-08" + load_run = "Oct29_16-37-44_Skills_jump_pEnergySubsteps1.2e-05_rTrackVel4._pTorqueExceed6e-02_pTorque2e-03_noJumpBonous_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_15-18-02" + load_run = "Oct29_20-17-31_Skills_jump_pEnergySubsteps5e-05_rTrackVel4._pTorqueExceed1e+00_pTorque4e-04_noJumpBonous_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_16-37-44" + load_run = "Oct30_11-11-43_Skills_jump_pEnergySubsteps6e-05_rTrackVel5._pY-8e-01_pTorqueExceed1.2e+00_pTorque4e-04_pDTorques1e-06_propDelay0.04-0.05_noPropNoise_pushRobot_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_20-17-31" + load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct30_13-00-12_Skills_jump_pEnergySubsteps6e-05_rTrackVel5._pY-4e-01_pTorqueExceed1.8e+00_pTorque4e-04_pDTorques1e-05_propDelay0.04-0.05_noPropNoise_noPush_gamma0.999_noTanh_noComputerClip_jumpRange0.2-0.5_allowNegativeReward_fromOct29_20-17-31") + + run_name = "".join(["Skills_", + ("Multi" if len(Go1JumpCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else (Go1JumpCfg.terrain.BarrierTrack_kwargs["options"][0] if Go1JumpCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")), + ("_pEnergySubsteps" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.legs_energy_substeps, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "legs_energy_substeps", -2e-5) != -2e-5 else ""), + # ("_rTrackVel" + np.format_float_positional(Go1JumpCfg.rewards.scales.tracking_world_vel) if getattr(Go1JumpCfg.rewards.scales, "tracking_world_vel", 0.) != 0. else ""), + # ("_pLinVel" + np.format_float_positional(-Go1JumpCfg.rewards.scales.world_vel_l2norm) if getattr(Go1JumpCfg.rewards.scales, "world_vel_l2norm", 0.) != 0.0 else ""), + # ("_rAlive{:.1f}".format(Go1JumpCfg.rewards.scales.alive) if getattr(Go1JumpCfg.rewards.scales, "alive", 0.) != 0. else ""), + # ("_pPenD{:.0e}".format(Go1JumpCfg.rewards.scales.penetrate_depth) if getattr(Go1JumpCfg.rewards.scales, "penetrate_depth", 0.) < 0. else ""), + # ("_pYaw{:.0e}".format(Go1JumpCfg.rewards.scales.yaw_abs) if getattr(Go1JumpCfg.rewards.scales, "yaw_abs", 0.) < 0. else ""), + # ("_pY{:.0e}".format(Go1JumpCfg.rewards.scales.lin_pos_y) if getattr(Go1JumpCfg.rewards.scales, "lin_pos_y", 0.) < 0. else ""), + # ("_pDofLimit{:.0e}".format(Go1JumpCfg.rewards.scales.exceed_dof_pos_limits) if getattr(Go1JumpCfg.rewards.scales, "exceed_dof_pos_limits", 0.) < 0. 
else ""), + # ("_pDofErr" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.dof_error, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "dof_error", 0.) != 0. else ""), + # ("_pDofErrCond" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.dof_error_cond, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "dof_error_cond", 0.) != 0. else ""), + # ("_pHipPos" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.hip_pos, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "hip_pos", 0.) != 0. else ""), + ("_pFHipPos" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.front_hip_pos, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "front_hip_pos", 0.) != 0. else ""), + ("_pRHipPos" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.rear_hip_pos, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "rear_hip_pos", 0.) != 0. else ""), + # ("_pTorqueExceedI" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.exceed_torque_limits_i, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "exceed_torque_limits_i", 0.) != 0. else ""), + ("_pTorqueExceed" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.exceed_torque_limits_l1norm, precision=1, trim="-") if Go1JumpCfg.rewards.scales.exceed_torque_limits_l1norm != -8e-1 else ""), + # ("_pContact" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.feet_contact_forces, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "feet_contact_forces", 0.) != 0. else ""), + # ("_pJumpSameLegs{:.1f}".format(-Go1JumpCfg.rewards.scales.sync_legs_cond) if getattr(Go1JumpCfg.rewards.scales, "sync_legs_cond", 0.) != 0. else ""), + # ("_pDofAcc" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.dof_acc, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "dof_acc", 0.) != 0. else ""), + # ("_pTorque" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.torques, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "torques", 0.) != 0. else ""), + # ("_rJumpXVel" + np.format_float_positional(Go1JumpCfg.rewards.scales.jump_x_vel_cond) if getattr(Go1JumpCfg.rewards.scales, "jump_x_vel_cond", 0.) != 0. else "_noJumpBonous"), + # ("_pCollision" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.collision, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "collision", 0.) < 0. else ""), + # ("_pSyncSymLegs" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.sync_all_legs_cond, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "sync_all_legs_cond", 0.) != 0. else ""), + # ("_pZVel" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.lin_vel_z, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "lin_vel_z", 0.) != 0. else ""), + # ("_pAngXYVel" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.ang_vel_xy, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "ang_vel_xy", 0.) != 0. else ""), + # ("_pDTorques" + np.format_float_scientific(-Go1JumpCfg.rewards.scales.delta_torques, precision=1, trim="-") if getattr(Go1JumpCfg.rewards.scales, "delta_torques", 0.) != 0. 
else ""), + ("_propDelay{:.2f}-{:.2f}".format( + Go1JumpCfg.sensor.proprioception.latency_range[0], + Go1JumpCfg.sensor.proprioception.latency_range[1], + )), + # ("_softDof{:.1f}".format(Go1JumpCfg.rewards.soft_dof_pos_limit) if Go1JumpCfg.rewards.soft_dof_pos_limit != 0.9 else ""), + # ("_timeoutFinished" if getattr(Go1JumpCfg.termination, "timeout_at_finished", False) else ""), + # ("_noPitchTerm" if "pitch" not in Go1JumpCfg.termination.termination_terms else ""), + ("_noPropNoise" if not Go1JumpCfg.noise.add_noise else "_withPropNoise"), + ("_noPush" if not Go1JumpCfg.domain_rand.push_robots else "_pushRobot"), + ("_minStd0.21"), + ("_entropy0.01"), + # ("_gamma0.999"), + ("_noTanh"), + # ("_zeroResetAction" if Go1JumpCfg.init_state.zero_actions else ""), + # ("_actionClip" + Go1JumpCfg.normalization.clip_actions_method if getattr(Go1JumpCfg.normalization, "clip_actions_method", "") != "" else ""), + # ("_noDelayActObs" if not Go1JumpCfg.sensor.proprioception.delay_action_obs else ""), + ("_noComputerClip" if not Go1JumpCfg.control.computer_clip_torque else ""), + ("_jumpRange{:.1f}-{:.1f}".format(*Go1JumpCfg.terrain.BarrierTrack_kwargs["jump"]["height"]) if Go1JumpCfg.terrain.BarrierTrack_kwargs["jump"]["height"] != (0.1, 0.6) else ""), + # ("_noCurriculum" if not Go1JumpCfg.terrain.curriculum else ""), + # ("_allowNegativeReward" if not Go1JumpCfg.rewards.only_positive_rewards else ""), + ("_from" + "_".join(load_run.split("/")[-1].split("_")[:2]) if resume else "_noResume"), + ]) + max_iterations = 20000 + save_interval = 200 + diff --git a/legged_gym/legged_gym/envs/go1/go1_leap_config.py b/legged_gym/legged_gym/envs/go1/go1_leap_config.py new file mode 100644 index 0000000..54bdb7c --- /dev/null +++ b/legged_gym/legged_gym/envs/go1/go1_leap_config.py @@ -0,0 +1,184 @@ +from os import path as osp +import numpy as np +from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO +from legged_gym.utils.helpers import merge_dict + +class Go1LeapCfg( Go1FieldCfg ): + + class init_state( Go1FieldCfg.init_state ): + pos = [0., 0., 0.4] + + #### uncomment this to train non-virtual terrain + class sensor( Go1FieldCfg.sensor ): + class proprioception( Go1FieldCfg.sensor.proprioception ): + latency_range = [0.04-0.0025, 0.04+0.0075] + # latency_range = [0.06-0.005, 0.06+0.005] + #### uncomment the above to train non-virtual terrain + + class terrain( Go1FieldCfg.terrain ): + max_init_terrain_level = 2 + border_size = 5 + slope_treshold = 20. + curriculum = False + + BarrierTrack_kwargs = merge_dict(Go1FieldCfg.terrain.BarrierTrack_kwargs, dict( + options= [ + "leap", + ], + leap= dict( + length= (0.3, 0.8), + depth= (0.4, 0.8), + height= 0.15, + ), + wall_height= -0.4, + virtual_terrain= True, # Change this to False for real terrain + no_perlin_threshold= 0.06, + )) + + TerrainPerlin_kwargs = merge_dict(Go1FieldCfg.terrain.TerrainPerlin_kwargs, dict( + zScale= [0.05, 0.12], + )) + + class commands( Go1FieldCfg.commands ): + class ranges( Go1FieldCfg.commands.ranges ): + lin_vel_x = [1.8, 1.5] + lin_vel_y = [0.0, 0.0] + ang_vel_yaw = [0., 0.] 
+ + class control( Go1FieldCfg.control ): + computer_clip_torque = True + + class termination( Go1FieldCfg.termination ): + # additional factors that determines whether to terminates the episode + termination_terms = [ + "roll", + "pitch", + "z_low", + "z_high", + "out_of_track", + ] + # roll_kwargs = merge_dict(Go1FieldCfg.termination.roll_kwargs, dict( + # threshold= 0.7, + # leap_threshold= 0.7, + # )) + # pitch_kwargs = merge_dict(Go1FieldCfg.termination.pitch_kwargs, dict( + # threshold= 0.7, + # leap_threshold= 0.7, + # )) + z_low_kwargs = merge_dict(Go1FieldCfg.termination.z_low_kwargs, dict( + threshold= -0.1, + )) + z_high_kwargs = merge_dict(Go1FieldCfg.termination.z_high_kwargs, dict( + threshold= 2.0, + )) + + class domain_rand( Go1FieldCfg.domain_rand ): + class com_range( Go1FieldCfg.domain_rand.com_range ): + z = [-0.2, 0.2] + + init_base_pos_range = dict( + x= [0.2, 0.8], + y= [-0.25, 0.25], + ) + init_base_rot_range = dict( + roll= [-0.1, 0.1], + pitch= [-0.1, 0.1], + ) + + push_robots = False + + class rewards( Go1FieldCfg.rewards ): + class scales: + tracking_ang_vel = 0.05 + # world_vel_l2norm = -1. + tracking_world_vel = 5. + # leap_bonous_cond = 6. + legs_energy_substeps = -7e-6 # 5e5 spike (not scaled) + # alive = 3. + penetrate_depth = -1e-2 # 8 spike (not scaled) + penetrate_volume = -1e-4 # 100 spike (not scaled) + exceed_dof_pos_limits = -8e-1 + exceed_torque_limits_l1norm = -1. + # action_rate = -0.1 + delta_torques = -1e-7 + dof_acc = -1e-7 + torques = -4e-5 # 2000 spike (not scaled) + yaw_abs = -0.2 + collision = -1. + lin_pos_y = -0.4 + orientation = -0.1 # 0.3 segment (not scaled) + hip_pos = -5. # 0.5 spike (not scaled) + dof_error = -0.15 # 1.2 spike (not scaled) + tracking_sigma = 0.35 + only_positive_rewards = False + soft_dof_pos_limit = 0.7 + + class curriculum( Go1FieldCfg.curriculum ): + # penetrate_volume_threshold_harder = 9000 + # penetrate_volume_threshold_easier = 10000 + # penetrate_depth_threshold_harder = 300 + # penetrate_depth_threshold_easier = 5000 + ####### use extreme large value to disable penetrate curriculum + penetrate_volume_threshold_harder = 900000 + penetrate_volume_threshold_easier = 1000000 + penetrate_depth_threshold_harder = 30000 + penetrate_depth_threshold_easier = 500000 + +logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs") +class Go1LeapCfgPPO( Go1FieldCfgPPO ): + + class runner( Go1FieldCfgPPO.runner ): + resume = True + load_run = "{Your traind walking model directory}" + # load_run = "Sep20_03-37-32_SkillopensourcePlaneWalking_pEnergySubsteps1e-5_pTorqueExceedIndicate1e-1_aScale0.5_tClip202025" + # load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Sep27_14-56-25_Skills_leap_propDelay0.04-0.05_pEnergySubsteps-8e-06_pDofLimit8e-01_pCollision0.1_kp40_kd0.5fromSep27_02-44-48") + load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct09_09-51-58_Skills_leap_propDelay0.04-0.05_pEnergySubsteps-8e-06_pPenD1.e-2_pDofLimit4e-01_pCollision0.5_kp40_kd0.5fromOct05_02-16-22") + load_run = osp.join(logs_root, "field_go1_noTanh_oracle", "Oct25_11-39-12_Skills_leap_pEnertySubsteps4.e-5_pActRate2.e-1_pDofLimit8.e-1_pCollision5.e-1_noPropNoise_noTanh_zeroResetAction_actionCliphard_fromOct09_09-51-58") + load_run = "Oct28_03-32-35_Skills_leap_pEnertySubsteps2.e-5_rTrackVel2._rAlive2.0_pActRate2.e-1_pHipPos5.e+0_noTanh_actionCliphard_virtual_fromOct25_11-39-12" + load_run = osp.join(logs_root, "field_go1_noTanh_oracle", 
"Oct28_13-23-39_Skills_leap_pEnertySubsteps2.e-5_rTrackVel2._rAlive2.0_pSyncAllLegs6.e-1_pOrient6.e-1_pHipPos5.e+0_noTanh_actionCliphard_virtual_fromOct28_03-32-35") + load_run = "Oct29_04-23-33_Skills_leap_pEnertySubsteps6.e-6_rTrackVel6._pTorques2.e-5_pHipPos5.e+0_pDorErrCond8.e-2_noComputerClip_noTanh_allowNegativeReward_actionCliphard_virtual_fromOct28_13-23-39" + # load_run = "Oct29_08-30-23_Skills_leap_pEnertySubsteps6.e-6_rTrackVel6._pTorqueExceed2.e-1_pPosY2.e-1_pTorques2.e-5_pHipPos5.e+0_noComputerClip_noTanh_allowNegativeReward_actionCliphard_virtual_fromOct29_04-23-33" + # load_run = "Oct29_15-46-30_Skills_leap_pEnertySubsteps6.e-6_rTrackVel6._pTorqueExceed2.e-1_pOrient3.e-1_pTorques2.e-5_noComputerClip_noTanh_allowNegativeReward_actionCliphard_virtual_fromOct29_08-30-23" + load_run = "Oct30_02-07-26_Skills_leap_pEnertySubsteps6.e-6_rTrackVel6._pTorqueExceed1.e-1_pPosY4.e-1_pTorques2.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_allowNegativeReward_actionCliphard_virtual_fromOct29_04-23-33" + load_run = "Oct30_04-10-12_Skills_leap_pEnertySubsteps4.e-6_rTrackVel5._pTorqueExceed3.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_02-07-26" + load_run = "Oct30_05-07-24_Skills_leap_pEnertySubsteps5.e-6_rTrackVel5._pTorqueExceed4.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_04-10-12" + + load_run = "Oct30_05-08-03_Skills_leap_pEnertySubsteps6.e-6_rTrackVel5._pTorqueExceed4.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_04-10-12" + load_run = "Oct30_05-08-20_Skills_leap_pEnertySubsteps6.e-6_rTrackVel5._pTorqueExceed5.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_noPenCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_04-10-12" + load_run = "Oct30_07-32-10_Skills_leap_pEnertySubsteps6.e-6_rTrackVel5._pTorqueExceed8.e-1_pPosY4.e-1_pTorques4.e-5_pHipPos5.e+0_pDorErr1.5e-1_noPenCurriculum_noCurriculum_noComputerClip_noTanh_noPush_allowNegativeReward_actionCliphard_virtual_fromOct30_05-07-24" + + run_name = "".join(["Skills_", + ("Multi" if len(Go1LeapCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else (Go1LeapCfg.terrain.BarrierTrack_kwargs["options"][0] if Go1LeapCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")), + ("_pEnertySubsteps" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.legs_energy_substeps, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "legs_energy_substeps", 0.0) != 0.0 else ""), + ("_rTrackVel" + np.format_float_positional(Go1LeapCfg.rewards.scales.tracking_world_vel) if getattr(Go1LeapCfg.rewards.scales, "tracking_world_vel", 0.) != 0. else ""), + ("_rAlive{:.1f}".format(Go1LeapCfg.rewards.scales.alive) if getattr(Go1LeapCfg.rewards.scales, "alive", 0.) != 0. else ""), + # ("_pActRate" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.action_rate, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "action_rate", 0.) != 0.0 else ""), + # ("_pDofAcc" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.dof_acc, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "dof_acc", 0.) 
!= 0.0 else ""), + # ("_pDofLimit" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.exceed_dof_pos_limits, precision=1, exp_digits=1) if Go1LeapCfg.rewards.scales.exceed_dof_pos_limits != 0.0 else ""), + # ("_pCollision" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.collision, precision=1, exp_digits=1) if Go1LeapCfg.rewards.scales.collision != 0.0 else ""), + # ("_leapBonousCond" if getattr(Go1LeapCfg.rewards.scales, "leap_bonous_cond", 0.) != 0.0 else ""), + ("_pTorqueExceed" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.exceed_torque_limits_l1norm, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "exceed_torque_limits_l1norm", 0.) != 0.0 else ""), + # ("_pSyncAllLegs" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.sync_all_legs, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "sync_all_legs", 0.) != 0.0 else ""), + # ("_pOrient" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.orientation, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "orientation", 0.) != 0.0 else ""), + # ("_pPenD" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.penetrate_depth, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "penetrate_depth", 0.) != 0.0 else ""), + ("_pPosY" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.lin_pos_y, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "lin_pos_y", 0.) != 0.0 else ""), + # ("_pPenV" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.penetrate_volume, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "penetrate_volume", 0.) != 0.0 else ""), + ("_pTorques" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.torques, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "torques", 0.) != 0.0 else ""), + ("_pHipPos" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.hip_pos, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "hip_pos", 0.) != 0.0 else ""), + # ("_pDorErrCond" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.dof_error_cond, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "dof_error_cond", 0.) != 0.0 else ""), + ("_pDorErr" + np.format_float_scientific(-Go1LeapCfg.rewards.scales.dof_error, precision=1, exp_digits=1) if getattr(Go1LeapCfg.rewards.scales, "dof_error", 0.) 
!= 0.0 else ""), + # ("_noPropNoise" if not Go1LeapCfg.noise.add_noise else ""), + ("_noPenCurriculum" if Go1LeapCfg.curriculum.penetrate_volume_threshold_harder > 2e5 else ""), + ("_noCurriculum" if not Go1LeapCfg.terrain.curriculum else ""), + ("_noComputerClip" if not Go1LeapCfg.control.computer_clip_torque else ""), + ("_noTanh"), + ("_noPush" if not Go1LeapCfg.domain_rand.push_robots else "_pushRobot"), + # ("_zeroResetAction" if Go1LeapCfg.init_state.zero_actions else ""), + ("_allowNegativeReward" if not Go1LeapCfg.rewards.only_positive_rewards else ""), + ("_actionClip" + Go1LeapCfg.normalization.clip_actions_method if getattr(Go1LeapCfg.normalization, "clip_actions_method", "") != "" else ""), + ("_virtual" if Go1LeapCfg.terrain.BarrierTrack_kwargs["virtual_terrain"] else ""), + ("_from" + "_".join(load_run.split("/")[-1].split("_")[:2]) if resume else ""), + ]) + max_iterations = 20000 + save_interval = 200 + diff --git a/legged_gym/legged_gym/envs/go1/go1_remote_config.py b/legged_gym/legged_gym/envs/go1/go1_remote_config.py new file mode 100644 index 0000000..1f72a77 --- /dev/null +++ b/legged_gym/legged_gym/envs/go1/go1_remote_config.py @@ -0,0 +1,120 @@ +import numpy as np +import os.path as osp +from legged_gym.utils.helpers import merge_dict +from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO + +class Go1RemoteCfg( Go1FieldCfg ): + class env( Go1FieldCfg.env ): + num_envs = 4096 + obs_components = [ + "proprioception", # 48 + ] + privileged_obs_components = [ + "proprioception", + "robot_config", + ] + use_lin_vel = False + privileged_use_lin_vel = True + + class init_state( Go1FieldCfg.init_state ): + pos = [0., 0., 0.42] + + class terrain( Go1FieldCfg.terrain ): + num_rows = 6 + num_cols = 6 + selected = "TerrainPerlin" + TerrainPerlin_kwargs = dict( + zScale= 0.15, + frequency= 10, + ) + + class commands( Go1FieldCfg.commands ): + class ranges( Go1FieldCfg.commands.ranges ): + lin_vel_x = [-1.0, 1.0] + lin_vel_y = [-1., 1.] + ang_vel_yaw = [-1., 1.] + + class domain_rand( Go1FieldCfg.domain_rand ): + init_base_pos_range = dict( + x= [-0.1, 0.1], + y= [-0.1, 0.1], + ) + init_base_rot_range = dict( + roll= [-0.4, 0.4], + pitch= [-0.4, 0.4], + ) + init_base_vel_range = merge_dict(Go1FieldCfg.domain_rand.init_base_vel_range, dict( + x= [-0.8, 1.5], + y= [-0.8, 0.8], + )) + + class rewards( Go1FieldCfg.rewards ): + class scales: + ###### hacker from Field + tracking_ang_vel = 1. + tracking_lin_vel = 3. + # lin_vel_l2norm = -1. + alive = 1. + legs_energy_substeps = -6e-7 + # penalty for hardware safety + # exceed_dof_pos_limits = -4e-2 + exceed_torque_limits_l1norm = -1e-2 + # penalty for walking gait, probably no need + lin_vel_z = -0.5 + ang_vel_xy = -0.05 + orientation = -4. + dof_acc = -1.e-7 + collision = -10. 
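The `# 48` comment on "proprioception" in Go1RemoteCfg.env above is the flat width of the proprioceptive observation. Assuming the standard legged_gym layout (base linear and angular velocity, projected gravity, command, joint positions, joint velocities, and previous actions for the 12 actuated joints), the count works out as below; this breakdown is an assumption based on the usual legged_gym ordering, not something spelled out in this patch.

```python
# Hypothetical breakdown of the 48-dim proprioception vector
# (standard legged_gym ordering assumed, not taken verbatim from this repo).
NUM_DOF = 12  # the Go1 has 12 actuated joints

proprioception_layout = {
    "base_lin_vel": 3,      # masked at runtime when use_lin_vel = False
    "base_ang_vel": 3,
    "projected_gravity": 3,
    "commands": 3,          # vx, vy, yaw-rate
    "dof_pos": NUM_DOF,
    "dof_vel": NUM_DOF,
    "last_actions": NUM_DOF,
}

assert sum(proprioception_layout.values()) == 48
```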
+ action_rate = -0.01 + delta_torques = -1e-7 + torques = -1.e-5 + hip_pos = -0.4 + dof_error = -0.04 + stand_still = -0.6 + only_positive_rewards = False + soft_dof_pos_limit = 0.6 + + class normalization( Go1FieldCfg.normalization ): + clip_actions_method = "hard" + +logs_root = osp.join(osp.dirname(osp.dirname(osp.dirname(osp.dirname(osp.abspath(__file__))))), "logs") +class Go1RemoteCfgPPO( Go1FieldCfgPPO ): + class algorithm( Go1FieldCfgPPO.algorithm ): + entropy_coef = 0.01 + clip_min_std = 0.02 + + class runner( Go1FieldCfgPPO.runner ): + resume = True + # load_run = osp.join(logs_root, "field_a1/Sep26_15-33-45_WalkByRemote_pEnergySubsteps2e-5_pTorqueL18e-01_pCollision0.2_propDelay0.04-0.05_aScale0.5_kp40_kd0.5_maxPushAng0.5_noTanh_hardActClip_noResume") + load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Sep28_11-53-30_WalkByRemote_pEnergySubsteps-4e-05_kp40_kd0.5_noTanh_fromSep26_15-33-45") + load_run = "Oct04_16-15-17_WalkByRemote_pEnergySubsteps-1e-04_softDof0.5_pStand0.0_pDofAcc5e-7_kp40_kd0.5_noTanh_fromSep28_11-53-30" + load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct12_16-01-06_WalkByRemote_pLin1.0_pFootForce1.e-02_xComMin-0.2_maxForce200_propDelay0.04-0.05_entropyCoef0.01_noTanh_fromOct11_04-34-39") + load_run = "Oct23_03-33-29_WalkByRemote_pEnergySubsteps-1e-05_rLinTrack3.0_softDof0.6_pStand1.0_kp40_kd0.5_noTanh_fromOct12_16-01-06" + load_run = "Oct23_11-01-47_WalkByRemote_pEnergySubsteps-1e-05_rLinTrack5.0_pCollide1e-1_pOrient1e-1_pStand2e-1_softDof0.6_kp40_kd0.5_noTanh_fromOct23_03-33-29" + load_run = "Oct23_13-14-42_WalkByRemote_pEnergySubsteps1e-05_rLinTrack5.0_pDofAcc2.5e-7_pCollide1e-1_pOrient1e-1_pExceedDof8e-1_pStand4e-1_softDof0.6_noTanh_EntropyCoef0.01_fromOct23_11-01-47" + load_run = osp.join(logs_root, "field_a1_noTanh_oracle", "Oct12_16-00-55_WalkByRemote_pLin1.0_pFootForce1.e-02_xComMin-0.2_maxForce200_propDelay0.05-0.07_entropyCoef0.01_noTanh_fromOct11_04-34-39") + load_run = "Oct24_15-05-23_WalkByRemote_rLinTrack3.0_pActRate1e-1_softDof0.6_noTanh_zeroResetAction_hackReset_EntropyCoef0.01_actionCliphard_fromOct12_16-00-55" + + run_name = "".join(["WalkByRemote", + ("_pEnergySubsteps{:.0e}".format(-Go1RemoteCfg.rewards.scales.legs_energy_substeps) if getattr(Go1RemoteCfg.rewards.scales, "legs_energy_substeps", 0.) != 0. else ""), + ("_rLinTrack{:.1f}".format(Go1RemoteCfg.rewards.scales.tracking_lin_vel) if getattr(Go1RemoteCfg.rewards.scales, "tracking_lin_vel", 0.) != 0. else ""), + ("_pLinVelL2{:.1f}".format(-Go1RemoteCfg.rewards.scales.lin_vel_l2norm) if getattr(Go1RemoteCfg.rewards.scales, "lin_vel_l2norm", 0.) != 0. else ""), + ("_rAlive{:.1f}".format(Go1RemoteCfg.rewards.scales.alive) if getattr(Go1RemoteCfg.rewards.scales, "alive", 0.) != 0. else ""), + # ("_pDofAcc" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.dof_acc, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "dof_acc", 0.) != 0. else ""), + # ("_pCollide" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.collision, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "collision", 0.) != 0. else ""), + # ("_pOrient" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.orientation, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "orientation", 0.) != 0. else ""), + # ("_pExceedDof" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.exceed_dof_pos_limits, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "exceed_dof_pos_limits", 0.) != 0. 
else ""), + ("_pExceedTorqueL1" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.exceed_torque_limits_l1norm, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "exceed_torque_limits_l1norm", 0.) != 0. else ""), + # ("_pStand" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.stand_still, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "stand_still", 0.) != 0. else ""), + ("_pActRate" + np.format_float_scientific(-Go1RemoteCfg.rewards.scales.action_rate, trim= "-", exp_digits= 1) if getattr(Go1RemoteCfg.rewards.scales, "action_rate", 0.) != 0. else ""), + ("_softDof{:.1f}".format(Go1RemoteCfg.rewards.soft_dof_pos_limit) if Go1RemoteCfg.rewards.soft_dof_pos_limit != 0.9 else ""), + # ("_kp{:d}".format(int(Go1RemoteCfg.control.stiffness["joint"])) if Go1RemoteCfg.control.stiffness["joint"] != 50 else ""), + # ("_kd{:.1f}".format(Go1RemoteCfg.control.damping["joint"]) if Go1RemoteCfg.control.damping["joint"] != 1. else ""), + ("_noTanh"), + # ("_zeroResetAction" if Go1RemoteCfg.init_state.zero_actions else ""), + ("_EntropyCoef0.01"), + ("_actionClip" + Go1RemoteCfg.normalization.clip_actions_method if getattr(Go1RemoteCfg.normalization, "clip_actions_method", None) is not None else ""), + ("_noResume" if not resume else "_from" + "_".join(load_run.split("/")[-1].split("_")[:2])), + ]) + + diff --git a/legged_gym/legged_gym/envs/go1/go1_tilt_config.py b/legged_gym/legged_gym/envs/go1/go1_tilt_config.py new file mode 100644 index 0000000..0308bdd --- /dev/null +++ b/legged_gym/legged_gym/envs/go1/go1_tilt_config.py @@ -0,0 +1,90 @@ +from os import path as osp +import numpy as np +from legged_gym.envs.go1.go1_field_config import Go1FieldCfg, Go1FieldCfgPPO +from legged_gym.utils.helpers import merge_dict + +class Go1TiltCfg( Go1FieldCfg ): + + #### uncomment this to train non-virtual terrain + # class sensor( Go1FieldCfg.sensor ): + # class proprioception( Go1FieldCfg.sensor.proprioception ): + # delay_action_obs = True + # latency_range = [0.04-0.0025, 0.04+0.0075] + #### uncomment the above to train non-virtual terrain + + class terrain( Go1FieldCfg.terrain ): + max_init_terrain_level = 2 + border_size = 5 + slope_treshold = 20. + curriculum = True + + BarrierTrack_kwargs = merge_dict(Go1FieldCfg.terrain.BarrierTrack_kwargs, dict( + options= [ + "tilt", + ], + tilt= dict( + width= (0.24, 0.8), + depth= (0.4, 1.), # size along the forward axis + opening_angle= 0.0, # [rad] an opening that make the robot easier to get into the obstacle + wall_height= 0.5, + ), + virtual_terrain= True, # Change this to False for real terrain + no_perlin_threshold= 0.06, + )) + + TerrainPerlin_kwargs = merge_dict(Go1FieldCfg.terrain.TerrainPerlin_kwargs, dict( + zScale= [0.05, 0.1], + )) + + class commands( Go1FieldCfg.commands ): + class ranges( Go1FieldCfg.commands.ranges ): + lin_vel_x = [0.3, 0.6] + lin_vel_y = [0.0, 0.0] + ang_vel_yaw = [0., 0.] + + class termination( Go1FieldCfg.termination ): + # additional factors that determines whether to terminates the episode + termination_terms = [ + "roll", + "pitch", + "z_low", + "z_high", + "out_of_track", + ] + roll_kwargs = merge_dict(Go1FieldCfg.termination.roll_kwargs, dict( + threshold= 0.4, + leap_threshold= 0.4, + )) + z_high_kwargs = merge_dict(Go1FieldCfg.termination.z_high_kwargs, dict( + threshold= 2.0, + )) + + class rewards( Go1FieldCfg.rewards ): + class scales: + tracking_ang_vel = 0.05 + world_vel_l2norm = -1. + legs_energy_substeps = -1e-6 + alive = 2. 
+ penetrate_depth = -4e-3 + penetrate_volume = -4e-3 + exceed_dof_pos_limits = -1e-1 + exceed_torque_limits_i = -1e-1 + + class curriculum( Go1FieldCfg.curriculum ): + penetrate_volume_threshold_harder = 9000 + penetrate_volume_threshold_easier = 10000 + penetrate_depth_threshold_harder = 300 + penetrate_depth_threshold_easier = 5000 + +class Go1TiltCfgPPO( Go1FieldCfgPPO ): + class runner( Go1FieldCfgPPO.runner ): + run_name = "".join(["Skill", + ("Multi" if len(Go1TiltCfg.terrain.BarrierTrack_kwargs["options"]) > 1 else (Go1TiltCfg.terrain.BarrierTrack_kwargs["options"][0] if Go1TiltCfg.terrain.BarrierTrack_kwargs["options"] else "PlaneWalking")), + ("_tiltMax{:.1f}".format(Go1TiltCfg.terrain.BarrierTrack_kwargs["tilt"]["width"][1]) if Go1TiltCfg.terrain.BarrierTrack_kwargs["tilt"]["width"][1] > 0.5 else ""), + ]) + resume = True + load_run = "{Your traind walking model directory}" + load_run = "Sep20_03-37-32_SkillopensourcePlaneWalking_pEnergySubsteps1e-5_pTorqueExceedIndicate1e-1_aScale0.5_tClip202025" + max_iterations = 20000 + save_interval = 500 + diff --git a/legged_gym/legged_gym/scripts/clear_dataset.py b/legged_gym/legged_gym/scripts/clear_dataset.py index 5e4a8a9..683438c 100644 --- a/legged_gym/legged_gym/scripts/clear_dataset.py +++ b/legged_gym/legged_gym/scripts/clear_dataset.py @@ -7,13 +7,13 @@ import argparse def main(args): for data_dir in args.data_dirs: if osp.isfile(osp.join(data_dir, "metadata.json")): - shutil.move( + shutil.copy2( osp.join(data_dir, "metadata.json"), osp.join(osp.dirname(data_dir), data_dir.split("/")[-1] + ".json") ) print(f"Moved metadata.json to {osp.join(osp.dirname(data_dir), data_dir.split('/')[-1] + '.json')}") # removing the directory - if osp.isdir(data_dir): + if osp.isdir(data_dir) and (not osp.islink(data_dir)): print("Removing directory: ", data_dir) if not args.only_files: shutil.rmtree(data_dir) diff --git a/legged_gym/legged_gym/scripts/collect.py b/legged_gym/legged_gym/scripts/collect.py index 60e6eb6..37be847 100644 --- a/legged_gym/legged_gym/scripts/collect.py +++ b/legged_gym/legged_gym/scripts/collect.py @@ -29,36 +29,34 @@ def main(args): update_class_from_dict(train_cfg, d, strict= True) # Some custom settings - env_cfg.terrain.BarrierTrack_kwargs["options"] = [ - "tilt", # "tilt", - "crawl", # "crawl", - "climb", # "climb", - "climb", # "climb", - "leap", # "leap", - ] - ####### customized option to increase data distribution ####### + # ####### customized option to increase data distribution ####### action_sample_std = 0.0 ############# some predefined options ############# if len(env_cfg.terrain.BarrierTrack_kwargs["options"]) == 1: - env_cfg.terrain.BarrierTrack_kwargs["n_obstacles_per_track"] = 1 env_cfg.terrain.num_rows = 20; env_cfg.terrain.num_cols = 30 else: # for parkour env # >>> option 1 - env_cfg.domain_rand.added_mass_range = [1.5, 3.0] - env_cfg.terrain.BarrierTrack_kwargs["climb_down_prob"] = 0.45 - env_cfg.terrain.num_rows = 60; env_cfg.terrain.num_cols = 1 + env_cfg.terrain.BarrierTrack_kwargs["track_block_length"] = 2.8 + env_cfg.terrain.BarrierTrack_kwargs["track_width"] = 2.4 + env_cfg.terrain.BarrierTrack_kwargs["wall_thickness"] = (0.0, 0.6) + env_cfg.domain_rand.init_base_pos_range["x"] = (0.4, 1.8) + env_cfg.terrain.num_rows = 12; env_cfg.terrain.num_cols = 10 # >>> option 2 - # env_cfg.terrain.BarrierTrack_kwargs["climb"]["depth"] = (0.1, 0.3) - # env_cfg.terrain.BarrierTrack_kwargs["climb"]["climb_down_prob"] = 0.55 - # env_cfg.terrain.num_rows = 60; env_cfg.terrain.num_cols = 1 + # 
env_cfg.terrain.BarrierTrack_kwargs["track_block_length"] = 3. + # env_cfg.terrain.BarrierTrack_kwargs["track_width"] = 4.0 + # env_cfg.terrain.BarrierTrack_kwargs["wall_height"] = (-0.5, -0.2) + # env_cfg.terrain.BarrierTrack_kwargs["wall_thickness"] = (0.0, 1.4) + # env_cfg.domain_rand.init_base_pos_range["x"] = (1.6, 2.0) + # env_cfg.terrain.num_rows = 16; env_cfg.terrain.num_cols = 5 # >>> option 3 + # env_cfg.terrain.BarrierTrack_kwargs["track_block_length"] = 1.6 + # env_cfg.terrain.BarrierTrack_kwargs["track_width"] = 2.2 + # env_cfg.terrain.BarrierTrack_kwargs["wall_height"] = (-0.5, 0.1) + # env_cfg.terrain.BarrierTrack_kwargs["wall_thickness"] = (0.0, 0.5) + # env_cfg.domain_rand.init_base_pos_range["x"] = (0.2, 0.9) # env_cfg.terrain.BarrierTrack_kwargs["n_obstacles_per_track"] = 1 - # env_cfg.terrain.BarrierTrack_kwargs["track_width"] = 1.6 - # env_cfg.terrain.BarrierTrack_kwargs["wall_thickness"] = (0.0, 0.3) - # env_cfg.terrain.BarrierTrack_kwargs["track_block_length"] = 3.0 - # env_cfg.terrain.BarrierTrack_kwargs["climb"]["depth"] = (0.5, 1.8) - # env_cfg.terrain.num_rows = 20; env_cfg.terrain.num_cols = 24 # action_sample_std = 0.1 + # env_cfg.terrain.num_rows = 22; env_cfg.terrain.num_cols = 16 pass if (env_cfg.terrain.BarrierTrack_kwargs["options"][0] == "leap") and all(i == env_cfg.terrain.BarrierTrack_kwargs["options"][0] for i in env_cfg.terrain.BarrierTrack_kwargs["options"]): ######### For leap, because the platform is usually higher than the ground. diff --git a/legged_gym/legged_gym/scripts/play.py b/legged_gym/legged_gym/scripts/play.py index c9397a3..0a1063d 100644 --- a/legged_gym/legged_gym/scripts/play.py +++ b/legged_gym/legged_gym/scripts/play.py @@ -101,7 +101,7 @@ def play(args): env_cfg.terrain.curriculum = False env_cfg.terrain.BarrierTrack_kwargs["options"] = [ # "crawl", - "climb", + "jump", # "leap", # "tilt", ] @@ -146,7 +146,7 @@ def play(args): logs_root + "/field_a1_oracle/Jun03_00-33-08_Skills_climb_pEnergySubsteps2e-6_rAlive2_pTorqueExceedIndicate2e-1_propDelay0.04-0.05_noPerlinRate0.2_nSubsteps4_envFreq50.0_aScale0.5", logs_root + "/field_a1_oracle/Jun04_01-03-59_Skills_leap_pEnergySubsteps2e-6_rAlive2_pPenV4e-3_pPenD4e-3_pPosY0.20_pYaw0.20_pTorqueExceedSquare1e-3_leapH0.2_propDelay0.04-0.05_noPerlinRate0.2_aScale0.5", ] - train_cfg.policy.climb_down_policy_path = logs_root + "/field_a1_oracle/Aug30_16-12-14_Skills_climb_climbDownProb0.5_propDelay0.04-0.05" + train_cfg.policy.jump_down_policy_path = logs_root + "/field_a1_oracle/Aug30_16-12-14_Skills_climb_climbDownProb0.5_propDelay0.04-0.05" train_cfg.runner.resume = False env_cfg.env.use_lin_vel = True train_cfg.policy.cmd_vel_mapping = { diff --git a/legged_gym/legged_gym/utils/helpers.py b/legged_gym/legged_gym/utils/helpers.py index d56f253..fc0a0c4 100644 --- a/legged_gym/legged_gym/utils/helpers.py +++ b/legged_gym/legged_gym/utils/helpers.py @@ -163,7 +163,7 @@ def update_cfg_from_args(env_cfg, cfg_train, args): return env_cfg, cfg_train -def get_args(): +def get_args(custom_args=[]): custom_parameters = [ {"name": "--task", "type": str, "default": "anymal_c_flat", "help": "Resume training or start testing from a checkpoint. Overrides config file if provided."}, {"name": "--resume", "action": "store_true", "default": False, "help": "Resume training from a checkpoint"}, @@ -178,7 +178,7 @@ def get_args(): {"name": "--num_envs", "type": int, "help": "Number of environments to create. Overrides config file if provided."}, {"name": "--seed", "type": int, "help": "Random seed. 
Overrides config file if provided."}, {"name": "--max_iterations", "type": int, "help": "Maximum number of training iterations. Overrides config file if provided."}, - ] + ] + custom_args # parse arguments args = gymutil.parse_arguments( description="RL Policy", diff --git a/legged_gym/legged_gym/utils/logger.py b/legged_gym/legged_gym/utils/logger.py index 3906a7e..7ef0353 100644 --- a/legged_gym/legged_gym/utils/logger.py +++ b/legged_gym/legged_gym/utils/logger.py @@ -96,7 +96,9 @@ class Logger: a.legend() # plot base pitch a = axs[0, 2] - if log["base_pitch"]: a.plot(time, log["base_pitch"], label='measured') + if log["base_pitch"]: + a.plot(time, log["base_pitch"], label='measured') + a.plot(time, [-0.75] * len(time), label= 'thresh') # if log["command_yaw"]: a.plot(time, log["command_yaw"], label='commanded') a.set(xlabel='time [s]', ylabel='base ang [rad]', title='Base pitch') a.legend() @@ -113,11 +115,15 @@ class Logger: a.plot(time, forces[:, i], label=f'force {i}') a.set(xlabel='time [s]', ylabel='Forces z [N]', title='Vertical Contact forces') a.legend() - # plot torque/vel curves + # # plot torque/vel curves + # a = axs[2, 1] + # if log["dof_vel"]!=[] and log["dof_torque"]!=[]: a.plot(log["dof_vel"], log["dof_torque"], 'x', label='measured') + # a.set(xlabel='Joint vel [rad/s]', ylabel='Joint Torque [Nm]', title='Torque/velocity curves') + # a.legend() + # plot power curves a = axs[2, 1] - if log["dof_vel"]!=[] and log["dof_torque"]!=[]: a.plot(log["dof_vel"], log["dof_torque"], 'x', label='measured') - a.set(xlabel='Joint vel [rad/s]', ylabel='Joint Torque [Nm]', title='Torque/velocity curves') - a.legend() + if log["power"]!=[]: a.plot(time, log["power"], label='power [W]') + a.set(xlabel='time [s]', ylabel='Power [W]', title='Power') # plot torques a = axs[2, 2] if log["dof_torque"]!=[]: a.plot(time, log["dof_torque"], label='measured') @@ -126,20 +132,29 @@ class Logger: # plot rewards a = axs[3, 0] if log["max_torques"]: a.plot(time, log["max_torques"], label='max_torques') + if log["max_torque_motor"]: a.plot(time, log["max_torque_motor"], label='max_torque_motor') + if log["max_torque_leg"]: a.plot(time, log["max_torque_leg"], label='max_torque_leg') a.set(xlabel='time [s]', ylabel='max_torques [Nm]', title='max_torques') - a.legend() + a.legend(fontsize= 5) # plot customed data a = axs[3, 1] if log["student_action"]: a.plot(time, log["student_action"], label='s') a.plot(time, log["teacher_action"], label='t') a.legend() - a.set(xlabel='time [s]', ylabel='tanh', title='student/teacher action') + a.set(xlabel='time [s]', ylabel='value before step()', title='student/teacher action') a = axs[3, 2] - if log["teacher_action"]: - a.plot(time, [i-j for i, j in zip(log["student_action"], log["teacher_action"])], label='action difference') - a.set(xlabel='time [s]', ylabel='tanh', title='action difference') - a.legend() + a.plot(time, log["reward"], label='rewards') + for i in log["mark"]: + if i > 0: + a.plot(time, log["mark"], label='user mark') + break + for key in log.keys(): + if "reward_removed_" in key: + a.plot(time, log[key], label= key) + a.set(xlabel='time [s]', ylabel='', title='rewards') + # a.set_ylim([-0.12, 0.1]) + a.legend(fontsize = 5) plt.show() def print_rewards(self): diff --git a/legged_gym/legged_gym/utils/terrain/barrier_track.py b/legged_gym/legged_gym/utils/terrain/barrier_track.py index 928b4a4..e439121 100644 --- a/legged_gym/legged_gym/utils/terrain/barrier_track.py +++ b/legged_gym/legged_gym/utils/terrain/barrier_track.py @@ -12,7 +12,7 @@ class 
BarrierTrack: # default kwargs track_kwargs = dict( options= [ - "climb", + "jump", "crawl", "tilt", ], # each race track will permute all the options @@ -22,12 +22,11 @@ class BarrierTrack: track_block_length= 1.2, # the x-axis distance from the env origin point wall_thickness= 0.04, # [m] wall_height= 0.5, # [m] - climb= dict( + jump= dict( height= 0.3, depth= 0.04, # size along the forward axis - fake_offset= 0.0, # [m] fake offset will make climb's height info greater than its physical height. - fake_height= 0.0, # [m] offset/height only one of them can be non-zero - climb_down_prob= 0.0, # if > 0, will have a chance to climb down from the obstacle + fake_offset= 0.0, # [m] fake offset will make jump's height info greater than its physical height. + jump_down_prob= 0.0, # if > 0, will have a chance to jump down from the obstacle ), crawl= dict( height= 0.32, @@ -53,16 +52,18 @@ class BarrierTrack: border_height= 0., # Incase we want the surrounding plane to be lower than the track virtual_terrain= False, draw_virtual_terrain= False, # set True for visualization - check_skill_combinations= False, # check if some specific skills are connected, if set. e.g. climb -> leap + check_skill_combinations= False, # check if some specific skills are connected, if set. e.g. jump -> leap engaging_next_threshold= 0., # if > 0, engaging_next is based on this threshold instead of track_block_length/2. Make sure the obstacle is not too long. + engaging_finish_threshold= 0., # an obstacle is considered finished only if the last volume point is this amount away from the block origin. curriculum_perlin= True, # If True, perlin noise scale will be depends on the difficulty if possible. no_perlin_threshold= 0.02, # If the perlin noise is too small, clip it to zero. + walk_in_skill_gap= False, # If True, obstacle ID will be walk when the distance to the obstacle does not reach engaging_next_threshold ) - max_track_options = 4 # ("tilt", "crawl", "climb", "dynamic") at most + max_track_options = 4 # ("tilt", "crawl", "jump", "dynamic") at most track_options_id_dict = { "tilt": 1, "crawl": 2, - "climb": 3, + "jump": 3, "leap": 4, } # track_id are aranged in this order def __init__(self, cfg, num_robots: int) -> None: @@ -88,7 +89,7 @@ class BarrierTrack: # For each track block (n_options + 1 in total), 3 parameters are enabled: # - track_id: int, starting track is 0, other numbers depends on the options order. # - obstacle_depth: float, - # - obstacle_critical_params: e.g. tilt width, crawl height, climb height + # - obstacle_critical_params: e.g. 
tilt width, crawl height, jump height # num_rows + 1 incase the robot finish the entire row of tracks self.track_info_map = torch.zeros( @@ -101,6 +102,11 @@ class BarrierTrack: dtype= torch.float32, device= self.device, ) + self.block_starting_height_map = torch.zeros( + (self.cfg.num_rows, self.cfg.num_cols, self.n_blocks_per_track), + dtype= torch.float32, + device= self.device, + ) # height [m] related to the world coordinate system def initialize_track(self): """ All track blocks are defined as follows @@ -126,7 +132,7 @@ class BarrierTrack: np.ceil(self.track_kwargs["track_block_length"] / self.cfg.horizontal_scale).astype(int), np.ceil(self.track_kwargs["track_width"] / self.cfg.horizontal_scale).astype(int), ) - self.n_blocks_per_track = (self.track_kwargs["n_obstacles_per_track"] + 1) if self.track_kwargs["randomize_obstacle_order"] else (len(self.track_kwargs["options"]) + 1) + self.n_blocks_per_track = (self.track_kwargs["n_obstacles_per_track"] + 1) if (self.track_kwargs["randomize_obstacle_order"] and len(self.track_kwargs["options"]) > 0) else (len(self.track_kwargs["options"]) + 1) self.track_resolution = ( np.ceil(self.track_kwargs["track_block_length"] * self.n_blocks_per_track / self.cfg.horizontal_scale).astype(int), np.ceil(self.track_kwargs["track_width"] / self.cfg.horizontal_scale).astype(int), @@ -183,7 +189,7 @@ class BarrierTrack: height_offset_px = 0 return track_trimesh, track_heightfield, block_info, height_offset_px - def get_climb_track(self, + def get_jump_track(self, wall_thickness, trimesh_template, heightfield_template, @@ -191,25 +197,25 @@ class BarrierTrack: heightfield_noise= None, virtual= False, ): - if isinstance(self.track_kwargs["climb"]["depth"], (tuple, list)): + if isinstance(self.track_kwargs["jump"]["depth"], (tuple, list)): if not virtual: - climb_depth = min(*self.track_kwargs["climb"]["depth"]) + jump_depth = min(*self.track_kwargs["jump"]["depth"]) else: - climb_depth = np.random.uniform(*self.track_kwargs["climb"]["depth"]) + jump_depth = np.random.uniform(*self.track_kwargs["jump"]["depth"]) else: - climb_depth = self.track_kwargs["climb"]["depth"] - if isinstance(self.track_kwargs["climb"]["height"], (tuple, list)): + jump_depth = self.track_kwargs["jump"]["depth"] + if isinstance(self.track_kwargs["jump"]["height"], (tuple, list)): if difficulty is None: - climb_height = np.random.uniform(*self.track_kwargs["climb"]["height"]) + jump_height = np.random.uniform(*self.track_kwargs["jump"]["height"]) else: - climb_height = (1-difficulty) * self.track_kwargs["climb"]["height"][0] + difficulty * self.track_kwargs["climb"]["height"][1] + jump_height = (1-difficulty) * self.track_kwargs["jump"]["height"][0] + difficulty * self.track_kwargs["jump"]["height"][1] else: - climb_height = self.track_kwargs["climb"]["height"] - if self.track_kwargs["climb"].get("climb_down_prob", 0.) > 0.: - if np.random.uniform() < self.track_kwargs["climb"]["climb_down_prob"]: - climb_height = -climb_height - depth_px = int(climb_depth / self.cfg.horizontal_scale) - height_value = climb_height / self.cfg.vertical_scale + jump_height = self.track_kwargs["jump"]["height"] + if self.track_kwargs["jump"].get("jump_down_prob", 0.) 
> 0.: + if np.random.uniform() < self.track_kwargs["jump"]["jump_down_prob"]: + jump_height = -jump_height + depth_px = int(jump_depth / self.cfg.horizontal_scale) + height_value = jump_height / self.cfg.vertical_scale wall_thickness_px = int(wall_thickness / self.cfg.horizontal_scale) + 1 if not heightfield_noise is None: @@ -223,7 +229,7 @@ class BarrierTrack: ] += height_value if height_value < 0.: track_heightfield[ - depth_px:, + (0 if virtual else depth_px):, max(0, wall_thickness_px-1): min(-1, -wall_thickness_px+1), ] += height_value track_trimesh = convert_heightfield_to_trimesh( @@ -233,12 +239,15 @@ class BarrierTrack: self.cfg.slope_treshold, ) assert not ( - self.track_kwargs["climb"].get("fake_offset", 0.) != 0. and self.track_kwargs["climb"].get("fake_height", 0.) != 0.), \ + self.track_kwargs["jump"].get("fake_offset", 0.) != 0. and self.track_kwargs["jump"].get("fake_height", 0.) != 0.), \ "fake_offset and fake_height cannot be both non-zero" - climb_height_ = climb_height + (self.track_kwargs["climb"].get("fake_offset", 0.) if self.track_kwargs["climb"].get("fake_offset", 0.) == 0. else self.track_kwargs["climb"].get("fake_height", 0.)) + jump_height_ = jump_height + ( + self.track_kwargs["jump"].get("fake_offset", 0.) \ + if jump_height > 0. \ + else 0.) block_info = torch.tensor([ - climb_depth, - climb_height_, + jump_depth, + jump_height_, ], dtype= torch.float32, device= self.device) height_offset_px = height_value if not virtual else min(height_value, 0) return track_trimesh, track_heightfield, block_info, height_offset_px @@ -399,9 +408,9 @@ class BarrierTrack: track_heightfield = heightfield_template + heightfield_noise else: track_heightfield = heightfield_template.copy() - # There is no difference between virtual/non-virtual environment. + start_px = int(self.track_kwargs["leap"].get("fake_offset", 0.) / self.cfg.horizontal_scale) + 1 if not virtual else 1 track_heightfield[ - 1: length_px+1, + start_px: length_px+1, max(0, wall_thickness_px-1): min(-1, -wall_thickness_px+1), ] -= depth_value track_trimesh = convert_heightfield_to_trimesh( @@ -538,13 +547,14 @@ class BarrierTrack: self.track_info_map[row_idx, col_idx, 0, 0] = 0 self.track_info_map[row_idx, col_idx, 0, 1:] = block_info self.track_width_map[row_idx, col_idx] = self.env_width - wall_thickness * 2 + self.block_starting_height_map[row_idx, col_idx, 0] = block_starting_height_px * self.cfg.vertical_scale block_starting_height_px += height_offset_px for obstacle_idx, obstacle_selection in enumerate(obstacle_order): obstacle_name = self.track_kwargs["options"][obstacle_selection] obstacle_id = self.track_options_id_dict[obstacle_name] # call method to generate trimesh and heightfield for each track block. - # For example get_climb_track, get_tilt_track + # For example get_jump_track, get_tilt_track # using `virtual_track` to create non-collision mesh for collocation method in training. 
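The get_jump_track edits above keep the original rasterization recipe: obstacle depth in meters becomes a pixel count via horizontal_scale, height becomes height units via vertical_scale, and a negative height (a jump-down obstacle, drawn with probability jump_down_prob) is simply subtracted instead of added. Below is a toy, standalone version of that discretization step; the scales, block size, and simplified wall handling are invented for illustration and do not reproduce the real BarrierTrack bookkeeping.

```python
import numpy as np

# Made-up resolution, roughly in the range legged_gym terrains use.
horizontal_scale = 0.05  # [m] per heightfield pixel
vertical_scale = 0.005   # [m] per height unit

def add_jump_block(heightfield, jump_depth, jump_height, wall_thickness=0.04):
    """Raise (or lower, if jump_height < 0) the first jump_depth meters of a block."""
    depth_px = int(jump_depth / horizontal_scale)
    height_value = int(jump_height / vertical_scale)
    wall_px = int(wall_thickness / horizontal_scale) + 1
    # Shift the obstacle slab between the side walls.
    heightfield[1:depth_px + 1, wall_px:-wall_px] += height_value
    return heightfield

block = np.zeros((24, 36), dtype=np.int16)   # one 1.2 m x 1.8 m block at this resolution
add_jump_block(block, jump_depth=0.3, jump_height=0.45)
print(block.max() * vertical_scale)          # ~0.45 m obstacle height
```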
track_trimesh, track_heightfield, block_info, height_offset_px = getattr(self, "get_" + obstacle_name + "_track")( wall_thickness, @@ -576,6 +586,7 @@ class BarrierTrack: ) self.track_info_map[row_idx, col_idx, obstacle_idx + 1, 0] = obstacle_id self.track_info_map[row_idx, col_idx, obstacle_idx + 1, 1:] = block_info + self.block_starting_height_map[row_idx, col_idx, obstacle_idx + 1] = block_starting_height_px * self.cfg.vertical_scale block_starting_height_px += height_offset_px return block_starting_height_px @@ -711,7 +722,7 @@ class BarrierTrack: block_idx = torch.floor(forward_distance / self.env_block_length).to(int) # (N,) block_idx_clipped = torch.clip( block_idx, - 0., + 0, (self.n_blocks_per_track - 1), ) in_track_mask = (track_idx == track_idx_clipped).all(dim= -1) & (block_idx == block_idx_clipped) @@ -757,6 +768,7 @@ class BarrierTrack: else self.env_block_length / 2 min_x_points_offset = torch.min(volume_points_offset[:, :, 0], dim= -1)[0] # (n,) engaging_next_block = ((in_block_distance + min_x_points_offset) > engaging_obstacle_depth) \ + & (in_block_distance > self.track_kwargs.get("engaging_finish_threshold", 0.)) \ & (in_block_distance > engaging_next_distance) # update the engaging track_idx and block_idx if engaging next @@ -790,7 +802,9 @@ class BarrierTrack: 0 ].to(int) if self.track_kwargs.get("walk_in_skill_gap", False): - between_skill_mask = ((in_block_distance + min_x_points_offset) > engaging_obstacle_depth) & (in_block_distance < engaging_next_distance) + between_skill_mask = ((in_block_distance + min_x_points_offset) > engaging_obstacle_depth) \ + & (in_block_distance > self.track_kwargs.get("engaging_finish_threshold", 0.)) \ + & (in_block_distance < engaging_next_distance) obstacle_id_selection[between_skill_mask] = 0. engaging_block_info[between_skill_mask] = 0. engaging_obstacle_onehot[ @@ -827,18 +841,35 @@ class BarrierTrack: distance_neg_y, ], dim= -1) # (n, 2) - def get_climb_penetration_depths(self, + def get_jump_penetration_depths(self, block_infos, positions_in_block, mask_only= False, ): - in_obstacle_mask = positions_in_block[:, 0] <= block_infos[:, 1] - climb_over_mask = positions_in_block[:, 2] > block_infos[:, 2] - penetrated_mask = torch.logical_and(in_obstacle_mask, (torch.logical_not(climb_over_mask))) + in_up_mask = torch.logical_and( + positions_in_block[:, 0] <= block_infos[:, 1], + block_infos[:, 2] > 0., + ) + in_down_mask = torch.logical_and( + positions_in_block[:, 0] <= block_infos[:, 1], + block_infos[:, 2] < 0., + ) + jump_over_mask = torch.logical_and( + positions_in_block[:, 2] > block_infos[:, 2], + positions_in_block[:, 2] > 0, # to avoid the penetration of virtual obstacle in jump down. + ) # (n_points,) + + penetrated_mask = torch.logical_and( + torch.logical_or(in_up_mask, in_down_mask), + (torch.logical_not(jump_over_mask)), + ) if mask_only: return penetrated_mask.to(torch.float32) penetration_depths_buffer = torch.zeros_like(penetrated_mask, dtype= torch.float32) - penetration_depths_buffer[penetrated_mask] = block_infos[penetrated_mask, 2] - positions_in_block[penetrated_mask, 2] + penetrate_up_mask = torch.logical_and(penetrated_mask, in_up_mask) + penetration_depths_buffer[penetrate_up_mask] = block_infos[penetrate_up_mask, 2] - positions_in_block[penetrate_up_mask, 2] + penetrate_down_mask = torch.logical_and(penetrated_mask, in_down_mask) + penetration_depths_buffer[penetrate_down_mask] = 0. 
- positions_in_block[penetrate_down_mask, 2] return penetration_depths_buffer def get_tilt_penetration_depths(self, @@ -906,10 +937,11 @@ class BarrierTrack: forward_distance = sample_points[:, 0] - self.cfg.border_size - (track_idx[:, 0] * self.env_length) # (N,) w.r.t a track block_idx = torch.floor(forward_distance / self.env_block_length).to(int) # (N,) block_idx[block_idx >= self.track_info_map.shape[2]] = 0. - positions_in_block = torch.cat([ - (forward_distance % self.env_block_length).unsqueeze(-1), - sample_points[:, 1:] - self.env_origins_pyt[track_idx_clipped[:, 0], track_idx_clipped[:, 1]][:, 1:], - ], dim= -1) + positions_in_block = torch.stack([ + forward_distance % self.env_block_length, + sample_points[:, 1] - self.env_origins_pyt[track_idx_clipped[:, 0], track_idx_clipped[:, 1]][:, 1], + sample_points[:, 2] - self.block_starting_height_map[track_idx_clipped[:, 0], track_idx_clipped[:, 1], block_idx], + ], dim= -1) # (N, 3) related to the origin of the block, not the track. block_infos = self.track_info_map[track_idx_clipped[:, 0], track_idx_clipped[:, 1], block_idx] # (N, 3) penetration_depths = torch.zeros_like(sample_points[:, 0]) # shape (N,) @@ -967,23 +999,23 @@ class BarrierTrack: return passed_depths ######## methods to draw visualization ####################### - def draw_virtual_climb_track(self, + def draw_virtual_jump_track(self, block_info, - xy_origin, + block_origin, ): - climb_depth = block_info[0] - climb_height = block_info[1] + jump_depth = block_info[1] + jump_height = block_info[2] geom = gymutil.WireframeBoxGeometry( - climb_depth, + jump_depth if jump_height > 0 else self.track_kwargs["jump"].get("down_forward_length", 0.1), self.env_width, - climb_height, + jump_height, pose= None, color= (0, 0, 1), ) pose = gymapi.Transform(gymapi.Vec3( - climb_depth/2 + xy_origin[0], - self.env_width/2 + xy_origin[1], - climb_height/2, + jump_depth/2 + block_origin[0], + block_origin[1], + jump_height/2 + block_origin[2], ), r= None) gymutil.draw_lines( geom, @@ -995,10 +1027,10 @@ class BarrierTrack: def draw_virtual_tilt_track(self, block_info, - xy_origin, + block_origin, ): - tilt_depth = block_info[0] - tilt_width = block_info[1] + tilt_depth = block_info[1] + tilt_width = block_info[2] wall_height = self.track_kwargs["tilt"]["wall_height"] geom = gymutil.WireframeBoxGeometry( tilt_depth, @@ -1009,9 +1041,9 @@ class BarrierTrack: ) pose = gymapi.Transform(gymapi.Vec3( - tilt_depth/2 + xy_origin[0], - (self.env_width - tilt_width) / 4 + xy_origin[1], - wall_height/2, + tilt_depth/2 + block_origin[0], + block_origin[1] + (self.env_width + tilt_width) / 4, + wall_height/2 + block_origin[2], ), r= None) gymutil.draw_lines( geom, @@ -1021,9 +1053,9 @@ class BarrierTrack: pose, ) pose = gymapi.Transform(gymapi.Vec3( - tilt_depth/2 + xy_origin[0], - self.env_width - (self.env_width - tilt_width) / 4 + xy_origin[1], - wall_height/2, + tilt_depth/2 + block_origin[0], + block_origin[1] - (self.env_width + tilt_width) / 4, + wall_height/2 + block_origin[2], ), r= None) gymutil.draw_lines( geom, @@ -1035,11 +1067,11 @@ class BarrierTrack: def draw_virtual_crawl_track(self, block_info, - xy_origin, + block_origin, ): - crawl_depth = block_info[0] - crawl_height = block_info[1] - wall_height = self.track_kwargs["crawl"]["wall_height"] + crawl_depth = block_info[1] + crawl_height = block_info[2] + wall_height = self.track_kwargs["crawl"]["wall_height"][1] if isinstance(self.track_kwargs["crawl"]["wall_height"], (list, tuple)) else self.track_kwargs["crawl"]["wall_height"] 
geom = gymutil.WireframeBoxGeometry( crawl_depth, self.env_width, @@ -1048,9 +1080,9 @@ class BarrierTrack: color= (0, 0, 1), ) pose = gymapi.Transform(gymapi.Vec3( - crawl_depth/2 + xy_origin[0], - self.env_width/2 + xy_origin[1], - wall_height/2 + crawl_height, + crawl_depth/2 + block_origin[0], + block_origin[1], + wall_height/2 + crawl_height + block_origin[2], ), r= None) gymutil.draw_lines( geom, @@ -1062,11 +1094,11 @@ class BarrierTrack: def draw_virtual_leap_track(self, block_info, - xy_origin, + block_origin, ): # virtual/non-virtual terrain looks the same when leaping the gap. # but the expected height can be visualized - leap_length = block_info[0] + leap_length = block_info[1] expected_height = self.track_kwargs["leap"]["height"] geom = gymutil.WireframeBoxGeometry( leap_length, @@ -1076,9 +1108,9 @@ class BarrierTrack: color= (0, 0.5, 0.5), ) pose = gymapi.Transform(gymapi.Vec3( - leap_length/2 + xy_origin[0], - self.env_width/2 + xy_origin[1], - expected_height/2, + leap_length/2 + block_origin[0], + block_origin[1], + expected_height/2 + block_origin[2], ), r= None) gymutil.draw_lines( geom, @@ -1090,12 +1122,12 @@ class BarrierTrack: def draw_virtual_track(self, - track_origin_px, row_idx, col_idx, ): difficulties = self.get_difficulty(row_idx, col_idx) virtual_terrain = difficulties[1] + track_origin = self.env_origins[row_idx, col_idx] for block_idx in range(1, self.track_info_map.shape[2]): if virtual_terrain and self.track_kwargs["draw_virtual_terrain"]: @@ -1104,13 +1136,14 @@ class BarrierTrack: if v == obstacle_id: obstacle_name = k break - block_info = self.track_info_map[row_idx, col_idx, block_idx, 1:] - - heightfield_x0 = track_origin_px[0] + self.track_block_resolution[0] * block_idx - heightfield_y0 = track_origin_px[1] + block_info = self.track_info_map[row_idx, col_idx, block_idx] # (3,) getattr(self, "draw_virtual_" + obstacle_name + "_track")( block_info, - (np.array([heightfield_x0, heightfield_y0]) * self.cfg.horizontal_scale), + np.array([ + track_origin[0] + self.track_kwargs["track_block_length"] * block_idx, + track_origin[1], + self.block_starting_height_map[row_idx, col_idx, block_idx].cpu().numpy(), + ]), ) def draw_virtual_terrain(self, viewer): @@ -1119,7 +1152,6 @@ class BarrierTrack: for row_idx in range(self.cfg.num_rows): for col_idx in range(self.cfg.num_cols): self.draw_virtual_track( - self.track_origins_px[row_idx, col_idx, :2], row_idx= row_idx, col_idx= col_idx, ) diff --git a/onboard_script/a1_real.py b/onboard_codes/a1_real.py similarity index 100% rename from onboard_script/a1_real.py rename to onboard_codes/a1_real.py diff --git a/onboard_script/a1_ros_run.py b/onboard_codes/a1_ros_run.py similarity index 100% rename from onboard_script/a1_ros_run.py rename to onboard_codes/a1_ros_run.py diff --git a/onboard_script/a1_visual_embedding.py b/onboard_codes/a1_visual_embedding.py similarity index 100% rename from onboard_script/a1_visual_embedding.py rename to onboard_codes/a1_visual_embedding.py diff --git a/onboard_script/go1_visual_embedding.py b/onboard_codes/go1_visual_embedding.py similarity index 100% rename from onboard_script/go1_visual_embedding.py rename to onboard_codes/go1_visual_embedding.py
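One consequence of the new block_starting_height_map shows up in draw_virtual_track above: obstacle wireframes are now placed from a full 3-D block origin (x = track origin plus block index times track_block_length, y = track origin, z = the stored starting height) instead of the old 2-D heightfield-pixel origin. A small sketch of that origin computation, with invented numbers standing in for env_origins and the stored per-block heights:

```python
import numpy as np

track_block_length = 1.2                    # [m] as in BarrierTrack_kwargs
env_origin = np.array([6.0, 3.6, 0.0])      # made-up (x, y, z) origin of one track
block_starting_heights = np.array([0.0, 0.0, 0.45, 0.45])  # made-up per-block heights [m]

def block_origin(block_idx):
    """World-frame origin of a block, mirroring the array built in draw_virtual_track."""
    return np.array([
        env_origin[0] + track_block_length * block_idx,
        env_origin[1],
        block_starting_heights[block_idx],
    ])

print(block_origin(2))  # [8.4  3.6  0.45]
```

Keeping the z component here is what lets the drawn boxes line up with jump-down blocks and stacked blocks whose ground level is no longer at z = 0.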