214 lines
8.2 KiB
Python
214 lines
8.2 KiB
Python
|
from .base_config import BaseConfig
|
||
|
|
||
|
class LeggedRobotCfg(BaseConfig):
|
||
|
class env:
|
||
|
num_envs = 4096
|
||
|
num_observations = 48
|
||
|
num_privileged_obs = None # if not None a priviledge_obs_buf will be returned by step() (critic obs for assymetric training). None is returned otherwise
|
||
|
num_actions = 12
|
||
|
env_spacing = 3. # not used with heightfields/trimeshes
|
||
|
send_timeouts = True # send time out information to the algorithm
|
||
|
episode_length_s = 20 # episode length in seconds
|
||
|
|
||
|
class terrain:
|
||
|
mesh_type = 'plane' # "heightfield" # none, plane, heightfield or trimesh
|
||
|
horizontal_scale = 0.1 # [m]
|
||
|
vertical_scale = 0.005 # [m]
|
||
|
border_size = 25 # [m]
|
||
|
curriculum = True
|
||
|
static_friction = 1.0
|
||
|
dynamic_friction = 1.0
|
||
|
restitution = 0.
|
||
|
# rough terrain only:
|
||
|
measure_heights = True
|
||
|
measured_points_x = [-0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1, 0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] # 1mx1.6m rectangle (without center line)
|
||
|
measured_points_y = [-0.5, -0.4, -0.3, -0.2, -0.1, 0., 0.1, 0.2, 0.3, 0.4, 0.5]
|
||
|
selected = False # select a unique terrain type and pass all arguments
|
||
|
terrain_kwargs = None # Dict of arguments for selected terrain
|
||
|
max_init_terrain_level = 5 # starting curriculum state
|
||
|
terrain_length = 8.
|
||
|
terrain_width = 8.
|
||
|
num_rows= 10 # number of terrain rows (levels)
|
||
|
num_cols = 20 # number of terrain cols (types)
|
||
|
# terrain types: [smooth slope, rough slope, stairs up, stairs down, discrete]
|
||
|
terrain_proportions = [0.1, 0.1, 0.35, 0.25, 0.2]
|
||
|
# trimesh only:
|
||
|
slope_treshold = 0.75 # slopes above this threshold will be corrected to vertical surfaces
|
||
|
|
||
|
class commands:
|
||
|
curriculum = False
|
||
|
max_curriculum = 1.
|
||
|
num_commands = 4 # default: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
|
||
|
resampling_time = 10. # time before command are changed[s]
|
||
|
heading_command = True # if true: compute ang vel command from heading error
|
||
|
class ranges:
|
||
|
lin_vel_x = [-1.0, 1.0] # min max [m/s]
|
||
|
lin_vel_y = [-1.0, 1.0] # min max [m/s]
|
||
|
ang_vel_yaw = [-1, 1] # min max [rad/s]
|
||
|
heading = [-3.14, 3.14]
|
||
|
|
||
|
class init_state:
|
||
|
pos = [0.0, 0.0, 1.] # x,y,z [m]
|
||
|
rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat]
|
||
|
lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s]
|
||
|
ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s]
|
||
|
default_joint_angles = { # target angles when action = 0.0
|
||
|
"joint_a": 0.,
|
||
|
"joint_b": 0.}
|
||
|
|
||
|
class control:
|
||
|
control_type = 'P' # P: position, V: velocity, T: torques
|
||
|
# PD Drive parameters:
|
||
|
stiffness = {'joint_a': 10.0, 'joint_b': 15.} # [N*m/rad]
|
||
|
damping = {'joint_a': 1.0, 'joint_b': 1.5} # [N*m*s/rad]
|
||
|
# action scale: target angle = actionScale * action + defaultAngle
|
||
|
action_scale = 0.5
|
||
|
# decimation: Number of control action updates @ sim DT per policy DT
|
||
|
decimation = 4
|
||
|
|
||
|
class asset:
|
||
|
file = ""
|
||
|
name = "legged_robot" # actor name
|
||
|
foot_name = "None" # name of the feet bodies, used to index body state and contact force tensors
|
||
|
penalize_contacts_on = []
|
||
|
terminate_after_contacts_on = []
|
||
|
disable_gravity = False
|
||
|
collapse_fixed_joints = True # merge bodies connected by fixed joints. Specific fixed joints can be kept by adding " <... dont_collapse="true">
|
||
|
fix_base_link = False # fixe the base of the robot
|
||
|
default_dof_drive_mode = 3 # see GymDofDriveModeFlags (0 is none, 1 is pos tgt, 2 is vel tgt, 3 effort)
|
||
|
self_collisions = 0 # 1 to disable, 0 to enable...bitwise filter
|
||
|
replace_cylinder_with_capsule = True # replace collision cylinders with capsules, leads to faster/more stable simulation
|
||
|
flip_visual_attachments = True # Some .obj meshes must be flipped from y-up to z-up
|
||
|
|
||
|
density = 0.001
|
||
|
angular_damping = 0.
|
||
|
linear_damping = 0.
|
||
|
max_angular_velocity = 1000.
|
||
|
max_linear_velocity = 1000.
|
||
|
armature = 0.
|
||
|
thickness = 0.01
|
||
|
|
||
|
class domain_rand:
|
||
|
randomize_friction = True
|
||
|
friction_range = [0.5, 1.25]
|
||
|
randomize_base_mass = False
|
||
|
added_mass_range = [-1., 1.]
|
||
|
push_robots = True
|
||
|
push_interval_s = 15
|
||
|
max_push_vel_xy = 1.
|
||
|
|
||
|
class rewards:
|
||
|
class scales:
|
||
|
termination = -0.0
|
||
|
tracking_lin_vel = 1.0
|
||
|
tracking_ang_vel = 0.5
|
||
|
lin_vel_z = -2.0
|
||
|
ang_vel_xy = -0.05
|
||
|
orientation = -0.
|
||
|
torques = -0.00001
|
||
|
dof_vel = -0.
|
||
|
dof_acc = -2.5e-7
|
||
|
base_height = -0.
|
||
|
feet_air_time = 1.0
|
||
|
collision = -1.
|
||
|
feet_stumble = -0.0
|
||
|
action_rate = -0.01
|
||
|
stand_still = -0.
|
||
|
|
||
|
only_positive_rewards = True # if true negative total rewards are clipped at zero (avoids early termination problems)
|
||
|
tracking_sigma = 0.25 # tracking reward = exp(-error^2/sigma)
|
||
|
soft_dof_pos_limit = 1. # percentage of urdf limits, values above this limit are penalized
|
||
|
soft_dof_vel_limit = 1.
|
||
|
soft_torque_limit = 1.
|
||
|
base_height_target = 1.
|
||
|
max_contact_force = 100. # forces above this value are penalized
|
||
|
|
||
|
class normalization:
|
||
|
class obs_scales:
|
||
|
lin_vel = 2.0
|
||
|
ang_vel = 0.25
|
||
|
dof_pos = 1.0
|
||
|
dof_vel = 0.05
|
||
|
height_measurements = 5.0
|
||
|
clip_observations = 100.
|
||
|
clip_actions = 100.
|
||
|
|
||
|
class noise:
|
||
|
add_noise = True
|
||
|
noise_level = 1.0 # scales other values
|
||
|
class noise_scales:
|
||
|
dof_pos = 0.01
|
||
|
dof_vel = 1.5
|
||
|
lin_vel = 0.1
|
||
|
ang_vel = 0.2
|
||
|
gravity = 0.05
|
||
|
height_measurements = 0.1
|
||
|
|
||
|
# viewer camera:
|
||
|
class viewer:
|
||
|
ref_env = 0
|
||
|
pos = [10, 0, 6] # [m]
|
||
|
lookat = [11., 5, 3.] # [m]
|
||
|
|
||
|
class sim:
|
||
|
dt = 0.005
|
||
|
substeps = 1
|
||
|
gravity = [0., 0. ,-9.81] # [m/s^2]
|
||
|
up_axis = 1 # 0 is y, 1 is z
|
||
|
|
||
|
class physx:
|
||
|
num_threads = 10
|
||
|
solver_type = 1 # 0: pgs, 1: tgs
|
||
|
num_position_iterations = 4
|
||
|
num_velocity_iterations = 0
|
||
|
contact_offset = 0.01 # [m]
|
||
|
rest_offset = 0.0 # [m]
|
||
|
bounce_threshold_velocity = 0.5 #0.5 [m/s]
|
||
|
max_depenetration_velocity = 1.0
|
||
|
max_gpu_contact_pairs = 2**23 #2**24 -> needed for 8000 envs and more
|
||
|
default_buffer_size_multiplier = 5
|
||
|
contact_collection = 2 # 0: never, 1: last sub-step, 2: all sub-steps (default=2)
|
||
|
|
||
|
class LeggedRobotCfgPPO(BaseConfig):
|
||
|
seed = 1
|
||
|
runner_class_name = 'OnPolicyRunner'
|
||
|
class policy:
|
||
|
init_noise_std = 1.0
|
||
|
actor_hidden_dims = [512, 256, 128]
|
||
|
critic_hidden_dims = [512, 256, 128]
|
||
|
activation = 'elu' # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid
|
||
|
# only for 'ActorCriticRecurrent':
|
||
|
# rnn_type = 'lstm'
|
||
|
# rnn_hidden_size = 512
|
||
|
# rnn_num_layers = 1
|
||
|
|
||
|
class algorithm:
|
||
|
# training params
|
||
|
value_loss_coef = 1.0
|
||
|
use_clipped_value_loss = True
|
||
|
clip_param = 0.2
|
||
|
entropy_coef = 0.01
|
||
|
num_learning_epochs = 5
|
||
|
num_mini_batches = 4 # mini batch size = num_envs*nsteps / nminibatches
|
||
|
learning_rate = 1.e-3 #5.e-4
|
||
|
schedule = 'adaptive' # could be adaptive, fixed
|
||
|
gamma = 0.99
|
||
|
lam = 0.95
|
||
|
desired_kl = 0.01
|
||
|
max_grad_norm = 1.
|
||
|
|
||
|
class runner:
|
||
|
policy_class_name = 'ActorCritic'
|
||
|
algorithm_class_name = 'PPO'
|
||
|
num_steps_per_env = 24 # per iteration
|
||
|
max_iterations = 1500 # number of policy updates
|
||
|
|
||
|
# logging
|
||
|
save_interval = 50 # check for potential saves every this many iterations
|
||
|
experiment_name = 'test'
|
||
|
run_name = ''
|
||
|
# load and resume
|
||
|
resume = False
|
||
|
load_run = -1 # -1 = last run
|
||
|
checkpoint = -1 # -1 = last saved model
|
||
|
resume_path = None # updated from load_run and chkpt
|