first version done

parent 34e6399781
commit 89f2970234

@@ -29,6 +29,8 @@ class A1LeapCfg( A1FieldCfg ):
             ),
             virtual_terrain= True, # Change this to False for real terrain
             no_perlin_threshold= 0.06,
+            n_obstacles_curriculum = True,
+            n_obstacles_per_track=2,
         ))
 
         TerrainPerlin_kwargs = merge_dict(A1FieldCfg.terrain.TerrainPerlin_kwargs, dict(

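Note: the two added keys are read back out of track_kwargs in the BarrierTrack hunk further down via .get() with backward-compatible defaults. A minimal sketch of that lookup pattern; the dict literal is a hypothetical stand-in for the merged BarrierTrack_kwargs, not the repo's config object:

# Hypothetical stand-in for the merged BarrierTrack_kwargs; only the keys
# relevant to this commit are shown.
track_kwargs = {
    "options": ["leap"],
    "n_obstacles_curriculum": True,   # new in this commit
    "n_obstacles_per_track": 2,       # new in this commit
}

# Same .get() pattern as the BarrierTrack hunk below: both keys fall back to
# defaults, so older configs that omit them keep the previous behavior.
n_obstacles_curriculum = track_kwargs.get("n_obstacles_curriculum", False)
n_obstacles_per_track = track_kwargs.get("n_obstacles_per_track", 1)
print(n_obstacles_curriculum, n_obstacles_per_track)  # True 2
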
@@ -102,7 +104,7 @@ class A1LeapCfgPPO( A1FieldCfgPPO ):
         resume = True
         # load_run = "{Your traind walking model directory}"
         # load_run = "May16_18-12-08_WalkingBase_pEnergySubsteps2e-5_aScale0.5"
-        load_run = "Leap_2m_2500"
+        load_run = "Leap_loss_success"
         # load_run = "May15_21-34-27_Skillleap_pEnergySubsteps-1e-06_virtual"#"May15_17-07-38_WalkingBase_pEnergySubsteps2e-5_aScale0.5"
         # load_run = "{Your virtually trained leap model directory}"
         max_iterations = 20000

@@ -141,6 +141,7 @@ class LeggedRobot(BaseTask):
         self.last_actions[:] = self.actions[:]
         self.last_dof_vel[:] = self.dof_vel[:]
         self.last_root_vel[:] = self.root_states[:, 7:13]
+        self.last_root_pos[:] = self.root_states[:, :3]
         self.last_torques[:] = self.torques[:]
 
         if self.viewer and self.enable_viewer_sync and self.debug_viz:

@@ -600,6 +601,7 @@ class LeggedRobot(BaseTask):
         self.last_dof_vel = torch.zeros_like(self.dof_vel)
         self.last_root_vel = torch.zeros_like(self.root_states[:, 7:13])
         self.last_torques = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False)
+        self.last_root_pos = torch.zeros_like(self.root_states[:, :3])
         self.commands = torch.zeros(self.num_envs, self.cfg.commands.num_commands, dtype=torch.float, device=self.device, requires_grad=False) # x vel, y vel, yaw vel, heading
         self.commands_scale = torch.tensor([self.obs_scales.lin_vel, self.obs_scales.lin_vel, self.obs_scales.ang_vel], device=self.device, requires_grad=False,) # TODO change this
         self.feet_air_time = torch.zeros(self.num_envs, self.feet_indices.shape[0], dtype=torch.float, device=self.device, requires_grad=False)

@@ -638,6 +640,7 @@ class LeggedRobot(BaseTask):
         self.actions[env_ids] = 0.
         self.last_actions[env_ids] = 0.
         self.last_dof_vel[env_ids] = 0.
+        self.last_root_pos[env_ids] = 0.
         self.last_torques[env_ids] = 0.
         self.feet_air_time[env_ids] = 0.
         self.episode_length_buf[env_ids] = 0

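Taken together, the three LeggedRobot hunks above give last_root_pos the same lifecycle as the other last_* buffers: allocated in the buffer-initialization hunk, refreshed after every physics step, and zeroed for reset environments. A condensed sketch of that pattern with toy sizes (not the class itself):

import torch

num_envs = 4                                        # toy value for illustration
root_states = torch.zeros(num_envs, 13)             # stand-in for self.root_states

# buffer init (mirrors the @@ -600 hunk)
last_root_pos = torch.zeros_like(root_states[:, :3])

# after each physics step (mirrors the @@ -141 hunk): cache the current base
# position so the next step's rewards can compare against it
last_root_pos[:] = root_states[:, :3]

# on reset (mirrors the @@ -638 hunk): clear the cache for re-spawned envs
env_ids = torch.tensor([0, 2])
last_root_pos[env_ids] = 0.
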
@@ -1038,7 +1038,7 @@ class LeggedRobotField(LeggedRobot):
         return (1 - torch.exp(-world_vel_error/self.cfg.rewards.tracking_sigma)) * engaging_mask # reverse version of tracking reward
 
     def _reward_lin_pos_x(self):
-        return torch.abs((self.root_states[:, :3] - self.env_origins)[:, 0])
+        return (self.root_states[:, :3] - self.last_root_pos)[:, 0]
 
     ##### Some helper functions that override parent class attributes #####
     @property

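The reward change above swaps an absolute-position term for a per-step displacement: the old line rewards being far from the track origin along x, while the new line rewards forward progress since the previous step (and penalizes moving backwards), using the last_root_pos buffer added in the LeggedRobot hunks above. A toy comparison with made-up positions for two environments:

import torch

# Made-up stand-ins for self.root_states[:, :3], self.env_origins and
# self.last_root_pos (2 envs).
root_pos      = torch.tensor([[3.20, 0.1, 0.4], [1.00, 0.0, 0.4]])
env_origins   = torch.tensor([[1.00, 0.0, 0.0], [1.00, 0.0, 0.0]])
last_root_pos = torch.tensor([[3.15, 0.1, 0.4], [1.02, 0.0, 0.4]])

old_reward = torch.abs((root_pos - env_origins)[:, 0])   # ~[2.20, 0.00]
new_reward = (root_pos - last_root_pos)[:, 0]            # ~[0.05, -0.02]
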
@@ -106,7 +106,7 @@ def play(args):
         # "tilt",
     ]
     env_cfg.terrain.BarrierTrack_kwargs["leap"] = dict(
-        length= (1.3, 1.3),
+        length= (1.0, 1.0),
         depth= (0.4, 0.8),
         height= 0.2,
     )

@@ -114,8 +114,8 @@ def play(args):
     if "one_obstacle_per_track" in env_cfg.terrain.BarrierTrack_kwargs.keys():
         env_cfg.terrain.BarrierTrack_kwargs.pop("one_obstacle_per_track")
     env_cfg.terrain.BarrierTrack_kwargs["n_obstacles_per_track"] = 2# 2
-    env_cfg.commands.ranges.lin_vel_x = [3.0, 3.0] # [1.2, 1.2]
-    env_cfg.terrain.BarrierTrack_kwargs['track_block_length']= 3.
+    env_cfg.commands.ranges.lin_vel_x = [1.5, 1.5] # [1.2, 1.2]
+    env_cfg.terrain.BarrierTrack_kwargs['track_block_length']= 2.0
     if "distill" in args.task:
         env_cfg.commands.ranges.lin_vel_x = [0.0, 0.0]
         env_cfg.commands.ranges.lin_vel_y = [-0., 0.]

@@ -106,14 +106,14 @@ def play(args):
         # "tilt",
     ]
     env_cfg.terrain.BarrierTrack_kwargs["leap"] = dict(
-        length= (1.5, 1.5),
+        length= (0.5, 1),
         depth= (0.4, 0.8),
         height= 0.2,
     )
 
     if "one_obstacle_per_track" in env_cfg.terrain.BarrierTrack_kwargs.keys():
         env_cfg.terrain.BarrierTrack_kwargs.pop("one_obstacle_per_track")
-    env_cfg.terrain.BarrierTrack_kwargs["n_obstacles_per_track"] = 1# 2
+    env_cfg.terrain.BarrierTrack_kwargs["n_obstacles_per_track"] = 2# 2
     env_cfg.commands.ranges.lin_vel_x = [3.0, 3.0] # [1.2, 1.2]
     env_cfg.terrain.BarrierTrack_kwargs['track_block_length']= 3.
     if "distill" in args.task:

@@ -479,6 +479,23 @@ class BarrierTrack:
         # adding trimesh and heighfields
+        if "one_obstacle_per_track" in self.track_kwargs.keys():
+            print("Warning: one_obstacle_per_track is deprecated, use n_obstacles_per_track instead.")
 
+        difficulties = self.get_difficulty(row_idx, col_idx)
+        difficulty, virtual_track = difficulties[:2]
+        n_obstacles_curriculum = self.track_kwargs.get("n_obstacles_curriculum", False)
+        if n_obstacles_curriculum:
+            # n_obstacles_per_track = min(int(difficulty * self.track_kwargs.get("n_obstacles_per_track", 1)) + 1, self.track_kwargs.get("n_obstacles_per_track", 1))
+            n_obstacles_per_track = self.track_kwargs.get("n_obstacles_per_track", 1)
+            obstacle_order = np.random.choice(
+                len(self.track_kwargs["options"]),
+                size= n_obstacles_per_track,
+                replace= True,
+            )
+            # if difficulty > 0.5:
+            #     difficulty = (difficulty - 0.2) / 0.8
+            # else:
+            #     difficulty *= 2
+        else:
             if self.track_kwargs["randomize_obstacle_order"] and len(self.track_kwargs["options"]) > 0:
                 obstacle_order = np.random.choice(
                     len(self.track_kwargs["options"]),

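When n_obstacles_curriculum is enabled, the new branch simply samples n_obstacles_per_track obstacle types with replacement from the configured options (the commented-out lines sketch a difficulty-scaled count and a difficulty remapping that are not active in this version). A small standalone illustration of that sampling, with a hypothetical options list:

import numpy as np

options = ["leap", "jump", "crawl"]   # hypothetical; the real list is track_kwargs["options"]
n_obstacles_per_track = 2             # matches the config hunk at the top

obstacle_order = np.random.choice(len(options), size=n_obstacles_per_track, replace=True)
print(obstacle_order, [options[i] for i in obstacle_order])
# replace=True means the same obstacle type may repeat within one track
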
@@ -487,8 +504,6 @@ class BarrierTrack:
                 )
             else:
                 obstacle_order = np.arange(len(self.track_kwargs["options"]))
-        difficulties = self.get_difficulty(row_idx, col_idx)
-        difficulty, virtual_track = difficulties[:2]
 
         if self.track_kwargs["add_perlin_noise"]:
             TerrainPerlin_kwargs = self.cfg.TerrainPerlin_kwargs

@@ -235,7 +235,7 @@ class PPO:
         if current_learning_iteration is None:
             vel_loss = 0
         else:
-            vel_loss = torch.square(velocity-2).mean() * np.exp(-0.01 * current_learning_iteration + 125)
+            vel_loss = torch.square(velocity-2).mean() * np.exp(-0.01 * current_learning_iteration + 165)
         vel_loss += torch.square(torch.clamp_max(velocity, 1.) - 1).mean()
 
         return_ = dict(

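The only change here is the constant in the annealing schedule: the weight on the (velocity - 2)^2 term is np.exp(-0.01 * current_learning_iteration + 165), which crosses 1 at iteration 16,500 (with the old constant 125 it crossed 1 at 12,500) and then shrinks by a factor of e^0.01 per iteration; the clamp term on the following line is not annealed. A quick check of the factor at a few iterations:

import numpy as np

def weight(it):
    return np.exp(-0.01 * it + 165)

for it in (12_500, 16_000, 16_500, 17_000, 20_000):
    print(it, weight(it))
# 12500 -> ~2.4e+17
# 16000 -> ~1.5e+02
# 16500 -> 1.0
# 17000 -> ~6.7e-03
# 20000 -> ~6.3e-16
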
@@ -208,6 +208,7 @@ class OnPolicyRunner:
                             'collection_time']:.3f}s, learning {locs['learn_time']:.3f}s)\n"""
                           f"""{'Value function loss:':>{pad}} {locs["losses"]['value_loss']:.4f}\n"""
                           f"""{'Surrogate loss:':>{pad}} {locs["losses"]['surrogate_loss']:.4f}\n"""
+                          f"""{'Velocity loss:':>{pad}} {locs["losses"]['vel_loss']:.4f}\n"""
                           f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n"""
                           f"""{'Mean reward:':>{pad}} {statistics.mean(locs['rewbuffer']):.2f}\n"""
                           f"""{'Mean episode length:':>{pad}} {statistics.mean(locs['lenbuffer']):.2f}\n"""

@@ -221,7 +222,6 @@ class OnPolicyRunner:
                             'collection_time']:.3f}s, learning {locs['learn_time']:.3f}s)\n"""
                           f"""{'Value function loss:':>{pad}} {locs["losses"]['value_loss']:.4f}\n"""
                           f"""{'Surrogate loss:':>{pad}} {locs["losses"]['surrogate_loss']:.4f}\n"""
-                          f"""{'Velocity loss:':>{pad}} {locs["losses"]['vel_loss']:.4f}\n"""
                           f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n"""
                           # f"""{'Mean reward/step:':>{pad}} {locs['mean_reward']:.2f}\n"""
                           # f"""{'Mean episode length/episode:':>{pad}} {locs['mean_trajectory_length']:.2f}\n"""

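For context, these log lines rely on Python's format-spec right-alignment (the ':>{pad}' part) so every metric label ends in the same column; a minimal standalone example with a hypothetical pad width:

pad = 25                      # hypothetical; the runner computes its own padding
vel_loss = 0.0123
line = f"""{'Velocity loss:':>{pad}} {vel_loss:.4f}\n"""
print(line, end="")           # prints 'Velocity loss:' right-aligned to column 25, then 0.0123
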