first version done

Jerry Xu 2024-05-28 17:55:06 -04:00
parent 34e6399781
commit 89f2970234
8 changed files with 37 additions and 17 deletions

View File

@@ -29,6 +29,8 @@ class A1LeapCfg( A1FieldCfg ):
 ),
 virtual_terrain= True, # Change this to False for real terrain
 no_perlin_threshold= 0.06,
+n_obstacles_curriculum = True,
+n_obstacles_per_track=2,
 ))
 TerrainPerlin_kwargs = merge_dict(A1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
@@ -102,7 +104,7 @@ class A1LeapCfgPPO( A1FieldCfgPPO ):
 resume = True
 # load_run = "{Your trained walking model directory}"
 # load_run = "May16_18-12-08_WalkingBase_pEnergySubsteps2e-5_aScale0.5"
-load_run = "Leap_2m_2500"
+load_run = "Leap_loss_success"
 # load_run = "May15_21-34-27_Skillleap_pEnergySubsteps-1e-06_virtual"#"May15_17-07-38_WalkingBase_pEnergySubsteps2e-5_aScale0.5"
 # load_run = "{Your virtually trained leap model directory}"
 max_iterations = 20000
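
The two keys added above turn on the obstacle-count curriculum and cap each track at two obstacles; they appear to sit inside the same merge_dict(...) override pattern that the next line uses for TerrainPerlin_kwargs. A minimal sketch of that override pattern, assuming merge_dict does a plain copy-then-update of the base dict (the real helper in this repo may differ):

    def merge_dict(base, overrides):
        # assumed behaviour: copy the base kwargs, then apply the overrides on top
        merged = dict(base)
        merged.update(overrides)
        return merged

    base = dict(virtual_terrain=True, no_perlin_threshold=0.06)
    print(merge_dict(base, dict(n_obstacles_curriculum=True, n_obstacles_per_track=2)))
    # {'virtual_terrain': True, 'no_perlin_threshold': 0.06,
    #  'n_obstacles_curriculum': True, 'n_obstacles_per_track': 2}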

View File

@@ -141,6 +141,7 @@ class LeggedRobot(BaseTask):
 self.last_actions[:] = self.actions[:]
 self.last_dof_vel[:] = self.dof_vel[:]
 self.last_root_vel[:] = self.root_states[:, 7:13]
+self.last_root_pos[:] = self.root_states[:, :3]
 self.last_torques[:] = self.torques[:]
 if self.viewer and self.enable_viewer_sync and self.debug_viz:
@@ -600,6 +601,7 @@ class LeggedRobot(BaseTask):
 self.last_dof_vel = torch.zeros_like(self.dof_vel)
 self.last_root_vel = torch.zeros_like(self.root_states[:, 7:13])
 self.last_torques = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False)
+self.last_root_pos = torch.zeros_like(self.root_states[:, :3])
 self.commands = torch.zeros(self.num_envs, self.cfg.commands.num_commands, dtype=torch.float, device=self.device, requires_grad=False) # x vel, y vel, yaw vel, heading
 self.commands_scale = torch.tensor([self.obs_scales.lin_vel, self.obs_scales.lin_vel, self.obs_scales.ang_vel], device=self.device, requires_grad=False,) # TODO change this
 self.feet_air_time = torch.zeros(self.num_envs, self.feet_indices.shape[0], dtype=torch.float, device=self.device, requires_grad=False)
@@ -638,6 +640,7 @@ class LeggedRobot(BaseTask):
 self.actions[env_ids] = 0.
 self.last_actions[env_ids] = 0.
 self.last_dof_vel[env_ids] = 0.
+self.last_root_pos[env_ids] = 0.
 self.last_torques[env_ids] = 0.
 self.feet_air_time[env_ids] = 0.
 self.episode_length_buf[env_ids] = 0
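
Taken together, the three hunks above give last_root_pos the same lifecycle as the other last_* buffers: allocated in _init_buffers, snapshotted after each physics step, and zeroed on reset. A standalone sketch of that bookkeeping, assuming root_states stores the base position in its first three columns as in legged_gym:

    import torch

    class RootPosTracker:
        """Hypothetical helper mirroring the last_root_pos bookkeeping."""
        def __init__(self, num_envs, device="cpu"):
            self.last_root_pos = torch.zeros(num_envs, 3, dtype=torch.float, device=device)

        def post_physics_step(self, root_states):
            # snapshot the base position so the next step can measure displacement
            self.last_root_pos[:] = root_states[:, :3]

        def reset_idx(self, env_ids):
            # environments that were just reset should not carry a stale position
            self.last_root_pos[env_ids] = 0.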

View File

@@ -1038,7 +1038,7 @@ class LeggedRobotField(LeggedRobot):
 return (1 - torch.exp(-world_vel_error/self.cfg.rewards.tracking_sigma)) * engaging_mask # reverse version of tracking reward
 def _reward_lin_pos_x(self):
-return torch.abs((self.root_states[:, :3] - self.env_origins)[:, 0])
+return (self.root_states[:, :3] - self.last_root_pos)[:, 0]
 ##### Some helper functions that override parent class attributes #####
 @property
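
With last_root_pos in place, _reward_lin_pos_x switches from the absolute x-distance to the env origin to the signed x displacement since the previous step, so the reward pays for per-step forward progress rather than for standing far along the track. A small self-contained check of that expression (only the position columns of root_states are mocked here):

    import torch

    def lin_pos_x_progress(root_pos, last_root_pos):
        # signed per-step progress along x; negative if the base moved backwards
        return (root_pos - last_root_pos)[:, 0]

    root_pos = torch.tensor([[1.05, 0.0, 0.30], [2.99, 0.0, 0.30]])
    last_root_pos = torch.tensor([[1.00, 0.0, 0.30], [3.00, 0.0, 0.30]])
    print(lin_pos_x_progress(root_pos, last_root_pos))  # tensor([ 0.0500, -0.0100])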

View File

@@ -106,7 +106,7 @@ def play(args):
 # "tilt",
 ]
 env_cfg.terrain.BarrierTrack_kwargs["leap"] = dict(
-length= (1.3, 1.3),
+length= (1.0, 1.0),
 depth= (0.4, 0.8),
 height= 0.2,
 )
@@ -114,8 +114,8 @@ def play(args):
 if "one_obstacle_per_track" in env_cfg.terrain.BarrierTrack_kwargs.keys():
 env_cfg.terrain.BarrierTrack_kwargs.pop("one_obstacle_per_track")
 env_cfg.terrain.BarrierTrack_kwargs["n_obstacles_per_track"] = 2# 2
-env_cfg.commands.ranges.lin_vel_x = [3.0, 3.0] # [1.2, 1.2]
-env_cfg.terrain.BarrierTrack_kwargs['track_block_length']= 3.
+env_cfg.commands.ranges.lin_vel_x = [1.5, 1.5] # [1.2, 1.2]
+env_cfg.terrain.BarrierTrack_kwargs['track_block_length']= 2.0
 if "distill" in args.task:
 env_cfg.commands.ranges.lin_vel_x = [0.0, 0.0]
 env_cfg.commands.ranges.lin_vel_y = [-0., 0.]

View File

@@ -106,14 +106,14 @@ def play(args):
 # "tilt",
 ]
 env_cfg.terrain.BarrierTrack_kwargs["leap"] = dict(
-length= (1.5, 1.5),
+length= (0.5, 1),
 depth= (0.4, 0.8),
 height= 0.2,
 )
 if "one_obstacle_per_track" in env_cfg.terrain.BarrierTrack_kwargs.keys():
 env_cfg.terrain.BarrierTrack_kwargs.pop("one_obstacle_per_track")
-env_cfg.terrain.BarrierTrack_kwargs["n_obstacles_per_track"] = 1# 2
+env_cfg.terrain.BarrierTrack_kwargs["n_obstacles_per_track"] = 2# 2
 env_cfg.commands.ranges.lin_vel_x = [3.0, 3.0] # [1.2, 1.2]
 env_cfg.terrain.BarrierTrack_kwargs['track_block_length']= 3.
 if "distill" in args.task:

View File

@@ -479,6 +479,23 @@ class BarrierTrack:
 # adding trimesh and heightfields
 if "one_obstacle_per_track" in self.track_kwargs.keys():
 print("Warning: one_obstacle_per_track is deprecated, use n_obstacles_per_track instead.")
+difficulties = self.get_difficulty(row_idx, col_idx)
+difficulty, virtual_track = difficulties[:2]
+n_obstacles_curriculum = self.track_kwargs.get("n_obstacles_curriculum", False)
+if n_obstacles_curriculum:
+# n_obstacles_per_track = min(int(difficulty * self.track_kwargs.get("n_obstacles_per_track", 1)) + 1, self.track_kwargs.get("n_obstacles_per_track", 1))
+n_obstacles_per_track = self.track_kwargs.get("n_obstacles_per_track", 1)
+obstacle_order = np.random.choice(
+len(self.track_kwargs["options"]),
+size= n_obstacles_per_track,
+replace= True,
+)
+# if difficulty > 0.5:
+# difficulty = (difficulty - 0.2) / 0.8
+# else:
+# difficulty *= 2
+else:
 if self.track_kwargs["randomize_obstacle_order"] and len(self.track_kwargs["options"]) > 0:
 obstacle_order = np.random.choice(
 len(self.track_kwargs["options"]),
@@ -487,8 +504,6 @@ class BarrierTrack:
 )
 else:
 obstacle_order = np.arange(len(self.track_kwargs["options"]))
-difficulties = self.get_difficulty(row_idx, col_idx)
-difficulty, virtual_track = difficulties[:2]
 if self.track_kwargs["add_perlin_noise"]:
 TerrainPerlin_kwargs = self.cfg.TerrainPerlin_kwargs
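
The inserted block moves the difficulty lookup ahead of obstacle selection and adds an n_obstacles_curriculum branch that samples n_obstacles_per_track obstacle types with replacement; the commented-out lines hint at a difficulty-scaled count that is not active. A hedged sketch of the selection logic, with the arguments of the non-curriculum np.random.choice call (cut off by the hunk) assumed to be a full permutation of the options:

    import numpy as np

    def sample_obstacle_order(options, n_obstacles_per_track=1,
                              n_obstacles_curriculum=False,
                              randomize_obstacle_order=True):
        if n_obstacles_curriculum:
            # draw the configured number of obstacles, repeats allowed
            return np.random.choice(len(options), size=n_obstacles_per_track, replace=True)
        if randomize_obstacle_order and len(options) > 0:
            # assumed: shuffle all options once (the original call is truncated above)
            return np.random.choice(len(options), size=len(options), replace=False)
        return np.arange(len(options))

    print(sample_obstacle_order(["leap", "jump", "crawl"], n_obstacles_per_track=2,
                                n_obstacles_curriculum=True))  # e.g. [2 0]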

View File

@@ -235,7 +235,7 @@ class PPO:
 if current_learning_iteration is None:
 vel_loss = 0
 else:
-vel_loss = torch.square(velocity-2).mean() * np.exp(-0.01 * current_learning_iteration + 125)
+vel_loss = torch.square(velocity-2).mean() * np.exp(-0.01 * current_learning_iteration + 165)
 vel_loss += torch.square(torch.clamp_max(velocity, 1.) - 1).mean()
 return_ = dict(
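
The weight on the (velocity - 2)^2 term is np.exp(-0.01 * current_learning_iteration + 165), i.e. exp(165 - 0.01 * iter): it is extremely large at the start of training and only drops below 1 after roughly iteration 16,500, while the clamp term keeps penalizing any velocity still below 1. A small sketch of that schedule, with velocity standing in for whatever per-sample velocity tensor the PPO update actually uses:

    import numpy as np
    import torch

    def velocity_loss(velocity, current_learning_iteration):
        if current_learning_iteration is None:
            vel_loss = torch.zeros(())
        else:
            # exponentially decaying pull toward 2 m/s; weight = exp(165 - 0.01 * iter)
            weight = np.exp(-0.01 * current_learning_iteration + 165)
            vel_loss = torch.square(velocity - 2).mean() * weight
        # clamp_max(velocity, 1.) - 1 is zero once velocity >= 1, so this term
        # only pushes velocities that are still below 1 upward
        vel_loss = vel_loss + torch.square(torch.clamp_max(velocity, 1.) - 1).mean()
        return vel_loss

    v = torch.tensor([0.8, 1.6, 2.1])
    print(velocity_loss(v, current_learning_iteration=17000))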

View File

@@ -208,6 +208,7 @@ class OnPolicyRunner:
 'collection_time']:.3f}s, learning {locs['learn_time']:.3f}s)\n"""
 f"""{'Value function loss:':>{pad}} {locs["losses"]['value_loss']:.4f}\n"""
 f"""{'Surrogate loss:':>{pad}} {locs["losses"]['surrogate_loss']:.4f}\n"""
+f"""{'Velocity loss:':>{pad}} {locs["losses"]['vel_loss']:.4f}\n"""
 f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n"""
 f"""{'Mean reward:':>{pad}} {statistics.mean(locs['rewbuffer']):.2f}\n"""
 f"""{'Mean episode length:':>{pad}} {statistics.mean(locs['lenbuffer']):.2f}\n"""
@@ -221,7 +222,6 @@ class OnPolicyRunner:
 'collection_time']:.3f}s, learning {locs['learn_time']:.3f}s)\n"""
 f"""{'Value function loss:':>{pad}} {locs["losses"]['value_loss']:.4f}\n"""
 f"""{'Surrogate loss:':>{pad}} {locs["losses"]['surrogate_loss']:.4f}\n"""
-f"""{'Velocity loss:':>{pad}} {locs["losses"]['vel_loss']:.4f}\n"""
 f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n"""
 # f"""{'Mean reward/step:':>{pad}} {locs['mean_reward']:.2f}\n"""
 # f"""{'Mean episode length/episode:':>{pad}} {locs['mean_trajectory_length']:.2f}\n"""