diff --git a/legged_gym/legged_gym/envs/a1/a1_leap_config.py b/legged_gym/legged_gym/envs/a1/a1_leap_config.py
index 7963d51..ae6bd5b 100644
--- a/legged_gym/legged_gym/envs/a1/a1_leap_config.py
+++ b/legged_gym/legged_gym/envs/a1/a1_leap_config.py
@@ -29,6 +29,8 @@ class A1LeapCfg( A1FieldCfg ):
             ),
             virtual_terrain= True, # Change this to False for real terrain
             no_perlin_threshold= 0.06,
+            n_obstacles_curriculum = True,
+            n_obstacles_per_track=2,
         ))

         TerrainPerlin_kwargs = merge_dict(A1FieldCfg.terrain.TerrainPerlin_kwargs, dict(
@@ -102,7 +104,7 @@ class A1LeapCfgPPO( A1FieldCfgPPO ):
         resume = True
         # load_run = "{Your traind walking model directory}"
         # load_run = "May16_18-12-08_WalkingBase_pEnergySubsteps2e-5_aScale0.5"
-        load_run = "Leap_2m_2500"
+        load_run = "Leap_loss_success"
         # load_run = "May15_21-34-27_Skillleap_pEnergySubsteps-1e-06_virtual"#"May15_17-07-38_WalkingBase_pEnergySubsteps2e-5_aScale0.5"
         # load_run = "{Your virtually trained leap model directory}"
         max_iterations = 20000
diff --git a/legged_gym/legged_gym/envs/base/legged_robot.py b/legged_gym/legged_gym/envs/base/legged_robot.py
index 488f38d..feb95f6 100644
--- a/legged_gym/legged_gym/envs/base/legged_robot.py
+++ b/legged_gym/legged_gym/envs/base/legged_robot.py
@@ -141,6 +141,7 @@ class LeggedRobot(BaseTask):
         self.last_actions[:] = self.actions[:]
         self.last_dof_vel[:] = self.dof_vel[:]
         self.last_root_vel[:] = self.root_states[:, 7:13]
+        self.last_root_pos[:] = self.root_states[:, :3]
         self.last_torques[:] = self.torques[:]

         if self.viewer and self.enable_viewer_sync and self.debug_viz:
@@ -600,6 +601,7 @@ class LeggedRobot(BaseTask):
         self.last_dof_vel = torch.zeros_like(self.dof_vel)
         self.last_root_vel = torch.zeros_like(self.root_states[:, 7:13])
         self.last_torques = torch.zeros(self.num_envs, self.num_actions, dtype=torch.float, device=self.device, requires_grad=False)
+        self.last_root_pos = torch.zeros_like(self.root_states[:, :3])
         self.commands = torch.zeros(self.num_envs, self.cfg.commands.num_commands, dtype=torch.float, device=self.device, requires_grad=False) # x vel, y vel, yaw vel, heading
         self.commands_scale = torch.tensor([self.obs_scales.lin_vel, self.obs_scales.lin_vel, self.obs_scales.ang_vel], device=self.device, requires_grad=False,) # TODO change this
         self.feet_air_time = torch.zeros(self.num_envs, self.feet_indices.shape[0], dtype=torch.float, device=self.device, requires_grad=False)
@@ -638,6 +640,7 @@ class LeggedRobot(BaseTask):
         self.actions[env_ids] = 0.
         self.last_actions[env_ids] = 0.
         self.last_dof_vel[env_ids] = 0.
+        self.last_root_pos[env_ids] = 0.
         self.last_torques[env_ids] = 0.
         self.feet_air_time[env_ids] = 0.
         self.episode_length_buf[env_ids] = 0
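For orientation, the bookkeeping pattern these hunks establish can be sketched in isolation: `last_root_pos` is refreshed at the end of each post-physics step and zeroed in `reset_idx`, mirroring the existing `last_root_vel` buffer, so downstream rewards can read the base displacement accumulated over the last step. The new `n_obstacles_curriculum` and `n_obstacles_per_track` config entries are consumed by `BarrierTrack` further down. The snippet below is a minimal standalone sketch with toy shapes and hypothetical helper names, not repository code.

    import torch

    num_envs = 4
    # root_states layout as used above: [:, :3] base position, [:, 7:13] base lin/ang velocity (toy shapes)
    root_states = torch.zeros(num_envs, 13)
    last_root_pos = torch.zeros_like(root_states[:, :3])    # allocated once, as in _init_buffers

    def post_physics_step_bookkeeping():
        # rewards computed earlier in the step still see the previous position ...
        delta_pos = root_states[:, :3] - last_root_pos
        # ... before the buffer is refreshed for the next step
        last_root_pos[:] = root_states[:, :3]
        return delta_pos

    def reset_idx_bookkeeping(env_ids):
        last_root_pos[env_ids] = 0.    # cleared on reset, like the other last_* buffers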
diff --git a/legged_gym/legged_gym/envs/base/legged_robot_field.py b/legged_gym/legged_gym/envs/base/legged_robot_field.py
index be46710..03f5fd8 100644
--- a/legged_gym/legged_gym/envs/base/legged_robot_field.py
+++ b/legged_gym/legged_gym/envs/base/legged_robot_field.py
@@ -1038,7 +1038,7 @@ class LeggedRobotField(LeggedRobot):
         return (1 - torch.exp(-world_vel_error/self.cfg.rewards.tracking_sigma)) * engaging_mask # reverse version of tracking reward

     def _reward_lin_pos_x(self):
-        return torch.abs((self.root_states[:, :3] - self.env_origins)[:, 0])
+        return (self.root_states[:, :3] - self.last_root_pos)[:, 0]

     ##### Some helper functions that override parent class attributes #####
     @property
diff --git a/legged_gym/legged_gym/scripts/play.py b/legged_gym/legged_gym/scripts/play.py
index 4c92741..2a26b08 100644
--- a/legged_gym/legged_gym/scripts/play.py
+++ b/legged_gym/legged_gym/scripts/play.py
@@ -106,7 +106,7 @@ def play(args):
             # "tilt",
         ]
         env_cfg.terrain.BarrierTrack_kwargs["leap"] = dict(
-            length= (1.3, 1.3),
+            length= (1.0, 1.0),
             depth= (0.4, 0.8),
             height= 0.2,
         )
@@ -114,8 +114,8 @@ def play(args):
         if "one_obstacle_per_track" in env_cfg.terrain.BarrierTrack_kwargs.keys():
             env_cfg.terrain.BarrierTrack_kwargs.pop("one_obstacle_per_track")
         env_cfg.terrain.BarrierTrack_kwargs["n_obstacles_per_track"] = 2# 2
-        env_cfg.commands.ranges.lin_vel_x = [3.0, 3.0] # [1.2, 1.2]
-        env_cfg.terrain.BarrierTrack_kwargs['track_block_length']= 3.
+        env_cfg.commands.ranges.lin_vel_x = [1.5, 1.5] # [1.2, 1.2]
+        env_cfg.terrain.BarrierTrack_kwargs['track_block_length']= 2.0
         if "distill" in args.task:
             env_cfg.commands.ranges.lin_vel_x = [0.0, 0.0]
             env_cfg.commands.ranges.lin_vel_y = [-0., 0.]
diff --git a/legged_gym/legged_gym/scripts/play_vel.py b/legged_gym/legged_gym/scripts/play_vel.py
index e2f16b0..9d2ee46 100644
--- a/legged_gym/legged_gym/scripts/play_vel.py
+++ b/legged_gym/legged_gym/scripts/play_vel.py
@@ -106,14 +106,14 @@ def play(args):
             # "tilt",
         ]
         env_cfg.terrain.BarrierTrack_kwargs["leap"] = dict(
-            length= (1.5, 1.5),
+            length= (0.5, 1),
             depth= (0.4, 0.8),
             height= 0.2,
         )
         if "one_obstacle_per_track" in env_cfg.terrain.BarrierTrack_kwargs.keys():
             env_cfg.terrain.BarrierTrack_kwargs.pop("one_obstacle_per_track")
-        env_cfg.terrain.BarrierTrack_kwargs["n_obstacles_per_track"] = 1# 2
+        env_cfg.terrain.BarrierTrack_kwargs["n_obstacles_per_track"] = 2# 2
         env_cfg.commands.ranges.lin_vel_x = [3.0, 3.0] # [1.2, 1.2]
         env_cfg.terrain.BarrierTrack_kwargs['track_block_length']= 3.

         if "distill" in args.task:
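To make the reward change concrete: the old `_reward_lin_pos_x` returned the absolute x-distance from the environment origin, while the new version returns the x-displacement since the previous step, so the per-step reward tracks forward progress and turns negative when the base moves backwards. A small standalone comparison with toy tensors (not repository code):

    import torch

    env_origins   = torch.tensor([[0.0, 0.0, 0.0]])
    last_root_pos = torch.tensor([[2.00, 0.0, 0.3]])   # base position at the previous step
    root_pos      = torch.tensor([[2.05, 0.0, 0.3]])   # base position now, 5 cm further along x

    old_reward = torch.abs((root_pos - env_origins)[:, 0])   # tensor([2.0500]), grows with distance
    new_reward = (root_pos - last_root_pos)[:, 0]            # tensor([0.0500]), per-step progress

    # moving backwards now yields a negative reward instead of a still-positive distance
    root_pos_back = torch.tensor([[1.95, 0.0, 0.3]])
    print((root_pos_back - last_root_pos)[:, 0])             # tensor([-0.0500])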
diff --git a/legged_gym/legged_gym/utils/terrain/barrier_track.py b/legged_gym/legged_gym/utils/terrain/barrier_track.py
index 9567083..c2eb277 100644
--- a/legged_gym/legged_gym/utils/terrain/barrier_track.py
+++ b/legged_gym/legged_gym/utils/terrain/barrier_track.py
@@ -479,16 +479,31 @@ class BarrierTrack:
         # adding trimesh and heighfields
         if "one_obstacle_per_track" in self.track_kwargs.keys():
             print("Warning: one_obstacle_per_track is deprecated, use n_obstacles_per_track instead.")
-        if self.track_kwargs["randomize_obstacle_order"] and len(self.track_kwargs["options"]) > 0:
-            obstacle_order = np.random.choice(
-                len(self.track_kwargs["options"]),
-                size= self.track_kwargs.get("n_obstacles_per_track", 1),
-                replace= True,
-            )
-        else:
-            obstacle_order = np.arange(len(self.track_kwargs["options"]))
+
         difficulties = self.get_difficulty(row_idx, col_idx)
         difficulty, virtual_track = difficulties[:2]
+        n_obstacles_curriculum = self.track_kwargs.get("n_obstacles_curriculum", False)
+        if n_obstacles_curriculum:
+            # n_obstacles_per_track = min(int(difficulty * self.track_kwargs.get("n_obstacles_per_track", 1)) + 1, self.track_kwargs.get("n_obstacles_per_track", 1))
+            n_obstacles_per_track = self.track_kwargs.get("n_obstacles_per_track", 1)
+            obstacle_order = np.random.choice(
+                len(self.track_kwargs["options"]),
+                size= n_obstacles_per_track,
+                replace= True,
+            )
+            # if difficulty > 0.5:
+            #     difficulty = (difficulty - 0.2) / 0.8
+            # else:
+            #     difficulty *= 2
+        else:
+            if self.track_kwargs["randomize_obstacle_order"] and len(self.track_kwargs["options"]) > 0:
+                obstacle_order = np.random.choice(
+                    len(self.track_kwargs["options"]),
+                    size= self.track_kwargs.get("n_obstacles_per_track", 1),
+                    replace= True,
+                )
+            else:
+                obstacle_order = np.arange(len(self.track_kwargs["options"]))

         if self.track_kwargs["add_perlin_noise"]:
             TerrainPerlin_kwargs = self.cfg.TerrainPerlin_kwargs
diff --git a/rsl_rl/rsl_rl/algorithms/ppo.py b/rsl_rl/rsl_rl/algorithms/ppo.py
index db11708..aa070a2 100644
--- a/rsl_rl/rsl_rl/algorithms/ppo.py
+++ b/rsl_rl/rsl_rl/algorithms/ppo.py
@@ -235,7 +235,7 @@ class PPO:
         if current_learning_iteration is None:
             vel_loss = 0
         else:
-            vel_loss = torch.square(velocity-2).mean() * np.exp(-0.01 * current_learning_iteration + 125)
+            vel_loss = torch.square(velocity-2).mean() * np.exp(-0.01 * current_learning_iteration + 165)
             vel_loss += torch.square(torch.clamp_max(velocity, 1.) - 1).mean()

         return_ = dict(
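A quick sanity check on the adjusted velocity-loss schedule (standalone sketch, not part of the diff): the weight `np.exp(-0.01 * current_learning_iteration + 165)` shrinks by a factor of e^0.01 per iteration and crosses 1 at iteration 16,500 (the previous `+ 125` offset put that crossover at 12,500), so the squared pull toward a velocity of 2 fades out late in training while the clamped term keeps penalizing velocities below 1. Note also that in the `n_obstacles_curriculum` branch above, the difficulty-scaled obstacle count is still commented out, so that branch currently samples a fixed `n_obstacles_per_track` obstacles with replacement. The helper below is a toy reproduction for inspecting the schedule, not the repository's PPO code.

    import numpy as np
    import torch

    def vel_loss_terms(velocity: torch.Tensor, it: int, offset: float = 165.0):
        """Toy reproduction of the two velocity-loss terms, for inspecting the schedule."""
        schedule = np.exp(-0.01 * it + offset)                 # decaying weight on the "reach 2" term
        loss = torch.square(velocity - 2).mean() * schedule
        loss += torch.square(torch.clamp_max(velocity, 1.) - 1).mean()  # penalizes velocity < 1
        return loss, schedule

    v = torch.tensor([1.5, 1.8])
    for it in (16000, 16500, 17000):
        _, w = vel_loss_terms(v, it)
        print(it, round(float(w), 3))   # 16000 -> 148.413, 16500 -> 1.0, 17000 -> 0.007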
diff --git a/rsl_rl/rsl_rl/runners/on_policy_runner.py b/rsl_rl/rsl_rl/runners/on_policy_runner.py
index c609309..99c6ba4 100644
--- a/rsl_rl/rsl_rl/runners/on_policy_runner.py
+++ b/rsl_rl/rsl_rl/runners/on_policy_runner.py
@@ -208,6 +208,7 @@ class OnPolicyRunner:
                             'collection_time']:.3f}s, learning {locs['learn_time']:.3f}s)\n"""
                           f"""{'Value function loss:':>{pad}} {locs["losses"]['value_loss']:.4f}\n"""
                           f"""{'Surrogate loss:':>{pad}} {locs["losses"]['surrogate_loss']:.4f}\n"""
+                          f"""{'Velocity loss:':>{pad}} {locs["losses"]['vel_loss']:.4f}\n"""
                           f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n"""
                           f"""{'Mean reward:':>{pad}} {statistics.mean(locs['rewbuffer']):.2f}\n"""
                           f"""{'Mean episode length:':>{pad}} {statistics.mean(locs['lenbuffer']):.2f}\n"""
@@ -221,7 +222,6 @@ class OnPolicyRunner:
                             'collection_time']:.3f}s, learning {locs['learn_time']:.3f}s)\n"""
                           f"""{'Value function loss:':>{pad}} {locs["losses"]['value_loss']:.4f}\n"""
                           f"""{'Surrogate loss:':>{pad}} {locs["losses"]['surrogate_loss']:.4f}\n"""
-                          f"""{'Velocity loss:':>{pad}} {locs["losses"]['vel_loss']:.4f}\n"""
                           f"""{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n"""
                           # f"""{'Mean reward/step:':>{pad}} {locs['mean_reward']:.2f}\n"""
                           # f"""{'Mean episode length/episode:':>{pad}} {locs['mean_trajectory_length']:.2f}\n"""
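This logging change moves the 'Velocity loss' line into the branch that prints full episode statistics and drops it from the fallback branch; it assumes `vel_loss` is always present in the losses dict returned by the PPO update. If that key could ever be missing, a guarded variant of the added line (purely a hypothetical sketch, not in the diff) could look like:

    # hypothetical guarded form of the added log line (not repository code)
    vel_loss_value = locs["losses"].get("vel_loss", float("nan"))
    log_string += f"""{'Velocity loss:':>{pad}} {vel_loss_value:.4f}\n"""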