diff --git a/legged_gym/legged_gym/envs/a1/a1_leap_config.py b/legged_gym/legged_gym/envs/a1/a1_leap_config.py
index 4fcb98b..1c9ffd3 100644
--- a/legged_gym/legged_gym/envs/a1/a1_leap_config.py
+++ b/legged_gym/legged_gym/envs/a1/a1_leap_config.py
@@ -10,7 +10,8 @@ class A1LeapCfg( A1FieldCfg ):
     #     delay_action_obs = True
     #     latency_range = [0.04-0.0025, 0.04+0.0075]
     #### uncomment the above to train non-virtual terrain
-
+    class env(A1FieldCfg.env):
+        num_envs = 4
     class terrain( A1FieldCfg.terrain ):
         max_init_terrain_level = 2
         border_size = 5
@@ -68,6 +69,7 @@
             exceed_dof_pos_limits = -1e-1
             exceed_torque_limits_i = -2e-1
             # track_predict_vel_l2norm = -1.
+        soft_dof_pos_limit = 0.9
 
     class curriculum( A1FieldCfg.curriculum ):
         penetrate_volume_threshold_harder = 9000
@@ -80,6 +82,8 @@ class A1LeapCfgPPO( A1FieldCfgPPO ):
     class algorithm( A1FieldCfgPPO.algorithm ):
         entropy_coef = 0.0
         clip_min_std = 0.2
+        lin_vel_x = [2.0, 3.0]
+        command_scale = 2.0
 
     class runner( A1FieldCfgPPO.runner ):
         policy_class_name = "ActorCriticRecurrent"
diff --git a/rsl_rl/rsl_rl/algorithms/ppo.py b/rsl_rl/rsl_rl/algorithms/ppo.py
index 9c1689b..95c3ea1 100644
--- a/rsl_rl/rsl_rl/algorithms/ppo.py
+++ b/rsl_rl/rsl_rl/algorithms/ppo.py
@@ -77,6 +77,7 @@ class PPO:
         self.velocity_planner = velocity_planner
         self.velocity_optimizer = getattr(optim, optimizer_class_name)(self.velocity_planner.parameters(), lr=learning_rate)
         self.lin_vel_x = kwargs.get('lin_vel_x', None)
+        self.command_scale = kwargs.get('command_scale', 2.0)
 
         # PPO parameters
         self.clip_param = clip_param
@@ -106,11 +107,11 @@
         if self.actor_critic.is_recurrent:
             self.transition.hidden_states = self.actor_critic.get_hidden_states()
         # Compute the actions and values
-        vel_obs = torch.cat([obs[:, :9], obs[:, 12:]], dim=1)
+        vel_obs = torch.cat([obs[..., :9], obs[..., 12:]], dim=-1)
         velocity = self.velocity_planner(vel_obs)
         if self.lin_vel_x is not None:
             velocity = torch.clip(velocity, self.lin_vel_x[0], self.lin_vel_x[1])
-
+        velocity *= self.command_scale
         self.transition.actions = self.actor_critic.act(obs, velocity=velocity)[0].detach()
         self.transition.values = self.actor_critic.evaluate(critic_obs).detach()
         self.transition.actions_log_prob = self.actor_critic.get_actions_log_prob(self.transition.actions).detach()
diff --git a/rsl_rl/rsl_rl/modules/actor_critic_recurrent.py b/rsl_rl/rsl_rl/modules/actor_critic_recurrent.py
index 597af83..ebc259e 100644
--- a/rsl_rl/rsl_rl/modules/actor_critic_recurrent.py
+++ b/rsl_rl/rsl_rl/modules/actor_critic_recurrent.py
@@ -75,7 +75,8 @@ class ActorCriticRecurrent(ActorCritic):
 
     def act(self, observations, masks=None, hidden_states=None, velocity=None):
         if velocity is not None:
-            observations[..., 9] = velocity.squeeze()
+            # print(velocity.squeeze())
+            observations[..., 9] = velocity.squeeze(-1)
             # vel_obs = torch.cat([observations[:, :9], observations[:, 12:]], dim=1)
             # velocity = self.velocity_planner(vel_obs)
             # velocity = torch.clip(velocity, self.lin_vel_x[0], self.lin_vel_x[1])
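
Standalone sketch (not part of the patch) that replays the command path PPO.act() and ActorCriticRecurrent.act() now follow, so the effect of lin_vel_x and command_scale can be checked in isolation. The observation size, the stand-in planner network, and the layout assumption that index 9 holds the forward-velocity command (indices 9:12 being the command slice hidden from the planner) are mine, not taken from the repository.

import torch
import torch.nn as nn

num_envs, num_obs = 4, 48            # assumed sizes; num_envs matches the new env.num_envs = 4
lin_vel_x = [2.0, 3.0]               # clip range from A1LeapCfgPPO.algorithm
command_scale = 2.0                  # new scaling factor from the config

# Stand-in for self.velocity_planner (the real module is built by the runner).
planner = nn.Sequential(nn.Linear(num_obs - 3, 64), nn.ELU(), nn.Linear(64, 1))

obs = torch.randn(num_envs, num_obs)

# The planner sees everything except the 3-dim command slice obs[..., 9:12];
# using `...` and dim=-1 keeps this valid for a (T, B, num_obs) recurrent layout too.
vel_obs = torch.cat([obs[..., :9], obs[..., 12:]], dim=-1)
velocity = planner(vel_obs)                                   # shape (num_envs, 1)
velocity = torch.clip(velocity, lin_vel_x[0], lin_vel_x[1])   # keep within [2.0, 3.0]
velocity = velocity * command_scale                           # new: rescale the command

# ActorCriticRecurrent.act() then overwrites the forward-velocity command;
# squeeze(-1) drops only the trailing dim, so a batch of size 1 is not collapsed.
obs[..., 9] = velocity.squeeze(-1)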