scale

2025-03-03 21:54:54 +08:00 · 2025-03-03 21:54:54 +08:00 · 5ec8fa4569
parent 5d2c846deb
commit 5ec8fa4569
18 changed files with 80 additions and 112 deletions
--- a/src/rl_sar/library/core/rl_sdk/rl_sdk.cpp
+++ b/src/rl_sar/library/core/rl_sdk/rl_sdk.cpp
@ -102,17 +102,18 @@ void RL::InitControl()
 void RL::ComputeOutput(const torch::Tensor &actions, torch::Tensor &output_dof_pos, torch::Tensor &output_dof_vel, torch::Tensor &output_dof_tau)
 {
-    torch::Tensor joint_actions_scaled = actions * this->params.action_scale;
+    torch::Tensor actions_scaled = actions * this->params.action_scale;
-    torch::Tensor wheel_actions_scaled = torch::zeros({1, this->params.num_of_dofs});
+    torch::Tensor pos_actions_scaled = actions_scaled;
    torch::Tensor vel_actions_scaled = torch::zeros_like(actions);
    for (int i : this->params.wheel_indices)
    {
-        joint_actions_scaled[0][i] = 0.0;
+        pos_actions_scaled[0][i] = 0.0;
-        wheel_actions_scaled[0][i] = actions[0][i] * this->params.action_scale_wheel;
+        vel_actions_scaled[0][i] = actions[0][i];
    }
-    torch::Tensor actions_scaled = joint_actions_scaled + wheel_actions_scaled;
+    torch::Tensor all_actions_scaled = pos_actions_scaled + vel_actions_scaled;
-    output_dof_pos = joint_actions_scaled + this->params.default_dof_pos;
+    output_dof_pos = pos_actions_scaled + this->params.default_dof_pos;
-    output_dof_vel = wheel_actions_scaled;
+    output_dof_vel = vel_actions_scaled;
-    output_dof_tau = this->params.rl_kp * (actions_scaled + this->params.default_dof_pos - this->obs.dof_pos) - this->params.rl_kd * this->obs.dof_vel;
+    output_dof_tau = this->params.rl_kp * (all_actions_scaled + this->params.default_dof_pos - this->obs.dof_pos) - this->params.rl_kd * this->obs.dof_vel;
    output_dof_tau = torch::clamp(output_dof_tau, -(this->params.torque_limits), this->params.torque_limits);
 }
@ -498,10 +499,7 @@ void RL::ReadYaml(std::string robot_path)
        this->params.clip_actions_upper = torch::tensor(ReadVectorFromYaml<double>(config["clip_actions_upper"])).view({1, -1});
        this->params.clip_actions_lower = torch::tensor(ReadVectorFromYaml<double>(config["clip_actions_lower"])).view({1, -1});
    }
-    this->params.action_scale = config["action_scale"].as<double>();
+    this->params.action_scale = torch::tensor(ReadVectorFromYaml<double>(config["action_scale"])).view({1, -1});
    this->params.hip_scale_reduction = config["hip_scale_reduction"].as<double>();
    this->params.hip_scale_reduction_indices = ReadVectorFromYaml<int>(config["hip_scale_reduction_indices"]);
    this->params.action_scale_wheel = config["action_scale_wheel"].as<double>();
    this->params.wheel_indices = ReadVectorFromYaml<int>(config["wheel_indices"]);
    this->params.num_of_dofs = config["num_of_dofs"].as<int>();
    this->params.lin_vel_scale = config["lin_vel_scale"].as<double>();
--- a/src/rl_sar/library/core/rl_sdk/rl_sdk.hpp
+++ b/src/rl_sar/library/core/rl_sdk/rl_sdk.hpp
@ -87,10 +87,7 @@ struct ModelParams
    std::vector<int> observations_history;
    double damping;
    double stiffness;
-    double action_scale;
+    torch::Tensor action_scale;
    double hip_scale_reduction;
    std::vector<int> hip_scale_reduction_indices;
    double action_scale_wheel;
    std::vector<int> wheel_indices;
    int num_of_dofs;
    double lin_vel_scale;
--- a/src/rl_sar/models/a1/legged_gym/config.yaml
+++ b/src/rl_sar/models/a1/legged_gym/config.yaml
@ -34,11 +34,11 @@ a1/legged_gym:
             3.0, 3.0, 3.0,
             3.0, 3.0, 3.0,
             3.0, 3.0, 3.0]
  hip_scale_reduction: 0.5
  hip_scale_reduction_indices: [0, 3, 6, 9]
  num_of_dofs: 12
-  action_scale: 0.25
+  action_scale: [0.125, 0.25, 0.25,
-  action_scale_wheel: 0.0
+                 0.125, 0.25, 0.25,
                 0.125, 0.25, 0.25,
                 0.125, 0.25, 0.25]
  wheel_indices: []
  lin_vel_scale: 2.0
  ang_vel_scale: 0.25
--- a/src/rl_sar/models/a1/robot_lab/config.yaml
+++ b/src/rl_sar/models/a1/robot_lab/config.yaml
@ -34,11 +34,11 @@ a1/robot_lab:
             3.0, 3.0, 3.0,
             3.0, 3.0, 3.0,
             3.0, 3.0, 3.0]
  hip_scale_reduction: 1.0
  hip_scale_reduction_indices: []
  num_of_dofs: 12
-  action_scale: 0.25
+  action_scale: [0.125, 0.25, 0.25,
-  action_scale_wheel: 0.0
+                 0.125, 0.25, 0.25,
                 0.125, 0.25, 0.25,
                 0.125, 0.25, 0.25]
  wheel_indices: []
  lin_vel_scale: 2.0
  ang_vel_scale: 0.25
--- a/src/rl_sar/models/b2/robot_lab/config.yaml
+++ b/src/rl_sar/models/b2/robot_lab/config.yaml
@ -34,11 +34,11 @@ b2/robot_lab:
             3.0, 3.0, 3.0,
             3.0, 3.0, 3.0,
             3.0, 3.0, 3.0]
  hip_scale_reduction: 1.0
  hip_scale_reduction_indices: []
  num_of_dofs: 12
-  action_scale: 0.25
+  action_scale: [0.125, 0.25, 0.25,
-  action_scale_wheel: 0.0
+                 0.125, 0.25, 0.25,
                 0.125, 0.25, 0.25,
                 0.125, 0.25, 0.25]
  wheel_indices: []
  lin_vel_scale: 2.0
  ang_vel_scale: 0.25
--- a/src/rl_sar/models/b2w/robot_lab/config.yaml
+++ b/src/rl_sar/models/b2w/robot_lab/config.yaml
@ -37,11 +37,12 @@ b2w/robot_lab:
             3.0, 3.0, 3.0,
             3.0, 3.0, 3.0,
             1.0, 1.0, 1.0, 1.0]
  hip_scale_reduction: 1.0
  hip_scale_reduction_indices: []
  num_of_dofs: 16
-  action_scale: 0.25
+  action_scale: [0.125, 0.25, 0.25,
-  action_scale_wheel: 5.0
+                 0.125, 0.25, 0.25,
                 0.125, 0.25, 0.25,
                 0.125, 0.25, 0.25,
                 5.0, 5.0, 5.0, 5.0]
  wheel_indices: [12, 13, 14, 15]
  lin_vel_scale: 2.0
  ang_vel_scale: 0.25
--- a/src/rl_sar/models/go2/himloco/config.yaml
+++ b/src/rl_sar/models/go2/himloco/config.yaml
@ -34,11 +34,11 @@ go2/himloco:
             5, 5, 5,
             5, 5, 5,
             5, 5, 5]
  hip_scale_reduction: 1.0
  hip_scale_reduction_indices: [0, 3, 6, 9]
  num_of_dofs: 12
-  action_scale: 0.25
+  action_scale: [0.125, 0.25, 0.25,
-  action_scale_wheel: 0.0
+                 0.125, 0.25, 0.25,
                 0.125, 0.25, 0.25,
                 0.125, 0.25, 0.25]
  wheel_indices: []
  lin_vel_scale: 2.0
  ang_vel_scale: 0.25
--- a/src/rl_sar/models/go2/robot_lab/config.yaml
+++ b/src/rl_sar/models/go2/robot_lab/config.yaml
@ -34,11 +34,11 @@ go2/robot_lab:
             3.0, 3.0, 3.0,
             3.0, 3.0, 3.0,
             3.0, 3.0, 3.0]
  hip_scale_reduction: 1.0
  hip_scale_reduction_indices: []
  num_of_dofs: 12
-  action_scale: 0.25
+  action_scale: [0.125, 0.25, 0.25,
-  action_scale_wheel: 0.0
+                 0.125, 0.25, 0.25,
                 0.125, 0.25, 0.25,
                 0.125, 0.25, 0.25]
  wheel_indices: []
  lin_vel_scale: 2.0
  ang_vel_scale: 0.25
--- a/src/rl_sar/models/go2w/robot_lab/config.yaml
+++ b/src/rl_sar/models/go2w/robot_lab/config.yaml
@ -37,11 +37,12 @@ go2w/robot_lab:
             3.0, 3.0, 3.0,
             3.0, 3.0, 3.0,
             0.5, 0.5, 0.5, 0.5]
  hip_scale_reduction: 1.0
  hip_scale_reduction_indices: []
  num_of_dofs: 16
-  action_scale: 0.25
+  action_scale: [0.125, 0.25, 0.25,
-  action_scale_wheel: 5.0
+                 0.125, 0.25, 0.25,
                 0.125, 0.25, 0.25,
                 0.125, 0.25, 0.25,
                 5.0, 5.0, 5.0, 5.0]
  wheel_indices: [12, 13, 14, 15]
  lin_vel_scale: 2.0
  ang_vel_scale: 0.25
--- a/src/rl_sar/models/gr1t1/legged_gym/config.yaml
+++ b/src/rl_sar/models/gr1t1/legged_gym/config.yaml
@ -22,11 +22,9 @@ gr1t1/legged_gym:
             57.0, 43.0, 114.0, 114.0, 15.3]
  fixed_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
             5.7, 4.3, 11.4, 11.4, 1.5]
  hip_scale_reduction: 1.0
  hip_scale_reduction_indices: []
  num_of_dofs: 10
-  action_scale: 1.0
+  action_scale: [1.0, 1.0, 1.0, 1.0, 1.0,
-  action_scale_wheel: 0.0
+                 1.0, 1.0, 1.0, 1.0, 1.0]
  wheel_indices: []
  lin_vel_scale: 1.0
  ang_vel_scale: 1.0
--- a/src/rl_sar/models/gr1t2/legged_gym/config.yaml
+++ b/src/rl_sar/models/gr1t2/legged_gym/config.yaml
@ -22,11 +22,9 @@ gr1t2/legged_gym:
             57.0, 43.0, 114.0, 114.0, 15.3]
  fixed_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
             5.7, 4.3, 11.4, 11.4, 1.5]
  hip_scale_reduction: 1.0
  hip_scale_reduction_indices: []
  num_of_dofs: 10
-  action_scale: 1.0
+  action_scale: [1.0, 1.0, 1.0, 1.0, 1.0,
-  action_scale_wheel: 0.0
+                 1.0, 1.0, 1.0, 1.0, 1.0]
  wheel_indices: []
  lin_vel_scale: 1.0
  ang_vel_scale: 1.0
--- a/src/rl_sar/models/l4w4/legged_gym/config.yaml
+++ b/src/rl_sar/models/l4w4/legged_gym/config.yaml
@ -36,11 +36,11 @@ l4w4/legged_gym:
             3.0, 3.0, 3.0, 0.5,
             3.0, 3.0, 3.0, 0.5,
             3.0, 3.0, 3.0, 0.5]
  hip_scale_reduction: 0.5
  hip_scale_reduction_indices: [0, 4, 8, 12]
  num_of_dofs: 16
-  action_scale: 0.5
+  action_scale: [0.25, 0.5, 0.5, 8.0,
-  action_scale_wheel: 8.0
+                 0.25, 0.5, 0.5, 8.0,
                 0.25, 0.5, 0.5, 8.0,
                 0.25, 0.5, 0.5, 8.0]
  wheel_indices: [3, 7, 11, 15]
  lin_vel_scale: 2.0
  ang_vel_scale: 0.25
--- a/src/rl_sar/scripts/rl_sdk.py
+++ b/src/rl_sar/scripts/rl_sdk.py
@ -75,16 +75,15 @@ class ModelParams:
        self.damping = None
        self.stiffness = None
        self.action_scale = None
-        self.hip_scale_reduction = None
+        self.wheel_indices = None
        self.hip_scale_reduction_indices = None
        self.clip_actions_upper = None
        self.clip_actions_lower = None
        self.num_of_dofs = None
        self.lin_vel_scale = None
        self.ang_vel_scale = None
        self.dof_pos_scale = None
        self.dof_vel_scale = None
        self.clip_obs = None
        self.clip_actions_upper = None
        self.clip_actions_lower = None
        self.torque_limits = None
        self.rl_kd = None
        self.rl_kp = None
@ -161,7 +160,10 @@ class RL:
            elif observation == "commands":
                obs_list.append(self.obs.commands * self.params.commands_scale)
            elif observation == "dof_pos":
-                obs_list.append((self.obs.dof_pos - self.params.default_dof_pos) * self.params.dof_pos_scale)
+                dof_pos_rel = self.obs.dof_pos - self.params.default_dof_pos
                for i in self.params.wheel_indices:
                    dof_pos_rel[0, i] = 0.0
                obs_list.append(dof_pos_rel * self.params.dof_pos_scale)
            elif observation == "dof_vel":
                obs_list.append(self.obs.dof_vel * self.params.dof_vel_scale)
            elif observation == "actions":
@ -199,6 +201,20 @@ class RL:
        actions_scaled = actions * self.params.action_scale
        return actions_scaled + self.params.default_dof_pos
    def ComputeOutput(self, actions):
        actions_scaled = actions * self.params.action_scale
        pos_actions_scaled = actions_scaled.clone()
        vel_actions_scaled = torch.zeros_like(actions)
        for i in self.params.wheel_indices:
            pos_actions_scaled[0][i] = 0.0
            vel_actions_scaled[0][i] = actions[0][i]
        all_actions_scaled = pos_actions_scaled + vel_actions_scaled
        output_dof_pos = pos_actions_scaled + self.params.default_dof_pos
        output_dof_vel = vel_actions_scaled
        output_dof_tau = self.params.rl_kp * (all_actions_scaled + self.params.default_dof_pos - self.obs.dof_pos) - self.params.rl_kd * self.obs.dof_vel
        output_dof_tau = torch.clamp(output_dof_tau, -(self.params.torque_limits), self.params.torque_limits)
        return output_dof_pos, output_dof_vel, output_dof_tau
    def QuatRotateInverse(self, q, v, framework):
        if framework == "isaacsim":
            q_w = q[:, 0]
@ -384,15 +400,14 @@ class RL:
        self.params.observations = config["observations"]
        self.params.observations_history = config["observations_history"]
        self.params.clip_obs = config["clip_obs"]
        self.params.action_scale = config["action_scale"]
        self.params.hip_scale_reduction = config["hip_scale_reduction"]
        self.params.hip_scale_reduction_indices = config["hip_scale_reduction_indices"]
        if config["clip_actions_lower"] is None and config["clip_actions_upper"] is None:
            self.params.clip_actions_upper = None
            self.params.clip_actions_lower = None
        else:
            self.params.clip_actions_upper = torch.tensor(config["clip_actions_upper"]).view(1, -1)
            self.params.clip_actions_lower = torch.tensor(config["clip_actions_lower"]).view(1, -1)
        self.params.action_scale = torch.tensor(config["action_scale"]).view(1, -1)
        self.params.wheel_indices = config["wheel_indices"]
        self.params.num_of_dofs = config["num_of_dofs"]
        self.params.lin_vel_scale = config["lin_vel_scale"]
        self.params.ang_vel_scale = config["ang_vel_scale"]
--- a/src/rl_sar/scripts/rl_sim.py
+++ b/src/rl_sar/scripts/rl_sim.py
@ -47,6 +47,7 @@ class RL_Sim(RL):
        # init
        torch.set_grad_enabled(False)
        torch.set_num_threads(4)
        self.joint_publishers_commands = [MotorCommand() for _ in range(self.params.num_of_dofs)]
        self.InitObservations()
        self.InitOutputs()
@ -189,19 +190,10 @@ class RL_Sim(RL):
            self.obs.dof_pos = torch.tensor(self.robot_state.motor_state.q).narrow(0, 0, self.params.num_of_dofs).unsqueeze(0)
            self.obs.dof_vel = torch.tensor(self.robot_state.motor_state.dq).narrow(0, 0, self.params.num_of_dofs).unsqueeze(0)
-            clamped_actions = self.Forward()
+            self.obs.actions = self.Forward()
            self.output_dof_pos, self.output_dof_vel, self.output_dof_tau = self.ComputeOutput(self.obs.actions)
-            for i in self.params.hip_scale_reduction_indices:
+            # self.TorqueProtect(self.output_dof_pos)
                clamped_actions[0][i] *= self.params.hip_scale_reduction
            self.obs.actions = clamped_actions
            origin_output_dof_tau = self.ComputeTorques(self.obs.actions)
            # self.TorqueProtect(origin_output_dof_tau)
            self.output_dof_tau = torch.clamp(origin_output_dof_tau, -(self.params.torque_limits), self.params.torque_limits)
            self.output_dof_pos = self.ComputePosition(self.obs.actions)
            if CSV_LOGGER:
                tau_est = torch.zeros((1, self.params.num_of_dofs))
--- a/src/rl_sar/src/rl_real_a1.cpp
+++ b/src/rl_sar/src/rl_real_a1.cpp
@ -164,15 +164,7 @@ void RL_Real::RunModel()
        this->obs.dof_pos = torch::tensor(this->robot_state.motor_state.q).narrow(0, 0, this->params.num_of_dofs).unsqueeze(0);
        this->obs.dof_vel = torch::tensor(this->robot_state.motor_state.dq).narrow(0, 0, this->params.num_of_dofs).unsqueeze(0);
-        torch::Tensor clamped_actions = this->Forward();
+        this->obs.actions = this->Forward();
        this->obs.actions = clamped_actions;
        for (int i : this->params.hip_scale_reduction_indices)
        {
            clamped_actions[0][i] *= this->params.hip_scale_reduction;
        }
        this->ComputeOutput(this->obs.actions, this->output_dof_pos, this->output_dof_vel, this->output_dof_tau);
        if (this->output_dof_pos.defined() && this->output_dof_pos.numel() > 0)
--- a/src/rl_sar/src/rl_real_go2.cpp
+++ b/src/rl_sar/src/rl_real_go2.cpp
@ -170,15 +170,7 @@ void RL_Real::RunModel()
        this->obs.dof_pos = torch::tensor(this->robot_state.motor_state.q).narrow(0, 0, this->params.num_of_dofs).unsqueeze(0);
        this->obs.dof_vel = torch::tensor(this->robot_state.motor_state.dq).narrow(0, 0, this->params.num_of_dofs).unsqueeze(0);
-        torch::Tensor clamped_actions = this->Forward();
+        this->obs.actions = this->Forward();
        this->obs.actions = clamped_actions;
        for (int i : this->params.hip_scale_reduction_indices)
        {
            clamped_actions[0][i] *= this->params.hip_scale_reduction;
        }
        this->ComputeOutput(this->obs.actions, this->output_dof_pos, this->output_dof_vel, this->output_dof_tau);
        if (this->output_dof_pos.defined() && this->output_dof_pos.numel() > 0)
--- a/src/rl_sar/src/rl_real_l4w4.cpp
+++ b/src/rl_sar/src/rl_real_l4w4.cpp
@ -186,15 +186,7 @@ void RL_Real::RunModel()
        this->obs.dof_pos = torch::tensor(this->robot_state.motor_state.q).narrow(0, 0, this->params.num_of_dofs).unsqueeze(0);
        this->obs.dof_vel = torch::tensor(this->robot_state.motor_state.dq).narrow(0, 0, this->params.num_of_dofs).unsqueeze(0);
-        torch::Tensor clamped_actions = this->Forward();
+        this->obs.actions = this->Forward();
        this->obs.actions = clamped_actions;
        for (int i : this->params.hip_scale_reduction_indices)
        {
            clamped_actions[0][i] *= this->params.hip_scale_reduction;
        }
        this->ComputeOutput(this->obs.actions, this->output_dof_pos, this->output_dof_vel, this->output_dof_tau);
        if (this->output_dof_pos.defined() && this->output_dof_pos.numel() > 0)
--- a/src/rl_sar/src/rl_sim.cpp
+++ b/src/rl_sar/src/rl_sim.cpp
@ -270,15 +270,7 @@ void RL_Sim::RunModel()
        this->obs.dof_pos = torch::tensor(this->robot_state.motor_state.q).narrow(0, 0, this->params.num_of_dofs).unsqueeze(0);
        this->obs.dof_vel = torch::tensor(this->robot_state.motor_state.dq).narrow(0, 0, this->params.num_of_dofs).unsqueeze(0);
-        torch::Tensor clamped_actions = this->Forward();
+        this->obs.actions = this->Forward();
        this->obs.actions = clamped_actions;
        for (int i : this->params.hip_scale_reduction_indices)
        {
            clamped_actions[0][i] *= this->params.hip_scale_reduction;
        }
        this->ComputeOutput(this->obs.actions, this->output_dof_pos, this->output_dof_vel, this->output_dof_tau);
        if (this->output_dof_pos.defined() && this->output_dof_pos.numel() > 0)