feat: remove the `use_history` field, add `observations_history` to support history observations, and adjust the related logic to handle the case of empty history observations

2024-10-12 23:45:18 +08:00 · 2024-10-12 23:45:18 +08:00 · 274db72d4b
parent 11ec554365
commit 274db72d4b
14 changed files with 151 additions and 119 deletions
--- a/src/rl_sar/library/rl_sdk/rl_sdk.cpp
+++ b/src/rl_sar/library/rl_sdk/rl_sdk.cpp
@ -421,11 +421,18 @@ void RL::ReadYaml(std::string robot_name)
    this->params.framework = config["framework"].as<std::string>();
    int rows = config["rows"].as<int>();
    int cols = config["cols"].as<int>();
-    this->params.use_history = config["use_history"].as<bool>();
    this->params.dt = config["dt"].as<double>();
    this->params.decimation = config["decimation"].as<int>();
    this->params.num_observations = config["num_observations"].as<int>();
    this->params.observations = ReadVectorFromYaml<std::string>(config["observations"]);
+    if (config["observations_history"].IsNull())
+    {
+        this->params.observations_history = {};
+    }
+    else
+    {
+        this->params.observations_history = ReadVectorFromYaml<int>(config["observations_history"]);
+    }
    this->params.clip_obs = config["clip_obs"].as<double>();
    if (config["clip_actions_lower"].IsNull() && config["clip_actions_upper"].IsNull())
    {
--- a/src/rl_sar/library/rl_sdk/rl_sdk.hpp
+++ b/src/rl_sar/library/rl_sdk/rl_sdk.hpp
@ -72,11 +72,11 @@ struct ModelParams
 {
    std::string model_name;
    std::string framework;
-    bool use_history;
    double dt;
    int decimation;
    int num_observations;
    std::vector<std::string> observations;
+    std::vector<int> observations_history;
    double damping;
    double stiffness;
    double action_scale;
--- a/src/rl_sar/models/a1_isaacgym/config.yaml
+++ b/src/rl_sar/models/a1_isaacgym/config.yaml
@ -3,36 +3,36 @@ a1_isaacgym:
  framework: "isaacgym"
  rows: 4
  cols: 3
-  use_history: True
  dt: 0.005
  decimation: 4
  num_observations: 45
  observations: ["ang_vel", "gravity_vec", "commands", "dof_pos", "dof_vel", "actions"]
+  observations_history: [0, 1, 2, 3, 4, 5]
  clip_obs: 100.0
-  clip_actions_lower: [-100, -100, -100, 
-                       -100, -100, -100, 
-                       -100, -100, -100, 
-                       -100, -100, -100] 
-  clip_actions_upper: [100, 100, 100, 
-                       100, 100, 100, 
-                       100, 100, 100, 
-                       100, 100, 100] 
-  rl_kp: [20, 20, 20, 
-          20, 20, 20, 
-          20, 20, 20, 
-          20, 20, 20] 
-  rl_kd: [0.5, 0.5, 0.5, 
-          0.5, 0.5, 0.5, 
-          0.5, 0.5, 0.5, 
-          0.5, 0.5, 0.5] 
-  fixed_kp: [80, 80, 80, 
-             80, 80, 80, 
-             80, 80, 80, 
-             80, 80, 80] 
-  fixed_kd: [3, 3, 3, 
-             3, 3, 3, 
-             3, 3, 3, 
-             3, 3, 3] 
+  clip_actions_lower: [-100, -100, -100,
+                       -100, -100, -100,
+                       -100, -100, -100,
+                       -100, -100, -100]
+  clip_actions_upper: [100, 100, 100,
+                       100, 100, 100,
+                       100, 100, 100,
+                       100, 100, 100]
+  rl_kp: [20, 20, 20,
+          20, 20, 20,
+          20, 20, 20,
+          20, 20, 20]
+  rl_kd: [0.5, 0.5, 0.5,
+          0.5, 0.5, 0.5,
+          0.5, 0.5, 0.5,
+          0.5, 0.5, 0.5]
+  fixed_kp: [80, 80, 80,
+             80, 80, 80,
+             80, 80, 80,
+             80, 80, 80]
+  fixed_kd: [3, 3, 3,
+             3, 3, 3,
+             3, 3, 3,
+             3, 3, 3]
  hip_scale_reduction: 0.5
  hip_scale_reduction_indices: [0, 3, 6, 9]
  num_of_dofs: 12
@ -42,14 +42,14 @@ a1_isaacgym:
  dof_pos_scale: 1.0
  dof_vel_scale: 0.05
  commands_scale: [2.0, 2.0, 1.0]
-  torque_limits: [33.5, 33.5, 33.5, 
-                  33.5, 33.5, 33.5, 
-                  33.5, 33.5, 33.5, 
-                  33.5, 33.5, 33.5] 
-  default_dof_pos: [ 0.1000, 0.8000, -1.5000, 
-                    -0.1000, 0.8000, -1.5000, 
-                     0.1000, 1.0000, -1.5000, 
-                    -0.1000, 1.0000, -1.5000] 
+  torque_limits: [33.5, 33.5, 33.5,
+                  33.5, 33.5, 33.5,
+                  33.5, 33.5, 33.5,
+                  33.5, 33.5, 33.5]
+  default_dof_pos: [ 0.1000, 0.8000, -1.5000,
+                    -0.1000, 0.8000, -1.5000,
+                     0.1000, 1.0000, -1.5000,
+                    -0.1000, 1.0000, -1.5000]
  joint_controller_names: ["FL_hip_controller", "FL_thigh_controller", "FL_calf_controller",
                           "FR_hip_controller", "FR_thigh_controller", "FR_calf_controller",
                           "RL_hip_controller", "RL_thigh_controller", "RL_calf_controller",
--- a/src/rl_sar/models/a1_isaacsim/config.yaml
+++ b/src/rl_sar/models/a1_isaacsim/config.yaml
@ -3,36 +3,36 @@ a1_isaacsim:
  framework: "isaacsim"
  rows: 4
  cols: 3
-  use_history: False
  dt: 0.005
  decimation: 4
  num_observations: 45
  observations: ["ang_vel", "gravity_vec", "commands", "dof_pos", "dof_vel", "actions"]
+  observations_history: []
  clip_obs: 100.0
-  clip_actions_lower: [-100, -100, -100, 
-                       -100, -100, -100, 
-                       -100, -100, -100, 
-                       -100, -100, -100] 
-  clip_actions_upper: [100, 100, 100, 
-                       100, 100, 100, 
-                       100, 100, 100, 
-                       100, 100, 100] 
-  rl_kp: [20, 20, 20, 
-          20, 20, 20, 
-          20, 20, 20, 
-          20, 20, 20] 
-  rl_kd: [0.5, 0.5, 0.5, 
-          0.5, 0.5, 0.5, 
-          0.5, 0.5, 0.5, 
-          0.5, 0.5, 0.5] 
-  fixed_kp: [80, 80, 80, 
-             80, 80, 80, 
-             80, 80, 80, 
-             80, 80, 80] 
-  fixed_kd: [3, 3, 3, 
-             3, 3, 3, 
-             3, 3, 3, 
-             3, 3, 3] 
+  clip_actions_lower: [-100, -100, -100,
+                       -100, -100, -100,
+                       -100, -100, -100,
+                       -100, -100, -100]
+  clip_actions_upper: [100, 100, 100,
+                       100, 100, 100,
+                       100, 100, 100,
+                       100, 100, 100]
+  rl_kp: [20, 20, 20,
+          20, 20, 20,
+          20, 20, 20,
+          20, 20, 20]
+  rl_kd: [0.5, 0.5, 0.5,
+          0.5, 0.5, 0.5,
+          0.5, 0.5, 0.5,
+          0.5, 0.5, 0.5]
+  fixed_kp: [80, 80, 80,
+             80, 80, 80,
+             80, 80, 80,
+             80, 80, 80]
+  fixed_kd: [3, 3, 3,
+             3, 3, 3,
+             3, 3, 3,
+             3, 3, 3]
  hip_scale_reduction: 1.0
  hip_scale_reduction_indices: []
  num_of_dofs: 12
@ -42,14 +42,14 @@ a1_isaacsim:
  dof_pos_scale: 1.0
  dof_vel_scale: 0.05
  commands_scale: [1.0, 1.0, 1.0]
-  torque_limits: [33.5, 33.5, 33.5, 
-                  33.5, 33.5, 33.5, 
-                  33.5, 33.5, 33.5, 
-                  33.5, 33.5, 33.5] 
-  default_dof_pos: [ 0.1000, 0.8000, -1.5000, 
-                    -0.1000, 0.8000, -1.5000, 
-                     0.1000, 1.0000, -1.5000, 
-                    -0.1000, 1.0000, -1.5000] 
+  torque_limits: [33.5, 33.5, 33.5,
+                  33.5, 33.5, 33.5,
+                  33.5, 33.5, 33.5,
+                  33.5, 33.5, 33.5]
+  default_dof_pos: [ 0.1000, 0.8000, -1.5000,
+                    -0.1000, 0.8000, -1.5000,
+                     0.1000, 1.0000, -1.5000,
+                    -0.1000, 1.0000, -1.5000]
  joint_controller_names: ["FL_hip_controller", "FL_thigh_controller", "FL_calf_controller",
                           "FR_hip_controller", "FR_thigh_controller", "FR_calf_controller",
                           "RL_hip_controller", "RL_thigh_controller", "RL_calf_controller",
--- a/src/rl_sar/models/go2_isaacgym/config.yaml
+++ b/src/rl_sar/models/go2_isaacgym/config.yaml
@ -3,11 +3,11 @@ go2_isaacgym:
  framework: "isaacgym"
  rows: 4
  cols: 3
-  use_history: True
  dt: 0.005
  decimation: 4
  num_observations: 45
  observations: ["commands", "ang_vel", "gravity_vec", "dof_pos", "dof_vel", "actions"]
+  observations_history: [5, 4, 3, 2, 1, 0]
  clip_obs: 100.0
  clip_actions_lower: [-100, -100, -100,
                       -100, -100, -100,
--- a/src/rl_sar/models/gr1t1_isaacgym/config.yaml
+++ b/src/rl_sar/models/gr1t1_isaacgym/config.yaml
@ -3,23 +3,23 @@ gr1t1_isaacgym:
  framework: "isaacgym"
  rows: 2
  cols: 5
-  use_history: False
  dt: 0.001
  decimation: 20
  num_observations: 39
  observations: ["ang_vel", "gravity_vec", "commands", "dof_pos", "dof_vel", "actions"]
+  observations_history: []
  clip_obs: 100.0
  clip_actions_lower: [-0.4391, -1.0491, -2.0991, -0.4391, -1.3991,
                       -1.1391, -1.0491, -2.0991, -0.4391, -1.3991]
  clip_actions_upper: [1.1391, 1.0491, 1.0491, 2.2691, 0.8691,
                       0.4391, 1.0491, 1.0491, 2.2691, 0.8691]
-  rl_kp: [57.0, 43.0, 114.0, 114.0, 15.3, 
+  rl_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
          57.0, 43.0, 114.0, 114.0, 15.3]
-  rl_kd: [5.7, 4.3, 11.4, 11.4, 1.5, 
+  rl_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
          5.7, 4.3, 11.4, 11.4, 1.5]
-  fixed_kp: [57.0, 43.0, 114.0, 114.0, 15.3, 
+  fixed_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
             57.0, 43.0, 114.0, 114.0, 15.3]
-  fixed_kd: [5.7, 4.3, 11.4, 11.4, 1.5, 
+  fixed_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
             5.7, 4.3, 11.4, 11.4, 1.5]
  hip_scale_reduction: 1.0
  hip_scale_reduction_indices: []
@ -32,7 +32,7 @@ gr1t1_isaacgym:
  commands_scale: [1.0, 1.0, 1.0]
  torque_limits: [60.0, 45.0, 130.0, 130.0, 16.0,
                  60.0, 45.0, 130.0, 130.0, 16.0]
-  default_dof_pos: [0.0, 0.0, -0.2618, 0.5236, -0.2618, 
+  default_dof_pos: [0.0, 0.0, -0.2618, 0.5236, -0.2618,
                    0.0, 0.0, -0.2618, 0.5236, -0.2618]
-  joint_controller_names: ["l_hip_roll_controller", "l_hip_yaw_controller", "l_hip_pitch_controller", "l_knee_pitch_controller", "l_ankle_pitch_controller", 
+  joint_controller_names: ["l_hip_roll_controller", "l_hip_yaw_controller", "l_hip_pitch_controller", "l_knee_pitch_controller", "l_ankle_pitch_controller",
                           "r_hip_roll_controller", "r_hip_yaw_controller", "r_hip_pitch_controller", "r_knee_pitch_controller", "r_ankle_pitch_controller"]
--- a/src/rl_sar/models/gr1t1_isaacsim/config.yaml
+++ b/src/rl_sar/models/gr1t1_isaacsim/config.yaml
@ -3,23 +3,23 @@ gr1t1_isaacsim:
  framework: "isaacsim"
  rows: 2
  cols: 5
-  use_history: False
  dt: 0.001
  decimation: 20
  num_observations: 39
  observations: ["ang_vel", "gravity_vec", "commands", "dof_pos", "dof_vel", "actions"]
+  observations_history: []
  clip_obs: 100.0
  clip_actions_lower: [-0.4391, -1.0491, -2.0991, -0.4391, -1.3991,
                       -1.1391, -1.0491, -2.0991, -0.4391, -1.3991]
  clip_actions_upper: [1.1391, 1.0491, 1.0491, 2.2691, 0.8691,
                       0.4391, 1.0491, 1.0491, 2.2691, 0.8691]
-  rl_kp: [57.0, 43.0, 114.0, 114.0, 15.3, 
+  rl_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
          57.0, 43.0, 114.0, 114.0, 15.3]
-  rl_kd: [5.7, 4.3, 11.4, 11.4, 1.5, 
+  rl_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
          5.7, 4.3, 11.4, 11.4, 1.5]
-  fixed_kp: [57.0, 43.0, 114.0, 114.0, 15.3, 
+  fixed_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
             57.0, 43.0, 114.0, 114.0, 15.3]
-  fixed_kd: [5.7, 4.3, 11.4, 11.4, 1.5, 
+  fixed_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
             5.7, 4.3, 11.4, 11.4, 1.5]
  hip_scale_reduction: 1.0
  hip_scale_reduction_indices: []
@ -32,7 +32,7 @@ gr1t1_isaacsim:
  commands_scale: [1.0, 1.0, 1.0]
  torque_limits: [60.0, 45.0, 130.0, 130.0, 16.0,
                  60.0, 45.0, 130.0, 130.0, 16.0]
-  default_dof_pos: [0.0, 0.0, -0.2618, 0.5236, -0.2618, 
+  default_dof_pos: [0.0, 0.0, -0.2618, 0.5236, -0.2618,
                    0.0, 0.0, -0.2618, 0.5236, -0.2618]
-  joint_controller_names: ["l_hip_roll_controller", "l_hip_yaw_controller", "l_hip_pitch_controller", "l_knee_pitch_controller", "l_ankle_pitch_controller", 
+  joint_controller_names: ["l_hip_roll_controller", "l_hip_yaw_controller", "l_hip_pitch_controller", "l_knee_pitch_controller", "l_ankle_pitch_controller",
                           "r_hip_roll_controller", "r_hip_yaw_controller", "r_hip_pitch_controller", "r_knee_pitch_controller", "r_ankle_pitch_controller"]
--- a/src/rl_sar/models/gr1t2_isaacgym/config.yaml
+++ b/src/rl_sar/models/gr1t2_isaacgym/config.yaml
@ -3,23 +3,23 @@ gr1t2_isaacgym:
  framework: "isaacgym"
  rows: 2
  cols: 5
-  use_history: False
  dt: 0.001
  decimation: 20
  num_observations: 39
  observations: ["ang_vel", "gravity_vec", "commands", "dof_pos", "dof_vel", "actions"]
+  observations_history: []
  clip_obs: 100.0
  clip_actions_lower: [-0.4391, -1.0491, -2.0991, -0.4391, -1.3991,
                       -1.1391, -1.0491, -2.0991, -0.4391, -1.3991]
  clip_actions_upper: [1.1391, 1.0491, 1.0491, 2.2691, 0.8691,
                       0.4391, 1.0491, 1.0491, 2.2691, 0.8691]
-  rl_kp: [57.0, 43.0, 114.0, 114.0, 15.3, 
+  rl_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
          57.0, 43.0, 114.0, 114.0, 15.3]
-  rl_kd: [5.7, 4.3, 11.4, 11.4, 1.5, 
+  rl_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
          5.7, 4.3, 11.4, 11.4, 1.5]
-  fixed_kp: [57.0, 43.0, 114.0, 114.0, 15.3, 
+  fixed_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
             57.0, 43.0, 114.0, 114.0, 15.3]
-  fixed_kd: [5.7, 4.3, 11.4, 11.4, 1.5, 
+  fixed_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
             5.7, 4.3, 11.4, 11.4, 1.5]
  hip_scale_reduction: 1.0
  hip_scale_reduction_indices: []
@ -32,7 +32,7 @@ gr1t2_isaacgym:
  commands_scale: [1.0, 1.0, 1.0]
  torque_limits: [60.0, 45.0, 130.0, 130.0, 16.0,
                  60.0, 45.0, 130.0, 130.0, 16.0]
-  default_dof_pos: [0.0, 0.0, -0.2618, 0.5236, -0.2618, 
+  default_dof_pos: [0.0, 0.0, -0.2618, 0.5236, -0.2618,
                    0.0, 0.0, -0.2618, 0.5236, -0.2618]
-  joint_controller_names: ["l_hip_roll_controller", "l_hip_yaw_controller", "l_hip_pitch_controller", "l_knee_pitch_controller", "l_ankle_pitch_controller", 
+  joint_controller_names: ["l_hip_roll_controller", "l_hip_yaw_controller", "l_hip_pitch_controller", "l_knee_pitch_controller", "l_ankle_pitch_controller",
                           "r_hip_roll_controller", "r_hip_yaw_controller", "r_hip_pitch_controller", "r_knee_pitch_controller", "r_ankle_pitch_controller"]
--- a/src/rl_sar/models/gr1t2_isaacsim/config.yaml
+++ b/src/rl_sar/models/gr1t2_isaacsim/config.yaml
@ -3,23 +3,23 @@ gr1t2_isaacsim:
  framework: "isaacsim"
  rows: 2
  cols: 5
-  use_history: False
  dt: 0.001
  decimation: 20
  num_observations: 39
  observations: ["ang_vel", "gravity_vec", "commands", "dof_pos", "dof_vel", "actions"]
+  observations_history: []
  clip_obs: 100.0
  clip_actions_lower: [-0.4391, -1.0491, -2.0991, -0.4391, -1.3991,
                       -1.1391, -1.0491, -2.0991, -0.4391, -1.3991]
  clip_actions_upper: [1.1391, 1.0491, 1.0491, 2.2691, 0.8691,
                       0.4391, 1.0491, 1.0491, 2.2691, 0.8691]
-  rl_kp: [57.0, 43.0, 114.0, 114.0, 15.3, 
+  rl_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
          57.0, 43.0, 114.0, 114.0, 15.3]
-  rl_kd: [5.7, 4.3, 11.4, 11.4, 1.5, 
+  rl_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
          5.7, 4.3, 11.4, 11.4, 1.5]
-  fixed_kp: [57.0, 43.0, 114.0, 114.0, 15.3, 
+  fixed_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
             57.0, 43.0, 114.0, 114.0, 15.3]
-  fixed_kd: [5.7, 4.3, 11.4, 11.4, 1.5, 
+  fixed_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
             5.7, 4.3, 11.4, 11.4, 1.5]
  hip_scale_reduction: 1.0
  hip_scale_reduction_indices: []
@ -32,7 +32,7 @@ gr1t2_isaacsim:
  commands_scale: [1.0, 1.0, 1.0]
  torque_limits: [60.0, 45.0, 130.0, 130.0, 16.0,
                  60.0, 45.0, 130.0, 130.0, 16.0]
-  default_dof_pos: [0.0, 0.0, -0.2618, 0.5236, -0.2618, 
+  default_dof_pos: [0.0, 0.0, -0.2618, 0.5236, -0.2618,
                    0.0, 0.0, -0.2618, 0.5236, -0.2618]
-  joint_controller_names: ["l_hip_roll_controller", "l_hip_yaw_controller", "l_hip_pitch_controller", "l_knee_pitch_controller", "l_ankle_pitch_controller", 
+  joint_controller_names: ["l_hip_roll_controller", "l_hip_yaw_controller", "l_hip_pitch_controller", "l_knee_pitch_controller", "l_ankle_pitch_controller",
                           "r_hip_roll_controller", "r_hip_yaw_controller", "r_hip_pitch_controller", "r_knee_pitch_controller", "r_ankle_pitch_controller"]
--- a/src/rl_sar/scripts/rl_sdk.py
+++ b/src/rl_sar/scripts/rl_sdk.py
@ -64,11 +64,11 @@ class ModelParams:
    def __init__(self):
        self.model_name = None
        self.framework = None
-        self.use_history = None
        self.dt = None
        self.decimation = None
        self.num_observations = None
        self.observations = None
+        self.observations_history = None
        self.damping = None
        self.stiffness = None
        self.action_scale = None
@ -378,16 +378,20 @@ class RL:
        self.params.framework = config["framework"]
        rows = config["rows"]
        cols = config["cols"]
-        self.params.use_history = config["use_history"]
        self.params.dt = config["dt"]
        self.params.decimation = config["decimation"]
        self.params.num_observations = config["num_observations"]
        self.params.observations = config["observations"]
+        if config["observations_history"] is None:
+            self.params.observations_history = None
+        else:
+            self.params.observations_history = config["observations_history"]
+
        self.params.clip_obs = config["clip_obs"]
        self.params.action_scale = config["action_scale"]
        self.params.hip_scale_reduction = config["hip_scale_reduction"]
        self.params.hip_scale_reduction_indices = config["hip_scale_reduction_indices"]
-        if config["clip_actions_upper"] is None and config["clip_actions_upper"] is None:
+        if config["clip_actions_lower"] is None and config["clip_actions_upper"] is None:
            self.params.clip_actions_upper = None
            self.params.clip_actions_lower = None
        else:
--- a/src/rl_sar/scripts/rl_sim.py
+++ b/src/rl_sar/scripts/rl_sim.py
@ -36,8 +36,8 @@ class RL_Sim(RL):
        self.ReadYaml(self.robot_name)

        # history
-        if self.params.use_history:
-            self.history_obs_buf = ObservationBuffer(1, self.params.num_observations, 6)
+        if self.params.observations_history is None:
+            self.history_obs_buf = ObservationBuffer(1, self.params.num_observations, len(self.params.observations_history))

        # Due to the fact that the robot_state_publisher sorts the joint names alphabetically,
        # the mapping table is established according to the order defined in the YAML file
@ -202,9 +202,9 @@ class RL_Sim(RL):
    def Forward(self):
        torch.set_grad_enabled(False)
        clamped_obs = self.ComputeObservation()
-        if self.params.use_history:
+        if self.params.observations_history is None:
            self.history_obs_buf.insert(clamped_obs)
-            history_obs = self.history_obs_buf.get_obs_vec(np.arange(6))
+            history_obs = self.history_obs_buf.get_obs_vec(self.params.observations_history)
            actions = self.model.forward(history_obs)
        else:
            actions = self.model.forward(clamped_obs)
--- a/src/rl_sar/src/rl_real_a1.cpp
+++ b/src/rl_sar/src/rl_real_a1.cpp
@ -12,7 +12,10 @@ RL_Real::RL_Real() : unitree_safe(UNITREE_LEGGED_SDK::LeggedType::A1), unitree_u
    this->ReadYaml(this->robot_name);

    // history
-    this->history_obs_buf = ObservationBuffer(1, this->params.num_observations, 6);
+    if (!this->params.observations_history.empty())
+    {
+        this->history_obs_buf = ObservationBuffer(1, this->params.num_observations, this->params.observations_history.size());
+    }

    this->unitree_udp.InitCmdData(this->unitree_low_command);

@ -176,10 +179,17 @@ torch::Tensor RL_Real::Forward()

    torch::Tensor clamped_obs = this->ComputeObservation();

-    this->history_obs_buf.insert(clamped_obs);
-    this->history_obs = this->history_obs_buf.get_obs_vec({0, 1, 2, 3, 4, 5});
-
-    torch::Tensor actions = this->model.forward({this->history_obs}).toTensor();
+    torch::Tensor actions;
+    if (!this->params.observations_history.empty())
+    {
+        this->history_obs_buf.insert(clamped_obs);
+        this->history_obs = this->history_obs_buf.get_obs_vec(this->params.observations_history);
+        actions = this->model.forward({this->history_obs}).toTensor();
+    }
+    else
+    {
+        actions = this->model.forward({clamped_obs}).toTensor();
+    }

    if (this->params.clip_actions_upper.numel() != 0 && this->params.clip_actions_lower.numel() != 0)
    {
--- a/src/rl_sar/src/rl_real_go2.cpp
+++ b/src/rl_sar/src/rl_real_go2.cpp
@ -12,7 +12,10 @@ void RL_Real::RL_Real()
    this->ReadYaml(this->robot_name);

    // history
-    this->history_obs_buf = ObservationBuffer(1, this->params.num_observations, 6);
+    if (!this->params.observations_history.empty())
+    {
+        this->history_obs_buf = ObservationBuffer(1, this->params.num_observations, this->params.observations_history.size());
+    }

    // init robot
    this->InitRobotStateClient();
@ -187,10 +190,17 @@ torch::Tensor RL_Real::Forward()

    torch::Tensor clamped_obs = this->ComputeObservation();

-    this->history_obs_buf.insert(clamped_obs);
-    this->history_obs = this->history_obs_buf.get_obs_vec({5, 4, 3, 2, 1, 0});
-
-    torch::Tensor actions = this->model.forward({this->history_obs}).toTensor();
+    torch::Tensor actions;
+    if (!this->params.observations_history.empty())
+    {
+        this->history_obs_buf.insert(clamped_obs);
+        this->history_obs = this->history_obs_buf.get_obs_vec(this->params.observations_history);
+        actions = this->model.forward({this->history_obs}).toTensor();
+    }
+    else
+    {
+        actions = this->model.forward({clamped_obs}).toTensor();
+    }

    if (this->params.clip_actions_upper.numel() != 0 && this->params.clip_actions_lower.numel() != 0)
    {
--- a/src/rl_sar/src/rl_sim.cpp
+++ b/src/rl_sar/src/rl_sim.cpp
@ -12,9 +12,9 @@ RL_Sim::RL_Sim()
    this->ReadYaml(this->robot_name);

    // history
-    if (this->params.use_history)
+    if (!this->params.observations_history.empty())
    {
-        this->history_obs_buf = ObservationBuffer(1, this->params.num_observations, 6);
+        this->history_obs_buf = ObservationBuffer(1, this->params.num_observations, this->params.observations_history.size());
    }

    // Due to the fact that the robot_state_publisher sorts the joint names alphabetically,
@ -247,13 +247,14 @@ void RL_Sim::RunModel()
 torch::Tensor RL_Sim::Forward()
 {
    torch::autograd::GradMode::set_enabled(false);
+
    torch::Tensor clamped_obs = this->ComputeObservation();
+
    torch::Tensor actions;
-    if (this->params.use_history)
+    if (!this->params.observations_history.empty())
    {
        this->history_obs_buf.insert(clamped_obs);
-        // TODO-devel-go2 这里要找一种方法适配不同的顺序，不能直接改这里，会导致a1的模型不可用
-        this->history_obs = this->history_obs_buf.get_obs_vec({5, 4, 3, 2, 1, 0});
+        this->history_obs = this->history_obs_buf.get_obs_vec(this->params.observations_history);
        actions = this->model.forward({this->history_obs}).toTensor();
    }
    else