feat: remove the `use_history` field, add `observations_history` to support history observations, and adjust the related logic to handle the case of empty history observations

This commit is contained in:
fan-ziqi 2024-10-12 23:45:18 +08:00
parent 11ec554365
commit 274db72d4b
14 changed files with 151 additions and 119 deletions

View File

@ -421,11 +421,18 @@ void RL::ReadYaml(std::string robot_name)
this->params.framework = config["framework"].as<std::string>();
int rows = config["rows"].as<int>();
int cols = config["cols"].as<int>();
this->params.use_history = config["use_history"].as<bool>();
this->params.dt = config["dt"].as<double>();
this->params.decimation = config["decimation"].as<int>();
this->params.num_observations = config["num_observations"].as<int>();
this->params.observations = ReadVectorFromYaml<std::string>(config["observations"]);
if (config["observations_history"].IsNull())
{
this->params.observations_history = {};
}
else
{
this->params.observations_history = ReadVectorFromYaml<int>(config["observations_history"]);
}
this->params.clip_obs = config["clip_obs"].as<double>();
if (config["clip_actions_lower"].IsNull() && config["clip_actions_upper"].IsNull())
{

View File

@ -72,11 +72,11 @@ struct ModelParams
{
std::string model_name;
std::string framework;
bool use_history;
double dt;
int decimation;
int num_observations;
std::vector<std::string> observations;
std::vector<int> observations_history;
double damping;
double stiffness;
double action_scale;

View File

@ -3,36 +3,36 @@ a1_isaacgym:
framework: "isaacgym"
rows: 4
cols: 3
use_history: True
dt: 0.005
decimation: 4
num_observations: 45
observations: ["ang_vel", "gravity_vec", "commands", "dof_pos", "dof_vel", "actions"]
observations_history: [0, 1, 2, 3, 4, 5]
clip_obs: 100.0
clip_actions_lower: [-100, -100, -100,
-100, -100, -100,
-100, -100, -100,
-100, -100, -100]
clip_actions_upper: [100, 100, 100,
100, 100, 100,
100, 100, 100,
100, 100, 100]
rl_kp: [20, 20, 20,
20, 20, 20,
20, 20, 20,
20, 20, 20]
rl_kd: [0.5, 0.5, 0.5,
0.5, 0.5, 0.5,
0.5, 0.5, 0.5,
0.5, 0.5, 0.5]
fixed_kp: [80, 80, 80,
80, 80, 80,
80, 80, 80,
80, 80, 80]
fixed_kd: [3, 3, 3,
3, 3, 3,
3, 3, 3,
3, 3, 3]
clip_actions_lower: [-100, -100, -100,
-100, -100, -100,
-100, -100, -100,
-100, -100, -100]
clip_actions_upper: [100, 100, 100,
100, 100, 100,
100, 100, 100,
100, 100, 100]
rl_kp: [20, 20, 20,
20, 20, 20,
20, 20, 20,
20, 20, 20]
rl_kd: [0.5, 0.5, 0.5,
0.5, 0.5, 0.5,
0.5, 0.5, 0.5,
0.5, 0.5, 0.5]
fixed_kp: [80, 80, 80,
80, 80, 80,
80, 80, 80,
80, 80, 80]
fixed_kd: [3, 3, 3,
3, 3, 3,
3, 3, 3,
3, 3, 3]
hip_scale_reduction: 0.5
hip_scale_reduction_indices: [0, 3, 6, 9]
num_of_dofs: 12
@ -42,14 +42,14 @@ a1_isaacgym:
dof_pos_scale: 1.0
dof_vel_scale: 0.05
commands_scale: [2.0, 2.0, 1.0]
torque_limits: [33.5, 33.5, 33.5,
33.5, 33.5, 33.5,
33.5, 33.5, 33.5,
33.5, 33.5, 33.5]
default_dof_pos: [ 0.1000, 0.8000, -1.5000,
-0.1000, 0.8000, -1.5000,
0.1000, 1.0000, -1.5000,
-0.1000, 1.0000, -1.5000]
torque_limits: [33.5, 33.5, 33.5,
33.5, 33.5, 33.5,
33.5, 33.5, 33.5,
33.5, 33.5, 33.5]
default_dof_pos: [ 0.1000, 0.8000, -1.5000,
-0.1000, 0.8000, -1.5000,
0.1000, 1.0000, -1.5000,
-0.1000, 1.0000, -1.5000]
joint_controller_names: ["FL_hip_controller", "FL_thigh_controller", "FL_calf_controller",
"FR_hip_controller", "FR_thigh_controller", "FR_calf_controller",
"RL_hip_controller", "RL_thigh_controller", "RL_calf_controller",

View File

@ -3,36 +3,36 @@ a1_isaacsim:
framework: "isaacsim"
rows: 4
cols: 3
use_history: False
dt: 0.005
decimation: 4
num_observations: 45
observations: ["ang_vel", "gravity_vec", "commands", "dof_pos", "dof_vel", "actions"]
observations_history: []
clip_obs: 100.0
clip_actions_lower: [-100, -100, -100,
-100, -100, -100,
-100, -100, -100,
-100, -100, -100]
clip_actions_upper: [100, 100, 100,
100, 100, 100,
100, 100, 100,
100, 100, 100]
rl_kp: [20, 20, 20,
20, 20, 20,
20, 20, 20,
20, 20, 20]
rl_kd: [0.5, 0.5, 0.5,
0.5, 0.5, 0.5,
0.5, 0.5, 0.5,
0.5, 0.5, 0.5]
fixed_kp: [80, 80, 80,
80, 80, 80,
80, 80, 80,
80, 80, 80]
fixed_kd: [3, 3, 3,
3, 3, 3,
3, 3, 3,
3, 3, 3]
clip_actions_lower: [-100, -100, -100,
-100, -100, -100,
-100, -100, -100,
-100, -100, -100]
clip_actions_upper: [100, 100, 100,
100, 100, 100,
100, 100, 100,
100, 100, 100]
rl_kp: [20, 20, 20,
20, 20, 20,
20, 20, 20,
20, 20, 20]
rl_kd: [0.5, 0.5, 0.5,
0.5, 0.5, 0.5,
0.5, 0.5, 0.5,
0.5, 0.5, 0.5]
fixed_kp: [80, 80, 80,
80, 80, 80,
80, 80, 80,
80, 80, 80]
fixed_kd: [3, 3, 3,
3, 3, 3,
3, 3, 3,
3, 3, 3]
hip_scale_reduction: 1.0
hip_scale_reduction_indices: []
num_of_dofs: 12
@ -42,14 +42,14 @@ a1_isaacsim:
dof_pos_scale: 1.0
dof_vel_scale: 0.05
commands_scale: [1.0, 1.0, 1.0]
torque_limits: [33.5, 33.5, 33.5,
33.5, 33.5, 33.5,
33.5, 33.5, 33.5,
33.5, 33.5, 33.5]
default_dof_pos: [ 0.1000, 0.8000, -1.5000,
-0.1000, 0.8000, -1.5000,
0.1000, 1.0000, -1.5000,
-0.1000, 1.0000, -1.5000]
torque_limits: [33.5, 33.5, 33.5,
33.5, 33.5, 33.5,
33.5, 33.5, 33.5,
33.5, 33.5, 33.5]
default_dof_pos: [ 0.1000, 0.8000, -1.5000,
-0.1000, 0.8000, -1.5000,
0.1000, 1.0000, -1.5000,
-0.1000, 1.0000, -1.5000]
joint_controller_names: ["FL_hip_controller", "FL_thigh_controller", "FL_calf_controller",
"FR_hip_controller", "FR_thigh_controller", "FR_calf_controller",
"RL_hip_controller", "RL_thigh_controller", "RL_calf_controller",

View File

@ -3,11 +3,11 @@ go2_isaacgym:
framework: "isaacgym"
rows: 4
cols: 3
use_history: True
dt: 0.005
decimation: 4
num_observations: 45
observations: ["commands", "ang_vel", "gravity_vec", "dof_pos", "dof_vel", "actions"]
observations_history: [5, 4, 3, 2, 1, 0]
clip_obs: 100.0
clip_actions_lower: [-100, -100, -100,
-100, -100, -100,

View File

@ -3,23 +3,23 @@ gr1t1_isaacgym:
framework: "isaacgym"
rows: 2
cols: 5
use_history: False
dt: 0.001
decimation: 20
num_observations: 39
observations: ["ang_vel", "gravity_vec", "commands", "dof_pos", "dof_vel", "actions"]
observations_history: []
clip_obs: 100.0
clip_actions_lower: [-0.4391, -1.0491, -2.0991, -0.4391, -1.3991,
-1.1391, -1.0491, -2.0991, -0.4391, -1.3991]
clip_actions_upper: [1.1391, 1.0491, 1.0491, 2.2691, 0.8691,
0.4391, 1.0491, 1.0491, 2.2691, 0.8691]
rl_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
rl_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
57.0, 43.0, 114.0, 114.0, 15.3]
rl_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
rl_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
5.7, 4.3, 11.4, 11.4, 1.5]
fixed_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
fixed_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
57.0, 43.0, 114.0, 114.0, 15.3]
fixed_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
fixed_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
5.7, 4.3, 11.4, 11.4, 1.5]
hip_scale_reduction: 1.0
hip_scale_reduction_indices: []
@ -32,7 +32,7 @@ gr1t1_isaacgym:
commands_scale: [1.0, 1.0, 1.0]
torque_limits: [60.0, 45.0, 130.0, 130.0, 16.0,
60.0, 45.0, 130.0, 130.0, 16.0]
default_dof_pos: [0.0, 0.0, -0.2618, 0.5236, -0.2618,
default_dof_pos: [0.0, 0.0, -0.2618, 0.5236, -0.2618,
0.0, 0.0, -0.2618, 0.5236, -0.2618]
joint_controller_names: ["l_hip_roll_controller", "l_hip_yaw_controller", "l_hip_pitch_controller", "l_knee_pitch_controller", "l_ankle_pitch_controller",
joint_controller_names: ["l_hip_roll_controller", "l_hip_yaw_controller", "l_hip_pitch_controller", "l_knee_pitch_controller", "l_ankle_pitch_controller",
"r_hip_roll_controller", "r_hip_yaw_controller", "r_hip_pitch_controller", "r_knee_pitch_controller", "r_ankle_pitch_controller"]

View File

@ -3,23 +3,23 @@ gr1t1_isaacsim:
framework: "isaacsim"
rows: 2
cols: 5
use_history: False
dt: 0.001
decimation: 20
num_observations: 39
observations: ["ang_vel", "gravity_vec", "commands", "dof_pos", "dof_vel", "actions"]
observations_history: []
clip_obs: 100.0
clip_actions_lower: [-0.4391, -1.0491, -2.0991, -0.4391, -1.3991,
-1.1391, -1.0491, -2.0991, -0.4391, -1.3991]
clip_actions_upper: [1.1391, 1.0491, 1.0491, 2.2691, 0.8691,
0.4391, 1.0491, 1.0491, 2.2691, 0.8691]
rl_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
rl_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
57.0, 43.0, 114.0, 114.0, 15.3]
rl_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
rl_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
5.7, 4.3, 11.4, 11.4, 1.5]
fixed_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
fixed_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
57.0, 43.0, 114.0, 114.0, 15.3]
fixed_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
fixed_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
5.7, 4.3, 11.4, 11.4, 1.5]
hip_scale_reduction: 1.0
hip_scale_reduction_indices: []
@ -32,7 +32,7 @@ gr1t1_isaacsim:
commands_scale: [1.0, 1.0, 1.0]
torque_limits: [60.0, 45.0, 130.0, 130.0, 16.0,
60.0, 45.0, 130.0, 130.0, 16.0]
default_dof_pos: [0.0, 0.0, -0.2618, 0.5236, -0.2618,
default_dof_pos: [0.0, 0.0, -0.2618, 0.5236, -0.2618,
0.0, 0.0, -0.2618, 0.5236, -0.2618]
joint_controller_names: ["l_hip_roll_controller", "l_hip_yaw_controller", "l_hip_pitch_controller", "l_knee_pitch_controller", "l_ankle_pitch_controller",
joint_controller_names: ["l_hip_roll_controller", "l_hip_yaw_controller", "l_hip_pitch_controller", "l_knee_pitch_controller", "l_ankle_pitch_controller",
"r_hip_roll_controller", "r_hip_yaw_controller", "r_hip_pitch_controller", "r_knee_pitch_controller", "r_ankle_pitch_controller"]

View File

@ -3,23 +3,23 @@ gr1t2_isaacgym:
framework: "isaacgym"
rows: 2
cols: 5
use_history: False
dt: 0.001
decimation: 20
num_observations: 39
observations: ["ang_vel", "gravity_vec", "commands", "dof_pos", "dof_vel", "actions"]
observations_history: []
clip_obs: 100.0
clip_actions_lower: [-0.4391, -1.0491, -2.0991, -0.4391, -1.3991,
-1.1391, -1.0491, -2.0991, -0.4391, -1.3991]
clip_actions_upper: [1.1391, 1.0491, 1.0491, 2.2691, 0.8691,
0.4391, 1.0491, 1.0491, 2.2691, 0.8691]
rl_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
rl_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
57.0, 43.0, 114.0, 114.0, 15.3]
rl_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
rl_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
5.7, 4.3, 11.4, 11.4, 1.5]
fixed_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
fixed_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
57.0, 43.0, 114.0, 114.0, 15.3]
fixed_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
fixed_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
5.7, 4.3, 11.4, 11.4, 1.5]
hip_scale_reduction: 1.0
hip_scale_reduction_indices: []
@ -32,7 +32,7 @@ gr1t2_isaacgym:
commands_scale: [1.0, 1.0, 1.0]
torque_limits: [60.0, 45.0, 130.0, 130.0, 16.0,
60.0, 45.0, 130.0, 130.0, 16.0]
default_dof_pos: [0.0, 0.0, -0.2618, 0.5236, -0.2618,
default_dof_pos: [0.0, 0.0, -0.2618, 0.5236, -0.2618,
0.0, 0.0, -0.2618, 0.5236, -0.2618]
joint_controller_names: ["l_hip_roll_controller", "l_hip_yaw_controller", "l_hip_pitch_controller", "l_knee_pitch_controller", "l_ankle_pitch_controller",
joint_controller_names: ["l_hip_roll_controller", "l_hip_yaw_controller", "l_hip_pitch_controller", "l_knee_pitch_controller", "l_ankle_pitch_controller",
"r_hip_roll_controller", "r_hip_yaw_controller", "r_hip_pitch_controller", "r_knee_pitch_controller", "r_ankle_pitch_controller"]

View File

@ -3,23 +3,23 @@ gr1t2_isaacsim:
framework: "isaacsim"
rows: 2
cols: 5
use_history: False
dt: 0.001
decimation: 20
num_observations: 39
observations: ["ang_vel", "gravity_vec", "commands", "dof_pos", "dof_vel", "actions"]
observations_history: []
clip_obs: 100.0
clip_actions_lower: [-0.4391, -1.0491, -2.0991, -0.4391, -1.3991,
-1.1391, -1.0491, -2.0991, -0.4391, -1.3991]
clip_actions_upper: [1.1391, 1.0491, 1.0491, 2.2691, 0.8691,
0.4391, 1.0491, 1.0491, 2.2691, 0.8691]
rl_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
rl_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
57.0, 43.0, 114.0, 114.0, 15.3]
rl_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
rl_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
5.7, 4.3, 11.4, 11.4, 1.5]
fixed_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
fixed_kp: [57.0, 43.0, 114.0, 114.0, 15.3,
57.0, 43.0, 114.0, 114.0, 15.3]
fixed_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
fixed_kd: [5.7, 4.3, 11.4, 11.4, 1.5,
5.7, 4.3, 11.4, 11.4, 1.5]
hip_scale_reduction: 1.0
hip_scale_reduction_indices: []
@ -32,7 +32,7 @@ gr1t2_isaacsim:
commands_scale: [1.0, 1.0, 1.0]
torque_limits: [60.0, 45.0, 130.0, 130.0, 16.0,
60.0, 45.0, 130.0, 130.0, 16.0]
default_dof_pos: [0.0, 0.0, -0.2618, 0.5236, -0.2618,
default_dof_pos: [0.0, 0.0, -0.2618, 0.5236, -0.2618,
0.0, 0.0, -0.2618, 0.5236, -0.2618]
joint_controller_names: ["l_hip_roll_controller", "l_hip_yaw_controller", "l_hip_pitch_controller", "l_knee_pitch_controller", "l_ankle_pitch_controller",
joint_controller_names: ["l_hip_roll_controller", "l_hip_yaw_controller", "l_hip_pitch_controller", "l_knee_pitch_controller", "l_ankle_pitch_controller",
"r_hip_roll_controller", "r_hip_yaw_controller", "r_hip_pitch_controller", "r_knee_pitch_controller", "r_ankle_pitch_controller"]

View File

@ -64,11 +64,11 @@ class ModelParams:
def __init__(self):
self.model_name = None
self.framework = None
self.use_history = None
self.dt = None
self.decimation = None
self.num_observations = None
self.observations = None
self.observations_history = None
self.damping = None
self.stiffness = None
self.action_scale = None
@ -378,16 +378,20 @@ class RL:
self.params.framework = config["framework"]
rows = config["rows"]
cols = config["cols"]
self.params.use_history = config["use_history"]
self.params.dt = config["dt"]
self.params.decimation = config["decimation"]
self.params.num_observations = config["num_observations"]
self.params.observations = config["observations"]
if config["observations_history"] is None:
self.params.observations_history = None
else:
self.params.observations_history = config["observations_history"]
self.params.clip_obs = config["clip_obs"]
self.params.action_scale = config["action_scale"]
self.params.hip_scale_reduction = config["hip_scale_reduction"]
self.params.hip_scale_reduction_indices = config["hip_scale_reduction_indices"]
if config["clip_actions_upper"] is None and config["clip_actions_upper"] is None:
if config["clip_actions_lower"] is None and config["clip_actions_upper"] is None:
self.params.clip_actions_upper = None
self.params.clip_actions_lower = None
else:

View File

@ -36,8 +36,8 @@ class RL_Sim(RL):
self.ReadYaml(self.robot_name)
# history
if self.params.use_history:
self.history_obs_buf = ObservationBuffer(1, self.params.num_observations, 6)
if self.params.observations_history is None:
self.history_obs_buf = ObservationBuffer(1, self.params.num_observations, len(self.params.observations_history))
# Due to the fact that the robot_state_publisher sorts the joint names alphabetically,
# the mapping table is established according to the order defined in the YAML file
@ -202,9 +202,9 @@ class RL_Sim(RL):
def Forward(self):
torch.set_grad_enabled(False)
clamped_obs = self.ComputeObservation()
if self.params.use_history:
if self.params.observations_history is None:
self.history_obs_buf.insert(clamped_obs)
history_obs = self.history_obs_buf.get_obs_vec(np.arange(6))
history_obs = self.history_obs_buf.get_obs_vec(self.params.observations_history)
actions = self.model.forward(history_obs)
else:
actions = self.model.forward(clamped_obs)

View File

@ -12,7 +12,10 @@ RL_Real::RL_Real() : unitree_safe(UNITREE_LEGGED_SDK::LeggedType::A1), unitree_u
this->ReadYaml(this->robot_name);
// history
this->history_obs_buf = ObservationBuffer(1, this->params.num_observations, 6);
if (!this->params.observations_history.empty())
{
this->history_obs_buf = ObservationBuffer(1, this->params.num_observations, this->params.observations_history.size());
}
this->unitree_udp.InitCmdData(this->unitree_low_command);
@ -176,10 +179,17 @@ torch::Tensor RL_Real::Forward()
torch::Tensor clamped_obs = this->ComputeObservation();
this->history_obs_buf.insert(clamped_obs);
this->history_obs = this->history_obs_buf.get_obs_vec({0, 1, 2, 3, 4, 5});
torch::Tensor actions = this->model.forward({this->history_obs}).toTensor();
torch::Tensor actions;
if (!this->params.observations_history.empty())
{
this->history_obs_buf.insert(clamped_obs);
this->history_obs = this->history_obs_buf.get_obs_vec(this->params.observations_history);
actions = this->model.forward({this->history_obs}).toTensor();
}
else
{
actions = this->model.forward({clamped_obs}).toTensor();
}
if (this->params.clip_actions_upper.numel() != 0 && this->params.clip_actions_lower.numel() != 0)
{

View File

@ -12,7 +12,10 @@ void RL_Real::RL_Real()
this->ReadYaml(this->robot_name);
// history
this->history_obs_buf = ObservationBuffer(1, this->params.num_observations, 6);
if (!this->params.observations_history.empty())
{
this->history_obs_buf = ObservationBuffer(1, this->params.num_observations, this->params.observations_history.size());
}
// init robot
this->InitRobotStateClient();
@ -187,10 +190,17 @@ torch::Tensor RL_Real::Forward()
torch::Tensor clamped_obs = this->ComputeObservation();
this->history_obs_buf.insert(clamped_obs);
this->history_obs = this->history_obs_buf.get_obs_vec({5, 4, 3, 2, 1, 0});
torch::Tensor actions = this->model.forward({this->history_obs}).toTensor();
torch::Tensor actions;
if (!this->params.observations_history.empty())
{
this->history_obs_buf.insert(clamped_obs);
this->history_obs = this->history_obs_buf.get_obs_vec(this->params.observations_history);
actions = this->model.forward({this->history_obs}).toTensor();
}
else
{
actions = this->model.forward({clamped_obs}).toTensor();
}
if (this->params.clip_actions_upper.numel() != 0 && this->params.clip_actions_lower.numel() != 0)
{

View File

@ -12,9 +12,9 @@ RL_Sim::RL_Sim()
this->ReadYaml(this->robot_name);
// history
if (this->params.use_history)
if (!this->params.observations_history.empty())
{
this->history_obs_buf = ObservationBuffer(1, this->params.num_observations, 6);
this->history_obs_buf = ObservationBuffer(1, this->params.num_observations, this->params.observations_history.size());
}
// Due to the fact that the robot_state_publisher sorts the joint names alphabetically,
@ -247,13 +247,14 @@ void RL_Sim::RunModel()
torch::Tensor RL_Sim::Forward()
{
torch::autograd::GradMode::set_enabled(false);
torch::Tensor clamped_obs = this->ComputeObservation();
torch::Tensor actions;
if (this->params.use_history)
if (!this->params.observations_history.empty())
{
this->history_obs_buf.insert(clamped_obs);
// TODO-devel-go2 这里要找一种方法适配不同的顺序不能直接改这里会导致a1的模型不可用
this->history_obs = this->history_obs_buf.get_obs_vec({5, 4, 3, 2, 1, 0});
this->history_obs = this->history_obs_buf.get_obs_vec(this->params.observations_history);
actions = this->model.forward({this->history_obs}).toTensor();
}
else