fix typos and rename all `resample_time` to `resampling_time`

* supporting numpy 1.24 and pytorch 2.4.1 * add parameters for go2 distillation using single GPU (a.k.a `multi_process_ = False`)
2024-10-10 22:54:35 +08:00 · 2024-10-10 22:54:35 +08:00 · 789e83c40b
parent 637080edaf
commit 789e83c40b
11 changed files with 31 additions and 26 deletions
--- a/legged_gym/README.md
+++ b/legged_gym/README.md
@ -3,17 +3,16 @@ This is the tutorial for training the skill policy and distilling the parkour po

 ## Installation ##
 1. Create a new Python virtual env or conda environment with Python 3.6, 3.7, or 3.8 (3.8 recommended)
-2. Install PyTorch 1.10 with cuda-11.3:
-    - `pip install torch==1.10.0+cu113 torchvision==0.11.1+cu113 torchaudio==0.10.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html`
-3. Install Isaac Gym
+2. Install Isaac Gym
   - Download and install Isaac Gym Preview 4 (I didn't test the history version) from https://developer.nvidia.com/isaac-gym
   - `cd isaacgym/python && pip install -e .`
   - Try running an example `cd examples && python 1080_balls_of_solitude.py`
   - For troubleshooting check docs `isaacgym/docs/index.html`
-4. Install rsl_rl (PPO implementation)
+   - We tested on `pytorch 2.4.1`
+3. Install rsl_rl (PPO implementation)
   - Using the command to direct to the root path of this repository
   - `cd rsl_rl && pip install -e .` 
-5. Install legged_gym
+4. Install legged_gym
   - `cd ../legged_gym && pip install -e .`

 ## Usage ##
--- a/legged_gym/legged_gym/envs/a1/a1_field_config.py
+++ b/legged_gym/legged_gym/envs/a1/a1_field_config.py
@ -42,7 +42,7 @@ class A1FieldCfg( A1RoughCfg ):
        class proprioception:
            delay_action_obs = False
            latency_range = [0.0, 0.0]
-            latency_resample_time = 2.0 # [s]
+            latency_resampling_time = 2.0 # [s]
    
    class terrain( A1RoughCfg.terrain ):
        mesh_type = "trimesh" # Don't change
@ -126,7 +126,7 @@ class A1FieldCfg( A1RoughCfg ):
        motor_clip_torque = False
        # action_delay = False # set to True to enable action delay in sim
        # action_delay_range = [0.002, 0.022] # [s]
-        # action_delay_resample_time = 5.0 # [s]
+        # action_delay_resampling_time = 5.0 # [s]

    class asset( A1RoughCfg.asset ):
        penalize_contacts_on = ["base", "thigh", "calf"]
--- a/legged_gym/legged_gym/envs/a1/a1_field_distill_config.py
+++ b/legged_gym/legged_gym/envs/a1/a1_field_distill_config.py
@ -45,7 +45,7 @@ class A1FieldDistillCfg( A1FieldCfg ):

            # adding randomized latency
            latency_range = [0.2, 0.26] # for [16, 32, 32] -> 128 -> 128 visual model in (240, 424 option)
-            latency_resample_time = 5.0 # [s]
+            latency_resampling_time = 5.0 # [s]
            refresh_duration = 1/10 # [s] for (240, 424 option with onboard script fixed to no more than 20Hz)

            # config to simulate stero RGBD camera
@ -56,7 +56,7 @@ class A1FieldDistillCfg( A1FieldCfg ):
        class proprioception:
            delay_action_obs = True
            latency_range = [0.04-0.0025, 0.04+0.0075] # [min, max] in seconds
-            latency_resample_time = 2.0 # [s]
+            latency_resampling_time = 2.0 # [s]

    class terrain( A1FieldCfg.terrain ):
        num_rows = 2
--- a/legged_gym/legged_gym/envs/base/legged_robot_noisy.py
+++ b/legged_gym/legged_gym/envs/base/legged_robot_noisy.py
@ -96,7 +96,7 @@ class LeggedRobotNoisyMixin:
        self.torque_exceed_count_substep[(torch.abs(self.torques) > self.torque_limits * self.cfg.rewards.soft_torque_limit).any(dim= -1)] += 1
        
        ### count how many times in the episode the robot is out of dof pos limit (summing all dofs)
-        self.out_of_dof_pos_limit_count_substep += self._reward_dof_pos_limits().int()
+        self.out_of_dof_pos_limit_count_substep += (self._reward_dof_pos_limits() > 0).int()
        ### or using a1_const.h value to check whether the robot is out of dof pos limit
        # joint_pos_limit_high = torch.tensor([0.802, 4.19, -0.916] * 4, device= self.device) - 0.001
        # joint_pos_limit_low = torch.tensor([-0.802, -1.05, -2.7] * 4, device= self.device) + 0.001
@ -139,7 +139,7 @@ class LeggedRobotNoisyMixin:
        all_obs_components = self.all_obs_components

        if getattr(self.cfg.control, "action_delay", False):
-            assert hasattr(self.cfg.control, "action_delay_range") and hasattr(self.cfg.control, "action_delay_resample_time"), "Please specify action_delay_range and action_delay_resample_time in the config file."
+            assert hasattr(self.cfg.control, "action_delay_range") and hasattr(self.cfg.control, "action_delay_resampling_time"), "Please specify action_delay_range and action_delay_resampling_time in the config file."
            self.build_action_delay_buffer()

        self.component_governed_by_sensor = dict()
@ -177,7 +177,6 @@ class LeggedRobotNoisyMixin:
                (self.num_envs, 1),
                device= self.device,
            ).flatten()
-        self.action_delayed_frames = ((self.action_delay_buffer / self.dt) + 1).to(int)

    def build_depth_image_processor_buffers(self, sensor_name):
        assert sensor_name == "forward_camera", "Only forward_camera is supported for now."
@ -268,7 +267,7 @@ class LeggedRobotNoisyMixin:
        return_ = super()._reset_buffers(env_ids)
        if hasattr(self, "actions_history_buffer"):
            self.actions_history_buffer[:, env_ids] = 0.
-            self.action_delayed_frames[env_ids] = self.cfg.control.action_history_buffer_length
+            self._resample_action_delay(env_ids)
        for sensor_name in self.available_sensors:
            if not hasattr(self.cfg.sensor, sensor_name):
                continue
@ -757,7 +756,7 @@ class LeggedRobotNoisyMixin:
                -self.forward_camera_delayed_frames,
                torch.arange(self.num_envs, device= self.device),
            ].clone()
-            self.forward_depth_refreshed = True
+            self.forward_depth_obs_refreshed = True
        if not hasattr(self.cfg.sensor, "forward_camera") or privileged:
            return super()._get_forward_depth_obs(privileged).reshape(self.num_envs, -1)

--- a/legged_gym/legged_gym/envs/go1/go1_field_distill_config.py
+++ b/legged_gym/legged_gym/envs/go1/go1_field_distill_config.py
@ -69,7 +69,7 @@ class Go1FieldDistillCfg( Go1FieldCfg ):
            # for go1, usb2.0, 480x640, d435i camera
            latency_range = [0.25, 0.30] # [s]
            # latency_range = [0.28, 0.36] # [s]
-            latency_resample_time = 5.0 # [s]
+            latency_resampling_time = 5.0 # [s]
            refresh_duration = 1/10 # [s] for (240, 424 option with onboard script fixed to no more than 20Hz)
            
            # config to simulate stero RGBD camera
--- a/legged_gym/legged_gym/envs/go2/go2_config.py
+++ b/legged_gym/legged_gym/envs/go2/go2_config.py
@ -42,7 +42,7 @@ class Go2RoughCfg( LeggedRobotCfg ):
        class proprioception:
            obs_components = ["ang_vel", "projected_gravity", "commands", "dof_pos", "dof_vel"]
            latency_range = [0.005, 0.045] # [s]
-            latency_resample_time = 5.0 # [s]
+            latency_resampling_time = 5.0 # [s]

    class terrain:
        selected = "TerrainPerlin"
--- a/legged_gym/legged_gym/envs/go2/go2_distill_config.py
+++ b/legged_gym/legged_gym/envs/go2/go2_distill_config.py
@ -37,6 +37,9 @@ class Go2DistillCfg( Go2FieldCfg ):
        if multi_process_:
            num_rows = 4
            num_cols = 1
+        else:
+            num_rows = 10
+            num_cols = 20
        curriculum = False

        BarrierTrack_kwargs = merge_dict(Go2FieldCfg.terrain.BarrierTrack_kwargs, dict(
@ -69,7 +72,7 @@ class Go2DistillCfg( Go2FieldCfg ):
            depth_range = [0.0, 3.0]

            latency_range = [0.08, 0.142]
-            latency_resample_time = 5.0
+            latency_resampling_time = 5.0
            refresh_duration = 1/10 # [s]

    class commands( Go2FieldCfg.commands ):
--- a/legged_gym/legged_gym/scripts/collect.py
+++ b/legged_gym/legged_gym/scripts/collect.py
@ -4,6 +4,7 @@ from collections import OrderedDict
 import torch
 from datetime import datetime
 import numpy as np
+np.float = float
 import os
 import json
 import os.path as osp
--- a/legged_gym/legged_gym/scripts/play.py
+++ b/legged_gym/legged_gym/scripts/play.py
@ -405,14 +405,14 @@ def play(args):
        # if (abs(env.substep_torques[robot_index]) > 35.).any():
            # exceed_idxs = torch.where(abs(env.substep_torques[robot_index]) > 35.)
            # print("substep_torques:", exceed_idxs[1], env.substep_torques[robot_index][exceed_idxs[0], exceed_idxs[1]])
-        if env.torque_exceed_count_envstep[robot_index].any():
-            print("substep torque exceed limit ratio", 
-                (torch.abs(env.substep_torques[robot_index]) / (env.torque_limits.unsqueeze(0))).max(),
-                "joint index",
-                torch.where((torch.abs(env.substep_torques[robot_index]) > env.torque_limits.unsqueeze(0) * env.cfg.rewards.soft_torque_limit).any(dim= 0))[0],
-                "timestep", i,
-            )
-            env.torque_exceed_count_envstep[robot_index] = 0
+        # if env.torque_exceed_count_envstep[robot_index].any():
+        #     print("substep torque exceed limit ratio", 
+        #         (torch.abs(env.substep_torques[robot_index]) / (env.torque_limits.unsqueeze(0))).max(),
+        #         "joint index",
+        #         torch.where((torch.abs(env.substep_torques[robot_index]) > env.torque_limits.unsqueeze(0) * env.cfg.rewards.soft_torque_limit).any(dim= 0))[0],
+        #         "timestep", i,
+        #     )
+        #     env.torque_exceed_count_envstep[robot_index] = 0
        # if (torch.abs(env.torques[robot_index]) > env.torque_limits.unsqueeze(0) * env.cfg.rewards.soft_torque_limit).any():
        #     print("torque exceed limit ratio",
        #         (torch.abs(env.torques[robot_index]) / (env.torque_limits.unsqueeze(0))).max(),
--- a/rsl_rl/rsl_rl/storage/rollout_files/rollout_dataset.py
+++ b/rsl_rl/rsl_rl/storage/rollout_files/rollout_dataset.py
@ -118,7 +118,7 @@ class RolloutDataset(RolloutFileBase):
            print("RolloutDataset: trajectory not enough, need {} at least, waiting for 15 minutes...".format(self.keep_latest_n_trajs))
            time.sleep(60 * 15)
        # use trajectory index to identify the trajectory in all_available_trajectory_dirs
-        self.traj_identifiers = self.unused_trajectory_idxs[:self.num_envs]
+        self.traj_identifiers = [None] * self.num_envs
        self.unused_trajectory_idxs = [i for i in self.unused_trajectory_idxs if i not in self.traj_identifiers]
        self.traj_file_names = [[] for _ in range(self.num_envs)]
        self.traj_lengths = [None for _ in range(self.num_envs)]
--- a/rsl_rl/setup.py
+++ b/rsl_rl/setup.py
@ -12,5 +12,8 @@ setup(name='rsl_rl',
            "torch>=1.4.0",
            "torchvision>=0.5.0",
            "numpy>=1.16.4"
+            "tensorboardX",
+            "tensorboard",
+            "tabulate",
      ],
      )