Walk These Ways controller added and tested in simulation.

This commit is contained in:
Rooholla-KhorramBakht 2024-03-18 23:44:34 -04:00
parent 174c5a3460
commit cf73faf024
45 changed files with 634 additions and 47 deletions

@ -0,0 +1,477 @@
import os
import pickle as pkl
import time
import numpy as np
import torch
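# Runtime pieces for the "Walk These Ways" controller: checkpoint/config
# loading, an observation-history wrapper, a hand-driven command interface,
# the TorchScript policy pair, and the agent that connects policy to robot.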
def loadParameters(path):
with open(path + "/parameters.pkl", "rb") as file:
pkl_cfg = pkl.load(file)
cfg = pkl_cfg["Cfg"]
return cfg
def class_to_dict(obj) -> dict:
if not hasattr(obj, "__dict__"):
return obj
result = {}
for key in dir(obj):
if key.startswith("_") or key == "terrain":
continue
element = []
val = getattr(obj, key)
if isinstance(val, list):
for item in val:
element.append(class_to_dict(item))
else:
element = class_to_dict(val)
result[key] = element
return result
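# HistoryWrapper maintains a rolling buffer of the last
# `num_observation_history` observations; the adaptation module consumes this
# concatenated history instead of a single observation.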
class HistoryWrapper:
def __init__(self, env):
self.env = env
if isinstance(self.env.cfg, dict):
self.obs_history_length = self.env.cfg["env"]["num_observation_history"]
else:
self.obs_history_length = self.env.cfg.env.num_observation_history
self.num_obs_history = self.obs_history_length * self.env.num_obs
self.obs_history = torch.zeros(
self.env.num_envs,
self.num_obs_history,
dtype=torch.float,
device=self.env.device,
requires_grad=False,
)
self.num_privileged_obs = self.env.num_privileged_obs
def step(self, action):
obs, rew, done, info = self.env.step(action)
privileged_obs = info["privileged_obs"]
self.obs_history = torch.cat(
(self.obs_history[:, self.env.num_obs :], obs), dim=-1
)
return (
{
"obs": obs,
"privileged_obs": privileged_obs,
"obs_history": self.obs_history,
},
rew,
done,
info,
)
def get_observations(self):
obs = self.env.get_observations()
privileged_obs = self.env.get_privileged_observations()
self.obs_history = torch.cat(
(self.obs_history[:, self.env.num_obs :], obs), dim=-1
)
return {
"obs": obs,
"privileged_obs": privileged_obs,
"obs_history": self.obs_history,
}
def get_obs(self):
obs = self.env.get_obs()
privileged_obs = self.env.get_privileged_observations()
self.obs_history = torch.cat(
(self.obs_history[:, self.env.num_obs :], obs), dim=-1
)
return {
"obs": obs,
"privileged_obs": privileged_obs,
"obs_history": self.obs_history,
}
def reset_idx(
self, env_ids
): # it might be a problem that this isn't getting called!!
ret = self.env.reset_idx(env_ids)
self.obs_history[env_ids, :] = 0
return ret
def reset(self):
ret = self.env.reset()
privileged_obs = self.env.get_privileged_observations()
self.obs_history[:, :] = 0
return {
"obs": ret,
"privileged_obs": privileged_obs,
"obs_history": self.obs_history,
}
def __getattr__(self, name):
return getattr(self.env, name)
class CommandInterface:
def __init__(self, limits=None):
self.limits = limits
gaits = {
"pronking": [0, 0, 0],
"trotting": [0.5, 0, 0],
"bounding": [0, 0.5, 0],
"pacing": [0, 0, 0.5],
}
self.x_vel_cmd, self.y_vel_cmd, self.yaw_vel_cmd = 0.0, 0.0, 0.0
self.body_height_cmd = 0.0
self.step_frequency_cmd = 3.0
self.gait = torch.tensor(gaits["trotting"])
self.footswing_height_cmd = 0.03
self.pitch_cmd = 0.0
self.roll_cmd = 0.0
self.stance_width_cmd = 0.0
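    # Command layout used by get_command (indices 13-18 stay zero here):
    #   0-2: x/y/yaw velocity, 3: body height, 4: step frequency,
    #   5-7: gait parameters (phase/offset/bound), 8: fixed at 0.5
    #   (the gait-duration term in the original controller), 9: footswing
    #   height, 10: pitch, 11: roll, 12: stance width.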
def get_command(self):
command = np.zeros((19,))
command[0] = self.x_vel_cmd
command[1] = self.y_vel_cmd
command[2] = self.yaw_vel_cmd
command[3] = self.body_height_cmd
command[4] = self.step_frequency_cmd
command[5:8] = self.gait
command[8] = 0.5
command[9] = self.footswing_height_cmd
command[10] = self.pitch_cmd
command[11] = self.roll_cmd
command[12] = self.stance_width_cmd
return command, False
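# The policy is split into two TorchScript modules: an adaptation module that
# infers a latent context vector from the observation history, and a body
# network that maps (observation history, latent) to actions, both run on CPU.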
class Policy:
def __init__(self, checkpoint_path):
self.body = torch.jit.load(
os.path.join(checkpoint_path, "checkpoints/body_latest.jit")
)
self.adaptation_module = torch.jit.load(
os.path.join(checkpoint_path, "checkpoints/adaptation_module_latest.jit")
)
def __call__(self, obs, info):
latent = self.adaptation_module.forward(obs["obs_history"].to("cpu"))
action = self.body.forward(
torch.cat((obs["obs_history"].to("cpu"), latent), dim=-1)
)
info["latent"] = latent
return action
class WalkTheseWaysAgent:
def __init__(self, cfg, command_profile, robot):
self.robot = robot
if not isinstance(cfg, dict):
cfg = class_to_dict(cfg)
self.cfg = cfg
self.command_profile = command_profile
# self.lcm_bridge = LCMBridgeClient(robot_name=self.robot_name)
self.dt = self.cfg["control"]["decimation"] * self.cfg["sim"]["dt"]
self.timestep = 0
self.num_obs = self.cfg["env"]["num_observations"]
self.num_envs = 1
self.num_privileged_obs = self.cfg["env"]["num_privileged_obs"]
self.num_actions = self.cfg["env"]["num_actions"]
self.num_commands = self.cfg["commands"]["num_commands"]
self.device = "cpu"
if "obs_scales" in self.cfg.keys():
self.obs_scales = self.cfg["obs_scales"]
else:
self.obs_scales = self.cfg["normalization"]["obs_scales"]
self.commands_scale = np.array(
[
self.obs_scales["lin_vel"],
self.obs_scales["lin_vel"],
self.obs_scales["ang_vel"],
self.obs_scales["body_height_cmd"],
1,
1,
1,
1,
1,
self.obs_scales["footswing_height_cmd"],
self.obs_scales["body_pitch_cmd"],
# 0, self.obs_scales["body_pitch_cmd"],
self.obs_scales["body_roll_cmd"],
self.obs_scales["stance_width_cmd"],
self.obs_scales["stance_length_cmd"],
self.obs_scales["aux_reward_cmd"],
1,
1,
1,
1,
1,
1,
]
)[: self.num_commands]
joint_names = [
"FL_hip_joint",
"FL_thigh_joint",
"FL_calf_joint",
"FR_hip_joint",
"FR_thigh_joint",
"FR_calf_joint",
"RL_hip_joint",
"RL_thigh_joint",
"RL_calf_joint",
"RR_hip_joint",
"RR_thigh_joint",
"RR_calf_joint",
]
policy_joint_names = joint_names
unitree_joint_names = [
"FR_hip_joint",
"FR_thigh_joint",
"FR_calf_joint",
"FL_hip_joint",
"FL_thigh_joint",
"FL_calf_joint",
"RR_hip_joint",
"RR_thigh_joint",
"RR_calf_joint",
"RL_hip_joint",
"RL_thigh_joint",
"RL_calf_joint",
]
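        # The policy expects FL/FR/RL/RR joint ordering while the Unitree
        # interface uses FR/FL/RR/RL; build index maps translating both ways.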
        policy_to_unitree_map = []
        unitree_to_policy_map = []
        for i, policy_joint_name in enumerate(policy_joint_names):
            idx = unitree_joint_names.index(policy_joint_name)
            policy_to_unitree_map.append((i, idx))
        self.policy_to_unitree_map = np.array(policy_to_unitree_map)
        for i, unitree_joint_name in enumerate(unitree_joint_names):
            idx = policy_joint_names.index(unitree_joint_name)
            unitree_to_policy_map.append((i, idx))
        self.unitree_to_policy_map = np.array(unitree_to_policy_map)
self.default_dof_pos = np.array(
[
self.cfg["init_state"]["default_joint_angles"][name]
for name in joint_names
]
)
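        # Per-joint scaling of the default pose is optional in the config;
        # fall back to unit scales when the keys are missing.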
try:
self.default_dof_pos_scale = np.array(
[
self.cfg["init_state"]["default_hip_scales"],
self.cfg["init_state"]["default_thigh_scales"],
self.cfg["init_state"]["default_calf_scales"],
self.cfg["init_state"]["default_hip_scales"],
self.cfg["init_state"]["default_thigh_scales"],
self.cfg["init_state"]["default_calf_scales"],
self.cfg["init_state"]["default_hip_scales"],
self.cfg["init_state"]["default_thigh_scales"],
self.cfg["init_state"]["default_calf_scales"],
self.cfg["init_state"]["default_hip_scales"],
self.cfg["init_state"]["default_thigh_scales"],
self.cfg["init_state"]["default_calf_scales"],
]
)
except KeyError:
self.default_dof_pos_scale = np.ones(12)
self.default_dof_pos = self.default_dof_pos * self.default_dof_pos_scale
self.p_gains = np.zeros(12)
self.d_gains = np.zeros(12)
for i in range(12):
joint_name = joint_names[i]
found = False
for dof_name in self.cfg["control"]["stiffness"].keys():
if dof_name in joint_name:
self.p_gains[i] = self.cfg["control"]["stiffness"][dof_name]
self.d_gains[i] = self.cfg["control"]["damping"][dof_name]
found = True
if not found:
self.p_gains[i] = 0.0
self.d_gains[i] = 0.0
if self.cfg["control"]["control_type"] in ["P", "V"]:
                    print(
                        f"PD gains of joint {joint_name} were not defined; setting them to zero"
                    )
print(f"p_gains: {self.p_gains}")
self.commands = np.zeros((1, self.num_commands))
self.actions = torch.zeros(12)
self.last_actions = torch.zeros(12)
self.gravity_vector = np.zeros(3)
self.dof_pos = np.zeros(12)
self.dof_vel = np.zeros(12)
self.body_linear_vel = np.zeros(3)
self.body_angular_vel = np.zeros(3)
self.joint_pos_target = np.zeros(12)
self.joint_vel_target = np.zeros(12)
self.torques = np.zeros(12)
self.contact_state = np.ones(4)
self.test = 0
self.gait_indices = torch.zeros(self.num_envs, dtype=torch.float)
self.clock_inputs = torch.zeros(self.num_envs, 4, dtype=torch.float)
if "obs_scales" in self.cfg.keys():
self.obs_scales = self.cfg["obs_scales"]
else:
self.obs_scales = self.cfg["normalization"]["obs_scales"]
def wait_for_state(self):
# return self.lcm_bridge.getStates(timeout=2)
pass
def get_obs(self):
cmds, reset_timer = self.command_profile.get_command()
self.commands[:, :] = cmds[: self.num_commands]
# self.state = self.wait_for_state()
joint_state = self.robot.getJointStates()
if joint_state is not None:
self.gravity_vector = self.robot.getGravityInBody()
self.dof_pos = joint_state['q'][self.unitree_to_policy_map[:,1]]
self.dof_vel = joint_state['dq'][self.unitree_to_policy_map[:,1]]
if reset_timer:
self.reset_gait_indices()
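        # Observation = [projected gravity, scaled commands, joint-position
        # offsets from the default pose, joint velocities, clipped previous
        # action], optionally extended with older actions and clock inputs.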
ob = np.concatenate(
(
self.gravity_vector.reshape(1, -1),
self.commands * self.commands_scale,
(self.dof_pos - self.default_dof_pos).reshape(1, -1)
* self.obs_scales["dof_pos"],
self.dof_vel.reshape(1, -1) * self.obs_scales["dof_vel"],
torch.clip(
self.actions,
-self.cfg["normalization"]["clip_actions"],
self.cfg["normalization"]["clip_actions"],
)
.cpu()
.detach()
.numpy()
.reshape(1, -1),
),
axis=1,
)
if self.cfg["env"]["observe_two_prev_actions"]:
ob = np.concatenate(
(ob, self.last_actions.cpu().detach().numpy().reshape(1, -1)), axis=1
)
if self.cfg["env"]["observe_clock_inputs"]:
ob = np.concatenate((ob, self.clock_inputs), axis=1)
return torch.tensor(ob, device=self.device).float()
def get_privileged_observations(self):
return None
def publish_action(self, action, hard_reset=False):
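        # Actions are scaled joint-position offsets around the default pose
        # (hips get an extra reduction factor); targets are remapped to the
        # Unitree joint order before being sent to the robot.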
# command_for_robot = UnitreeLowCommand()
self.joint_pos_target = (
action[0, :12].detach().cpu().numpy() * self.cfg["control"]["action_scale"]
).flatten()
self.joint_pos_target[[0, 3, 6, 9]] *= self.cfg["control"][
"hip_scale_reduction"
]
self.joint_pos_target += self.default_dof_pos
self.joint_vel_target = np.zeros(12)
# command_for_robot.q_des = self.joint_pos_target
# command_for_robot.dq_des = self.joint_vel_target
# command_for_robot.kp = self.p_gains
# command_for_robot.kd = self.d_gains
# command_for_robot.tau_ff = np.zeros(12)
        if hard_reset:
            # command_for_robot.id = -1  (the LCM bridge above is commented
            # out, so there is no command struct to flag; hard reset is a
            # no-op in simulation)
            pass
self.torques = (self.joint_pos_target - self.dof_pos) * self.p_gains + (
self.joint_vel_target - self.dof_vel
) * self.d_gains
# self.lcm_bridge.sendCommands(command_for_robot)
        self.robot.setCommands(
            self.joint_pos_target[self.policy_to_unitree_map[:, 1]],
            self.joint_vel_target[self.policy_to_unitree_map[:, 1]],
            self.p_gains[self.policy_to_unitree_map[:, 1]],
            self.d_gains[self.policy_to_unitree_map[:, 1]],
            np.zeros(12),
        )
def reset(self):
self.actions = torch.zeros(12)
self.time = time.time()
self.timestep = 0
return self.get_obs()
def reset_gait_indices(self):
self.gait_indices = torch.zeros(self.num_envs, dtype=torch.float)
def step(self, actions, hard_reset=False):
clip_actions = self.cfg["normalization"]["clip_actions"]
self.last_actions = self.actions[:]
self.actions = torch.clip(actions[0:1, :], -clip_actions, clip_actions)
self.publish_action(self.actions, hard_reset=hard_reset)
# time.sleep(max(self.dt - (time.time() - self.time), 0))
if self.timestep % 100 == 0:
print(f"frq: {1 / (time.time() - self.time)} Hz")
self.time = time.time()
obs = self.get_obs()
# clock accounting
frequencies = self.commands[:, 4]
phases = self.commands[:, 5]
offsets = self.commands[:, 6]
if self.num_commands == 8:
bounds = 0
else:
bounds = self.commands[:, 7]
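        # Gait clock: gait_indices is a phase in [0, 1) advancing at the
        # commanded step frequency; per-foot phases are shifted by the gait
        # parameters and exposed to the policy as sin(2*pi*phase).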
self.gait_indices = torch.remainder(
self.gait_indices + self.dt * frequencies, 1.0
)
if (
"pacing_offset" in self.cfg["commands"]
and self.cfg["commands"]["pacing_offset"]
):
self.foot_indices = [
self.gait_indices + phases + offsets + bounds,
self.gait_indices + bounds,
self.gait_indices + offsets,
self.gait_indices + phases,
]
else:
self.foot_indices = [
self.gait_indices + phases + offsets + bounds,
self.gait_indices + offsets,
self.gait_indices + bounds,
self.gait_indices + phases,
]
self.clock_inputs[:, 0] = torch.sin(2 * np.pi * self.foot_indices[0])
self.clock_inputs[:, 1] = torch.sin(2 * np.pi * self.foot_indices[1])
self.clock_inputs[:, 2] = torch.sin(2 * np.pi * self.foot_indices[2])
self.clock_inputs[:, 3] = torch.sin(2 * np.pi * self.foot_indices[3])
infos = {
"joint_pos": self.dof_pos[np.newaxis, :],
"joint_vel": self.dof_vel[np.newaxis, :],
"joint_pos_target": self.joint_pos_target[np.newaxis, :],
"joint_vel_target": self.joint_vel_target[np.newaxis, :],
"body_linear_vel": self.body_linear_vel[np.newaxis, :],
"body_angular_vel": self.body_angular_vel[np.newaxis, :],
"contact_state": self.contact_state[np.newaxis, :],
"clock_inputs": self.clock_inputs[np.newaxis, :],
"body_linear_vel_cmd": self.commands[:, 0:2],
"body_angular_vel_cmd": self.commands[:, 2:],
"privileged_obs": None,
}
self.timestep += 1
return obs, None, None, infos


@ -8,7 +8,7 @@ import pinocchio as pin
from pinocchio.robot_wrapper import RobotWrapper
from Go2Py import ASSETS_PATH
import os
+from scipy.spatial.transform import Rotation
class Go2Sim:
def __init__(self, render=True, dt=0.002):
@ -144,6 +144,12 @@ class Go2Sim:
'nle':nle
}
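+    # getPose() is assumed to return the quaternion as (w, x, y, z); SciPy's
+    # from_quat expects (x, y, z, w), hence the reorder below. R.T maps world
+    # vectors into the body frame, so this returns the unit gravity direction
+    # expressed in body coordinates.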
+    def getGravityInBody(self):
+        _, q = self.getPose()
+        R = Rotation.from_quat([q[1], q[2], q[3], q[0]]).as_matrix()
+        g_in_body = R.T @ np.array([0.0, 0.0, -1.0]).reshape(3, 1)
+        return g_in_body
def overheat(self):
return False


@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -22,7 +22,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -92,7 +92,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -101,7 +101,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -125,7 +125,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -144,10 +144,6 @@
" self.user_controller_callback = user_controller_callback\n",
"\n",
" self.state = \"damping\"\n",
"\n",
" # self.tracking_kp = np.array(4*[200, 200, 350.]).reshape(12)\n",
" # self.tracking_kv = np.array(12*[10.])\n",
"\n",
" self.tracking_kp = np.array(4*[150, 150, 150.]).reshape(12)\n",
" self.tracking_kv = np.array(12*[3.])\n",
" self.damping_kv = np.array(12*[2.])\n",
@ -284,30 +280,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Exception in thread Thread-9:\n",
"Traceback (most recent call last):\n",
" File \"/home/rstaion/miniconda3/envs/b1py/lib/python3.8/threading.py\", line 932, in _bootstrap_inner\n",
" self.run()\n",
" File \"/home/rstaion/miniconda3/envs/b1py/lib/python3.8/threading.py\", line 870, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/tmp/ipykernel_130541/2701327530.py\", line 76, in simUpdate\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" File \"/home/rstaion/projects/rooholla/locomotion/Go2Py/Go2Py/sim/mujoco.py\", line 122, in step\n",
" tau = np.diag(self.kp)@(self.q_des-q).reshape(12,1)+ \\\n",
"numpy.core._exceptions.UFuncTypeError: ufunc 'subtract' did not contain a loop with signature matching types (dtype('float64'), dtype('<U1')) -> None\n"
]
}
],
"outputs": [],
"source": [
"fsm = FSM(robot, remote, safety)"
]

View File

@ -148,7 +148,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.18"
"version": "3.8.10"
}
},
"nbformat": 4,


@ -17,7 +17,7 @@
"outputs": [],
"source": [
"robot = Go2Sim()\n",
"robot.standUp()"
"robot.standUpReset()"
]
},
{
@ -29,10 +29,10 @@
"import mujoco\n",
"import time\n",
"q,dq = robot.getJointStates()\n",
"robot.standUp()\n",
"robot.standUpReset()\n",
"for i in range(100000):\n",
" q,dq = robot.getJointStates()\n",
" tau = 20*np.eye(12)@(robot.q0 - q).reshape(12,1)\n",
" state = robot.getJointStates()\n",
" tau = 20*np.eye(12)@(robot.q0 - state['q']).reshape(12,1)\n",
" robot.setCommands(np.zeros(12), np.zeros(12), np.zeros(12), np.zeros(12), tau)\n",
" robot.step()"
]
@ -82,13 +82,6 @@
"source": [
"robot.getSiteJacobian('FR_foot')"
]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": []
-}
+}
],
"metadata": {


@ -0,0 +1,138 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from Go2Py.sim.mujoco import Go2Sim\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"robot = Go2Sim()\n",
"robot.standUpReset()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from Go2Py.controllers.walk_these_ways import *"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"checkpoint_path = \"/home/rstaion/projects/rooholla/locomotion/Go2Py/Go2Py/assets/checkpoints/walk_these_ways/\"\n",
"\n",
"cfg = loadParameters(checkpoint_path)\n",
"policy = Policy(checkpoint_path)\n",
"command_profile = CommandInterface()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"p_gains: [20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20. 20.]\n"
]
}
],
"source": [
"agent = WalkTheseWaysAgent(cfg, command_profile, robot)\n",
"agent = HistoryWrapper(agent)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"control_dt = cfg[\"control\"][\"decimation\"] * cfg[\"sim\"][\"dt\"]\n",
"simulation_dt = robot.dt\n",
"obs = agent.reset()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"frq: 12.822735624382831 Hz\n",
"frq: 47.954632763194 Hz\n",
"frq: 53.309743511528 Hz\n",
"frq: 54.65674559220214 Hz\n",
"frq: 45.29730547005778 Hz\n"
]
}
],
"source": [
"robot.reset()\n",
"obs = agent.reset()\n",
"for i in range(5000):\n",
" policy_info = {}\n",
" action = policy(obs, policy_info)\n",
" if i % (control_dt // simulation_dt) == 0:\n",
" obs, ret, done, info = agent.step(action)\n",
" robot.step()\n",
" command_profile.yaw_vel_cmd = 1.2\n",
" command_profile.x_vel_cmd = 0.8\n",
" command_profile.y_vel_cmd = 0.0\n",
" command_profile.stance_width_cmd=0.2\n",
" command_profile.footswing_height_cmd=-0.05\n",
" command_profile.step_frequency_cmd = 2.5\n",
" time.sleep(robot.dt/4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "b1-env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.18"
}
},
"nbformat": 4,
"nbformat_minor": 2
}