testing adapting aloha
parent 85eea19264
commit 0a4bba1da7

@@ -0,0 +1,82 @@
+{
+    "type": "pi0fast",
+    "n_obs_steps": 1,
+    "normalization_mapping": {
+        "VISUAL": "IDENTITY",
+        "STATE": "MEAN_STD",
+        "ACTION": "MEAN_STD"
+    },
+    "input_features": {
+        "observation.image": {
+            "type": "VISUAL",
+            "shape": [
+                3,
+                256,
+                256
+            ]
+        },
+        "observation.image2": {
+            "type": "VISUAL",
+            "shape": [
+                3,
+                256,
+                256
+            ]
+        },
+        "observation.image3": {
+            "type": "VISUAL",
+            "shape": [
+                3,
+                256,
+                256
+            ]
+        },
+        "observation.state": {
+            "type": "STATE",
+            "shape": [
+                8
+            ]
+        }
+    },
+    "output_features": {
+        "action": {
+            "type": "ACTION",
+            "shape": [
+                7
+            ]
+        }
+    },
+    "use_env_state": true,
+    "exclude_image_keys": "",
+    "normalize_per_robot_type": false,
+    "chunk_size": 10,
+    "n_action_steps": 5,
+    "max_state_dim": 32,
+    "max_action_dim": 32,
+    "resize_imgs_with_padding": [
+        224,
+        224
+    ],
+    "interpolate_like_pi": false,
+    "empty_cameras": 0,
+    "adapt_to_pi_aloha": false,
+    "use_delta_joint_actions_aloha": false,
+    "tokenizer_max_length": 48,
+    "proj_width": 1024,
+    "max_decoding_steps": 256,
+    "fast_skip_tokens": 128,
+    "max_input_seq_len": 256,
+    "use_cache": true,
+    "freeze_vision_encoder": true,
+    "freeze_lm_head": true,
+    "optimizer_lr": 0.0001,
+    "optimizer_betas": [
+        0.9,
+        0.95
+    ],
+    "optimizer_eps": 1e-08,
+    "optimizer_weight_decay": 1e-10,
+    "scheduler_warmup_steps": 1000,
+    "scheduler_decay_steps": 30000,
+    "scheduler_decay_lr": 2.5e-06
+}
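The JSON above mirrors the PI0FASTConfig dataclass touched in the next hunk. As a rough sketch of the equivalent Python construction, assuming the JSON keys map one-to-one onto the dataclass fields (the import path is an assumption, not part of this commit):

from lerobot.common.policies.pi0fast.configuration_pi0fast import PI0FASTConfig  # assumed path

# Only a subset of the JSON keys is shown; the remaining fields keep their defaults.
cfg = PI0FASTConfig(
    n_obs_steps=1,
    chunk_size=10,
    n_action_steps=5,
    max_state_dim=32,
    max_action_dim=32,
    resize_imgs_with_padding=(224, 224),
    adapt_to_pi_aloha=False,
    use_delta_joint_actions_aloha=False,
    tokenizer_max_length=48,
    freeze_vision_encoder=True,
    freeze_lm_head=True,
)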
@@ -21,8 +21,8 @@ class PEFTConfig:
 class PI0FASTConfig(PreTrainedConfig):
     # Input / output structure.
     n_obs_steps: int = 1
-    chunk_size: int = 51
-    n_action_steps: int = 50
+    chunk_size: int = 10
+    n_action_steps: int = 5

     normalization_mapping: dict[str, NormalizationMode] = field(
         default_factory=lambda: {
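Here chunk_size is the number of future actions the policy predicts per inference call and n_action_steps is how many of those are executed before predicting again (so n_action_steps <= chunk_size); the commit drops them from 51/50 to 10/5. A minimal sketch of that consumption pattern; predict_action_chunk is a hypothetical stand-in for the policy's chunk prediction:

from collections import deque

chunk_size, n_action_steps = 10, 5  # values set by this commit
action_queue = deque(maxlen=n_action_steps)

def select_action(policy, observation):
    # When the queue is empty, predict a fresh chunk and keep the first
    # n_action_steps actions; then serve one action per environment step.
    if not action_queue:
        chunk = policy.predict_action_chunk(observation)  # hypothetical: (chunk_size, action_dim)
        action_queue.extend(chunk[:n_action_steps])
    return action_queue.popleft()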
@@ -87,6 +87,61 @@ def display(tensor: torch.Tensor):
     print(f"Max: {tensor.max().item()}")


+def normalize(x, min_val, max_val):
+    return (x - min_val) / (max_val - min_val)
+
+
+def unnormalize(x, min_val, max_val):
+    return x * (max_val - min_val) + min_val
+
+
+def safe_arcsin(value):
+    # This ensures that the input stays within
+    # [-1, 1] to avoid invalid values for arcsin.
+    return torch.arcsin(torch.clamp(value, -1.0, 1.0))
+
+
+def aloha_gripper_to_angular(value):
+    # Aloha transforms the gripper positions into a linear space. The following code
+    # reverses this transformation to be consistent with pi0, which is pretrained in
+    # angular space.
+    #
+    # These values come from the Aloha code:
+    # PUPPET_GRIPPER_POSITION_OPEN, PUPPET_GRIPPER_POSITION_CLOSED
+    value = unnormalize(value, min_val=0.01844, max_val=0.05800)
+
+    # This is the inverse of the angular-to-linear transformation inside the Interbotix code.
+    def linear_to_radian(linear_position, arm_length, horn_radius):
+        value = (horn_radius**2 + linear_position**2 - arm_length**2) / (2 * horn_radius * linear_position)
+        return safe_arcsin(value)
+
+    # The constants are taken from the Interbotix code.
+    value = linear_to_radian(value, arm_length=0.036, horn_radius=0.022)
+
+    # Normalize to [0, 1].
+    # The values 0.4 and 1.5 were measured on an actual Trossen robot.
+    return normalize(value, min_val=0.4, max_val=1.5)
+
+
+def aloha_gripper_from_angular(value):
+    # Convert from the gripper position used by pi0 to the gripper position used by Aloha.
+    # Note that the units are still angular but the range is different.
+
+    # The values 0.4 and 1.5 were measured on an actual Trossen robot.
+    value = unnormalize(value, min_val=0.4, max_val=1.5)
+
+    # These values come from the Aloha code:
+    # PUPPET_GRIPPER_JOINT_OPEN, PUPPET_GRIPPER_JOINT_CLOSE
+    return normalize(value, min_val=-0.6213, max_val=1.4910)
+
+
+def aloha_gripper_from_angular_inv(value):
+    # Directly inverts the gripper_from_angular function.
+    value = unnormalize(value, min_val=-0.6213, max_val=1.4910)
+    return normalize(value, min_val=0.4, max_val=1.5)
+
+
 class PI0FASTPolicy(PreTrainedPolicy):
     """Wrapper class around PI0FAST tokenizer and model to train and run inference within LeRobot."""
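Because aloha_gripper_from_angular and aloha_gripper_from_angular_inv are affine maps built from the same constants applied in opposite order, composing them should return the input up to floating-point error. A quick sanity check, assuming the two functions added above are in scope:

import torch

x = torch.linspace(0.0, 1.0, steps=5)  # normalized pi0 gripper commands
roundtrip = aloha_gripper_from_angular_inv(aloha_gripper_from_angular(x))
assert torch.allclose(roundtrip, x, atol=1e-6)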
@@ -130,6 +185,33 @@ class PI0FASTPolicy(PreTrainedPolicy):
     def get_optim_params(self) -> dict:
         return self.parameters()

+    def _pi_aloha_decode_state(self, state):
+        # Flip the joints.
+        for motor_idx in [1, 2, 8, 9]:
+            state[:, motor_idx] *= -1
+        # Reverse the gripper transformation that is being applied by the Aloha runtime.
+        for motor_idx in [6, 13]:
+            state[:, motor_idx] = aloha_gripper_to_angular(state[:, motor_idx])
+        return state
+
+    def _pi_aloha_encode_actions(self, actions):
+        # Flip the joints.
+        for motor_idx in [1, 2, 8, 9]:
+            actions[:, :, motor_idx] *= -1
+        # Reverse the gripper transformation that is being applied by the Aloha runtime.
+        for motor_idx in [6, 13]:
+            actions[:, :, motor_idx] = aloha_gripper_from_angular(actions[:, :, motor_idx])
+        return actions
+
+    def _pi_aloha_encode_actions_inv(self, actions):
+        # Flip the joints again.
+        for motor_idx in [1, 2, 8, 9]:
+            actions[:, :, motor_idx] *= -1
+        # Reverse the gripper transformation that is being applied by the Aloha runtime.
+        for motor_idx in [6, 13]:
+            actions[:, :, motor_idx] = aloha_gripper_from_angular_inv(actions[:, :, motor_idx])
+        return actions
+
     @torch.no_grad
     def select_action(self, batch: dict[str, Tensor]) -> Tensor:
         """Select a single action given environment observations.
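These helpers are presumably gated by the adapt_to_pi_aloha flag from the config: decode the Aloha state before inference and encode the predicted actions back into the Aloha convention on the way out. A hedged sketch of that wiring (not shown in this diff; _get_action_chunk is a hypothetical placeholder for the model call):

def select_action(self, batch):
    # Sketch only: assumed use of adapt_to_pi_aloha, not part of this commit.
    if self.config.adapt_to_pi_aloha:
        batch["observation.state"] = self._pi_aloha_decode_state(batch["observation.state"])
    actions = self._get_action_chunk(batch)  # hypothetical internal call
    if self.config.adapt_to_pi_aloha:
        actions = self._pi_aloha_encode_actions(actions)
    return actions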
@@ -9,8 +9,9 @@ TASK=AlohaTransferCube-v0
 REPO_ID=lerobot/aloha_sim_transfer_cube_human
 OUT_DIR=~/logs/lerobot/tmp/act_aloha_transfer

-EVAL_FREQ=5
+EVAL_FREQ=50

+POLICY_PATH=~/.cache/openpi/openpi-assets/checkpoints/pi0_fast_base_pytorch/
 POLICY=pi0fast

 python lerobot/scripts/train.py \