diff --git a/lerobot/common/policies/dexvla/README.md b/lerobot/common/policies/dexvla/README.md index 9d0b9805..b34a40bb 100644 --- a/lerobot/common/policies/dexvla/README.md +++ b/lerobot/common/policies/dexvla/README.md @@ -2,7 +2,7 @@ DexVLA: Vision-Language Model with Plug-In Diffusion Expert for Visuomotor Policy Learning This policy is Community Contributed. For more information about DexVLA, you can also refer to [this](https://github.com/juruobenruo/DexVLA). -This is [project website](https://dex-vla.github.io/). +This is [project website](https://dex-vla.github.io/). ## Dataset ### Data format @@ -141,4 +141,4 @@ python lerobot/scripts/eval.py \ ~~~ ### Inference Speed -Tested on a single A6000 GPU, the DexVLA could infer 3.4 action chunks in one second. For each action chunk, if we execute 25 actions, the real control frequency can be 85 (3.4*25)Hz. \ No newline at end of file +Tested on a single A6000 GPU, the DexVLA could infer 3.4 action chunks in one second. For each action chunk, if we execute 25 actions, the real control frequency can be 85 (3.4*25)Hz. diff --git a/lerobot/common/policies/dexvla/fusion_modules.py b/lerobot/common/policies/dexvla/fusion_modules.py index bff61072..701a4ada 100644 --- a/lerobot/common/policies/dexvla/fusion_modules.py +++ b/lerobot/common/policies/dexvla/fusion_modules.py @@ -16,6 +16,7 @@ import torch.nn as nn + class ActionProjector(nn.Module): def __init__(self, in_dim, out_dim=1024): super().__init__() diff --git a/lerobot/common/policies/dexvla/robot_data_processor.py b/lerobot/common/policies/dexvla/robot_data_processor.py index 3a7ce995..eba11890 100644 --- a/lerobot/common/policies/dexvla/robot_data_processor.py +++ b/lerobot/common/policies/dexvla/robot_data_processor.py @@ -19,6 +19,7 @@ import torch from PIL import Image from qwen_vl_utils import fetch_image + class Qwen2VLAProcess: def __init__( self,