Merge branch '2025_02_20_add_dexvla' of https://github.com/JayceWen/lerobot into 2025_02_20_add_dexvla

commit 463add8fc8
Author: wk
Date:   2025-03-11 14:42:57 +08:00

2 changed files with 4 additions and 3 deletions


@@ -13,6 +13,7 @@ from timm.models.vision_transformer import Mlp, use_fused_attn
from torch.jit import Final
from transformers import AutoModel
from transformers.modeling_utils import PreTrainedModel
from .configuration_scaledp import ScaleDPPolicyConfig

_logger = logging.getLogger(__name__)
@@ -193,8 +194,6 @@ class FinalLayer(nn.Module):
        return x

class ScaleDP(PreTrainedModel):
    """
    Diffusion models with a Transformer backbone.


@@ -1377,7 +1377,9 @@ class Qwen2VLModel(Qwen2VLPreTrainedModel):
                 (sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=device
             )
             diagonal_attend_mask = torch.arange(target_length, device=device) > cache_position.reshape(-1, 1)
-            if config.sliding_window is not None and (not isinstance(past_key_values, SlidingWindowCache) or sequence_length > target_length):
+            if config.sliding_window is not None and (
+                not isinstance(past_key_values, SlidingWindowCache) or sequence_length > target_length
+            ):
                 # if we have sliding window, we should not attend to tokens beyond sliding window length, so we mask them out also
                 # the check is needed to verify is current checkpoint was trained with sliding window or not
                 sliding_attend_mask = torch.arange(target_length, device=device) <= (
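
Context (not part of the diff): the hunk above only rewraps the sliding-window check onto multiple lines; the masking behavior it guards is unchanged. Below is a minimal standalone sketch of that behavior. The tensor sizes, the sliding_window value, and the cache_position values are illustrative assumptions, not taken from the commit.

import torch

# Illustrative assumptions, not taken from the diff.
sequence_length = 3                                     # new query tokens
target_length = 8                                       # total key/value positions
sliding_window = 4                                      # assumed window size
cache_position = torch.arange(5, 5 + sequence_length)   # absolute positions of the new tokens

min_dtype = torch.finfo(torch.float32).min
causal_mask = torch.full((sequence_length, target_length), fill_value=min_dtype)

# Mask key positions that lie in the future of each query position...
diagonal_attend_mask = torch.arange(target_length) > cache_position.reshape(-1, 1)
# ...and, when a sliding window is configured, key positions that have fallen out of the window.
sliding_attend_mask = torch.arange(target_length) <= (cache_position.reshape(-1, 1) - sliding_window)
diagonal_attend_mask |= sliding_attend_mask

# min_dtype where attention is disallowed, 0.0 where it is allowed.
causal_mask = causal_mask * diagonal_attend_mask
print(causal_mask)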