Fix log_alpha in modeling_sac: change to nn.Parameter

added pretrained vision model in policy

Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com>
This commit is contained in:
Michel Aractingi 2025-02-13 11:26:24 +01:00
parent dc086dc21f
commit 459f22ed30
4 changed files with 7 additions and 8 deletions

View File

@ -126,7 +126,7 @@ class SACPolicy(
# TODO (azouitine): Handle the case where the temperature parameter is fixed
# TODO (michel-aractingi): Put the log_alpha in cuda by default because otherwise
# it triggers "can't optimize a non-leaf Tensor"
self.log_alpha = torch.tensor([0.0], requires_grad=True, device=torch.device("mps"))
self.log_alpha = nn.Parameter(torch.tensor([0.0]))
self.temperature = self.log_alpha.exp().item()
def reset(self):
@ -634,7 +634,7 @@ class PretrainedImageEncoder(nn.Module):
"""Set up CNN encoder"""
from transformers import AutoModel
self.image_enc_layers = AutoModel.from_pretrained(config.vision_encoder_name)
self.image_enc_layers = AutoModel.from_pretrained(config.vision_encoder_name, trust_remote_code=True)
# self.image_enc_layers.pooler = Identity()
if hasattr(self.image_enc_layers.config, "hidden_sizes"):

View File

@ -1,6 +1,6 @@
# @package _global_
fps: 30
fps: 10
env:
name: real_world
@ -26,6 +26,6 @@ env:
joint_masking_action_space: [1, 1, 1, 1, 0, 0] # disable wrist and gripper
reward_classifier:
pretrained_path: outputs/classifier/checkpoints/best/pretrained_model
pretrained_path: outputs/classifier/13-02-random-sample-resnet10-frozen/checkpoints/best/pretrained_model
config_path: lerobot/configs/policy/hilserl_classifier.yaml

View File

@ -8,7 +8,7 @@
# env.gym.obs_type=environment_state_agent_pos \
seed: 1
dataset_repo_id: null # aractingi/push_green_cube_hf_cropped_resized
dataset_repo_id: aractingi/push_cube_square_light_offline_demo_cropped_resized
training:
# Offline training dataloader
@ -52,7 +52,7 @@ policy:
n_action_steps: 1
shared_encoder: true
# vision_encoder_name: null
vision_encoder_name: "helper2424/resnet10"
freeze_vision_encoder: true
input_shapes:
# # TODO(rcadene, alexander-soare): add variables for height and width from the dataset/env?

View File

@ -411,7 +411,7 @@ def add_actor_information_and_train(
next_observations = batch["next_state"]
done = batch["done"]
assert_and_breakpoint(observations=observations, actions=actions, next_state=next_observations)
check_nan_in_transition(observations=observations, actions=actions, next_state=next_observations)
with policy_lock:
loss_critic = policy.compute_loss_critic(
@ -533,7 +533,6 @@ def make_optimizers_and_scheduler(cfg, policy: nn.Module):
optimizer_critic = torch.optim.Adam(
params=policy.critic_ensemble.parameters(), lr=policy.config.critic_lr
)
# We wrap the policy's log temperature in a list because it is a torch tensor and not an nn.Module
optimizer_temperature = torch.optim.Adam(params=[policy.log_alpha], lr=policy.config.critic_lr)
lr_scheduler = None
optimizers = {