diff --git a/lerobot/common/policies/sac/modeling_sac.py b/lerobot/common/policies/sac/modeling_sac.py
index a3d5d8e6..11830aa1 100644
--- a/lerobot/common/policies/sac/modeling_sac.py
+++ b/lerobot/common/policies/sac/modeling_sac.py
@@ -126,7 +126,7 @@ class SACPolicy(
         # TODO (azouitine): Handle the case where the temparameter is a fixed
         # TODO (michel-aractingi): Put the log_alpha in cuda by default because otherwise
         # it triggers "can't optimize a non-leaf Tensor"
-        self.log_alpha = torch.tensor([0.0], requires_grad=True, device=torch.device("mps"))
+        self.log_alpha = nn.Parameter(torch.tensor([0.0]))
         self.temperature = self.log_alpha.exp().item()

     def reset(self):
@@ -634,7 +634,7 @@ class PretrainedImageEncoder(nn.Module):
         """Set up CNN encoder"""
         from transformers import AutoModel

-        self.image_enc_layers = AutoModel.from_pretrained(config.vision_encoder_name)
+        self.image_enc_layers = AutoModel.from_pretrained(config.vision_encoder_name, trust_remote_code=True)
         # self.image_enc_layers.pooler = Identity()

         if hasattr(self.image_enc_layers.config, "hidden_sizes"):
diff --git a/lerobot/configs/env/so100_real.yaml b/lerobot/configs/env/so100_real.yaml
index b5afea52..bceeae59 100644
--- a/lerobot/configs/env/so100_real.yaml
+++ b/lerobot/configs/env/so100_real.yaml
@@ -1,6 +1,6 @@
 # @package _global_

-fps: 30
+fps: 10

 env:
   name: real_world
@@ -26,6 +26,6 @@ env:
   joint_masking_action_space: [1, 1, 1, 1, 0, 0] # disable wrist and gripper

   reward_classifier:
-    pretrained_path: outputs/classifier/checkpoints/best/pretrained_model
+    pretrained_path: outputs/classifier/13-02-random-sample-resnet10-frozen/checkpoints/best/pretrained_model
     config_path: lerobot/configs/policy/hilserl_classifier.yaml
\ No newline at end of file
diff --git a/lerobot/configs/policy/sac_real.yaml b/lerobot/configs/policy/sac_real.yaml
index afcb408e..f5607867 100644
--- a/lerobot/configs/policy/sac_real.yaml
+++ b/lerobot/configs/policy/sac_real.yaml
@@ -8,7 +8,7 @@
 #   env.gym.obs_type=environment_state_agent_pos \

 seed: 1
-dataset_repo_id: null # aractingi/push_green_cube_hf_cropped_resized
+dataset_repo_id: aractingi/push_cube_square_light_offline_demo_cropped_resized

 training:
   # Offline training dataloader
@@ -52,7 +52,7 @@ policy:
   n_action_steps: 1
   shared_encoder: true

-  # vision_encoder_name: null
+  vision_encoder_name: "helper2424/resnet10"
   freeze_vision_encoder: true
   input_shapes:
     # # TODO(rcadene, alexander-soare): add variables for height and width from the dataset/env?
diff --git a/lerobot/scripts/server/learner_server.py b/lerobot/scripts/server/learner_server.py
index 1b54e3a9..460b845d 100644
--- a/lerobot/scripts/server/learner_server.py
+++ b/lerobot/scripts/server/learner_server.py
@@ -411,7 +411,7 @@ def add_actor_information_and_train(
         next_observations = batch["next_state"]
         done = batch["done"]

-        assert_and_breakpoint(observations=observations, actions=actions, next_state=next_observations)
+        check_nan_in_transition(observations=observations, actions=actions, next_state=next_observations)

         with policy_lock:
             loss_critic = policy.compute_loss_critic(
@@ -533,7 +533,6 @@ def make_optimizers_and_scheduler(cfg, policy: nn.Module):
     optimizer_critic = torch.optim.Adam(
         params=policy.critic_ensemble.parameters(), lr=policy.config.critic_lr
     )
-    # We wrap policy log temperature in list because this is a torch tensor and not a nn.Module
     optimizer_temperature = torch.optim.Adam(params=[policy.log_alpha], lr=policy.config.critic_lr)
     lr_scheduler = None
     optimizers = {
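
Note on the `log_alpha` change in `modeling_sac.py`: wrapping the tensor in `nn.Parameter` sidesteps the "can't optimize a non-leaf Tensor" error flagged in the TODO, because a parameter registered on a module is moved in place by `Module.to(device)` and so stays a leaf tensor that `torch.optim` can update. The sketch below is a minimal, hypothetical illustration of that behavior; the `TemperatureHolder` module is invented for the example and is not part of the lerobot codebase.

```python
import torch
from torch import nn


class TemperatureHolder(nn.Module):
    # Hypothetical minimal module, not from the lerobot codebase.
    def __init__(self):
        super().__init__()
        # nn.Parameter is always a leaf tensor with requires_grad=True,
        # so an optimizer can update it directly.
        self.log_alpha = nn.Parameter(torch.tensor([0.0]))


holder = TemperatureHolder()
# Module.to() moves the parameter in place, so it remains a leaf.
# By contrast, torch.tensor([0.0], requires_grad=True).to(device) returns
# a non-leaf copy, and handing that to an optimizer raises
# "can't optimize a non-leaf Tensor".
holder.to("cpu")  # swap in "cuda" or "mps" as available

# The optimizer still takes an iterable of tensors, hence the list,
# mirroring optimizer_temperature in learner_server.py.
opt = torch.optim.Adam([holder.log_alpha], lr=3e-4)

loss = holder.log_alpha.exp().sum()  # stand-in for the temperature loss
loss.backward()
opt.step()

temperature = holder.log_alpha.exp().item()  # as in SACPolicy
print(holder.log_alpha.is_leaf, temperature)  # True, updated value
```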