fix log_alpha in modeling_sac: change to nn.Parameter
added pretrained vision model in policy

Co-authored-by: Adil Zouitine <adilzouitinegm@gmail.com>
parent dc086dc21f
commit 459f22ed30
@@ -126,7 +126,7 @@ class SACPolicy(
         # TODO (azouitine): Handle the case where the temperature parameter is fixed
         # TODO (michel-aractingi): Put the log_alpha in cuda by default because otherwise
         # it triggers "can't optimize a non-leaf Tensor"
-        self.log_alpha = torch.tensor([0.0], requires_grad=True, device=torch.device("mps"))
+        self.log_alpha = nn.Parameter(torch.tensor([0.0]))
         self.temperature = self.log_alpha.exp().item()

     def reset(self):
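The motivation for this change: a plain tensor created with `requires_grad=True` stops being a leaf once it is moved across devices, at which point `torch.optim` raises the "can't optimize a non-leaf Tensor" error mentioned in the TODO. An `nn.Parameter` is registered on the module (so it lands in the state dict) and stays a leaf when the whole policy is moved with `Module.to()`. A minimal standalone sketch, not lerobot code:

```python
import torch
from torch import nn

# A differentiable copy of a requires_grad tensor is produced by an autograd
# op, so it is no longer a leaf and cannot be handed to an optimizer.
t = torch.tensor([0.0], requires_grad=True)
t_moved = t.to("cpu", copy=True)  # stand-in for .to("cuda") / .to("mps")
print(t.is_leaf, t_moved.is_leaf)  # True False

# An nn.Parameter survives Module.to(device) as a leaf, because Module.to()
# swaps parameter data in place instead of rebuilding the autograd graph.
class Policy(nn.Module):
    def __init__(self):
        super().__init__()
        self.log_alpha = nn.Parameter(torch.tensor([0.0]))

policy = Policy().to("cpu")  # or "cuda"/"mps" when available
print(policy.log_alpha.is_leaf)  # True
opt = torch.optim.Adam([policy.log_alpha], lr=3e-4)  # no non-leaf error
```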
@@ -634,7 +634,7 @@ class PretrainedImageEncoder(nn.Module):
         """Set up CNN encoder"""
         from transformers import AutoModel

-        self.image_enc_layers = AutoModel.from_pretrained(config.vision_encoder_name)
+        self.image_enc_layers = AutoModel.from_pretrained(config.vision_encoder_name, trust_remote_code=True)
         # self.image_enc_layers.pooler = Identity()

         if hasattr(self.image_enc_layers.config, "hidden_sizes"):
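`trust_remote_code=True` is needed when the checkpoint (here `helper2424/resnet10`, per the config change further down) defines its model class as custom code in the Hub repo rather than as a built-in transformers architecture. A sketch of loading it outside the policy, mirroring the `hidden_sizes` probe above (requires network access on first run):

```python
from transformers import AutoModel

# Custom architectures hosted on the Hub are defined by Python files in the
# repo itself, so transformers must be explicitly allowed to execute them.
encoder = AutoModel.from_pretrained("helper2424/resnet10", trust_remote_code=True)

# ResNet-style configs expose per-stage channel widths under `hidden_sizes`;
# the last entry gives the feature dimension of the final stage.
if hasattr(encoder.config, "hidden_sizes"):
    feature_dim = encoder.config.hidden_sizes[-1]
    print(feature_dim)
```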
@@ -1,6 +1,6 @@
 # @package _global_

-fps: 30
+fps: 10

 env:
   name: real_world
@@ -26,6 +26,6 @@ env:
   joint_masking_action_space: [1, 1, 1, 1, 0, 0] # disable wrist and gripper

 reward_classifier:
-  pretrained_path: outputs/classifier/checkpoints/best/pretrained_model
+  pretrained_path: outputs/classifier/13-02-random-sample-resnet10-frozen/checkpoints/best/pretrained_model
   config_path: lerobot/configs/policy/hilserl_classifier.yaml
@@ -8,7 +8,7 @@
 # env.gym.obs_type=environment_state_agent_pos \

 seed: 1
-dataset_repo_id: null # aractingi/push_green_cube_hf_cropped_resized
+dataset_repo_id: aractingi/push_cube_square_light_offline_demo_cropped_resized

 training:
   # Offline training dataloader
@@ -52,7 +52,7 @@ policy:
   n_action_steps: 1

   shared_encoder: true
-  # vision_encoder_name: null
+  vision_encoder_name: "helper2424/resnet10"
   freeze_vision_encoder: true
   input_shapes:
     # # TODO(rcadene, alexander-soare): add variables for height and width from the dataset/env?
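With `vision_encoder_name` set and `freeze_vision_encoder: true`, the policy loads the pretrained backbone and keeps its weights out of the gradient updates, so only the SAC heads train. A hedged sketch of how such a pair of config fields is typically consumed; `build_vision_encoder` is a hypothetical helper, not the actual lerobot code:

```python
from transformers import AutoModel

def build_vision_encoder(vision_encoder_name: str, freeze_vision_encoder: bool):
    """Load the backbone named in the config and optionally freeze it (hypothetical helper)."""
    encoder = AutoModel.from_pretrained(vision_encoder_name, trust_remote_code=True)
    if freeze_vision_encoder:
        for p in encoder.parameters():
            p.requires_grad = False  # excluded from optimizer updates
        encoder.eval()  # also freezes batch-norm running statistics
    return encoder

# Values mirror the YAML above.
encoder = build_vision_encoder("helper2424/resnet10", freeze_vision_encoder=True)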
@@ -411,7 +411,7 @@ def add_actor_information_and_train(
         next_observations = batch["next_state"]
         done = batch["done"]

-        assert_and_breakpoint(observations=observations, actions=actions, next_state=next_observations)
+        check_nan_in_transition(observations=observations, actions=actions, next_state=next_observations)

         with policy_lock:
             loss_critic = policy.compute_loss_critic(
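The rename from `assert_and_breakpoint` to `check_nan_in_transition` suggests a sanity check that flags NaNs in a sampled transition before it reaches the critic loss. A minimal sketch of such a guard, assuming observations and next states are dicts of tensors; the repo's actual helper may behave differently (e.g. drop into a debugger instead of raising):

```python
import torch

def check_nan_in_transition(observations, actions, next_state):
    """Raise if any tensor in a replay-buffer transition contains NaNs (sketch)."""
    for key, obs in observations.items():
        if torch.isnan(obs).any():
            raise ValueError(f"NaN detected in observation '{key}'")
    for key, obs in next_state.items():
        if torch.isnan(obs).any():
            raise ValueError(f"NaN detected in next observation '{key}'")
    if torch.isnan(actions).any():
        raise ValueError("NaN detected in actions")
```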
@@ -533,7 +533,6 @@ def make_optimizers_and_scheduler(cfg, policy: nn.Module):
     optimizer_critic = torch.optim.Adam(
         params=policy.critic_ensemble.parameters(), lr=policy.config.critic_lr
     )
-    # We wrap policy log temperature in list because this is a torch tensor and not a nn.Module
     optimizer_temperature = torch.optim.Adam(params=[policy.log_alpha], lr=policy.config.critic_lr)
     lr_scheduler = None
     optimizers = {
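Only the explanatory comment is removed here: `torch.optim.Adam` accepts any iterable of tensors, so wrapping `log_alpha` in a list works whether it is a plain tensor or an `nn.Parameter`. For context, a sketch of the standard SAC temperature update this optimizer drives; the `target_entropy` value and `log_probs` are placeholders, not lerobot's exact loss code:

```python
import torch
from torch import nn

log_alpha = nn.Parameter(torch.tensor([0.0]))
optimizer_temperature = torch.optim.Adam(params=[log_alpha], lr=3e-4)

# Standard SAC temperature loss: adjust alpha so the policy's entropy tracks
# a target (commonly -action_dim). log_probs would come from the actor.
target_entropy = -4.0         # assumed action dimension of 4
log_probs = torch.randn(256)  # placeholder for log pi(a|s) over a batch

loss_temperature = (-log_alpha.exp() * (log_probs + target_entropy).detach()).mean()
optimizer_temperature.zero_grad()
loss_temperature.backward()
optimizer_temperature.step()
```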