From 8d2463f45b4cd22f5ce6e38b7beade9231e52f37 Mon Sep 17 00:00:00 2001
From: Alexander Soare
Date: Fri, 5 Apr 2024 18:46:30 +0100
Subject: [PATCH] backup wip

---
 lerobot/common/policies/abstract.py   |  76 ------------------
 lerobot/common/policies/act/policy.py | 111 ++++++++++++++++++++------
 lerobot/scripts/eval.py               |   2 +-
 lerobot/scripts/train.py              |   3 +-
 scripts/convert_act_weights.py        |  33 ++++----
 5 files changed, 105 insertions(+), 120 deletions(-)

diff --git a/lerobot/common/policies/abstract.py b/lerobot/common/policies/abstract.py
index 6dc72bef..beebd8ac 100644
--- a/lerobot/common/policies/abstract.py
+++ b/lerobot/common/policies/abstract.py
@@ -4,79 +4,3 @@
 import torch
 from torch import Tensor, nn
 
-class AbstractPolicy(nn.Module):
-    """Base policy which all policies should be derived from.
-
-    The forward method should generally not be overriden as it plays the role of handling multi-step policies. See its
-    documentation for more information.
-
-    Note:
-        When implementing a concrete class (e.g. `AlohaDataset`, `PushtEnv`, `DiffusionPolicy`), you need to:
-            1. set the required class attributes:
-                - for classes inheriting from `AbstractDataset`: `available_datasets`
-                - for classes inheriting from `AbstractEnv`: `name`, `available_tasks`
-                - for classes inheriting from `AbstractPolicy`: `name`
-            2. update variables in `lerobot/__init__.py` (e.g. `available_envs`, `available_datasets_per_envs`, `available_policies`)
-            3. update variables in `tests/test_available.py` by importing your new class
-    """
-
-    name: str | None = None  # same name should be used to instantiate the policy in factory.py
-
-    def __init__(self, n_action_steps: int | None):
-        """
-        n_action_steps: Sets the cache size for storing action trajectories. If None, it is assumed that a single
-            action is returned by `select_actions` and that doesn't have a horizon dimension. The `forward` method then
-            adds that dimension.
-        """
-        super().__init__()
-        assert self.name is not None, "Subclasses of `AbstractPolicy` should set the `name` class attribute."
-        self.n_action_steps = n_action_steps
-        self.clear_action_queue()
-
-    def update(self, replay_buffer, step):
-        """One step of the policy's learning algorithm."""
-        raise NotImplementedError("Abstract method")
-
-    def save(self, fp):
-        torch.save(self.state_dict(), fp)
-
-    def load(self, fp):
-        d = torch.load(fp)
-        self.load_state_dict(d)
-
-    def select_actions(self, observation) -> Tensor:
-        """Select an action (or trajectory of actions) based on an observation during rollout.
-
-        If n_action_steps was provided at initialization, this should return a (batch_size, n_action_steps, *) tensor of
-        actions. Otherwise if n_actions_steps is None, this should return a (batch_size, *) tensor of actions.
-        """
-        raise NotImplementedError("Abstract method")
-
-    def clear_action_queue(self):
-        """This should be called whenever the environment is reset."""
-        if self.n_action_steps is not None:
-            self._action_queue = deque([], maxlen=self.n_action_steps)
-
-    def forward(self, *args, **kwargs) -> Tensor:
-        """Inference step that makes multi-step policies compatible with their single-step environments.
-
-        WARNING: In general, this should not be overriden.
-
-        Consider a "policy" that observes the environment then charts a course of N actions to take. To make this fit
-        into the formalism of a TorchRL environment, we view it as being effectively a policy that (1) makes an
-        observation and prepares a queue of actions, (2) consumes that queue when queried, regardless of the environment
-        observation, (3) repopulates the action queue when empty. This method handles the aforementioned logic so that
-        the subclass doesn't have to.
-
-        This method effectively wraps the `select_actions` method of the subclass. The following assumptions are made:
-        1. The `select_actions` method returns a Tensor of actions with shape (B, H, *) where B is the batch size, H is
-            the action trajectory horizon and * is the action dimensions.
-        2. Prior to the `select_actions` method being called, theres is an `n_action_steps` instance attribute defined.
-        """
-        if self.n_action_steps is None:
-            return self.select_actions(*args, **kwargs)
-        if len(self._action_queue) == 0:
-            # `select_actions` returns a (batch_size, n_action_steps, *) tensor, but the queue effectively has shape
-            # (n_action_steps, batch_size, *), hence the transpose.
-            self._action_queue.extend(self.select_actions(*args, **kwargs).transpose(0, 1))
-        return self._action_queue.popleft()
diff --git a/lerobot/common/policies/act/policy.py b/lerobot/common/policies/act/policy.py
index f42c6a3c..a9a5ac06 100644
--- a/lerobot/common/policies/act/policy.py
+++ b/lerobot/common/policies/act/policy.py
@@ -3,7 +3,7 @@
 As per Learning Fine-Grained Bimanual Manipulation with Low-Cost Hardware (https://arxiv.org/abs/2304.13705).
 The majority of changes here involve removing unused code, unifying naming, and adding helpful comments.
 """
-
+from collections import deque
 import math
 import time
 from itertools import chain
@@ -22,6 +22,67 @@
 from torchvision.ops.misc import FrozenBatchNorm2d
 
 from lerobot.common.utils import get_safe_torch_device
+# class AbstractPolicy(nn.Module):
+#     """Base policy which all policies should be derived from.
+
+#     The forward method should generally not be overriden as it plays the role of handling multi-step policies. See its
+#     documentation for more information.
+
+#     Note:
+#         When implementing a concrete class (e.g. `AlohaDataset`, `PushtEnv`, `DiffusionPolicy`), you need to:
+#             1. set the required class attributes:
+#                 - for classes inheriting from `AbstractDataset`: `available_datasets`
+#                 - for classes inheriting from `AbstractEnv`: `name`, `available_tasks`
+#                 - for classes inheriting from `AbstractPolicy`: `name`
+#             2. update variables in `lerobot/__init__.py` (e.g. `available_envs`, `available_datasets_per_envs`, `available_policies`)
+#             3. update variables in `tests/test_available.py` by importing your new class
+#     """
+
+#     name: str | None = None  # same name should be used to instantiate the policy in factory.py
+
+#     def __init__(self, n_action_steps: int | None):
+#         """
+#         n_action_steps: Sets the cache size for storing action trajectories. If None, it is assumed that a single
+#             action is returned by `select_actions` and that doesn't have a horizon dimension. The `forward` method then
+#             adds that dimension.
+#         """
+#         super().__init__()
+#         assert self.name is not None, "Subclasses of `AbstractPolicy` should set the `name` class attribute."
+#         self.n_action_steps = n_action_steps
+#         self.clear_action_queue()
+
+#     def clear_action_queue(self):
+#         """This should be called whenever the environment is reset."""
+#         if self.n_action_steps is not None:
+#             self._action_queue = deque([], maxlen=self.n_action_steps)
+
+#     def forward(self, fn) -> Tensor:
+#         """Inference step that makes multi-step policies compatible with their single-step environments.
+
+#         WARNING: In general, this should not be overriden.
+
+#         Consider a "policy" that observes the environment then charts a course of N actions to take. To make this fit
+#         into the formalism of a TorchRL environment, we view it as being effectively a policy that (1) makes an
+#         observation and prepares a queue of actions, (2) consumes that queue when queried, regardless of the environment
+#         observation, (3) repopulates the action queue when empty. This method handles the aforementioned logic so that
+#         the subclass doesn't have to.
+
+#         This method effectively wraps the `select_actions` method of the subclass. The following assumptions are made:
+#         1. The `select_actions` method returns a Tensor of actions with shape (B, H, *) where B is the batch size, H is
+#             the action trajectory horizon and * is the action dimensions.
+#         2. Prior to the `select_actions` method being called, theres is an `n_action_steps` instance attribute defined.
+#         """
+#         if self.n_action_steps is None:
+#             return self.select_actions(*args, **kwargs)
+#         if len(self._action_queue) == 0:
+#             # `select_actions` returns a (batch_size, n_action_steps, *) tensor, but the queue effectively has shape
+#             # (n_action_steps, batch_size, *), hence the transpose.
+#             self._action_queue.extend(self.select_actions(*args, **kwargs).transpose(0, 1))
+#         return self._action_queue.popleft()
+
+
+
 class ActionChunkingTransformerPolicy(nn.Module):
     """
     Action Chunking Transformer Policy as per Learning Fine-Grained Bimanual Manipulation with Low-Cost
@@ -168,14 +229,16 @@ class ActionChunkingTransformerPolicy(nn.Module):
                 nn.init.xavier_uniform_(p)
 
     @torch.no_grad()
-    def select_actions(self, batch, *_):
+    def select_action(self, batch, *_):
         # TODO(now): Implement queueing mechanism.
         self.eval()
         self._preprocess_batch(batch)
 
         # TODO(now): What's up with this 0.182?
         action = self.forward(
-            robot_state=batch["observation.state"] * 0.182, image=batch["observation.images.top"]
+            robot_state=batch["observation.state"] * 0.182,
+            image=batch["observation.images.top"],
+            return_loss=False,
         )
 
         if self.cfg.temporal_agg:
@@ -226,7 +289,7 @@ class ActionChunkingTransformerPolicy(nn.Module):
         assert batch_size % self.cfg.horizon == 0
         assert batch_size % num_slices == 0
 
-        loss = self.compute_loss(batch)
+        loss = self.forward(batch, return_loss=True)["loss"]
         loss.backward()
 
         grad_norm = torch.nn.utils.clip_grad_norm_(
@@ -247,44 +310,38 @@ class ActionChunkingTransformerPolicy(nn.Module):
 
         return info
 
-    def compute_loss(self, batch):
-        loss_dict = self.forward(
-            robot_state=batch["observation.state"],
-            image=batch["observation.images.top"],
-            actions=batch["action"],
-        )
-        loss = loss_dict["loss"]
-        return loss
-
-    def forward(self, robot_state: Tensor, image: Tensor, actions: Tensor | None = None):
+    def forward(self, batch: dict[str, Tensor], return_loss: bool = False):
         # TODO(now): Maybe this shouldn't be here?
         normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
-        image = normalize(image)
+        images = normalize(batch["observation.images.top"])
 
-        is_training = actions is not None
-        if is_training:  # training time
-            actions = actions[:, : self.horizon]
+        if return_loss:  # training time
+            actions_hat, (mu_hat, log_sigma_x2_hat) = self._forward(
+                batch["observation.state"], images, batch["action"]
+            )
 
-            a_hat, (mu, log_sigma_x2) = self._forward(robot_state, image, actions)
-
-            all_l1 = F.l1_loss(actions, a_hat, reduction="none")
-            l1 = all_l1.mean()
+            l1_loss = (
+                F.l1_loss(batch["action"], actions_hat, reduction="none")
+                * ~batch["action_is_pad"].unsqueeze(-1)
+            ).mean()
 
             loss_dict = {}
-            loss_dict["l1"] = l1
+            loss_dict["l1"] = l1_loss
             if self.cfg.use_vae:
                 # Calculate Dₖₗ(latent_pdf || standard_normal). Note: After computing the KL-divergence for
                 # each dimension independently, we sum over the latent dimension to get the total
                 # KL-divergence per batch element, then take the mean over the batch.
                 # (See App. B of https://arxiv.org/abs/1312.6114 for more details).
-                mean_kld = (-0.5 * (1 + log_sigma_x2 - mu.pow(2) - (log_sigma_x2).exp())).sum(-1).mean()
+                mean_kld = (
+                    (-0.5 * (1 + log_sigma_x2_hat - mu_hat.pow(2) - (log_sigma_x2_hat).exp())).sum(-1).mean()
+                )
                 loss_dict["kl"] = mean_kld
                 loss_dict["loss"] = loss_dict["l1"] + loss_dict["kl"] * self.cfg.kl_weight
             else:
                 loss_dict["loss"] = loss_dict["l1"]
             return loss_dict
         else:
-            action, _ = self._forward(robot_state, image)  # no action, sample from prior
+            action, _ = self._forward(batch["observation.state"], images)
             return action
 
     def _forward(self, robot_state: Tensor, image: Tensor, actions: Tensor | None = None):
@@ -321,7 +378,9 @@ class ActionChunkingTransformerPolicy(nn.Module):
             # Forward pass through VAE encoder and sample the latent with the reparameterization trick.
             cls_token_out = self.vae_encoder(
                 vae_encoder_input.permute(1, 0, 2), pos_embed=pos_embed.permute(1, 0, 2)
-            )[0]  # (B, D)
+            )[
+                0
+            ]  # (B, D)
             latent_pdf_params = self.vae_encoder_latent_output_proj(cls_token_out)
             mu = latent_pdf_params[:, : self.latent_dim]
             # This is 2log(sigma). Done this way to match the original implementation.
diff --git a/lerobot/scripts/eval.py b/lerobot/scripts/eval.py
index e7ba53fc..b05f9704 100644
--- a/lerobot/scripts/eval.py
+++ b/lerobot/scripts/eval.py
@@ -251,7 +251,7 @@ def eval(cfg: dict, out_dir=None, stats_path=None):
     dataset = make_dataset(cfg, stats_path=stats_path)
 
     logging.info("Making environment.")
-    env = make_env(cfg, num_parallel_envs=cfg.eval_episodes)
+    env = make_env(cfg, num_parallel_envs=cfg.rollout_batch_size)
 
     # when policy is None, rollout a random policy
     policy = make_policy(cfg) if cfg.policy.pretrained_model_path else None
diff --git a/lerobot/scripts/train.py b/lerobot/scripts/train.py
index d49dfff8..81f3cdbc 100644
--- a/lerobot/scripts/train.py
+++ b/lerobot/scripts/train.py
@@ -148,7 +148,8 @@ def train(cfg: dict, out_dir=None, job_name=None):
     # )
 
     logging.info("make_env")
-    env = make_env(cfg, num_parallel_envs=cfg.eval_episodes)
+    # TODO(now): uncomment
+    #env = make_env(cfg, num_parallel_envs=cfg.eval_episodes)
 
     logging.info("make_policy")
     policy = make_policy(cfg)
diff --git a/scripts/convert_act_weights.py b/scripts/convert_act_weights.py
index b1492009..d5e38796 100644
--- a/scripts/convert_act_weights.py
+++ b/scripts/convert_act_weights.py
@@ -28,22 +28,23 @@ for to_remove in start_removals:
 
 # Replace keys based on what they start with.
 start_replacements = [
-    ("model.query_embed.weight", "model.pos_embed.weight"),
-    ("model.pos_table", "model.vae_encoder_pos_enc"),
-    ("model.pos_embed.weight", "model.decoder_pos_embed.weight"),
-    ("model.encoder.", "model.vae_encoder."),
-    ("model.encoder_action_proj.", "model.vae_encoder_action_input_proj."),
-    ("model.encoder_joint_proj.", "model.vae_encoder_robot_state_input_proj."),
-    ("model.latent_proj.", "model.vae_encoder_latent_output_proj."),
-    ("model.latent_proj.", "model.vae_encoder_latent_output_proj."),
-    ("model.input_proj.", "model.encoder_img_feat_input_proj."),
-    ("model.input_proj_robot_state", "model.encoder_robot_state_input_proj"),
-    ("model.latent_out_proj.", "model.encoder_latent_input_proj."),
-    ("model.transformer.encoder.", "model.encoder."),
-    ("model.transformer.decoder.", "model.decoder."),
-    ("model.backbones.0.0.body.", "model.backbone."),
-    ("model.additional_pos_embed.weight", "model.encoder_robot_and_latent_pos_embed.weight"),
-    ("model.cls_embed.weight", "model.vae_encoder_cls_embed.weight"),
+    ("model.", ""),
+    ("query_embed.weight", "pos_embed.weight"),
+    ("pos_table", "vae_encoder_pos_enc"),
+    ("pos_embed.weight", "decoder_pos_embed.weight"),
+    ("encoder.", "vae_encoder."),
+    ("encoder_action_proj.", "vae_encoder_action_input_proj."),
+    ("encoder_joint_proj.", "vae_encoder_robot_state_input_proj."),
+    ("latent_proj.", "vae_encoder_latent_output_proj."),
+    ("latent_proj.", "vae_encoder_latent_output_proj."),
+    ("input_proj.", "encoder_img_feat_input_proj."),
+    ("input_proj_robot_state", "encoder_robot_state_input_proj"),
+    ("latent_out_proj.", "encoder_latent_input_proj."),
+    ("transformer.encoder.", "encoder."),
+    ("transformer.decoder.", "decoder."),
+    ("backbones.0.0.body.", "backbone."),
+    ("additional_pos_embed.weight", "encoder_robot_and_latent_pos_embed.weight"),
+    ("cls_embed.weight", "vae_encoder_cls_embed.weight"),
 ]
 
 for to_replace, replace_with in start_replacements:
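
Note: the patch excerpt ends at the `for to_replace, replace_with in start_replacements:` loop header, so the loop body of scripts/convert_act_weights.py is not shown here. For orientation only, a minimal sketch (hypothetical helper, not the patch's code) of how such a prefix-based rename pass over a checkpoint state dict could be applied:

    def rename_keys(state_dict: dict, replacements: list[tuple[str, str]]) -> dict:
        # Apply each (old_prefix, new_prefix) pair in order. Later entries see keys
        # already rewritten by earlier ones, so with the new list above the leading
        # ("model.", "") entry strips the "model." prefix first and the remaining
        # patterns match against the stripped names.
        for to_replace, replace_with in replacements:
            for key in list(state_dict.keys()):
                if key.startswith(to_replace):
                    new_key = replace_with + key[len(to_replace):]
                    state_dict[new_key] = state_dict.pop(key)
        return state_dict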