From 87fcc536f91e857873c5e1cc9c05a23335dd27e6 Mon Sep 17 00:00:00 2001
From: Alexander Soare
Date: Mon, 11 Mar 2024 18:45:21 +0000
Subject: [PATCH] wip - still need to verify full training run

---
 lerobot/common/envs/pusht/pusht_image_env.py   |  2 +-
 .../diffusion/model/multi_image_obs_encoder.py |  2 ++
 lerobot/configs/policy/diffusion.yaml          | 12 ++++++------
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/lerobot/common/envs/pusht/pusht_image_env.py b/lerobot/common/envs/pusht/pusht_image_env.py
index 5f7bc03c..2d52c89e 100644
--- a/lerobot/common/envs/pusht/pusht_image_env.py
+++ b/lerobot/common/envs/pusht/pusht_image_env.py
@@ -25,7 +25,7 @@ class PushTImageEnv(PushTEnv):
         img = super()._render_frame(mode="rgb_array")

         agent_pos = np.array(self.agent.position)
-        img_obs = np.moveaxis(img.astype(np.float32) / 255, -1, 0)
+        img_obs = np.moveaxis(img.astype(np.float32), -1, 0)
         obs = {"image": img_obs, "agent_pos": agent_pos}

         # draw action
diff --git a/lerobot/common/policies/diffusion/model/multi_image_obs_encoder.py b/lerobot/common/policies/diffusion/model/multi_image_obs_encoder.py
index 0b4bba7d..91472dd5 100644
--- a/lerobot/common/policies/diffusion/model/multi_image_obs_encoder.py
+++ b/lerobot/common/policies/diffusion/model/multi_image_obs_encoder.py
@@ -123,6 +123,8 @@ class MultiImageObsEncoder(ModuleAttrMixin):
             if imagenet_norm:
                 # TODO(rcadene): move normalizer to dataset and env
                 this_normalizer = torchvision.transforms.Normalize(
+                    # Note: This matches the normalization in the original impl. for PushT Image. This may not be
+                    # the case for other tasks.
                     mean=[127.5, 127.5, 127.5],
                     std=[127.5, 127.5, 127.5],
                 )
diff --git a/lerobot/configs/policy/diffusion.yaml b/lerobot/configs/policy/diffusion.yaml
index 28fd4e4e..f07e4754 100644
--- a/lerobot/configs/policy/diffusion.yaml
+++ b/lerobot/configs/policy/diffusion.yaml
@@ -42,8 +42,8 @@ policy:
   num_inference_steps: 100
   obs_as_global_cond: ${obs_as_global_cond}
   # crop_shape: null
-  diffusion_step_embed_dim: 256 # before 128
-  down_dims: [256, 512, 1024] # before [512, 1024, 2048]
+  diffusion_step_embed_dim: 128
+  down_dims: [512, 1024, 2048]
   kernel_size: 5
   n_groups: 8
   cond_predict_scale: True
@@ -109,13 +109,13 @@ training:
   debug: False
   resume: True
   # optimization
-  # lr_scheduler: cosine
-  # lr_warmup_steps: 500
-  num_epochs: 8000
+  lr_scheduler: cosine
+  lr_warmup_steps: 500
+  num_epochs: 500
   # gradient_accumulate_every: 1
   # EMA destroys performance when used with BatchNorm
   # replace BatchNorm with GroupNorm.
-  # use_ema: True
+  use_ema: True
   freeze_encoder: False
   # training loop control
   # in epochs
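
Note on the first two hunks: with the `/ 255` removed, the env now emits float32 images in [0, 255], and the encoder's Normalize with mean/std of 127.5 maps them to [-1, 1], matching the original PushT Image pipeline. A minimal sketch (not part of the patch; the random image is a stand-in for a real render) checking that composition:

    # Sketch: [0, 255] float image composed with Normalize(127.5, 127.5) -> [-1, 1].
    import numpy as np
    import torch
    import torchvision

    img = np.random.randint(0, 256, size=(96, 96, 3), dtype=np.uint8)  # stand-in render
    img_obs = np.moveaxis(img.astype(np.float32), -1, 0)  # CHW, values in [0, 255]

    normalize = torchvision.transforms.Normalize(
        mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5]
    )
    out = normalize(torch.from_numpy(img_obs))
    assert out.min() >= -1.0 and out.max() <= 1.0  # pixels now in [-1, 1]

Keeping the division in the env on top of this Normalize would have double-scaled the pixels, which is why only one of the two steps survives.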
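
On the diffusion.yaml changes: re-enabling `lr_scheduler: cosine` with `lr_warmup_steps: 500` follows the original diffusion_policy training recipe. A sketch of how those values would typically be wired up, assuming diffusers' `get_scheduler` as used by that codebase; the `policy` module and total step count here are hypothetical stand-ins:

    # Sketch: cosine LR schedule with linear warmup, stepped once per optimization step.
    import torch
    from diffusers.optimization import get_scheduler

    policy = torch.nn.Linear(2, 2)  # stand-in for the diffusion policy network
    optimizer = torch.optim.Adam(policy.parameters(), lr=1e-4)
    lr_scheduler = get_scheduler(
        "cosine",
        optimizer=optimizer,
        num_warmup_steps=500,        # lr_warmup_steps in diffusion.yaml
        num_training_steps=100_000,  # total optimization steps (illustrative)
    )
    lr_scheduler.step()  # called after each optimizer step during training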
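
On `use_ema: True`: the surrounding config comments note that EMA destroys performance when combined with BatchNorm, hence the model uses GroupNorm. A minimal sketch of the weight-averaging idea only, not the repo's actual implementation (diffusion_policy delegates this to an EMAModel helper):

    # Sketch: exponential moving average of model weights, ema <- decay*ema + (1-decay)*model.
    import copy
    import torch

    def update_ema(ema_model: torch.nn.Module, model: torch.nn.Module, decay: float = 0.999):
        with torch.no_grad():
            for ema_p, p in zip(ema_model.parameters(), model.parameters()):
                ema_p.mul_(decay).add_(p, alpha=1 - decay)

    model = torch.nn.Linear(2, 2)
    ema_model = copy.deepcopy(model)  # the weights used for eval/checkpointing
    update_ema(ema_model, model)      # called after every optimizer step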