diff --git a/lerobot/configs/policy/act.yaml b/lerobot/configs/policy/act.yaml
index a49a97f8..15efcce8 100644
--- a/lerobot/configs/policy/act.yaml
+++ b/lerobot/configs/policy/act.yaml
@@ -3,6 +3,12 @@
 seed: 1000
 dataset_repo_id: lerobot/aloha_sim_insertion_human
 
+override_dataset_stats:
+  observation.images.top:
+    # ImageNet mean/std, since we use a pretrained vision model
+    mean: [[[0.485]], [[0.456]], [[0.406]]]  # (c,1,1)
+    std: [[[0.229]], [[0.224]], [[0.225]]]  # (c,1,1)
+
 training:
   offline_steps: 80000
   online_steps: 0
@@ -18,12 +24,6 @@ training:
   grad_clip_norm: 10
   online_steps_between_rollouts: 1
 
-  override_dataset_stats:
-    observation.images.top:
-      # stats from imagenet, since we use a pretrained vision model
-      mean: [[[0.485]], [[0.456]], [[0.406]]]  # (c,1,1)
-      std: [[[0.229]], [[0.224]], [[0.225]]]  # (c,1,1)
-
   delta_timestamps:
     action: "[i / ${fps} for i in range(${policy.chunk_size})]"
 
diff --git a/lerobot/configs/policy/diffusion.yaml b/lerobot/configs/policy/diffusion.yaml
index 9a4aeb2a..7278985e 100644
--- a/lerobot/configs/policy/diffusion.yaml
+++ b/lerobot/configs/policy/diffusion.yaml
@@ -7,6 +7,20 @@
 seed: 100000
 dataset_repo_id: lerobot/pusht
 
+override_dataset_stats:
+  # TODO(rcadene, alexander-soare): Should we remove the image stats as well? Do we use a pretrained vision model?
+  observation.image:
+    mean: [[[0.5]], [[0.5]], [[0.5]]]  # (c,1,1)
+    std: [[[0.5]], [[0.5]], [[0.5]]]  # (c,1,1)
+  # TODO(rcadene, alexander-soare): We override the state and action stats to match those of the pretrained model
+  # from the original codebase; we should remove these overrides and train our own pretrained model.
+  observation.state:
+    min: [13.456424, 32.938293]
+    max: [496.14618, 510.9579]
+  action:
+    min: [12.0, 25.0]
+    max: [511.0, 511.0]
+
 training:
   offline_steps: 200000
   online_steps: 0
@@ -34,20 +48,6 @@ eval:
   n_episodes: 50
   batch_size: 50
 
-override_dataset_stats:
-  # TODO(rcadene, alexander-soare): should we remove image stats as well? do we use a pretrained vision model?
-  observation.image:
-    mean: [[[0.5]], [[0.5]], [[0.5]]]  # (c,1,1)
-    std: [[[0.5]], [[0.5]], [[0.5]]]  # (c,1,1)
-  # TODO(rcadene, alexander-soare): we override state and action stats to use the same as the pretrained model
-  # from the original codebase, but we should remove these and train our own pretrained model
-  observation.state:
-    min: [13.456424, 32.938293]
-    max: [496.14618, 510.9579]
-  action:
-    min: [12.0, 25.0]
-    max: [511.0, 511.0]
-
 policy:
   name: diffusion
 
diff --git a/tests/data/save_policy_to_safetensors/aloha_act/actions.safetensors b/tests/data/save_policy_to_safetensors/aloha_act/actions.safetensors
index 70c9b6d8..7e7ad8e1 100644
Binary files a/tests/data/save_policy_to_safetensors/aloha_act/actions.safetensors and b/tests/data/save_policy_to_safetensors/aloha_act/actions.safetensors differ
diff --git a/tests/data/save_policy_to_safetensors/aloha_act/grad_stats.safetensors b/tests/data/save_policy_to_safetensors/aloha_act/grad_stats.safetensors
index 2e845189..5188d8f4 100644
Binary files a/tests/data/save_policy_to_safetensors/aloha_act/grad_stats.safetensors and b/tests/data/save_policy_to_safetensors/aloha_act/grad_stats.safetensors differ
diff --git a/tests/data/save_policy_to_safetensors/aloha_act/output_dict.safetensors b/tests/data/save_policy_to_safetensors/aloha_act/output_dict.safetensors
index e8d537c8..02235e17 100644
Binary files a/tests/data/save_policy_to_safetensors/aloha_act/output_dict.safetensors and b/tests/data/save_policy_to_safetensors/aloha_act/output_dict.safetensors differ
diff --git a/tests/data/save_policy_to_safetensors/aloha_act/param_stats.safetensors b/tests/data/save_policy_to_safetensors/aloha_act/param_stats.safetensors
index 6e33879f..7a2e0e70 100644
Binary files a/tests/data/save_policy_to_safetensors/aloha_act/param_stats.safetensors and b/tests/data/save_policy_to_safetensors/aloha_act/param_stats.safetensors differ