diff --git a/lerobot/configs/env/maniskill_example.yaml b/lerobot/configs/env/maniskill_example.yaml
index 3df23b2e..2beaa8a6 100644
--- a/lerobot/configs/env/maniskill_example.yaml
+++ b/lerobot/configs/env/maniskill_example.yaml
@@ -1,6 +1,6 @@
 # @package _global_
 
-fps: 20
+fps: 400
 
 env:
   name: maniskill/pushcube
diff --git a/lerobot/configs/policy/sac_maniskill.yaml b/lerobot/configs/policy/sac_maniskill.yaml
index c9bbca44..cf20d059 100644
--- a/lerobot/configs/policy/sac_maniskill.yaml
+++ b/lerobot/configs/policy/sac_maniskill.yaml
@@ -8,22 +8,23 @@
 #     env.gym.obs_type=environment_state_agent_pos \
 
 seed: 1
-dataset_repo_id: "AdilZtn/Maniskill-Pushcube-demonstration-medium"
+# dataset_repo_id: "AdilZtn/Maniskill-Pushcube-demonstration-medium"
+dataset_repo_id: null
 
 training:
   # Offline training dataloader
   num_workers: 4
 
   batch_size: 512
-  grad_clip_norm: 10.0
+  grad_clip_norm: 40.0
   lr: 3e-4
-  storage_device: "cpu"
+  storage_device: "cuda"
 
   eval_freq: 2500
   log_freq: 10
-  save_freq: 2000000
+  save_freq: 1000000
 
   online_steps: 1000000
   online_rollout_n_episodes: 10
@@ -32,17 +33,12 @@ training:
   online_sampling_ratio: 1.0
   online_env_seed: 10000
   online_buffer_capacity: 200000
+  offline_buffer_capacity: 100000
   online_buffer_seed_size: 0
   online_step_before_learning: 500
   do_online_rollout_async: false
   policy_update_freq: 1
 
-  # delta_timestamps:
-  #   observation.environment_state: "[i / ${fps} for i in range(${policy.horizon} + 1)]"
-  #   observation.state: "[i / ${fps} for i in range(${policy.horizon} + 1)]"
-  #   action: "[i / ${fps} for i in range(${policy.horizon})]"
-  #   next.reward: "[i / ${fps} for i in range(${policy.horizon})]"
-
 policy:
   name: sac
@@ -68,28 +64,33 @@ policy:
   camera_number: 1
 
   # Normalization / Unnormalization
-  input_normalization_modes: null
-  # input_normalization_modes:
-  #   observation.state: min_max
-  input_normalization_params: null
-  # observation.state:
-  #   min: [-1.9361e+00, -7.7640e-01, -7.7094e-01, -2.9709e+00, -8.5656e-01,
-  #         1.0764e+00, -1.2680e+00, 0.0000e+00, 0.0000e+00, -9.3448e+00,
-  #         -3.3828e+00, -3.8420e+00, -5.2553e+00, -3.4154e+00, -6.5082e+00,
-  #         -6.0500e+00, -8.7193e+00, -8.2337e+00, -3.4650e-01, -4.9441e-01,
-  #         8.3516e-03, -3.1114e-01, -9.9700e-01, -2.3471e-01, -2.7137e-01]
+  # input_normalization_modes: null
+  input_normalization_modes:
+    observation.state: min_max
+    observation.image: mean_std
+  # input_normalization_params: null
+  input_normalization_params:
+    observation.state:
+      min: [-1.9361e+00, -7.7640e-01, -7.7094e-01, -2.9709e+00, -8.5656e-01,
+            1.0764e+00, -1.2680e+00, 0.0000e+00, 0.0000e+00, -9.3448e+00,
+            -3.3828e+00, -3.8420e+00, -5.2553e+00, -3.4154e+00, -6.5082e+00,
+            -6.0500e+00, -8.7193e+00, -8.2337e+00, -3.4650e-01, -4.9441e-01,
+            8.3516e-03, -3.1114e-01, -9.9700e-01, -2.3471e-01, -2.7137e-01]
+      max: [ 0.8644, 1.4306, 1.8520, -0.7578, 0.9508, 3.4901, 1.9381, 0.0400,
+             0.0400, 5.0885, 4.7156, 7.9393, 7.9100, 2.9796, 5.7720, 4.7163,
+             7.8145, 9.7415, 0.2422, 0.4505, 0.6306, 0.2622, 1.0000, 0.5135,
+             0.4001]
 
-  # max: [ 0.8644, 1.4306, 1.8520, -0.7578, 0.9508, 3.4901, 1.9381, 0.0400,
-  #        0.0400, 5.0885, 4.7156, 7.9393, 7.9100, 2.9796, 5.7720, 4.7163,
-  #        7.8145, 9.7415, 0.2422, 0.4505, 0.6306, 0.2622, 1.0000, 0.5135,
-  #        0.4001]
+    observation.image:
+      mean: [0.485, 0.456, 0.406]
+      std: [0.229, 0.224, 0.225]
 
   output_normalization_modes:
     action: min_max
   output_normalization_params:
     action:
-      min: [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0]
-      max: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
+      min: [-0.03, -0.03, -0.03, -0.03, -0.03, -0.03, -0.03]
+      max: [0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03]
   output_normalization_shapes:
     action: [7]
@@ -99,8 +100,8 @@ policy:
   # discount: 0.99
   discount: 0.80
   temperature_init: 1.0
-  num_critics: 10 #10
-  num_subsample_critics: 2
+  num_critics: 2 #10
+  num_subsample_critics: null
   critic_lr: 3e-4
   actor_lr: 3e-4
   temperature_lr: 3e-4
@@ -111,7 +112,7 @@ policy:
   actor_learner_config:
     learner_host: "127.0.0.1"
     learner_port: 50051
-    policy_parameters_push_frequency: 1
+    policy_parameters_push_frequency: 4
   concurrency:
-    actor: 'processes'
-    learner: 'processes'
+    actor: 'threads'
+    learner: 'threads'
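A note on the normalization block above: the new min/max vectors for observation.state look like per-dimension dataset statistics, while the observation.image mean/std values are the standard ImageNet statistics. A minimal sketch of how these two modes are conventionally applied (an assumption based on LeRobot's usual semantics of min_max rescaling into [-1, 1] and mean_std standardizing per channel; the names below are illustrative, and the state vectors are truncated to three of the 25 dimensions):

    import torch

    # First 3 of the 25 per-dimension statistics from the config above.
    STATE_MIN = torch.tensor([-1.9361, -0.7764, -0.7709])
    STATE_MAX = torch.tensor([0.8644, 1.4306, 1.8520])
    # Standard ImageNet channel statistics, reshaped for (C, H, W) images.
    IMAGE_MEAN = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    IMAGE_STD = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

    def min_max_normalize(state: torch.Tensor) -> torch.Tensor:
        # Rescale each dimension from [min, max] into [-1, 1].
        return (state - STATE_MIN) / (STATE_MAX - STATE_MIN) * 2.0 - 1.0

    def mean_std_normalize(image: torch.Tensor) -> torch.Tensor:
        # Standardize a float image in [0, 1], layout (C, H, W).
        return (image - IMAGE_MEAN) / IMAGE_STD

ImageNet statistics are the usual choice when the image encoder is initialized from ImageNet-pretrained weights, which is presumably why mean_std is enabled for observation.image here.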
diff --git a/lerobot/scripts/server/learner_server.py b/lerobot/scripts/server/learner_server.py
index d1235980..713fc2a8 100644
--- a/lerobot/scripts/server/learner_server.py
+++ b/lerobot/scripts/server/learner_server.py
@@ -202,6 +202,7 @@ def initialize_offline_replay_buffer(
         action_delta=cfg.env.wrapper.delta_action,
         storage_device=storage_device,
         optimize_memory=True,
+        capacity=cfg.training.offline_buffer_capacity,
     )
 
     return offline_replay_buffer
diff --git a/lerobot/scripts/server/maniskill_manipulator.py b/lerobot/scripts/server/maniskill_manipulator.py
index e4d55955..495042de 100644
--- a/lerobot/scripts/server/maniskill_manipulator.py
+++ b/lerobot/scripts/server/maniskill_manipulator.py
@@ -159,7 +159,7 @@ def make_maniskill(
     env.unwrapped.metadata["render_fps"] = 20
     env = ManiSkillCompat(env)
     env = ManiSkillActionWrapper(env)
-    env = ManiSkillMultiplyActionWrapper(env, multiply_factor=1)
+    env = ManiSkillMultiplyActionWrapper(env, multiply_factor=0.03)
     return env
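The multiply_factor=0.03 change works together with the new output_normalization_params in sac_maniskill.yaml: both now describe joint deltas bounded by +/-0.03 instead of +/-1.0. A minimal sketch of one way such a wrapper can be structured (an assumption, not the actual ManiSkillMultiplyActionWrapper code: it presumes a gymnasium Box action space and an underlying controller that consumes normalized [-1, 1] actions, and ScaleActionSpaceWrapper is a hypothetical name):

    import gymnasium as gym
    import numpy as np

    class ScaleActionSpaceWrapper(gym.ActionWrapper):
        """Advertise a +/-multiply_factor action space to the policy and map
        incoming actions back to the controller's native [-1, 1] range."""

        def __init__(self, env: gym.Env, multiply_factor: float = 0.03):
            super().__init__(env)
            assert isinstance(env.action_space, gym.spaces.Box)
            self.multiply_factor = multiply_factor
            # Shrink the bounds the policy sees, e.g. [-1, 1] -> [-0.03, 0.03].
            self.action_space = gym.spaces.Box(
                low=env.action_space.low * multiply_factor,
                high=env.action_space.high * multiply_factor,
                dtype=np.float32,
            )

        def action(self, action: np.ndarray) -> np.ndarray:
            # A +/-0.03 joint delta from the policy becomes a +/-1.0
            # normalized command for the wrapped env.
            return (action / self.multiply_factor).astype(np.float32)

Whatever the exact implementation, the invariant the diff maintains is that the bounds advertised to the policy, the output_normalization_params in the config, and the factor passed to the wrapper all agree: +/-0.03 in the config, 0.03 in make_maniskill.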