diff --git a/.github/workflows/test-docker-build.yml b/.github/workflows/test-docker-build.yml index e77c570e..c3102564 100644 --- a/.github/workflows/test-docker-build.yml +++ b/.github/workflows/test-docker-build.yml @@ -41,7 +41,7 @@ jobs: - name: Get changed files id: changed-files - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@3f54ebb830831fc121d3263c1857cfbdc310cdb9 #v42 with: files: docker/** json: "true" diff --git a/benchmarks/video/README.md b/benchmarks/video/README.md index 49e49811..daa3e1f4 100644 --- a/benchmarks/video/README.md +++ b/benchmarks/video/README.md @@ -51,7 +51,7 @@ For a comprehensive list and documentation of these parameters, see the ffmpeg d ### Decoding parameters **Decoder** We tested two video decoding backends from torchvision: -- `pyav` (default) +- `pyav` - `video_reader` (requires to build torchvision from source) **Requested timestamps** diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index 101e71f4..6ef955dd 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -69,6 +69,7 @@ from lerobot.common.datasets.video_utils import ( VideoFrame, decode_video_frames, encode_video_frames, + get_safe_default_codec, get_video_info, ) from lerobot.common.robot_devices.robots.utils import Robot @@ -462,7 +463,7 @@ class LeRobotDataset(torch.utils.data.Dataset): download_videos (bool, optional): Flag to download the videos. Note that when set to True but the video files are already present on local disk, they won't be downloaded again. Defaults to True. - video_backend (str | None, optional): Video backend to use for decoding videos. Defaults to torchcodec. + video_backend (str | None, optional): Video backend to use for decoding videos. Defaults to torchcodec when available on the platform; otherwise, defaults to 'pyav'. You can also use the 'pyav' decoder used by Torchvision, which used to be the default option, or 'video_reader', another decoder from Torchvision. """ super().__init__() @@ -473,7 +474,7 @@ class LeRobotDataset(torch.utils.data.Dataset): self.episodes = episodes self.tolerance_s = tolerance_s self.revision = revision if revision else CODEBASE_VERSION - self.video_backend = video_backend if video_backend else "torchcodec" + self.video_backend = video_backend if video_backend else get_safe_default_codec() self.delta_indices = None # Unused attributes @@ -1027,7 +1028,7 @@ class LeRobotDataset(torch.utils.data.Dataset): obj.delta_timestamps = None obj.delta_indices = None obj.episode_data_index = None - obj.video_backend = video_backend if video_backend is not None else "torchcodec" + obj.video_backend = video_backend if video_backend is not None else get_safe_default_codec() return obj diff --git a/lerobot/common/datasets/video_utils.py b/lerobot/common/datasets/video_utils.py index 3fe19d8b..4f696861 100644 --- a/lerobot/common/datasets/video_utils.py +++ b/lerobot/common/datasets/video_utils.py @@ -13,6 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import importlib import json import logging import subprocess @@ -27,14 +28,23 @@ import torch import torchvision from datasets.features.features import register_feature from PIL import Image -from torchcodec.decoders import VideoDecoder + + +def get_safe_default_codec(): + if importlib.util.find_spec("torchcodec"): + return "torchcodec" + else: + logging.warning( + "'torchcodec' is not available on your platform, falling back to 'pyav' as the default decoder" + ) + return "pyav" def decode_video_frames( video_path: Path | str, timestamps: list[float], tolerance_s: float, - backend: str = "torchcodec", + backend: str | None = None, ) -> torch.Tensor: """ Decodes video frames using the specified backend. @@ -43,13 +53,15 @@ Args: video_path (Path): Path to the video file. timestamps (list[float]): List of timestamps to extract frames. tolerance_s (float): Allowed deviation in seconds for frame retrieval. - backend (str, optional): Backend to use for decoding. Defaults to "torchcodec". + backend (str, optional): Backend to use for decoding. Defaults to "torchcodec" when available on the platform; otherwise, defaults to "pyav". Returns: torch.Tensor: Decoded frames. Currently supports torchcodec on cpu and pyav. """ + if backend is None: + backend = get_safe_default_codec() if backend == "torchcodec": return decode_video_frames_torchcodec(video_path, timestamps, tolerance_s) elif backend in ["pyav", "video_reader"]: @@ -173,6 +185,12 @@ def decode_video_frames_torchcodec( and all subsequent frames until reaching the requested frame. The number of key frames in a video can be adjusted during encoding to take into account decoding time and video size in bytes. """ + + if importlib.util.find_spec("torchcodec"): + from torchcodec.decoders import VideoDecoder + else: + raise ImportError("torchcodec is required but not available.") + # initialize video decoder decoder = VideoDecoder(video_path, device=device, seek_mode="approximate") loaded_frames = [] diff --git a/lerobot/common/policies/act/modeling_act.py b/lerobot/common/policies/act/modeling_act.py index f2b16a1e..72d4df03 100644 --- a/lerobot/common/policies/act/modeling_act.py +++ b/lerobot/common/policies/act/modeling_act.py @@ -119,9 +119,7 @@ class ACTPolicy(PreTrainedPolicy): batch = self.normalize_inputs(batch) if self.config.image_features: batch = dict(batch) # shallow copy so that adding a key doesn't modify the original - batch["observation.images"] = torch.stack( - [batch[key] for key in self.config.image_features], dim=-4 - ) + batch["observation.images"] = [batch[key] for key in self.config.image_features] # If we are doing temporal ensembling, do online updates where we keep track of the number of actions # we are ensembling over. @@ -149,9 +147,8 @@ class ACTPolicy(PreTrainedPolicy): batch = self.normalize_inputs(batch) if self.config.image_features: batch = dict(batch) # shallow copy so that adding a key doesn't modify the original - batch["observation.images"] = torch.stack( - [batch[key] for key in self.config.image_features], dim=-4 - ) + batch["observation.images"] = [batch[key] for key in self.config.image_features] + batch = self.normalize_targets(batch) actions_hat, (mu_hat, log_sigma_x2_hat) = self.model(batch) @@ -413,11 +410,10 @@ class ACT(nn.Module): "actions must be provided when using the variational objective in training mode." 
) - batch_size = ( - batch["observation.images"] - if "observation.images" in batch - else batch["observation.environment_state"] - ).shape[0] + if "observation.images" in batch: + batch_size = batch["observation.images"][0].shape[0] + else: + batch_size = batch["observation.environment_state"].shape[0] # Prepare the latent for input to the transformer encoder. if self.config.use_vae and "action" in batch: @@ -490,20 +486,21 @@ class ACT(nn.Module): all_cam_features = [] all_cam_pos_embeds = [] - for cam_index in range(batch["observation.images"].shape[-4]): - cam_features = self.backbone(batch["observation.images"][:, cam_index])["feature_map"] - # TODO(rcadene, alexander-soare): remove call to `.to` to speedup forward ; precompute and use - # buffer + # For a list of images, the H and W may vary but H*W is constant. + for img in batch["observation.images"]: + cam_features = self.backbone(img)["feature_map"] cam_pos_embed = self.encoder_cam_feat_pos_embed(cam_features).to(dtype=cam_features.dtype) - cam_features = self.encoder_img_feat_input_proj(cam_features) # (B, C, h, w) + cam_features = self.encoder_img_feat_input_proj(cam_features) + + # Rearrange features to (sequence, batch, dim). + cam_features = einops.rearrange(cam_features, "b c h w -> (h w) b c") + cam_pos_embed = einops.rearrange(cam_pos_embed, "b c h w -> (h w) b c") + all_cam_features.append(cam_features) all_cam_pos_embeds.append(cam_pos_embed) - # Concatenate camera observation feature maps and positional embeddings along the width dimension, - # and move to (sequence, batch, dim). - all_cam_features = torch.cat(all_cam_features, axis=-1) - encoder_in_tokens.extend(einops.rearrange(all_cam_features, "b c h w -> (h w) b c")) - all_cam_pos_embeds = torch.cat(all_cam_pos_embeds, axis=-1) - encoder_in_pos_embed.extend(einops.rearrange(all_cam_pos_embeds, "b c h w -> (h w) b c")) + + encoder_in_tokens.extend(torch.cat(all_cam_features, axis=0)) + encoder_in_pos_embed.extend(torch.cat(all_cam_pos_embeds, axis=0)) # Stack all tokens along the sequence dimension. encoder_in_tokens = torch.stack(encoder_in_tokens, axis=0) diff --git a/lerobot/common/policies/dexvla/README.md b/lerobot/common/policies/dexvla/README.md index 9d0b9805..b34a40bb 100644 --- a/lerobot/common/policies/dexvla/README.md +++ b/lerobot/common/policies/dexvla/README.md @@ -2,7 +2,7 @@ DexVLA: Vision-Language Model with Plug-In Diffusion Expert for Visuomotor Policy Learning This policy is Community Contributed. For more information about DexVLA, you can also refer to [this](https://github.com/juruobenruo/DexVLA). -This is [project website](https://dex-vla.github.io/). +This is [project website](https://dex-vla.github.io/). ## Dataset ### Data format @@ -141,4 +141,4 @@ python lerobot/scripts/eval.py \ ~~~ ### Inference Speed -Tested on a single A6000 GPU, the DexVLA could infer 3.4 action chunks in one second. For each action chunk, if we execute 25 actions, the real control frequency can be 85 (3.4*25)Hz. \ No newline at end of file +Tested on a single A6000 GPU, the DexVLA could infer 3.4 action chunks in one second. For each action chunk, if we execute 25 actions, the real control frequency can be 85 (3.4*25)Hz. 
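Reviewer note on the ACT change above: `observation.images` is now a list of per-camera tensors instead of a single stacked tensor, and each camera's feature map is rearranged to `(h*w, b, c)` before the per-camera token sequences are concatenated. The toy sketch below is not part of the PR; the batch size, channel count, and feature-map resolutions are made up, purely to illustrate the resulting shapes when cameras differ in H and W:

```python
# Illustrative sketch only: mimics the new per-camera rearrange-then-concatenate
# used in ACT.forward. All shapes below are hypothetical.
import torch
import einops

b, c = 2, 32  # hypothetical batch size and backbone feature dim
# Two cameras whose feature maps have different spatial sizes (4x6 and 3x8).
feature_maps = [torch.randn(b, c, 4, 6), torch.randn(b, c, 3, 8)]

# Rearrange each camera independently to (h*w, b, c), then concatenate along the token dim.
tokens = [einops.rearrange(f, "b c h w -> (h w) b c") for f in feature_maps]
encoder_in = torch.cat(tokens, dim=0)  # (4*6 + 3*8, b, c)
print(encoder_in.shape)  # torch.Size([48, 2, 32])
```

Because each camera is rearranged on its own, the feature maps no longer need to share a spatial resolution before being turned into encoder tokens, which the previous cat-along-width approach required.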
diff --git a/lerobot/common/robot_devices/robots/manipulator.py b/lerobot/common/robot_devices/robots/manipulator.py index 8a7c7fe6..9173abc6 100644 --- a/lerobot/common/robot_devices/robots/manipulator.py +++ b/lerobot/common/robot_devices/robots/manipulator.py @@ -474,7 +474,7 @@ class ManipulatorRobot: # Used when record_data=True follower_goal_pos[name] = goal_pos - goal_pos = goal_pos.numpy().astype(np.int32) + goal_pos = goal_pos.numpy().astype(np.float32) self.follower_arms[name].write("Goal_Position", goal_pos) self.logs[f"write_follower_{name}_goal_pos_dt_s"] = time.perf_counter() - before_fwrite_t @@ -596,7 +596,7 @@ class ManipulatorRobot: action_sent.append(goal_pos) # Send goal position to each follower - goal_pos = goal_pos.numpy().astype(np.int32) + goal_pos = goal_pos.numpy().astype(np.float32) self.follower_arms[name].write("Goal_Position", goal_pos) return torch.cat(action_sent) diff --git a/lerobot/common/utils/wandb_utils.py b/lerobot/common/utils/wandb_utils.py index 9985b894..700ebea5 100644 --- a/lerobot/common/utils/wandb_utils.py +++ b/lerobot/common/utils/wandb_utils.py @@ -69,7 +69,13 @@ class WandBLogger: os.environ["WANDB_SILENT"] = "True" import wandb - wandb_run_id = get_wandb_run_id_from_filesystem(self.log_dir) if cfg.resume else None + wandb_run_id = ( + cfg.wandb.run_id + if cfg.wandb.run_id + else get_wandb_run_id_from_filesystem(self.log_dir) + if cfg.resume + else None + ) wandb.init( id=wandb_run_id, project=self.cfg.project, diff --git a/lerobot/configs/default.py b/lerobot/configs/default.py index 1e7f5819..b23bbb6d 100644 --- a/lerobot/configs/default.py +++ b/lerobot/configs/default.py @@ -20,6 +20,7 @@ from lerobot.common import ( policies, # noqa: F401 ) from lerobot.common.datasets.transforms import ImageTransformsConfig +from lerobot.common.datasets.video_utils import get_safe_default_codec @dataclass @@ -35,7 +36,7 @@ class DatasetConfig: image_transforms: ImageTransformsConfig = field(default_factory=ImageTransformsConfig) revision: str | None = None use_imagenet_stats: bool = True - video_backend: str = "pyav" + video_backend: str = field(default_factory=get_safe_default_codec) @dataclass @@ -46,6 +47,7 @@ class WandBConfig: project: str = "lerobot" entity: str | None = None notes: str | None = None + run_id: str | None = None @dataclass diff --git a/lerobot/configs/train.py b/lerobot/configs/train.py index 2b147a5b..7a787b83 100644 --- a/lerobot/configs/train.py +++ b/lerobot/configs/train.py @@ -79,7 +79,9 @@ class TrainPipelineConfig(HubMixin): # The entire train config is already loaded, we just need to get the checkpoint dir config_path = parser.parse_arg("config_path") if not config_path: - raise ValueError("A config_path is expected when resuming a run.") + raise ValueError( + f"A config_path is expected when resuming a run. Please specify path to {TRAIN_CONFIG_NAME}" + ) if not Path(config_path).resolve().exists(): raise NotADirectoryError( f"{config_path=} is expected to be a local path. " diff --git a/pyproject.toml b/pyproject.toml index f7daca2e..2a85b7c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ dependencies = [ "rerun-sdk>=0.21.0", "termcolor>=2.4.0", "torch>=2.2.1", - "torchcodec>=0.2.1", + "torchcodec>=0.2.1; sys_platform != 'win32' and (sys_platform != 'linux' or (platform_machine != 'aarch64' and platform_machine != 'arm64' and platform_machine != 'armv7l'))", "torchvision>=0.21.0", "wandb>=0.16.3", "zarr>=2.17.0",
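Reviewer note: a quick way to sanity-check the new codec fallback end to end. Only `get_safe_default_codec` and the `backend=None` default of `decode_video_frames` come from this diff; the video path, timestamps, and tolerance below are hypothetical placeholders:

```python
# Sketch only: exercises the fallback introduced in video_utils.py.
# The file path, timestamps, and tolerance are hypothetical placeholders.
from lerobot.common.datasets.video_utils import decode_video_frames, get_safe_default_codec

backend = get_safe_default_codec()  # "torchcodec" if importable, otherwise "pyav" (with a warning)
print(f"Resolved default video backend: {backend}")

# decode_video_frames now resolves backend=None through the same helper.
frames = decode_video_frames(
    "videos/episode_000000.mp4",  # hypothetical path
    timestamps=[0.0, 0.1],
    tolerance_s=1e-4,
)
print(frames.shape)
```

On platforms excluded by the new `pyproject.toml` marker (e.g. Windows or linux/aarch64), `torchcodec` is simply not installed, so the helper should log the warning and return `"pyav"`.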