diff --git a/examples/real_robot_example/README.md b/examples/real_robot_example/README.md
index f2567fd4..499f2b1d 100644
--- a/examples/real_robot_example/README.md
+++ b/examples/real_robot_example/README.md
@@ -27,9 +27,9 @@ Follow these steps:
 ## 0 - record examples
 
-Run the `0_record_training_data.py` example, selecting the duration and number of episodes you want to record, e.g.
+Run the `record_training_data.py` example, selecting the duration and number of episodes you want to record, e.g.
 
 ```
-DATA_DIR='./data' python 0_record_training_data.py \
+DATA_DIR='./data' python record_training_data.py \
     --repo-id=thomwolf/blue_red_sort \
     --num-episodes=50 \
     --num-frames=400
@@ -44,15 +44,34 @@ TODO:
 Use the standard dataset visualization script, pointing it to the right folder:
 
 ```
-DATA_DIR='./data' python visualize_dataset.py python lerobot/scripts/visualize_dataset.py \
+DATA_DIR='./data' python ../../lerobot/scripts/visualize_dataset.py \
     --repo-id thomwolf/blue_red_sort \
     --episode-index 0
 ```
 
-## (soon) Train a policy
+## 2 - Train a policy
 
-Run `1_train_real_policy.py` example
+From the example directory, let's run this command to train a model using ACT:
 
-## (soon) Evaluate the policy in the real world
+```
+DATA_DIR='./data' python ../../lerobot/scripts/train.py \
+    device=cuda \
+    hydra.searchpath=[file://./train_config/] \
+    hydra.run.dir=./outputs/train/blue_red_sort \
+    dataset_repo_id=thomwolf/blue_red_sort \
+    env=gym_real_world \
+    policy=act_real_world \
+    wandb.enable=false
+```
 
-Run `2_evaluate_real_policy.py` example
+## 3 - Evaluate the policy in the real world
+
+From the example directory, let's run this command to evaluate our policy.
+The configuration for running the policy is stored in the model checkpoint.
+You can override parameters as follows:
+
+```
+python run_policy.py \
+    -p ./outputs/train/blue_red_sort/checkpoints/last/pretrained_model/ \
+    env.episode_length=1000
+```
diff --git a/examples/real_robot_example/gym_real_world/gym_environment.py b/examples/real_robot_example/gym_real_world/gym_environment.py
index 9a1f5694..0507d2dc 100644
--- a/examples/real_robot_example/gym_real_world/gym_environment.py
+++ b/examples/real_robot_example/gym_real_world/gym_environment.py
@@ -11,13 +11,13 @@ from .robot import Robot
 FPS = 30
 
 CAMERAS_SHAPES = {
-    "observation.images.high": (480, 640, 3),
-    "observation.images.low": (480, 640, 3),
+    "images.high": (480, 640, 3),
+    "images.low": (480, 640, 3),
 }
 
 CAMERAS_PORTS = {
-    "observation.images.high": "/dev/video6",
-    "observation.images.low": "/dev/video0",
+    "images.high": "/dev/video6",
+    "images.low": "/dev/video0",
 }
 
 LEADER_PORT = "/dev/ttyACM1"
@@ -52,6 +52,8 @@ class RealEnv(gym.Env):
         leader_port: str = LEADER_PORT,
         warmup_steps: int = 100,
         trigger_torque=70,
+        fps: int = FPS,
+        fps_tolerance: float = 0.1,
     ):
         self.num_joints = num_joints
         self.cameras_shapes = cameras_shapes
@@ -62,6 +64,8 @@
         self.follower_port = follower_port
         self.leader_port = leader_port
         self.record = record
+        self.fps = fps
+        self.fps_tolerance = fps_tolerance
 
         # Initialize the robot
         self.follower = Robot(device_name=self.follower_port)
@@ -72,10 +76,13 @@
         # Initialize the cameras - sorted by camera names
         self.cameras = {}
         for cn, p in sorted(self.cameras_ports.items()):
-            assert cn.startswith("observation.images."), "Camera names must start with 'observation.images.'."
            self.cameras[cn] = cv2.VideoCapture(p)
-        if not all(c.isOpened() for c in self.cameras.values()):
-            raise OSError("Cannot open all camera ports.")
+            if not self.cameras[cn].isOpened():
+                raise OSError(
+                    f"Cannot open camera port {p} for {cn}."
+                    " Make sure the camera is connected and the port is correct."
+                    " Also check that you are not spinning up several instances of the same environment (eval.batch_size)."
+                )
 
         # Specify gym action and observation spaces
         observation_space = {}
@@ -98,7 +105,7 @@
         if self.cameras_shapes:
             for cn, hwc_shape in self.cameras_shapes.items():
                 # Assumes images are unsigned int8 in [0,255]
-                observation_space[f"images.{cn}"] = spaces.Box(
+                observation_space[cn] = spaces.Box(
                     low=0,
                     high=255,
                     # height x width x channels (e.g. 480 x 640 x 3)
@@ -111,22 +118,20 @@
 
         self._observation = {}
         self._terminated = False
-        self._action_time = time.time()
+        self.starting_time = time.time()
+        self.timestamps = []
 
     def _get_obs(self):
         qpos = self.follower.read_position()
         self._observation["agent_pos"] = pwm2pos(qpos)
         for cn, c in self.cameras.items():
-            self._observation[f"images.{cn}"] = capture_image(
-                c, self.cameras_shapes[cn][1], self.cameras_shapes[cn][0]
-            )
+            self._observation[cn] = capture_image(c, self.cameras_shapes[cn][1], self.cameras_shapes[cn][0])
 
         if self.record:
-            leader_pos = self.leader.read_position()
-            self._observation["leader_pos"] = pwm2pos(leader_pos)
+            action = self.leader.read_position()
+            self._observation["leader_pos"] = pwm2pos(action)
 
     def reset(self, seed: int | None = None):
-        del seed
         # Reset the robot and sync the leader and follower if we are recording
         for _ in range(self.warmup_steps):
             self._get_obs()
@@ -134,10 +139,22 @@
             self.follower.set_goal_pos(pos2pwm(self._observation["leader_pos"]))
         self._terminated = False
         info = {}
+        self.timestamps = []
         return self._observation, info
 
     def step(self, action: np.ndarray = None):
-        # Reset the observation
+        if self.timestamps:
+            # wait the right amount of time to stay at the desired fps (timestamps are relative to starting_time)
+            time.sleep(max(0, 1 / self.fps - (time.time() - self.starting_time - self.timestamps[-1])))
+            recording_time = time.time() - self.starting_time
+        else:
+            # it's the first step so we start the timer
+            self.starting_time = time.time()
+            recording_time = 0
+
+        self.timestamps.append(recording_time)
+
+        # Get the observation
         self._get_obs()
         if self.record:
             # Teleoperate the leader
@@ -145,9 +162,20 @@
         else:
             # Apply the action to the follower
             self.follower.set_goal_pos(pos2pwm(action))
+
         reward = 0
         terminated = truncated = self._terminated
-        info = {}
+        info = {"timestamp": recording_time, "fps_error": False}
+
+        # Check that we keep up with the desired fps (needs a previous timestamp to compare against)
+        if len(self.timestamps) > 1 and recording_time - self.timestamps[-2] > 1 / (self.fps - self.fps_tolerance):
+            print(
+                f"Error: recording time interval {recording_time - self.timestamps[-2]:.2f} is greater"
+                f" than expected {1 / (self.fps - self.fps_tolerance):.2f}"
+                f" at frame {len(self.timestamps)}"
+            )
+            info["fps_error"] = True
+
         return self._observation, reward, terminated, truncated, info
 
     def render(self): ...
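
Note: `RealEnv.step` now owns both the fps pacing and the fps bookkeeping that the recording script previously did itself, so every caller gets the same timing behavior. Below is a minimal sketch of how a caller is expected to drive the environment; the fps values are just illustrative, and it assumes the `gym_real_world` package from this example is importable (so `gym_real_world/RealEnv-v0` is registered) and that the default camera/serial ports in `gym_environment.py` match your machine:

```python
# Sanity-check cameras and the fps budget with a short teleoperated rollout.
import gym_real_world  # noqa: F401  # registers gym_real_world/RealEnv-v0
import gymnasium as gym

env = gym.make("gym_real_world/RealEnv-v0", disable_env_checker=True, record=True, fps=30, fps_tolerance=0.1)
observation, info = env.reset()
for _ in range(90):  # roughly 3 seconds at 30 fps
    # With record=True the leader arm drives the follower, so no action is passed.
    observation, reward, terminated, truncated, info = env.step(action=None)
    if info["fps_error"]:
        print(f"Fell behind the fps budget at t={info['timestamp']:.2f}s")
        break
env.close()
```
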
diff --git a/examples/real_robot_example/0_record_training_data.py b/examples/real_robot_example/record_training_data.py
similarity index 81%
rename from examples/real_robot_example/0_record_training_data.py
rename to examples/real_robot_example/record_training_data.py
index 9fb42ed5..fcb3aae6 100644
--- a/examples/real_robot_example/0_record_training_data.py
+++ b/examples/real_robot_example/record_training_data.py
@@ -1,7 +1,11 @@
+"""This script demonstrates how to record a LeRobot dataset of training data
+using a very simple gym environment
+(see examples/real_robot_example/gym_real_world/gym_environment.py).
+"""
+
 import argparse
 import copy
 import os
-import time
 
 import gym_real_world  # noqa: F401
 import gymnasium as gym
@@ -27,15 +31,12 @@
 parser.add_argument("--num-frames", type=int, default=400)
 parser.add_argument("--num-workers", type=int, default=16)
 parser.add_argument("--keep-last", action="store_true")
 parser.add_argument("--push-to-hub", action="store_true")
+parser.add_argument("--fps", type=int, default=30, help="Frames per second of the recording.")
 parser.add_argument(
-    "--fps",
-    type=int,
-    default=30,
-    help="Frames per second of the recording."
-    "If we are not able to record at this fps, we will adjust the fps in the metadata.",
-)
-parser.add_argument(
-    "--tolerance", type=float, default=0.01, help="Tolerance in seconds for the recording time."
+    "--fps-tolerance",
+    type=float,
+    default=0.1,
+    help="Tolerance in fps: an episode is dropped if recording falls below fps - fps_tolerance.",
 )
 parser.add_argument(
     "--revision", type=str, default=CODEBASE_VERSION, help="Codebase version used to generate the dataset."
 )
@@ -47,7 +48,7 @@
 num_episodes = args.num_episodes
 num_frames = args.num_frames
 revision = args.revision
 fps = args.fps
-tolerance = args.tolerance
+fps_tolerance = args.fps_tolerance
 
 out_data = DATA_DIR / repo_id
@@ -67,7 +68,7 @@ if not os.path.exists(videos_dir):
 
 if __name__ == "__main__":
     # Create the gym environment - check the kwargs in gym_real_world/gym_environment.py
     gym_handle = "gym_real_world/RealEnv-v0"
-    env = gym.make(gym_handle, disable_env_checker=True, record=True)
+    env = gym.make(gym_handle, disable_env_checker=True, record=True, fps=fps, fps_tolerance=fps_tolerance)
 
     ep_dicts = []
     episode_data_index = {"from": [], "to": []}
@@ -84,59 +85,46 @@
         os.system(f'spd-say "go {ep_idx}"')
         # init buffers
         obs_replay = {k: [] for k in env.observation_space}
-        timestamps = []
-        starting_time = time.time()
+        drop_episode = False
+        timestamps = []
 
         for _ in tqdm(range(num_frames)):
             # Apply the next action
-            observation, _, _, _, _ = env.step(action=None)
+            observation, _, _, _, info = env.step(action=None)
             # images_stacked = np.hstack(list(observation['pixels'].values()))
             # images_stacked = cv2.cvtColor(images_stacked, cv2.COLOR_RGB2BGR)
             # cv2.imshow('frame', images_stacked)
 
+            if info["fps_error"]:
+                os.system(f'spd-say "Error fps too low, dropping episode {ep_idx}"')
+                drop_episode = True
+                break
+
             # store data
             for key in observation:
                 obs_replay[key].append(copy.deepcopy(observation[key]))
-
-            recording_time = time.time() - starting_time
-            timestamps.append(recording_time)
-
-            # Check if we are able to keep up with the desired fps
-            if recording_time > num_frames / fps + tolerance:
-                print(
-                    f"Error: recording time {recording_time:.2f} is greater than expected {num_frames / fps:.2f}"
-                    f" + tolerance {tolerance:.2f}"
-                    f" at frame {len(timestamps)}"
-                    f" in episode {ep_idx}."
-                    f"Dropping the rest of the episode."
-                )
-                break
-
-            # wait the right amount of time to stay at the desired fps
-            time.sleep(max(0, 1 / fps - (time.time() - starting_time)))
+            timestamps.append(info["timestamp"])
 
             # if cv2.waitKey(1) & 0xFF == ord('q'):
             #     break
 
         os.system('spd-say "stop"')
 
-        if len(timestamps) == num_frames:
+        if not drop_episode:
             os.system(f'spd-say "saving episode {ep_idx}"')
             ep_dict = {}
             # store images in png and create the video
             for img_key in env.cameras:
                 save_images_concurrently(
-                    obs_replay[f"images.{img_key}"],
+                    obs_replay[img_key],
                     images_dir / f"{img_key}_episode_{ep_idx:06d}",
                     args.num_workers,
                 )
-                # for i in tqdm(range(num_frames)):
-                #     cv2.imwrite(str(images_dir / f"{img_key}_episode_{ep_idx:06d}" / f"frame_{i:06d}.png"),
-                #                 obs_replay[i]['pixels'][img_key])
                 fname = f"{img_key}_episode_{ep_idx:06d}.mp4"
                 # store the reference to the video frame
-                ep_dict[img_key] = [{"path": f"videos/{fname}", "timestamp": tstp} for tstp in timestamps]
-                # shutil.rmtree(tmp_imgs_dir)
+                ep_dict[f"observation.{img_key}"] = [
+                    {"path": f"videos/{fname}", "timestamp": tstp} for tstp in timestamps
+                ]
 
             state = torch.tensor(np.array(obs_replay["agent_pos"]))
             action = torch.tensor(np.array(obs_replay["leader_pos"]))
@@ -198,8 +186,6 @@
     features["timestamp"] = Value(dtype="float32", id=None)
     features["next.done"] = Value(dtype="bool", id=None)
    features["index"] = Value(dtype="int64", id=None)
-    # TODO(rcadene): add success
-    # features["next.success"] = Value(dtype='bool', id=None)
 
     hf_dataset = Dataset.from_dict(data_dict, features=Features(features))
     hf_dataset.set_transform(hf_transform_to_torch)
diff --git a/examples/real_robot_example/run_policy.py b/examples/real_robot_example/run_policy.py
new file mode 100644
index 00000000..a47fb914
--- /dev/null
+++ b/examples/real_robot_example/run_policy.py
@@ -0,0 +1,60 @@
+import argparse
+import logging
+from pathlib import Path
+
+import gym_real_world  # noqa: F401
+import gymnasium as gym  # noqa: F401
+from huggingface_hub import snapshot_download
+from huggingface_hub.utils._errors import RepositoryNotFoundError
+from huggingface_hub.utils._validators import HFValidationError
+
+from lerobot.common.utils.utils import init_logging
+from lerobot.scripts.eval import eval
+
+if __name__ == "__main__":
+    init_logging()
+
+    parser = argparse.ArgumentParser(
+        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    parser.add_argument(
+        "-p",
+        "--pretrained-policy-name-or-path",
+        required=True,
+        help=(
+            "Either the repo ID of a model hosted on the Hub or a path to a directory containing weights "
+            "saved using `Policy.save_pretrained`, e.g. the `pretrained_model` directory of a local "
+            "training checkpoint."
+        ),
+    )
+    parser.add_argument("--revision", help="Optionally provide the Hugging Face Hub revision ID.")
+    parser.add_argument(
+        "overrides",
+        nargs="*",
+        help="Any key=value arguments to override config values (use dots for nested overrides, e.g. `env.episode_length=1000`).",
+    )
+    args = parser.parse_args()
+
+    try:
+        pretrained_policy_path = Path(
+            snapshot_download(args.pretrained_policy_name_or_path, revision=args.revision)
+        )
+    except (HFValidationError, RepositoryNotFoundError) as e:
+        if isinstance(e, HFValidationError):
+            error_message = (
+                "The provided pretrained_policy_name_or_path is not a valid Hugging Face Hub repo ID."
+ ) + else: + error_message = ( + "The provided pretrained_policy_name_or_path was not found on the Hugging Face Hub." + ) + + logging.warning(f"{error_message} Treating it as a local directory.") + pretrained_policy_path = Path(args.pretrained_policy_name_or_path) + if not pretrained_policy_path.is_dir() or not pretrained_policy_path.exists(): + raise ValueError( + "The provided pretrained_policy_name_or_path is not a valid/existing Hugging Face Hub " + "repo ID, nor is it an existing local directory." + ) + + eval(pretrained_policy_path=pretrained_policy_path, config_overrides=args.overrides) diff --git a/examples/real_robot_example/config/dora_koch_real.yaml b/examples/real_robot_example/train_config/env/gym_real_world.yaml similarity index 50% rename from examples/real_robot_example/config/dora_koch_real.yaml rename to examples/real_robot_example/train_config/env/gym_real_world.yaml index bf067f50..b31bd57f 100644 --- a/examples/real_robot_example/config/dora_koch_real.yaml +++ b/examples/real_robot_example/train_config/env/gym_real_world.yaml @@ -3,11 +3,10 @@ fps: 30 env: - name: dora - task: DoraKoch-v0 + name: real_world + task: RealEnv-v0 state_dim: 6 action_dim: 6 fps: ${fps} - episode_length: 400 - gym: - fps: ${fps} + episode_length: 200 + real_world: true diff --git a/examples/real_robot_example/config/act_koch_real.yaml b/examples/real_robot_example/train_config/policy/act_real_world.yaml similarity index 83% rename from examples/real_robot_example/config/act_koch_real.yaml rename to examples/real_robot_example/train_config/policy/act_real_world.yaml index b5f4bd98..107b7658 100644 --- a/examples/real_robot_example/config/act_koch_real.yaml +++ b/examples/real_robot_example/train_config/policy/act_real_world.yaml @@ -1,8 +1,8 @@ # @package _global_ # Use `act_real.yaml` to train on real-world Aloha/Aloha2 datasets. -# Compared to `act.yaml`, it contains 4 cameras (i.e. cam_right_wrist, cam_left_wrist, images, -# cam_low) instead of 1 camera (i.e. top). Also, `training.eval_freq` is set to -1. This config is used +# Compared to `act.yaml`, it contains 4 cameras (i.e. right_wrist, left_wrist, images, +# low) instead of 1 camera (i.e. top). Also, `training.eval_freq` is set to -1. This config is used # to evaluate checkpoints at a certain frequency of training steps. When it is set to -1, it deactivates evaluation. # This is because real-world evaluation is done through [dora-lerobot](https://github.com/dora-rs/dora-lerobot). # Look at its README for more information on how to evaluate a checkpoint in the real-world. @@ -15,14 +15,14 @@ # ``` seed: 1000 -dataset_repo_id: thomwolf/blue_sort +dataset_repo_id: ??? override_dataset_stats: - observation.images.cam_high: + observation.images.high: # stats from imagenet, since we use a pretrained vision model mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1) std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1) - observation.images.cam_low: + observation.images.low: # stats from imagenet, since we use a pretrained vision model mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1) std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1) @@ -46,8 +46,8 @@ training: action: "[i / ${fps} for i in range(1, ${policy.chunk_size} + 1)]" eval: - n_episodes: 50 - batch_size: 50 + n_episodes: 1 + batch_size: 1 # See `configuration_act.py` for more details. policy: @@ -60,16 +60,16 @@ policy: input_shapes: # TODO(rcadene, alexander-soare): add variables for height and width from the dataset/env? 
-    observation.images.cam_high: [3, 480, 640]
-    observation.images.cam_low: [3, 480, 640]
+    observation.images.high: [3, 480, 640]
+    observation.images.low: [3, 480, 640]
     observation.state: ["${env.state_dim}"]
   output_shapes:
     action: ["${env.action_dim}"]
 
   # Normalization / Unnormalization
   input_normalization_modes:
-    observation.images.cam_high: mean_std
-    observation.images.cam_low: mean_std
+    observation.images.high: mean_std
+    observation.images.low: mean_std
     observation.state: mean_std
   output_normalization_modes:
     action: mean_std
diff --git a/lerobot/common/datasets/factory.py b/lerobot/common/datasets/factory.py
index 4732f577..bb10022c 100644
--- a/lerobot/common/datasets/factory.py
+++ b/lerobot/common/datasets/factory.py
@@ -56,7 +56,7 @@ def make_dataset(cfg, split: str = "train") -> LeRobotDataset | MultiLeRobotData
     )
 
-    # A soft check to warn if the environment matches the dataset. Don't check if we are using a real world env (dora).
-    if cfg.env.name != "dora":
+    # A soft check to warn if the environment doesn't match the dataset. Skip it for real-world envs.
+    if not cfg.env.real_world:
         if isinstance(cfg.dataset_repo_id, str):
             dataset_repo_ids = [cfg.dataset_repo_id]  # single dataset
         else:
diff --git a/lerobot/common/envs/utils.py b/lerobot/common/envs/utils.py
index 8fce0369..507f86a2 100644
--- a/lerobot/common/envs/utils.py
+++ b/lerobot/common/envs/utils.py
@@ -29,10 +29,12 @@ def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Ten
     # map to expected inputs for the policy
     return_observations = {}
 
-    if isinstance(observations["pixels"], dict):
+    if "pixels" in observations and isinstance(observations["pixels"], dict):
         imgs = {f"observation.images.{key}": img for key, img in observations["pixels"].items()}
-    else:
+    elif "pixels" in observations and isinstance(observations["pixels"], np.ndarray):
         imgs = {"observation.image": observations["pixels"]}
+    else:
+        imgs = {f"observation.{key}": img for key, img in observations.items() if "images" in key}
 
     for imgkey, img in imgs.items():
         img = torch.from_numpy(img)
diff --git a/lerobot/configs/env/aloha.yaml b/lerobot/configs/env/aloha.yaml
index 296a4481..371c32bc 100644
--- a/lerobot/configs/env/aloha.yaml
+++ b/lerobot/configs/env/aloha.yaml
@@ -9,6 +9,7 @@ env:
   action_dim: 14
   fps: ${fps}
   episode_length: 400
+  real_world: false
   gym:
     obs_type: pixels_agent_pos
     render_mode: rgb_array
diff --git a/lerobot/configs/env/dora_aloha_real.yaml b/lerobot/configs/env/dora_aloha_real.yaml
index 088781d4..6661c1d3 100644
--- a/lerobot/configs/env/dora_aloha_real.yaml
+++ b/lerobot/configs/env/dora_aloha_real.yaml
@@ -9,5 +9,6 @@ env:
   action_dim: 14
   fps: ${fps}
   episode_length: 400
+  real_world: true
   gym:
     fps: ${fps}
diff --git a/lerobot/configs/env/pusht.yaml b/lerobot/configs/env/pusht.yaml
index 771fbbf4..dfe69a0d 100644
--- a/lerobot/configs/env/pusht.yaml
+++ b/lerobot/configs/env/pusht.yaml
@@ -10,6 +10,7 @@ env:
   action_dim: 2
   fps: ${fps}
   episode_length: 300
+  real_world: false
   gym:
     obs_type: pixels_agent_pos
     render_mode: rgb_array
diff --git a/lerobot/configs/env/xarm.yaml b/lerobot/configs/env/xarm.yaml
index 9dbb96f5..bcee46d6 100644
--- a/lerobot/configs/env/xarm.yaml
+++ b/lerobot/configs/env/xarm.yaml
@@ -10,6 +10,7 @@ env:
   action_dim: 4
   fps: ${fps}
   episode_length: 25
+  real_world: false
   gym:
     obs_type: pixels_agent_pos
     render_mode: rgb_array
diff --git a/lerobot/scripts/eval.py b/lerobot/scripts/eval.py
index 784e9fc6..6430d399 100644
--- a/lerobot/scripts/eval.py
+++ b/lerobot/scripts/eval.py
@@ -164,7 +164,7 @@
         # VectorEnv stores is_success in `info["final_info"][env_index]["is_success"]`. "final_info" isn't
"final_info" isn't # available of none of the envs finished. if "final_info" in info: - successes = [info["is_success"] if info is not None else False for info in info["final_info"]] + successes = [i["is_success"] if i is not None else False for i in info["final_info"]] else: successes = [False] * env.num_envs diff --git a/lerobot/scripts/train.py b/lerobot/scripts/train.py index 860412bd..3b5b8948 100644 --- a/lerobot/scripts/train.py +++ b/lerobot/scripts/train.py @@ -406,7 +406,8 @@ def train(cfg: DictConfig, out_dir: str | None = None, job_name: str | None = No step += 1 - eval_env.close() + if cfg.training.eval_freq > 0: + eval_env.close() logging.info("End of training")