Merge remote-tracking branch 'upstream/main' into policy_compatibility
commit 4ae8d61629

Makefile | 7
@@ -22,9 +22,8 @@ test-end-to-end:
	${MAKE} test-act-ete-eval
	${MAKE} test-diffusion-ete-train
	${MAKE} test-diffusion-ete-eval
-	# TODO(rcadene, alexander-soare): enable end-to-end tests for tdmpc
-	# ${MAKE} test-tdmpc-ete-train
-	# ${MAKE} test-tdmpc-ete-eval
+	${MAKE} test-tdmpc-ete-train
+	${MAKE} test-tdmpc-ete-eval
	${MAKE} test-default-ete-eval

 test-act-ete-train:
@@ -80,7 +79,7 @@ test-tdmpc-ete-train:
		policy=tdmpc \
		env=xarm \
		env.task=XarmLift-v0 \
-		dataset_repo_id=lerobot/xarm_lift_medium_replay \
+		dataset_repo_id=lerobot/xarm_lift_medium \
		wandb.enable=False \
		training.offline_steps=2 \
		training.online_steps=2 \
@@ -1,8 +1,10 @@
 import logging
 import os.path as osp
 import random
+from contextlib import contextmanager
 from datetime import datetime
 from pathlib import Path
+from typing import Generator

 import hydra
 import numpy as np
@@ -39,6 +41,31 @@ def set_global_seed(seed):
     torch.cuda.manual_seed_all(seed)


+@contextmanager
+def seeded_context(seed: int) -> Generator[None, None, None]:
+    """Set the seed when entering a context, and restore the prior random state at exit.
+
+    Example usage:
+
+    ```
+    a = random.random()  # produces some random number
+    with seeded_context(1337):
+        b = random.random()  # produces some other random number
+    c = random.random()  # produces yet another random number, the same one we would have gotten if we had never drawn `b`
+    ```
+    """
+    random_state = random.getstate()
+    np_random_state = np.random.get_state()
+    torch_random_state = torch.random.get_rng_state()
+    torch_cuda_random_state = torch.cuda.random.get_rng_state()
+    set_global_seed(seed)
+    yield None
+    random.setstate(random_state)
+    np.random.set_state(np_random_state)
+    torch.random.set_rng_state(torch_random_state)
+    torch.cuda.random.set_rng_state(torch_cuda_random_state)
+
+
 def init_logging():
     def custom_format(record):
         dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
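For context, here is a minimal sketch of how the new context manager behaves at its boundaries. It only assumes the `seeded_context` and `set_global_seed` helpers added above, plus a CUDA-capable torch build (the helper saves and restores the CUDA RNG state unconditionally):

```python
import torch

from lerobot.common.utils.utils import seeded_context, set_global_seed

set_global_seed(0)
x = torch.rand(1)  # first draw from the globally seeded torch RNG

with seeded_context(1337):
    # All four RNGs (random, numpy, torch CPU, torch CUDA) are reseeded to
    # 1337 here, so this draw is reproducible regardless of what ran before.
    y = torch.rand(1)

# On exit the saved RNG states are restored, so this draw is exactly what a
# second `torch.rand(1)` would have returned had the `with` block never run.
z = torch.rand(1)
```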
@@ -3,6 +3,12 @@
 seed: 1000
 dataset_repo_id: lerobot/aloha_sim_insertion_human

+override_dataset_stats:
+  observation.images.top:
+    # stats from imagenet, since we use a pretrained vision model
+    mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1)
+    std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1)
+
 training:
   offline_steps: 80000
   online_steps: 0

@@ -18,12 +24,6 @@ training:
   grad_clip_norm: 10
   online_steps_between_rollouts: 1

-  override_dataset_stats:
-    observation.images.top:
-      # stats from imagenet, since we use a pretrained vision model
-      mean: [[[0.485]], [[0.456]], [[0.406]]] # (c,1,1)
-      std: [[[0.229]], [[0.224]], [[0.225]]] # (c,1,1)
-
   delta_timestamps:
     action: "[i / ${fps} for i in range(${policy.chunk_size})]"
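The `(c,1,1)` shapes in the stat overrides are there so the per-channel values broadcast against a `(c,h,w)` image tensor. A minimal sketch of the normalization these ImageNet stats imply (the tensor names are illustrative, not lerobot API):

```python
import torch

# ImageNet per-channel stats from the config above, shaped (c,1,1) so they
# broadcast over a (c,h,w) image.
mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

image = torch.rand(3, 224, 224)  # dummy image with values in [0, 1]
normalized = (image - mean) / std  # standard mean/std normalization
```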
@@ -7,6 +7,20 @@
 seed: 100000
 dataset_repo_id: lerobot/pusht

+override_dataset_stats:
+  # TODO(rcadene, alexander-soare): should we remove image stats as well? do we use a pretrained vision model?
+  observation.image:
+    mean: [[[0.5]], [[0.5]], [[0.5]]] # (c,1,1)
+    std: [[[0.5]], [[0.5]], [[0.5]]] # (c,1,1)
+  # TODO(rcadene, alexander-soare): we override state and action stats to use the same as the pretrained model
+  # from the original codebase, but we should remove these and train our own pretrained model
+  observation.state:
+    min: [13.456424, 32.938293]
+    max: [496.14618, 510.9579]
+  action:
+    min: [12.0, 25.0]
+    max: [511.0, 511.0]
+
 training:
   offline_steps: 200000
   online_steps: 0

@@ -34,20 +48,6 @@ eval:
   n_episodes: 50
   batch_size: 50

-override_dataset_stats:
-  # TODO(rcadene, alexander-soare): should we remove image stats as well? do we use a pretrained vision model?
-  observation.image:
-    mean: [[[0.5]], [[0.5]], [[0.5]]] # (c,1,1)
-    std: [[[0.5]], [[0.5]], [[0.5]]] # (c,1,1)
-  # TODO(rcadene, alexander-soare): we override state and action stats to use the same as the pretrained model
-  # from the original codebase, but we should remove these and train our own pretrained model
-  observation.state:
-    min: [13.456424, 32.938293]
-    max: [496.14618, 510.9579]
-  action:
-    min: [12.0, 25.0]
-    max: [511.0, 511.0]
-
 policy:
   name: diffusion
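Unlike the image stats, the `observation.state` and `action` overrides are min/max stats. A common use for such stats, and presumably the intent here (this snippet sketches the convention, not lerobot's actual normalization code), is rescaling values into [-1, 1]:

```python
import torch

# `action` min/max stats from the config above (PushT pixel coordinates).
action_min = torch.tensor([12.0, 25.0])
action_max = torch.tensor([511.0, 511.0])

raw_action = torch.tensor([100.0, 200.0])  # a dummy raw action
# Min/max rescaling into [-1, 1].
normalized = (raw_action - action_min) / (action_max - action_min) * 2 - 1
```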
@@ -1,7 +1,7 @@
 # @package _global_

 seed: 1
-dataset_repo_id: lerobot/xarm_lift_medium_replay
+dataset_repo_id: lerobot/xarm_lift_medium

 training:
   offline_steps: 25000
Binary files not shown (8 files).
@@ -269,7 +269,7 @@ def test_normalize(insert_temporal_dim):
 @pytest.mark.parametrize(
     "env_name, policy_name, extra_overrides",
     [
-        # ("xarm", "tdmpc", ["policy.n_action_repeats=2"]),
+        ("xarm", "tdmpc", []),
         (
             "pusht",
             "diffusion",
@@ -0,0 +1,38 @@
+import random
+from typing import Callable
+
+import numpy as np
+import pytest
+import torch
+
+from lerobot.common.utils.utils import seeded_context, set_global_seed
+
+
+@pytest.mark.parametrize(
+    "rand_fn",
+    [
+        random.random,
+        np.random.random,
+        lambda: torch.rand(1).item(),
+    ]
+    + (
+        [lambda: torch.rand(1, device="cuda")] if torch.cuda.is_available() else []
+    ),
+)
+def test_seeding(rand_fn: Callable[[], float]):
+    set_global_seed(0)
+    a = rand_fn()
+    with seeded_context(1337):
+        c = rand_fn()
+    b = rand_fn()
+    set_global_seed(0)
+    a_ = rand_fn()
+    b_ = rand_fn()
+    # Check that `set_global_seed` lets us reproduce a and b.
+    assert a_ == a
+    # Additionally, check that the `seeded_context` didn't interrupt the global RNG.
+    assert b_ == b
+    set_global_seed(1337)
+    c_ = rand_fn()
+    # Check that `seeded_context` and `set_global_seed` give the same reproducibility.
+    assert c_ == c
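Taken together, the three assertions pin down the seeding contract: `a_ == a` shows that `set_global_seed` makes the global stream reproducible, `b_ == b` shows that `seeded_context` restores the global RNG state on exit rather than interrupting it, and `c_ == c` shows that seeding inside the context is equivalent to calling `set_global_seed` with the same seed.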