From 443a9eec88bb2ecb55f34f603865706d1671ff50 Mon Sep 17 00:00:00 2001
From: Simon Alibert
Date: Thu, 31 Oct 2024 21:43:29 +0100
Subject: [PATCH] Remove/comment obsolete tests

---
 tests/test_datasets.py | 229 +++++++++++++++--------------------------
 1 file changed, 82 insertions(+), 147 deletions(-)

diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index 855cb26f..f540c6a8 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -16,7 +16,6 @@
 import json
 import logging
 from copy import deepcopy
-from itertools import chain
 from pathlib import Path

 import einops
@@ -30,15 +29,13 @@ import lerobot
 from lerobot.common.datasets.compute_stats import (
     aggregate_stats,
     compute_stats,
-    get_stats_einops_patterns,
 )
 from lerobot.common.datasets.factory import make_dataset
-from lerobot.common.datasets.lerobot_dataset import LeRobotDataset, MultiLeRobotDataset
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
 from lerobot.common.datasets.utils import (
     create_branch,
     flatten_dict,
     hf_transform_to_torch,
-    load_previous_and_future_frames,
     unflatten_dict,
 )
 from lerobot.common.utils.utils import init_hydra_config, seeded_context
@@ -72,6 +69,7 @@ def test_same_attributes_defined(dataset_create, dataset_init):
     assert init_attr == create_attr, "Attribute sets do not match between __init__ and .create()"


+@pytest.mark.skip("TODO after v2 migration / removing hydra")
 @pytest.mark.parametrize(
     "env_name, repo_id, policy_name",
     lerobot.env_dataset_policy_triplets
@@ -143,162 +141,97 @@ def test_factory(env_name, repo_id, policy_name):
             assert key in item, f"{key}"


-# TODO(alexander-soare): If you're hunting for savings on testing time, this takes about 5 seconds.
-def test_multilerobotdataset_frames():
-    """Check that all dataset frames are incorporated."""
-    # Note: use the image variants of the dataset to make the test approx 3x faster.
-    # Note: We really do need three repo_ids here as at some point this caught an issue with the chaining
-    # logic that wouldn't be caught with two repo IDs.
-    repo_ids = [
-        "lerobot/aloha_sim_insertion_human_image",
-        "lerobot/aloha_sim_transfer_cube_human_image",
-        "lerobot/aloha_sim_insertion_scripted_image",
-    ]
-    sub_datasets = [LeRobotDataset(repo_id) for repo_id in repo_ids]
-    dataset = MultiLeRobotDataset(repo_ids)
-    assert len(dataset) == sum(len(d) for d in sub_datasets)
-    assert dataset.num_samples == sum(d.num_samples for d in sub_datasets)
-    assert dataset.num_episodes == sum(d.num_episodes for d in sub_datasets)
+# # TODO(alexander-soare): If you're hunting for savings on testing time, this takes about 5 seconds.
+# def test_multilerobotdataset_frames():
+#     """Check that all dataset frames are incorporated."""
+#     # Note: use the image variants of the dataset to make the test approx 3x faster.
+#     # Note: We really do need three repo_ids here as at some point this caught an issue with the chaining
+#     # logic that wouldn't be caught with two repo IDs.
+#     repo_ids = [
+#         "lerobot/aloha_sim_insertion_human_image",
+#         "lerobot/aloha_sim_transfer_cube_human_image",
+#         "lerobot/aloha_sim_insertion_scripted_image",
+#     ]
+#     sub_datasets = [LeRobotDataset(repo_id) for repo_id in repo_ids]
+#     dataset = MultiLeRobotDataset(repo_ids)
+#     assert len(dataset) == sum(len(d) for d in sub_datasets)
+#     assert dataset.num_samples == sum(d.num_samples for d in sub_datasets)
+#     assert dataset.num_episodes == sum(d.num_episodes for d in sub_datasets)

-    # Run through all items of the LeRobotDatasets in parallel with the items of the MultiLerobotDataset and
-    # check they match.
-    expected_dataset_indices = []
-    for i, sub_dataset in enumerate(sub_datasets):
-        expected_dataset_indices.extend([i] * len(sub_dataset))
+#     # Run through all items of the LeRobotDatasets in parallel with the items of the MultiLerobotDataset and
+#     # check they match.
+#     expected_dataset_indices = []
+#     for i, sub_dataset in enumerate(sub_datasets):
+#         expected_dataset_indices.extend([i] * len(sub_dataset))

-    for expected_dataset_index, sub_dataset_item, dataset_item in zip(
-        expected_dataset_indices, chain(*sub_datasets), dataset, strict=True
-    ):
-        dataset_index = dataset_item.pop("dataset_index")
-        assert dataset_index == expected_dataset_index
-        assert sub_dataset_item.keys() == dataset_item.keys()
-        for k in sub_dataset_item:
-            assert torch.equal(sub_dataset_item[k], dataset_item[k])
+#     for expected_dataset_index, sub_dataset_item, dataset_item in zip(
+#         expected_dataset_indices, chain(*sub_datasets), dataset, strict=True
+#     ):
+#         dataset_index = dataset_item.pop("dataset_index")
+#         assert dataset_index == expected_dataset_index
+#         assert sub_dataset_item.keys() == dataset_item.keys()
+#         for k in sub_dataset_item:
+#             assert torch.equal(sub_dataset_item[k], dataset_item[k])


-def test_compute_stats_on_xarm():
-    """Check that the statistics are computed correctly according to the stats_patterns property.
+# TODO(aliberts, rcadene): Refactor and move this to a tests/test_compute_stats.py
+# def test_compute_stats_on_xarm():
+#     """Check that the statistics are computed correctly according to the stats_patterns property.

-    We compare with taking a straight min, mean, max, std of all the data in one pass (which we can do
-    because we are working with a small dataset).
-    """
-    dataset = LeRobotDataset("lerobot/xarm_lift_medium")
+#     We compare with taking a straight min, mean, max, std of all the data in one pass (which we can do
+#     because we are working with a small dataset).
+#     """
+#     dataset = LeRobotDataset("lerobot/xarm_lift_medium")

-    # reduce size of dataset sample on which stats compute is tested to 10 frames
-    dataset.hf_dataset = dataset.hf_dataset.select(range(10))
+#     # reduce size of dataset sample on which stats compute is tested to 10 frames
+#     dataset.hf_dataset = dataset.hf_dataset.select(range(10))

-    # Note: we set the batch size to be smaller than the whole dataset to make sure we are testing batched
-    # computation of the statistics. While doing this, we also make sure it works when we don't divide the
-    # dataset into even batches.
-    computed_stats = compute_stats(dataset, batch_size=int(len(dataset) * 0.25), num_workers=0)
+#     # Note: we set the batch size to be smaller than the whole dataset to make sure we are testing batched
+#     # computation of the statistics. While doing this, we also make sure it works when we don't divide the
+#     # dataset into even batches.
+#     computed_stats = compute_stats(dataset, batch_size=int(len(dataset) * 0.25), num_workers=0)

-    # get einops patterns to aggregate batches and compute statistics
-    stats_patterns = get_stats_einops_patterns(dataset)
+#     # get einops patterns to aggregate batches and compute statistics
+#     stats_patterns = get_stats_einops_patterns(dataset)

-    # get all frames from the dataset in the same dtype and range as during compute_stats
-    dataloader = torch.utils.data.DataLoader(
-        dataset,
-        num_workers=0,
-        batch_size=len(dataset),
-        shuffle=False,
-    )
-    full_batch = next(iter(dataloader))
+#     # get all frames from the dataset in the same dtype and range as during compute_stats
+#     dataloader = torch.utils.data.DataLoader(
+#         dataset,
+#         num_workers=0,
+#         batch_size=len(dataset),
+#         shuffle=False,
+#     )
+#     full_batch = next(iter(dataloader))

-    # compute stats based on all frames from the dataset without any batching
-    expected_stats = {}
-    for k, pattern in stats_patterns.items():
-        full_batch[k] = full_batch[k].float()
-        expected_stats[k] = {}
-        expected_stats[k]["mean"] = einops.reduce(full_batch[k], pattern, "mean")
-        expected_stats[k]["std"] = torch.sqrt(
-            einops.reduce((full_batch[k] - expected_stats[k]["mean"]) ** 2, pattern, "mean")
-        )
-        expected_stats[k]["min"] = einops.reduce(full_batch[k], pattern, "min")
-        expected_stats[k]["max"] = einops.reduce(full_batch[k], pattern, "max")
+#     # compute stats based on all frames from the dataset without any batching
+#     expected_stats = {}
+#     for k, pattern in stats_patterns.items():
+#         full_batch[k] = full_batch[k].float()
+#         expected_stats[k] = {}
+#         expected_stats[k]["mean"] = einops.reduce(full_batch[k], pattern, "mean")
+#         expected_stats[k]["std"] = torch.sqrt(
+#             einops.reduce((full_batch[k] - expected_stats[k]["mean"]) ** 2, pattern, "mean")
+#         )
+#         expected_stats[k]["min"] = einops.reduce(full_batch[k], pattern, "min")
+#         expected_stats[k]["max"] = einops.reduce(full_batch[k], pattern, "max")

-    # test computed stats match expected stats
-    for k in stats_patterns:
-        assert torch.allclose(computed_stats[k]["mean"], expected_stats[k]["mean"])
-        assert torch.allclose(computed_stats[k]["std"], expected_stats[k]["std"])
-        assert torch.allclose(computed_stats[k]["min"], expected_stats[k]["min"])
-        assert torch.allclose(computed_stats[k]["max"], expected_stats[k]["max"])
+#     # test computed stats match expected stats
+#     for k in stats_patterns:
+#         assert torch.allclose(computed_stats[k]["mean"], expected_stats[k]["mean"])
+#         assert torch.allclose(computed_stats[k]["std"], expected_stats[k]["std"])
+#         assert torch.allclose(computed_stats[k]["min"], expected_stats[k]["min"])
+#         assert torch.allclose(computed_stats[k]["max"], expected_stats[k]["max"])

-    # load stats used during training which are expected to match the ones returned by computed_stats
-    loaded_stats = dataset.stats  # noqa: F841
+#     # load stats used during training which are expected to match the ones returned by computed_stats
+#     loaded_stats = dataset.stats  # noqa: F841

-    # TODO(rcadene): we can't test this because expected_stats is computed on a subset
-    # # test loaded stats match expected stats
-    # for k in stats_patterns:
-    #     assert torch.allclose(loaded_stats[k]["mean"], expected_stats[k]["mean"])
-    #     assert torch.allclose(loaded_stats[k]["std"], expected_stats[k]["std"])
-    #     assert torch.allclose(loaded_stats[k]["min"], expected_stats[k]["min"])
-    #     assert torch.allclose(loaded_stats[k]["max"], expected_stats[k]["max"])
-
-
-def test_load_previous_and_future_frames_within_tolerance():
-    hf_dataset = Dataset.from_dict(
-        {
-            "timestamp": [0.1, 0.2, 0.3, 0.4, 0.5],
-            "index": [0, 1, 2, 3, 4],
-            "episode_index": [0, 0, 0, 0, 0],
-        }
-    )
-    hf_dataset.set_transform(hf_transform_to_torch)
-    episode_data_index = {
-        "from": torch.tensor([0]),
-        "to": torch.tensor([5]),
-    }
-    delta_timestamps = {"index": [-0.2, 0, 0.139]}
-    tol = 0.04
-    item = hf_dataset[2]
-    item = load_previous_and_future_frames(item, hf_dataset, episode_data_index, delta_timestamps, tol)
-    data, is_pad = item["index"], item["index_is_pad"]
-    assert torch.equal(data, torch.tensor([0, 2, 3])), "Data does not match expected values"
-    assert not is_pad.any(), "Unexpected padding detected"
-
-
-def test_load_previous_and_future_frames_outside_tolerance_inside_episode_range():
-    hf_dataset = Dataset.from_dict(
-        {
-            "timestamp": [0.1, 0.2, 0.3, 0.4, 0.5],
-            "index": [0, 1, 2, 3, 4],
-            "episode_index": [0, 0, 0, 0, 0],
-        }
-    )
-    hf_dataset.set_transform(hf_transform_to_torch)
-    episode_data_index = {
-        "from": torch.tensor([0]),
-        "to": torch.tensor([5]),
-    }
-    delta_timestamps = {"index": [-0.2, 0, 0.141]}
-    tol = 0.04
-    item = hf_dataset[2]
-    with pytest.raises(AssertionError):
-        load_previous_and_future_frames(item, hf_dataset, episode_data_index, delta_timestamps, tol)
-
-
-def test_load_previous_and_future_frames_outside_tolerance_outside_episode_range():
-    hf_dataset = Dataset.from_dict(
-        {
-            "timestamp": [0.1, 0.2, 0.3, 0.4, 0.5],
-            "index": [0, 1, 2, 3, 4],
-            "episode_index": [0, 0, 0, 0, 0],
-        }
-    )
-    hf_dataset.set_transform(hf_transform_to_torch)
-    episode_data_index = {
-        "from": torch.tensor([0]),
-        "to": torch.tensor([5]),
-    }
-    delta_timestamps = {"index": [-0.3, -0.24, 0, 0.26, 0.3]}
-    tol = 0.04
-    item = hf_dataset[2]
-    item = load_previous_and_future_frames(item, hf_dataset, episode_data_index, delta_timestamps, tol)
-    data, is_pad = item["index"], item["index_is_pad"]
-    assert torch.equal(data, torch.tensor([0, 0, 2, 4, 4])), "Data does not match expected values"
-    assert torch.equal(
-        is_pad, torch.tensor([True, False, False, True, True])
-    ), "Padding does not match expected values"
+#     # TODO(rcadene): we can't test this because expected_stats is computed on a subset
+#     # # test loaded stats match expected stats
+#     # for k in stats_patterns:
+#     #     assert torch.allclose(loaded_stats[k]["mean"], expected_stats[k]["mean"])
+#     #     assert torch.allclose(loaded_stats[k]["std"], expected_stats[k]["std"])
+#     #     assert torch.allclose(loaded_stats[k]["min"], expected_stats[k]["min"])
+#     #     assert torch.allclose(loaded_stats[k]["max"], expected_stats[k]["max"])


 def test_flatten_unflatten_dict():
@@ -324,6 +257,7 @@ def test_flatten_unflatten_dict():
     assert json.dumps(original_d, sort_keys=True) == json.dumps(d, sort_keys=True), f"{original_d} != {d}"


+@pytest.mark.skip("TODO after v2 migration / removing hydra")
 @pytest.mark.parametrize(
     "repo_id",
     [
@@ -395,6 +329,7 @@ def test_backward_compatibility(repo_id):
     # load_and_compare(i - 1)


+@pytest.mark.skip("TODO after v2 migration / removing hydra")
 def test_aggregate_stats():
     """Makes 3 basic datasets and checks that aggregate stats are computed correctly."""
     with seeded_context(0):