From 443a9eec88bb2ecb55f34f603865706d1671ff50 Mon Sep 17 00:00:00 2001
From: Simon Alibert
Date: Thu, 31 Oct 2024 21:43:29 +0100
Subject: [PATCH] Remove/comment obsolete tests

---
 tests/test_datasets.py | 229 +++++++++++++++--------------------------
 1 file changed, 82 insertions(+), 147 deletions(-)

diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index 855cb26f..f540c6a8 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -16,7 +16,6 @@
 import json
 import logging
 from copy import deepcopy
-from itertools import chain
 from pathlib import Path

 import einops
@@ -30,15 +29,13 @@ import lerobot
 from lerobot.common.datasets.compute_stats import (
     aggregate_stats,
     compute_stats,
-    get_stats_einops_patterns,
 )
 from lerobot.common.datasets.factory import make_dataset
-from lerobot.common.datasets.lerobot_dataset import LeRobotDataset, MultiLeRobotDataset
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
 from lerobot.common.datasets.utils import (
     create_branch,
     flatten_dict,
     hf_transform_to_torch,
-    load_previous_and_future_frames,
     unflatten_dict,
 )
 from lerobot.common.utils.utils import init_hydra_config, seeded_context
@@ -72,6 +69,7 @@ def test_same_attributes_defined(dataset_create, dataset_init):
     assert init_attr == create_attr, "Attribute sets do not match between __init__ and .create()"


+@pytest.mark.skip("TODO after v2 migration / removing hydra")
 @pytest.mark.parametrize(
     "env_name, repo_id, policy_name",
     lerobot.env_dataset_policy_triplets
@@ -143,162 +141,97 @@ def test_factory(env_name, repo_id, policy_name):
             assert key in item, f"{key}"


-# TODO(alexander-soare): If you're hunting for savings on testing time, this takes about 5 seconds.
-def test_multilerobotdataset_frames():
-    """Check that all dataset frames are incorporated."""
-    # Note: use the image variants of the dataset to make the test approx 3x faster.
-    # Note: We really do need three repo_ids here as at some point this caught an issue with the chaining
-    # logic that wouldn't be caught with two repo IDs.
-    repo_ids = [
-        "lerobot/aloha_sim_insertion_human_image",
-        "lerobot/aloha_sim_transfer_cube_human_image",
-        "lerobot/aloha_sim_insertion_scripted_image",
-    ]
-    sub_datasets = [LeRobotDataset(repo_id) for repo_id in repo_ids]
-    dataset = MultiLeRobotDataset(repo_ids)
-    assert len(dataset) == sum(len(d) for d in sub_datasets)
-    assert dataset.num_samples == sum(d.num_samples for d in sub_datasets)
-    assert dataset.num_episodes == sum(d.num_episodes for d in sub_datasets)
+# # TODO(alexander-soare): If you're hunting for savings on testing time, this takes about 5 seconds.
+# def test_multilerobotdataset_frames():
+#     """Check that all dataset frames are incorporated."""
+#     # Note: use the image variants of the dataset to make the test approx 3x faster.
+#     # Note: We really do need three repo_ids here as at some point this caught an issue with the chaining
+#     # logic that wouldn't be caught with two repo IDs.
+#     repo_ids = [
+#         "lerobot/aloha_sim_insertion_human_image",
+#         "lerobot/aloha_sim_transfer_cube_human_image",
+#         "lerobot/aloha_sim_insertion_scripted_image",
+#     ]
+#     sub_datasets = [LeRobotDataset(repo_id) for repo_id in repo_ids]
+#     dataset = MultiLeRobotDataset(repo_ids)
+#     assert len(dataset) == sum(len(d) for d in sub_datasets)
+#     assert dataset.num_samples == sum(d.num_samples for d in sub_datasets)
+#     assert dataset.num_episodes == sum(d.num_episodes for d in sub_datasets)

-    # Run through all items of the LeRobotDatasets in parallel with the items of the MultiLerobotDataset and
-    # check they match.
-    expected_dataset_indices = []
-    for i, sub_dataset in enumerate(sub_datasets):
-        expected_dataset_indices.extend([i] * len(sub_dataset))
+#     # Run through all items of the LeRobotDatasets in parallel with the items of the MultiLerobotDataset and
+#     # check they match.
+#     expected_dataset_indices = []
+#     for i, sub_dataset in enumerate(sub_datasets):
+#         expected_dataset_indices.extend([i] * len(sub_dataset))

-    for expected_dataset_index, sub_dataset_item, dataset_item in zip(
-        expected_dataset_indices, chain(*sub_datasets), dataset, strict=True
-    ):
-        dataset_index = dataset_item.pop("dataset_index")
-        assert dataset_index == expected_dataset_index
-        assert sub_dataset_item.keys() == dataset_item.keys()
-        for k in sub_dataset_item:
-            assert torch.equal(sub_dataset_item[k], dataset_item[k])
+#     for expected_dataset_index, sub_dataset_item, dataset_item in zip(
+#         expected_dataset_indices, chain(*sub_datasets), dataset, strict=True
+#     ):
+#         dataset_index = dataset_item.pop("dataset_index")
+#         assert dataset_index == expected_dataset_index
+#         assert sub_dataset_item.keys() == dataset_item.keys()
+#         for k in sub_dataset_item:
+#             assert torch.equal(sub_dataset_item[k], dataset_item[k])


-def test_compute_stats_on_xarm():
-    """Check that the statistics are computed correctly according to the stats_patterns property.
+# TODO(aliberts, rcadene): Refactor and move this to a tests/test_compute_stats.py
+# def test_compute_stats_on_xarm():
+#     """Check that the statistics are computed correctly according to the stats_patterns property.

-    We compare with taking a straight min, mean, max, std of all the data in one pass (which we can do
-    because we are working with a small dataset).
-    """
-    dataset = LeRobotDataset("lerobot/xarm_lift_medium")
+#     We compare with taking a straight min, mean, max, std of all the data in one pass (which we can do
+#     because we are working with a small dataset).
+#     """
+#     dataset = LeRobotDataset("lerobot/xarm_lift_medium")

-    # reduce size of dataset sample on which stats compute is tested to 10 frames
-    dataset.hf_dataset = dataset.hf_dataset.select(range(10))
+#     # reduce size of dataset sample on which stats compute is tested to 10 frames
+#     dataset.hf_dataset = dataset.hf_dataset.select(range(10))

-    # Note: we set the batch size to be smaller than the whole dataset to make sure we are testing batched
-    # computation of the statistics. While doing this, we also make sure it works when we don't divide the
-    # dataset into even batches.
-    computed_stats = compute_stats(dataset, batch_size=int(len(dataset) * 0.25), num_workers=0)
+#     # Note: we set the batch size to be smaller than the whole dataset to make sure we are testing batched
+#     # computation of the statistics. While doing this, we also make sure it works when we don't divide the
+#     # dataset into even batches.
+#     computed_stats = compute_stats(dataset, batch_size=int(len(dataset) * 0.25), num_workers=0)

-    # get einops patterns to aggregate batches and compute statistics
-    stats_patterns = get_stats_einops_patterns(dataset)
+#     # get einops patterns to aggregate batches and compute statistics
+#     stats_patterns = get_stats_einops_patterns(dataset)

-    # get all frames from the dataset in the same dtype and range as during compute_stats
-    dataloader = torch.utils.data.DataLoader(
-        dataset,
-        num_workers=0,
-        batch_size=len(dataset),
-        shuffle=False,
-    )
-    full_batch = next(iter(dataloader))
+#     # get all frames from the dataset in the same dtype and range as during compute_stats
+#     dataloader = torch.utils.data.DataLoader(
+#         dataset,
+#         num_workers=0,
+#         batch_size=len(dataset),
+#         shuffle=False,
+#     )
+#     full_batch = next(iter(dataloader))

-    # compute stats based on all frames from the dataset without any batching
-    expected_stats = {}
-    for k, pattern in stats_patterns.items():
-        full_batch[k] = full_batch[k].float()
-        expected_stats[k] = {}
-        expected_stats[k]["mean"] = einops.reduce(full_batch[k], pattern, "mean")
-        expected_stats[k]["std"] = torch.sqrt(
-            einops.reduce((full_batch[k] - expected_stats[k]["mean"]) ** 2, pattern, "mean")
-        )
-        expected_stats[k]["min"] = einops.reduce(full_batch[k], pattern, "min")
-        expected_stats[k]["max"] = einops.reduce(full_batch[k], pattern, "max")
+#     # compute stats based on all frames from the dataset without any batching
+#     expected_stats = {}
+#     for k, pattern in stats_patterns.items():
+#         full_batch[k] = full_batch[k].float()
+#         expected_stats[k] = {}
+#         expected_stats[k]["mean"] = einops.reduce(full_batch[k], pattern, "mean")
+#         expected_stats[k]["std"] = torch.sqrt(
+#             einops.reduce((full_batch[k] - expected_stats[k]["mean"]) ** 2, pattern, "mean")
+#         )
+#         expected_stats[k]["min"] = einops.reduce(full_batch[k], pattern, "min")
+#         expected_stats[k]["max"] = einops.reduce(full_batch[k], pattern, "max")

-    # test computed stats match expected stats
-    for k in stats_patterns:
-        assert torch.allclose(computed_stats[k]["mean"], expected_stats[k]["mean"])
-        assert torch.allclose(computed_stats[k]["std"], expected_stats[k]["std"])
-        assert torch.allclose(computed_stats[k]["min"], expected_stats[k]["min"])
-        assert torch.allclose(computed_stats[k]["max"], expected_stats[k]["max"])
+#     # test computed stats match expected stats
+#     for k in stats_patterns:
+#         assert torch.allclose(computed_stats[k]["mean"], expected_stats[k]["mean"])
+#         assert torch.allclose(computed_stats[k]["std"], expected_stats[k]["std"])
+#         assert torch.allclose(computed_stats[k]["min"], expected_stats[k]["min"])
+#         assert torch.allclose(computed_stats[k]["max"], expected_stats[k]["max"])

-    # load stats used during training which are expected to match the ones returned by computed_stats
-    loaded_stats = dataset.stats  # noqa: F841
+#     # load stats used during training which are expected to match the ones returned by computed_stats
+#     loaded_stats = dataset.stats  # noqa: F841

-    # TODO(rcadene): we can't test this because expected_stats is computed on a subset
-    # # test loaded stats match expected stats
-    # for k in stats_patterns:
-    #     assert torch.allclose(loaded_stats[k]["mean"], expected_stats[k]["mean"])
-    #     assert torch.allclose(loaded_stats[k]["std"], expected_stats[k]["std"])
-    #     assert torch.allclose(loaded_stats[k]["min"], expected_stats[k]["min"])
-    #     assert torch.allclose(loaded_stats[k]["max"], expected_stats[k]["max"])
-
-
-def test_load_previous_and_future_frames_within_tolerance():
-    hf_dataset = Dataset.from_dict(
-        {
-            "timestamp": [0.1, 0.2, 0.3, 0.4, 0.5],
-            "index": [0, 1, 2, 3, 4],
-            "episode_index": [0, 0, 0, 0, 0],
-        }
-    )
-    hf_dataset.set_transform(hf_transform_to_torch)
-    episode_data_index = {
-        "from": torch.tensor([0]),
-        "to": torch.tensor([5]),
-    }
-    delta_timestamps = {"index": [-0.2, 0, 0.139]}
-    tol = 0.04
-    item = hf_dataset[2]
-    item = load_previous_and_future_frames(item, hf_dataset, episode_data_index, delta_timestamps, tol)
-    data, is_pad = item["index"], item["index_is_pad"]
-    assert torch.equal(data, torch.tensor([0, 2, 3])), "Data does not match expected values"
-    assert not is_pad.any(), "Unexpected padding detected"
-
-
-def test_load_previous_and_future_frames_outside_tolerance_inside_episode_range():
-    hf_dataset = Dataset.from_dict(
-        {
-            "timestamp": [0.1, 0.2, 0.3, 0.4, 0.5],
-            "index": [0, 1, 2, 3, 4],
-            "episode_index": [0, 0, 0, 0, 0],
-        }
-    )
-    hf_dataset.set_transform(hf_transform_to_torch)
-    episode_data_index = {
-        "from": torch.tensor([0]),
-        "to": torch.tensor([5]),
-    }
-    delta_timestamps = {"index": [-0.2, 0, 0.141]}
-    tol = 0.04
-    item = hf_dataset[2]
-    with pytest.raises(AssertionError):
-        load_previous_and_future_frames(item, hf_dataset, episode_data_index, delta_timestamps, tol)
-
-
-def test_load_previous_and_future_frames_outside_tolerance_outside_episode_range():
-    hf_dataset = Dataset.from_dict(
-        {
-            "timestamp": [0.1, 0.2, 0.3, 0.4, 0.5],
-            "index": [0, 1, 2, 3, 4],
-            "episode_index": [0, 0, 0, 0, 0],
-        }
-    )
-    hf_dataset.set_transform(hf_transform_to_torch)
-    episode_data_index = {
-        "from": torch.tensor([0]),
-        "to": torch.tensor([5]),
-    }
-    delta_timestamps = {"index": [-0.3, -0.24, 0, 0.26, 0.3]}
-    tol = 0.04
-    item = hf_dataset[2]
-    item = load_previous_and_future_frames(item, hf_dataset, episode_data_index, delta_timestamps, tol)
-    data, is_pad = item["index"], item["index_is_pad"]
-    assert torch.equal(data, torch.tensor([0, 0, 2, 4, 4])), "Data does not match expected values"
-    assert torch.equal(
-        is_pad, torch.tensor([True, False, False, True, True])
-    ), "Padding does not match expected values"
+#     # TODO(rcadene): we can't test this because expected_stats is computed on a subset
+#     # # test loaded stats match expected stats
+#     # for k in stats_patterns:
+#     #     assert torch.allclose(loaded_stats[k]["mean"], expected_stats[k]["mean"])
+#     #     assert torch.allclose(loaded_stats[k]["std"], expected_stats[k]["std"])
+#     #     assert torch.allclose(loaded_stats[k]["min"], expected_stats[k]["min"])
+#     #     assert torch.allclose(loaded_stats[k]["max"], expected_stats[k]["max"])


 def test_flatten_unflatten_dict():
@@ -324,6 +257,7 @@ def test_flatten_unflatten_dict():
     assert json.dumps(original_d, sort_keys=True) == json.dumps(d, sort_keys=True), f"{original_d} != {d}"


+@pytest.mark.skip("TODO after v2 migration / removing hydra")
 @pytest.mark.parametrize(
     "repo_id",
     [
@@ -395,6 +329,7 @@ def test_backward_compatibility(repo_id):
     # load_and_compare(i - 1)


+@pytest.mark.skip("TODO after v2 migration / removing hydra")
 def test_aggregate_stats():
     """Makes 3 basic datasets and checks that aggregate stats are computed correctly."""
     with seeded_context(0):