diff --git a/lerobot/common/datasets/abstract.py b/lerobot/common/datasets/abstract.py
index 13be4cab..e9e9c610 100644
--- a/lerobot/common/datasets/abstract.py
+++ b/lerobot/common/datasets/abstract.py
@@ -152,7 +152,13 @@ class AbstractDataset(TensorDictReplayBuffer):
         return TensorStorage(TensorDict.load_memmap(self.data_dir / "replay_buffer"))
 
     def _compute_stats(self, batch_size: int = 32):
-        """Compute dataset statistics including minimum, maximum, mean, and standard deviation."""
+        """Compute dataset statistics including minimum, maximum, mean, and standard deviation.
+
+        TODO(alexander-soare): Add a `num_batches` argument that allows the statistics to be computed from a
+            subset of the full dataset (for handling very large datasets). The sampling would then have to
+            be random (preferably without replacement), and both stats computation loops should ideally
+            sample the same items.
+        """
         rb = TensorDictReplayBuffer(
             storage=self._storage,
             batch_size=32,