From 7d5d99e036c7eccd973a0ced0084fdcc18cf3a85 Mon Sep 17 00:00:00 2001
From: Cadene <re.cadene@gmail.com>
Date: Tue, 19 Mar 2024 16:53:07 +0000
Subject: [PATCH] Address more comments

---
 README.md                           | 23 ++++++++++++++++++-----
 lerobot/common/datasets/abstract.py |  6 ++++++
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 7e4baad8..dc51fec2 100644
--- a/README.md
+++ b/README.md
@@ -148,9 +148,9 @@ DATA_DIR="tests/data" pytest -sx tests
 
 **Datasets**
 
-To add a pytorch rl dataset to the hub, first login and use a token generated from [huggingface settings](https://huggingface.co/settings/tokens) with write access:
+To add a dataset to the hub, first log in with a write-access token generated from your [Hugging Face settings](https://huggingface.co/settings/tokens):
 ```
-huggingface-cli login --token $HUGGINGFACE_TOKEN --add-to-git-credential
+huggingface-cli login --token ${HUGGINGFACE_TOKEN} --add-to-git-credential
 ```
 
 Then you can upload it to the hub with:
@@ -160,6 +160,12 @@ HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli upload $HF_USER/$DATASET data/$DATAS
 --revision v1.0
 ```
 
+You will need to set the uploaded version as the default value of the `version` argument in your dataset class:
+```python
+  version: str | None = "v1.0",
+```
+See: [`lerobot/common/datasets/pusht.py`](https://github.com/Cadene/lerobot/blob/main/lerobot/common/datasets/pusht.py)
+
 For instance, for [cadene/pusht](https://huggingface.co/datasets/cadene/pusht), we used:
 ```
 HF_USER=cadene
@@ -169,7 +175,7 @@ DATASET=pusht
 If you want to improve an existing dataset, you can download it locally with:
 ```
 mkdir -p data/$DATASET
-HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli download $HF_USER/$DATASET \
+HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli download ${HF_USER}/$DATASET \
 --repo-type dataset \
 --local-dir data/$DATASET \
 --local-dir-use-symlinks=False \
@@ -181,7 +187,7 @@ Iterate on your code and dataset with:
 DATA_DIR=data python train.py
 ```
 
-Then upload a new version (v2.0 or v1.1 if the changes are respectively more or less significant):
+Upload a new version (v2.0 if the changes are significant, v1.1 if they are minor):
 ```
 HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli upload $HF_USER/$DATASET data/$DATASET \
 --repo-type dataset \
@@ -189,7 +195,14 @@ HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli upload $HF_USER/$DATASET data/$DATAS
 --delete "*"
 ```
 
-And you might want to mock the dataset if you need to update the unit tests as well:
+Then you will need to set the newly uploaded version as the default value of the `version` argument in your dataset class:
+```python
+  version: str | None = "v1.1",
+```
+See: [`lerobot/common/datasets/pusht.py`](https://github.com/Cadene/lerobot/blob/main/lerobot/common/datasets/pusht.py)
+
+
+Finally, you might want to mock the dataset if you need to update the unit tests as well:
 ```
 python tests/scripts/mock_dataset.py --in-data-dir data/$DATASET --out-data-dir tests/data/$DATASET
 ```
diff --git a/lerobot/common/datasets/abstract.py b/lerobot/common/datasets/abstract.py
index 9127d887..aec53877 100644
--- a/lerobot/common/datasets/abstract.py
+++ b/lerobot/common/datasets/abstract.py
@@ -35,6 +35,12 @@ class AbstractExperienceReplay(TensorDictReplayBuffer):
         self.version = version
         self.shuffle = shuffle
         self.root = root
+
+        if self.root is not None and self.version is not None:
+            logging.warning(
+                f"The version of the dataset ({self.version}) is not enforced when root is provided ({self.root})."
+            )
+
         storage = self._download_or_load_dataset()
 
         super().__init__(