From 2765877f28b9ef3f0e300b48c61166c53851ba63 Mon Sep 17 00:00:00 2001
From: Simon Alibert <75076266+aliberts@users.noreply.github.com>
Date: Mon, 29 Apr 2024 23:05:55 +0200
Subject: [PATCH] Speed up CI, add more checks (#115)

- Split pytest and end-to-end tests into separate jobs
- Add poetry check to ensure pyproject.toml and poetry.lock are in sync
- Add ruff format --diff to ensure style formatting is applied (fails if ruff would reformat anything)
---
 .github/workflows/build-docker-images.yml     |  4 +--
 .github/workflows/nightly-tests.yml           |  4 +--
 .github/workflows/{style.yml => quality.yml}  | 26 +++++++++++---
 .github/workflows/test-docker-build.yml       |  6 ++--
 .github/workflows/test.yml                    | 36 ++++++++++++++++++-
 examples/3_train_policy.py                    |  4 ++-
 .../push_dataset_to_hub/pusht_processor.py    |  4 +--
 lerobot/common/policies/act/modeling_act.py   |  1 -
 lerobot/scripts/train.py                      | 24 ++++++++++---
 9 files changed, 88 insertions(+), 21 deletions(-)
 rename .github/workflows/{style.yml => quality.yml} (66%)

diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml
index 309f537c..40ac06a1 100644
--- a/.github/workflows/build-docker-images.yml
+++ b/.github/workflows/build-docker-images.yml
@@ -14,7 +14,7 @@ env:
 
 jobs:
   latest-cpu:
-    name: "Build CPU"
+    name: CPU
     runs-on: ubuntu-latest
     steps:
       - name: Cleanup disk
@@ -109,7 +109,7 @@ jobs:
     #       SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
 
   latest-cuda:
-    name: "Build GPU"
+    name: GPU
     runs-on: ubuntu-latest
     steps:
       - name: Cleanup disk
diff --git a/.github/workflows/nightly-tests.yml b/.github/workflows/nightly-tests.yml
index 43f2fe52..b30a0bca 100644
--- a/.github/workflows/nightly-tests.yml
+++ b/.github/workflows/nightly-tests.yml
@@ -13,7 +13,7 @@ env:
 
 jobs:
   run_all_tests_cpu:
-    name: "Test CPU"
+    name: CPU
     strategy:
       fail-fast: false
     runs-on: ubuntu-latest
@@ -40,7 +40,7 @@ jobs:
 
 
   run_all_tests_single_gpu:
-    name: "Test GPU"
+    name: GPU
     strategy:
       fail-fast: false
     runs-on: [single-gpu, nvidia-gpu, t4, ci]
diff --git a/.github/workflows/style.yml b/.github/workflows/quality.yml
similarity index 66%
rename from .github/workflows/style.yml
rename to .github/workflows/quality.yml
index 0b5998c3..5747891b 100644
--- a/.github/workflows/style.yml
+++ b/.github/workflows/quality.yml
@@ -1,4 +1,4 @@
-name: Style
+name: Quality
 
 on:
   workflow_dispatch:
@@ -14,7 +14,8 @@ env:
   PYTHON_VERSION: "3.10"
 
 jobs:
-  ruff_check:
+  style:
+    name: Style
     runs-on: ubuntu-latest
     steps:
       - name: Checkout Repository
@@ -34,5 +35,22 @@ jobs:
       - name: Install Ruff
         run: python -m pip install "ruff==${{ env.RUFF_VERSION }}"
 
-      - name: Run Ruff
-        run: ruff check .
+      - name: Ruff check
+        run: ruff check
+
+      - name: Ruff format
+        run: ruff format --diff
+
+
+  poetry_check:
+    name: Poetry check
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v3
+
+      - name: Install poetry
+        run: pipx install poetry
+
+      - name: Poetry check
+        run: poetry check
diff --git a/.github/workflows/test-docker-build.yml b/.github/workflows/test-docker-build.yml
index f1c720f3..74c6f858 100644
--- a/.github/workflows/test-docker-build.yml
+++ b/.github/workflows/test-docker-build.yml
@@ -1,6 +1,6 @@
 # Inspired by
 # https://github.com/huggingface/peft/blob/main/.github/workflows/test-docker-build.yml
-name: Test Docker builds (PR)
+name: Test Dockerfiles
 
 on:
   pull_request:
@@ -15,7 +15,7 @@ env:
 
 jobs:
   get_changed_files:
-    name: "Get all modified Dockerfiles"
+    name: Detect modified Dockerfiles
     runs-on: ubuntu-latest
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
@@ -40,7 +40,7 @@ jobs:
 
 
   build_modified_dockerfiles:
-    name: "Build all modified Docker images"
+    name: Build modified Docker images
     needs: get_changed_files
     runs-on: ubuntu-latest
     if: ${{ needs.get_changed_files.outputs.matrix }} != ''
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 76e7000d..90d215cd 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -21,7 +21,8 @@ on:
       - "poetry.lock"
 
 jobs:
-  tests:
+  pytest:
+    name: Pytest
     runs-on: ubuntu-latest
     env:
       DATA_DIR: tests/data
@@ -60,6 +61,39 @@ jobs:
             -W ignore::UserWarning:gymnasium.utils.env_checker:247 \
             && rm -rf tests/outputs outputs
 
+
+  end-to-end:
+    name: End-to-end
+    runs-on: ubuntu-latest
+    env:
+      DATA_DIR: tests/data
+      MUJOCO_GL: egl
+    steps:
+      - name: Add SSH key for installing envs
+        uses: webfactory/ssh-agent@v0.9.0
+        with:
+          ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
+
+      - uses: actions/checkout@v4
+
+      - name: Install EGL
+        run: sudo apt-get update && sudo apt-get install -y libegl1-mesa-dev
+
+      - name: Install poetry
+        run: |
+          pipx install poetry && poetry config virtualenvs.in-project true
+          echo "${{ github.workspace }}/.venv/bin" >> $GITHUB_PATH
+
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+          cache: "poetry"
+
+      - name: Install poetry dependencies
+        run: |
+          poetry install --all-extras
+
       - name: Test end-to-end
         run: |
           make test-end-to-end \
diff --git a/examples/3_train_policy.py b/examples/3_train_policy.py
index 283c6c2b..fb2a4419 100644
--- a/examples/3_train_policy.py
+++ b/examples/3_train_policy.py
@@ -38,7 +38,9 @@ policy = DiffusionPolicy(cfg, lr_scheduler_num_training_steps=training_steps, da
 policy.train()
 policy.to(device)
 
-optimizer = torch.optim.Adam(policy.diffusion.parameters(), cfg.lr, cfg.adam_betas, cfg.adam_eps, cfg.adam_weight_decay)
+optimizer = torch.optim.Adam(
+    policy.diffusion.parameters(), cfg.lr, cfg.adam_betas, cfg.adam_eps, cfg.adam_weight_decay
+)
 
 # Create dataloader for offline training.
 dataloader = torch.utils.data.DataLoader(
diff --git a/lerobot/common/datasets/push_dataset_to_hub/pusht_processor.py b/lerobot/common/datasets/push_dataset_to_hub/pusht_processor.py
index 2f0ec3d6..7d5da0d4 100644
--- a/lerobot/common/datasets/push_dataset_to_hub/pusht_processor.py
+++ b/lerobot/common/datasets/push_dataset_to_hub/pusht_processor.py
@@ -14,8 +14,8 @@ from lerobot.common.datasets.utils import (
 
 
 class PushTProcessor:
-    """ Process zarr files formatted like in: https://github.com/real-stanford/diffusion_policy
-    """
+    """Process zarr files formatted like in: https://github.com/real-stanford/diffusion_policy"""
+
     def __init__(self, folder_path: Path, fps: int | None = None):
         self.zarr_path = folder_path
         if fps is None:
diff --git a/lerobot/common/policies/act/modeling_act.py b/lerobot/common/policies/act/modeling_act.py
index 1dec1525..22384ca0 100644
--- a/lerobot/common/policies/act/modeling_act.py
+++ b/lerobot/common/policies/act/modeling_act.py
@@ -195,7 +195,6 @@ class ActionChunkingTransformerPolicy(nn.Module):
 
         return loss_dict
 
-
     def _stack_images(self, batch: dict[str, Tensor]) -> dict[str, Tensor]:
         """Stacks all the images in a batch and puts them in a new key: "observation.images".
 
diff --git a/lerobot/scripts/train.py b/lerobot/scripts/train.py
index 92c855fc..352c6967 100644
--- a/lerobot/scripts/train.py
+++ b/lerobot/scripts/train.py
@@ -48,7 +48,7 @@ def update_policy(policy, batch, optimizer, grad_clip_norm, lr_scheduler=None):
     info = {
         "loss": loss.item(),
         "grad_norm": float(grad_norm),
-        "lr": optimizer.param_groups[0]['lr'],
+        "lr": optimizer.param_groups[0]["lr"],
         "update_s": time.time() - start_time,
     }
 
@@ -271,17 +271,31 @@ def train(cfg: dict, out_dir=None, job_name=None):
     # Temporary hack to move optimizer out of policy
     if cfg.policy.name == "act":
         optimizer_params_dicts = [
-            {"params": [p for n, p in policy.named_parameters() if not n.startswith("backbone") and p.requires_grad]},
             {
-                "params": [p for n, p in policy.named_parameters() if n.startswith("backbone") and p.requires_grad],
+                "params": [
+                    p
+                    for n, p in policy.named_parameters()
+                    if not n.startswith("backbone") and p.requires_grad
+                ]
+            },
+            {
+                "params": [
+                    p for n, p in policy.named_parameters() if n.startswith("backbone") and p.requires_grad
+                ],
                 "lr": cfg.policy.lr_backbone,
             },
         ]
-        optimizer = torch.optim.AdamW(optimizer_params_dicts, lr=cfg.policy.lr, weight_decay=cfg.policy.weight_decay)
+        optimizer = torch.optim.AdamW(
+            optimizer_params_dicts, lr=cfg.policy.lr, weight_decay=cfg.policy.weight_decay
+        )
         lr_scheduler = None
     elif cfg.policy.name == "diffusion":
         optimizer = torch.optim.Adam(
-            policy.diffusion.parameters(), cfg.policy.lr, cfg.policy.adam_betas, cfg.policy.adam_eps, cfg.policy.adam_weight_decay
+            policy.diffusion.parameters(),
+            cfg.policy.lr,
+            cfg.policy.adam_betas,
+            cfg.policy.adam_eps,
+            cfg.policy.adam_weight_decay,
         )
         # TODO(rcadene): modify lr scheduler so that it doesn't depend on epochs but steps
         # configure lr scheduler