Speed up CI, add more checks (#115)

- Split pytest and end-to-end tests into separate jobs - Add poetry check to ensure pyproject.toml and poetry.lock are in sync - Add ruff format --diff to ensure style formatting is applied (fails if ruff would reformat anything)
2024-04-29 23:05:55 +02:00 · 2024-04-29 23:05:55 +02:00 · 2765877f28
parent 1ec5f77f7c
commit 2765877f28
9 changed files with 88 additions and 21 deletions
--- a/.github/workflows/build-docker-images.yml
+++ b/.github/workflows/build-docker-images.yml
@ -14,7 +14,7 @@ env:
 jobs:
  latest-cpu:
-    name: "Build CPU"
+    name: CPU
    runs-on: ubuntu-latest
    steps:
      - name: Cleanup disk
@ -109,7 +109,7 @@ jobs:
    #       SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
  latest-cuda:
-    name: "Build GPU"
+    name: GPU
    runs-on: ubuntu-latest
    steps:
      - name: Cleanup disk
--- a/.github/workflows/nightly-tests.yml
+++ b/.github/workflows/nightly-tests.yml
@ -13,7 +13,7 @@ env:
 jobs:
  run_all_tests_cpu:
-    name: "Test CPU"
+    name: CPU
    strategy:
      fail-fast: false
    runs-on: ubuntu-latest
@ -40,7 +40,7 @@ jobs:
  run_all_tests_single_gpu:
-    name: "Test GPU"
+    name: GPU
    strategy:
      fail-fast: false
    runs-on: [single-gpu, nvidia-gpu, t4, ci]
--- a/.github/workflows/quality.yml
+++ b/.github/workflows/quality.yml
@ -1,4 +1,4 @@
-name: Style
+name: Quality
 on:
  workflow_dispatch:
@ -14,7 +14,8 @@ env:
  PYTHON_VERSION: "3.10"
 jobs:
-  ruff_check:
+  style:
    name: Style
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
@ -34,5 +35,22 @@ jobs:
      - name: Install Ruff
        run: python -m pip install "ruff==${{ env.RUFF_VERSION }}"
-      - name: Run Ruff
+      - name: Ruff check
-        run: ruff check .
+        run: ruff check
      - name: Ruff format
        run: ruff format --diff
  poetry_check:
    name: Poetry check
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v3
      - name: Install poetry
        run: pipx install poetry
      - name: Poetry check
        run: poetry check
--- a/.github/workflows/test-docker-build.yml
+++ b/.github/workflows/test-docker-build.yml
@ -1,6 +1,6 @@
 # Inspired by
 # https://github.com/huggingface/peft/blob/main/.github/workflows/test-docker-build.yml
-name: Test Docker builds (PR)
+name: Test Dockerfiles
 on:
  pull_request:
@ -15,7 +15,7 @@ env:
 jobs:
  get_changed_files:
-    name: "Get all modified Dockerfiles"
+    name: Detect modified Dockerfiles
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
@ -40,7 +40,7 @@ jobs:
  build_modified_dockerfiles:
-    name: "Build all modified Docker images"
+    name: Build modified Docker images
    needs: get_changed_files
    runs-on: ubuntu-latest
    if: ${{ needs.get_changed_files.outputs.matrix }} != ''
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -21,7 +21,8 @@ on:
      - "poetry.lock"
 jobs:
-  tests:
+  pytest:
    name: Pytest
    runs-on: ubuntu-latest
    env:
      DATA_DIR: tests/data
@ -60,6 +61,39 @@ jobs:
            -W ignore::UserWarning:gymnasium.utils.env_checker:247 \
            && rm -rf tests/outputs outputs
  end-to-end:
    name: End-to-end
    runs-on: ubuntu-latest
    env:
      DATA_DIR: tests/data
      MUJOCO_GL: egl
    steps:
      - name: Add SSH key for installing envs
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
      - uses: actions/checkout@v4
      - name: Install EGL
        run: sudo apt-get update && sudo apt-get install -y libegl1-mesa-dev
      - name: Install poetry
        run: |
          pipx install poetry && poetry config virtualenvs.in-project true
          echo "${{ github.workspace }}/.venv/bin" >> $GITHUB_PATH
      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
          cache: "poetry"
      - name: Install poetry dependencies
        run: |
          poetry install --all-extras
      - name: Test end-to-end
        run: |
          make test-end-to-end \
--- a/examples/3_train_policy.py
+++ b/examples/3_train_policy.py
@ -38,7 +38,9 @@ policy = DiffusionPolicy(cfg, lr_scheduler_num_training_steps=training_steps, da
 policy.train()
 policy.to(device)
-optimizer = torch.optim.Adam(policy.diffusion.parameters(), cfg.lr, cfg.adam_betas, cfg.adam_eps, cfg.adam_weight_decay)
+optimizer = torch.optim.Adam(
    policy.diffusion.parameters(), cfg.lr, cfg.adam_betas, cfg.adam_eps, cfg.adam_weight_decay
 )
 # Create dataloader for offline training.
 dataloader = torch.utils.data.DataLoader(
--- a/lerobot/common/datasets/push_dataset_to_hub/pusht_processor.py
+++ b/lerobot/common/datasets/push_dataset_to_hub/pusht_processor.py
@ -14,8 +14,8 @@ from lerobot.common.datasets.utils import (
 class PushTProcessor:
-    """ Process zarr files formatted like in: https://github.com/real-stanford/diffusion_policy
+    """Process zarr files formatted like in: https://github.com/real-stanford/diffusion_policy"""
-    """
+
    def __init__(self, folder_path: Path, fps: int | None = None):
        self.zarr_path = folder_path
        if fps is None:
--- a/lerobot/common/policies/act/modeling_act.py
+++ b/lerobot/common/policies/act/modeling_act.py
@ -195,7 +195,6 @@ class ActionChunkingTransformerPolicy(nn.Module):
        return loss_dict
    def _stack_images(self, batch: dict[str, Tensor]) -> dict[str, Tensor]:
        """Stacks all the images in a batch and puts them in a new key: "observation.images".
--- a/lerobot/scripts/train.py
+++ b/lerobot/scripts/train.py
@ -48,7 +48,7 @@ def update_policy(policy, batch, optimizer, grad_clip_norm, lr_scheduler=None):
    info = {
        "loss": loss.item(),
        "grad_norm": float(grad_norm),
-        "lr": optimizer.param_groups[0]['lr'],
+        "lr": optimizer.param_groups[0]["lr"],
        "update_s": time.time() - start_time,
    }
@ -271,17 +271,31 @@ def train(cfg: dict, out_dir=None, job_name=None):
    # Temporary hack to move optimizer out of policy
    if cfg.policy.name == "act":
        optimizer_params_dicts = [
            {"params": [p for n, p in policy.named_parameters() if not n.startswith("backbone") and p.requires_grad]},
            {
-                "params": [p for n, p in policy.named_parameters() if n.startswith("backbone") and p.requires_grad],
+                "params": [
                    p
                    for n, p in policy.named_parameters()
                    if not n.startswith("backbone") and p.requires_grad
                ]
            },
            {
                "params": [
                    p for n, p in policy.named_parameters() if n.startswith("backbone") and p.requires_grad
                ],
                "lr": cfg.policy.lr_backbone,
            },
        ]
-        optimizer = torch.optim.AdamW(optimizer_params_dicts, lr=cfg.policy.lr, weight_decay=cfg.policy.weight_decay)
+        optimizer = torch.optim.AdamW(
            optimizer_params_dicts, lr=cfg.policy.lr, weight_decay=cfg.policy.weight_decay
        )
        lr_scheduler = None
    elif cfg.policy.name == "diffusion":
        optimizer = torch.optim.Adam(
-            policy.diffusion.parameters(), cfg.policy.lr, cfg.policy.adam_betas, cfg.policy.adam_eps, cfg.policy.adam_weight_decay
+            policy.diffusion.parameters(),
            cfg.policy.lr,
            cfg.policy.adam_betas,
            cfg.policy.adam_eps,
            cfg.policy.adam_weight_decay,
        )
        # TODO(rcadene): modify lr scheduler so that it doesn't depend on epochs but steps
        # configure lr scheduler