From 2765877f28b9ef3f0e300b48c61166c53851ba63 Mon Sep 17 00:00:00 2001 From: Simon Alibert <75076266+aliberts@users.noreply.github.com> Date: Mon, 29 Apr 2024 23:05:55 +0200 Subject: [PATCH] Speed up CI, add more checks (#115) - Split pytest and end-to-end tests into separate jobs - Add poetry check to ensure pyproject.toml and poetry.lock are in sync - Add ruff format --diff to ensure style formatting is applied (fails if ruff would reformat anything) --- .github/workflows/build-docker-images.yml | 4 +-- .github/workflows/nightly-tests.yml | 4 +-- .github/workflows/{style.yml => quality.yml} | 26 +++++++++++--- .github/workflows/test-docker-build.yml | 6 ++-- .github/workflows/test.yml | 36 ++++++++++++++++++- examples/3_train_policy.py | 4 ++- .../push_dataset_to_hub/pusht_processor.py | 4 +-- lerobot/common/policies/act/modeling_act.py | 1 - lerobot/scripts/train.py | 24 ++++++++++--- 9 files changed, 88 insertions(+), 21 deletions(-) rename .github/workflows/{style.yml => quality.yml} (66%) diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml index 309f537c..40ac06a1 100644 --- a/.github/workflows/build-docker-images.yml +++ b/.github/workflows/build-docker-images.yml @@ -14,7 +14,7 @@ env: jobs: latest-cpu: - name: "Build CPU" + name: CPU runs-on: ubuntu-latest steps: - name: Cleanup disk @@ -109,7 +109,7 @@ jobs: # SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} latest-cuda: - name: "Build GPU" + name: GPU runs-on: ubuntu-latest steps: - name: Cleanup disk diff --git a/.github/workflows/nightly-tests.yml b/.github/workflows/nightly-tests.yml index 43f2fe52..b30a0bca 100644 --- a/.github/workflows/nightly-tests.yml +++ b/.github/workflows/nightly-tests.yml @@ -13,7 +13,7 @@ env: jobs: run_all_tests_cpu: - name: "Test CPU" + name: CPU strategy: fail-fast: false runs-on: ubuntu-latest @@ -40,7 +40,7 @@ jobs: run_all_tests_single_gpu: - name: "Test GPU" + name: GPU strategy: fail-fast: false runs-on: [single-gpu, nvidia-gpu, t4, ci] diff --git a/.github/workflows/style.yml b/.github/workflows/quality.yml similarity index 66% rename from .github/workflows/style.yml rename to .github/workflows/quality.yml index 0b5998c3..5747891b 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/quality.yml @@ -1,4 +1,4 @@ -name: Style +name: Quality on: workflow_dispatch: @@ -14,7 +14,8 @@ env: PYTHON_VERSION: "3.10" jobs: - ruff_check: + style: + name: Style runs-on: ubuntu-latest steps: - name: Checkout Repository @@ -34,5 +35,22 @@ jobs: - name: Install Ruff run: python -m pip install "ruff==${{ env.RUFF_VERSION }}" - - name: Run Ruff - run: ruff check . + - name: Ruff check + run: ruff check + + - name: Ruff format + run: ruff format --diff + + + poetry_check: + name: Poetry check + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v3 + + - name: Install poetry + run: pipx install poetry + + - name: Poetry check + run: poetry check diff --git a/.github/workflows/test-docker-build.yml b/.github/workflows/test-docker-build.yml index f1c720f3..74c6f858 100644 --- a/.github/workflows/test-docker-build.yml +++ b/.github/workflows/test-docker-build.yml @@ -1,6 +1,6 @@ # Inspired by # https://github.com/huggingface/peft/blob/main/.github/workflows/test-docker-build.yml -name: Test Docker builds (PR) +name: Test Dockerfiles on: pull_request: @@ -15,7 +15,7 @@ env: jobs: get_changed_files: - name: "Get all modified Dockerfiles" + name: Detect modified Dockerfiles runs-on: ubuntu-latest outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} @@ -40,7 +40,7 @@ jobs: build_modified_dockerfiles: - name: "Build all modified Docker images" + name: Build modified Docker images needs: get_changed_files runs-on: ubuntu-latest if: ${{ needs.get_changed_files.outputs.matrix }} != '' diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 76e7000d..90d215cd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -21,7 +21,8 @@ on: - "poetry.lock" jobs: - tests: + pytest: + name: Pytest runs-on: ubuntu-latest env: DATA_DIR: tests/data @@ -60,6 +61,39 @@ jobs: -W ignore::UserWarning:gymnasium.utils.env_checker:247 \ && rm -rf tests/outputs outputs + + end-to-end: + name: End-to-end + runs-on: ubuntu-latest + env: + DATA_DIR: tests/data + MUJOCO_GL: egl + steps: + - name: Add SSH key for installing envs + uses: webfactory/ssh-agent@v0.9.0 + with: + ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }} + + - uses: actions/checkout@v4 + + - name: Install EGL + run: sudo apt-get update && sudo apt-get install -y libegl1-mesa-dev + + - name: Install poetry + run: | + pipx install poetry && poetry config virtualenvs.in-project true + echo "${{ github.workspace }}/.venv/bin" >> $GITHUB_PATH + + - name: Set up Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "poetry" + + - name: Install poetry dependencies + run: | + poetry install --all-extras + - name: Test end-to-end run: | make test-end-to-end \ diff --git a/examples/3_train_policy.py b/examples/3_train_policy.py index 283c6c2b..fb2a4419 100644 --- a/examples/3_train_policy.py +++ b/examples/3_train_policy.py @@ -38,7 +38,9 @@ policy = DiffusionPolicy(cfg, lr_scheduler_num_training_steps=training_steps, da policy.train() policy.to(device) -optimizer = torch.optim.Adam(policy.diffusion.parameters(), cfg.lr, cfg.adam_betas, cfg.adam_eps, cfg.adam_weight_decay) +optimizer = torch.optim.Adam( + policy.diffusion.parameters(), cfg.lr, cfg.adam_betas, cfg.adam_eps, cfg.adam_weight_decay +) # Create dataloader for offline training. dataloader = torch.utils.data.DataLoader( diff --git a/lerobot/common/datasets/push_dataset_to_hub/pusht_processor.py b/lerobot/common/datasets/push_dataset_to_hub/pusht_processor.py index 2f0ec3d6..7d5da0d4 100644 --- a/lerobot/common/datasets/push_dataset_to_hub/pusht_processor.py +++ b/lerobot/common/datasets/push_dataset_to_hub/pusht_processor.py @@ -14,8 +14,8 @@ from lerobot.common.datasets.utils import ( class PushTProcessor: - """ Process zarr files formatted like in: https://github.com/real-stanford/diffusion_policy - """ + """Process zarr files formatted like in: https://github.com/real-stanford/diffusion_policy""" + def __init__(self, folder_path: Path, fps: int | None = None): self.zarr_path = folder_path if fps is None: diff --git a/lerobot/common/policies/act/modeling_act.py b/lerobot/common/policies/act/modeling_act.py index 1dec1525..22384ca0 100644 --- a/lerobot/common/policies/act/modeling_act.py +++ b/lerobot/common/policies/act/modeling_act.py @@ -195,7 +195,6 @@ class ActionChunkingTransformerPolicy(nn.Module): return loss_dict - def _stack_images(self, batch: dict[str, Tensor]) -> dict[str, Tensor]: """Stacks all the images in a batch and puts them in a new key: "observation.images". diff --git a/lerobot/scripts/train.py b/lerobot/scripts/train.py index 92c855fc..352c6967 100644 --- a/lerobot/scripts/train.py +++ b/lerobot/scripts/train.py @@ -48,7 +48,7 @@ def update_policy(policy, batch, optimizer, grad_clip_norm, lr_scheduler=None): info = { "loss": loss.item(), "grad_norm": float(grad_norm), - "lr": optimizer.param_groups[0]['lr'], + "lr": optimizer.param_groups[0]["lr"], "update_s": time.time() - start_time, } @@ -271,17 +271,31 @@ def train(cfg: dict, out_dir=None, job_name=None): # Temporary hack to move optimizer out of policy if cfg.policy.name == "act": optimizer_params_dicts = [ - {"params": [p for n, p in policy.named_parameters() if not n.startswith("backbone") and p.requires_grad]}, { - "params": [p for n, p in policy.named_parameters() if n.startswith("backbone") and p.requires_grad], + "params": [ + p + for n, p in policy.named_parameters() + if not n.startswith("backbone") and p.requires_grad + ] + }, + { + "params": [ + p for n, p in policy.named_parameters() if n.startswith("backbone") and p.requires_grad + ], "lr": cfg.policy.lr_backbone, }, ] - optimizer = torch.optim.AdamW(optimizer_params_dicts, lr=cfg.policy.lr, weight_decay=cfg.policy.weight_decay) + optimizer = torch.optim.AdamW( + optimizer_params_dicts, lr=cfg.policy.lr, weight_decay=cfg.policy.weight_decay + ) lr_scheduler = None elif cfg.policy.name == "diffusion": optimizer = torch.optim.Adam( - policy.diffusion.parameters(), cfg.policy.lr, cfg.policy.adam_betas, cfg.policy.adam_eps, cfg.policy.adam_weight_decay + policy.diffusion.parameters(), + cfg.policy.lr, + cfg.policy.adam_betas, + cfg.policy.adam_eps, + cfg.policy.adam_weight_decay, ) # TODO(rcadene): modify lr scheduler so that it doesn't depend on epochs but steps # configure lr scheduler