Merge remote-tracking branch 'upstream/main' into unify_policy_api
This commit is contained in:
commit
bff4b673c9
|
@ -0,0 +1,54 @@
|
|||
name: "\U0001F41B Bug Report"
|
||||
description: Submit a bug report to help us improve LeRobot
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for taking the time to submit a bug report! 🐛
|
||||
If this is not a bug related to the LeRobot library directly, but instead a general question about your code or the library specifically please use our [discord](https://discord.gg/s3KuuzsPFb).
|
||||
|
||||
- type: textarea
|
||||
id: system-info
|
||||
attributes:
|
||||
label: System Info
|
||||
description: If needed, you can share your lerobot configuration with us by running `python -m lerobot.commands.env` and copy-pasting its outputs below
|
||||
render: Shell
|
||||
placeholder: lerobot version, OS, python version, numpy version, torch version, and lerobot's configuration
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: checkboxes
|
||||
id: information-scripts-examples
|
||||
attributes:
|
||||
label: Information
|
||||
description: 'The problem arises when using:'
|
||||
options:
|
||||
- label: "One of the scripts in the examples/ folder of LeRobot"
|
||||
- label: "My own task or dataset (give details below)"
|
||||
|
||||
- type: textarea
|
||||
id: reproduction
|
||||
validations:
|
||||
required: true
|
||||
attributes:
|
||||
label: Reproduction
|
||||
description: |
|
||||
If needed, provide a simple code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
|
||||
Sharing error messages or stack traces could be useful as well!
|
||||
Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting
|
||||
Try to avoid screenshots, as they are hard to read and don't allow copy-and-pasting.
|
||||
|
||||
placeholder: |
|
||||
Steps to reproduce the behavior:
|
||||
|
||||
1.
|
||||
2.
|
||||
3.
|
||||
|
||||
- type: textarea
|
||||
id: expected-behavior
|
||||
validations:
|
||||
required: true
|
||||
attributes:
|
||||
label: Expected behavior
|
||||
description: "A clear and concise description of what you would expect to happen."
|
|
@ -0,0 +1,15 @@
|
|||
# What does this PR do?
|
||||
|
||||
Example: Fixes # (issue)
|
||||
|
||||
|
||||
## Before submitting
|
||||
- Read the [contributor guideline](https://github.com/huggingface/lerobot/blob/main/CONTRIBUTING.md#submitting-a-pull-request-pr).
|
||||
- Provide a minimal code example for the reviewer to checkout & try.
|
||||
- Explain how you tested your changes.
|
||||
|
||||
|
||||
## Who can review?
|
||||
|
||||
Anyone in the community is free to review the PR once the tests have passed. Feel free to tag
|
||||
members/contributors who may be interested in your PR. Try to avoid tagging more than 3 people.
|
File diff suppressed because it is too large
Load Diff
|
@ -1,19 +1,25 @@
|
|||
[tool.poetry]
|
||||
name = "lerobot"
|
||||
version = "0.1.0"
|
||||
description = "Le robot is learning"
|
||||
description = "🤗 LeRobot: State-of-the-art Machine Learning for Real-World Robotics in Pytorch"
|
||||
authors = [
|
||||
"Rémi Cadène <re.cadene@gmail.com>",
|
||||
"Alexander Soare <alexander.soare159@gmail.com>",
|
||||
"Quentin Gallouédec <quentin.gallouedec@ec-lyon.fr>",
|
||||
"Simon Alibert <alibert.sim@gmail.com>",
|
||||
"Thomas Wolf <thomaswolfcontact@gmail.com>",
|
||||
]
|
||||
repository = "https://github.com/Cadene/lerobot"
|
||||
repository = "https://github.com/huggingface/lerobot"
|
||||
readme = "README.md"
|
||||
license = "MIT"
|
||||
license = "Apache-2.0"
|
||||
classifiers=[
|
||||
"Development Status :: 3 - Alpha",
|
||||
"Intended Audience :: Developers",
|
||||
"Intended Audience :: Education",
|
||||
"Intended Audience :: Science/Research",
|
||||
"Topic :: Software Development :: Build Tools",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
||||
"License :: OSI Approved :: Apache Software License",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
]
|
||||
packages = [{include = "lerobot"}]
|
||||
|
@ -23,53 +29,39 @@ packages = [{include = "lerobot"}]
|
|||
python = "^3.10"
|
||||
termcolor = "^2.4.0"
|
||||
omegaconf = "^2.3.0"
|
||||
pandas = "^2.2.1"
|
||||
wandb = "^0.16.3"
|
||||
moviepy = "^1.0.3"
|
||||
imageio = {extras = ["pyav"], version = "^2.34.0"}
|
||||
imageio = {extras = ["ffmpeg"], version = "^2.34.0"}
|
||||
gdown = "^5.1.0"
|
||||
hydra-core = "^1.3.2"
|
||||
einops = "^0.7.0"
|
||||
pygame = "^2.5.2"
|
||||
pymunk = "^6.6.0"
|
||||
zarr = "^2.17.0"
|
||||
numba = "^0.59.0"
|
||||
mpmath = "^1.3.0"
|
||||
torch = {version = "^2.2.1", source = "torch-cpu"}
|
||||
opencv-python = "^4.9.0.80"
|
||||
diffusers = "^0.26.3"
|
||||
torchvision = {version = "^0.17.1", source = "torch-cpu"}
|
||||
h5py = "^3.10.0"
|
||||
robomimic = "0.2.0"
|
||||
huggingface-hub = "^0.21.4"
|
||||
robomimic = "0.2.0"
|
||||
gymnasium = "^0.29.1"
|
||||
cmake = "^3.29.0.1"
|
||||
gym-pusht = { git = "git@github.com:huggingface/gym-pusht.git", optional = true}
|
||||
gym-xarm = { git = "git@github.com:huggingface/gym-xarm.git", optional = true}
|
||||
gym-aloha = { git = "git@github.com:huggingface/gym-aloha.git", optional = true}
|
||||
# gym-pusht = { path = "../gym-pusht", develop = true, optional = true}
|
||||
# gym-xarm = { path = "../gym-xarm", develop = true, optional = true}
|
||||
# gym-aloha = { path = "../gym-aloha", develop = true, optional = true}
|
||||
pre-commit = {version = "^3.7.0", optional = true}
|
||||
debugpy = {version = "^1.8.1", optional = true}
|
||||
pytest = {version = "^8.1.0", optional = true}
|
||||
pytest-cov = {version = "^5.0.0", optional = true}
|
||||
datasets = "^2.18.0"
|
||||
|
||||
|
||||
[tool.poetry.extras]
|
||||
pusht = ["gym-pusht"]
|
||||
xarm = ["gym-xarm"]
|
||||
aloha = ["gym-aloha"]
|
||||
|
||||
|
||||
[tool.poetry.group.dev]
|
||||
optional = true
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pre-commit = "^3.6.2"
|
||||
debugpy = "^1.8.1"
|
||||
|
||||
|
||||
[tool.poetry.group.test.dependencies]
|
||||
pytest = "^8.1.0"
|
||||
pytest-cov = "^5.0.0"
|
||||
dev = ["pre-commit", "debugpy"]
|
||||
test = ["pytest", "pytest-cov"]
|
||||
|
||||
|
||||
[[tool.poetry.source]]
|
||||
|
@ -110,10 +102,6 @@ exclude = [
|
|||
select = ["E4", "E7", "E9", "F", "I", "N", "B", "C4", "SIM"]
|
||||
|
||||
|
||||
[tool.poetry-dynamic-versioning]
|
||||
enable = true
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning>=1.0.0,<2.0.0"]
|
||||
build-backend = "poetry_dynamic_versioning.backend"
|
||||
requires = ["poetry-core>=1.5.0"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
|
|
@ -3,7 +3,7 @@ default_language_version:
|
|||
python: python3.10
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.5.0
|
||||
rev: v4.6.0
|
||||
hooks:
|
||||
- id: check-added-large-files
|
||||
- id: debug-statements
|
||||
|
@ -18,7 +18,7 @@ repos:
|
|||
hooks:
|
||||
- id: pyupgrade
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.3.4
|
||||
rev: v0.3.7
|
||||
hooks:
|
||||
- id: ruff
|
||||
args: [--fix]
|
||||
|
|
|
@ -0,0 +1,133 @@
|
|||
|
||||
# Contributor Covenant Code of Conduct
|
||||
|
||||
## Our Pledge
|
||||
|
||||
We as members, contributors, and leaders pledge to make participation in our
|
||||
community a harassment-free experience for everyone, regardless of age, body
|
||||
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
||||
identity and expression, level of experience, education, socio-economic status,
|
||||
nationality, personal appearance, race, caste, color, religion, or sexual
|
||||
identity and orientation.
|
||||
|
||||
We pledge to act and interact in ways that contribute to an open, welcoming,
|
||||
diverse, inclusive, and healthy community.
|
||||
|
||||
## Our Standards
|
||||
|
||||
Examples of behavior that contributes to a positive environment for our
|
||||
community include:
|
||||
|
||||
* Demonstrating empathy and kindness toward other people
|
||||
* Being respectful of differing opinions, viewpoints, and experiences
|
||||
* Giving and gracefully accepting constructive feedback
|
||||
* Accepting responsibility and apologizing to those affected by our mistakes,
|
||||
and learning from the experience
|
||||
* Focusing on what is best not just for us as individuals, but for the overall
|
||||
community
|
||||
|
||||
Examples of unacceptable behavior include:
|
||||
|
||||
* The use of sexualized language or imagery, and sexual attention or advances of
|
||||
any kind
|
||||
* Trolling, insulting or derogatory comments, and personal or political attacks
|
||||
* Public or private harassment
|
||||
* Publishing others' private information, such as a physical or email address,
|
||||
without their explicit permission
|
||||
* Other conduct which could reasonably be considered inappropriate in a
|
||||
professional setting
|
||||
|
||||
## Enforcement Responsibilities
|
||||
|
||||
Community leaders are responsible for clarifying and enforcing our standards of
|
||||
acceptable behavior and will take appropriate and fair corrective action in
|
||||
response to any behavior that they deem inappropriate, threatening, offensive,
|
||||
or harmful.
|
||||
|
||||
Community leaders have the right and responsibility to remove, edit, or reject
|
||||
comments, commits, code, wiki edits, issues, and other contributions that are
|
||||
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
||||
decisions when appropriate.
|
||||
|
||||
## Scope
|
||||
|
||||
This Code of Conduct applies within all community spaces, and also applies when
|
||||
an individual is officially representing the community in public spaces.
|
||||
Examples of representing our community include using an official email address,
|
||||
posting via an official social media account, or acting as an appointed
|
||||
representative at an online or offline event.
|
||||
|
||||
## Enforcement
|
||||
|
||||
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
||||
reported to the community leaders responsible for enforcement at
|
||||
[feedback@huggingface.co](mailto:feedback@huggingface.co).
|
||||
All complaints will be reviewed and investigated promptly and fairly.
|
||||
|
||||
All community leaders are obligated to respect the privacy and security of the
|
||||
reporter of any incident.
|
||||
|
||||
## Enforcement Guidelines
|
||||
|
||||
Community leaders will follow these Community Impact Guidelines in determining
|
||||
the consequences for any action they deem in violation of this Code of Conduct:
|
||||
|
||||
### 1. Correction
|
||||
|
||||
**Community Impact**: Use of inappropriate language or other behavior deemed
|
||||
unprofessional or unwelcome in the community.
|
||||
|
||||
**Consequence**: A private, written warning from community leaders, providing
|
||||
clarity around the nature of the violation and an explanation of why the
|
||||
behavior was inappropriate. A public apology may be requested.
|
||||
|
||||
### 2. Warning
|
||||
|
||||
**Community Impact**: A violation through a single incident or series of
|
||||
actions.
|
||||
|
||||
**Consequence**: A warning with consequences for continued behavior. No
|
||||
interaction with the people involved, including unsolicited interaction with
|
||||
those enforcing the Code of Conduct, for a specified period of time. This
|
||||
includes avoiding interactions in community spaces as well as external channels
|
||||
like social media. Violating these terms may lead to a temporary or permanent
|
||||
ban.
|
||||
|
||||
### 3. Temporary Ban
|
||||
|
||||
**Community Impact**: A serious violation of community standards, including
|
||||
sustained inappropriate behavior.
|
||||
|
||||
**Consequence**: A temporary ban from any sort of interaction or public
|
||||
communication with the community for a specified period of time. No public or
|
||||
private interaction with the people involved, including unsolicited interaction
|
||||
with those enforcing the Code of Conduct, is allowed during this period.
|
||||
Violating these terms may lead to a permanent ban.
|
||||
|
||||
### 4. Permanent Ban
|
||||
|
||||
**Community Impact**: Demonstrating a pattern of violation of community
|
||||
standards, including sustained inappropriate behavior, harassment of an
|
||||
individual, or aggression toward or disparagement of classes of individuals.
|
||||
|
||||
**Consequence**: A permanent ban from any sort of public interaction within the
|
||||
community.
|
||||
|
||||
## Attribution
|
||||
|
||||
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
||||
version 2.1, available at
|
||||
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
|
||||
|
||||
Community Impact Guidelines were inspired by
|
||||
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
|
||||
|
||||
For answers to common questions about this code of conduct, see the FAQ at
|
||||
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
|
||||
[https://www.contributor-covenant.org/translations][translations].
|
||||
|
||||
[homepage]: https://www.contributor-covenant.org
|
||||
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
|
||||
[Mozilla CoC]: https://github.com/mozilla/diversity
|
||||
[FAQ]: https://www.contributor-covenant.org/faq
|
||||
[translations]: https://www.contributor-covenant.org/translations
|
|
@ -0,0 +1,254 @@
|
|||
# How to contribute to 🤗 LeRobot?
|
||||
|
||||
Everyone is welcome to contribute, and we value everybody's contribution. Code
|
||||
is thus not the only way to help the community. Answering questions, helping
|
||||
others, reaching out and improving the documentations are immensely valuable to
|
||||
the community.
|
||||
|
||||
It also helps us if you spread the word: reference the library from blog posts
|
||||
on the awesome projects it made possible, shout out on Twitter when it has
|
||||
helped you, or simply ⭐️ the repo to say "thank you".
|
||||
|
||||
Whichever way you choose to contribute, please be mindful to respect our
|
||||
[code of conduct](https://github.com/huggingface/lerobot/blob/main/CODE_OF_CONDUCT.md).
|
||||
|
||||
## You can contribute in so many ways!
|
||||
|
||||
Some of the ways you can contribute to 🤗 LeRobot:
|
||||
* Fixing outstanding issues with the existing code.
|
||||
* Implementing new models, datasets or simulation environments.
|
||||
* Contributing to the examples or to the documentation.
|
||||
* Submitting issues related to bugs or desired new features.
|
||||
|
||||
Following the guides below, feel free to open issues and PRs and to coordinate your efforts with the community on our [Discord Channel](https://discord.gg/VjFz58wn3R). For specific inquiries, reach out to [Remi Cadene](remi.cadene@huggingface.co).
|
||||
|
||||
If you are not sure how to contribute or want to know the next features we working on, look on this project page: [LeRobot TODO](https://github.com/orgs/huggingface/projects/46)
|
||||
|
||||
## Submitting a new issue or feature request
|
||||
|
||||
Do your best to follow these guidelines when submitting an issue or a feature
|
||||
request. It will make it easier for us to come back to you quickly and with good
|
||||
feedback.
|
||||
|
||||
### Did you find a bug?
|
||||
|
||||
The 🤗 LeRobot library is robust and reliable thanks to the users who notify us of
|
||||
the problems they encounter. So thank you for reporting an issue.
|
||||
|
||||
First, we would really appreciate it if you could **make sure the bug was not
|
||||
already reported** (use the search bar on Github under Issues).
|
||||
|
||||
Did not find it? :( So we can act quickly on it, please follow these steps:
|
||||
|
||||
* Include your **OS type and version**, the versions of **Python** and **PyTorch**.
|
||||
* A short, self-contained, code snippet that allows us to reproduce the bug in
|
||||
less than 30s.
|
||||
* The full traceback if an exception is raised.
|
||||
* Attach any other additional information, like screenshots, you think may help.
|
||||
|
||||
### Do you want a new feature?
|
||||
|
||||
A good feature request addresses the following points:
|
||||
|
||||
1. Motivation first:
|
||||
* Is it related to a problem/frustration with the library? If so, please explain
|
||||
why. Providing a code snippet that demonstrates the problem is best.
|
||||
* Is it related to something you would need for a project? We'd love to hear
|
||||
about it!
|
||||
* Is it something you worked on and think could benefit the community?
|
||||
Awesome! Tell us what problem it solved for you.
|
||||
2. Write a *paragraph* describing the feature.
|
||||
3. Provide a **code snippet** that demonstrates its future use.
|
||||
4. In case this is related to a paper, please attach a link.
|
||||
5. Attach any additional information (drawings, screenshots, etc.) you think may help.
|
||||
|
||||
If your issue is well written we're already 80% of the way there by the time you
|
||||
post it.
|
||||
|
||||
## Submitting a pull request (PR)
|
||||
|
||||
Before writing code, we strongly advise you to search through the existing PRs or
|
||||
issues to make sure that nobody is already working on the same thing. If you are
|
||||
unsure, it is always a good idea to open an issue to get some feedback.
|
||||
|
||||
You will need basic `git` proficiency to be able to contribute to
|
||||
🤗 LeRobot. `git` is not the easiest tool to use but it has the greatest
|
||||
manual. Type `git --help` in a shell and enjoy. If you prefer books, [Pro
|
||||
Git](https://git-scm.com/book/en/v2) is a very good reference.
|
||||
|
||||
Follow these steps to start contributing:
|
||||
|
||||
1. Fork the [repository](https://github.com/huggingface/lerobot) by
|
||||
clicking on the 'Fork' button on the repository's page. This creates a copy of the code
|
||||
under your GitHub user account.
|
||||
|
||||
2. Clone your fork to your local disk, and add the base repository as a remote. The following command
|
||||
assumes you have your public SSH key uploaded to GitHub. See the following guide for more
|
||||
[information](https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository).
|
||||
|
||||
```bash
|
||||
git clone git@github.com:<your Github handle>/lerobot.git
|
||||
cd lerobot
|
||||
git remote add upstream https://github.com/huggingface/lerobot.git
|
||||
```
|
||||
|
||||
3. Create a new branch to hold your development changes, and do this for every new PR you work on.
|
||||
|
||||
Start by synchronizing your `main` branch with the `upstream/main` branch (more details in the [GitHub Docs](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/syncing-a-fork)):
|
||||
|
||||
```bash
|
||||
git checkout main
|
||||
git fetch upstream
|
||||
git rebase upstream/main
|
||||
```
|
||||
|
||||
Once your `main` branch is synchronized, create a new branch from it:
|
||||
|
||||
```bash
|
||||
git checkout -b a-descriptive-name-for-my-changes
|
||||
```
|
||||
|
||||
🚨 **Do not** work on the `main` branch.
|
||||
|
||||
4. Instead of using `pip` directly, we use `poetry` for development purposes to easily track our dependencies.
|
||||
If you don't have it already, follow the [instructions](https://python-poetry.org/docs/#installation) to install it.
|
||||
Set up a development environment by running the following command in a conda or a virtual environment you've created for working on this library:
|
||||
Install the project with dev dependencies and all environments:
|
||||
```bash
|
||||
poetry install --sync --with dev --all-extras
|
||||
```
|
||||
This command should be run when pulling code with and updated version of `pyproject.toml` and `poetry.lock` in order to synchronize your virtual environment with the dependencies.
|
||||
|
||||
To selectively install environments (for example aloha and pusht) use:
|
||||
```bash
|
||||
poetry install --sync --with dev --extras "aloha pusht"
|
||||
```
|
||||
|
||||
The equivalent of `pip install some-package`, would just be:
|
||||
```bash
|
||||
poetry add some-package
|
||||
```
|
||||
|
||||
When changes are made to the poetry sections of the `pyproject.toml`, you should run the following command to lock dependencies.
|
||||
```bash
|
||||
poetry lock --no-update
|
||||
```
|
||||
|
||||
**NOTE:** Currently, to ensure the CI works properly, any new package must also be added in the CPU-only environment dedicated to the CI. To do this, you should create a separate environment and add the new package there as well. For example:
|
||||
```bash
|
||||
# Add the new package to your main poetry env
|
||||
poetry add some-package
|
||||
# Add the same package to the CPU-only env dedicated to CI
|
||||
conda create -y -n lerobot-ci python=3.10
|
||||
conda activate lerobot-ci
|
||||
cd .github/poetry/cpu
|
||||
poetry add some-package
|
||||
```
|
||||
|
||||
5. Develop the features on your branch.
|
||||
|
||||
As you work on the features, you should make sure that the test suite
|
||||
passes. You should run the tests impacted by your changes like this (see
|
||||
below an explanation regarding the environment variable):
|
||||
|
||||
```bash
|
||||
pytest tests/<TEST_TO_RUN>.py
|
||||
```
|
||||
|
||||
6. Follow our style.
|
||||
|
||||
`lerobot` relies on `ruff` to format its source code
|
||||
consistently. Set up [`pre-commit`](https://pre-commit.com/) to run these checks
|
||||
automatically as Git commit hooks.
|
||||
|
||||
Install `pre-commit` hooks:
|
||||
```bash
|
||||
pre-commit install
|
||||
```
|
||||
|
||||
You can run these hooks whenever you need on staged files with:
|
||||
```bash
|
||||
pre-commit
|
||||
```
|
||||
|
||||
Once you're happy with your changes, add changed files using `git add` and
|
||||
make a commit with `git commit` to record your changes locally:
|
||||
|
||||
```bash
|
||||
git add modified_file.py
|
||||
git commit
|
||||
```
|
||||
|
||||
Please write [good commit messages](https://chris.beams.io/posts/git-commit/).
|
||||
|
||||
It is a good idea to sync your copy of the code with the original
|
||||
repository regularly. This way you can quickly account for changes:
|
||||
|
||||
```bash
|
||||
git fetch upstream
|
||||
git rebase upstream/main
|
||||
```
|
||||
|
||||
Push the changes to your account using:
|
||||
|
||||
```bash
|
||||
git push -u origin a-descriptive-name-for-my-changes
|
||||
```
|
||||
|
||||
6. Once you are satisfied (**and the checklist below is happy too**), go to the
|
||||
webpage of your fork on GitHub. Click on 'Pull request' to send your changes
|
||||
to the project maintainers for review.
|
||||
|
||||
7. It's ok if maintainers ask you for changes. It happens to core contributors
|
||||
too! So everyone can see the changes in the Pull request, work in your local
|
||||
branch and push the changes to your fork. They will automatically appear in
|
||||
the pull request.
|
||||
|
||||
|
||||
### Checklist
|
||||
|
||||
1. The title of your pull request should be a summary of its contribution;
|
||||
2. If your pull request addresses an issue, please mention the issue number in
|
||||
the pull request description to make sure they are linked (and people
|
||||
consulting the issue know you are working on it);
|
||||
3. To indicate a work in progress please prefix the title with `[WIP]`, or preferably mark
|
||||
the PR as a draft PR. These are useful to avoid duplicated work, and to differentiate
|
||||
it from PRs ready to be merged;
|
||||
4. Make sure existing tests pass;
|
||||
<!-- 5. Add high-coverage tests. No quality testing = no merge.
|
||||
|
||||
See an example of a good PR here: https://github.com/huggingface/lerobot/pull/ -->
|
||||
|
||||
### Tests
|
||||
|
||||
An extensive test suite is included to test the library behavior and several examples. Library tests can be found in the [tests folder](https://github.com/huggingface/lerobot/tree/main/tests).
|
||||
|
||||
Install [git lfs](https://git-lfs.com/) to retrieve test artifacts (if you don't have it already).
|
||||
|
||||
On Mac:
|
||||
```bash
|
||||
brew install git-lfs
|
||||
git lfs install
|
||||
```
|
||||
|
||||
On Ubuntu:
|
||||
```bash
|
||||
sudo apt-get install git-lfs
|
||||
git lfs install
|
||||
```
|
||||
|
||||
Pull artifacts if they're not in [tests/data](tests/data)
|
||||
```bash
|
||||
git lfs pull
|
||||
```
|
||||
|
||||
We use `pytest` in order to run the tests. From the root of the
|
||||
repository, here's how to run tests with `pytest` for the library:
|
||||
|
||||
```bash
|
||||
DATA_DIR="tests/data" python -m pytest -sv ./tests
|
||||
```
|
||||
|
||||
|
||||
You can specify a smaller set of tests in order to test only the feature
|
||||
you're working on.
|
85
README.md
85
README.md
|
@ -17,6 +17,7 @@
|
|||
[](https://pypi.org/project/lerobot/)
|
||||
[](https://pypi.org/project/lerobot/)
|
||||
[](https://github.com/huggingface/lerobot/tree/main/examples)
|
||||
[](https://github.com/huggingface/lerobot/blob/main/CODE_OF_CONDUCT.md)
|
||||
[](https://discord.gg/s3KuuzsPFb)
|
||||
|
||||
</div>
|
||||
|
@ -189,89 +190,7 @@ hydra.run.dir=outputs/train/aloha_act
|
|||
|
||||
## Contribute
|
||||
|
||||
Feel free to open issues and PRs, and to coordinate your efforts with the community on our [Discord Channel](https://discord.gg/VjFz58wn3R). For specific inquiries, reach out to [Remi Cadene](remi.cadene@huggingface.co).
|
||||
|
||||
### TODO
|
||||
|
||||
If you are not sure how to contribute or want to know the next features we working on, look on this project page: [LeRobot TODO](https://github.com/orgs/huggingface/projects/46)
|
||||
|
||||
### Follow our style
|
||||
|
||||
```bash
|
||||
# install if needed
|
||||
pre-commit install
|
||||
# apply style and linter checks before git commit
|
||||
pre-commit
|
||||
```
|
||||
|
||||
### Dependencies
|
||||
|
||||
Instead of using `pip` directly, we use `poetry` for development purposes to easily track our dependencies.
|
||||
If you don't have it already, follow the [instructions](https://python-poetry.org/docs/#installation) to install it.
|
||||
|
||||
Install the project with dev dependencies and all environments:
|
||||
```bash
|
||||
poetry install --sync --with dev --all-extras
|
||||
```
|
||||
This command should be run when pulling code with and updated version of `pyproject.toml` and `poetry.lock` in order to synchronize your virtual environment with the dependencies.
|
||||
|
||||
To selectively install environments (for example aloha and pusht) use:
|
||||
```bash
|
||||
poetry install --sync --with dev --extras "aloha pusht"
|
||||
```
|
||||
|
||||
The equivalent of `pip install some-package`, would just be:
|
||||
```bash
|
||||
poetry add some-package
|
||||
```
|
||||
|
||||
When changes are made to the poetry sections of the `pyproject.toml`, you should run the following command to lock dependencies.
|
||||
```bash
|
||||
poetry lock --no-update
|
||||
```
|
||||
|
||||
|
||||
**NOTE:** Currently, to ensure the CI works properly, any new package must also be added in the CPU-only environment dedicated to the CI. To do this, you should create a separate environment and add the new package there as well. For example:
|
||||
```bash
|
||||
# Add the new package to your main poetry env
|
||||
poetry add some-package
|
||||
# Add the same package to the CPU-only env dedicated to CI
|
||||
conda create -y -n lerobot-ci python=3.10
|
||||
conda activate lerobot-ci
|
||||
cd .github/poetry/cpu
|
||||
poetry add some-package
|
||||
```
|
||||
|
||||
### Run tests locally
|
||||
|
||||
Install [git lfs](https://git-lfs.com/) to retrieve test artifacts (if you don't have it already).
|
||||
|
||||
On Mac:
|
||||
```bash
|
||||
brew install git-lfs
|
||||
git lfs install
|
||||
```
|
||||
|
||||
On Ubuntu:
|
||||
```bash
|
||||
sudo apt-get install git-lfs
|
||||
git lfs install
|
||||
```
|
||||
|
||||
Pull artifacts if they're not in [tests/data](tests/data)
|
||||
```bash
|
||||
git lfs pull
|
||||
```
|
||||
|
||||
When adding a new dataset, mock it with
|
||||
```bash
|
||||
python tests/scripts/mock_dataset.py --in-data-dir data/$DATASET --out-data-dir tests/data/$DATASET
|
||||
```
|
||||
|
||||
Run tests
|
||||
```bash
|
||||
DATA_DIR="tests/data" pytest -sx tests
|
||||
```
|
||||
If you would like to contribute to 🤗 LeRobot, please check out our [contribution guide](https://github.com/huggingface/lerobot/blob/main/CONTRIBUTING.md).
|
||||
|
||||
### Add a new dataset
|
||||
|
||||
|
|
|
@ -0,0 +1,487 @@
|
|||
"""
|
||||
This file contains all obsolete download scripts. They are centralized here to not have to load
|
||||
useless dependencies when using datasets.
|
||||
"""
|
||||
|
||||
import io
|
||||
import pickle
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
import einops
|
||||
import h5py
|
||||
import numpy as np
|
||||
import torch
|
||||
import tqdm
|
||||
from datasets import Dataset, Features, Image, Sequence, Value
|
||||
from PIL import Image as PILImage
|
||||
|
||||
|
||||
def download_and_upload(root, root_tests, dataset_id):
|
||||
if "pusht" in dataset_id:
|
||||
download_and_upload_pusht(root, root_tests, dataset_id)
|
||||
elif "xarm" in dataset_id:
|
||||
download_and_upload_xarm(root, root_tests, dataset_id)
|
||||
elif "aloha" in dataset_id:
|
||||
download_and_upload_aloha(root, root_tests, dataset_id)
|
||||
else:
|
||||
raise ValueError(dataset_id)
|
||||
|
||||
|
||||
def download_and_extract_zip(url: str, destination_folder: Path) -> bool:
|
||||
import zipfile
|
||||
|
||||
import requests
|
||||
|
||||
print(f"downloading from {url}")
|
||||
response = requests.get(url, stream=True)
|
||||
if response.status_code == 200:
|
||||
total_size = int(response.headers.get("content-length", 0))
|
||||
progress_bar = tqdm.tqdm(total=total_size, unit="B", unit_scale=True)
|
||||
|
||||
zip_file = io.BytesIO()
|
||||
for chunk in response.iter_content(chunk_size=1024):
|
||||
if chunk:
|
||||
zip_file.write(chunk)
|
||||
progress_bar.update(len(chunk))
|
||||
|
||||
progress_bar.close()
|
||||
|
||||
zip_file.seek(0)
|
||||
|
||||
with zipfile.ZipFile(zip_file, "r") as zip_ref:
|
||||
zip_ref.extractall(destination_folder)
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
|
||||
def download_and_upload_pusht(root, root_tests, dataset_id="pusht", fps=10):
|
||||
try:
|
||||
import pymunk
|
||||
from gym_pusht.envs.pusht import PushTEnv, pymunk_to_shapely
|
||||
|
||||
from lerobot.common.datasets._diffusion_policy_replay_buffer import (
|
||||
ReplayBuffer as DiffusionPolicyReplayBuffer,
|
||||
)
|
||||
except ModuleNotFoundError as e:
|
||||
print("`gym_pusht` is not installed. Please install it with `pip install 'lerobot[gym_pusht]'`")
|
||||
raise e
|
||||
|
||||
# as define in env
|
||||
success_threshold = 0.95 # 95% coverage,
|
||||
|
||||
pusht_url = "https://diffusion-policy.cs.columbia.edu/data/training/pusht.zip"
|
||||
pusht_zarr = Path("pusht/pusht_cchi_v7_replay.zarr")
|
||||
|
||||
root = Path(root)
|
||||
raw_dir = root / f"{dataset_id}_raw"
|
||||
zarr_path = (raw_dir / pusht_zarr).resolve()
|
||||
if not zarr_path.is_dir():
|
||||
raw_dir.mkdir(parents=True, exist_ok=True)
|
||||
download_and_extract_zip(pusht_url, raw_dir)
|
||||
|
||||
# load
|
||||
dataset_dict = DiffusionPolicyReplayBuffer.copy_from_path(zarr_path) # , keys=['img', 'state', 'action'])
|
||||
|
||||
episode_ids = torch.from_numpy(dataset_dict.get_episode_idxs())
|
||||
num_episodes = dataset_dict.meta["episode_ends"].shape[0]
|
||||
assert len(
|
||||
{dataset_dict[key].shape[0] for key in dataset_dict.keys()} # noqa: SIM118
|
||||
), "Some data type dont have the same number of total frames."
|
||||
|
||||
# TODO: verify that goal pose is expected to be fixed
|
||||
goal_pos_angle = np.array([256, 256, np.pi / 4]) # x, y, theta (in radians)
|
||||
goal_body = PushTEnv.get_goal_pose_body(goal_pos_angle)
|
||||
|
||||
imgs = torch.from_numpy(dataset_dict["img"]) # b h w c
|
||||
states = torch.from_numpy(dataset_dict["state"])
|
||||
actions = torch.from_numpy(dataset_dict["action"])
|
||||
|
||||
ep_dicts = []
|
||||
|
||||
id_from = 0
|
||||
for episode_id in tqdm.tqdm(range(num_episodes)):
|
||||
id_to = dataset_dict.meta["episode_ends"][episode_id]
|
||||
|
||||
num_frames = id_to - id_from
|
||||
|
||||
assert (episode_ids[id_from:id_to] == episode_id).all()
|
||||
|
||||
image = imgs[id_from:id_to]
|
||||
assert image.min() >= 0.0
|
||||
assert image.max() <= 255.0
|
||||
image = image.type(torch.uint8)
|
||||
|
||||
state = states[id_from:id_to]
|
||||
agent_pos = state[:, :2]
|
||||
block_pos = state[:, 2:4]
|
||||
block_angle = state[:, 4]
|
||||
|
||||
reward = torch.zeros(num_frames)
|
||||
success = torch.zeros(num_frames, dtype=torch.bool)
|
||||
done = torch.zeros(num_frames, dtype=torch.bool)
|
||||
for i in range(num_frames):
|
||||
space = pymunk.Space()
|
||||
space.gravity = 0, 0
|
||||
space.damping = 0
|
||||
|
||||
# Add walls.
|
||||
walls = [
|
||||
PushTEnv.add_segment(space, (5, 506), (5, 5), 2),
|
||||
PushTEnv.add_segment(space, (5, 5), (506, 5), 2),
|
||||
PushTEnv.add_segment(space, (506, 5), (506, 506), 2),
|
||||
PushTEnv.add_segment(space, (5, 506), (506, 506), 2),
|
||||
]
|
||||
space.add(*walls)
|
||||
|
||||
block_body = PushTEnv.add_tee(space, block_pos[i].tolist(), block_angle[i].item())
|
||||
goal_geom = pymunk_to_shapely(goal_body, block_body.shapes)
|
||||
block_geom = pymunk_to_shapely(block_body, block_body.shapes)
|
||||
intersection_area = goal_geom.intersection(block_geom).area
|
||||
goal_area = goal_geom.area
|
||||
coverage = intersection_area / goal_area
|
||||
reward[i] = np.clip(coverage / success_threshold, 0, 1)
|
||||
success[i] = coverage > success_threshold
|
||||
|
||||
# last step of demonstration is considered done
|
||||
done[-1] = True
|
||||
|
||||
ep_dict = {
|
||||
"observation.image": [PILImage.fromarray(x.numpy()) for x in image],
|
||||
"observation.state": agent_pos,
|
||||
"action": actions[id_from:id_to],
|
||||
"episode_id": torch.tensor([episode_id] * num_frames, dtype=torch.int),
|
||||
"frame_id": torch.arange(0, num_frames, 1),
|
||||
"timestamp": torch.arange(0, num_frames, 1) / fps,
|
||||
# "next.observation.image": image[1:],
|
||||
# "next.observation.state": agent_pos[1:],
|
||||
# TODO(rcadene): verify that reward and done are aligned with image and agent_pos
|
||||
"next.reward": torch.cat([reward[1:], reward[[-1]]]),
|
||||
"next.done": torch.cat([done[1:], done[[-1]]]),
|
||||
"next.success": torch.cat([success[1:], success[[-1]]]),
|
||||
"episode_data_index_from": torch.tensor([id_from] * num_frames),
|
||||
"episode_data_index_to": torch.tensor([id_from + num_frames] * num_frames),
|
||||
}
|
||||
ep_dicts.append(ep_dict)
|
||||
|
||||
id_from += num_frames
|
||||
|
||||
data_dict = {}
|
||||
|
||||
keys = ep_dicts[0].keys()
|
||||
for key in keys:
|
||||
if torch.is_tensor(ep_dicts[0][key][0]):
|
||||
data_dict[key] = torch.cat([ep_dict[key] for ep_dict in ep_dicts])
|
||||
else:
|
||||
if key not in data_dict:
|
||||
data_dict[key] = []
|
||||
for ep_dict in ep_dicts:
|
||||
for x in ep_dict[key]:
|
||||
data_dict[key].append(x)
|
||||
|
||||
total_frames = id_from
|
||||
data_dict["index"] = torch.arange(0, total_frames, 1)
|
||||
|
||||
features = {
|
||||
"observation.image": Image(),
|
||||
"observation.state": Sequence(
|
||||
length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None)
|
||||
),
|
||||
"action": Sequence(length=data_dict["action"].shape[1], feature=Value(dtype="float32", id=None)),
|
||||
"episode_id": Value(dtype="int64", id=None),
|
||||
"frame_id": Value(dtype="int64", id=None),
|
||||
"timestamp": Value(dtype="float32", id=None),
|
||||
"next.reward": Value(dtype="float32", id=None),
|
||||
"next.done": Value(dtype="bool", id=None),
|
||||
"next.success": Value(dtype="bool", id=None),
|
||||
"index": Value(dtype="int64", id=None),
|
||||
"episode_data_index_from": Value(dtype="int64", id=None),
|
||||
"episode_data_index_to": Value(dtype="int64", id=None),
|
||||
}
|
||||
features = Features(features)
|
||||
dataset = Dataset.from_dict(data_dict, features=features)
|
||||
dataset = dataset.with_format("torch")
|
||||
|
||||
num_items_first_ep = ep_dicts[0]["frame_id"].shape[0]
|
||||
dataset.select(range(num_items_first_ep)).save_to_disk(f"{root_tests}/{dataset_id}/train")
|
||||
dataset.push_to_hub(f"lerobot/{dataset_id}", token=True)
|
||||
dataset.push_to_hub(f"lerobot/{dataset_id}", token=True, revision="v1.0")
|
||||
|
||||
|
||||
def download_and_upload_xarm(root, root_tests, dataset_id, fps=15):
|
||||
root = Path(root)
|
||||
raw_dir = root / f"{dataset_id}_raw"
|
||||
if not raw_dir.exists():
|
||||
import zipfile
|
||||
|
||||
import gdown
|
||||
|
||||
raw_dir.mkdir(parents=True, exist_ok=True)
|
||||
url = "https://drive.google.com/uc?id=1nhxpykGtPDhmQKm-_B8zBSywVRdgeVya"
|
||||
zip_path = raw_dir / "data.zip"
|
||||
gdown.download(url, str(zip_path), quiet=False)
|
||||
print("Extracting...")
|
||||
with zipfile.ZipFile(str(zip_path), "r") as zip_f:
|
||||
for member in zip_f.namelist():
|
||||
if member.startswith("data/xarm") and member.endswith(".pkl"):
|
||||
print(member)
|
||||
zip_f.extract(member=member)
|
||||
zip_path.unlink()
|
||||
|
||||
dataset_path = root / f"{dataset_id}" / "buffer.pkl"
|
||||
print(f"Using offline dataset '{dataset_path}'")
|
||||
with open(dataset_path, "rb") as f:
|
||||
dataset_dict = pickle.load(f)
|
||||
|
||||
total_frames = dataset_dict["actions"].shape[0]
|
||||
|
||||
ep_dicts = []
|
||||
|
||||
id_from = 0
|
||||
id_to = 0
|
||||
episode_id = 0
|
||||
for i in tqdm.tqdm(range(total_frames)):
|
||||
id_to += 1
|
||||
|
||||
if not dataset_dict["dones"][i]:
|
||||
continue
|
||||
|
||||
num_frames = id_to - id_from
|
||||
|
||||
image = torch.tensor(dataset_dict["observations"]["rgb"][id_from:id_to])
|
||||
image = einops.rearrange(image, "b c h w -> b h w c")
|
||||
state = torch.tensor(dataset_dict["observations"]["state"][id_from:id_to])
|
||||
action = torch.tensor(dataset_dict["actions"][id_from:id_to])
|
||||
# TODO(rcadene): we have a missing last frame which is the observation when the env is done
|
||||
# it is critical to have this frame for tdmpc to predict a "done observation/state"
|
||||
# next_image = torch.tensor(dataset_dict["next_observations"]["rgb"][id_from:id_to])
|
||||
# next_state = torch.tensor(dataset_dict["next_observations"]["state"][id_from:id_to])
|
||||
next_reward = torch.tensor(dataset_dict["rewards"][id_from:id_to])
|
||||
next_done = torch.tensor(dataset_dict["dones"][id_from:id_to])
|
||||
|
||||
ep_dict = {
|
||||
"observation.image": [PILImage.fromarray(x.numpy()) for x in image],
|
||||
"observation.state": state,
|
||||
"action": action,
|
||||
"episode_id": torch.tensor([episode_id] * num_frames, dtype=torch.int),
|
||||
"frame_id": torch.arange(0, num_frames, 1),
|
||||
"timestamp": torch.arange(0, num_frames, 1) / fps,
|
||||
# "next.observation.image": next_image,
|
||||
# "next.observation.state": next_state,
|
||||
"next.reward": next_reward,
|
||||
"next.done": next_done,
|
||||
"episode_data_index_from": torch.tensor([id_from] * num_frames),
|
||||
"episode_data_index_to": torch.tensor([id_from + num_frames] * num_frames),
|
||||
}
|
||||
ep_dicts.append(ep_dict)
|
||||
|
||||
id_from = id_to
|
||||
episode_id += 1
|
||||
|
||||
data_dict = {}
|
||||
keys = ep_dicts[0].keys()
|
||||
for key in keys:
|
||||
if torch.is_tensor(ep_dicts[0][key][0]):
|
||||
data_dict[key] = torch.cat([ep_dict[key] for ep_dict in ep_dicts])
|
||||
else:
|
||||
if key not in data_dict:
|
||||
data_dict[key] = []
|
||||
for ep_dict in ep_dicts:
|
||||
for x in ep_dict[key]:
|
||||
data_dict[key].append(x)
|
||||
|
||||
total_frames = id_from
|
||||
data_dict["index"] = torch.arange(0, total_frames, 1)
|
||||
|
||||
features = {
|
||||
"observation.image": Image(),
|
||||
"observation.state": Sequence(
|
||||
length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None)
|
||||
),
|
||||
"action": Sequence(length=data_dict["action"].shape[1], feature=Value(dtype="float32", id=None)),
|
||||
"episode_id": Value(dtype="int64", id=None),
|
||||
"frame_id": Value(dtype="int64", id=None),
|
||||
"timestamp": Value(dtype="float32", id=None),
|
||||
"next.reward": Value(dtype="float32", id=None),
|
||||
"next.done": Value(dtype="bool", id=None),
|
||||
#'next.success': Value(dtype='bool', id=None),
|
||||
"index": Value(dtype="int64", id=None),
|
||||
"episode_data_index_from": Value(dtype="int64", id=None),
|
||||
"episode_data_index_to": Value(dtype="int64", id=None),
|
||||
}
|
||||
features = Features(features)
|
||||
dataset = Dataset.from_dict(data_dict, features=features)
|
||||
dataset = dataset.with_format("torch")
|
||||
|
||||
num_items_first_ep = ep_dicts[0]["frame_id"].shape[0]
|
||||
dataset.select(range(num_items_first_ep)).save_to_disk(f"{root_tests}/{dataset_id}/train")
|
||||
dataset.push_to_hub(f"lerobot/{dataset_id}", token=True)
|
||||
dataset.push_to_hub(f"lerobot/{dataset_id}", token=True, revision="v1.0")
|
||||
|
||||
|
||||
def download_and_upload_aloha(root, root_tests, dataset_id, fps=50):
|
||||
folder_urls = {
|
||||
"aloha_sim_insertion_human": "https://drive.google.com/drive/folders/1RgyD0JgTX30H4IM5XZn8I3zSV_mr8pyF",
|
||||
"aloha_sim_insertion_scripted": "https://drive.google.com/drive/folders/1TsojQQSXtHEoGnqgJ3gmpPQR2DPLtS2N",
|
||||
"aloha_sim_transfer_cube_human": "https://drive.google.com/drive/folders/1sc-E4QYW7A0o23m1u2VWNGVq5smAsfCo",
|
||||
"aloha_sim_transfer_cube_scripted": "https://drive.google.com/drive/folders/1aRyoOhQwxhyt1J8XgEig4s6kzaw__LXj",
|
||||
}
|
||||
|
||||
ep48_urls = {
|
||||
"aloha_sim_insertion_human": "https://drive.google.com/file/d/18Cudl6nikDtgRolea7je8iF_gGKzynOP/view?usp=drive_link",
|
||||
"aloha_sim_insertion_scripted": "https://drive.google.com/file/d/1wfMSZ24oOh5KR_0aaP3Cnu_c4ZCveduB/view?usp=drive_link",
|
||||
"aloha_sim_transfer_cube_human": "https://drive.google.com/file/d/18smMymtr8tIxaNUQ61gW6dG50pt3MvGq/view?usp=drive_link",
|
||||
"aloha_sim_transfer_cube_scripted": "https://drive.google.com/file/d/1pnGIOd-E4-rhz2P3VxpknMKRZCoKt6eI/view?usp=drive_link",
|
||||
}
|
||||
|
||||
ep49_urls = {
|
||||
"aloha_sim_insertion_human": "https://drive.google.com/file/d/1C1kZYyROzs-PrLc0SkDgUgMi4-L3lauE/view?usp=drive_link",
|
||||
"aloha_sim_insertion_scripted": "https://drive.google.com/file/d/17EuCUWS6uCCr6yyNzpXdcdE-_TTNCKtf/view?usp=drive_link",
|
||||
"aloha_sim_transfer_cube_human": "https://drive.google.com/file/d/1Nk7l53d9sJoGDBKAOnNrExX5nLacATc6/view?usp=drive_link",
|
||||
"aloha_sim_transfer_cube_scripted": "https://drive.google.com/file/d/1GKReZHrXU73NMiC5zKCq_UtqPVtYq8eo/view?usp=drive_link",
|
||||
}
|
||||
|
||||
num_episodes = {
|
||||
"aloha_sim_insertion_human": 50,
|
||||
"aloha_sim_insertion_scripted": 50,
|
||||
"aloha_sim_transfer_cube_human": 50,
|
||||
"aloha_sim_transfer_cube_scripted": 50,
|
||||
}
|
||||
|
||||
episode_len = {
|
||||
"aloha_sim_insertion_human": 500,
|
||||
"aloha_sim_insertion_scripted": 400,
|
||||
"aloha_sim_transfer_cube_human": 400,
|
||||
"aloha_sim_transfer_cube_scripted": 400,
|
||||
}
|
||||
|
||||
cameras = {
|
||||
"aloha_sim_insertion_human": ["top"],
|
||||
"aloha_sim_insertion_scripted": ["top"],
|
||||
"aloha_sim_transfer_cube_human": ["top"],
|
||||
"aloha_sim_transfer_cube_scripted": ["top"],
|
||||
}
|
||||
|
||||
root = Path(root)
|
||||
raw_dir = root / f"{dataset_id}_raw"
|
||||
if not raw_dir.is_dir():
|
||||
import gdown
|
||||
|
||||
assert dataset_id in folder_urls
|
||||
assert dataset_id in ep48_urls
|
||||
assert dataset_id in ep49_urls
|
||||
|
||||
raw_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
gdown.download_folder(folder_urls[dataset_id], output=str(raw_dir))
|
||||
|
||||
# because of the 50 files limit per directory, two files episode 48 and 49 were missing
|
||||
gdown.download(ep48_urls[dataset_id], output=str(raw_dir / "episode_48.hdf5"), fuzzy=True)
|
||||
gdown.download(ep49_urls[dataset_id], output=str(raw_dir / "episode_49.hdf5"), fuzzy=True)
|
||||
|
||||
ep_dicts = []
|
||||
|
||||
id_from = 0
|
||||
for ep_id in tqdm.tqdm(range(num_episodes[dataset_id])):
|
||||
ep_path = raw_dir / f"episode_{ep_id}.hdf5"
|
||||
with h5py.File(ep_path, "r") as ep:
|
||||
num_frames = ep["/action"].shape[0]
|
||||
assert episode_len[dataset_id] == num_frames
|
||||
|
||||
# last step of demonstration is considered done
|
||||
done = torch.zeros(num_frames, dtype=torch.bool)
|
||||
done[-1] = True
|
||||
|
||||
state = torch.from_numpy(ep["/observations/qpos"][:])
|
||||
action = torch.from_numpy(ep["/action"][:])
|
||||
|
||||
ep_dict = {}
|
||||
|
||||
for cam in cameras[dataset_id]:
|
||||
image = torch.from_numpy(ep[f"/observations/images/{cam}"][:]) # b h w c
|
||||
# image = einops.rearrange(image, "b h w c -> b c h w").contiguous()
|
||||
ep_dict[f"observation.images.{cam}"] = [PILImage.fromarray(x.numpy()) for x in image]
|
||||
# ep_dict[f"next.observation.images.{cam}"] = image
|
||||
|
||||
ep_dict.update(
|
||||
{
|
||||
"observation.state": state,
|
||||
"action": action,
|
||||
"episode_id": torch.tensor([ep_id] * num_frames),
|
||||
"frame_id": torch.arange(0, num_frames, 1),
|
||||
"timestamp": torch.arange(0, num_frames, 1) / fps,
|
||||
# "next.observation.state": state,
|
||||
# TODO(rcadene): compute reward and success
|
||||
# "next.reward": reward,
|
||||
"next.done": done,
|
||||
# "next.success": success,
|
||||
"episode_data_index_from": torch.tensor([id_from] * num_frames),
|
||||
"episode_data_index_to": torch.tensor([id_from + num_frames] * num_frames),
|
||||
}
|
||||
)
|
||||
|
||||
assert isinstance(ep_id, int)
|
||||
ep_dicts.append(ep_dict)
|
||||
|
||||
id_from += num_frames
|
||||
|
||||
data_dict = {}
|
||||
|
||||
data_dict = {}
|
||||
keys = ep_dicts[0].keys()
|
||||
for key in keys:
|
||||
if torch.is_tensor(ep_dicts[0][key][0]):
|
||||
data_dict[key] = torch.cat([ep_dict[key] for ep_dict in ep_dicts])
|
||||
else:
|
||||
if key not in data_dict:
|
||||
data_dict[key] = []
|
||||
for ep_dict in ep_dicts:
|
||||
for x in ep_dict[key]:
|
||||
data_dict[key].append(x)
|
||||
|
||||
total_frames = id_from
|
||||
data_dict["index"] = torch.arange(0, total_frames, 1)
|
||||
|
||||
features = {
|
||||
"observation.images.top": Image(),
|
||||
"observation.state": Sequence(
|
||||
length=data_dict["observation.state"].shape[1], feature=Value(dtype="float32", id=None)
|
||||
),
|
||||
"action": Sequence(length=data_dict["action"].shape[1], feature=Value(dtype="float32", id=None)),
|
||||
"episode_id": Value(dtype="int64", id=None),
|
||||
"frame_id": Value(dtype="int64", id=None),
|
||||
"timestamp": Value(dtype="float32", id=None),
|
||||
#'next.reward': Value(dtype='float32', id=None),
|
||||
"next.done": Value(dtype="bool", id=None),
|
||||
#'next.success': Value(dtype='bool', id=None),
|
||||
"index": Value(dtype="int64", id=None),
|
||||
"episode_data_index_from": Value(dtype="int64", id=None),
|
||||
"episode_data_index_to": Value(dtype="int64", id=None),
|
||||
}
|
||||
features = Features(features)
|
||||
dataset = Dataset.from_dict(data_dict, features=features)
|
||||
dataset = dataset.with_format("torch")
|
||||
|
||||
num_items_first_ep = ep_dicts[0]["frame_id"].shape[0]
|
||||
dataset.select(range(num_items_first_ep)).save_to_disk(f"{root_tests}/{dataset_id}/train")
|
||||
dataset.push_to_hub(f"lerobot/{dataset_id}", token=True)
|
||||
dataset.push_to_hub(f"lerobot/{dataset_id}", token=True, revision="v1.0")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
root = "data"
|
||||
root_tests = "tests/data"
|
||||
|
||||
dataset_ids = [
|
||||
# "pusht",
|
||||
# "xarm_lift_medium",
|
||||
# "aloha_sim_insertion_human",
|
||||
# "aloha_sim_insertion_scripted",
|
||||
# "aloha_sim_transfer_cube_human",
|
||||
"aloha_sim_transfer_cube_scripted",
|
||||
]
|
||||
for dataset_id in dataset_ids:
|
||||
download_and_upload(root, root_tests, dataset_id)
|
||||
# assume stats have been precomputed
|
||||
shutil.copy(f"{root}/{dataset_id}/stats.pth", f"{root_tests}/{dataset_id}/stats.pth")
|
|
@ -0,0 +1,43 @@
|
|||
import platform
|
||||
|
||||
import huggingface_hub
|
||||
|
||||
# import dataset
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from lerobot import __version__ as version
|
||||
|
||||
pt_version = torch.__version__
|
||||
pt_cuda_available = torch.cuda.is_available()
|
||||
pt_cuda_available = torch.cuda.is_available()
|
||||
cuda_version = torch._C._cuda_getCompiledVersion() if torch.version.cuda is not None else "N/A"
|
||||
|
||||
|
||||
# TODO(aliberts): refactor into an actual command `lerobot env`
|
||||
def get_env_info() -> dict:
|
||||
"""Run this to get basic system info to help for tracking issues & bugs."""
|
||||
info = {
|
||||
"`lerobot` version": version,
|
||||
"Platform": platform.platform(),
|
||||
"Python version": platform.python_version(),
|
||||
"Huggingface_hub version": huggingface_hub.__version__,
|
||||
# TODO(aliberts): Add dataset when https://github.com/huggingface/lerobot/pull/73 is merged
|
||||
# "Dataset version": dataset.__version__,
|
||||
"Numpy version": np.__version__,
|
||||
"PyTorch version (GPU?)": f"{pt_version} ({pt_cuda_available})",
|
||||
"Cuda version": cuda_version,
|
||||
"Using GPU in script?": "<fill in>",
|
||||
"Using distributed or parallel set-up in script?": "<fill in>",
|
||||
}
|
||||
print("\nCopy-and-paste the text below in your GitHub issue and FILL OUT the two last points.\n")
|
||||
print(format_dict(info))
|
||||
return info
|
||||
|
||||
|
||||
def format_dict(d: dict) -> str:
|
||||
return "\n".join([f"- {prop}: {val}" for prop, val in d.items()]) + "\n"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
get_env_info()
|
|
@ -1,6 +1,6 @@
|
|||
"""Helper code for loading PushT dataset from Diffusion Policy (https://diffusion-policy.cs.columbia.edu/)
|
||||
|
||||
Copied from the original Diffusion Policy repository.
|
||||
Copied from the original Diffusion Policy repository and used in our `download_and_upload_dataset.py` script.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
|
@ -1,72 +1,19 @@
|
|||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import einops
|
||||
import gdown
|
||||
import h5py
|
||||
import torch
|
||||
import tqdm
|
||||
from datasets import load_dataset, load_from_disk
|
||||
|
||||
from lerobot.common.datasets.utils import load_data_with_delta_timestamps
|
||||
|
||||
FOLDER_URLS = {
|
||||
"aloha_sim_insertion_human": "https://drive.google.com/drive/folders/1RgyD0JgTX30H4IM5XZn8I3zSV_mr8pyF",
|
||||
"aloha_sim_insertion_scripted": "https://drive.google.com/drive/folders/1TsojQQSXtHEoGnqgJ3gmpPQR2DPLtS2N",
|
||||
"aloha_sim_transfer_cube_human": "https://drive.google.com/drive/folders/1sc-E4QYW7A0o23m1u2VWNGVq5smAsfCo",
|
||||
"aloha_sim_transfer_cube_scripted": "https://drive.google.com/drive/folders/1aRyoOhQwxhyt1J8XgEig4s6kzaw__LXj",
|
||||
}
|
||||
|
||||
EP48_URLS = {
|
||||
"aloha_sim_insertion_human": "https://drive.google.com/file/d/18Cudl6nikDtgRolea7je8iF_gGKzynOP/view?usp=drive_link",
|
||||
"aloha_sim_insertion_scripted": "https://drive.google.com/file/d/1wfMSZ24oOh5KR_0aaP3Cnu_c4ZCveduB/view?usp=drive_link",
|
||||
"aloha_sim_transfer_cube_human": "https://drive.google.com/file/d/18smMymtr8tIxaNUQ61gW6dG50pt3MvGq/view?usp=drive_link",
|
||||
"aloha_sim_transfer_cube_scripted": "https://drive.google.com/file/d/1pnGIOd-E4-rhz2P3VxpknMKRZCoKt6eI/view?usp=drive_link",
|
||||
}
|
||||
|
||||
EP49_URLS = {
|
||||
"aloha_sim_insertion_human": "https://drive.google.com/file/d/1C1kZYyROzs-PrLc0SkDgUgMi4-L3lauE/view?usp=drive_link",
|
||||
"aloha_sim_insertion_scripted": "https://drive.google.com/file/d/17EuCUWS6uCCr6yyNzpXdcdE-_TTNCKtf/view?usp=drive_link",
|
||||
"aloha_sim_transfer_cube_human": "https://drive.google.com/file/d/1Nk7l53d9sJoGDBKAOnNrExX5nLacATc6/view?usp=drive_link",
|
||||
"aloha_sim_transfer_cube_scripted": "https://drive.google.com/file/d/1GKReZHrXU73NMiC5zKCq_UtqPVtYq8eo/view?usp=drive_link",
|
||||
}
|
||||
|
||||
NUM_EPISODES = {
|
||||
"aloha_sim_insertion_human": 50,
|
||||
"aloha_sim_insertion_scripted": 50,
|
||||
"aloha_sim_transfer_cube_human": 50,
|
||||
"aloha_sim_transfer_cube_scripted": 50,
|
||||
}
|
||||
|
||||
EPISODE_LEN = {
|
||||
"aloha_sim_insertion_human": 500,
|
||||
"aloha_sim_insertion_scripted": 400,
|
||||
"aloha_sim_transfer_cube_human": 400,
|
||||
"aloha_sim_transfer_cube_scripted": 400,
|
||||
}
|
||||
|
||||
CAMERAS = {
|
||||
"aloha_sim_insertion_human": ["top"],
|
||||
"aloha_sim_insertion_scripted": ["top"],
|
||||
"aloha_sim_transfer_cube_human": ["top"],
|
||||
"aloha_sim_transfer_cube_scripted": ["top"],
|
||||
}
|
||||
|
||||
|
||||
def download(data_dir, dataset_id):
|
||||
assert dataset_id in FOLDER_URLS
|
||||
assert dataset_id in EP48_URLS
|
||||
assert dataset_id in EP49_URLS
|
||||
|
||||
data_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
gdown.download_folder(FOLDER_URLS[dataset_id], output=str(data_dir))
|
||||
|
||||
# because of the 50 files limit per directory, two files episode 48 and 49 were missing
|
||||
gdown.download(EP48_URLS[dataset_id], output=str(data_dir / "episode_48.hdf5"), fuzzy=True)
|
||||
gdown.download(EP49_URLS[dataset_id], output=str(data_dir / "episode_49.hdf5"), fuzzy=True)
|
||||
from lerobot.common.datasets.utils import load_previous_and_future_frames
|
||||
|
||||
|
||||
class AlohaDataset(torch.utils.data.Dataset):
|
||||
"""
|
||||
https://huggingface.co/datasets/lerobot/aloha_sim_insertion_human
|
||||
https://huggingface.co/datasets/lerobot/aloha_sim_insertion_scripted
|
||||
https://huggingface.co/datasets/lerobot/aloha_sim_transfer_cube_human
|
||||
https://huggingface.co/datasets/lerobot/aloha_sim_transfer_cube_scripted
|
||||
"""
|
||||
|
||||
available_datasets = [
|
||||
"aloha_sim_insertion_human",
|
||||
"aloha_sim_insertion_scripted",
|
||||
|
@ -79,8 +26,9 @@ class AlohaDataset(torch.utils.data.Dataset):
|
|||
def __init__(
|
||||
self,
|
||||
dataset_id: str,
|
||||
version: str | None = "v1.2",
|
||||
version: str | None = "v1.0",
|
||||
root: Path | None = None,
|
||||
split: str = "train",
|
||||
transform: callable = None,
|
||||
delta_timestamps: dict[list[float]] | None = None,
|
||||
):
|
||||
|
@ -88,120 +36,48 @@ class AlohaDataset(torch.utils.data.Dataset):
|
|||
self.dataset_id = dataset_id
|
||||
self.version = version
|
||||
self.root = root
|
||||
self.split = split
|
||||
self.transform = transform
|
||||
self.delta_timestamps = delta_timestamps
|
||||
|
||||
self.data_dir = self.root / f"{self.dataset_id}"
|
||||
if (self.data_dir / "data_dict.pth").exists() and (
|
||||
self.data_dir / "data_ids_per_episode.pth"
|
||||
).exists():
|
||||
self.data_dict = torch.load(self.data_dir / "data_dict.pth")
|
||||
self.data_ids_per_episode = torch.load(self.data_dir / "data_ids_per_episode.pth")
|
||||
if self.root is not None:
|
||||
self.data_dict = load_from_disk(Path(self.root) / self.dataset_id / self.split)
|
||||
else:
|
||||
self._download_and_preproc_obsolete()
|
||||
self.data_dir.mkdir(parents=True, exist_ok=True)
|
||||
torch.save(self.data_dict, self.data_dir / "data_dict.pth")
|
||||
torch.save(self.data_ids_per_episode, self.data_dir / "data_ids_per_episode.pth")
|
||||
self.data_dict = load_dataset(
|
||||
f"lerobot/{self.dataset_id}", revision=self.version, split=self.split
|
||||
)
|
||||
self.data_dict = self.data_dict.with_format("torch")
|
||||
|
||||
@property
|
||||
def num_samples(self) -> int:
|
||||
return len(self.data_dict["index"]) if "index" in self.data_dict else 0
|
||||
return len(self.data_dict)
|
||||
|
||||
@property
|
||||
def num_episodes(self) -> int:
|
||||
return len(self.data_ids_per_episode)
|
||||
return len(self.data_dict.unique("episode_id"))
|
||||
|
||||
def __len__(self):
|
||||
return self.num_samples
|
||||
|
||||
def __getitem__(self, idx):
|
||||
item = {}
|
||||
item = self.data_dict[idx]
|
||||
|
||||
# get episode id and timestamp of the sampled frame
|
||||
current_ts = self.data_dict["timestamp"][idx].item()
|
||||
episode = self.data_dict["episode"][idx].item()
|
||||
if self.delta_timestamps is not None:
|
||||
item = load_previous_and_future_frames(
|
||||
item,
|
||||
self.data_dict,
|
||||
self.delta_timestamps,
|
||||
)
|
||||
|
||||
for key in self.data_dict:
|
||||
if self.delta_timestamps is not None and key in self.delta_timestamps:
|
||||
data, is_pad = load_data_with_delta_timestamps(
|
||||
self.data_dict,
|
||||
self.data_ids_per_episode,
|
||||
self.delta_timestamps,
|
||||
key,
|
||||
current_ts,
|
||||
episode,
|
||||
)
|
||||
item[key] = data
|
||||
item[f"{key}_is_pad"] = is_pad
|
||||
# convert images from channel last (PIL) to channel first (pytorch)
|
||||
for key in self.image_keys:
|
||||
if item[key].ndim == 3:
|
||||
item[key] = item[key].permute((2, 0, 1)) # h w c -> c h w
|
||||
elif item[key].ndim == 4:
|
||||
item[key] = item[key].permute((0, 3, 1, 2)) # t h w c -> t c h w
|
||||
else:
|
||||
item[key] = self.data_dict[key][idx]
|
||||
raise ValueError(item[key].ndim)
|
||||
|
||||
if self.transform is not None:
|
||||
item = self.transform(item)
|
||||
|
||||
return item
|
||||
|
||||
def _download_and_preproc_obsolete(self):
|
||||
assert self.root is not None
|
||||
raw_dir = self.root / f"{self.dataset_id}_raw"
|
||||
if not raw_dir.is_dir():
|
||||
download(raw_dir, self.dataset_id)
|
||||
|
||||
total_frames = 0
|
||||
logging.info("Compute total number of frames to initialize offline buffer")
|
||||
for ep_id in range(NUM_EPISODES[self.dataset_id]):
|
||||
ep_path = raw_dir / f"episode_{ep_id}.hdf5"
|
||||
with h5py.File(ep_path, "r") as ep:
|
||||
total_frames += ep["/action"].shape[0] - 1
|
||||
logging.info(f"{total_frames=}")
|
||||
|
||||
self.data_ids_per_episode = {}
|
||||
ep_dicts = []
|
||||
|
||||
frame_idx = 0
|
||||
for ep_id in tqdm.tqdm(range(NUM_EPISODES[self.dataset_id])):
|
||||
ep_path = raw_dir / f"episode_{ep_id}.hdf5"
|
||||
with h5py.File(ep_path, "r") as ep:
|
||||
num_frames = ep["/action"].shape[0]
|
||||
|
||||
# last step of demonstration is considered done
|
||||
done = torch.zeros(num_frames, dtype=torch.bool)
|
||||
done[-1] = True
|
||||
|
||||
state = torch.from_numpy(ep["/observations/qpos"][:])
|
||||
action = torch.from_numpy(ep["/action"][:])
|
||||
|
||||
ep_dict = {
|
||||
"observation.state": state,
|
||||
"action": action,
|
||||
"episode": torch.tensor([ep_id] * num_frames),
|
||||
"frame_id": torch.arange(0, num_frames, 1),
|
||||
"timestamp": torch.arange(0, num_frames, 1) / self.fps,
|
||||
# "next.observation.state": state,
|
||||
# TODO(rcadene): compute reward and success
|
||||
# "next.reward": reward[1:],
|
||||
"next.done": done[1:],
|
||||
# "next.success": success[1:],
|
||||
}
|
||||
|
||||
for cam in CAMERAS[self.dataset_id]:
|
||||
image = torch.from_numpy(ep[f"/observations/images/{cam}"][:])
|
||||
image = einops.rearrange(image, "b h w c -> b c h w").contiguous()
|
||||
ep_dict[f"observation.images.{cam}"] = image[:-1]
|
||||
# ep_dict[f"next.observation.images.{cam}"] = image[1:]
|
||||
|
||||
assert isinstance(ep_id, int)
|
||||
self.data_ids_per_episode[ep_id] = torch.arange(frame_idx, frame_idx + num_frames, 1)
|
||||
assert len(self.data_ids_per_episode[ep_id]) == num_frames
|
||||
|
||||
ep_dicts.append(ep_dict)
|
||||
|
||||
frame_idx += num_frames
|
||||
|
||||
self.data_dict = {}
|
||||
|
||||
keys = ep_dicts[0].keys()
|
||||
for key in keys:
|
||||
self.data_dict[key] = torch.cat([x[key] for x in ep_dicts])
|
||||
|
||||
self.data_dict["index"] = torch.arange(0, total_frames, 1)
|
||||
|
|
|
@ -8,9 +8,6 @@ from torchvision.transforms import v2
|
|||
from lerobot.common.datasets.utils import compute_stats
|
||||
from lerobot.common.transforms import NormalizeTransform, Prod
|
||||
|
||||
# DATA_DIR specifies to location where datasets are loaded. By default, DATA_DIR is None and
|
||||
# we load from `$HOME/.cache/huggingface/hub/datasets`. For our unit tests, we set `DATA_DIR=tests/data`
|
||||
# to load a subset of our datasets for faster continuous integration.
|
||||
DATA_DIR = Path(os.environ["DATA_DIR"]) if "DATA_DIR" in os.environ else None
|
||||
|
||||
|
||||
|
@ -19,6 +16,7 @@ def make_dataset(
|
|||
# set normalize=False to remove all transformations and keep images unnormalized in [0,255]
|
||||
normalize=True,
|
||||
stats_path=None,
|
||||
split="train",
|
||||
):
|
||||
if cfg.env.name == "xarm":
|
||||
from lerobot.common.datasets.xarm import XarmDataset
|
||||
|
@ -54,19 +52,23 @@ def make_dataset(
|
|||
stats["action"]["min"] = torch.tensor([12.0, 25.0], dtype=torch.float32)
|
||||
stats["action"]["max"] = torch.tensor([511.0, 511.0], dtype=torch.float32)
|
||||
elif stats_path is None:
|
||||
# instantiate a one frame dataset with light transform
|
||||
stats_dataset = clsfunc(
|
||||
dataset_id=cfg.dataset_id,
|
||||
root=DATA_DIR,
|
||||
transform=Prod(in_keys=clsfunc.image_keys, prod=1 / 255.0),
|
||||
)
|
||||
|
||||
# load stats if the file exists already or compute stats and save it
|
||||
precomputed_stats_path = stats_dataset.data_dir / "stats.pth"
|
||||
if DATA_DIR is None:
|
||||
# TODO(rcadene): clean stats
|
||||
precomputed_stats_path = Path("data") / cfg.dataset_id / "stats.pth"
|
||||
else:
|
||||
precomputed_stats_path = DATA_DIR / cfg.dataset_id / "stats.pth"
|
||||
if precomputed_stats_path.exists():
|
||||
stats = torch.load(precomputed_stats_path)
|
||||
else:
|
||||
logging.info(f"compute_stats and save to {precomputed_stats_path}")
|
||||
# instantiate a one frame dataset with light transform
|
||||
stats_dataset = clsfunc(
|
||||
dataset_id=cfg.dataset_id,
|
||||
split="train",
|
||||
root=DATA_DIR,
|
||||
transform=Prod(in_keys=clsfunc.image_keys, prod=1 / 255.0),
|
||||
)
|
||||
stats = compute_stats(stats_dataset)
|
||||
torch.save(stats, stats_path)
|
||||
else:
|
||||
|
@ -94,6 +96,7 @@ def make_dataset(
|
|||
|
||||
dataset = clsfunc(
|
||||
dataset_id=cfg.dataset_id,
|
||||
split=split,
|
||||
root=DATA_DIR,
|
||||
delta_timestamps=delta_timestamps,
|
||||
transform=transforms,
|
||||
|
|
|
@ -1,24 +1,14 @@
|
|||
from pathlib import Path
|
||||
|
||||
import einops
|
||||
import numpy as np
|
||||
import torch
|
||||
import tqdm
|
||||
from datasets import load_dataset, load_from_disk
|
||||
|
||||
from lerobot.common.datasets._diffusion_policy_replay_buffer import (
|
||||
ReplayBuffer as DiffusionPolicyReplayBuffer,
|
||||
)
|
||||
from lerobot.common.datasets.utils import download_and_extract_zip, load_data_with_delta_timestamps
|
||||
|
||||
# as define in env
|
||||
SUCCESS_THRESHOLD = 0.95 # 95% coverage,
|
||||
|
||||
PUSHT_URL = "https://diffusion-policy.cs.columbia.edu/data/training/pusht.zip"
|
||||
PUSHT_ZARR = Path("pusht/pusht_cchi_v7_replay.zarr")
|
||||
from lerobot.common.datasets.utils import load_previous_and_future_frames
|
||||
|
||||
|
||||
class PushtDataset(torch.utils.data.Dataset):
|
||||
"""
|
||||
https://huggingface.co/datasets/lerobot/pusht
|
||||
|
||||
Arguments
|
||||
----------
|
||||
|
@ -34,8 +24,9 @@ class PushtDataset(torch.utils.data.Dataset):
|
|||
def __init__(
|
||||
self,
|
||||
dataset_id: str,
|
||||
version: str | None = "v1.2",
|
||||
version: str | None = "v1.0",
|
||||
root: Path | None = None,
|
||||
split: str = "train",
|
||||
transform: callable = None,
|
||||
delta_timestamps: dict[list[float]] | None = None,
|
||||
):
|
||||
|
@ -43,174 +34,48 @@ class PushtDataset(torch.utils.data.Dataset):
|
|||
self.dataset_id = dataset_id
|
||||
self.version = version
|
||||
self.root = root
|
||||
self.split = split
|
||||
self.transform = transform
|
||||
self.delta_timestamps = delta_timestamps
|
||||
|
||||
self.data_dir = self.root / f"{self.dataset_id}"
|
||||
if (self.data_dir / "data_dict.pth").exists() and (
|
||||
self.data_dir / "data_ids_per_episode.pth"
|
||||
).exists():
|
||||
self.data_dict = torch.load(self.data_dir / "data_dict.pth")
|
||||
self.data_ids_per_episode = torch.load(self.data_dir / "data_ids_per_episode.pth")
|
||||
if self.root is not None:
|
||||
self.data_dict = load_from_disk(Path(self.root) / self.dataset_id / self.split)
|
||||
else:
|
||||
self._download_and_preproc_obsolete()
|
||||
self.data_dir.mkdir(parents=True, exist_ok=True)
|
||||
torch.save(self.data_dict, self.data_dir / "data_dict.pth")
|
||||
torch.save(self.data_ids_per_episode, self.data_dir / "data_ids_per_episode.pth")
|
||||
self.data_dict = load_dataset(
|
||||
f"lerobot/{self.dataset_id}", revision=self.version, split=self.split
|
||||
)
|
||||
self.data_dict = self.data_dict.with_format("torch")
|
||||
|
||||
@property
|
||||
def num_samples(self) -> int:
|
||||
return len(self.data_dict["index"]) if "index" in self.data_dict else 0
|
||||
return len(self.data_dict)
|
||||
|
||||
@property
|
||||
def num_episodes(self) -> int:
|
||||
return len(self.data_ids_per_episode)
|
||||
return len(self.data_dict.unique("episode_id"))
|
||||
|
||||
def __len__(self):
|
||||
return self.num_samples
|
||||
|
||||
def __getitem__(self, idx):
|
||||
item = {}
|
||||
item = self.data_dict[idx]
|
||||
|
||||
# get episode id and timestamp of the sampled frame
|
||||
current_ts = self.data_dict["timestamp"][idx].item()
|
||||
episode = self.data_dict["episode"][idx].item()
|
||||
if self.delta_timestamps is not None:
|
||||
item = load_previous_and_future_frames(
|
||||
item,
|
||||
self.data_dict,
|
||||
self.delta_timestamps,
|
||||
)
|
||||
|
||||
for key in self.data_dict:
|
||||
if self.delta_timestamps is not None and key in self.delta_timestamps:
|
||||
data, is_pad = load_data_with_delta_timestamps(
|
||||
self.data_dict,
|
||||
self.data_ids_per_episode,
|
||||
self.delta_timestamps,
|
||||
key,
|
||||
current_ts,
|
||||
episode,
|
||||
)
|
||||
item[key] = data
|
||||
item[f"{key}_is_pad"] = is_pad
|
||||
# convert images from channel last (PIL) to channel first (pytorch)
|
||||
for key in self.image_keys:
|
||||
if item[key].ndim == 3:
|
||||
item[key] = item[key].permute((2, 0, 1)) # h w c -> c h w
|
||||
elif item[key].ndim == 4:
|
||||
item[key] = item[key].permute((0, 3, 1, 2)) # t h w c -> t c h w
|
||||
else:
|
||||
item[key] = self.data_dict[key][idx]
|
||||
raise ValueError(item[key].ndim)
|
||||
|
||||
if self.transform is not None:
|
||||
item = self.transform(item)
|
||||
|
||||
return item
|
||||
|
||||
def _download_and_preproc_obsolete(self):
|
||||
try:
|
||||
import pymunk
|
||||
from gym_pusht.envs.pusht import PushTEnv, pymunk_to_shapely
|
||||
except ModuleNotFoundError as e:
|
||||
print("`gym_pusht` is not installed. Please install it with `pip install 'lerobot[gym_pusht]'`")
|
||||
raise e
|
||||
|
||||
assert self.root is not None
|
||||
raw_dir = self.root / f"{self.dataset_id}_raw"
|
||||
zarr_path = (raw_dir / PUSHT_ZARR).resolve()
|
||||
if not zarr_path.is_dir():
|
||||
raw_dir.mkdir(parents=True, exist_ok=True)
|
||||
download_and_extract_zip(PUSHT_URL, raw_dir)
|
||||
|
||||
# load
|
||||
dataset_dict = DiffusionPolicyReplayBuffer.copy_from_path(
|
||||
zarr_path
|
||||
) # , keys=['img', 'state', 'action'])
|
||||
|
||||
episode_ids = torch.from_numpy(dataset_dict.get_episode_idxs())
|
||||
num_episodes = dataset_dict.meta["episode_ends"].shape[0]
|
||||
total_frames = dataset_dict["action"].shape[0]
|
||||
# to create test artifact
|
||||
# num_episodes = 1
|
||||
# total_frames = 50
|
||||
assert len(
|
||||
{dataset_dict[key].shape[0] for key in dataset_dict.keys()} # noqa: SIM118
|
||||
), "Some data type dont have the same number of total frames."
|
||||
|
||||
# TODO: verify that goal pose is expected to be fixed
|
||||
goal_pos_angle = np.array([256, 256, np.pi / 4]) # x, y, theta (in radians)
|
||||
goal_body = PushTEnv.get_goal_pose_body(goal_pos_angle)
|
||||
|
||||
imgs = torch.from_numpy(dataset_dict["img"])
|
||||
imgs = einops.rearrange(imgs, "b h w c -> b c h w")
|
||||
states = torch.from_numpy(dataset_dict["state"])
|
||||
actions = torch.from_numpy(dataset_dict["action"])
|
||||
|
||||
self.data_ids_per_episode = {}
|
||||
ep_dicts = []
|
||||
|
||||
idx0 = 0
|
||||
for episode_id in tqdm.tqdm(range(num_episodes)):
|
||||
idx1 = dataset_dict.meta["episode_ends"][episode_id]
|
||||
|
||||
num_frames = idx1 - idx0
|
||||
|
||||
assert (episode_ids[idx0:idx1] == episode_id).all()
|
||||
|
||||
image = imgs[idx0:idx1]
|
||||
assert image.min() >= 0.0
|
||||
assert image.max() <= 255.0
|
||||
image = image.type(torch.uint8)
|
||||
|
||||
state = states[idx0:idx1]
|
||||
agent_pos = state[:, :2]
|
||||
block_pos = state[:, 2:4]
|
||||
block_angle = state[:, 4]
|
||||
|
||||
reward = torch.zeros(num_frames)
|
||||
success = torch.zeros(num_frames, dtype=torch.bool)
|
||||
done = torch.zeros(num_frames, dtype=torch.bool)
|
||||
for i in range(num_frames):
|
||||
space = pymunk.Space()
|
||||
space.gravity = 0, 0
|
||||
space.damping = 0
|
||||
|
||||
# Add walls.
|
||||
walls = [
|
||||
PushTEnv.add_segment(space, (5, 506), (5, 5), 2),
|
||||
PushTEnv.add_segment(space, (5, 5), (506, 5), 2),
|
||||
PushTEnv.add_segment(space, (506, 5), (506, 506), 2),
|
||||
PushTEnv.add_segment(space, (5, 506), (506, 506), 2),
|
||||
]
|
||||
space.add(*walls)
|
||||
|
||||
block_body = PushTEnv.add_tee(space, block_pos[i].tolist(), block_angle[i].item())
|
||||
goal_geom = pymunk_to_shapely(goal_body, block_body.shapes)
|
||||
block_geom = pymunk_to_shapely(block_body, block_body.shapes)
|
||||
intersection_area = goal_geom.intersection(block_geom).area
|
||||
goal_area = goal_geom.area
|
||||
coverage = intersection_area / goal_area
|
||||
reward[i] = np.clip(coverage / SUCCESS_THRESHOLD, 0, 1)
|
||||
success[i] = coverage > SUCCESS_THRESHOLD
|
||||
|
||||
# last step of demonstration is considered done
|
||||
done[-1] = True
|
||||
|
||||
ep_dict = {
|
||||
"observation.image": image,
|
||||
"observation.state": agent_pos,
|
||||
"action": actions[idx0:idx1],
|
||||
"episode": torch.tensor([episode_id] * num_frames, dtype=torch.int),
|
||||
"frame_id": torch.arange(0, num_frames, 1),
|
||||
"timestamp": torch.arange(0, num_frames, 1) / self.fps,
|
||||
# "next.observation.image": image[1:],
|
||||
# "next.observation.state": agent_pos[1:],
|
||||
# TODO(rcadene): verify that reward and done are aligned with image and agent_pos
|
||||
"next.reward": torch.cat([reward[1:], reward[[-1]]]),
|
||||
"next.done": torch.cat([done[1:], done[[-1]]]),
|
||||
"next.success": torch.cat([success[1:], success[[-1]]]),
|
||||
}
|
||||
ep_dicts.append(ep_dict)
|
||||
|
||||
assert isinstance(episode_id, int)
|
||||
self.data_ids_per_episode[episode_id] = torch.arange(idx0, idx1, 1)
|
||||
assert len(self.data_ids_per_episode[episode_id]) == num_frames
|
||||
|
||||
idx0 = idx1
|
||||
|
||||
self.data_dict = {}
|
||||
|
||||
keys = ep_dicts[0].keys()
|
||||
for key in keys:
|
||||
self.data_dict[key] = torch.cat([x[key] for x in ep_dicts])
|
||||
|
||||
self.data_dict["index"] = torch.arange(0, total_frames, 1)
|
||||
|
|
|
@ -1,51 +1,20 @@
|
|||
import io
|
||||
import zipfile
|
||||
from copy import deepcopy
|
||||
from math import ceil
|
||||
from pathlib import Path
|
||||
|
||||
import einops
|
||||
import requests
|
||||
import torch
|
||||
import tqdm
|
||||
|
||||
|
||||
def download_and_extract_zip(url: str, destination_folder: Path) -> bool:
|
||||
print(f"downloading from {url}")
|
||||
response = requests.get(url, stream=True)
|
||||
if response.status_code == 200:
|
||||
total_size = int(response.headers.get("content-length", 0))
|
||||
progress_bar = tqdm.tqdm(total=total_size, unit="B", unit_scale=True)
|
||||
|
||||
zip_file = io.BytesIO()
|
||||
for chunk in response.iter_content(chunk_size=1024):
|
||||
if chunk:
|
||||
zip_file.write(chunk)
|
||||
progress_bar.update(len(chunk))
|
||||
|
||||
progress_bar.close()
|
||||
|
||||
zip_file.seek(0)
|
||||
|
||||
with zipfile.ZipFile(zip_file, "r") as zip_ref:
|
||||
zip_ref.extractall(destination_folder)
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
|
||||
def load_data_with_delta_timestamps(
|
||||
data_dict: dict[torch.Tensor],
|
||||
data_ids_per_episode: dict[torch.Tensor],
|
||||
delta_timestamps: list[float],
|
||||
key: str,
|
||||
current_ts: float,
|
||||
episode: int,
|
||||
def load_previous_and_future_frames(
|
||||
item: dict[str, torch.Tensor],
|
||||
data_dict: dict[str, torch.Tensor],
|
||||
delta_timestamps: dict[str, list[float]],
|
||||
tol: float = 0.04,
|
||||
):
|
||||
) -> dict[torch.Tensor]:
|
||||
"""
|
||||
Given a current timestamp (e.g. current_ts=0.6) and a list of timestamps differences (e.g. delta_timestamps=[-0.8, -0.2, 0, 0.2]),
|
||||
this function compute the query timestamps (e.g. [-0.2, 0.4, 0.6, 0.8]) and loads the closest frames of the specified modality (e.g. key="observation.image").
|
||||
Given a current item in the dataset containing a timestamp (e.g. 0.6 seconds), and a list of time differences of some modalities (e.g. delta_timestamps={"observation.image": [-0.8, -0.2, 0, 0.2]}),
|
||||
this function computes for each given modality a list of query timestamps (e.g. [-0.2, 0.4, 0.6, 0.8]) and loads the closest frames in the dataset.
|
||||
|
||||
Importantly, when no frame can be found around a query timestamp within a specified tolerance window (e.g. tol=0.04), this function raises an AssertionError.
|
||||
When a timestamp is queried before the first available timestamp of the episode or after the last available timestamp,
|
||||
|
@ -54,56 +23,57 @@ def load_data_with_delta_timestamps(
|
|||
or to pad the observations in a specific way. Note that by default the observation frames before the start of the episode are the same as the first frame of the episode.
|
||||
|
||||
Parameters:
|
||||
- data_dict (dict): A dictionary containing the data, where each key corresponds to a different modality (e.g., "timestamp", "observation.image", "action").
|
||||
- data_ids_per_episode (dict): A dictionary where keys are episode identifiers and values are lists of indices corresponding to frames associated with each episode.
|
||||
- delta_timestamps (dict): A dictionary containing lists of delta timestamps for each possible key to be retrieved. These deltas are added to the current_ts to form the query timestamps.
|
||||
- key (str): The key specifying which data modality is to be retrieved from the data_dict.
|
||||
- current_ts (float): The current timestamp to which the delta timestamps are added to form the query timestamps.
|
||||
- episode (int): The identifier of the episode from which frames are to be retrieved.
|
||||
- item (dict): A dictionary containing all the data related to a frame. It is the result of `dataset[idx]`. Each key corresponds to a different modality (e.g., "timestamp", "observation.image", "action").
|
||||
- data_dict (dict): A dictionary containing the full dataset. Each key corresponds to a different modality (e.g., "timestamp", "observation.image", "action").
|
||||
- delta_timestamps (dict): A dictionary containing lists of delta timestamps for each possible modality to be retrieved. These deltas are added to the item timestamp to form the query timestamps.
|
||||
- tol (float, optional): The tolerance level used to determine if a data point is close enough to the query timestamp. Defaults to 0.04.
|
||||
|
||||
Returns:
|
||||
- tuple: A tuple containing two elements:
|
||||
- The first element is the data retrieved from the specified modality based on the closest match to the query timestamps.
|
||||
- The second element is a boolean array indicating which frames were considered as padding (True if the distance to the closest timestamp was greater than the tolerance level).
|
||||
- The same item with the queried frames for each modality specified in delta_timestamps, with an additional key for each modality (e.g. "observation.image_is_pad").
|
||||
|
||||
Raises:
|
||||
- AssertionError: If any of the frames unexpectedly violate the tolerance level. This could indicate synchronization issues with timestamps during data collection.
|
||||
"""
|
||||
# get indices of the frames associated to the episode, and their timestamps
|
||||
ep_data_ids = data_ids_per_episode[episode]
|
||||
ep_timestamps = data_dict["timestamp"][ep_data_ids]
|
||||
ep_data_id_from = item["episode_data_index_from"].item()
|
||||
ep_data_id_to = item["episode_data_index_to"].item()
|
||||
ep_data_ids = torch.arange(ep_data_id_from, ep_data_id_to, 1)
|
||||
|
||||
# load timestamps
|
||||
ep_timestamps = data_dict.select_columns("timestamp")[ep_data_id_from:ep_data_id_to]["timestamp"]
|
||||
|
||||
# we make the assumption that the timestamps are sorted
|
||||
ep_first_ts = ep_timestamps[0]
|
||||
ep_last_ts = ep_timestamps[-1]
|
||||
current_ts = item["timestamp"].item()
|
||||
|
||||
# get timestamps used as query to retrieve data of previous/future frames
|
||||
delta_ts = delta_timestamps[key]
|
||||
query_ts = current_ts + torch.tensor(delta_ts)
|
||||
for key in delta_timestamps:
|
||||
# get timestamps used as query to retrieve data of previous/future frames
|
||||
delta_ts = delta_timestamps[key]
|
||||
query_ts = current_ts + torch.tensor(delta_ts)
|
||||
|
||||
# compute distances between each query timestamp and all timestamps of all the frames belonging to the episode
|
||||
dist = torch.cdist(query_ts[:, None], ep_timestamps[:, None], p=1)
|
||||
min_, argmin_ = dist.min(1)
|
||||
# compute distances between each query timestamp and all timestamps of all the frames belonging to the episode
|
||||
dist = torch.cdist(query_ts[:, None], ep_timestamps[:, None], p=1)
|
||||
min_, argmin_ = dist.min(1)
|
||||
|
||||
# get the indices of the data that are closest to the query timestamps
|
||||
data_ids = ep_data_ids[argmin_]
|
||||
# closest_ts = ep_timestamps[argmin_]
|
||||
# TODO(rcadene): synchronize timestamps + interpolation if needed
|
||||
|
||||
# get the data
|
||||
data = data_dict[key][data_ids].clone()
|
||||
is_pad = min_ > tol
|
||||
|
||||
# TODO(rcadene): synchronize timestamps + interpolation if needed
|
||||
# check violated query timestamps are all outside the episode range
|
||||
assert ((query_ts[is_pad] < ep_first_ts) | (ep_last_ts < query_ts[is_pad])).all(), (
|
||||
f"One or several timestamps unexpectedly violate the tolerance ({min_} > {tol=}) inside episode range."
|
||||
"This might be due to synchronization issues with timestamps during data collection."
|
||||
)
|
||||
|
||||
is_pad = min_ > tol
|
||||
# get dataset indices corresponding to frames to be loaded
|
||||
data_ids = ep_data_ids[argmin_]
|
||||
|
||||
# check violated query timestamps are all outside the episode range
|
||||
assert ((query_ts[is_pad] < ep_first_ts) | (ep_last_ts < query_ts[is_pad])).all(), (
|
||||
f"One or several timestamps unexpectedly violate the tolerance ({min_} > {tol=}) inside episode range."
|
||||
"This might be due to synchronization issues with timestamps during data collection."
|
||||
)
|
||||
# load frames modality
|
||||
item[key] = data_dict.select_columns(key)[data_ids][key]
|
||||
item[f"{key}_is_pad"] = is_pad
|
||||
|
||||
return data, is_pad
|
||||
return item
|
||||
|
||||
|
||||
def get_stats_einops_patterns(dataset):
|
||||
|
|
|
@ -1,30 +1,16 @@
|
|||
import pickle
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
import tqdm
|
||||
from datasets import load_dataset, load_from_disk
|
||||
|
||||
from lerobot.common.datasets.utils import load_data_with_delta_timestamps
|
||||
|
||||
|
||||
def download(raw_dir):
|
||||
import gdown
|
||||
|
||||
raw_dir.mkdir(parents=True, exist_ok=True)
|
||||
url = "https://drive.google.com/uc?id=1nhxpykGtPDhmQKm-_B8zBSywVRdgeVya"
|
||||
zip_path = raw_dir / "data.zip"
|
||||
gdown.download(url, str(zip_path), quiet=False)
|
||||
print("Extracting...")
|
||||
with zipfile.ZipFile(str(zip_path), "r") as zip_f:
|
||||
for member in zip_f.namelist():
|
||||
if member.startswith("data/xarm") and member.endswith(".pkl"):
|
||||
print(member)
|
||||
zip_f.extract(member=member)
|
||||
zip_path.unlink()
|
||||
from lerobot.common.datasets.utils import load_previous_and_future_frames
|
||||
|
||||
|
||||
class XarmDataset(torch.utils.data.Dataset):
|
||||
"""
|
||||
https://huggingface.co/datasets/lerobot/xarm_lift_medium
|
||||
"""
|
||||
|
||||
available_datasets = [
|
||||
"xarm_lift_medium",
|
||||
]
|
||||
|
@ -34,8 +20,9 @@ class XarmDataset(torch.utils.data.Dataset):
|
|||
def __init__(
|
||||
self,
|
||||
dataset_id: str,
|
||||
version: str | None = "v1.1",
|
||||
version: str | None = "v1.0",
|
||||
root: Path | None = None,
|
||||
split: str = "train",
|
||||
transform: callable = None,
|
||||
delta_timestamps: dict[list[float]] | None = None,
|
||||
):
|
||||
|
@ -43,121 +30,48 @@ class XarmDataset(torch.utils.data.Dataset):
|
|||
self.dataset_id = dataset_id
|
||||
self.version = version
|
||||
self.root = root
|
||||
self.split = split
|
||||
self.transform = transform
|
||||
self.delta_timestamps = delta_timestamps
|
||||
|
||||
self.data_dir = self.root / f"{self.dataset_id}"
|
||||
if (self.data_dir / "data_dict.pth").exists() and (
|
||||
self.data_dir / "data_ids_per_episode.pth"
|
||||
).exists():
|
||||
self.data_dict = torch.load(self.data_dir / "data_dict.pth")
|
||||
self.data_ids_per_episode = torch.load(self.data_dir / "data_ids_per_episode.pth")
|
||||
if self.root is not None:
|
||||
self.data_dict = load_from_disk(Path(self.root) / self.dataset_id / self.split)
|
||||
else:
|
||||
self._download_and_preproc_obsolete()
|
||||
self.data_dir.mkdir(parents=True, exist_ok=True)
|
||||
torch.save(self.data_dict, self.data_dir / "data_dict.pth")
|
||||
torch.save(self.data_ids_per_episode, self.data_dir / "data_ids_per_episode.pth")
|
||||
self.data_dict = load_dataset(
|
||||
f"lerobot/{self.dataset_id}", revision=self.version, split=self.split
|
||||
)
|
||||
self.data_dict = self.data_dict.with_format("torch")
|
||||
|
||||
@property
|
||||
def num_samples(self) -> int:
|
||||
return len(self.data_dict["index"]) if "index" in self.data_dict else 0
|
||||
return len(self.data_dict)
|
||||
|
||||
@property
|
||||
def num_episodes(self) -> int:
|
||||
return len(self.data_ids_per_episode)
|
||||
return len(self.data_dict.unique("episode_id"))
|
||||
|
||||
def __len__(self):
|
||||
return self.num_samples
|
||||
|
||||
def __getitem__(self, idx):
|
||||
item = {}
|
||||
item = self.data_dict[idx]
|
||||
|
||||
# get episode id and timestamp of the sampled frame
|
||||
current_ts = self.data_dict["timestamp"][idx].item()
|
||||
episode = self.data_dict["episode"][idx].item()
|
||||
if self.delta_timestamps is not None:
|
||||
item = load_previous_and_future_frames(
|
||||
item,
|
||||
self.data_dict,
|
||||
self.delta_timestamps,
|
||||
)
|
||||
|
||||
for key in self.data_dict:
|
||||
if self.delta_timestamps is not None and key in self.delta_timestamps:
|
||||
data, is_pad = load_data_with_delta_timestamps(
|
||||
self.data_dict,
|
||||
self.data_ids_per_episode,
|
||||
self.delta_timestamps,
|
||||
key,
|
||||
current_ts,
|
||||
episode,
|
||||
)
|
||||
item[key] = data
|
||||
item[f"{key}_is_pad"] = is_pad
|
||||
# convert images from channel last (PIL) to channel first (pytorch)
|
||||
for key in self.image_keys:
|
||||
if item[key].ndim == 3:
|
||||
item[key] = item[key].permute((2, 0, 1)) # h w c -> c h w
|
||||
elif item[key].ndim == 4:
|
||||
item[key] = item[key].permute((0, 3, 1, 2)) # t h w c -> t c h w
|
||||
else:
|
||||
item[key] = self.data_dict[key][idx]
|
||||
raise ValueError(item[key].ndim)
|
||||
|
||||
if self.transform is not None:
|
||||
item = self.transform(item)
|
||||
|
||||
return item
|
||||
|
||||
def _download_and_preproc_obsolete(self):
|
||||
assert self.root is not None
|
||||
raw_dir = self.root / f"{self.dataset_id}_raw"
|
||||
if not raw_dir.exists():
|
||||
download(raw_dir)
|
||||
|
||||
dataset_path = self.root / f"{self.dataset_id}" / "buffer.pkl"
|
||||
print(f"Using offline dataset '{dataset_path}'")
|
||||
with open(dataset_path, "rb") as f:
|
||||
dataset_dict = pickle.load(f)
|
||||
|
||||
total_frames = dataset_dict["actions"].shape[0]
|
||||
|
||||
self.data_ids_per_episode = {}
|
||||
ep_dicts = []
|
||||
|
||||
idx0 = 0
|
||||
idx1 = 0
|
||||
episode_id = 0
|
||||
for i in tqdm.tqdm(range(total_frames)):
|
||||
idx1 += 1
|
||||
|
||||
if not dataset_dict["dones"][i]:
|
||||
continue
|
||||
|
||||
num_frames = idx1 - idx0
|
||||
|
||||
image = torch.tensor(dataset_dict["observations"]["rgb"][idx0:idx1])
|
||||
state = torch.tensor(dataset_dict["observations"]["state"][idx0:idx1])
|
||||
action = torch.tensor(dataset_dict["actions"][idx0:idx1])
|
||||
# TODO(rcadene): we have a missing last frame which is the observation when the env is done
|
||||
# it is critical to have this frame for tdmpc to predict a "done observation/state"
|
||||
# next_image = torch.tensor(dataset_dict["next_observations"]["rgb"][idx0:idx1])
|
||||
# next_state = torch.tensor(dataset_dict["next_observations"]["state"][idx0:idx1])
|
||||
next_reward = torch.tensor(dataset_dict["rewards"][idx0:idx1])
|
||||
next_done = torch.tensor(dataset_dict["dones"][idx0:idx1])
|
||||
|
||||
ep_dict = {
|
||||
"observation.image": image,
|
||||
"observation.state": state,
|
||||
"action": action,
|
||||
"episode": torch.tensor([episode_id] * num_frames, dtype=torch.int),
|
||||
"frame_id": torch.arange(0, num_frames, 1),
|
||||
"timestamp": torch.arange(0, num_frames, 1) / self.fps,
|
||||
# "next.observation.image": next_image,
|
||||
# "next.observation.state": next_state,
|
||||
"next.reward": next_reward,
|
||||
"next.done": next_done,
|
||||
}
|
||||
ep_dicts.append(ep_dict)
|
||||
|
||||
assert isinstance(episode_id, int)
|
||||
self.data_ids_per_episode[episode_id] = torch.arange(idx0, idx1, 1)
|
||||
assert len(self.data_ids_per_episode[episode_id]) == num_frames
|
||||
|
||||
idx0 = idx1
|
||||
episode_id += 1
|
||||
|
||||
self.data_dict = {}
|
||||
|
||||
keys = ep_dicts[0].keys()
|
||||
for key in keys:
|
||||
self.data_dict[key] = torch.cat([x[key] for x in ep_dicts])
|
||||
|
||||
self.data_dict["index"] = torch.arange(0, total_frames, 1)
|
||||
|
|
|
@ -14,7 +14,7 @@ def preprocess_observation(observation, transform=None):
|
|||
imgs = {"observation.image": observation["pixels"]}
|
||||
|
||||
for imgkey, img in imgs.items():
|
||||
img = torch.from_numpy(img).float()
|
||||
img = torch.from_numpy(img)
|
||||
# convert to (b c h w) torch format
|
||||
img = einops.rearrange(img, "b h w c -> b c h w")
|
||||
obs[imgkey] = img
|
||||
|
|
|
@ -41,7 +41,9 @@ import gymnasium as gym
|
|||
import imageio
|
||||
import numpy as np
|
||||
import torch
|
||||
from datasets import Dataset
|
||||
from huggingface_hub import snapshot_download
|
||||
from PIL import Image as PILImage
|
||||
|
||||
from lerobot.common.datasets.factory import make_dataset
|
||||
from lerobot.common.envs.factory import make_env
|
||||
|
@ -199,38 +201,48 @@ def eval_policy(
|
|||
ep_dicts = []
|
||||
num_episodes = dones.shape[0]
|
||||
total_frames = 0
|
||||
idx0 = idx1 = 0
|
||||
data_ids_per_episode = {}
|
||||
idx_from = 0
|
||||
for ep_id in range(num_episodes):
|
||||
num_frames = done_indices[ep_id].item() + 1
|
||||
total_frames += num_frames
|
||||
|
||||
# TODO(rcadene): We need to add a missing last frame which is the observation
|
||||
# of a done state. it is critical to have this frame for tdmpc to predict a "done observation/state"
|
||||
ep_dict = {
|
||||
"action": actions[ep_id, :num_frames],
|
||||
"episode": torch.tensor([ep_id] * num_frames),
|
||||
"episode_id": torch.tensor([ep_id] * num_frames),
|
||||
"frame_id": torch.arange(0, num_frames, 1),
|
||||
"timestamp": torch.arange(0, num_frames, 1) / fps,
|
||||
"next.done": dones[ep_id, :num_frames],
|
||||
"next.reward": rewards[ep_id, :num_frames].type(torch.float32),
|
||||
"episode_data_index_from": torch.tensor([idx_from] * num_frames),
|
||||
"episode_data_index_to": torch.tensor([idx_from + num_frames] * num_frames),
|
||||
}
|
||||
for key in observations:
|
||||
ep_dict[key] = observations[key][ep_id, :num_frames]
|
||||
ep_dict[key] = observations[key][ep_id][:num_frames]
|
||||
ep_dicts.append(ep_dict)
|
||||
|
||||
total_frames += num_frames
|
||||
idx1 += num_frames
|
||||
|
||||
data_ids_per_episode[ep_id] = torch.arange(idx0, idx1, 1)
|
||||
|
||||
idx0 = idx1
|
||||
idx_from += num_frames
|
||||
|
||||
# similar logic is implemented in dataset preprocessing
|
||||
data_dict = {}
|
||||
keys = ep_dicts[0].keys()
|
||||
for key in keys:
|
||||
data_dict[key] = torch.cat([x[key] for x in ep_dicts])
|
||||
if "image" not in key:
|
||||
data_dict[key] = torch.cat([x[key] for x in ep_dicts])
|
||||
else:
|
||||
if key not in data_dict:
|
||||
data_dict[key] = []
|
||||
for ep_dict in ep_dicts:
|
||||
for x in ep_dict[key]:
|
||||
# c h w -> h w c
|
||||
img = PILImage.fromarray(x.permute(1, 2, 0).numpy())
|
||||
data_dict[key].append(img)
|
||||
|
||||
data_dict["index"] = torch.arange(0, total_frames, 1)
|
||||
|
||||
data_dict = Dataset.from_dict(data_dict).with_format("torch")
|
||||
|
||||
if max_episodes_rendered > 0:
|
||||
batch_stacked_frames = np.stack(ep_frames, 1) # (b, t, *)
|
||||
|
||||
|
@ -280,10 +292,7 @@ def eval_policy(
|
|||
"eval_s": time.time() - start,
|
||||
"eval_ep_s": (time.time() - start) / num_episodes,
|
||||
},
|
||||
"episodes": {
|
||||
"data_dict": data_dict,
|
||||
"data_ids_per_episode": data_ids_per_episode,
|
||||
},
|
||||
"episodes": data_dict,
|
||||
}
|
||||
if max_episodes_rendered > 0:
|
||||
info["videos"] = videos
|
||||
|
|
|
@ -4,6 +4,8 @@ from pathlib import Path
|
|||
|
||||
import hydra
|
||||
import torch
|
||||
from datasets import concatenate_datasets
|
||||
from datasets.utils.logging import disable_progress_bar
|
||||
|
||||
from lerobot.common.datasets.factory import make_dataset
|
||||
from lerobot.common.datasets.utils import cycle
|
||||
|
@ -128,29 +130,33 @@ def calculate_online_sample_weight(n_off: int, n_on: int, pc_on: float):
|
|||
return -(n_off * pc_on) / (n_on * (pc_on - 1))
|
||||
|
||||
|
||||
def add_episodes_inplace(episodes, online_dataset, concat_dataset, sampler, pc_online_samples):
|
||||
data_dict = episodes["data_dict"]
|
||||
data_ids_per_episode = episodes["data_ids_per_episode"]
|
||||
def add_episodes_inplace(data_dict, online_dataset, concat_dataset, sampler, pc_online_samples):
|
||||
first_episode_id = data_dict.select_columns("episode_id")[0]["episode_id"].item()
|
||||
first_index = data_dict.select_columns("index")[0]["index"].item()
|
||||
assert first_episode_id == 0, f"We expect the first episode_id to be 0 and not {first_episode_id}"
|
||||
assert first_index == 0, f"We expect the first first_index to be 0 and not {first_index}"
|
||||
|
||||
if len(online_dataset) == 0:
|
||||
# initialize online dataset
|
||||
online_dataset.data_dict = data_dict
|
||||
online_dataset.data_ids_per_episode = data_ids_per_episode
|
||||
else:
|
||||
# find episode index and data frame indices according to previous episode in online_dataset
|
||||
start_episode = max(online_dataset.data_ids_per_episode.keys()) + 1
|
||||
start_index = online_dataset.data_dict["index"][-1].item() + 1
|
||||
data_dict["episode"] += start_episode
|
||||
data_dict["index"] += start_index
|
||||
start_episode = online_dataset.select_columns("episode_id")[-1]["episode_id"].item() + 1
|
||||
start_index = online_dataset.select_columns("index")[-1]["index"].item() + 1
|
||||
|
||||
def shift_indices(example):
|
||||
# note: we dont shift "frame_id" since it represents the index of the frame in the episode it belongs to
|
||||
example["episode_id"] += start_episode
|
||||
example["index"] += start_index
|
||||
example["episode_data_index_from"] += start_index
|
||||
example["episode_data_index_to"] += start_index
|
||||
return example
|
||||
|
||||
disable_progress_bar() # map has a tqdm progress bar
|
||||
data_dict = data_dict.map(shift_indices)
|
||||
|
||||
# extend online dataset
|
||||
for key in data_dict:
|
||||
# TODO(rcadene): avoid reallocating memory at every step by preallocating memory or changing our data structure
|
||||
online_dataset.data_dict[key] = torch.cat([online_dataset.data_dict[key], data_dict[key]])
|
||||
for ep_id in data_ids_per_episode:
|
||||
online_dataset.data_ids_per_episode[ep_id + start_episode] = (
|
||||
data_ids_per_episode[ep_id] + start_index
|
||||
)
|
||||
online_dataset.data_dict = concatenate_datasets([online_dataset.data_dict, data_dict])
|
||||
|
||||
# update the concatenated dataset length used during sampling
|
||||
concat_dataset.cumulative_sizes = concat_dataset.cumsum(concat_dataset.datasets)
|
||||
|
@ -269,7 +275,6 @@ def train(cfg: dict, out_dir=None, job_name=None):
|
|||
# create an empty online dataset similar to offline dataset
|
||||
online_dataset = deepcopy(offline_dataset)
|
||||
online_dataset.data_dict = {}
|
||||
online_dataset.data_ids_per_episode = {}
|
||||
|
||||
# create dataloader for online training
|
||||
concat_dataset = torch.utils.data.ConcatDataset([offline_dataset, online_dataset])
|
||||
|
|
|
@ -62,12 +62,12 @@ def render_dataset(dataset, out_dir, max_num_episodes):
|
|||
)
|
||||
dl_iter = iter(dataloader)
|
||||
|
||||
num_episodes = len(dataset.data_ids_per_episode)
|
||||
for ep_id in range(min(max_num_episodes, num_episodes)):
|
||||
for ep_id in range(min(max_num_episodes, dataset.num_episodes)):
|
||||
logging.info(f"Rendering episode {ep_id}")
|
||||
|
||||
frames = {}
|
||||
for _ in dataset.data_ids_per_episode[ep_id]:
|
||||
end_of_episode = False
|
||||
while not end_of_episode:
|
||||
item = next(dl_iter)
|
||||
|
||||
for im_key in dataset.image_keys:
|
||||
|
@ -77,6 +77,8 @@ def render_dataset(dataset, out_dir, max_num_episodes):
|
|||
# add current frame to list of frames to render
|
||||
frames[im_key].append(item[im_key])
|
||||
|
||||
end_of_episode = item["index"].item() == item["episode_data_index_to"].item() - 1
|
||||
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
for im_key in dataset.image_keys:
|
||||
if len(dataset.image_keys) > 1:
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,19 +1,25 @@
|
|||
[tool.poetry]
|
||||
name = "lerobot"
|
||||
version = "0.1.0"
|
||||
description = "Le robot is learning"
|
||||
description = "🤗 LeRobot: State-of-the-art Machine Learning for Real-World Robotics in Pytorch"
|
||||
authors = [
|
||||
"Rémi Cadène <re.cadene@gmail.com>",
|
||||
"Alexander Soare <alexander.soare159@gmail.com>",
|
||||
"Quentin Gallouédec <quentin.gallouedec@ec-lyon.fr>",
|
||||
"Simon Alibert <alibert.sim@gmail.com>",
|
||||
"Thomas Wolf <thomaswolfcontact@gmail.com>",
|
||||
]
|
||||
repository = "https://github.com/Cadene/lerobot"
|
||||
repository = "https://github.com/huggingface/lerobot"
|
||||
readme = "README.md"
|
||||
license = "MIT"
|
||||
license = "Apache-2.0"
|
||||
classifiers=[
|
||||
"Development Status :: 3 - Alpha",
|
||||
"Intended Audience :: Developers",
|
||||
"Intended Audience :: Education",
|
||||
"Intended Audience :: Science/Research",
|
||||
"Topic :: Software Development :: Build Tools",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
||||
"License :: OSI Approved :: Apache Software License",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
]
|
||||
packages = [{include = "lerobot"}]
|
||||
|
@ -23,52 +29,38 @@ packages = [{include = "lerobot"}]
|
|||
python = "^3.10"
|
||||
termcolor = "^2.4.0"
|
||||
omegaconf = "^2.3.0"
|
||||
pandas = "^2.2.1"
|
||||
wandb = "^0.16.3"
|
||||
moviepy = "^1.0.3"
|
||||
imageio = {extras = ["pyav"], version = "^2.34.0"}
|
||||
imageio = {extras = ["ffmpeg"], version = "^2.34.0"}
|
||||
gdown = "^5.1.0"
|
||||
hydra-core = "^1.3.2"
|
||||
einops = "^0.7.0"
|
||||
pygame = "^2.5.2"
|
||||
pymunk = "^6.6.0"
|
||||
zarr = "^2.17.0"
|
||||
numba = "^0.59.0"
|
||||
mpmath = "^1.3.0"
|
||||
torch = "^2.2.1"
|
||||
opencv-python = "^4.9.0.80"
|
||||
diffusers = "^0.26.3"
|
||||
torchvision = "^0.17.1"
|
||||
h5py = "^3.10.0"
|
||||
huggingface-hub = {extras = ["hf-transfer"], version = "^0.21.4"}
|
||||
huggingface-hub = "^0.21.4"
|
||||
robomimic = "0.2.0"
|
||||
gymnasium = "^0.29.1"
|
||||
cmake = "^3.29.0.1"
|
||||
gym-pusht = { git = "git@github.com:huggingface/gym-pusht.git", optional = true}
|
||||
gym-xarm = { git = "git@github.com:huggingface/gym-xarm.git", optional = true}
|
||||
gym-aloha = { git = "git@github.com:huggingface/gym-aloha.git", optional = true}
|
||||
# gym-pusht = { path = "../gym-pusht", develop = true, optional = true}
|
||||
# gym-xarm = { path = "../gym-xarm", develop = true, optional = true}
|
||||
# gym-aloha = { path = "../gym-aloha", develop = true, optional = true}
|
||||
pre-commit = {version = "^3.7.0", optional = true}
|
||||
debugpy = {version = "^1.8.1", optional = true}
|
||||
pytest = {version = "^8.1.0", optional = true}
|
||||
pytest-cov = {version = "^5.0.0", optional = true}
|
||||
datasets = "^2.18.0"
|
||||
|
||||
[tool.poetry.extras]
|
||||
pusht = ["gym-pusht"]
|
||||
xarm = ["gym-xarm"]
|
||||
aloha = ["gym-aloha"]
|
||||
|
||||
|
||||
[tool.poetry.group.dev]
|
||||
optional = true
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pre-commit = "^3.6.2"
|
||||
debugpy = "^1.8.1"
|
||||
|
||||
|
||||
[tool.poetry.group.test.dependencies]
|
||||
pytest = "^8.1.0"
|
||||
pytest-cov = "^5.0.0"
|
||||
dev = ["pre-commit", "debugpy"]
|
||||
test = ["pytest", "pytest-cov"]
|
||||
|
||||
|
||||
[tool.ruff]
|
||||
|
@ -103,13 +95,7 @@ exclude = [
|
|||
select = ["E4", "E7", "E9", "F", "I", "N", "B", "C4", "SIM"]
|
||||
ignore-init-module-imports = true
|
||||
|
||||
[tool.poetry-dynamic-versioning]
|
||||
enable = true
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning>=1.0.0,<2.0.0"]
|
||||
build-backend = "poetry_dynamic_versioning.backend"
|
||||
|
||||
[tool.black]
|
||||
line-length = 110
|
||||
requires = ["poetry-core>=1.5.0"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,55 @@
|
|||
{
|
||||
"citation": "",
|
||||
"description": "",
|
||||
"features": {
|
||||
"observation.images.top": {
|
||||
"_type": "Image"
|
||||
},
|
||||
"observation.state": {
|
||||
"feature": {
|
||||
"dtype": "float32",
|
||||
"_type": "Value"
|
||||
},
|
||||
"length": 14,
|
||||
"_type": "Sequence"
|
||||
},
|
||||
"action": {
|
||||
"feature": {
|
||||
"dtype": "float32",
|
||||
"_type": "Value"
|
||||
},
|
||||
"length": 14,
|
||||
"_type": "Sequence"
|
||||
},
|
||||
"episode_id": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"frame_id": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"timestamp": {
|
||||
"dtype": "float32",
|
||||
"_type": "Value"
|
||||
},
|
||||
"next.done": {
|
||||
"dtype": "bool",
|
||||
"_type": "Value"
|
||||
},
|
||||
"episode_data_index_from": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"episode_data_index_to": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"index": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
}
|
||||
},
|
||||
"homepage": "",
|
||||
"license": ""
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"_data_files": [
|
||||
{
|
||||
"filename": "data-00000-of-00001.arrow"
|
||||
}
|
||||
],
|
||||
"_fingerprint": "d79cf82ffc86f110",
|
||||
"_format_columns": null,
|
||||
"_format_kwargs": {},
|
||||
"_format_type": "torch",
|
||||
"_output_all_columns": false,
|
||||
"_split": null
|
||||
}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,55 @@
|
|||
{
|
||||
"citation": "",
|
||||
"description": "",
|
||||
"features": {
|
||||
"observation.images.top": {
|
||||
"_type": "Image"
|
||||
},
|
||||
"observation.state": {
|
||||
"feature": {
|
||||
"dtype": "float32",
|
||||
"_type": "Value"
|
||||
},
|
||||
"length": 14,
|
||||
"_type": "Sequence"
|
||||
},
|
||||
"action": {
|
||||
"feature": {
|
||||
"dtype": "float32",
|
||||
"_type": "Value"
|
||||
},
|
||||
"length": 14,
|
||||
"_type": "Sequence"
|
||||
},
|
||||
"episode_id": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"frame_id": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"timestamp": {
|
||||
"dtype": "float32",
|
||||
"_type": "Value"
|
||||
},
|
||||
"next.done": {
|
||||
"dtype": "bool",
|
||||
"_type": "Value"
|
||||
},
|
||||
"episode_data_index_from": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"episode_data_index_to": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"index": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
}
|
||||
},
|
||||
"homepage": "",
|
||||
"license": ""
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"_data_files": [
|
||||
{
|
||||
"filename": "data-00000-of-00001.arrow"
|
||||
}
|
||||
],
|
||||
"_fingerprint": "d8e4a817b5449498",
|
||||
"_format_columns": null,
|
||||
"_format_kwargs": {},
|
||||
"_format_type": "torch",
|
||||
"_output_all_columns": false,
|
||||
"_split": null
|
||||
}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,55 @@
|
|||
{
|
||||
"citation": "",
|
||||
"description": "",
|
||||
"features": {
|
||||
"observation.images.top": {
|
||||
"_type": "Image"
|
||||
},
|
||||
"observation.state": {
|
||||
"feature": {
|
||||
"dtype": "float32",
|
||||
"_type": "Value"
|
||||
},
|
||||
"length": 14,
|
||||
"_type": "Sequence"
|
||||
},
|
||||
"action": {
|
||||
"feature": {
|
||||
"dtype": "float32",
|
||||
"_type": "Value"
|
||||
},
|
||||
"length": 14,
|
||||
"_type": "Sequence"
|
||||
},
|
||||
"episode_id": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"frame_id": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"timestamp": {
|
||||
"dtype": "float32",
|
||||
"_type": "Value"
|
||||
},
|
||||
"next.done": {
|
||||
"dtype": "bool",
|
||||
"_type": "Value"
|
||||
},
|
||||
"episode_data_index_from": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"episode_data_index_to": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"index": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
}
|
||||
},
|
||||
"homepage": "",
|
||||
"license": ""
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"_data_files": [
|
||||
{
|
||||
"filename": "data-00000-of-00001.arrow"
|
||||
}
|
||||
],
|
||||
"_fingerprint": "f03482befa767127",
|
||||
"_format_columns": null,
|
||||
"_format_kwargs": {},
|
||||
"_format_type": "torch",
|
||||
"_output_all_columns": false,
|
||||
"_split": null
|
||||
}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,55 @@
|
|||
{
|
||||
"citation": "",
|
||||
"description": "",
|
||||
"features": {
|
||||
"observation.images.top": {
|
||||
"_type": "Image"
|
||||
},
|
||||
"observation.state": {
|
||||
"feature": {
|
||||
"dtype": "float32",
|
||||
"_type": "Value"
|
||||
},
|
||||
"length": 14,
|
||||
"_type": "Sequence"
|
||||
},
|
||||
"action": {
|
||||
"feature": {
|
||||
"dtype": "float32",
|
||||
"_type": "Value"
|
||||
},
|
||||
"length": 14,
|
||||
"_type": "Sequence"
|
||||
},
|
||||
"episode_id": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"frame_id": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"timestamp": {
|
||||
"dtype": "float32",
|
||||
"_type": "Value"
|
||||
},
|
||||
"next.done": {
|
||||
"dtype": "bool",
|
||||
"_type": "Value"
|
||||
},
|
||||
"episode_data_index_from": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"episode_data_index_to": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"index": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
}
|
||||
},
|
||||
"homepage": "",
|
||||
"license": ""
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"_data_files": [
|
||||
{
|
||||
"filename": "data-00000-of-00001.arrow"
|
||||
}
|
||||
],
|
||||
"_fingerprint": "93e03c6320c7d56e",
|
||||
"_format_columns": null,
|
||||
"_format_kwargs": {},
|
||||
"_format_type": "torch",
|
||||
"_output_all_columns": false,
|
||||
"_split": null
|
||||
}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,63 @@
|
|||
{
|
||||
"citation": "",
|
||||
"description": "",
|
||||
"features": {
|
||||
"observation.image": {
|
||||
"_type": "Image"
|
||||
},
|
||||
"observation.state": {
|
||||
"feature": {
|
||||
"dtype": "float32",
|
||||
"_type": "Value"
|
||||
},
|
||||
"length": 2,
|
||||
"_type": "Sequence"
|
||||
},
|
||||
"action": {
|
||||
"feature": {
|
||||
"dtype": "float32",
|
||||
"_type": "Value"
|
||||
},
|
||||
"length": 2,
|
||||
"_type": "Sequence"
|
||||
},
|
||||
"episode_id": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"frame_id": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"timestamp": {
|
||||
"dtype": "float32",
|
||||
"_type": "Value"
|
||||
},
|
||||
"next.reward": {
|
||||
"dtype": "float32",
|
||||
"_type": "Value"
|
||||
},
|
||||
"next.done": {
|
||||
"dtype": "bool",
|
||||
"_type": "Value"
|
||||
},
|
||||
"next.success": {
|
||||
"dtype": "bool",
|
||||
"_type": "Value"
|
||||
},
|
||||
"episode_data_index_from": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"episode_data_index_to": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"index": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
}
|
||||
},
|
||||
"homepage": "",
|
||||
"license": ""
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"_data_files": [
|
||||
{
|
||||
"filename": "data-00000-of-00001.arrow"
|
||||
}
|
||||
],
|
||||
"_fingerprint": "21bb9a76ed78a475",
|
||||
"_format_columns": null,
|
||||
"_format_kwargs": {},
|
||||
"_format_type": "torch",
|
||||
"_output_all_columns": false,
|
||||
"_split": null
|
||||
}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,59 @@
|
|||
{
|
||||
"citation": "",
|
||||
"description": "",
|
||||
"features": {
|
||||
"observation.image": {
|
||||
"_type": "Image"
|
||||
},
|
||||
"observation.state": {
|
||||
"feature": {
|
||||
"dtype": "float32",
|
||||
"_type": "Value"
|
||||
},
|
||||
"length": 4,
|
||||
"_type": "Sequence"
|
||||
},
|
||||
"action": {
|
||||
"feature": {
|
||||
"dtype": "float32",
|
||||
"_type": "Value"
|
||||
},
|
||||
"length": 4,
|
||||
"_type": "Sequence"
|
||||
},
|
||||
"episode_id": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"frame_id": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"timestamp": {
|
||||
"dtype": "float32",
|
||||
"_type": "Value"
|
||||
},
|
||||
"next.reward": {
|
||||
"dtype": "float32",
|
||||
"_type": "Value"
|
||||
},
|
||||
"next.done": {
|
||||
"dtype": "bool",
|
||||
"_type": "Value"
|
||||
},
|
||||
"episode_data_index_from": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"episode_data_index_to": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
},
|
||||
"index": {
|
||||
"dtype": "int64",
|
||||
"_type": "Value"
|
||||
}
|
||||
},
|
||||
"homepage": "",
|
||||
"license": ""
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"_data_files": [
|
||||
{
|
||||
"filename": "data-00000-of-00001.arrow"
|
||||
}
|
||||
],
|
||||
"_fingerprint": "a95cbec45e3bb9d6",
|
||||
"_format_columns": null,
|
||||
"_format_kwargs": {},
|
||||
"_format_type": "torch",
|
||||
"_output_all_columns": false,
|
||||
"_split": null
|
||||
}
|
|
@ -4,13 +4,12 @@ import einops
|
|||
import pytest
|
||||
import torch
|
||||
|
||||
from lerobot.common.datasets.utils import compute_stats, get_stats_einops_patterns, load_data_with_delta_timestamps
|
||||
from lerobot.common.datasets.xarm import XarmDataset
|
||||
from lerobot.common.datasets.utils import compute_stats, get_stats_einops_patterns, load_previous_and_future_frames
|
||||
from lerobot.common.transforms import Prod
|
||||
from lerobot.common.utils import init_hydra_config
|
||||
import logging
|
||||
from lerobot.common.datasets.factory import make_dataset
|
||||
|
||||
from datasets import Dataset
|
||||
from .utils import DEVICE, DEFAULT_CONFIG_PATH
|
||||
|
||||
|
||||
|
@ -38,7 +37,7 @@ def test_factory(env_name, dataset_id, policy_name):
|
|||
|
||||
keys_ndim_required = [
|
||||
("action", 1, True),
|
||||
("episode", 0, True),
|
||||
("episode_id", 0, True),
|
||||
("frame_id", 0, True),
|
||||
("timestamp", 0, True),
|
||||
# TODO(rcadene): should we rename it agent_pos?
|
||||
|
@ -94,6 +93,8 @@ def test_compute_stats():
|
|||
We compare with taking a straight min, mean, max, std of all the data in one pass (which we can do
|
||||
because we are working with a small dataset).
|
||||
"""
|
||||
from lerobot.common.datasets.xarm import XarmDataset
|
||||
|
||||
DATA_DIR = Path(os.environ["DATA_DIR"]) if "DATA_DIR" in os.environ else None
|
||||
|
||||
# get transform to convert images from uint8 [0,255] to float32 [0,1]
|
||||
|
@ -114,7 +115,13 @@ def test_compute_stats():
|
|||
stats_patterns = get_stats_einops_patterns(dataset)
|
||||
|
||||
# get all frames from the dataset in the same dtype and range as during compute_stats
|
||||
data_dict = transform(dataset.data_dict)
|
||||
dataloader = torch.utils.data.DataLoader(
|
||||
dataset,
|
||||
num_workers=8,
|
||||
batch_size=len(dataset),
|
||||
shuffle=False,
|
||||
)
|
||||
data_dict = next(iter(dataloader))
|
||||
|
||||
# compute stats based on all frames from the dataset without any batching
|
||||
expected_stats = {}
|
||||
|
@ -145,47 +152,50 @@ def test_compute_stats():
|
|||
# assert torch.allclose(loaded_stats[k]["max"], expected_stats[k]["max"])
|
||||
|
||||
|
||||
def test_load_data_with_delta_timestamps_within_tolerance():
|
||||
data_dict = {
|
||||
"timestamp": torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5]),
|
||||
"index": torch.tensor([0, 1, 2, 3, 4]),
|
||||
}
|
||||
data_ids_per_episode = {0: torch.tensor([0, 1, 2, 3, 4])}
|
||||
def test_load_previous_and_future_frames_within_tolerance():
|
||||
data_dict = Dataset.from_dict({
|
||||
"timestamp": [0.1, 0.2, 0.3, 0.4, 0.5],
|
||||
"index": [0, 1, 2, 3, 4],
|
||||
"episode_data_index_from": [0, 0, 0, 0, 0],
|
||||
"episode_data_index_to": [5, 5, 5, 5, 5],
|
||||
})
|
||||
data_dict = data_dict.with_format("torch")
|
||||
item = data_dict[2]
|
||||
delta_timestamps = {"index": [-0.2, 0, 0.139]}
|
||||
key = "index"
|
||||
current_ts = 0.3
|
||||
episode = 0
|
||||
tol = 0.04
|
||||
data, is_pad = load_data_with_delta_timestamps(data_dict, data_ids_per_episode, delta_timestamps, key, current_ts, episode, tol)
|
||||
assert not is_pad.any(), "Unexpected padding detected"
|
||||
item = load_previous_and_future_frames(item, data_dict, delta_timestamps, tol)
|
||||
data, is_pad = item["index"], item["index_is_pad"]
|
||||
assert torch.equal(data, torch.tensor([0, 2, 3])), "Data does not match expected values"
|
||||
assert not is_pad.any(), "Unexpected padding detected"
|
||||
|
||||
def test_load_data_with_delta_timestamps_outside_tolerance_inside_episode_range():
|
||||
data_dict = {
|
||||
"timestamp": torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5]),
|
||||
"index": torch.tensor([0, 1, 2, 3, 4]),
|
||||
}
|
||||
data_ids_per_episode = {0: torch.tensor([0, 1, 2, 3, 4])}
|
||||
def test_load_previous_and_future_frames_outside_tolerance_inside_episode_range():
|
||||
data_dict = Dataset.from_dict({
|
||||
"timestamp": [0.1, 0.2, 0.3, 0.4, 0.5],
|
||||
"index": [0, 1, 2, 3, 4],
|
||||
"episode_data_index_from": [0, 0, 0, 0, 0],
|
||||
"episode_data_index_to": [5, 5, 5, 5, 5],
|
||||
})
|
||||
data_dict = data_dict.with_format("torch")
|
||||
item = data_dict[2]
|
||||
delta_timestamps = {"index": [-0.2, 0, 0.141]}
|
||||
key = "index"
|
||||
current_ts = 0.3
|
||||
episode = 0
|
||||
tol = 0.04
|
||||
with pytest.raises(AssertionError):
|
||||
load_data_with_delta_timestamps(data_dict, data_ids_per_episode, delta_timestamps, key, current_ts, episode, tol)
|
||||
load_previous_and_future_frames(item, data_dict, delta_timestamps, tol)
|
||||
|
||||
def test_load_data_with_delta_timestamps_outside_tolerance_outside_episode_range():
|
||||
data_dict = {
|
||||
"timestamp": torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5]),
|
||||
"index": torch.tensor([0, 1, 2, 3, 4]),
|
||||
}
|
||||
data_ids_per_episode = {0: torch.tensor([0, 1, 2, 3, 4])}
|
||||
def test_load_previous_and_future_frames_outside_tolerance_outside_episode_range():
|
||||
data_dict = Dataset.from_dict({
|
||||
"timestamp": [0.1, 0.2, 0.3, 0.4, 0.5],
|
||||
"index": [0, 1, 2, 3, 4],
|
||||
"episode_data_index_from": [0, 0, 0, 0, 0],
|
||||
"episode_data_index_to": [5, 5, 5, 5, 5],
|
||||
})
|
||||
data_dict = data_dict.with_format("torch")
|
||||
item = data_dict[2]
|
||||
delta_timestamps = {"index": [-0.3, -0.24, 0, 0.26, 0.3]}
|
||||
key = "index"
|
||||
current_ts = 0.3
|
||||
episode = 0
|
||||
tol = 0.04
|
||||
data, is_pad = load_data_with_delta_timestamps(data_dict, data_ids_per_episode, delta_timestamps, key, current_ts, episode, tol)
|
||||
assert torch.equal(is_pad, torch.tensor([True, False, False, True, True])), "Padding does not match expected values"
|
||||
item = load_previous_and_future_frames(item, data_dict, delta_timestamps, tol)
|
||||
data, is_pad = item["index"], item["index_is_pad"]
|
||||
assert torch.equal(data, torch.tensor([0, 0, 2, 4, 4])), "Data does not match expected values"
|
||||
assert torch.equal(is_pad, torch.tensor([True, False, False, True, True])), "Padding does not match expected values"
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue