Updates RSL-RL to version 2.0 (#14)
* Adds files from internal * Updates version to 2.0 * Fix to split and pad for rnn policies (#15) * Small fix to adapt to new configs from orbit (#16) * Fixes dummy config * Adds onnx to deps * Adds contributors --------- Co-authored-by: Nikita Rudin <48368649+nikitardn@users.noreply.github.com>
This commit is contained in:
parent
2ad79cf0ca
commit
51d06cff92
|
@ -0,0 +1,22 @@
|
|||
[flake8]
|
||||
show-source=True
|
||||
statistics=True
|
||||
per-file-ignores=*/__init__.py:F401
|
||||
# E402: Module level import not at top of file
|
||||
# E501: Line too long
|
||||
# W503: Line break before binary operator
|
||||
# E203: Whitespace before ':' -> conflicts with black
|
||||
# D401: First line should be in imperative mood
|
||||
# R504: Unnecessary variable assignment before return statement.
|
||||
# R505: Unnecessary elif after return statement
|
||||
# SIM102: Use a single if-statement instead of nested if-statements
|
||||
# SIM117: Merge with statements for context managers that have same scope.
|
||||
ignore=E402,E501,W503,E203,D401,R504,R505,SIM102,SIM117
|
||||
max-line-length = 120
|
||||
max-complexity = 18
|
||||
exclude=_*,.vscode,.git,docs/**
|
||||
# docstrings
|
||||
docstring-convention=google
|
||||
# annotations
|
||||
suppress-none-returning=True
|
||||
allow-star-arg-any=True
|
|
@ -9,4 +9,4 @@ __pycache__
|
|||
.pytest_cache
|
||||
|
||||
# vs code
|
||||
.vscode
|
||||
.vscode
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
repos:
|
||||
- repo: https://github.com/python/black
|
||||
rev: 23.10.1
|
||||
hooks:
|
||||
- id: black
|
||||
args: ["--line-length", "120", "--preview"]
|
||||
- repo: https://github.com/pycqa/flake8
|
||||
rev: 6.1.0
|
||||
hooks:
|
||||
- id: flake8
|
||||
additional_dependencies: [flake8-simplify, flake8-return]
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.5.0
|
||||
hooks:
|
||||
- id: trailing-whitespace
|
||||
- id: check-symlinks
|
||||
- id: destroyed-symlinks
|
||||
- id: check-yaml
|
||||
- id: check-merge-conflict
|
||||
- id: check-case-conflict
|
||||
- id: check-executables-have-shebangs
|
||||
- id: check-toml
|
||||
- id: end-of-file-fixer
|
||||
- id: check-shebang-scripts-are-executable
|
||||
- id: detect-private-key
|
||||
- id: debug-statements
|
||||
- repo: https://github.com/pycqa/isort
|
||||
rev: 5.12.0
|
||||
hooks:
|
||||
- id: isort
|
||||
name: isort (python)
|
||||
args: ["--profile", "black", "--filter-files"]
|
||||
- repo: https://github.com/asottile/pyupgrade
|
||||
rev: v3.15.0
|
||||
hooks:
|
||||
- id: pyupgrade
|
||||
args: ["--py37-plus"]
|
||||
- repo: https://github.com/codespell-project/codespell
|
||||
rev: v2.2.6
|
||||
hooks:
|
||||
- id: codespell
|
||||
additional_dependencies:
|
||||
- tomli
|
||||
# FIXME: Figure out why this is getting stuck under VPN.
|
||||
# - repo: https://github.com/RobertCraigie/pyright-python
|
||||
# rev: v1.1.315
|
||||
# hooks:
|
||||
# - id: pyright
|
||||
# Note: We disable this by default since not all code is compatible with it.
|
||||
# - repo: https://github.com/Lucas-C/pre-commit-hooks
|
||||
# rev: v1.5.1
|
||||
# hooks:
|
||||
# - id: insert-license
|
||||
# files: \.py$
|
||||
# args:
|
||||
# # - --remove-header # Remove existing license headers. Useful when updating license.
|
||||
# - --license-filepath
|
||||
# - .github/LICENSE_HEADER.txt
|
|
@ -0,0 +1,31 @@
|
|||
# RSL-RL Maintainers and Contributors
|
||||
|
||||
This is the official list of developers and contributors.
|
||||
|
||||
To see the full list of contributors, see the revision history in the source control.
|
||||
|
||||
Names should be added to this file as: individual names or organizations.
|
||||
|
||||
Email addresses are tracked elsewhere to avoid spam.
|
||||
|
||||
Please keep the lists sorted alphabetically.
|
||||
|
||||
## Maintainers
|
||||
|
||||
* Robotic Systems Lab, ETH Zurich
|
||||
* NVIDIA Corporation
|
||||
|
||||
---
|
||||
|
||||
* David Hoeller
|
||||
* Nikita Rudin
|
||||
|
||||
## Contributors
|
||||
|
||||
* Eric Vollenweider
|
||||
* Fabian Jenelten
|
||||
* Lorenzo Terenzi
|
||||
* Marko Bjelonic
|
||||
* Matthijs van der Boon
|
||||
* Mayank Mittal
|
||||
* Zhang Chong
|
2
LICENSE
2
LICENSE
|
@ -27,4 +27,4 @@ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
See licenses/dependencies for license information of dependencies of this package.
|
||||
See licenses/dependencies for license information of dependencies of this package.
|
||||
|
|
47
README.md
47
README.md
|
@ -1,26 +1,57 @@
|
|||
# RSL RL
|
||||
|
||||
Fast and simple implementation of RL algorithms, designed to run fully on GPU.
|
||||
This code is an evolution of `rl-pytorch` provided with NVIDIA's Isaac GYM.
|
||||
|
||||
Only PPO is implemented for now. More algorithms will be added later.
|
||||
Contributions are welcome.
|
||||
|
||||
**Maintainer**: David Hoeller and Nikita Rudin <br/>
|
||||
**Affiliation**: Robotic Systems Lab, ETH Zurich & NVIDIA <br/>
|
||||
**Contact**: rudinn@ethz.ch
|
||||
|
||||
## Setup
|
||||
|
||||
```
|
||||
The following are the instructions to set up the repository for your workspace:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/leggedrobotics/rsl_rl
|
||||
cd rsl_rl
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
### Useful Links ###
|
||||
Example use case: https://github.com/leggedrobotics/legged_gym
|
||||
Project website: https://leggedrobotics.github.io/legged_gym/
|
||||
Paper: https://arxiv.org/abs/2109.11978
|
||||
The framework supports the following logging frameworks which can be configured through `logger`:
|
||||
|
||||
**Maintainer**: Nikita Rudin
|
||||
**Affiliation**: Robotic Systems Lab, ETH Zurich & NVIDIA
|
||||
**Contact**: rudinn@ethz.ch
|
||||
* Tensorboard: https://www.tensorflow.org/tensorboard/
|
||||
* Weights & Biases: https://wandb.ai/site
|
||||
* Neptune: https://docs.neptune.ai/
|
||||
|
||||
For a demo configuration of PPO, please check the [dummy_config.yaml](config/dummy_config.yaml) file.
|
||||
|
||||
|
||||
## Contribution Guidelines
|
||||
|
||||
For documentation, we adopt the [Google Style Guide](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) for docstrings. We use [Sphinx](https://www.sphinx-doc.org/en/master/) for generating the documentation. Please make sure that your code is well-documented and follows the guidelines.
|
||||
|
||||
We use the following tools for maintaining code quality:
|
||||
|
||||
- [pre-commit](https://pre-commit.com/): Runs a list of formatters and linters over the codebase.
|
||||
- [black](https://black.readthedocs.io/en/stable/): The uncompromising code formatter.
|
||||
- [flake8](https://flake8.pycqa.org/en/latest/): A wrapper around PyFlakes, pycodestyle, and McCabe complexity checker.
|
||||
|
||||
Please check [here](https://pre-commit.com/#install) for instructions to set these up. To run over the entire repository, please execute the following command in the terminal:
|
||||
|
||||
|
||||
```bash
|
||||
# for installation (only once)
|
||||
pre-commit install
|
||||
# for running
|
||||
pre-commit run --all-files
|
||||
```
|
||||
|
||||
### Useful Links
|
||||
|
||||
Environment repositories using the framework:
|
||||
|
||||
* `Legged-Gym` (built on top of NVIDIA Isaac Gym): https://leggedrobotics.github.io/legged_gym/
|
||||
* `Orbit` (built on top of NVIDIA Isaac Sim): https://isaac-orbit.github.io/
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
algorithm:
|
||||
class_name: PPO
|
||||
# training parameters
|
||||
# -- value function
|
||||
value_loss_coef: 1.0
|
||||
clip_param: 0.2
|
||||
use_clipped_value_loss: true
|
||||
# -- surrogate loss
|
||||
desired_kl: 0.01
|
||||
entropy_coef: 0.01
|
||||
gamma: 0.99
|
||||
lam: 0.95
|
||||
max_grad_norm: 1.0
|
||||
# -- training
|
||||
learning_rate: 0.001
|
||||
num_learning_epochs: 5
|
||||
num_mini_batches: 4 # mini batch size = num_envs * num_steps / num_mini_batches
|
||||
schedule: adaptive # adaptive, fixed
|
||||
policy:
|
||||
class_name: ActorCritic
|
||||
# for MLP i.e. `ActorCritic`
|
||||
activation: elu
|
||||
actor_hidden_dims: [128, 128, 128]
|
||||
critic_hidden_dims: [128, 128, 128]
|
||||
init_noise_std: 1.0
|
||||
# only needed for `ActorCriticRecurrent`
|
||||
# rnn_type: 'lstm'
|
||||
# rnn_hidden_size: 512
|
||||
# rnn_num_layers: 1
|
||||
runner:
|
||||
num_steps_per_env: 24 # number of steps per environment per iteration
|
||||
max_iterations: 1500 # number of policy updates
|
||||
empirical_normalization: false
|
||||
# -- logging parameters
|
||||
save_interval: 50 # check for potential saves every `save_interval` iterations
|
||||
experiment_name: walking_experiment
|
||||
run_name: ""
|
||||
# -- logging writer
|
||||
logger: tensorboard # tensorboard, neptune, wandb
|
||||
neptune_project: legged_gym
|
||||
wandb_project: legged_gym
|
||||
# -- loading and resuming
|
||||
resume: false
|
||||
load_run: -1 # -1 means load latest run
|
||||
resume_path: null # updated from load_run and checkpoint
|
||||
checkpoint: -1 # -1 means load latest checkpoint
|
||||
runner_class_name: OnPolicyRunner
|
||||
seed: 1
|
|
@ -0,0 +1,21 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2018 Łukasz Langa
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
|
@ -0,0 +1,339 @@
|
|||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 2, June 1991
|
||||
|
||||
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The licenses for most software are designed to take away your
|
||||
freedom to share and change it. By contrast, the GNU General Public
|
||||
License is intended to guarantee your freedom to share and change free
|
||||
software--to make sure the software is free for all its users. This
|
||||
General Public License applies to most of the Free Software
|
||||
Foundation's software and to any other program whose authors commit to
|
||||
using it. (Some other Free Software Foundation software is covered by
|
||||
the GNU Lesser General Public License instead.) You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
this service if you wish), that you receive source code or can get it
|
||||
if you want it, that you can change the software or use pieces of it
|
||||
in new free programs; and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to make restrictions that forbid
|
||||
anyone to deny you these rights or to ask you to surrender the rights.
|
||||
These restrictions translate to certain responsibilities for you if you
|
||||
distribute copies of the software, or if you modify it.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must give the recipients all the rights that
|
||||
you have. You must make sure that they, too, receive or can get the
|
||||
source code. And you must show them these terms so they know their
|
||||
rights.
|
||||
|
||||
We protect your rights with two steps: (1) copyright the software, and
|
||||
(2) offer you this license which gives you legal permission to copy,
|
||||
distribute and/or modify the software.
|
||||
|
||||
Also, for each author's protection and ours, we want to make certain
|
||||
that everyone understands that there is no warranty for this free
|
||||
software. If the software is modified by someone else and passed on, we
|
||||
want its recipients to know that what they have is not the original, so
|
||||
that any problems introduced by others will not reflect on the original
|
||||
authors' reputations.
|
||||
|
||||
Finally, any free program is threatened constantly by software
|
||||
patents. We wish to avoid the danger that redistributors of a free
|
||||
program will individually obtain patent licenses, in effect making the
|
||||
program proprietary. To prevent this, we have made it clear that any
|
||||
patent must be licensed for everyone's free use or not licensed at all.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. This License applies to any program or other work which contains
|
||||
a notice placed by the copyright holder saying it may be distributed
|
||||
under the terms of this General Public License. The "Program", below,
|
||||
refers to any such program or work, and a "work based on the Program"
|
||||
means either the Program or any derivative work under copyright law:
|
||||
that is to say, a work containing the Program or a portion of it,
|
||||
either verbatim or with modifications and/or translated into another
|
||||
language. (Hereinafter, translation is included without limitation in
|
||||
the term "modification".) Each licensee is addressed as "you".
|
||||
|
||||
Activities other than copying, distribution and modification are not
|
||||
covered by this License; they are outside its scope. The act of
|
||||
running the Program is not restricted, and the output from the Program
|
||||
is covered only if its contents constitute a work based on the
|
||||
Program (independent of having been made by running the Program).
|
||||
Whether that is true depends on what the Program does.
|
||||
|
||||
1. You may copy and distribute verbatim copies of the Program's
|
||||
source code as you receive it, in any medium, provided that you
|
||||
conspicuously and appropriately publish on each copy an appropriate
|
||||
copyright notice and disclaimer of warranty; keep intact all the
|
||||
notices that refer to this License and to the absence of any warranty;
|
||||
and give any other recipients of the Program a copy of this License
|
||||
along with the Program.
|
||||
|
||||
You may charge a fee for the physical act of transferring a copy, and
|
||||
you may at your option offer warranty protection in exchange for a fee.
|
||||
|
||||
2. You may modify your copy or copies of the Program or any portion
|
||||
of it, thus forming a work based on the Program, and copy and
|
||||
distribute such modifications or work under the terms of Section 1
|
||||
above, provided that you also meet all of these conditions:
|
||||
|
||||
a) You must cause the modified files to carry prominent notices
|
||||
stating that you changed the files and the date of any change.
|
||||
|
||||
b) You must cause any work that you distribute or publish, that in
|
||||
whole or in part contains or is derived from the Program or any
|
||||
part thereof, to be licensed as a whole at no charge to all third
|
||||
parties under the terms of this License.
|
||||
|
||||
c) If the modified program normally reads commands interactively
|
||||
when run, you must cause it, when started running for such
|
||||
interactive use in the most ordinary way, to print or display an
|
||||
announcement including an appropriate copyright notice and a
|
||||
notice that there is no warranty (or else, saying that you provide
|
||||
a warranty) and that users may redistribute the program under
|
||||
these conditions, and telling the user how to view a copy of this
|
||||
License. (Exception: if the Program itself is interactive but
|
||||
does not normally print such an announcement, your work based on
|
||||
the Program is not required to print an announcement.)
|
||||
|
||||
These requirements apply to the modified work as a whole. If
|
||||
identifiable sections of that work are not derived from the Program,
|
||||
and can be reasonably considered independent and separate works in
|
||||
themselves, then this License, and its terms, do not apply to those
|
||||
sections when you distribute them as separate works. But when you
|
||||
distribute the same sections as part of a whole which is a work based
|
||||
on the Program, the distribution of the whole must be on the terms of
|
||||
this License, whose permissions for other licensees extend to the
|
||||
entire whole, and thus to each and every part regardless of who wrote it.
|
||||
|
||||
Thus, it is not the intent of this section to claim rights or contest
|
||||
your rights to work written entirely by you; rather, the intent is to
|
||||
exercise the right to control the distribution of derivative or
|
||||
collective works based on the Program.
|
||||
|
||||
In addition, mere aggregation of another work not based on the Program
|
||||
with the Program (or with a work based on the Program) on a volume of
|
||||
a storage or distribution medium does not bring the other work under
|
||||
the scope of this License.
|
||||
|
||||
3. You may copy and distribute the Program (or a work based on it,
|
||||
under Section 2) in object code or executable form under the terms of
|
||||
Sections 1 and 2 above provided that you also do one of the following:
|
||||
|
||||
a) Accompany it with the complete corresponding machine-readable
|
||||
source code, which must be distributed under the terms of Sections
|
||||
1 and 2 above on a medium customarily used for software interchange; or,
|
||||
|
||||
b) Accompany it with a written offer, valid for at least three
|
||||
years, to give any third party, for a charge no more than your
|
||||
cost of physically performing source distribution, a complete
|
||||
machine-readable copy of the corresponding source code, to be
|
||||
distributed under the terms of Sections 1 and 2 above on a medium
|
||||
customarily used for software interchange; or,
|
||||
|
||||
c) Accompany it with the information you received as to the offer
|
||||
to distribute corresponding source code. (This alternative is
|
||||
allowed only for noncommercial distribution and only if you
|
||||
received the program in object code or executable form with such
|
||||
an offer, in accord with Subsection b above.)
|
||||
|
||||
The source code for a work means the preferred form of the work for
|
||||
making modifications to it. For an executable work, complete source
|
||||
code means all the source code for all modules it contains, plus any
|
||||
associated interface definition files, plus the scripts used to
|
||||
control compilation and installation of the executable. However, as a
|
||||
special exception, the source code distributed need not include
|
||||
anything that is normally distributed (in either source or binary
|
||||
form) with the major components (compiler, kernel, and so on) of the
|
||||
operating system on which the executable runs, unless that component
|
||||
itself accompanies the executable.
|
||||
|
||||
If distribution of executable or object code is made by offering
|
||||
access to copy from a designated place, then offering equivalent
|
||||
access to copy the source code from the same place counts as
|
||||
distribution of the source code, even though third parties are not
|
||||
compelled to copy the source along with the object code.
|
||||
|
||||
4. You may not copy, modify, sublicense, or distribute the Program
|
||||
except as expressly provided under this License. Any attempt
|
||||
otherwise to copy, modify, sublicense or distribute the Program is
|
||||
void, and will automatically terminate your rights under this License.
|
||||
However, parties who have received copies, or rights, from you under
|
||||
this License will not have their licenses terminated so long as such
|
||||
parties remain in full compliance.
|
||||
|
||||
5. You are not required to accept this License, since you have not
|
||||
signed it. However, nothing else grants you permission to modify or
|
||||
distribute the Program or its derivative works. These actions are
|
||||
prohibited by law if you do not accept this License. Therefore, by
|
||||
modifying or distributing the Program (or any work based on the
|
||||
Program), you indicate your acceptance of this License to do so, and
|
||||
all its terms and conditions for copying, distributing or modifying
|
||||
the Program or works based on it.
|
||||
|
||||
6. Each time you redistribute the Program (or any work based on the
|
||||
Program), the recipient automatically receives a license from the
|
||||
original licensor to copy, distribute or modify the Program subject to
|
||||
these terms and conditions. You may not impose any further
|
||||
restrictions on the recipients' exercise of the rights granted herein.
|
||||
You are not responsible for enforcing compliance by third parties to
|
||||
this License.
|
||||
|
||||
7. If, as a consequence of a court judgment or allegation of patent
|
||||
infringement or for any other reason (not limited to patent issues),
|
||||
conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot
|
||||
distribute so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you
|
||||
may not distribute the Program at all. For example, if a patent
|
||||
license would not permit royalty-free redistribution of the Program by
|
||||
all those who receive copies directly or indirectly through you, then
|
||||
the only way you could satisfy both it and this License would be to
|
||||
refrain entirely from distribution of the Program.
|
||||
|
||||
If any portion of this section is held invalid or unenforceable under
|
||||
any particular circumstance, the balance of the section is intended to
|
||||
apply and the section as a whole is intended to apply in other
|
||||
circumstances.
|
||||
|
||||
It is not the purpose of this section to induce you to infringe any
|
||||
patents or other property right claims or to contest validity of any
|
||||
such claims; this section has the sole purpose of protecting the
|
||||
integrity of the free software distribution system, which is
|
||||
implemented by public license practices. Many people have made
|
||||
generous contributions to the wide range of software distributed
|
||||
through that system in reliance on consistent application of that
|
||||
system; it is up to the author/donor to decide if he or she is willing
|
||||
to distribute software through any other system and a licensee cannot
|
||||
impose that choice.
|
||||
|
||||
This section is intended to make thoroughly clear what is believed to
|
||||
be a consequence of the rest of this License.
|
||||
|
||||
8. If the distribution and/or use of the Program is restricted in
|
||||
certain countries either by patents or by copyrighted interfaces, the
|
||||
original copyright holder who places the Program under this License
|
||||
may add an explicit geographical distribution limitation excluding
|
||||
those countries, so that distribution is permitted only in or among
|
||||
countries not thus excluded. In such case, this License incorporates
|
||||
the limitation as if written in the body of this License.
|
||||
|
||||
9. The Free Software Foundation may publish revised and/or new versions
|
||||
of the General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the Program
|
||||
specifies a version number of this License which applies to it and "any
|
||||
later version", you have the option of following the terms and conditions
|
||||
either of that version or of any later version published by the Free
|
||||
Software Foundation. If the Program does not specify a version number of
|
||||
this License, you may choose any version ever published by the Free Software
|
||||
Foundation.
|
||||
|
||||
10. If you wish to incorporate parts of the Program into other free
|
||||
programs whose distribution conditions are different, write to the author
|
||||
to ask for permission. For software which is copyrighted by the Free
|
||||
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||
make exceptions for this. Our decision will be guided by the two goals
|
||||
of preserving the free status of all derivatives of our free software and
|
||||
of promoting the sharing and reuse of software generally.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||
REPAIR OR CORRECTION.
|
||||
|
||||
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
convey the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program is interactive, make it output a short notice like this
|
||||
when it starts in an interactive mode:
|
||||
|
||||
Gnomovision version 69, Copyright (C) year name of author
|
||||
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, the commands you use may
|
||||
be called something other than `show w' and `show c'; they could even be
|
||||
mouse-clicks or menu items--whatever suits your program.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or your
|
||||
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||
necessary. Here is a sample; alter the names:
|
||||
|
||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
||||
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
||||
|
||||
<signature of Ty Coon>, 1 April 1989
|
||||
Ty Coon, President of Vice
|
||||
|
||||
This General Public License does not permit incorporating your program into
|
||||
proprietary programs. If your program is a subroutine library, you may
|
||||
consider it more useful to permit linking proprietary applications with the
|
||||
library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License.
|
|
@ -0,0 +1,22 @@
|
|||
== Flake8 License (MIT) ==
|
||||
|
||||
Copyright (C) 2011-2013 Tarek Ziade <tarek@ziade.org>
|
||||
Copyright (C) 2012-2016 Ian Cordasco <graffatcolmingov@gmail.com>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
of the Software, and to permit persons to whom the Software is furnished to do
|
||||
so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
|
@ -0,0 +1,21 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2013 Timothy Edmund Crosley
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
|
@ -27,4 +27,4 @@ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
|
|
@ -0,0 +1,202 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -0,0 +1,19 @@
|
|||
Copyright (c) 2014 pre-commit dev team: Anthony Sottile, Ken Struys
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
|
@ -0,0 +1,19 @@
|
|||
Copyright (c) 2014 pre-commit dev team: Anthony Sottile, Ken Struys
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
|
@ -0,0 +1,47 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2021 Robert Craigie
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
|
||||
===============================================================================
|
||||
|
||||
MIT License
|
||||
|
||||
Pyright - A static type checker for the Python language
|
||||
Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE
|
|
@ -0,0 +1,19 @@
|
|||
Copyright (c) 2017 Anthony Sottile
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
|
@ -70,4 +70,4 @@ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
[tool.isort]
|
||||
|
||||
py_version = 37
|
||||
line_length = 120
|
||||
group_by_package = true
|
||||
|
||||
# Files to skip
|
||||
skip_glob = [".vscode/*"]
|
||||
|
||||
# Order of imports
|
||||
sections = [
|
||||
"FUTURE",
|
||||
"STDLIB",
|
||||
"THIRDPARTY",
|
||||
"FIRSTPARTY",
|
||||
"LOCALFOLDER",
|
||||
]
|
||||
|
||||
# Extra libraries that isort should treat as part of the Python standard library (permissive licenses)
|
||||
extra_standard_library = [
|
||||
"numpy",
|
||||
"torch",
|
||||
"tensordict",
|
||||
"warp",
|
||||
"typing_extensions",
|
||||
"git",
|
||||
]
|
||||
# Imports from this repository
|
||||
known_first_party = "rsl_rl"
|
||||
|
||||
[tool.pyright]
|
||||
|
||||
include = ["rsl_rl"]
|
||||
|
||||
typeCheckingMode = "basic"
|
||||
pythonVersion = "3.7"
|
||||
pythonPlatform = "Linux"
|
||||
enableTypeIgnoreComments = true
|
||||
|
||||
# This is required as the CI pre-commit does not download the modules (e.g. numpy, torch, prettytable)
|
||||
# Therefore, we have to ignore missing imports
|
||||
reportMissingImports = "none"
|
||||
# This is required to ignore type-check errors for modules with missing stubs.
|
||||
reportMissingModuleSource = "none" # -> most common: prettytable in mdp managers
|
||||
|
||||
reportGeneralTypeIssues = "none" # -> raises 218 errors (usage of literal MISSING in dataclasses)
|
||||
reportOptionalMemberAccess = "warning" # -> raises 8 errors
|
||||
reportPrivateUsage = "warning"
|
|
@ -1,29 +1,7 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Copyright (c) 2021 ETH Zurich, Nikita Rudin
|
||||
# Copyright 2021 ETH Zurich, NVIDIA CORPORATION
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
"""Main module for the rsl_rl package."""
|
||||
|
||||
__version__ = "2.0.0"
|
||||
__license__ = "BSD-3"
|
||||
|
|
|
@ -1,31 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Copyright (c) 2021 ETH Zurich, Nikita Rudin
|
||||
# Copyright 2021 ETH Zurich, NVIDIA CORPORATION
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from .ppo import PPO
|
||||
"""Implementation of different RL agents."""
|
||||
|
||||
from .ppo import PPO
|
||||
|
||||
__all__ = ["PPO"]
|
||||
|
|
|
@ -1,32 +1,7 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Copyright (c) 2021 ETH Zurich, Nikita Rudin
|
||||
# Copyright 2021 ETH Zurich, NVIDIA CORPORATION
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
@ -35,25 +10,27 @@ import torch.optim as optim
|
|||
from rsl_rl.modules import ActorCritic
|
||||
from rsl_rl.storage import RolloutStorage
|
||||
|
||||
|
||||
class PPO:
|
||||
actor_critic: ActorCritic
|
||||
def __init__(self,
|
||||
actor_critic,
|
||||
num_learning_epochs=1,
|
||||
num_mini_batches=1,
|
||||
clip_param=0.2,
|
||||
gamma=0.998,
|
||||
lam=0.95,
|
||||
value_loss_coef=1.0,
|
||||
entropy_coef=0.0,
|
||||
learning_rate=1e-3,
|
||||
max_grad_norm=1.0,
|
||||
use_clipped_value_loss=True,
|
||||
schedule="fixed",
|
||||
desired_kl=0.01,
|
||||
device='cpu',
|
||||
):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
actor_critic,
|
||||
num_learning_epochs=1,
|
||||
num_mini_batches=1,
|
||||
clip_param=0.2,
|
||||
gamma=0.998,
|
||||
lam=0.95,
|
||||
value_loss_coef=1.0,
|
||||
entropy_coef=0.0,
|
||||
learning_rate=1e-3,
|
||||
max_grad_norm=1.0,
|
||||
use_clipped_value_loss=True,
|
||||
schedule="fixed",
|
||||
desired_kl=0.01,
|
||||
device="cpu",
|
||||
):
|
||||
self.device = device
|
||||
|
||||
self.desired_kl = desired_kl
|
||||
|
@ -63,7 +40,7 @@ class PPO:
|
|||
# PPO components
|
||||
self.actor_critic = actor_critic
|
||||
self.actor_critic.to(self.device)
|
||||
self.storage = None # initialized later
|
||||
self.storage = None # initialized later
|
||||
self.optimizer = optim.Adam(self.actor_critic.parameters(), lr=learning_rate)
|
||||
self.transition = RolloutStorage.Transition()
|
||||
|
||||
|
@ -79,11 +56,13 @@ class PPO:
|
|||
self.use_clipped_value_loss = use_clipped_value_loss
|
||||
|
||||
def init_storage(self, num_envs, num_transitions_per_env, actor_obs_shape, critic_obs_shape, action_shape):
|
||||
self.storage = RolloutStorage(num_envs, num_transitions_per_env, actor_obs_shape, critic_obs_shape, action_shape, self.device)
|
||||
self.storage = RolloutStorage(
|
||||
num_envs, num_transitions_per_env, actor_obs_shape, critic_obs_shape, action_shape, self.device
|
||||
)
|
||||
|
||||
def test_mode(self):
|
||||
self.actor_critic.test()
|
||||
|
||||
|
||||
def train_mode(self):
|
||||
self.actor_critic.train()
|
||||
|
||||
|
@ -100,21 +79,23 @@ class PPO:
|
|||
self.transition.observations = obs
|
||||
self.transition.critic_observations = critic_obs
|
||||
return self.transition.actions
|
||||
|
||||
|
||||
def process_env_step(self, rewards, dones, infos):
|
||||
self.transition.rewards = rewards.clone()
|
||||
self.transition.dones = dones
|
||||
# Bootstrapping on time outs
|
||||
if 'time_outs' in infos:
|
||||
self.transition.rewards += self.gamma * torch.squeeze(self.transition.values * infos['time_outs'].unsqueeze(1).to(self.device), 1)
|
||||
if "time_outs" in infos:
|
||||
self.transition.rewards += self.gamma * torch.squeeze(
|
||||
self.transition.values * infos["time_outs"].unsqueeze(1).to(self.device), 1
|
||||
)
|
||||
|
||||
# Record the transition
|
||||
self.storage.add_transitions(self.transition)
|
||||
self.transition.clear()
|
||||
self.actor_critic.reset(dones)
|
||||
|
||||
|
||||
def compute_returns(self, last_critic_obs):
|
||||
last_values= self.actor_critic.evaluate(last_critic_obs).detach()
|
||||
last_values = self.actor_critic.evaluate(last_critic_obs).detach()
|
||||
self.storage.compute_returns(last_values, self.gamma, self.lam)
|
||||
|
||||
def update(self):
|
||||
|
@ -124,60 +105,77 @@ class PPO:
|
|||
generator = self.storage.reccurent_mini_batch_generator(self.num_mini_batches, self.num_learning_epochs)
|
||||
else:
|
||||
generator = self.storage.mini_batch_generator(self.num_mini_batches, self.num_learning_epochs)
|
||||
for obs_batch, critic_obs_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, old_actions_log_prob_batch, \
|
||||
old_mu_batch, old_sigma_batch, hid_states_batch, masks_batch in generator:
|
||||
for (
|
||||
obs_batch,
|
||||
critic_obs_batch,
|
||||
actions_batch,
|
||||
target_values_batch,
|
||||
advantages_batch,
|
||||
returns_batch,
|
||||
old_actions_log_prob_batch,
|
||||
old_mu_batch,
|
||||
old_sigma_batch,
|
||||
hid_states_batch,
|
||||
masks_batch,
|
||||
) in generator:
|
||||
self.actor_critic.act(obs_batch, masks=masks_batch, hidden_states=hid_states_batch[0])
|
||||
actions_log_prob_batch = self.actor_critic.get_actions_log_prob(actions_batch)
|
||||
value_batch = self.actor_critic.evaluate(
|
||||
critic_obs_batch, masks=masks_batch, hidden_states=hid_states_batch[1]
|
||||
)
|
||||
mu_batch = self.actor_critic.action_mean
|
||||
sigma_batch = self.actor_critic.action_std
|
||||
entropy_batch = self.actor_critic.entropy
|
||||
|
||||
# KL
|
||||
if self.desired_kl is not None and self.schedule == "adaptive":
|
||||
with torch.inference_mode():
|
||||
kl = torch.sum(
|
||||
torch.log(sigma_batch / old_sigma_batch + 1.0e-5)
|
||||
+ (torch.square(old_sigma_batch) + torch.square(old_mu_batch - mu_batch))
|
||||
/ (2.0 * torch.square(sigma_batch))
|
||||
- 0.5,
|
||||
axis=-1,
|
||||
)
|
||||
kl_mean = torch.mean(kl)
|
||||
|
||||
self.actor_critic.act(obs_batch, masks=masks_batch, hidden_states=hid_states_batch[0])
|
||||
actions_log_prob_batch = self.actor_critic.get_actions_log_prob(actions_batch)
|
||||
value_batch = self.actor_critic.evaluate(critic_obs_batch, masks=masks_batch, hidden_states=hid_states_batch[1])
|
||||
mu_batch = self.actor_critic.action_mean
|
||||
sigma_batch = self.actor_critic.action_std
|
||||
entropy_batch = self.actor_critic.entropy
|
||||
if kl_mean > self.desired_kl * 2.0:
|
||||
self.learning_rate = max(1e-5, self.learning_rate / 1.5)
|
||||
elif kl_mean < self.desired_kl / 2.0 and kl_mean > 0.0:
|
||||
self.learning_rate = min(1e-2, self.learning_rate * 1.5)
|
||||
|
||||
# KL
|
||||
if self.desired_kl != None and self.schedule == 'adaptive':
|
||||
with torch.inference_mode():
|
||||
kl = torch.sum(
|
||||
torch.log(sigma_batch / old_sigma_batch + 1.e-5) + (torch.square(old_sigma_batch) + torch.square(old_mu_batch - mu_batch)) / (2.0 * torch.square(sigma_batch)) - 0.5, axis=-1)
|
||||
kl_mean = torch.mean(kl)
|
||||
for param_group in self.optimizer.param_groups:
|
||||
param_group["lr"] = self.learning_rate
|
||||
|
||||
if kl_mean > self.desired_kl * 2.0:
|
||||
self.learning_rate = max(1e-5, self.learning_rate / 1.5)
|
||||
elif kl_mean < self.desired_kl / 2.0 and kl_mean > 0.0:
|
||||
self.learning_rate = min(1e-2, self.learning_rate * 1.5)
|
||||
|
||||
for param_group in self.optimizer.param_groups:
|
||||
param_group['lr'] = self.learning_rate
|
||||
# Surrogate loss
|
||||
ratio = torch.exp(actions_log_prob_batch - torch.squeeze(old_actions_log_prob_batch))
|
||||
surrogate = -torch.squeeze(advantages_batch) * ratio
|
||||
surrogate_clipped = -torch.squeeze(advantages_batch) * torch.clamp(
|
||||
ratio, 1.0 - self.clip_param, 1.0 + self.clip_param
|
||||
)
|
||||
surrogate_loss = torch.max(surrogate, surrogate_clipped).mean()
|
||||
|
||||
# Value function loss
|
||||
if self.use_clipped_value_loss:
|
||||
value_clipped = target_values_batch + (value_batch - target_values_batch).clamp(
|
||||
-self.clip_param, self.clip_param
|
||||
)
|
||||
value_losses = (value_batch - returns_batch).pow(2)
|
||||
value_losses_clipped = (value_clipped - returns_batch).pow(2)
|
||||
value_loss = torch.max(value_losses, value_losses_clipped).mean()
|
||||
else:
|
||||
value_loss = (returns_batch - value_batch).pow(2).mean()
|
||||
|
||||
# Surrogate loss
|
||||
ratio = torch.exp(actions_log_prob_batch - torch.squeeze(old_actions_log_prob_batch))
|
||||
surrogate = -torch.squeeze(advantages_batch) * ratio
|
||||
surrogate_clipped = -torch.squeeze(advantages_batch) * torch.clamp(ratio, 1.0 - self.clip_param,
|
||||
1.0 + self.clip_param)
|
||||
surrogate_loss = torch.max(surrogate, surrogate_clipped).mean()
|
||||
loss = surrogate_loss + self.value_loss_coef * value_loss - self.entropy_coef * entropy_batch.mean()
|
||||
|
||||
# Value function loss
|
||||
if self.use_clipped_value_loss:
|
||||
value_clipped = target_values_batch + (value_batch - target_values_batch).clamp(-self.clip_param,
|
||||
self.clip_param)
|
||||
value_losses = (value_batch - returns_batch).pow(2)
|
||||
value_losses_clipped = (value_clipped - returns_batch).pow(2)
|
||||
value_loss = torch.max(value_losses, value_losses_clipped).mean()
|
||||
else:
|
||||
value_loss = (returns_batch - value_batch).pow(2).mean()
|
||||
# Gradient step
|
||||
self.optimizer.zero_grad()
|
||||
loss.backward()
|
||||
nn.utils.clip_grad_norm_(self.actor_critic.parameters(), self.max_grad_norm)
|
||||
self.optimizer.step()
|
||||
|
||||
loss = surrogate_loss + self.value_loss_coef * value_loss - self.entropy_coef * entropy_batch.mean()
|
||||
|
||||
# Gradient step
|
||||
self.optimizer.zero_grad()
|
||||
loss.backward()
|
||||
nn.utils.clip_grad_norm_(self.actor_critic.parameters(), self.max_grad_norm)
|
||||
self.optimizer.step()
|
||||
|
||||
mean_value_loss += value_loss.item()
|
||||
mean_surrogate_loss += surrogate_loss.item()
|
||||
mean_value_loss += value_loss.item()
|
||||
mean_surrogate_loss += surrogate_loss.item()
|
||||
|
||||
num_updates = self.num_learning_epochs * self.num_mini_batches
|
||||
mean_value_loss /= num_updates
|
||||
|
|
|
@ -1,31 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Copyright (c) 2021 ETH Zurich, Nikita Rudin
|
||||
# Copyright 2021 ETH Zurich, NVIDIA CORPORATION
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from .vec_env import VecEnv
|
||||
"""Submodule defining the environment definitions."""
|
||||
|
||||
from .vec_env import VecEnv
|
||||
|
||||
__all__ = ["VecEnv"]
|
||||
|
|
|
@ -1,60 +1,85 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Copyright (c) 2021 ETH Zurich, Nikita Rudin
|
||||
# Copyright 2021 ETH Zurich, NVIDIA CORPORATION
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
import torch
|
||||
from typing import Tuple, Union
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
# minimal interface of the environment
|
||||
class VecEnv(ABC):
    """Abstract class for vectorized environment.

    The vectorized environment is a collection of environments that are stepped together: one batch
    of actions (one row per environment) is applied per call to :meth:`step`, and one batch of
    observations is returned.

    All extra observations must be provided as a dictionary to "extras" in the step() method. Based on the
    configuration, the extra observations are used for different purposes. The following keys are reserved
    in the "observations" dictionary (if they are present):

    - "critic": The observation is used as input to the critic network. Useful for asymmetric observation spaces.
    """

    num_envs: int
    """Number of environments."""
    num_obs: int
    """Number of observations."""
    num_privileged_obs: int
    """Number of privileged observations."""
    num_actions: int
    """Number of actions."""
    max_episode_length: int
    """Maximum episode length."""
    privileged_obs_buf: torch.Tensor
    """Buffer for privileged observations."""
    obs_buf: torch.Tensor
    """Buffer for observations."""
    rew_buf: torch.Tensor
    """Buffer for rewards."""
    reset_buf: torch.Tensor
    """Buffer for resets."""
    episode_length_buf: torch.Tensor  # current episode duration
    """Buffer for current episode lengths."""
    extras: dict
    """Extra information (metrics).

    Extra information is stored in a dictionary. This includes metrics such as the episode reward, episode length,
    etc. Additional information can be stored in the dictionary such as observations for the critic network, etc.
    """
    device: torch.device
    """Device to use."""

    #
    # Operations.
    #

    @abstractmethod
    def get_observations(self) -> tuple[torch.Tensor, dict]:
        """Return the current observations.

        Returns:
            Tuple containing the observations and extras.
        """
        raise NotImplementedError

    @abstractmethod
    def reset(self) -> tuple[torch.Tensor, dict]:
        """Reset all environment instances.

        Returns:
            Tuple containing the observations and extras.
        """
        raise NotImplementedError

    @abstractmethod
    def step(self, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, dict]:
        """Apply input action on the environment.

        Args:
            actions: Input actions to apply. Shape: (num_envs, num_actions)

        Returns:
            A tuple containing the observations, rewards, dones and extra information (metrics).
        """
        raise NotImplementedError
|
||||
|
|
|
@ -1,32 +1,10 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Copyright (c) 2021 ETH Zurich, Nikita Rudin
|
||||
# Copyright 2021 ETH Zurich, NVIDIA CORPORATION
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
"""Definitions for neural-network components for RL-agents."""
|
||||
|
||||
from .actor_critic import ActorCritic
|
||||
from .actor_critic_recurrent import ActorCriticRecurrent
|
||||
from .actor_critic_recurrent import ActorCriticRecurrent
|
||||
from .normalizer import EmpiricalNormalization
|
||||
|
||||
# Export every class this package imports, including the observation normalizer.
__all__ = ["ActorCritic", "ActorCriticRecurrent", "EmpiricalNormalization"]
|
||||
|
|
|
@ -1,68 +1,47 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Copyright (c) 2021 ETH Zurich, Nikita Rudin
|
||||
# Copyright 2021 ETH Zurich, NVIDIA CORPORATION
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import numpy as np
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.distributions import Normal
|
||||
from torch.nn.modules import rnn
|
||||
|
||||
|
||||
class ActorCritic(nn.Module):
|
||||
is_recurrent = False
|
||||
def __init__(self, num_actor_obs,
|
||||
num_critic_obs,
|
||||
num_actions,
|
||||
actor_hidden_dims=[256, 256, 256],
|
||||
critic_hidden_dims=[256, 256, 256],
|
||||
activation='elu',
|
||||
init_noise_std=1.0,
|
||||
**kwargs):
|
||||
if kwargs:
|
||||
print("ActorCritic.__init__ got unexpected arguments, which will be ignored: " + str([key for key in kwargs.keys()]))
|
||||
super(ActorCritic, self).__init__()
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
num_actor_obs,
|
||||
num_critic_obs,
|
||||
num_actions,
|
||||
actor_hidden_dims=[256, 256, 256],
|
||||
critic_hidden_dims=[256, 256, 256],
|
||||
activation="elu",
|
||||
init_noise_std=1.0,
|
||||
**kwargs,
|
||||
):
|
||||
if kwargs:
|
||||
print(
|
||||
"ActorCritic.__init__ got unexpected arguments, which will be ignored: "
|
||||
+ str([key for key in kwargs.keys()])
|
||||
)
|
||||
super().__init__()
|
||||
activation = get_activation(activation)
|
||||
|
||||
mlp_input_dim_a = num_actor_obs
|
||||
mlp_input_dim_c = num_critic_obs
|
||||
|
||||
# Policy
|
||||
actor_layers = []
|
||||
actor_layers.append(nn.Linear(mlp_input_dim_a, actor_hidden_dims[0]))
|
||||
actor_layers.append(activation)
|
||||
for l in range(len(actor_hidden_dims)):
|
||||
if l == len(actor_hidden_dims) - 1:
|
||||
actor_layers.append(nn.Linear(actor_hidden_dims[l], num_actions))
|
||||
for layer_index in range(len(actor_hidden_dims)):
|
||||
if layer_index == len(actor_hidden_dims) - 1:
|
||||
actor_layers.append(nn.Linear(actor_hidden_dims[layer_index], num_actions))
|
||||
else:
|
||||
actor_layers.append(nn.Linear(actor_hidden_dims[l], actor_hidden_dims[l + 1]))
|
||||
actor_layers.append(nn.Linear(actor_hidden_dims[layer_index], actor_hidden_dims[layer_index + 1]))
|
||||
actor_layers.append(activation)
|
||||
self.actor = nn.Sequential(*actor_layers)
|
||||
|
||||
|
@ -70,11 +49,11 @@ class ActorCritic(nn.Module):
|
|||
critic_layers = []
|
||||
critic_layers.append(nn.Linear(mlp_input_dim_c, critic_hidden_dims[0]))
|
||||
critic_layers.append(activation)
|
||||
for l in range(len(critic_hidden_dims)):
|
||||
if l == len(critic_hidden_dims) - 1:
|
||||
critic_layers.append(nn.Linear(critic_hidden_dims[l], 1))
|
||||
for layer_index in range(len(critic_hidden_dims)):
|
||||
if layer_index == len(critic_hidden_dims) - 1:
|
||||
critic_layers.append(nn.Linear(critic_hidden_dims[layer_index], 1))
|
||||
else:
|
||||
critic_layers.append(nn.Linear(critic_hidden_dims[l], critic_hidden_dims[l + 1]))
|
||||
critic_layers.append(nn.Linear(critic_hidden_dims[layer_index], critic_hidden_dims[layer_index + 1]))
|
||||
critic_layers.append(activation)
|
||||
self.critic = nn.Sequential(*critic_layers)
|
||||
|
||||
|
@ -86,7 +65,7 @@ class ActorCritic(nn.Module):
|
|||
self.distribution = None
|
||||
# disable args validation for speedup
|
||||
Normal.set_default_validate_args = False
|
||||
|
||||
|
||||
# seems that we get better performance without init
|
||||
# self.init_memory_weights(self.memory_a, 0.001, 0.)
|
||||
# self.init_memory_weights(self.memory_c, 0.001, 0.)
|
||||
|
@ -94,16 +73,17 @@ class ActorCritic(nn.Module):
|
|||
@staticmethod
# not used at the moment
def init_weights(sequential, scales):
    """Orthogonally initialize the weight of every ``nn.Linear`` module in ``sequential``.

    Args:
        sequential: Iterable of modules (e.g. an ``nn.Sequential``); entries that are not
            ``nn.Linear`` are skipped.
        scales: Gains, indexed by the position of each linear layer among the linear layers only.
    """
    linear_layers = (module for module in sequential if isinstance(module, nn.Linear))
    # A plain loop instead of a throwaway list: the call is made only for its in-place side effect.
    for idx, layer in enumerate(linear_layers):
        torch.nn.init.orthogonal_(layer.weight, gain=scales[idx])
|
||||
|
||||
def reset(self, dones=None):
    """Do nothing; ``dones`` is accepted and ignored."""
    return None
|
||||
|
||||
def forward(self):
    """Not supported: always raises ``NotImplementedError``."""
    raise NotImplementedError
|
||||
|
||||
|
||||
@property
def action_mean(self):
    """Mean of the current action distribution (``self.distribution.mean``)."""
    current = self.distribution
    return current.mean
|
||||
|
@ -111,19 +91,19 @@ class ActorCritic(nn.Module):
|
|||
@property
def action_std(self):
    """Standard deviation of the current action distribution (``self.distribution.stddev``)."""
    current = self.distribution
    return current.stddev
|
||||
|
||||
|
||||
@property
def entropy(self):
    """Entropy of the current action distribution, summed over the last dimension."""
    per_dim = self.distribution.entropy()
    return per_dim.sum(dim=-1)
|
||||
|
||||
def update_distribution(self, observations):
    """Rebuild ``self.distribution`` as a ``Normal`` centred on the actor output for ``observations``.

    The standard deviation is ``self.std`` broadcast against the mean.
    """
    mean = self.actor(observations)
    # ``mean * 0.0`` has the shape of the mean, so adding ``self.std`` broadcasts it to that shape.
    std = mean * 0.0 + self.std
    self.distribution = Normal(mean, std)
|
||||
|
||||
def act(self, observations, **kwargs):
    """Update the action distribution for ``observations`` and sample from it.

    Extra keyword arguments are accepted and ignored.
    """
    self.update_distribution(observations)
    sampled = self.distribution.sample()
    return sampled
|
||||
|
||||
|
||||
def get_actions_log_prob(self, actions):
    """Log-probability of ``actions`` under ``self.distribution``, summed over the last dimension."""
    per_dim = self.distribution.log_prob(actions)
    return per_dim.sum(dim=-1)
|
||||
|
||||
|
@ -135,6 +115,7 @@ class ActorCritic(nn.Module):
|
|||
value = self.critic(critic_observations)
|
||||
return value
|
||||
|
||||
|
||||
def get_activation(act_name):
|
||||
if act_name == "elu":
|
||||
return nn.ELU()
|
||||
|
@ -143,7 +124,7 @@ def get_activation(act_name):
|
|||
elif act_name == "relu":
|
||||
return nn.ReLU()
|
||||
elif act_name == "crelu":
|
||||
return nn.ReLU()
|
||||
return nn.CReLU()
|
||||
elif act_name == "lrelu":
|
||||
return nn.LeakyReLU()
|
||||
elif act_name == "tanh":
|
||||
|
|
|
@ -1,65 +1,46 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Copyright (c) 2021 ETH Zurich, Nikita Rudin
|
||||
# Copyright 2021 ETH Zurich, NVIDIA CORPORATION
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import numpy as np
|
||||
from __future__ import annotations
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.distributions import Normal
|
||||
from torch.nn.modules import rnn
|
||||
from .actor_critic import ActorCritic, get_activation
|
||||
|
||||
from rsl_rl.modules.actor_critic import ActorCritic, get_activation
|
||||
from rsl_rl.utils import unpad_trajectories
|
||||
|
||||
|
||||
class ActorCriticRecurrent(ActorCritic):
|
||||
is_recurrent = True
|
||||
def __init__(self, num_actor_obs,
|
||||
num_critic_obs,
|
||||
num_actions,
|
||||
actor_hidden_dims=[256, 256, 256],
|
||||
critic_hidden_dims=[256, 256, 256],
|
||||
activation='elu',
|
||||
rnn_type='lstm',
|
||||
rnn_hidden_size=256,
|
||||
rnn_num_layers=1,
|
||||
init_noise_std=1.0,
|
||||
**kwargs):
|
||||
if kwargs:
|
||||
print("ActorCriticRecurrent.__init__ got unexpected arguments, which will be ignored: " + str(kwargs.keys()),)
|
||||
|
||||
super().__init__(num_actor_obs=rnn_hidden_size,
|
||||
num_critic_obs=rnn_hidden_size,
|
||||
num_actions=num_actions,
|
||||
actor_hidden_dims=actor_hidden_dims,
|
||||
critic_hidden_dims=critic_hidden_dims,
|
||||
activation=activation,
|
||||
init_noise_std=init_noise_std)
|
||||
def __init__(
|
||||
self,
|
||||
num_actor_obs,
|
||||
num_critic_obs,
|
||||
num_actions,
|
||||
actor_hidden_dims=[256, 256, 256],
|
||||
critic_hidden_dims=[256, 256, 256],
|
||||
activation="elu",
|
||||
rnn_type="lstm",
|
||||
rnn_hidden_size=256,
|
||||
rnn_num_layers=1,
|
||||
init_noise_std=1.0,
|
||||
**kwargs,
|
||||
):
|
||||
if kwargs:
|
||||
print(
|
||||
"ActorCriticRecurrent.__init__ got unexpected arguments, which will be ignored: " + str(kwargs.keys()),
|
||||
)
|
||||
|
||||
super().__init__(
|
||||
num_actor_obs=rnn_hidden_size,
|
||||
num_critic_obs=rnn_hidden_size,
|
||||
num_actions=num_actions,
|
||||
actor_hidden_dims=actor_hidden_dims,
|
||||
critic_hidden_dims=critic_hidden_dims,
|
||||
activation=activation,
|
||||
init_noise_std=init_noise_std,
|
||||
)
|
||||
|
||||
activation = get_activation(activation)
|
||||
|
||||
|
@ -84,19 +65,19 @@ class ActorCriticRecurrent(ActorCritic):
|
|||
def evaluate(self, critic_observations, masks=None, hidden_states=None):
    """Run the critic memory on the observations and evaluate the parent critic on its output.

    ``masks`` and ``hidden_states`` are passed straight to ``self.memory_c``; the memory output
    has its leading dimension squeezed before it reaches the parent ``evaluate``.
    """
    critic_features = self.memory_c(critic_observations, masks, hidden_states)
    return super().evaluate(critic_features.squeeze(0))
|
||||
|
||||
|
||||
def get_hidden_states(self):
    """Return the hidden states of the actor memory and of the critic memory, in that order."""
    actor_hidden = self.memory_a.hidden_states
    critic_hidden = self.memory_c.hidden_states
    return actor_hidden, critic_hidden
|
||||
|
||||
|
||||
class Memory(torch.nn.Module):
|
||||
def __init__(self, input_size, type="lstm", num_layers=1, hidden_size=256):
    """Build the recurrent core.

    Args:
        input_size: Passed to the RNN constructor as ``input_size``.
        type: ``"gru"`` (case-insensitive) selects ``nn.GRU``; any other value selects ``nn.LSTM``.
        num_layers: Passed to the RNN constructor as ``num_layers``.
        hidden_size: Passed to the RNN constructor as ``hidden_size``.
    """
    super().__init__()
    # RNN
    if type.lower() == "gru":
        rnn_cls = nn.GRU
    else:
        rnn_cls = nn.LSTM
    self.rnn = rnn_cls(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
    # No hidden state is stored at construction time.
    self.hidden_states = None
|
||||
|
||||
|
||||
def forward(self, input, masks=None, hidden_states=None):
|
||||
batch_mode = masks is not None
|
||||
if batch_mode:
|
||||
|
@ -113,4 +94,4 @@ class Memory(torch.nn.Module):
|
|||
def reset(self, dones=None):
    """Zero the stored hidden state at the positions selected by ``dones``.

    Args:
        dones: Index or mask applied to the second-to-last dimension of every stored
            hidden-state tensor.

    Nothing happens while no hidden state has been stored yet (``self.hidden_states`` is ``None``).
    """
    # Guard: iterating ``None`` would raise TypeError when reset is called before any state exists.
    if self.hidden_states is None:
        return
    # When the RNN is an LSTM, self.hidden_states holds both the hidden_state and the cell_state
    for hidden_state in self.hidden_states:
        hidden_state[..., dones, :] = 0.0
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
# Copyright (c) 2020 Preferred Networks, Inc.
|
||||
# Copyright 2021 ETH Zurich, NVIDIA CORPORATION
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
|
||||
|
||||
class EmpiricalNormalization(nn.Module):
    """Normalize mean and variance of values based on empirical values."""

    def __init__(self, shape, eps=1e-2, until=None):
        """Initialize EmpiricalNormalization module.

        Args:
            shape (int or tuple of int): Shape of input values except batch axis.
            eps (float): Small value for stability.
            until (int or None): If this arg is specified, the module learns input values until the sum of batch
                sizes exceeds it.
        """
        super().__init__()
        self.eps = eps
        self.until = until
        # Running statistics carry a leading axis of size 1 so they broadcast over the batch axis.
        self.register_buffer("_mean", torch.zeros(shape).unsqueeze(0))
        self.register_buffer("_var", torch.ones(shape).unsqueeze(0))
        self.register_buffer("_std", torch.ones(shape).unsqueeze(0))
        self.count = 0

    @property
    def mean(self):
        """Copy of the running mean without the leading broadcast axis."""
        return self._mean.squeeze(0).clone()

    @property
    def std(self):
        """Copy of the running standard deviation without the leading broadcast axis."""
        return self._std.squeeze(0).clone()

    def forward(self, x):
        """Normalize mean and variance of values based on empirical values.

        In training mode the running statistics are first updated with ``x``.

        Args:
            x (torch.Tensor): Input values, batch axis first.

        Returns:
            torch.Tensor: Normalized output values.
        """
        if self.training:
            self.update(x)
        return (x - self._mean) / (self._std + self.eps)

    @torch.jit.unused
    def update(self, x):
        """Learn input values without computing the output values of them.

        Does nothing once ``until`` samples have been seen, or when ``x`` holds no samples.
        """
        if self.until is not None and self.count >= self.until:
            return

        count_x = x.shape[0]
        # An empty batch has no statistics: it would divide by zero while ``count`` is still 0 and
        # would otherwise feed NaN mean/variance into the running buffers.
        if count_x == 0:
            return

        self.count += count_x
        rate = count_x / self.count

        var_x = torch.var(x, dim=0, unbiased=False, keepdim=True)
        mean_x = torch.mean(x, dim=0, keepdim=True)
        delta_mean = mean_x - self._mean
        self._mean += rate * delta_mean
        # ``self._mean`` is already updated here; the variance update relies on that ordering.
        self._var += rate * (var_x - self._var + delta_mean * (mean_x - self._mean))
        self._std = torch.sqrt(self._var)

    @torch.jit.unused
    def inverse(self, y):
        """Undo the normalization applied by :meth:`forward` using the current statistics."""
        return y * (self._std + self.eps) + self._mean
|
|
@ -1,31 +1,8 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Copyright (c) 2021 ETH Zurich, Nikita Rudin
|
||||
# Copyright 2021 ETH Zurich, NVIDIA CORPORATION
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from .on_policy_runner import OnPolicyRunner
|
||||
"""Implementation of runners for environment-agent interaction."""
|
||||
|
||||
from .on_policy_runner import OnPolicyRunner
|
||||
|
||||
__all__ = ["OnPolicyRunner"]
|
||||
|
|
|
@ -1,75 +1,60 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Copyright (c) 2021 ETH Zurich, Nikita Rudin
|
||||
# Copyright 2021 ETH Zurich, NVIDIA CORPORATION
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
import os
|
||||
from collections import deque
|
||||
import statistics
|
||||
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
import time
|
||||
import torch
|
||||
from collections import deque
|
||||
from torch.utils.tensorboard import SummaryWriter as TensorboardSummaryWriter
|
||||
|
||||
import rsl_rl
|
||||
from rsl_rl.algorithms import PPO
|
||||
from rsl_rl.modules import ActorCritic, ActorCriticRecurrent
|
||||
from rsl_rl.env import VecEnv
|
||||
from rsl_rl.modules import ActorCritic, ActorCriticRecurrent, EmpiricalNormalization
|
||||
from rsl_rl.utils import store_code_state
|
||||
|
||||
|
||||
class OnPolicyRunner:
|
||||
"""On-policy runner for training and evaluation."""
|
||||
|
||||
def __init__(self,
|
||||
env: VecEnv,
|
||||
train_cfg,
|
||||
log_dir=None,
|
||||
device='cpu'):
|
||||
|
||||
self.cfg=train_cfg["runner"]
|
||||
def __init__(self, env: VecEnv, train_cfg, log_dir=None, device="cpu"):
|
||||
self.cfg = train_cfg
|
||||
self.alg_cfg = train_cfg["algorithm"]
|
||||
self.policy_cfg = train_cfg["policy"]
|
||||
self.device = device
|
||||
self.env = env
|
||||
if self.env.num_privileged_obs is not None:
|
||||
num_critic_obs = self.env.num_privileged_obs
|
||||
obs, extras = self.env.get_observations()
|
||||
num_obs = obs.shape[1]
|
||||
if "critic" in extras["observations"]:
|
||||
num_critic_obs = extras["observations"]["critic"].shape[1]
|
||||
else:
|
||||
num_critic_obs = self.env.num_obs
|
||||
actor_critic_class = eval(self.cfg["policy_class_name"]) # ActorCritic
|
||||
actor_critic: ActorCritic = actor_critic_class( self.env.num_obs,
|
||||
num_critic_obs,
|
||||
self.env.num_actions,
|
||||
**self.policy_cfg).to(self.device)
|
||||
alg_class = eval(self.cfg["algorithm_class_name"]) # PPO
|
||||
num_critic_obs = num_obs
|
||||
actor_critic_class = eval(self.policy_cfg.pop("class_name")) # ActorCritic
|
||||
actor_critic: ActorCritic | ActorCriticRecurrent = actor_critic_class(
|
||||
num_obs, num_critic_obs, self.env.num_actions, **self.policy_cfg
|
||||
).to(self.device)
|
||||
alg_class = eval(self.alg_cfg.pop("class_name")) # PPO
|
||||
self.alg: PPO = alg_class(actor_critic, device=self.device, **self.alg_cfg)
|
||||
self.num_steps_per_env = self.cfg["num_steps_per_env"]
|
||||
self.save_interval = self.cfg["save_interval"]
|
||||
|
||||
self.empirical_normalization = self.cfg["empirical_normalization"]
|
||||
if self.empirical_normalization:
|
||||
self.obs_normalizer = EmpiricalNormalization(shape=[num_obs], until=1.0e8).to(self.device)
|
||||
self.critic_obs_normalizer = EmpiricalNormalization(shape=[num_critic_obs], until=1.0e8).to(self.device)
|
||||
else:
|
||||
self.obs_normalizer = torch.nn.Identity() # no normalization
|
||||
self.critic_obs_normalizer = torch.nn.Identity() # no normalization
|
||||
# init storage and model
|
||||
self.alg.init_storage(self.env.num_envs, self.num_steps_per_env, [self.env.num_obs], [self.env.num_privileged_obs], [self.env.num_actions])
|
||||
self.alg.init_storage(
|
||||
self.env.num_envs,
|
||||
self.num_steps_per_env,
|
||||
[num_obs],
|
||||
[num_critic_obs],
|
||||
[self.env.num_actions],
|
||||
)
|
||||
|
||||
# Log
|
||||
self.log_dir = log_dir
|
||||
|
@ -77,20 +62,38 @@ class OnPolicyRunner:
|
|||
self.tot_timesteps = 0
|
||||
self.tot_time = 0
|
||||
self.current_learning_iteration = 0
|
||||
self.git_status_repos = [rsl_rl.__file__]
|
||||
|
||||
_, _ = self.env.reset()
|
||||
|
||||
def learn(self, num_learning_iterations, init_at_random_ep_len=False):
|
||||
def learn(self, num_learning_iterations: int, init_at_random_ep_len: bool = False):
|
||||
# initialize writer
|
||||
if self.log_dir is not None and self.writer is None:
|
||||
self.writer = SummaryWriter(log_dir=self.log_dir, flush_secs=10)
|
||||
# Launch either Tensorboard or Neptune & Tensorboard summary writer(s), default: Tensorboard.
|
||||
self.logger_type = self.cfg.get("logger", "tensorboard")
|
||||
self.logger_type = self.logger_type.lower()
|
||||
|
||||
if self.logger_type == "neptune":
|
||||
from rsl_rl.utils.neptune_utils import NeptuneSummaryWriter
|
||||
|
||||
self.writer = NeptuneSummaryWriter(log_dir=self.log_dir, flush_secs=10, cfg=self.cfg)
|
||||
self.writer.log_config(self.env.cfg, self.cfg, self.alg_cfg, self.policy_cfg)
|
||||
elif self.logger_type == "wandb":
|
||||
from rsl_rl.utils.wandb_utils import WandbSummaryWriter
|
||||
|
||||
self.writer = WandbSummaryWriter(log_dir=self.log_dir, flush_secs=10, cfg=self.cfg)
|
||||
self.writer.log_config(self.env.cfg, self.cfg, self.alg_cfg, self.policy_cfg)
|
||||
elif self.logger_type == "tensorboard":
|
||||
self.writer = TensorboardSummaryWriter(log_dir=self.log_dir, flush_secs=10)
|
||||
else:
|
||||
raise AssertionError("logger type not found")
|
||||
|
||||
if init_at_random_ep_len:
|
||||
self.env.episode_length_buf = torch.randint_like(self.env.episode_length_buf, high=int(self.env.max_episode_length))
|
||||
obs = self.env.get_observations()
|
||||
privileged_obs = self.env.get_privileged_observations()
|
||||
critic_obs = privileged_obs if privileged_obs is not None else obs
|
||||
self.env.episode_length_buf = torch.randint_like(
|
||||
self.env.episode_length_buf, high=int(self.env.max_episode_length)
|
||||
)
|
||||
obs, extras = self.env.get_observations()
|
||||
critic_obs = extras["observations"].get("critic", obs)
|
||||
obs, critic_obs = obs.to(self.device), critic_obs.to(self.device)
|
||||
self.alg.actor_critic.train() # switch to train mode (for dropout for example)
|
||||
self.train_mode() # switch to train mode (for dropout for example)
|
||||
|
||||
ep_infos = []
|
||||
rewbuffer = deque(maxlen=100)
|
||||
|
@ -98,22 +101,36 @@ class OnPolicyRunner:
|
|||
cur_reward_sum = torch.zeros(self.env.num_envs, dtype=torch.float, device=self.device)
|
||||
cur_episode_length = torch.zeros(self.env.num_envs, dtype=torch.float, device=self.device)
|
||||
|
||||
tot_iter = self.current_learning_iteration + num_learning_iterations
|
||||
for it in range(self.current_learning_iteration, tot_iter):
|
||||
start_iter = self.current_learning_iteration
|
||||
tot_iter = start_iter + num_learning_iterations
|
||||
for it in range(start_iter, tot_iter):
|
||||
start = time.time()
|
||||
# Rollout
|
||||
with torch.inference_mode():
|
||||
for i in range(self.num_steps_per_env):
|
||||
actions = self.alg.act(obs, critic_obs)
|
||||
obs, privileged_obs, rewards, dones, infos = self.env.step(actions)
|
||||
critic_obs = privileged_obs if privileged_obs is not None else obs
|
||||
obs, critic_obs, rewards, dones = obs.to(self.device), critic_obs.to(self.device), rewards.to(self.device), dones.to(self.device)
|
||||
obs, rewards, dones, infos = self.env.step(actions)
|
||||
obs = self.obs_normalizer(obs)
|
||||
if "critic" in infos["observations"]:
|
||||
critic_obs = self.critic_obs_normalizer(infos["observations"]["critic"])
|
||||
else:
|
||||
critic_obs = obs
|
||||
obs, critic_obs, rewards, dones = (
|
||||
obs.to(self.device),
|
||||
critic_obs.to(self.device),
|
||||
rewards.to(self.device),
|
||||
dones.to(self.device),
|
||||
)
|
||||
self.alg.process_env_step(rewards, dones, infos)
|
||||
|
||||
|
||||
if self.log_dir is not None:
|
||||
# Book keeping
|
||||
if 'episode' in infos:
|
||||
ep_infos.append(infos['episode'])
|
||||
# note: we changed logging to use "log" instead of "episode" to avoid confusion with
|
||||
# different types of logging data (rewards, curriculum, etc.)
|
||||
if "episode" in infos:
|
||||
ep_infos.append(infos["episode"])
|
||||
elif "log" in infos:
|
||||
ep_infos.append(infos["log"])
|
||||
cur_reward_sum += rewards
|
||||
cur_episode_length += 1
|
||||
new_ids = (dones > 0).nonzero(as_tuple=False)
|
||||
|
@ -128,106 +145,155 @@ class OnPolicyRunner:
|
|||
# Learning step
|
||||
start = stop
|
||||
self.alg.compute_returns(critic_obs)
|
||||
|
||||
|
||||
mean_value_loss, mean_surrogate_loss = self.alg.update()
|
||||
stop = time.time()
|
||||
learn_time = stop - start
|
||||
self.current_learning_iteration = it
|
||||
if self.log_dir is not None:
|
||||
self.log(locals())
|
||||
if it % self.save_interval == 0:
|
||||
self.save(os.path.join(self.log_dir, 'model_{}.pt'.format(it)))
|
||||
self.save(os.path.join(self.log_dir, f"model_{it}.pt"))
|
||||
ep_infos.clear()
|
||||
|
||||
self.current_learning_iteration += num_learning_iterations
|
||||
self.save(os.path.join(self.log_dir, 'model_{}.pt'.format(self.current_learning_iteration)))
|
||||
if it == start_iter:
|
||||
store_code_state(self.log_dir, self.git_status_repos)
|
||||
|
||||
def log(self, locs, width=80, pad=35):
|
||||
self.save(os.path.join(self.log_dir, f"model_{self.current_learning_iteration}.pt"))
|
||||
|
||||
def log(self, locs: dict, width: int = 80, pad: int = 35):
    """Write the statistics of one learning iteration to the summary writer and the terminal.

    Args:
        locs: Local variables of the training loop (``learn`` passes ``locals()``).
            Keys read here: ``it``, ``tot_iter``, ``num_learning_iterations``,
            ``collection_time``, ``learn_time``, ``ep_infos``, ``rewbuffer``,
            ``lenbuffer``, ``mean_value_loss`` and ``mean_surrogate_loss``.
        width: Character width of the printed banner.
        pad: Column width used to right-align the labels.
    """
    it = locs["it"]
    collection_time = locs["collection_time"]
    learn_time = locs["learn_time"]
    iteration_time = collection_time + learn_time

    self.tot_timesteps += self.num_steps_per_env * self.env.num_envs
    self.tot_time += iteration_time

    ep_string = ""
    if locs["ep_infos"]:
        for key in locs["ep_infos"][0]:
            infotensor = torch.tensor([], device=self.device)
            for ep_info in locs["ep_infos"]:
                if key not in ep_info:
                    continue
                # Work on a local so the caller's info dictionaries are left untouched.
                entry = ep_info[key]
                # handle scalar and zero dimensional tensor infos
                if not isinstance(entry, torch.Tensor):
                    entry = torch.Tensor([entry])
                if len(entry.shape) == 0:
                    entry = entry.unsqueeze(0)
                infotensor = torch.cat((infotensor, entry.to(self.device)))
            value = torch.mean(infotensor)
            # log to logger and terminal; keys containing "/" already carry their own group
            if "/" in key:
                self.writer.add_scalar(key, value, it)
                label = f"{key}:"
            else:
                self.writer.add_scalar("Episode/" + key, value, it)
                label = f"Mean episode {key}:"
            ep_string += f"{label:>{pad}} {value:.4f}\n"

    mean_std = self.alg.actor_critic.std.mean()
    fps = int(self.num_steps_per_env * self.env.num_envs / iteration_time)

    self.writer.add_scalar("Loss/value_function", locs["mean_value_loss"], it)
    self.writer.add_scalar("Loss/surrogate", locs["mean_surrogate_loss"], it)
    self.writer.add_scalar("Loss/learning_rate", self.alg.learning_rate, it)
    self.writer.add_scalar("Policy/mean_noise_std", mean_std.item(), it)
    self.writer.add_scalar("Perf/total_fps", fps, it)
    self.writer.add_scalar("Perf/collection time", collection_time, it)
    self.writer.add_scalar("Perf/learning_time", learn_time, it)

    has_episodes = len(locs["rewbuffer"]) > 0
    if has_episodes:
        mean_reward = statistics.mean(locs["rewbuffer"])
        mean_length = statistics.mean(locs["lenbuffer"])
        self.writer.add_scalar("Train/mean_reward", mean_reward, it)
        self.writer.add_scalar("Train/mean_episode_length", mean_length, it)
        if self.logger_type != "wandb":  # wandb does not support non-integer x-axis logging
            self.writer.add_scalar("Train/mean_reward/time", mean_reward, self.tot_time)
            self.writer.add_scalar("Train/mean_episode_length/time", mean_length, self.tot_time)

    title = f" \033[1m Learning iteration {it}/{locs['tot_iter']} \033[0m "

    log_string = (
        f"{'#' * width}\n"
        f"{title.center(width, ' ')}\n\n"
        f"{'Computation:':>{pad}} {fps:.0f} steps/s"
        f" (collection: {collection_time:.3f}s, learning {learn_time:.3f}s)\n"
        f"{'Value function loss:':>{pad}} {locs['mean_value_loss']:.4f}\n"
        f"{'Surrogate loss:':>{pad}} {locs['mean_surrogate_loss']:.4f}\n"
        f"{'Mean action noise std:':>{pad}} {mean_std.item():.2f}\n"
    )
    if has_episodes:
        log_string += (
            f"{'Mean reward:':>{pad}} {mean_reward:.2f}\n"
            f"{'Mean episode length:':>{pad}} {mean_length:.2f}\n"
        )
    log_string += ep_string

    # ETA: `it` is an absolute iteration index, so when training resumes from a checkpoint
    # it must be offset by the first iteration of this run. For a run starting at
    # iteration 0 this is the same value as before.
    start_iter = locs["tot_iter"] - locs["num_learning_iterations"]
    iterations_done = it - start_iter + 1
    eta = self.tot_time / iterations_done * (locs["tot_iter"] - it)

    log_string += (
        f"{'-' * width}\n"
        f"{'Total timesteps:':>{pad}} {self.tot_timesteps}\n"
        f"{'Iteration time:':>{pad}} {iteration_time:.2f}s\n"
        f"{'Total time:':>{pad}} {self.tot_time:.2f}s\n"
        f"{'ETA:':>{pad}} {eta:.1f}s\n"
    )
    print(log_string)
|
||||
|
||||
def save(self, path, infos=None):
    """Save the training state to ``path`` and, if configured, upload it to the external logger.

    The checkpoint holds the actor-critic and optimizer state dicts, the current
    learning iteration and ``infos``; the observation normalizers are added when
    empirical normalization is enabled.

    Args:
        path: Destination file of the checkpoint.
        infos: Optional extra data stored under the ``"infos"`` key.
    """
    saved_dict = {
        "model_state_dict": self.alg.actor_critic.state_dict(),
        "optimizer_state_dict": self.alg.optimizer.state_dict(),
        "iter": self.current_learning_iteration,
        "infos": infos,
    }
    if self.empirical_normalization:
        saved_dict["obs_norm_state_dict"] = self.obs_normalizer.state_dict()
        saved_dict["critic_obs_norm_state_dict"] = self.critic_obs_normalizer.state_dict()
    torch.save(saved_dict, path)

    # Upload model to external logging service.
    # `logger_type` is only assigned when the writer is created, so fall back to None
    # to keep save() usable before any writer exists.
    if getattr(self, "logger_type", None) in ("neptune", "wandb"):
        self.writer.save_model(path, self.current_learning_iteration)
|
||||
|
||||
def load(self, path, load_optimizer=True):
    """Restore the training state from a checkpoint written by ``save``.

    Args:
        path: Checkpoint file to read.
        load_optimizer: Whether to also restore the optimizer state.

    Returns:
        The value stored under the checkpoint's ``"infos"`` key.
    """
    # Map the stored tensors onto the runner's device so a checkpoint written on another
    # device (e.g. a GPU that is not available here) can still be read.
    loaded_dict = torch.load(path, map_location=self.device)
    self.alg.actor_critic.load_state_dict(loaded_dict["model_state_dict"])
    if self.empirical_normalization:
        self.obs_normalizer.load_state_dict(loaded_dict["obs_norm_state_dict"])
        self.critic_obs_normalizer.load_state_dict(loaded_dict["critic_obs_norm_state_dict"])
    if load_optimizer:
        self.alg.optimizer.load_state_dict(loaded_dict["optimizer_state_dict"])
    self.current_learning_iteration = loaded_dict["iter"]
    return loaded_dict["infos"]
|
||||
|
||||
def get_inference_policy(self, device=None):
    """Return a callable that maps observations to inference-time actions.

    The runner is switched to evaluation mode first. When empirical normalization
    is enabled, the returned callable normalizes the observations before passing
    them to the actor-critic.

    Args:
        device: If given, the actor-critic (and the observation normalizer, when
            used) are moved to this device.

    Returns:
        ``actor_critic.act_inference``, or a wrapper around it that applies the
        observation normalizer first.
    """
    self.eval_mode()  # switch to evaluation mode (dropout for example)
    if device is not None:
        self.alg.actor_critic.to(device)
    if not self.empirical_normalization:
        return self.alg.actor_critic.act_inference

    if device is not None:
        self.obs_normalizer.to(device)

    def policy(x):
        # Attributes are looked up at call time, as the former lambda did.
        return self.alg.actor_critic.act_inference(self.obs_normalizer(x))

    return policy
|
||||
|
||||
def train_mode(self):
    """Put the actor-critic and, when enabled, both observation normalizers in training mode."""
    modules = [self.alg.actor_critic]
    if self.empirical_normalization:
        modules.extend((self.obs_normalizer, self.critic_obs_normalizer))
    for module in modules:
        module.train()
|
||||
|
||||
def eval_mode(self):
    """Put the actor-critic and, when enabled, both observation normalizers in evaluation mode."""
    modules = [self.alg.actor_critic]
    if self.empirical_normalization:
        modules.extend((self.obs_normalizer, self.critic_obs_normalizer))
    for module in modules:
        module.eval()
|
||||
|
||||
def add_git_repo_to_log(self, repo_file_path):
    """Append ``repo_file_path`` to the list of repositories whose git state is stored with the logs."""
    tracked_repos = self.git_status_repos
    tracked_repos.append(repo_file_path)
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
# Copyright 2021 ETH Zurich, NVIDIA CORPORATION
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from .rollout_storage import RolloutStorage
|
||||
"""Implementation of transitions storage for RL-agent."""
|
||||
|
||||
from .rollout_storage import RolloutStorage
|
||||
|
||||
__all__ = ["RolloutStorage"]
|
||||
|
|
|
@ -1,38 +1,14 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Copyright (c) 2021 ETH Zurich, Nikita Rudin
|
||||
# Copyright 2021 ETH Zurich, NVIDIA CORPORATION
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
from rsl_rl.utils import split_and_pad_trajectories
|
||||
|
||||
|
||||
class RolloutStorage:
|
||||
class Transition:
|
||||
def __init__(self):
|
||||
|
@ -46,12 +22,11 @@ class RolloutStorage:
|
|||
self.action_mean = None
|
||||
self.action_sigma = None
|
||||
self.hidden_states = None
|
||||
|
||||
|
||||
def clear(self):
    """Reset every field of the transition by running the initializer again."""
    reinitialize = self.__init__
    reinitialize()
|
||||
|
||||
def __init__(self, num_envs, num_transitions_per_env, obs_shape, privileged_obs_shape, actions_shape, device='cpu'):
|
||||
|
||||
def __init__(self, num_envs, num_transitions_per_env, obs_shape, privileged_obs_shape, actions_shape, device="cpu"):
|
||||
self.device = device
|
||||
|
||||
self.obs_shape = obs_shape
|
||||
|
@ -61,7 +36,9 @@ class RolloutStorage:
|
|||
# Core
|
||||
self.observations = torch.zeros(num_transitions_per_env, num_envs, *obs_shape, device=self.device)
|
||||
if privileged_obs_shape[0] is not None:
|
||||
self.privileged_observations = torch.zeros(num_transitions_per_env, num_envs, *privileged_obs_shape, device=self.device)
|
||||
self.privileged_observations = torch.zeros(
|
||||
num_transitions_per_env, num_envs, *privileged_obs_shape, device=self.device
|
||||
)
|
||||
else:
|
||||
self.privileged_observations = None
|
||||
self.rewards = torch.zeros(num_transitions_per_env, num_envs, 1, device=self.device)
|
||||
|
@ -89,7 +66,8 @@ class RolloutStorage:
|
|||
if self.step >= self.num_transitions_per_env:
|
||||
raise AssertionError("Rollout buffer overflow")
|
||||
self.observations[self.step].copy_(transition.observations)
|
||||
if self.privileged_observations is not None: self.privileged_observations[self.step].copy_(transition.critic_observations)
|
||||
if self.privileged_observations is not None:
|
||||
self.privileged_observations[self.step].copy_(transition.critic_observations)
|
||||
self.actions[self.step].copy_(transition.actions)
|
||||
self.rewards[self.step].copy_(transition.rewards.view(-1, 1))
|
||||
self.dones[self.step].copy_(transition.dones.view(-1, 1))
|
||||
|
@ -101,22 +79,25 @@ class RolloutStorage:
|
|||
self.step += 1
|
||||
|
||||
def _save_hidden_states(self, hidden_states):
    """Store the recurrent hidden states of the current step.

    Args:
        hidden_states: ``None``, ``(None, None)`` (both are ignored), or a pair
            ``(actor_states, critic_states)`` where each element is either a single
            tensor or a tuple of tensors.
    """
    if hidden_states is None or hidden_states == (None, None):
        return
    # make a tuple out of GRU hidden states to match the LSTM format
    hid_a = hidden_states[0] if isinstance(hidden_states[0], tuple) else (hidden_states[0],)
    hid_c = hidden_states[1] if isinstance(hidden_states[1], tuple) else (hidden_states[1],)

    # initialize if needed: one buffer per state tensor, with a leading axis as long as
    # the first axis of the observation buffer
    if self.saved_hidden_states_a is None:
        num_steps = self.observations.shape[0]
        self.saved_hidden_states_a = [torch.zeros(num_steps, *h.shape, device=self.device) for h in hid_a]
        self.saved_hidden_states_c = [torch.zeros(num_steps, *h.shape, device=self.device) for h in hid_c]

    # copy the states; actor and critic are handled separately so that a different
    # number of state tensors on each side is stored correctly
    for buffer, state in zip(self.saved_hidden_states_a, hid_a):
        buffer[self.step].copy_(state)
    for buffer, state in zip(self.saved_hidden_states_c, hid_c):
        buffer[self.step].copy_(state)
|
||||
|
||||
|
||||
def clear(self):
    """Reset the write position to the first step."""
    first_step = 0
    self.step = first_step
|
||||
|
||||
|
@ -140,14 +121,16 @@ class RolloutStorage:
|
|||
done = self.dones
|
||||
done[-1] = 1
|
||||
flat_dones = done.permute(1, 0, 2).reshape(-1, 1)
|
||||
done_indices = torch.cat((flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero(as_tuple=False)[:, 0]))
|
||||
trajectory_lengths = (done_indices[1:] - done_indices[:-1])
|
||||
done_indices = torch.cat(
|
||||
(flat_dones.new_tensor([-1], dtype=torch.int64), flat_dones.nonzero(as_tuple=False)[:, 0])
|
||||
)
|
||||
trajectory_lengths = done_indices[1:] - done_indices[:-1]
|
||||
return trajectory_lengths.float().mean(), self.rewards.mean()
|
||||
|
||||
def mini_batch_generator(self, num_mini_batches, num_epochs=8):
|
||||
batch_size = self.num_envs * self.num_transitions_per_env
|
||||
mini_batch_size = batch_size // num_mini_batches
|
||||
indices = torch.randperm(num_mini_batches*mini_batch_size, requires_grad=False, device=self.device)
|
||||
indices = torch.randperm(num_mini_batches * mini_batch_size, requires_grad=False, device=self.device)
|
||||
|
||||
observations = self.observations.flatten(0, 1)
|
||||
if self.privileged_observations is not None:
|
||||
|
@ -165,9 +148,8 @@ class RolloutStorage:
|
|||
|
||||
for epoch in range(num_epochs):
|
||||
for i in range(num_mini_batches):
|
||||
|
||||
start = i*mini_batch_size
|
||||
end = (i+1)*mini_batch_size
|
||||
start = i * mini_batch_size
|
||||
end = (i + 1) * mini_batch_size
|
||||
batch_idx = indices[start:end]
|
||||
|
||||
obs_batch = observations[batch_idx]
|
||||
|
@ -179,24 +161,25 @@ class RolloutStorage:
|
|||
advantages_batch = advantages[batch_idx]
|
||||
old_mu_batch = old_mu[batch_idx]
|
||||
old_sigma_batch = old_sigma[batch_idx]
|
||||
yield obs_batch, critic_observations_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, \
|
||||
old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, (None, None), None
|
||||
yield obs_batch, critic_observations_batch, actions_batch, target_values_batch, advantages_batch, returns_batch, old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, (
|
||||
None,
|
||||
None,
|
||||
), None
|
||||
|
||||
# for RNNs only
|
||||
def reccurent_mini_batch_generator(self, num_mini_batches, num_epochs=8):
|
||||
|
||||
padded_obs_trajectories, trajectory_masks = split_and_pad_trajectories(self.observations, self.dones)
|
||||
if self.privileged_observations is not None:
|
||||
if self.privileged_observations is not None:
|
||||
padded_critic_obs_trajectories, _ = split_and_pad_trajectories(self.privileged_observations, self.dones)
|
||||
else:
|
||||
else:
|
||||
padded_critic_obs_trajectories = padded_obs_trajectories
|
||||
|
||||
mini_batch_size = self.num_envs // num_mini_batches
|
||||
for ep in range(num_epochs):
|
||||
first_traj = 0
|
||||
for i in range(num_mini_batches):
|
||||
start = i*mini_batch_size
|
||||
stop = (i+1)*mini_batch_size
|
||||
start = i * mini_batch_size
|
||||
stop = (i + 1) * mini_batch_size
|
||||
|
||||
dones = self.dones.squeeze(-1)
|
||||
last_was_done = torch.zeros_like(dones, dtype=torch.bool)
|
||||
|
@ -204,7 +187,7 @@ class RolloutStorage:
|
|||
last_was_done[0] = True
|
||||
trajectories_batch_size = torch.sum(last_was_done[:, start:stop])
|
||||
last_traj = first_traj + trajectories_batch_size
|
||||
|
||||
|
||||
masks_batch = trajectory_masks[:, first_traj:last_traj]
|
||||
obs_batch = padded_obs_trajectories[:, first_traj:last_traj]
|
||||
critic_obs_batch = padded_critic_obs_trajectories[:, first_traj:last_traj]
|
||||
|
@ -221,15 +204,25 @@ class RolloutStorage:
|
|||
# then take only time steps after dones (flattens num envs and time dimensions),
|
||||
# take a batch of trajectories and finally reshape back to [num_layers, batch, hidden_dim]
|
||||
last_was_done = last_was_done.permute(1, 0)
|
||||
hid_a_batch = [ saved_hidden_states.permute(2, 0, 1, 3)[last_was_done][first_traj:last_traj].transpose(1, 0).contiguous()
|
||||
for saved_hidden_states in self.saved_hidden_states_a ]
|
||||
hid_c_batch = [ saved_hidden_states.permute(2, 0, 1, 3)[last_was_done][first_traj:last_traj].transpose(1, 0).contiguous()
|
||||
for saved_hidden_states in self.saved_hidden_states_c ]
|
||||
hid_a_batch = [
|
||||
saved_hidden_states.permute(2, 0, 1, 3)[last_was_done][first_traj:last_traj]
|
||||
.transpose(1, 0)
|
||||
.contiguous()
|
||||
for saved_hidden_states in self.saved_hidden_states_a
|
||||
]
|
||||
hid_c_batch = [
|
||||
saved_hidden_states.permute(2, 0, 1, 3)[last_was_done][first_traj:last_traj]
|
||||
.transpose(1, 0)
|
||||
.contiguous()
|
||||
for saved_hidden_states in self.saved_hidden_states_c
|
||||
]
|
||||
# remove the tuple for GRU
|
||||
hid_a_batch = hid_a_batch[0] if len(hid_a_batch)==1 else hid_a_batch
|
||||
hid_c_batch = hid_c_batch[0] if len(hid_c_batch)==1 else hid_a_batch
|
||||
hid_a_batch = hid_a_batch[0] if len(hid_a_batch) == 1 else hid_a_batch
|
||||
hid_c_batch = hid_c_batch[0] if len(hid_c_batch) == 1 else hid_c_batch
|
||||
|
||||
yield obs_batch, critic_obs_batch, actions_batch, values_batch, advantages_batch, returns_batch, \
|
||||
old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, (hid_a_batch, hid_c_batch), masks_batch
|
||||
|
||||
first_traj = last_traj
|
||||
yield obs_batch, critic_obs_batch, actions_batch, values_batch, advantages_batch, returns_batch, old_actions_log_prob_batch, old_mu_batch, old_sigma_batch, (
|
||||
hid_a_batch,
|
||||
hid_c_batch,
|
||||
), masks_batch
|
||||
|
||||
first_traj = last_traj
|
||||
|
|
|
@ -1,31 +1,6 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Copyright (c) 2021 ETH Zurich, Nikita Rudin
|
||||
# Copyright 2021 ETH Zurich, NVIDIA CORPORATION
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from .utils import split_and_pad_trajectories, unpad_trajectories
|
||||
"""Helper functions."""
|
||||
|
||||
from .utils import split_and_pad_trajectories, store_code_state, unpad_trajectories
|
||||
|
|
|
@ -0,0 +1,88 @@
|
|||
# Copyright 2021 ETH Zurich, NVIDIA CORPORATION
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import asdict
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
|
||||
try:
|
||||
import neptune.new as neptune
|
||||
except ModuleNotFoundError:
|
||||
raise ModuleNotFoundError("neptune-client is required to log to Neptune.")
|
||||
|
||||
|
||||
class NeptuneLogger:
    """Small holder for a Neptune run that records the training configuration."""

    def __init__(self, project, token):
        """Open a Neptune run.

        Args:
            project: Project identifier forwarded to ``neptune.init``.
            token: API token forwarded to ``neptune.init`` as ``api_token``.
        """
        self.run = neptune.init(api_token=token, project=project)

    def store_config(self, env_cfg, runner_cfg, alg_cfg, policy_cfg):
        """Store the configuration objects under fixed keys of the run.

        Args:
            env_cfg: Dataclass instance; converted with ``dataclasses.asdict`` before storing.
            runner_cfg: Stored as-is under ``"runner_cfg"``.
            alg_cfg: Stored as-is under ``"alg_cfg"``.
            policy_cfg: Stored as-is under ``"policy_cfg"``.
        """
        stored_as_is = (
            ("runner_cfg", runner_cfg),
            ("policy_cfg", policy_cfg),
            ("alg_cfg", alg_cfg),
        )
        for field_name, cfg_value in stored_as_is:
            self.run[field_name] = cfg_value
        # The environment config is converted (and stored) last, after the others.
        self.run["env_cfg"] = asdict(env_cfg)
|
||||
|
||||
|
||||
class NeptuneSummaryWriter(SummaryWriter):
    """Summary writer for Neptune.

    Every scalar is written to the regular TensorBoard event file (via the
    parent class) and additionally logged to a Neptune run.
    """

    def __init__(self, log_dir: str, flush_secs: int, cfg):
        """Create the TensorBoard writer and open the Neptune run.

        Args:
            log_dir: Directory for the TensorBoard event files. Its last path
                component is logged to Neptune under ``"log_dir"``.
            flush_secs: Flush interval forwarded to ``SummaryWriter``.
            cfg: Runner configuration; must contain the key ``"neptune_project"``.

        Raises:
            KeyError: If ``"neptune_project"`` is missing from ``cfg`` or if the
                environment variables ``NEPTUNE_API_TOKEN`` / ``NEPTUNE_USERNAME``
                are not set.
        """
        # Pass by keyword: the second positional parameter of SummaryWriter is
        # `comment`, so a positional flush_secs would never reach the writer.
        super().__init__(log_dir=log_dir, flush_secs=flush_secs)

        try:
            project = cfg["neptune_project"]
        except KeyError:
            raise KeyError("Please specify neptune_project in the runner config, e.g. legged_gym.") from None

        try:
            token = os.environ["NEPTUNE_API_TOKEN"]
        except KeyError:
            raise KeyError(
                "Neptune api token not found. Please run or add to ~/.bashrc: export NEPTUNE_API_TOKEN=YOUR_API_TOKEN"
            ) from None

        try:
            entity = os.environ["NEPTUNE_USERNAME"]
        except KeyError:
            raise KeyError(
                "Neptune username not found. Please run or add to ~/.bashrc: export NEPTUNE_USERNAME=YOUR_USERNAME"
            ) from None

        neptune_project = entity + "/" + project

        self.neptune_logger = NeptuneLogger(neptune_project, token)

        # Tags that are renamed before being sent to Neptune.
        self.name_map = {
            "Train/mean_reward/time": "Train/mean_reward_time",
            "Train/mean_episode_length/time": "Train/mean_episode_length_time",
        }

        run_name = os.path.split(log_dir)[-1]

        self.neptune_logger.run["log_dir"].log(run_name)

    def _map_path(self, path):
        """Return the Neptune name for ``path``; unmapped tags are returned unchanged."""
        return self.name_map.get(path, path)

    def add_scalar(self, tag, scalar_value, global_step=None, walltime=None, new_style=False):
        """Write a scalar to TensorBoard and log it to the Neptune run.

        The arguments are forwarded unchanged to ``SummaryWriter.add_scalar``;
        the Neptune entry uses the mapped tag and ``global_step`` as its step.
        """
        super().add_scalar(
            tag,
            scalar_value,
            global_step=global_step,
            walltime=walltime,
            new_style=new_style,
        )
        self.neptune_logger.run[self._map_path(tag)].log(scalar_value, step=global_step)

    def stop(self):
        """Stop the Neptune run."""
        self.neptune_logger.run.stop()

    def log_config(self, env_cfg, runner_cfg, alg_cfg, policy_cfg):
        """Store the configuration objects in the Neptune run."""
        self.neptune_logger.store_config(env_cfg, runner_cfg, alg_cfg, policy_cfg)

    def save_model(self, model_path, iter):
        """Upload the file at ``model_path`` under ``model/saved_model_<iter>``."""
        self.neptune_logger.run["model/saved_model_" + str(iter)].upload(model_path)
|
|
@ -1,39 +1,18 @@
|
|||
# SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Copyright (c) 2021 ETH Zurich, Nikita Rudin
|
||||
# Copyright 2021 ETH Zurich, NVIDIA CORPORATION
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import git
|
||||
import os
|
||||
import pathlib
|
||||
import torch
|
||||
|
||||
|
||||
def split_and_pad_trajectories(tensor, dones):
|
||||
""" Splits trajectories at done indices. Then concatenates them and padds with zeros up to the length og the longest trajectory.
|
||||
"""Splits trajectories at done indices. Then concatenates them and pads with zeros up to the length og the longest trajectory.
|
||||
Returns masks corresponding to valid parts of the trajectories
|
||||
Example:
|
||||
Example:
|
||||
Input: [ [a1, a2, a3, a4 | a5, a6],
|
||||
[b1, b2 | b3, b4, b5 | b6]
|
||||
]
|
||||
|
@ -43,9 +22,9 @@ def split_and_pad_trajectories(tensor, dones):
|
|||
[b1, b2, 0, 0], | [True, True, False, False],
|
||||
[b3, b4, b5, 0], | [True, True, True, False],
|
||||
[b6, 0, 0, 0] | [True, False, False, False],
|
||||
] | ]
|
||||
|
||||
Assumes that the inputy has the following dimension order: [time, number of envs, aditional dimensions]
|
||||
] | ]
|
||||
|
||||
Assumes that the input has the following dimension order: [time, number of envs, additional dimensions]
|
||||
"""
|
||||
dones = dones.clone()
|
||||
dones[-1] = 1
|
||||
|
@ -57,15 +36,33 @@ def split_and_pad_trajectories(tensor, dones):
|
|||
trajectory_lengths = done_indices[1:] - done_indices[:-1]
|
||||
trajectory_lengths_list = trajectory_lengths.tolist()
|
||||
# Extract the individual trajectories
|
||||
trajectories = torch.split(tensor.transpose(1, 0).flatten(0, 1),trajectory_lengths_list)
|
||||
trajectories = torch.split(tensor.transpose(1, 0).flatten(0, 1), trajectory_lengths_list)
|
||||
# add at least one full length trajectory
|
||||
trajectories = trajectories + (torch.zeros(tensor.shape[0], tensor.shape[-1], device=tensor.device),)
|
||||
# pad the trajectories to the length of the longest trajectory
|
||||
padded_trajectories = torch.nn.utils.rnn.pad_sequence(trajectories)
|
||||
|
||||
# remove the added tensor
|
||||
padded_trajectories = padded_trajectories[:, :-1]
|
||||
|
||||
trajectory_masks = trajectory_lengths > torch.arange(0, tensor.shape[0], device=tensor.device).unsqueeze(1)
|
||||
return padded_trajectories, trajectory_masks
|
||||
|
||||
|
||||
def unpad_trajectories(trajectories, masks):
    """Does the inverse operation of split_and_pad_trajectories()

    Args:
        trajectories: Padded trajectories; dimensions 0 and 1 are swapped before
            masking (presumably [time, trajectory, features] - confirm against caller).
        masks: Boolean mask selecting the valid entries along the first two
            dimensions of ``trajectories``.

    Returns:
        The selected entries, regrouped into chunks of ``trajectories.shape[0]``
        steps, with that step dimension moved back to the front.
    """
    # Need to transpose before and after the masking to have proper reshaping
    trajectory_major = trajectories.transpose(1, 0)
    valid_steps = trajectory_major[masks.transpose(1, 0)]
    regrouped = valid_steps.view(-1, trajectories.shape[0], trajectories.shape[-1])
    return regrouped.transpose(1, 0)
|
||||
|
||||
|
||||
def store_code_state(logdir, repositories):
    """Write the git status and diff of each repository into ``logdir``.

    For every entry a file named ``<repo directory name>_git.diff`` is created
    containing the output of ``git status`` and the diff against the tree of
    the current HEAD commit.

    Args:
        logdir: Directory in which the files are created.
        repositories: Iterable of paths; the enclosing git repository of each
            path is located by searching parent directories.

    Raises:
        FileExistsError: If the target file already exists (it is opened in
            exclusive-create mode and is never overwritten).
    """
    for repository_file_path in repositories:
        repo = git.Repo(repository_file_path, search_parent_directories=True)
        repo_name = pathlib.Path(repo.working_dir).name
        head_tree = repo.head.commit.tree
        content = f"--- git status ---\n{repo.git.status()} \n\n\n--- git diff ---\n{repo.git.diff(head_tree)}"
        # Explicit UTF-8: diffs may contain non-ASCII text, and the platform
        # default encoding is not guaranteed to be able to encode it.
        with open(os.path.join(logdir, f"{repo_name}_git.diff"), "x", encoding="utf-8") as f:
            f.write(content)
|
||||
|
|
|
@ -0,0 +1,77 @@
|
|||
# Copyright 2021 ETH Zurich, NVIDIA CORPORATION
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import asdict
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
|
||||
try:
|
||||
import wandb
|
||||
except ModuleNotFoundError:
|
||||
raise ModuleNotFoundError("Wandb is required to log to Weights and Biases.")
|
||||
|
||||
|
||||
class WandbSummaryWriter(SummaryWriter):
    """Summary writer for Weights and Biases.

    Every scalar is written to the regular TensorBoard event file (via the
    parent class) and additionally logged through ``wandb``.
    """

    def __init__(self, log_dir: str, flush_secs: int, cfg):
        """Create the TensorBoard writer and initialise the wandb run.

        Args:
            log_dir: Directory for the TensorBoard event files. Its last path
                component is logged to wandb under ``"log_dir"``.
            flush_secs: Flush interval forwarded to ``SummaryWriter``.
            cfg: Runner configuration; must contain the key ``"wandb_project"``.

        Raises:
            KeyError: If ``"wandb_project"`` is missing from ``cfg`` or if the
                environment variable ``WANDB_USERNAME`` is not set.
        """
        # Pass by keyword: the second positional parameter of SummaryWriter is
        # `comment`, so a positional flush_secs would never reach the writer.
        super().__init__(log_dir=log_dir, flush_secs=flush_secs)

        try:
            project = cfg["wandb_project"]
        except KeyError:
            raise KeyError("Please specify wandb_project in the runner config, e.g. legged_gym.") from None

        try:
            entity = os.environ["WANDB_USERNAME"]
        except KeyError:
            raise KeyError(
                "Wandb username not found. Please run or add to ~/.bashrc: export WANDB_USERNAME=YOUR_USERNAME"
            ) from None

        wandb.init(project=project, entity=entity)

        # Rename the run: project name followed by the last "-"-separated token
        # of the generated name (no separator is inserted between the two).
        wandb.run.name = project + wandb.run.name.split("-")[-1]

        # Tags that are renamed before being sent to wandb.
        self.name_map = {
            "Train/mean_reward/time": "Train/mean_reward_time",
            "Train/mean_episode_length/time": "Train/mean_episode_length_time",
        }

        run_name = os.path.split(log_dir)[-1]

        wandb.log({"log_dir": run_name})

    def store_config(self, env_cfg, runner_cfg, alg_cfg, policy_cfg):
        """Add the configuration objects to ``wandb.config``.

        ``env_cfg`` must be a dataclass instance; it is converted with
        ``dataclasses.asdict``. The other three are stored as passed.
        """
        wandb.config.update({"runner_cfg": runner_cfg})
        wandb.config.update({"policy_cfg": policy_cfg})
        wandb.config.update({"alg_cfg": alg_cfg})
        wandb.config.update({"env_cfg": asdict(env_cfg)})

    def _map_path(self, path):
        """Return the wandb name for ``path``; unmapped tags are returned unchanged."""
        return self.name_map.get(path, path)

    def add_scalar(self, tag, scalar_value, global_step=None, walltime=None, new_style=False):
        """Write a scalar to TensorBoard and log it to wandb.

        The arguments are forwarded unchanged to ``SummaryWriter.add_scalar``;
        the wandb entry uses the mapped tag and ``global_step`` as its step.
        """
        super().add_scalar(
            tag,
            scalar_value,
            global_step=global_step,
            walltime=walltime,
            new_style=new_style,
        )
        wandb.log({self._map_path(tag): scalar_value}, step=global_step)

    def stop(self):
        """Finish the wandb run."""
        wandb.finish()

    def log_config(self, env_cfg, runner_cfg, alg_cfg, policy_cfg):
        """Store the configuration objects in ``wandb.config``."""
        self.store_config(env_cfg, runner_cfg, alg_cfg, policy_cfg)

    def save_model(self, model_path, iter):
        """Hand ``model_path`` to ``wandb.save``; ``iter`` is accepted but not used."""
        wandb.save(model_path)
|
34
setup.py
34
setup.py
|
@ -1,16 +1,20 @@
|
|||
from setuptools import setup, find_packages
|
||||
# Copyright 2021 ETH Zurich, NVIDIA CORPORATION
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
setup(name='rsl_rl',
|
||||
version='1.0.2',
|
||||
author='Nikita Rudin',
|
||||
author_email='rudinn@ethz.ch',
|
||||
license="BSD-3-Clause",
|
||||
packages=find_packages(),
|
||||
description='Fast and simple RL algorithms implemented in pytorch',
|
||||
python_requires='>=3.6',
|
||||
install_requires=[
|
||||
"torch>=1.4.0",
|
||||
"torchvision>=0.5.0",
|
||||
"numpy>=1.16.4"
|
||||
],
|
||||
)
|
||||
from setuptools import find_packages, setup
|
||||
|
||||
setup(
    name="rsl_rl",
    version="2.0.0",
    packages=find_packages(),
    # SPDX identifier, matching the license headers of the package sources.
    license="BSD-3-Clause",
    description="Fast and simple RL algorithms implemented in pytorch",
    # The package modules use `from __future__ import annotations`,
    # which is only available from Python 3.7 on.
    python_requires=">=3.7",
    install_requires=[
        "torch>=1.10.0",
        "torchvision>=0.5.0",
        "numpy>=1.16.4",
        "GitPython",
        "onnx",
    ],
)
|
||||
|
|
Loading…
Reference in New Issue