diff --git a/README.md b/README.md index bcc4b6be34..fd49b3312a 100644 --- a/README.md +++ b/README.md @@ -277,7 +277,7 @@ pip install -e .\[dev\] Code style check: -```markdown +```shell pre-commit run --all-files ``` @@ -285,7 +285,7 @@ pre-commit run --all-files Unit tests: -```markdown +```shell python -m pytest tests ``` diff --git a/docs/sphinx_doc/Makefile b/docs/sphinx_doc/Makefile index d0c3cbf102..e8c684ae21 100644 --- a/docs/sphinx_doc/Makefile +++ b/docs/sphinx_doc/Makefile @@ -3,7 +3,7 @@ # You can set these variables from the command line, and also # from the environment for the first two. -SPHINXOPTS ?= +SPHINXOPTS ?= -a -E -j auto SPHINXBUILD ?= sphinx-build SOURCEDIR = source BUILDDIR = build diff --git a/docs/sphinx_doc/_templates/package.rst_t b/docs/sphinx_doc/_templates/package.rst_t index 2951c55303..8b89dd1ab0 100644 --- a/docs/sphinx_doc/_templates/package.rst_t +++ b/docs/sphinx_doc/_templates/package.rst_t @@ -15,4 +15,39 @@ {%- endfor %} {%- endmacro %} +{%- if is_namespace %} +.. 
py:module:: {{ pkgname }} +{% endif %} + +{%- if modulefirst and not is_namespace %} +{{ automodule(pkgname, automodule_options) }} +{% endif %} + +{%- if subpackages %} +Subpackages +----------- + +{{ toctree(subpackages) }} +{% endif %} + +{%- if submodules %} +Submodules +---------- +{% if separatemodules %} +{{ toctree(submodules) }} +{% else %} +{%- for submodule in submodules %} +{% if show_headings %} +{{- [submodule, "module"] | join(" ") | e | heading(2) }} +{% endif %} +{{ automodule(submodule, automodule_options) }} +{% endfor %} +{%- endif %} +{%- endif %} + +{%- if not modulefirst and not is_namespace %} +Module contents +--------------- + {{ automodule(pkgname, automodule_options) }} +{% endif %} diff --git a/docs/sphinx_doc/source/tutorial/opmd.pdf b/docs/sphinx_doc/assets/opmd.pdf similarity index 100% rename from docs/sphinx_doc/source/tutorial/opmd.pdf rename to docs/sphinx_doc/assets/opmd.pdf diff --git a/docs/sphinx_doc/build_doc.sh b/docs/sphinx_doc/build_doc.sh index 46c81041b6..7b034ee622 100755 --- a/docs/sphinx_doc/build_doc.sh +++ b/docs/sphinx_doc/build_doc.sh @@ -1,3 +1,3 @@ #!/bin/bash -sphinx-apidoc -f -o source ../../trinity -t _templates +sphinx-apidoc -f -o source/build_api ../../trinity -t _templates make clean html diff --git a/docs/sphinx_doc/source/conf.py b/docs/sphinx_doc/source/conf.py index 5a7a0f1a1d..605ea1002c 100644 --- a/docs/sphinx_doc/source/conf.py +++ b/docs/sphinx_doc/source/conf.py @@ -5,8 +5,6 @@ # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -import sphinx_rtd_theme - from trinity import __version__ as version project = "Trinity-RFT" @@ -36,16 +34,22 @@ autosectionlabel_prefix_document = True autosummary_generate = True autosummary_ignore_module_all = False +napoleon_google_docstring = True autodoc_member_order = "bysource" templates_path = ["_templates"] exclude_patterns = ["build"] 
+autodoc_mock_imports = ["ray"] + +autodoc_default_options = { + "members": True, + "special-members": "__init__", +} # -- Options for HTML output ------------------------------------------------- html_theme = "sphinx_rtd_theme" -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] html_theme_options = { "navigation_depth": 3, @@ -55,13 +59,3 @@ # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] - - -def skip(app, what, name, obj, would_skip, options): - if name == "__init__": - return False - return would_skip - - -def setup(app): - app.connect("autodoc-skip-member", skip) diff --git a/docs/sphinx_doc/source/index.rst b/docs/sphinx_doc/source/index.rst index 72fee2bb81..e25a3a321e 100644 --- a/docs/sphinx_doc/source/index.rst +++ b/docs/sphinx_doc/source/index.rst @@ -6,7 +6,7 @@ Welcome to Trinity-RFT's documentation! ======================================= -.. include:: tutorial/main.md +.. include:: main.md :parser: myst_parser.sphinx_ @@ -25,31 +25,13 @@ Welcome to Trinity-RFT's documentation! tutorial/trinity_programming_guide.md .. 
toctree:: - :maxdepth: 2 + :maxdepth: 1 :glob: :caption: API Reference - example.md - - trinity.buffer.reader - trinity.buffer - trinity.buffer.schema - trinity.buffer.writer - trinity.common.models - trinity.common.rewards - trinity.common - trinity.common.workflows - trinity.explorer - trinity.manager - trinity - trinity.trainer - trinity.trainer.verl - trinity.utils - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` + build_api/trinity.buffer + build_api/trinity.explorer + build_api/trinity.trainer + build_api/trinity.manager + build_api/trinity.common + build_api/trinity.utils diff --git a/docs/sphinx_doc/source/tutorial/main.md b/docs/sphinx_doc/source/main.md similarity index 92% rename from docs/sphinx_doc/source/tutorial/main.md rename to docs/sphinx_doc/source/main.md index 9547e1caf5..5871d654fe 100644 --- a/docs/sphinx_doc/source/tutorial/main.md +++ b/docs/sphinx_doc/source/main.md @@ -1,11 +1,8 @@ +# Trinity-RFT + - - -
-
-
+
You can click and enter into this project, and all the samples that need to be annotated are listed on the page.
diff --git a/docs/sphinx_doc/source/tutorial/example_multi_turn.md b/docs/sphinx_doc/source/tutorial/example_multi_turn.md
index b3686dc3ee..0b7e6abe79 100644
--- a/docs/sphinx_doc/source/tutorial/example_multi_turn.md
+++ b/docs/sphinx_doc/source/tutorial/example_multi_turn.md
@@ -125,5 +125,5 @@ and include them in the init files in `trinity/common/workflows/__init__.py`
Then you are all set! It should be pretty simple😄, and both environments converge.
-
-
+
+
diff --git a/docs/sphinx_doc/source/tutorial/example_reasoning_advanced.md b/docs/sphinx_doc/source/tutorial/example_reasoning_advanced.md
index d188ffb70b..81ce89440e 100644
--- a/docs/sphinx_doc/source/tutorial/example_reasoning_advanced.md
+++ b/docs/sphinx_doc/source/tutorial/example_reasoning_advanced.md
@@ -11,7 +11,7 @@ Let's continue with the [previous GSM8k example](./example_reasoning_basic.md) a
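-As an experimental feature of Trinity-RFT, we develop an embarrasingly simple off-policy RL algorithm, termed as OPMD (Online Policy Mirror Descent, inspired by [Kimi k1.5](https://arxiv.org/abs/2501.12599)).
+As an experimental feature of Trinity-RFT, we develop an embarrassingly simple off-policy RL algorithm, termed as OPMD (Online Policy Mirror Descent, inspired by [Kimi k1.5](https://arxiv.org/abs/2501.12599)).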
-The algorithm design and analysis can be found in this [technical report](./opmd.pdf).
+The algorithm design and analysis can be found in this [technical report](../../assets/opmd.pdf).
diff --git a/docs/sphinx_doc/source/tutorial/trinity_programming_guide.md b/docs/sphinx_doc/source/tutorial/trinity_programming_guide.md
index b25fa718cd..582e99159d 100644
--- a/docs/sphinx_doc/source/tutorial/trinity_programming_guide.md
+++ b/docs/sphinx_doc/source/tutorial/trinity_programming_guide.md
@@ -35,7 +35,7 @@ Each `Task` is a Python dictionary (`Dict[str, Any]`), containing various parame
In the math problem scenario, the `Task` dataset can be a `jsonl` file, where each line’s JSON contains `question` and `answer` fields representing the problem description and standard answer, respectively.
-```jsonl
+```json
{"question": "1+1=", "answer": "2"}
{"question": "2+2=", "answer": "4"}
...
diff --git a/pyproject.toml b/pyproject.toml
index 6c78ec8b83..5aa419882c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,7 +11,7 @@ readme = "README.md"
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
- "License :: OSI Approved :: MIT License",
+ "License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
diff --git a/trinity/common/experience.py b/trinity/common/experience.py
index 60b8ca8013..a1b5008681 100644
--- a/trinity/common/experience.py
+++ b/trinity/common/experience.py
@@ -48,16 +48,16 @@ def deserialize(data: bytes) -> Experience:
class Experiences:
"""A container for a batch of experiences, for high performance communication usage.
- Structure:
-
- |<- prompt_length ->| |
- tokens: ('P' represents prompt, 'O' represents output)
- exp1: |........PPPPPPPPPPP|OOOOOOOOOO.....|
- exp2: |......PPPPPPPPPPPPP|OOOOOOO........|
-
- attention_masks: ('.' represents False and '1' represents True)
- exp1: |........11111111111|1111111111.....|
- exp2: |......1111111111111|1111111........|
+ Example:
+
+ >>> |<- prompt_length ->| |
+ >>> tokens: ('P' represents prompt, 'O' represents output)
+ >>> exp1: |........PPPPPPPPPPP|OOOOOOOOOO.....|
+ >>> exp2: |......PPPPPPPPPPPPP|OOOOOOO........|
+ >>>
+ >>> attention_masks: ('.' represents False and '1' represents True)
+ >>> exp1: |........11111111111|1111111111.....|
+ >>> exp2: |......1111111111111|1111111........|
"""
tokens: Tensor
diff --git a/trinity/explorer/__init__.py b/trinity/explorer/__init__.py
index 8665a1b125..e7794c7cf6 100644
--- a/trinity/explorer/__init__.py
+++ b/trinity/explorer/__init__.py
@@ -1,3 +1,4 @@
from trinity.explorer.explorer import Explorer
+from trinity.explorer.runner_pool import RunnerPool
-__all__ = ["Explorer"]
+__all__ = ["Explorer", "RunnerPool"]
diff --git a/trinity/manager/__init__.py b/trinity/manager/__init__.py
index e69de29bb2..b64f42af15 100644
--- a/trinity/manager/__init__.py
+++ b/trinity/manager/__init__.py
@@ -0,0 +1,7 @@
+from trinity.trainer.trainer import TrainEngineWrapper, Trainer, get_trainer_wrapper
+
+__all__ = [
+ "Trainer",
+ "TrainEngineWrapper",
+ "get_trainer_wrapper",
+]
diff --git a/trinity/trainer/__init__.py b/trinity/trainer/__init__.py
index 3f6501f5b8..fa97b063b0 100644
--- a/trinity/trainer/__init__.py
+++ b/trinity/trainer/__init__.py
@@ -1,3 +1,3 @@
-from trinity.trainer.trainer import Trainer
+from trinity.trainer.trainer import TrainEngineWrapper, Trainer, get_trainer_wrapper
-__all__ = ["Trainer"]
+__all__ = ["Trainer", "TrainEngineWrapper", "get_trainer_wrapper"]