diff --git a/README.md b/README.md index bcc4b6be34..fd49b3312a 100644 --- a/README.md +++ b/README.md @@ -277,7 +277,7 @@ pip install -e .\[dev\] Code style check: -```markdown +```shell pre-commit run --all-files ``` @@ -285,7 +285,7 @@ pre-commit run --all-files Unit tests: -```markdown +```shell python -m pytest tests ``` diff --git a/docs/sphinx_doc/Makefile b/docs/sphinx_doc/Makefile index d0c3cbf102..e8c684ae21 100644 --- a/docs/sphinx_doc/Makefile +++ b/docs/sphinx_doc/Makefile @@ -3,7 +3,7 @@ # You can set these variables from the command line, and also # from the environment for the first two. -SPHINXOPTS ?= +SPHINXOPTS ?= -a -E -j auto SPHINXBUILD ?= sphinx-build SOURCEDIR = source BUILDDIR = build diff --git a/docs/sphinx_doc/_templates/package.rst_t b/docs/sphinx_doc/_templates/package.rst_t index 2951c55303..8b89dd1ab0 100644 --- a/docs/sphinx_doc/_templates/package.rst_t +++ b/docs/sphinx_doc/_templates/package.rst_t @@ -15,4 +15,39 @@ {%- endfor %} {%- endmacro %} +{%- if is_namespace %} +.. 
py:module:: {{ pkgname }} +{% endif %} + +{%- if modulefirst and not is_namespace %} +{{ automodule(pkgname, automodule_options) }} +{% endif %} + +{%- if subpackages %} +Subpackages +----------- + +{{ toctree(subpackages) }} +{% endif %} + +{%- if submodules %} +Submodules +---------- +{% if separatemodules %} +{{ toctree(submodules) }} +{% else %} +{%- for submodule in submodules %} +{% if show_headings %} +{{- [submodule, "module"] | join(" ") | e | heading(2) }} +{% endif %} +{{ automodule(submodule, automodule_options) }} +{% endfor %} +{%- endif %} +{%- endif %} + +{%- if not modulefirst and not is_namespace %} +Module contents +--------------- + {{ automodule(pkgname, automodule_options) }} +{% endif %} diff --git a/docs/sphinx_doc/source/tutorial/opmd.pdf b/docs/sphinx_doc/assets/opmd.pdf similarity index 100% rename from docs/sphinx_doc/source/tutorial/opmd.pdf rename to docs/sphinx_doc/assets/opmd.pdf diff --git a/docs/sphinx_doc/build_doc.sh b/docs/sphinx_doc/build_doc.sh index 46c81041b6..7b034ee622 100755 --- a/docs/sphinx_doc/build_doc.sh +++ b/docs/sphinx_doc/build_doc.sh @@ -1,3 +1,3 @@ #!/bin/bash -sphinx-apidoc -f -o source ../../trinity -t _templates +sphinx-apidoc -f -o source/build_api ../../trinity -t _templates make clean html diff --git a/docs/sphinx_doc/source/conf.py b/docs/sphinx_doc/source/conf.py index 5a7a0f1a1d..605ea1002c 100644 --- a/docs/sphinx_doc/source/conf.py +++ b/docs/sphinx_doc/source/conf.py @@ -5,8 +5,6 @@ # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -import sphinx_rtd_theme - from trinity import __version__ as version project = "Trinity-RFT" @@ -36,16 +34,22 @@ autosectionlabel_prefix_document = True autosummary_generate = True autosummary_ignore_module_all = False +napoleon_google_docstring = True autodoc_member_order = "bysource" templates_path = ["_templates"] exclude_patterns = ["build"] 
+autodoc_mock_imports = ["ray"] + +autodoc_default_options = { + "members": True, + "special-members": "__init__", +} # -- Options for HTML output ------------------------------------------------- html_theme = "sphinx_rtd_theme" -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] html_theme_options = { "navigation_depth": 3, @@ -55,13 +59,3 @@ # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] - - -def skip(app, what, name, obj, would_skip, options): - if name == "__init__": - return False - return would_skip - - -def setup(app): - app.connect("autodoc-skip-member", skip) diff --git a/docs/sphinx_doc/source/index.rst b/docs/sphinx_doc/source/index.rst index 72fee2bb81..e25a3a321e 100644 --- a/docs/sphinx_doc/source/index.rst +++ b/docs/sphinx_doc/source/index.rst @@ -6,7 +6,7 @@ Welcome to Trinity-RFT's documentation! ======================================= -.. include:: tutorial/main.md +.. include:: main.md :parser: myst_parser.sphinx_ @@ -25,31 +25,13 @@ Welcome to Trinity-RFT's documentation! tutorial/trinity_programming_guide.md .. 
toctree:: - :maxdepth: 2 + :maxdepth: 1 :glob: :caption: API Reference - example.md - - trinity.buffer.reader - trinity.buffer - trinity.buffer.schema - trinity.buffer.writer - trinity.common.models - trinity.common.rewards - trinity.common - trinity.common.workflows - trinity.explorer - trinity.manager - trinity - trinity.trainer - trinity.trainer.verl - trinity.utils - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` + build_api/trinity.buffer + build_api/trinity.explorer + build_api/trinity.trainer + build_api/trinity.manager + build_api/trinity.common + build_api/trinity.utils diff --git a/docs/sphinx_doc/source/tutorial/main.md b/docs/sphinx_doc/source/main.md similarity index 92% rename from docs/sphinx_doc/source/tutorial/main.md rename to docs/sphinx_doc/source/main.md index 9547e1caf5..5871d654fe 100644 --- a/docs/sphinx_doc/source/tutorial/main.md +++ b/docs/sphinx_doc/source/main.md @@ -1,11 +1,8 @@ +# Trinity-RFT +![trinity-rft](../assets/trinity-title.png) - - -
- Trinity-RFT -
Trinity-RFT is a general-purpose, flexible and scalable framework designed for reinforcement fine-tuning (RFT) of large language models (LLM). @@ -46,12 +43,7 @@ These include converting raw datasets to prompt/task sets for RL, cleaning/filte ## The design of Trinity-RFT - - -
- Trinity-RFT -
- +![design](../assets/trinity-design.png) @@ -140,7 +132,7 @@ Trinity-RFT supports most datasets and models from Huggingface and ModelScope. **Prepare the model** in the local directory `$MODEL_PATH/{model_name}`: -```plain +```shell # Using Huggingface huggingface-cli download {model_name} --local-dir $MODEL_PATH/{model_name} @@ -154,7 +146,7 @@ For more details about model downloading, please refer to [Huggingface](https:// **Prepare the dataset** in the local directory `$DATASET_PATH/{dataset_name}`: -```plain +```shell # Using Huggingface huggingface-cli download {dataset_name} --repo-type dataset --local-dir $DATASET_PATH/{dataset_name} @@ -171,7 +163,7 @@ For more details about dataset downloading, please refer to [Huggingface](https: You may customize the configurations in `scripts/config/{config_name}.yaml`and `scripts/config/{train_config_name}.yaml`. For example, the model and dataset are specified as: -```plain +```yaml model: model_path: $MODEL_PATH/{model_name} @@ -231,11 +223,11 @@ More example config files can be found in `scripts/config`. For more detailed examples about how to use Trinity-RFT, please refer to the following documents: -+ [A quick example with GSM8k](./example_reasoning_basic.md); -+ [Off-policy / asynchronous modes of RFT](./example_reasoning_advanced.md); -+ [Multi-turn tasks](./example_multi_turn.md); -+ [Data processing pipelines](./example_data_functionalities.md); -+ [Offline learning by DPO](./example_dpo.md). ++ [A quick example with GSM8k](tutorial/example_reasoning_basic.md); ++ [Off-policy / asynchronous modes of RFT](tutorial/example_reasoning_advanced.md); ++ [Multi-turn tasks](tutorial/example_multi_turn.md); ++ [Data processing pipelines](tutorial/example_data_functionalities.md); ++ [Offline learning by DPO](tutorial/example_dpo.md). 
@@ -244,7 +236,7 @@ For more detailed examples about how to use Trinity-RFT, please refer to the fol ## Advanced usage and full configurations -Please refer to [this document](./trinity_configs.md). +Please refer to [this document](tutorial/trinity_configs.md). @@ -253,7 +245,7 @@ Please refer to [this document](./trinity_configs.md). ## Programming guide for developers -Please refer to [this document](./trinity_programming_guide.md). +Please refer to [this document](tutorial/trinity_programming_guide.md). @@ -277,7 +269,7 @@ pip install -e .\[dev\] Code style check: -```markdown +```shell pre-commit run --all-files ``` @@ -285,7 +277,7 @@ pre-commit run --all-files Unit tests: -```markdown +```shell python -m pytest tests ``` @@ -309,7 +301,7 @@ This project is built upon many excellent open-source projects, including: ## Citation -```plain +``` @misc{Trinity-RFT, title={Trinity-RFT}, author={{Trinity-RFT Team}}, diff --git a/docs/sphinx_doc/source/tutorial/example_data_functionalities.md b/docs/sphinx_doc/source/tutorial/example_data_functionalities.md index 9c50f5aafa..12362b5739 100644 --- a/docs/sphinx_doc/source/tutorial/example_data_functionalities.md +++ b/docs/sphinx_doc/source/tutorial/example_data_functionalities.md @@ -8,7 +8,7 @@ In this example, you will learn how to apply the data module of Trinity-RFT to p 2. how to configure the data module 3. what the data module can do -Before getting started, you need to prepare the main environment of Trinity-RFT according to the [installation section of the README file](main.md#getting-started), and you need to install [postgresql](https://www.postgresql.org/docs/current/tutorial-install.html) as well. +Before getting started, you need to prepare the main environment of Trinity-RFT according to the [installation section of the README file](../main.md), and you need to install [postgresql](https://www.postgresql.org/docs/current/tutorial-install.html) as well. 
### Data Preparation @@ -243,7 +243,7 @@ You can set more config items for this OP (e.g. notification when annotation is When you start running with the RFT config, the data module will start the OP `human_preference_annotation_mapper`, and then you can find a new project on the "Projects" page of the label-studio server. - +![](../../assets/data-projects.png) You can click and enter into this project, and all the samples that need to be annotated are listed on the page. diff --git a/docs/sphinx_doc/source/tutorial/example_multi_turn.md b/docs/sphinx_doc/source/tutorial/example_multi_turn.md index b3686dc3ee..0b7e6abe79 100644 --- a/docs/sphinx_doc/source/tutorial/example_multi_turn.md +++ b/docs/sphinx_doc/source/tutorial/example_multi_turn.md @@ -125,5 +125,5 @@ and include them in the init files in `trinity/common/workflows/__init__.py` Then you are all set! It should be pretty simple😄, and both environments converge. - - +![](../../assets/alfworld_reward_curve.png) +![](../../assets/webshop_reward_curve.png) diff --git a/docs/sphinx_doc/source/tutorial/example_reasoning_advanced.md b/docs/sphinx_doc/source/tutorial/example_reasoning_advanced.md index d188ffb70b..81ce89440e 100644 --- a/docs/sphinx_doc/source/tutorial/example_reasoning_advanced.md +++ b/docs/sphinx_doc/source/tutorial/example_reasoning_advanced.md @@ -11,7 +11,7 @@ Let's continue with the [previous GSM8k example](./example_reasoning_basic.md) a As an experimental feature of Trinity-RFT, we develop an embarrasingly simple off-policy RL algorithm, termed as OPMD (Online Policy Mirror Descent, inspired by [Kimi k1.5](https://arxiv.org/abs/2501.12599)). -The algorithm design and analysis can be found in this [technical report](./opmd.pdf). +The algorithm design and analysis can be found in this [technical report](../../assets/opmd.pdf). 
diff --git a/docs/sphinx_doc/source/tutorial/trinity_programming_guide.md b/docs/sphinx_doc/source/tutorial/trinity_programming_guide.md index b25fa718cd..582e99159d 100644 --- a/docs/sphinx_doc/source/tutorial/trinity_programming_guide.md +++ b/docs/sphinx_doc/source/tutorial/trinity_programming_guide.md @@ -35,7 +35,7 @@ Each `Task` is a Python dictionary (`Dict[str, Any]`), containing various parame In the math problem scenario, the `Task` dataset can be a `jsonl` file, where each line’s JSON contains `question` and `answer` fields representing the problem description and standard answer, respectively. -```jsonl +```json {"question": "1+1=", "answer": "2"} {"question": "2+2=", "answer": "4"} ... diff --git a/pyproject.toml b/pyproject.toml index 6c78ec8b83..5aa419882c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ readme = "README.md" classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", - "License :: OSI Approved :: MIT License", + "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", diff --git a/trinity/common/experience.py b/trinity/common/experience.py index 60b8ca8013..a1b5008681 100644 --- a/trinity/common/experience.py +++ b/trinity/common/experience.py @@ -48,16 +48,16 @@ def deserialize(data: bytes) -> Experience: class Experiences: """A container for a batch of experiences, for high performance communication usage. - Structure: - - |<- prompt_length ->| | - tokens: ('P' represents prompt, 'O' represents output) - exp1: |........PPPPPPPPPPP|OOOOOOOOOO.....| - exp2: |......PPPPPPPPPPPPP|OOOOOOO........| - - attention_masks: ('.' 
represents False and '1' represents True) - exp1: |........11111111111|1111111111.....| - exp2: |......1111111111111|1111111........| + Example: + + >>> |<- prompt_length ->| | + >>> tokens: ('P' represents prompt, 'O' represents output) + >>> exp1: |........PPPPPPPPPPP|OOOOOOOOOO.....| + >>> exp2: |......PPPPPPPPPPPPP|OOOOOOO........| + >>> + >>> attention_masks: ('.' represents False and '1' represents True) + >>> exp1: |........11111111111|1111111111.....| + >>> exp2: |......1111111111111|1111111........| """ tokens: Tensor diff --git a/trinity/explorer/__init__.py b/trinity/explorer/__init__.py index 8665a1b125..e7794c7cf6 100644 --- a/trinity/explorer/__init__.py +++ b/trinity/explorer/__init__.py @@ -1,3 +1,4 @@ from trinity.explorer.explorer import Explorer +from trinity.explorer.runner_pool import RunnerPool -__all__ = ["Explorer"] +__all__ = ["Explorer", "RunnerPool"] diff --git a/trinity/manager/__init__.py b/trinity/manager/__init__.py index e69de29bb2..b64f42af15 100644 --- a/trinity/manager/__init__.py +++ b/trinity/manager/__init__.py @@ -0,0 +1,7 @@ +from trinity.trainer.trainer import TrainEngineWrapper, Trainer, get_trainer_wrapper + +__all__ = [ + "Trainer", + "TrainEngineWrapper", + "get_trainer_wrapper", +] diff --git a/trinity/trainer/__init__.py b/trinity/trainer/__init__.py index 3f6501f5b8..fa97b063b0 100644 --- a/trinity/trainer/__init__.py +++ b/trinity/trainer/__init__.py @@ -1,3 +1,3 @@ -from trinity.trainer.trainer import Trainer +from trinity.trainer.trainer import TrainEngineWrapper, Trainer, get_trainer_wrapper -__all__ = ["Trainer"] +__all__ = ["Trainer", "TrainEngineWrapper", "get_trainer_wrapper"]