modelscope · pan-x-c · Apr 22, 2025 · Apr 22, 2025 · Apr 22, 2025 · Apr 22, 2025
diff --git a/README.md b/README.md
@@ -277,15 +277,15 @@ pip install -e .\[dev\]
 
 Code style check:
 
-```markdown
+```shell
 pre-commit run --all-files
 ```
 
 
 
 Unit tests:
 
-```markdown
+```shell
 python -m pytest tests
 ```
 

diff --git a/docs/sphinx_doc/Makefile b/docs/sphinx_doc/Makefile
@@ -3,7 +3,7 @@
 
 # You can set these variables from the command line, and also
 # from the environment for the first two.
-SPHINXOPTS    ?=
+SPHINXOPTS    ?= -a -E -j auto
 SPHINXBUILD   ?= sphinx-build
 SOURCEDIR     = source
 BUILDDIR      = build

diff --git a/docs/sphinx_doc/_templates/package.rst_t b/docs/sphinx_doc/_templates/package.rst_t
@@ -15,4 +15,39 @@
 {%- endfor %}
 {%- endmacro %}
 
+{%- if is_namespace %}
+.. py:module:: {{ pkgname }}
+{% endif %}
+
+{%- if modulefirst and not is_namespace %}
+{{ automodule(pkgname, automodule_options) }}
+{% endif %}
+
+{%- if subpackages %}
+Subpackages
+-----------
+
+{{ toctree(subpackages) }}
+{% endif %}
+
+{%- if submodules %}
+Submodules
+----------
+{% if separatemodules %}
+{{ toctree(submodules) }}
+{% else %}
+{%- for submodule in submodules %}
+{% if show_headings %}
+{{- [submodule, "module"] | join(" ") | e | heading(2) }}
+{% endif %}
+{{ automodule(submodule, automodule_options) }}
+{% endfor %}
+{%- endif %}
+{%- endif %}
+
+{%- if not modulefirst and not is_namespace %}
+Module contents
+---------------
+
 {{ automodule(pkgname, automodule_options) }}
+{% endif %}
diff --git a/docs/sphinx_doc/source/tutorial/opmd.pdf → docs/sphinx_doc/assets/opmd.pdf b/docs/sphinx_doc/source/tutorial/opmd.pdf → docs/sphinx_doc/assets/opmd.pdf
diff --git a/docs/sphinx_doc/build_doc.sh b/docs/sphinx_doc/build_doc.sh
@@ -1,3 +1,3 @@
 #!/bin/bash
-sphinx-apidoc -f -o source ../../trinity -t _templates
+sphinx-apidoc -f -o source/build_api ../../trinity -t _templates
 make clean html
diff --git a/docs/sphinx_doc/source/conf.py b/docs/sphinx_doc/source/conf.py
@@ -5,8 +5,6 @@
 
 # -- Project information -----------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
-import sphinx_rtd_theme
-
 from trinity import __version__ as version
 
 project = "Trinity-RFT"
@@ -36,16 +34,22 @@
 autosectionlabel_prefix_document = True
 autosummary_generate = True
 autosummary_ignore_module_all = False
+napoleon_google_docstring = True
 
 autodoc_member_order = "bysource"
 
 templates_path = ["_templates"]
 exclude_patterns = ["build"]
+autodoc_mock_imports = ["ray"]
+
+autodoc_default_options = {
+    "members": True,
+    "special-members": "__init__",
+}
 
 # -- Options for HTML output -------------------------------------------------
 
 html_theme = "sphinx_rtd_theme"
-html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
 
 html_theme_options = {
     "navigation_depth": 3,
@@ -55,13 +59,3 @@
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ["_static"]
-
-
-def skip(app, what, name, obj, would_skip, options):
-    if name == "__init__":
-        return False
-    return would_skip
-
-
-def setup(app):
-    app.connect("autodoc-skip-member", skip)
diff --git a/docs/sphinx_doc/source/index.rst b/docs/sphinx_doc/source/index.rst
@@ -6,7 +6,7 @@
 Welcome to Trinity-RFT's documentation!
 =======================================
 
-.. include:: tutorial/main.md
+.. include:: main.md
    :parser: myst_parser.sphinx_
 
 
@@ -25,31 +25,13 @@ Welcome to Trinity-RFT's documentation!
    tutorial/trinity_programming_guide.md
 
 .. toctree::
-   :maxdepth: 2
+   :maxdepth: 1
    :glob:
    :caption: API Reference
 
-   example.md
-
-   trinity.buffer.reader
-   trinity.buffer
-   trinity.buffer.schema
-   trinity.buffer.writer
-   trinity.common.models
-   trinity.common.rewards
-   trinity.common
-   trinity.common.workflows
-   trinity.explorer
-   trinity.manager
-   trinity
-   trinity.trainer
-   trinity.trainer.verl
-   trinity.utils
-
-
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
+   build_api/trinity.buffer
+   build_api/trinity.explorer
+   build_api/trinity.trainer
+   build_api/trinity.manager
+   build_api/trinity.common
+   build_api/trinity.utils
diff --git a/docs/sphinx_doc/source/tutorial/main.md → docs/sphinx_doc/source/main.md b/docs/sphinx_doc/source/tutorial/main.md → docs/sphinx_doc/source/main.md
@@ -1,11 +1,8 @@
+# Trinity-RFT
 
+![trinity-rft](../assets/trinity-title.png)
 
 
-<!-- ![trinity-rft](../../assets/trinity-title.png) -->
-
-<div align="center">
-  <img src="../../assets/trinity-title.png" alt="Trinity-RFT">
-</div>
 
 
 Trinity-RFT is a general-purpose, flexible and scalable framework designed for reinforcement fine-tuning (RFT) of large language models (LLM).
@@ -46,12 +43,7 @@ These include converting raw datasets to prompt/task sets for RL, cleaning/filte
 ## The design of Trinity-RFT
 
 
-<!-- ![design](../../assets/trinity-design.png) -->
-
-<div align="center">
-  <img src="../../assets/trinity-design.png" alt="Trinity-RFT">
-</div>
-
+![design](../assets/trinity-design.png)
 
 
 
@@ -140,7 +132,7 @@ Trinity-RFT supports most datasets and models from Huggingface and ModelScope.
 
 **Prepare the model** in the local directory `$MODEL_PATH/{model_name}`:
 
-```plain
+```shell
 # Using Huggingface
 huggingface-cli download {model_name} --local-dir $MODEL_PATH/{model_name}
 
@@ -154,7 +146,7 @@ For more details about model downloading, please refer to [Huggingface](https://
 
 **Prepare the dataset** in the local directory `$DATASET_PATH/{dataset_name}`:
 
-```plain
+```shell
 # Using Huggingface
 huggingface-cli download {dataset_name} --repo-type dataset --local-dir $DATASET_PATH/{dataset_name}
 
@@ -171,7 +163,7 @@ For more details about dataset downloading, please refer to [Huggingface](https:
 
 You may customize the configurations in `scripts/config/{config_name}.yaml` and `scripts/config/{train_config_name}.yaml`. For example, the model and dataset are specified as:
 
-```plain
+```yaml
 model:
   model_path: $MODEL_PATH/{model_name}
 
@@ -231,11 +223,11 @@ More example config files can be found in `scripts/config`.
 
 
 For more detailed examples about how to use Trinity-RFT, please refer to the following documents:
-+ [A quick example with GSM8k](./example_reasoning_basic.md);
-+ [Off-policy / asynchronous modes of RFT](./example_reasoning_advanced.md);
-+ [Multi-turn tasks](./example_multi_turn.md);
-+ [Data processing pipelines](./example_data_functionalities.md);
-+ [Offline learning by DPO](./example_dpo.md).
++ [A quick example with GSM8k](tutorial/example_reasoning_basic.md);
++ [Off-policy / asynchronous modes of RFT](tutorial/example_reasoning_advanced.md);
++ [Multi-turn tasks](tutorial/example_multi_turn.md);
++ [Data processing pipelines](tutorial/example_data_functionalities.md);
++ [Offline learning by DPO](tutorial/example_dpo.md).
 
 
 
@@ -244,7 +236,7 @@ For more detailed examples about how to use Trinity-RFT, please refer to the fol
 ## Advanced usage and full configurations
 
 
-Please refer to [this document](./trinity_configs.md).
+Please refer to [this document](tutorial/trinity_configs.md).
 
 
 
@@ -253,7 +245,7 @@ Please refer to [this document](./trinity_configs.md).
 ## Programming guide for developers
 
 
-Please refer to [this document](./trinity_programming_guide.md).
+Please refer to [this document](tutorial/trinity_programming_guide.md).
 
 
 
@@ -277,15 +269,15 @@ pip install -e .\[dev\]
 
 Code style check:
 
-```markdown
+```shell
 pre-commit run --all-files
 ```
 
 
 
 Unit tests:
 
-```markdown
+```shell
 python -m pytest tests
 ```
 
@@ -309,7 +301,7 @@ This project is built upon many excellent open-source projects, including:
 
 
 ## Citation
-```plain
+```
 @misc{Trinity-RFT,
   title={Trinity-RFT},
   author={{Trinity-RFT Team}},

diff --git a/docs/sphinx_doc/source/tutorial/example_data_functionalities.md b/docs/sphinx_doc/source/tutorial/example_data_functionalities.md
@@ -8,7 +8,7 @@ In this example, you will learn how to apply the data module of Trinity-RFT to p
 2. how to configure the data module
 3. what the data module can do
 
-Before getting started, you need to prepare the main environment of Trinity-RFT according to the [installation section of the README file](main.md#getting-started), and you need to install [postgresql](https://www.postgresql.org/docs/current/tutorial-install.html) as well.
+Before getting started, you need to prepare the main environment of Trinity-RFT according to the [installation section of the README file](../main.md), and you need to install [postgresql](https://www.postgresql.org/docs/current/tutorial-install.html) as well.
 
 ### Data Preparation
 
@@ -243,7 +243,7 @@ You can set more config items for this OP (e.g. notification when annotation is
 
 When you start running with the RFT config, the data module will start the OP `human_preference_annotation_mapper`, and then you can find a new project on the "Projects" page of the label-studio server.
 
-<img src="../../assets/data-projects.png" width="300">
+![Projects page of the label-studio server](../../assets/data-projects.png)
 
 You can click and enter into this project, and all the samples that need to be annotated are listed on the page.
 

diff --git a/docs/sphinx_doc/source/tutorial/example_multi_turn.md b/docs/sphinx_doc/source/tutorial/example_multi_turn.md
@@ -125,5 +125,5 @@ and include them in the init files in `trinity/common/workflows/__init__.py`
 
 Then you are all set! It should be pretty simple😄, and both environments converge.
 
-<img src="../../assets/alfworld_reward_curve.png" width="300" height="150">
-<img src="../../assets/webshop_reward_curve.png" width="300" height="150">
+![ALFWorld reward curve](../../assets/alfworld_reward_curve.png)
+![WebShop reward curve](../../assets/webshop_reward_curve.png)
diff --git a/docs/sphinx_doc/source/tutorial/example_reasoning_advanced.md b/docs/sphinx_doc/source/tutorial/example_reasoning_advanced.md
@@ -11,7 +11,7 @@ Let's continue with the [previous GSM8k example](./example_reasoning_basic.md) a
 
 
 As an experimental feature of Trinity-RFT, we develop an embarrassingly simple off-policy RL algorithm, termed as OPMD (Online Policy Mirror Descent, inspired by [Kimi k1.5](https://arxiv.org/abs/2501.12599)).
-The algorithm design and analysis can be found in this [technical report](./opmd.pdf).
+The algorithm design and analysis can be found in this [technical report](../../assets/opmd.pdf).
 
 
 

diff --git a/docs/sphinx_doc/source/tutorial/trinity_programming_guide.md b/docs/sphinx_doc/source/tutorial/trinity_programming_guide.md
@@ -35,7 +35,7 @@ Each `Task` is a Python dictionary (`Dict[str, Any]`), containing various parame
 
 In the math problem scenario, the `Task` dataset can be a `jsonl` file, where each line’s JSON contains `question` and `answer` fields representing the problem description and standard answer, respectively.
 
-```jsonl
+```json
 {"question": "1+1=", "answer": "2"}
 {"question": "2+2=", "answer": "4"}
 ...

diff --git a/pyproject.toml b/pyproject.toml
@@ -11,7 +11,7 @@ readme = "README.md"
 classifiers = [
     "Development Status :: 3 - Alpha",
     "Intended Audience :: Developers",
-    "License :: OSI Approved :: MIT License",
+    "License :: OSI Approved :: Apache Software License",
     "Programming Language :: Python :: 3 :: Only",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",

diff --git a/trinity/common/experience.py b/trinity/common/experience.py
@@ -48,16 +48,16 @@ def deserialize(data: bytes) -> Experience:
 class Experiences:
     """A container for a batch of experiences, for high performance communication usage.
 
-    Structure:
-
-                    |<- prompt_length ->|               |
-        tokens: ('P' represents prompt, 'O' represents output)
-        exp1:       |........PPPPPPPPPPP|OOOOOOOOOO.....|
-        exp2:       |......PPPPPPPPPPPPP|OOOOOOO........|
-
-        attention_masks: ('.' represents False and '1' represents True)
-        exp1:       |........11111111111|1111111111.....|
-        exp2:       |......1111111111111|1111111........|
+    Example:
+
+        >>>             |<- prompt_length ->|               |
+        >>> tokens: ('P' represents prompt, 'O' represents output)
+        >>> exp1:       |........PPPPPPPPPPP|OOOOOOOOOO.....|
+        >>> exp2:       |......PPPPPPPPPPPPP|OOOOOOO........|
+        >>>
+        >>> attention_masks: ('.' represents False and '1' represents True)
+        >>> exp1:       |........11111111111|1111111111.....|
+        >>> exp2:       |......1111111111111|1111111........|
     """
 
     tokens: Tensor

diff --git a/trinity/explorer/__init__.py b/trinity/explorer/__init__.py
@@ -1,3 +1,4 @@
 from trinity.explorer.explorer import Explorer
+from trinity.explorer.runner_pool import RunnerPool
 
-__all__ = ["Explorer"]
+__all__ = ["Explorer", "RunnerPool"]
diff --git a/trinity/manager/__init__.py b/trinity/manager/__init__.py
@@ -0,0 +1,7 @@
+from trinity.trainer.trainer import TrainEngineWrapper, Trainer, get_trainer_wrapper
+
+__all__ = [
+    "Trainer",
+    "TrainEngineWrapper",
+    "get_trainer_wrapper",
+]
diff --git a/trinity/trainer/__init__.py b/trinity/trainer/__init__.py
@@ -1,3 +1,3 @@
-from trinity.trainer.trainer import Trainer
+from trinity.trainer.trainer import TrainEngineWrapper, Trainer, get_trainer_wrapper
 
-__all__ = ["Trainer"]
+__all__ = ["Trainer", "TrainEngineWrapper", "get_trainer_wrapper"]
Original file line number	Diff line number	Diff line change
Expand Up		@@ -11,7 +11,7 @@ Let's continue with the [previous GSM8k example](./example_reasoning_basic.md) a


		As an experimental feature of Trinity-RFT, we develop an embarrassingly simple off-policy RL algorithm, termed as OPMD (Online Policy Mirror Descent, inspired by [Kimi k1.5](https://arxiv.org/abs/2501.12599)).
		The algorithm design and analysis can be found in this [technical report](./opmd.pdf).
		The algorithm design and analysis can be found in this [technical report](../../assets/opmd.pdf).



Expand Down