Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/docker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ env:
jobs:
build-and-push-image:
runs-on: self-hosted
timeout-minutes: 240 # wait up to 4 hours
timeout-minutes: 480 # wait up to 8 hours
# Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job.
permissions:
contents: read
Expand Down Expand Up @@ -61,8 +61,8 @@ jobs:
with:
context: trinity-${{ github.run_id }}
push: true
file: trinity-${{ github.run_id }}/scripts/docker/Dockerfile
shm-size: 64g
file: trinity-${{ github.run_id }}/scripts/docker/Dockerfile.uv
shm-size: 128g
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

Expand Down
10 changes: 6 additions & 4 deletions .github/workflows/docker/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
services:
trinity-node-1:
image: trinity-rft-unittest:20251030
image: trinity-rft-unittest:20251225
pull_policy: never
command: sh -c "pip install -e .[dev] && ray start --head --dashboard-host 0.0.0.0 --include-dashboard true --block"
command: bash -c "source /opt/venv/bin/activate && uv pip install -e .[dev] && ray start --head --dashboard-host 0.0.0.0 --include-dashboard true --block"
environment:
- HF_ENDPOINT=https://hf-mirror.com
- RAY_ADDRESS=auto
Expand All @@ -13,6 +13,7 @@ services:
- TRINITY_MODEL_PATH=/mnt/models/Qwen3-0.6B
- TRINITY_API_MODEL_PATH=/mnt/models/Qwen3-1.7B
- TRINITY_VLM_MODEL_PATH=/mnt/models/Qwen2.5-VL-3B
- VIRTUAL_ENV=/opt/venv
working_dir: /workspace
networks:
- trinity-network
Expand All @@ -29,14 +30,15 @@ services:
capabilities: [gpu]

trinity-node-2:
image: trinity-rft-unittest:20251030
image: trinity-rft-unittest:20251225
pull_policy: never
command: sh -c "pip install -e .[dev] && ray start --address=trinity-node-1:6379 --block"
command: bash -c "source /opt/venv/bin/activate && uv pip install -e .[dev] && ray start --address=trinity-node-1:6379 --block"
environment:
- HF_ENDPOINT=https://hf-mirror.com
- TRINITY_CHECKPOINT_ROOT_DIR=/mnt/checkpoints
- TRINITY_TASKSET_PATH=/mnt/data
- TRINITY_MODEL_PATH=/mnt/models/Qwen3-1.7B
- VIRTUAL_ENV=/opt/venv
working_dir: /workspace
volumes:
- trinity-volume:/mnt
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pre-commit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: '3.10'
python-version: '3.12'
- uses: pre-commit/[email protected]
4 changes: 2 additions & 2 deletions .github/workflows/sphinx-doc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest]
python-version: ['3.10']
python-version: ['3.12']
env:
OS: ${{ matrix.os }}
PYTHON: '3.10'
PYTHON: '3.12'
steps:
- name: Free up disk space
run: |
Expand Down
11 changes: 7 additions & 4 deletions .github/workflows/unittest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@ jobs:
MAX_RETRIES=20
RETRY_INTERVAL=5
for i in $(seq 1 $MAX_RETRIES); do
docker compose exec trinity-node-1 ray status && docker compose exec trinity-node-2 ray status && break
if docker compose exec trinity-node-1 bash -c "source /opt/venv/bin/activate && ray status" \
&& docker compose exec trinity-node-2 bash -c "source /opt/venv/bin/activate && ray status"; then
break
fi
echo "Waiting for ray cluster to be ready... ($i/$MAX_RETRIES)"
sleep $RETRY_INTERVAL
if [ "$i" -eq "$MAX_RETRIES" ]; then
Expand Down Expand Up @@ -76,12 +79,12 @@ jobs:
TYPE="${{ steps.test_type.outputs.type }}"
if [ "$TYPE" = "all" ]; then
echo "tests_run=true" >> $GITHUB_ENV
docker compose exec trinity-node-1 pytest tests -v -s --ctrf report.json
docker compose exec trinity-node-1 bash -c "source /opt/venv/bin/activate && pytest tests -v -s --ctrf report.json"
elif [ "$TYPE" = "diff" ]; then
if [ -s ../../../test_dirs.txt ]; then
echo "tests_run=true" >> $GITHUB_ENV
TEST_DIRS=$(cat ../../../test_dirs.txt | xargs)
docker compose exec trinity-node-1 pytest $TEST_DIRS -v -s --ctrf report.json
docker compose exec trinity-node-1 bash -c "source /opt/venv/bin/activate && pytest $TEST_DIRS -v -s --ctrf report.json"
else
echo "No changed modules detected, skipping tests."
echo "tests_run=false" >> $GITHUB_ENV
Expand All @@ -90,7 +93,7 @@ jobs:
MODULE="${{ steps.test_type.outputs.module }}"
if [ -n "$MODULE" ]; then
echo "tests_run=true" >> $GITHUB_ENV
docker compose exec trinity-node-1 pytest tests/$MODULE -v -s --ctrf report.json
docker compose exec trinity-node-1 bash -c "source /opt/venv/bin/activate && pytest tests/$MODULE -v -s --ctrf report.json"
else
echo "No module specified, skipping tests."
echo "tests_run=false" >> $GITHUB_ENV
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ repos:
rev: 23.7.0
hooks:
- id: black
language_version: python3.10
language_version: python3.12
args: [--line-length=100]

- repo: https://github.com/pycqa/isort
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ Choose one of the following options:
###### Using Conda

```bash
conda create -n trinity python=3.10
conda create -n trinity python=3.12
conda activate trinity

pip install -e ".[dev]"
Expand Down
2 changes: 1 addition & 1 deletion README_zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ cd Trinity-RFT
#### 使用 Conda

```bash
conda create -n trinity python=3.10
conda create -n trinity python=3.12
conda activate trinity

pip install -e ".[dev]"
Expand Down
2 changes: 1 addition & 1 deletion docs/sphinx_doc/source/tutorial/trinity_installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Choose one of the following options:
#### Using Conda

```bash
conda create -n trinity python=3.10
conda create -n trinity python=3.12
conda activate trinity

pip install -e ".[dev]"
Expand Down
2 changes: 1 addition & 1 deletion docs/sphinx_doc/source_zh/tutorial/trinity_installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ cd Trinity-RFT
#### 使用 Conda

```bash
conda create -n trinity python=3.10
conda create -n trinity python=3.12
conda activate trinity

pip install -e ".[dev]"
Expand Down
2 changes: 1 addition & 1 deletion environments/data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: trinity_data
channels:
- defaults
dependencies:
- python=3.10
- python=3.12
- pip:
- py-data-juicer
- agentscope
Expand Down
2 changes: 1 addition & 1 deletion environments/training.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ name: trinity
channels:
- defaults
dependencies:
- python=3.10
- python=3.12
- pip:
- "-e ..[dev]"
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "trinity-rft"
version = "0.3.3"
version = "0.4.0"
authors = [
{name="Trinity-RFT Team", email="[email protected]"},
]
Expand Down Expand Up @@ -73,6 +73,8 @@ dev = [
]
megatron = [
"megatron-core[mlm]==0.13.1",
# If you hit an "undefined symbol" error from transformer engine,
# reinstall it with the `--no-build-isolation` and `--no-cache-dir` flags.
"transformer_engine[pytorch]==2.8.0",
"mbridge>=0.13.0",
]
Expand Down
3 changes: 2 additions & 1 deletion scripts/docker/Dockerfile.uv
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,9 @@ RUN . /opt/venv/bin/activate && \

# Install flash_attn and Megatron
RUN . /opt/venv/bin/activate && \
uv pip install flash_attn==2.8.1 --no-cache-dir && \
uv pip install flash_attn==2.8.1 --no-build-isolation && \
uv pip install -e .[megatron] && \
uv pip install --reinstall transformer_engine[pytorch]==2.8.0 --no-build-isolation --no-cache-dir && \
NVCC_APPEND_FLAGS="--threads 4" APEX_PARALLEL_BUILD=8 \
uv pip install -v --no-build-isolation \
--config-settings="--build-option=--cpp_ext" \
Expand Down
7 changes: 3 additions & 4 deletions tests/utils/swanlab_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
class TestSwanlabMonitor(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls._original_env = {
"SWANLAB_API_KEY": os.environ.get("SWANLAB_API_KEY"),
}
os.environ["SWANLAB_API_KEY"] = "xxxxxxxxxxxxxxxxxxxxx"

@classmethod
Expand All @@ -31,7 +34,3 @@ def test_swanlab_monitor_smoke(self):
# Log a minimal metric to verify basic flow
mon.log({"smoke/metric": 1.0}, step=1)
mon.close()


if __name__ == "__main__":
unittest.main()
2 changes: 1 addition & 1 deletion trinity/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-
"""Trinity-RFT (Reinforcement Fine-Tuning)"""

__version__ = "0.3.3"
__version__ = "0.4.0"
2 changes: 1 addition & 1 deletion trinity/common/models/vllm_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ async def convert_messages_to_experience(
if len(token_ids) > self.config.max_model_len - 1:
truncate_status = "response_truncated"
self.logger.warning(
f"Warning: {len(token_ids) = } exceeds the length limit {self.config.max_model_len-1 = }"
f"Warning: {len(token_ids)=} exceeds the length limit {(self.config.max_model_len - 1)=}"
)
token_ids = token_ids[: self.config.max_model_len - 1]
action_mask = action_mask[: self.config.max_model_len - 1]
Expand Down