Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dockerfile/Dockerfile.triton.trt_llm_backend
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ ARG TORCH_VER=2.9.0a0+145a3a7bda.nv25.10
ARG TORCHVISION_VER=0.24.0a0+094e7af5

ARG TENSORRTLLM_REPO=https://github.com/NVIDIA/TensorRT-LLM.git
ARG TENSORRTLLM_REPO_TAG=release/1.0
ARG TENSORRTLLM_VER=1.2.0rc2
ARG TENSORRTLLM_REPO_TAG=release/1.1
ARG TENSORRTLLM_VER=1.1.0

FROM ${PYTORCH_IMAGE} AS pytorch_image
FROM ${BASE_IMAGE} AS install_dependencies
Expand Down
2 changes: 1 addition & 1 deletion tensorrt_llm
Submodule tensorrt_llm updated 95 files
+3 −0 .github/CODEOWNERS
+1 −0 .github/pull_request_template.md
+108 −0 .github/tava_architecture_diagram.md
+2 −2 .pre-commit-config.yaml
+15,143 −0 ATTRIBUTIONS-CPP-aarch64.md
+14,951 −0 ATTRIBUTIONS-CPP-x86_64.md
+45,884 −0 ATTRIBUTIONS-Python.md
+3 −0 constraints.txt
+2 −0 cpp/tensorrt_llm/kernels/communicationKernels/mnnvlTwoShotAllreduceKernels.cu
+0 −4 cpp/tensorrt_llm/kernels/cutlass_kernels/fp8_blockscale_gemm/fp8_blockscale_tma_utils.cuh
+4 −5 cpp/tensorrt_llm/kernels/recoverFromRingAtten.cu
+6 −3 cpp/tensorrt_llm/kernels/trtllmGenKernels/gemm/KernelRunner.cpp
+4 −0 docker/Dockerfile.multi
+1 −1 docker/common/install_base.sh
+43 −24 docs/source/commands/trtllm-serve/run-benchmark-with-trtllm-serve.md
+33 −10 docs/source/developer-guide/perf-benchmarking.md
+246 −229 docs/source/developer-guide/perf-overview.md
+4 −14 docs/source/examples/dynamo_k8s_example.rst
+6 −5 docs/source/features/quantization.md
+2 −1 docs/source/legacy/reference/support-matrix.md
+2 −2 docs/source/models/supported-models.md
+12 −12 docs/source/overview.md
+2 −0 docs/source/quick-start-guide.md
+107 −11 docs/source/release-notes.md
+3 −3 examples/auto_deploy/README.md
+40 −2 examples/llm-api/llm_mgmn_llm_distributed.sh
+38 −2 examples/llm-api/llm_mgmn_trtllm_bench.sh
+38 −2 examples/llm-api/llm_mgmn_trtllm_serve.sh
+4 −4 jenkins/current_image_tags.properties
+3 −2 requirements.txt
+16 −16 security_scanning/docs/poetry.lock
+1 −1 security_scanning/docs/pyproject.toml
+10 −11 security_scanning/examples/apps/poetry.lock
+19 −31 security_scanning/examples/auto_deploy/poetry.lock
+12 −24 security_scanning/examples/draft_target_model/poetry.lock
+12 −24 security_scanning/examples/eagle/poetry.lock
+12 −24 security_scanning/examples/llm-eval/lm-eval-harness/poetry.lock
+12 −24 security_scanning/examples/lookahead/poetry.lock
+12 −24 security_scanning/examples/medusa/poetry.lock
+161 −51 security_scanning/examples/models/contrib/baichuan/poetry.lock
+12 −24 security_scanning/examples/models/contrib/bloom/poetry.lock
+24 −36 security_scanning/examples/models/contrib/chatglm-6b/poetry.lock
+1 −1 security_scanning/examples/models/contrib/chatglm-6b/pyproject.toml
+24 −36 security_scanning/examples/models/contrib/chatglm2-6b/poetry.lock
+1 −1 security_scanning/examples/models/contrib/chatglm2-6b/pyproject.toml
+24 −36 security_scanning/examples/models/contrib/chatglm3-6b-32k/poetry.lock
+1 −1 security_scanning/examples/models/contrib/chatglm3-6b-32k/pyproject.toml
+12 −24 security_scanning/examples/models/contrib/dbrx/poetry.lock
+12 −24 security_scanning/examples/models/contrib/deepseek_v1/poetry.lock
+12 −24 security_scanning/examples/models/contrib/deepseek_v2/poetry.lock
+8 −8 security_scanning/examples/models/contrib/falcon/poetry.lock
+12 −24 security_scanning/examples/models/contrib/gptj/poetry.lock
+12 −24 security_scanning/examples/models/contrib/gptneox/poetry.lock
+27 −39 security_scanning/examples/models/contrib/grok/poetry.lock
+11 −23 security_scanning/examples/models/contrib/hyperclovax/poetry.lock
+12 −24 security_scanning/examples/models/contrib/internlm/poetry.lock
+12 −24 security_scanning/examples/models/contrib/jais/poetry.lock
+112 −18 security_scanning/examples/models/contrib/mmdit/poetry.lock
+12 −24 security_scanning/examples/models/contrib/mpt/poetry.lock
+12 −24 security_scanning/examples/models/contrib/opt/poetry.lock
+12 −24 security_scanning/examples/models/contrib/skywork/poetry.lock
+12 −24 security_scanning/examples/models/contrib/smaug/poetry.lock
+168 −167 security_scanning/examples/models/contrib/stdit/poetry.lock
+2 −2 security_scanning/examples/models/contrib/stdit/pyproject.toml
+12 −24 security_scanning/examples/models/core/commandr/poetry.lock
+29 −41 security_scanning/examples/models/core/gemma/poetry.lock
+24 −36 security_scanning/examples/models/core/glm-4-9b/poetry.lock
+1 −1 security_scanning/examples/models/core/glm-4-9b/pyproject.toml
+12 −24 security_scanning/examples/models/core/gpt/poetry.lock
+8 −8 security_scanning/examples/models/core/llama/poetry.lock
+8 −8 security_scanning/examples/models/core/mamba/poetry.lock
+9 −9 security_scanning/examples/models/core/mixtral/poetry.lock
+12 −12 security_scanning/examples/models/core/mllama/poetry.lock
+12 −24 security_scanning/examples/models/core/nemotron/poetry.lock
+12 −24 security_scanning/examples/models/core/phi/poetry.lock
+244 −253 security_scanning/examples/models/core/qwen/poetry.lock
+2 −2 security_scanning/examples/models/core/qwen/pyproject.toml
+8 −8 security_scanning/examples/models/core/qwen2audio/poetry.lock
+273 −171 security_scanning/examples/models/core/qwenvl/poetry.lock
+1 −1 security_scanning/examples/models/core/qwenvl/pyproject.toml
+19 −19 security_scanning/examples/models/core/recurrentgemma/poetry.lock
+57 −57 security_scanning/examples/models/core/whisper/poetry.lock
+1 −1 security_scanning/examples/models/core/whisper/pyproject.toml
+12 −24 security_scanning/examples/ngram/poetry.lock
+161 −51 security_scanning/examples/quantization/poetry.lock
+12 −24 security_scanning/examples/redrafter/poetry.lock
+12 −24 security_scanning/examples/trtllm-eval/poetry.lock
+248 −226 security_scanning/poetry.lock
+7 −6 security_scanning/pyproject.toml
+112 −120 security_scanning/tests/integration/defs/perf/poetry.lock
+1 −1 security_scanning/tests/integration/defs/perf/pyproject.toml
+9 −9 security_scanning/triton_backend/poetry.lock
+18 −10 tensorrt_llm/_torch/modules/fused_moe/fused_moe_trtllm_gen.py
+2 −1 tensorrt_llm/_torch/pyexecutor/py_executor_creator.py
+1 −1 tests/integration/test_lists/waives.txt
Loading