revert to ubuntu-latest runners, switch dataset source to BAB v0.4.0 #47
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
# DiskANN Benchmarks Workflow
#
# This workflow runs macro benchmarks comparing the current branch against a baseline.
# It can be triggered manually with a baseline reference (branch, tag, or commit);
# runs triggered by a push compare against `main`.
name: Benchmarks

# Triggers:
#   - workflow_dispatch: manual run; `baseline_ref` selects what to compare against.
#   - push: automatic run on the pipeline development branch when the benchmark
#     inputs, this workflow, or the validation script change. `inputs.baseline_ref`
#     is empty for push events, so the jobs fall back to 'main'.
on:
  workflow_dispatch:
    inputs:
      baseline_ref:
        description: 'A branch, commit SHA, or tag name to compare the current branch with'
        required: true
        default: 'main'
        type: string
  push:
    branches:
      - 'user/tianyuanyuan/add-benchmark-pipeline'
    paths:
      - 'diskann-benchmark/perf_test_inputs/**-disk-index.json'
      - '.github/workflows/benchmarks.yml'
      - '.github/scripts/benchmark_validate.py'

# Cancel in-progress runs when a new run is triggered for the same ref.
# The group is keyed on github.ref: this workflow has no pull_request trigger, so
# github.event.pull_request.number is always empty, and keying on github.sha would
# put every commit in its own group (nothing would ever be cancelled).
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  RUST_BACKTRACE: 1
  # Rust toolchain version installed by the jobs. The value is hard-coded here;
  # keep it in sync with rust-toolchain.toml.
  rust_stable: "1.92"

defaults:
  run:
    shell: bash

permissions:
  contents: read
  pull-requests: write  # Required for posting PR comments
# Both jobs follow the same sequence: check out the current branch and the baseline
# side by side, build with the pinned Rust toolchain, download a dataset once and
# copy it into both trees, run the disk-index benchmark in each tree using the
# current branch's input file, validate baseline vs. target, and upload both
# result files as an artifact.
jobs:
  # Macro benchmark: Wikipedia-100K dataset
  macro-benchmark-wikipedia-100K:
    name: Macro Benchmark - Wikipedia 100K
    runs-on: ubuntu-latest
    # TODO: For production benchmarks, consider using a self-hosted runner with:
    # - NVMe storage for consistent I/O performance
    # - CPU pinning (taskset) for reduced variance
    # - Dedicated hardware to avoid noisy neighbor effects
    timeout-minutes: 120
    steps:
      - name: Checkout current branch
        uses: actions/checkout@v4
        with:
          path: diskann_rust
          lfs: true

      - name: Checkout baseline (${{ inputs.baseline_ref || 'main' }})
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.baseline_ref || 'main' }}
          path: baseline
          lfs: true

      - name: Install Rust ${{ env.rust_stable }}
        uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ env.rust_stable }}

      - name: Cache Rust dependencies (current)
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: diskann_rust -> target
          key: benchmark-current

      - name: Cache Rust dependencies (baseline)
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: baseline -> target
          key: benchmark-baseline

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y openssl libssl-dev pkg-config

      # Download pre-packaged Wikipedia-100K dataset from GitHub Release
      # Dataset: 100K Cohere Wikipedia embeddings (768-dim, float32, cosine distance)
      # Source: https://github.com/harsha-simhadri/big-ann-benchmarks
      - name: Download wikipedia-100K dataset
        run: |
          mkdir -p diskann_rust/target/tmp baseline/target/tmp
          # --fail makes curl exit non-zero on an HTTP error instead of saving the
          # error page as the archive; --retry covers transient network failures.
          curl --fail --location --retry 3 -o wikipedia-100K.tar.gz \
            https://github.com/harsha-simhadri/big-ann-benchmarks/releases/download/v0.4.0/wikipedia-100K.tar.gz
          tar xzf wikipedia-100K.tar.gz -C diskann_rust/target/tmp/
          cp -r diskann_rust/target/tmp/wikipedia_cohere baseline/target/tmp/

      # The baseline tree is benchmarked with the current branch's input file so
      # both runs use the same benchmark configuration.
      - name: Run baseline benchmark
        working-directory: baseline
        run: |
          # Note: For accurate benchmarks, consider using CPU pinning on self-hosted runners:
          # sudo taskset -c 0,2,4,6 ionice -c 1 -n 0 cargo run ...
          cargo run -p diskann-benchmark --features disk-index --release -- \
            run --input-file ../diskann_rust/diskann-benchmark/perf_test_inputs/wikipedia-100K-disk-index.json \
            --output-file target/tmp/wikipedia-100K_benchmark_crate_baseline.json

      - name: Run current branch benchmark
        working-directory: diskann_rust
        run: |
          cargo run -p diskann-benchmark --features disk-index --release -- \
            run --input-file diskann-benchmark/perf_test_inputs/wikipedia-100K-disk-index.json \
            --output-file target/tmp/wikipedia-100K_benchmark_crate_target.json

      - name: Validate benchmark results
        run: |
          python diskann_rust/.github/scripts/benchmark_validate.py \
            --mode pr \
            --baseline baseline/target/tmp/wikipedia-100K_benchmark_crate_baseline.json \
            --target diskann_rust/target/tmp/wikipedia-100K_benchmark_crate_target.json \
            --title 'Benchmark Results: Wikipedia-100K Dataset'
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GITHUB_REPOSITORY: ${{ github.repository }}
          # NOTE(review): this workflow has no pull_request trigger, so this
          # expression is empty for every run; confirm benchmark_validate.py
          # handles `--mode pr` without a PR number.
          GITHUB_PR_NUMBER: ${{ github.event.pull_request.number }}
          GITHUB_RUN_ID: ${{ github.run_id }}

      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        if: always()  # Upload even if validation fails
        with:
          name: benchmark-results-wikipedia-100K
          path: |
            diskann_rust/target/tmp/wikipedia-100K_benchmark_crate_target.json
            baseline/target/tmp/wikipedia-100K_benchmark_crate_baseline.json
          retention-days: 30

  # Macro benchmark: OpenAI ArXiv dataset
  macro-benchmark-oai-large:
    name: Macro Benchmark - OAI ArXiv 100K
    runs-on: ubuntu-latest
    # TODO: For production benchmarks, consider using a self-hosted runner
    timeout-minutes: 120
    steps:
      - name: Checkout current branch
        uses: actions/checkout@v4
        with:
          path: diskann_rust
          lfs: true

      - name: Checkout baseline (${{ inputs.baseline_ref || 'main' }})
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.baseline_ref || 'main' }}
          path: baseline
          lfs: true

      - name: Install Rust ${{ env.rust_stable }}
        uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ env.rust_stable }}

      - name: Cache Rust dependencies (current)
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: diskann_rust -> target
          key: benchmark-current

      - name: Cache Rust dependencies (baseline)
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: baseline -> target
          key: benchmark-baseline

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y openssl libssl-dev pkg-config

      # Download pre-packaged OpenAI ArXiv 100K dataset from GitHub Release
      # Dataset: 100K OpenAI embeddings of ArXiv papers (1536-dim, float32, euclidean distance)
      # Source: https://github.com/harsha-simhadri/big-ann-benchmarks
      - name: Download openai-100K dataset
        run: |
          mkdir -p diskann_rust/target/tmp baseline/target/tmp
          # --fail makes curl exit non-zero on an HTTP error instead of saving the
          # error page as the archive; --retry covers transient network failures.
          curl --fail --location --retry 3 -o openai-100K.tar.gz \
            https://github.com/harsha-simhadri/big-ann-benchmarks/releases/download/v0.4.0/openai-100K.tar.gz
          tar xzf openai-100K.tar.gz -C diskann_rust/target/tmp/
          cp -r diskann_rust/target/tmp/OpenAIArXiv baseline/target/tmp/

      # The baseline tree is benchmarked with the current branch's input file so
      # both runs use the same benchmark configuration.
      - name: Run baseline benchmark
        working-directory: baseline
        run: |
          cargo run -p diskann-benchmark --features disk-index --release -- \
            run --input-file ../diskann_rust/diskann-benchmark/perf_test_inputs/openai-100K-disk-index.json \
            --output-file target/tmp/openai-100K_benchmark_crate_baseline.json

      - name: Run current branch benchmark
        working-directory: diskann_rust
        run: |
          cargo run -p diskann-benchmark --features disk-index --release -- \
            run --input-file diskann-benchmark/perf_test_inputs/openai-100K-disk-index.json \
            --output-file target/tmp/openai-100K_benchmark_crate_target.json

      - name: Validate benchmark results
        run: |
          python diskann_rust/.github/scripts/benchmark_validate.py \
            --mode pr \
            --baseline baseline/target/tmp/openai-100K_benchmark_crate_baseline.json \
            --target diskann_rust/target/tmp/openai-100K_benchmark_crate_target.json \
            --title 'Benchmark Results: OpenAI ArXiv 100K Dataset'
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GITHUB_REPOSITORY: ${{ github.repository }}
          # NOTE(review): this workflow has no pull_request trigger, so this
          # expression is empty for every run; confirm benchmark_validate.py
          # handles `--mode pr` without a PR number.
          GITHUB_PR_NUMBER: ${{ github.event.pull_request.number }}
          GITHUB_RUN_ID: ${{ github.run_id }}

      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        if: always()  # Upload even if validation fails
        with:
          name: benchmark-results-openai-100K
          path: |
            diskann_rust/target/tmp/openai-100K_benchmark_crate_target.json
            baseline/target/tmp/openai-100K_benchmark_crate_baseline.json
          retention-days: 30