# NOTE: the lines below are GitHub web-UI residue captured when this workflow
# file was copied from the Actions run page; kept here as comments for context.
# PR #47: revert to ubuntu-latest runners, switch dataset source to BAB v0.4.0
# (Workflow file for this run)

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.
# DiskANN Benchmarks Workflow
#
# This workflow runs macro benchmarks comparing the current branch against a baseline.
# It is manually triggered (with a baseline ref), and also runs on pushes to the
# pipeline branch that touch benchmark inputs, this workflow, or the validator script.
name: Benchmarks

on:
  workflow_dispatch:
    inputs:
      baseline_ref:
        description: 'A branch, commit SHA, or tag name to compare the current branch with'
        required: true
        default: 'main'
        type: string
  push:
    branches:
      - 'user/tianyuanyuan/add-benchmark-pipeline'
    paths:
      - 'diskann-benchmark/perf_test_inputs/**-disk-index.json'
      - '.github/workflows/benchmarks.yml'
      - '.github/scripts/benchmark_validate.py'

# Cancel in-progress runs when a new run is triggered.
# github.event.pull_request.number is empty for workflow_dispatch/push, so the
# group falls back to the commit SHA.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
  cancel-in-progress: true

env:
  RUST_BACKTRACE: 1
  # NOTE(review): the comment below says the version comes from rust-toolchain.toml,
  # but it is hard-coded here — keep the two in sync, or derive it from the file.
  # Use the Rust version specified in rust-toolchain.toml
  rust_stable: "1.92"

defaults:
  run:
    shell: bash

permissions:
  contents: read
  pull-requests: write # Required for posting PR comments
jobs:
  # Macro benchmark: Wikipedia-100K dataset
  macro-benchmark-wikipedia-100K:
    name: Macro Benchmark - Wikipedia 100K
    runs-on: ubuntu-latest
    # TODO: For production benchmarks, consider using a self-hosted runner with:
    # - NVMe storage for consistent I/O performance
    # - CPU pinning (taskset) for reduced variance
    # - Dedicated hardware to avoid noisy neighbor effects
    timeout-minutes: 120
    steps:
      - name: Checkout current branch
        uses: actions/checkout@v4
        with:
          path: diskann_rust
          lfs: true

      # inputs.baseline_ref is only populated on workflow_dispatch; push runs
      # fall back to comparing against main.
      - name: Checkout baseline (${{ inputs.baseline_ref || 'main' }})
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.baseline_ref || 'main' }}
          path: baseline
          lfs: true

      - name: Install Rust ${{ env.rust_stable }}
        uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ env.rust_stable }}

      # NOTE(review): rust-cache adds a job-based component to its key by default;
      # verify the shared "benchmark-current"/"benchmark-baseline" suffixes do not
      # collide with the other benchmark job.
      - name: Cache Rust dependencies (current)
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: diskann_rust -> target
          key: benchmark-current

      - name: Cache Rust dependencies (baseline)
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: baseline -> target
          key: benchmark-baseline

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y openssl libssl-dev pkg-config

      # Download pre-packaged Wikipedia-100K dataset from GitHub Release
      # Dataset: 100K Cohere Wikipedia embeddings (768-dim, float32, cosine distance)
      # Source: https://github.com/harsha-simhadri/big-ann-benchmarks
      - name: Download wikipedia-100K dataset
        run: |
          mkdir -p diskann_rust/target/tmp baseline/target/tmp
          curl -L -o wikipedia-100K.tar.gz https://github.com/harsha-simhadri/big-ann-benchmarks/releases/download/v0.4.0/wikipedia-100K.tar.gz
          tar xzf wikipedia-100K.tar.gz -C diskann_rust/target/tmp/
          cp -r diskann_rust/target/tmp/wikipedia_cohere baseline/target/tmp/

      - name: Run baseline benchmark
        working-directory: baseline
        run: |
          # Note: For accurate benchmarks, consider using CPU pinning on self-hosted runners:
          # sudo taskset -c 0,2,4,6 ionice -c 1 -n 0 cargo run ...
          cargo run -p diskann-benchmark --features disk-index --release -- \
            run --input-file ../diskann_rust/diskann-benchmark/perf_test_inputs/wikipedia-100K-disk-index.json \
            --output-file target/tmp/wikipedia-100K_benchmark_crate_baseline.json

      - name: Run current branch benchmark
        working-directory: diskann_rust
        run: |
          cargo run -p diskann-benchmark --features disk-index --release -- \
            run --input-file diskann-benchmark/perf_test_inputs/wikipedia-100K-disk-index.json \
            --output-file target/tmp/wikipedia-100K_benchmark_crate_target.json

      - name: Validate benchmark results
        run: |
          python diskann_rust/.github/scripts/benchmark_validate.py \
            --mode pr \
            --baseline baseline/target/tmp/wikipedia-100K_benchmark_crate_baseline.json \
            --target diskann_rust/target/tmp/wikipedia-100K_benchmark_crate_target.json \
            --title 'Benchmark Results: Wikipedia-100K Dataset'
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GITHUB_REPOSITORY: ${{ github.repository }}
          # Empty on workflow_dispatch/push events — the validator must tolerate that.
          GITHUB_PR_NUMBER: ${{ github.event.pull_request.number }}
          GITHUB_RUN_ID: ${{ github.run_id }}

      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        if: always() # Upload even if validation fails
        with:
          name: benchmark-results-wikipedia-100K
          path: |
            diskann_rust/target/tmp/wikipedia-100K_benchmark_crate_target.json
            baseline/target/tmp/wikipedia-100K_benchmark_crate_baseline.json
          retention-days: 30
# Macro benchmark: OpenAI ArXiv dataset
macro-benchmark-oai-large:
name: Macro Benchmark - OAI ArXiv 100K
runs-on: ubuntu-latest
# TODO: For production benchmarks, consider using a self-hosted runner
timeout-minutes: 120
steps:
- name: Checkout current branch
uses: actions/checkout@v4
with:
path: diskann_rust
lfs: true
- name: Checkout baseline (${{ inputs.baseline_ref || 'main' }})
uses: actions/checkout@v4
with:
ref: ${{ inputs.baseline_ref || 'main' }}
path: baseline
lfs: true
- name: Install Rust ${{ env.rust_stable }}
uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.rust_stable }}
- name: Cache Rust dependencies (current)
uses: Swatinem/rust-cache@v2
with:
workspaces: diskann_rust -> target
key: benchmark-current
- name: Cache Rust dependencies (baseline)
uses: Swatinem/rust-cache@v2
with:
workspaces: baseline -> target
key: benchmark-baseline
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y openssl libssl-dev pkg-config
# Download pre-packaged OpenAI ArXiv 100K dataset from GitHub Release
# Dataset: 100K OpenAI embeddings of ArXiv papers (1536-dim, float32, euclidean distance)
# Source: https://github.com/harsha-simhadri/big-ann-benchmarks
- name: Download openai-100K dataset
run: |
mkdir -p diskann_rust/target/tmp baseline/target/tmp
curl -L -o openai-100K.tar.gz https://github.com/harsha-simhadri/big-ann-benchmarks/releases/download/v0.4.0/openai-100K.tar.gz
tar xzf openai-100K.tar.gz -C diskann_rust/target/tmp/
cp -r diskann_rust/target/tmp/OpenAIArXiv baseline/target/tmp/
- name: Run baseline benchmark
working-directory: baseline
run: |
cargo run -p diskann-benchmark --features disk-index --release -- \
run --input-file ../diskann_rust/diskann-benchmark/perf_test_inputs/openai-100K-disk-index.json \
--output-file target/tmp/openai-100K_benchmark_crate_baseline.json
- name: Run current branch benchmark
working-directory: diskann_rust
run: |
cargo run -p diskann-benchmark --features disk-index --release -- \
run --input-file diskann-benchmark/perf_test_inputs/openai-100K-disk-index.json \
--output-file target/tmp/openai-100K_benchmark_crate_target.json
- name: Validate benchmark results
run: |
python diskann_rust/.github/scripts/benchmark_validate.py \
--mode pr \
--baseline baseline/target/tmp/openai-100K_benchmark_crate_baseline.json \
--target diskann_rust/target/tmp/openai-100K_benchmark_crate_target.json \
--title 'Benchmark Results: OpenAI ArXiv 100K Dataset'
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_PR_NUMBER: ${{ github.event.pull_request.number }}
GITHUB_RUN_ID: ${{ github.run_id }}
- name: Upload benchmark results
uses: actions/upload-artifact@v4
if: always() # Upload even if validation fails
with:
name: benchmark-results-openai-100K
path: |
diskann_rust/target/tmp/openai-100K_benchmark_crate_target.json
baseline/target/tmp/openai-100K_benchmark_crate_baseline.json
retention-days: 30