tpch: fix Parquet streaming + align batch size with tpcds #103
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow header: display name, triggers and run de-duplication.
name: 'CI - Build & Benchmark'

# Runs on pushes and pull requests that target the two long-lived branches,
# and can also be started manually.
on:
  push:
    branches: [master, develop]
  pull_request:
    branches: [master, develop]
  workflow_dispatch:

# Runs sharing a group cancel each other. The group key is the workflow name
# plus the PR head branch when there is one, otherwise the run id.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
| jobs: | |
# Resolve the best available Docker image tag for each config.
# Prefers a branch-specific tag (e.g. tsafin-lance_stream) over :latest so
# that PRs with updated lance-ffi/third_party sources automatically use the
# matching pre-compiled image built by the docker-images workflow.
#
# Outputs: base_image, orc_image, lance_image - full image references that the
# downstream jobs use as their container image.
resolve-images:
  name: Resolve Docker image tags
  runs-on: ubuntu-22.04
  outputs:
    base_image: ${{ steps.resolve.outputs.base_image }}
    orc_image: ${{ steps.resolve.outputs.orc_image }}
    lance_image: ${{ steps.resolve.outputs.lance_image }}
  steps:
    - name: Log in to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    - name: Resolve image tags
      id: resolve
      env:
        # A pull request's branch name is attacker-controlled text. It is
        # handed to the script through the environment instead of being
        # expanded inside the script body, where it would be parsed as shell
        # code.
        BRANCH: ${{ github.head_ref || github.ref_name }}
        IMAGE_PREFIX: ghcr.io/${{ github.repository_owner }}/tpch-cpp
      run: |
        # Sanitize branch name the same way docker/metadata-action does:
        # type=ref,event=branch replaces '/' with '-' and lowercases, keeps underscores
        BRANCH_TAG=$(printf '%s' "$BRANCH" | tr '/' '-' | tr '[:upper:]' '[:lower:]')
        echo "Branch: $BRANCH → tag: $BRANCH_TAG"
        for CONFIG in base orc lance; do
          BRANCH_IMAGE="${IMAGE_PREFIX}-${CONFIG}:${BRANCH_TAG}"
          LATEST_IMAGE="${IMAGE_PREFIX}-${CONFIG}:latest"
          # Probe the registry for the branch-specific image; fall back to :latest
          if docker manifest inspect "$BRANCH_IMAGE" > /dev/null 2>&1; then
            echo "Using branch image: $BRANCH_IMAGE"
            echo "${CONFIG}_image=$BRANCH_IMAGE" >> "$GITHUB_OUTPUT"
          else
            echo "Branch image not found, falling back to: $LATEST_IMAGE"
            echo "${CONFIG}_image=$LATEST_IMAGE" >> "$GITHUB_OUTPUT"
          fi
        done
# Builds the project once per configuration (base / orc / lance) inside the
# matching pre-compiled dependency image, runs that configuration's unit tests
# and publishes the binaries as the artifact "tpch-benchmark-<config>".
build-matrix:
  name: Build (${{ matrix.config }})
  runs-on: ubuntu-22.04
  needs: resolve-images
  timeout-minutes: 20
  container:
    image: ${{ matrix.config == 'base' && needs.resolve-images.outputs.base_image || matrix.config == 'orc' && needs.resolve-images.outputs.orc_image || needs.resolve-images.outputs.lance_image }}
    options: --user root
    credentials:
      username: ${{ github.actor }}
      password: ${{ secrets.GITHUB_TOKEN }}
  strategy:
    fail-fast: false
    matrix:
      # ON/OFF are quoted so that no YAML loader can read them as booleans;
      # they are passed verbatim to CMake and compared as strings below.
      include:
        - config: base
          enable_orc: "OFF"
          enable_lance: "OFF"
          enable_tests: "ON"
          deps_path: /opt/dependencies
        - config: orc
          enable_orc: "ON"
          enable_lance: "OFF"
          enable_tests: "ON"
          deps_path: /opt/dependencies
        - config: lance
          enable_orc: "OFF"
          enable_lance: "ON"
          enable_tests: "ON"
          deps_path: /opt/dependencies
  steps:
    - name: Checkout code with submodules
      uses: actions/checkout@v4
      with:
        submodules: recursive
        fetch-depth: 1
    - name: Verify pre-compiled dependencies
      run: |
        # Informational listing only: every ls is allowed to fail.
        DEPS_LIB="${{ matrix.deps_path }}/lib"
        echo "=== Checking pre-compiled dependencies in Docker image ==="
        ls -lh "$DEPS_LIB"/libarrow* "$DEPS_LIB"/libparquet* || true
        if [ "${{ matrix.config }}" = "orc" ]; then
          ls -lh "$DEPS_LIB"/liborc* || true
        fi
        if [ "${{ matrix.config }}" = "lance" ]; then
          ls -lh "$DEPS_LIB"/liblance_ffi.a || true
          rustc --version
          cargo --version
        fi
    - name: Configure CMake
      run: |
        cmake -B build \
          -DCMAKE_BUILD_TYPE=RelWithDebInfo \
          -DCMAKE_PREFIX_PATH=${{ matrix.deps_path }} \
          -DTPCH_ENABLE_ORC=${{ matrix.enable_orc }} \
          -DTPCH_ENABLE_LANCE=${{ matrix.enable_lance }} \
          -DTPCH_ENABLE_NATIVE_OPTIMIZATIONS=OFF \
          -DTPCH_ENABLE_ASYNC_IO=ON \
          -DTPCH_ENABLE_ASAN=OFF \
          -DTPCH_BUILD_TESTS=${{ matrix.enable_tests }} \
          -DTPCDS_ENABLE=ON
    - name: Build project
      run: cmake --build build -j$(nproc)
    - name: Verify executable and tests
      run: |
        # Both benchmark executables are consumed by the downstream jobs, so a
        # missing one must fail the build here. (A bare `test -f X && echo ...`
        # never fails the step: `set -e` ignores a failing non-final command
        # of an && list.)
        for exe in tpch_benchmark tpcds_benchmark; do
          if [ -f "build/$exe" ]; then
            echo "✓ $exe created"
          else
            echo "✗ $exe missing"
            exit 1
          fi
        done
        # Test binaries are only reported here; the "Run unit tests" step
        # fails on the ones this configuration actually needs.
        for test_bin in buffer_lifetime_manager_test dbgen_batch_iterator_test; do
          test -f "build/tests/$test_bin" && echo "✓ $test_bin created" || true
        done
        if [ "${{ matrix.enable_lance }}" = "ON" ]; then
          test -f build/tests/lance_writer_test && echo "✓ lance_writer_test created" || echo "✗ lance_writer_test missing"
        fi
    - name: Run unit tests
      run: |
        # Define which tests to run for each configuration
        case "${{ matrix.config }}" in
          base)
            # Run common/core tests only in base build
            TESTS="buffer_lifetime_manager_test dbgen_batch_iterator_test"
            ;;
          orc)
            # ORC doesn't have format-specific tests yet, skip to avoid redundancy
            TESTS=""
            echo "No ORC-specific tests to run (common tests run in base config)"
            ;;
          lance)
            # Run Lance-specific tests only
            TESTS="lance_writer_test"
            ;;
          *)
            echo "Unknown config: ${{ matrix.config }}"
            exit 1
            ;;
        esac
        # Run the specified tests from build directory (where dists.dss exists)
        cd build
        for test_name in $TESTS; do
          if [ -x "tests/$test_name" ]; then
            echo "============================"
            echo "Running: $test_name"
            echo "============================"
            "./tests/$test_name" --gtest_output=xml:"tests/${test_name}_results.xml" || exit 1
          else
            echo "ERROR: Expected test executable not found: tests/$test_name"
            exit 1
          fi
        done
    - name: Upload build artifact
      uses: actions/upload-artifact@v4
      with:
        name: tpch-benchmark-${{ matrix.config }}
        path: |
          build/tpch_benchmark
          build/tpcds_benchmark
          build/tests/*_test
        retention-days: 1
        if-no-files-found: error
# Format coverage for TPC-H: generates one table per job at scale factor 1 in
# every supported output format, using the binary built by build-matrix.
tpch-benchmark-suite:
  name: TPC-H Benchmark Suite
  runs-on: ubuntu-22.04
  needs: [resolve-images, build-matrix]
  timeout-minutes: 20
  container:
    image: ${{ matrix.build == 'base' && needs.resolve-images.outputs.base_image || matrix.build == 'orc' && needs.resolve-images.outputs.orc_image || needs.resolve-images.outputs.lance_image }}
    options: --user root
    credentials:
      username: ${{ github.actor }}
      password: ${{ secrets.GITHUB_TOKEN }}
  strategy:
    fail-fast: false
    matrix:
      # CSV and Parquet format - all tables, base build (2 x 8 combinations).
      format: [csv, parquet]
      table: [lineitem, orders, customer, part, partsupp, supplier, nation, region]
      build: [base]
      # Each entry below conflicts with the cross product above on `format`,
      # so it is added as an extra, stand-alone combination.
      include:
        # ORC format - lineitem, customer, orders
        - format: orc
          table: lineitem
          build: orc
        - format: orc
          table: customer
          build: orc
        - format: orc
          table: orders
          build: orc
        # Lance format - lineitem, customer, orders
        - format: lance
          table: lineitem
          build: lance
        - format: lance
          table: customer
          build: lance
        - format: lance
          table: orders
          build: lance
  steps:
    - name: Checkout code (for dists.dss and scripts)
      uses: actions/checkout@v4
      with:
        fetch-depth: 1
    - name: Checkout tpch submodule (for dists.dss)
      run: |
        git config --global --add safe.directory "$GITHUB_WORKSPACE"
        git submodule update --init --depth 1 -- third_party/tpch
    - name: Download build artifact
      uses: actions/download-artifact@v4
      with:
        name: tpch-benchmark-${{ matrix.build }}
        path: .
    - name: Setup benchmark executable
      run: |
        chmod +x tpch_benchmark
        mkdir -p benchmark-results
        export LD_LIBRARY_PATH=/opt/dependencies/lib:$LD_LIBRARY_PATH
        echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >> "$GITHUB_ENV"
    - name: Verify executable supports required format
      run: |
        echo "=== Verifying build artifact ==="
        echo "Expected format: ${{ matrix.format }}"
        echo "Expected build: ${{ matrix.build }}"
        ./tpch_benchmark --help | head -20 || true
    - name: Run format coverage benchmark
      run: |
        # Copy dists.dss to current directory (required by dbgen)
        cp third_party/tpch/dbgen/dists.dss . 2>/dev/null || true
        LOG="benchmark-results/${{ matrix.format }}_${{ matrix.table }}_baseline.log"
        RAW_LOG="$(mktemp)"
        # Capture the benchmark's own exit status. Piping it straight into
        # grep/tee would report tee's status instead (the default shell does
        # not enable pipefail), hiding crashes and timeouts. The price is that
        # output is printed after the run instead of while it is running.
        STATUS=0
        timeout 600 ./tpch_benchmark \
          --use-dbgen \
          --scale-factor 1 \
          --format ${{ matrix.format }} \
          --table ${{ matrix.table }} \
          --output-dir benchmark-results/ \
          > "$RAW_LOG" 2>&1 || STATUS=$?
        # grep exits 1 when every line was filtered out; that is not an error.
        grep -v "^DEBUG:" "$RAW_LOG" | tee "$LOG" || true
        rm -f "$RAW_LOG"
        if [ "$STATUS" -ne 0 ]; then
          # 124 means the 600 second timeout fired.
          echo "ERROR: Benchmark failed with exit code $STATUS"
          exit 1
        fi
        # Fail if process dumped core
        if grep -q "dumped core" "$LOG"; then
          echo "ERROR: Benchmark crashed with core dump"
          exit 1
        fi
        # Fail if unsupported format
        if grep -qi "unknown format\|unsupported format\|not supported" "$LOG"; then
          echo "ERROR: Format ${{ matrix.format }} not supported by this build"
          exit 1
        fi
    - name: Upload benchmark logs
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: tpch-benchmark-logs-suite-${{ matrix.format }}-${{ matrix.table }}
        path: benchmark-results/${{ matrix.format }}_${{ matrix.table }}_baseline.log
        retention-days: 30
        if-no-files-found: ignore
# Compares the TPC-H write paths (baseline, --zero-copy, --true-zero-copy) for
# Parquet, ORC and Lance on three tables at scale factor 1.
tpch-optimization-benchmarks:
  name: TPC-H Optimization Benchmarks (${{ matrix.format }}-${{ matrix.mode }})
  runs-on: ubuntu-22.04
  needs: [resolve-images, build-matrix]
  timeout-minutes: 20
  container:
    image: ${{ matrix.image == 'base' && needs.resolve-images.outputs.base_image || matrix.image == 'orc' && needs.resolve-images.outputs.orc_image || needs.resolve-images.outputs.lance_image }}
    options: --user root
    credentials:
      username: ${{ github.actor }}
      password: ${{ secrets.GITHUB_TOKEN }}
  strategy:
    fail-fast: false
    matrix:
      # Full cross product: 3 formats x 3 modes x 3 tables.
      format: [parquet, orc, lance]
      mode: [baseline, zero-copy, true-zero-copy]
      table: [lineitem, orders, part]
      # `image` is not a matrix dimension, so each entry below only attaches
      # the build/image name to the combinations of its format.
      include:
        # Parquet benchmarks use base image
        - format: parquet
          image: base
        # ORC benchmarks use orc image
        - format: orc
          image: orc
        # Lance benchmarks use lance image
        - format: lance
          image: lance
  steps:
    - name: Checkout code (for dists.dss and scripts)
      uses: actions/checkout@v4
      with:
        fetch-depth: 1
    - name: Checkout tpch submodule (for dists.dss)
      run: |
        git config --global --add safe.directory "$GITHUB_WORKSPACE"
        git submodule update --init --depth 1 -- third_party/tpch
    - name: Download build artifact
      uses: actions/download-artifact@v4
      with:
        name: tpch-benchmark-${{ matrix.image }}
        path: .
    - name: Setup benchmark executable
      run: |
        chmod +x tpch_benchmark
        mkdir -p benchmark-results
        export LD_LIBRARY_PATH=/opt/dependencies/lib:$LD_LIBRARY_PATH
        echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >> "$GITHUB_ENV"
    - name: Verify executable supports required format
      run: |
        echo "=== Verifying build artifact ==="
        echo "Expected format: ${{ matrix.format }}"
        ./tpch_benchmark --help | head -20 || true
    - name: Run optimization benchmark
      run: |
        # Copy dists.dss to current directory (required by dbgen)
        cp third_party/tpch/dbgen/dists.dss . 2>/dev/null || true
        # Translate the matrix mode into the benchmark's command-line switch;
        # baseline passes no extra flag.
        case "${{ matrix.mode }}" in
          zero-copy)      MODE_FLAGS="--zero-copy" ;;
          true-zero-copy) MODE_FLAGS="--true-zero-copy" ;;
          *)              MODE_FLAGS="" ;;
        esac
        LOG="benchmark-results/${{ matrix.format }}_${{ matrix.table }}_${{ matrix.mode }}.log"
        RAW_LOG="$(mktemp)"
        # Capture the benchmark's own exit status. Piping it straight into
        # grep/tee would report tee's status instead (the default shell does
        # not enable pipefail), hiding crashes and timeouts. The price is that
        # output is printed after the run instead of while it is running.
        # $MODE_FLAGS is left unquoted on purpose so an empty value adds no
        # argument.
        STATUS=0
        timeout 600 ./tpch_benchmark \
          --use-dbgen \
          --scale-factor 1 \
          --format ${{ matrix.format }} \
          --table ${{ matrix.table }} \
          --output-dir benchmark-results/ \
          $MODE_FLAGS \
          > "$RAW_LOG" 2>&1 || STATUS=$?
        # grep exits 1 when every line was filtered out; that is not an error.
        grep -v "^DEBUG:" "$RAW_LOG" | tee "$LOG" || true
        rm -f "$RAW_LOG"
        if [ "$STATUS" -ne 0 ]; then
          # 124 means the 600 second timeout fired.
          echo "ERROR: Benchmark failed with exit code $STATUS"
          exit 1
        fi
        # Fail if process dumped core
        if grep -q "dumped core" "$LOG"; then
          echo "ERROR: Benchmark crashed with core dump"
          exit 1
        fi
        # Fail if unsupported format
        if grep -qi "unknown format\|unsupported format\|not supported" "$LOG"; then
          echo "ERROR: Format ${{ matrix.format }} not supported by this build"
          exit 1
        fi
    - name: Upload benchmark logs
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: tpch-benchmark-logs-optimization-${{ matrix.format }}-${{ matrix.mode }}-${{ matrix.table }}
        path: benchmark-results/${{ matrix.format }}_${{ matrix.table }}_${{ matrix.mode }}.log
        retention-days: 30
        if-no-files-found: ignore
# Format coverage for TPC-DS: generates one table per job at scale factor 1
# with the tpcds_benchmark binary built by build-matrix.
tpcds-benchmark-suite:
  name: TPC-DS Benchmark Suite
  runs-on: ubuntu-22.04
  needs: [resolve-images, build-matrix]
  timeout-minutes: 20
  container:
    image: ${{ matrix.build == 'base' && needs.resolve-images.outputs.base_image || matrix.build == 'orc' && needs.resolve-images.outputs.orc_image || needs.resolve-images.outputs.lance_image }}
    options: --user root
    credentials:
      username: ${{ github.actor }}
      password: ${{ secrets.GITHUB_TOKEN }}
  strategy:
    fail-fast: false
    matrix:
      # CSV and Parquet format - four tables each, base build.
      format: [csv, parquet]
      table: [store_returns, store_sales, customer, item]
      build: [base]
      # Each entry below conflicts with the cross product above on `format`,
      # so it is added as an extra, stand-alone combination.
      include:
        # ORC format
        - format: orc
          table: store_returns
          build: orc
        - format: orc
          table: store_sales
          build: orc
        # Lance format
        - format: lance
          table: store_returns
          build: lance
        - format: lance
          table: store_sales
          build: lance
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        fetch-depth: 1
    - name: Download build artifact
      uses: actions/download-artifact@v4
      with:
        name: tpch-benchmark-${{ matrix.build }}
        path: .
    - name: Setup benchmark executable
      run: |
        chmod +x tpcds_benchmark
        mkdir -p benchmark-results
        export LD_LIBRARY_PATH=/opt/dependencies/lib:$LD_LIBRARY_PATH
        echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >> "$GITHUB_ENV"
    - name: Run format coverage benchmark
      run: |
        LOG="benchmark-results/tpcds_${{ matrix.format }}_${{ matrix.table }}_baseline.log"
        RAW_LOG="$(mktemp)"
        # Capture the benchmark's own exit status. Piping it straight into
        # grep/tee would report tee's status instead (the default shell does
        # not enable pipefail), hiding crashes and timeouts. The price is that
        # output is printed after the run instead of while it is running.
        STATUS=0
        timeout 600 ./tpcds_benchmark \
          --scale-factor 1 \
          --format ${{ matrix.format }} \
          --table ${{ matrix.table }} \
          --output-dir benchmark-results/ \
          > "$RAW_LOG" 2>&1 || STATUS=$?
        # grep exits 1 when every line was filtered out; that is not an error.
        grep -v "^DEBUG:" "$RAW_LOG" | tee "$LOG" || true
        rm -f "$RAW_LOG"
        if [ "$STATUS" -ne 0 ]; then
          # 124 means the 600 second timeout fired.
          echo "ERROR: Benchmark failed with exit code $STATUS"
          exit 1
        fi
        # Fail if process dumped core
        if grep -q "dumped core" "$LOG"; then
          echo "ERROR: Benchmark crashed with core dump"
          exit 1
        fi
        # Fail if unsupported format
        if grep -qi "unknown format\|unsupported format\|not supported" "$LOG"; then
          echo "ERROR: Format ${{ matrix.format }} not supported by this build"
          exit 1
        fi
    - name: Upload benchmark logs
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: tpcds-benchmark-logs-suite-${{ matrix.format }}-${{ matrix.table }}
        path: benchmark-results/tpcds_${{ matrix.format }}_${{ matrix.table }}_baseline.log
        retention-days: 30
        if-no-files-found: ignore
# Compares the TPC-DS write paths (baseline vs --zero-copy) for Parquet and
# Lance on two tables at scale factor 1.
tpcds-optimization-benchmarks:
  name: TPC-DS Optimization Benchmarks (${{ matrix.format }}-${{ matrix.mode }})
  runs-on: ubuntu-22.04
  needs: [resolve-images, build-matrix]
  timeout-minutes: 20
  container:
    image: ${{ matrix.image == 'base' && needs.resolve-images.outputs.base_image || needs.resolve-images.outputs.lance_image }}
    options: --user root
    credentials:
      username: ${{ github.actor }}
      password: ${{ secrets.GITHUB_TOKEN }}
  strategy:
    fail-fast: false
    matrix:
      # Full cross product: 2 formats x 2 modes x 2 tables.
      format: [parquet, lance]
      mode: [baseline, zero-copy]
      table: [store_returns, store_sales]
      # `image` is not a matrix dimension, so each entry below only attaches
      # the build/image name to the combinations of its format.
      include:
        # Parquet benchmarks
        - format: parquet
          image: base
        # Lance benchmarks
        - format: lance
          image: lance
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        fetch-depth: 1
    - name: Download build artifact
      uses: actions/download-artifact@v4
      with:
        name: tpch-benchmark-${{ matrix.image }}
        path: .
    - name: Setup benchmark executable
      run: |
        chmod +x tpcds_benchmark
        mkdir -p benchmark-results
        export LD_LIBRARY_PATH=/opt/dependencies/lib:$LD_LIBRARY_PATH
        echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" >> "$GITHUB_ENV"
    - name: Run optimization benchmark
      run: |
        # baseline passes no extra flag.
        MODE_FLAGS=""
        if [ "${{ matrix.mode }}" = "zero-copy" ]; then
          MODE_FLAGS="--zero-copy"
        fi
        LOG="benchmark-results/tpcds_${{ matrix.format }}_${{ matrix.table }}_${{ matrix.mode }}.log"
        RAW_LOG="$(mktemp)"
        # Capture the benchmark's own exit status. Piping it straight into
        # grep/tee would report tee's status instead (the default shell does
        # not enable pipefail), hiding crashes and timeouts. The price is that
        # output is printed after the run instead of while it is running.
        # $MODE_FLAGS is left unquoted on purpose so an empty value adds no
        # argument.
        STATUS=0
        timeout 600 ./tpcds_benchmark \
          --scale-factor 1 \
          --format ${{ matrix.format }} \
          --table ${{ matrix.table }} \
          --output-dir benchmark-results/ \
          $MODE_FLAGS \
          > "$RAW_LOG" 2>&1 || STATUS=$?
        # grep exits 1 when every line was filtered out; that is not an error.
        grep -v "^DEBUG:" "$RAW_LOG" | tee "$LOG" || true
        rm -f "$RAW_LOG"
        if [ "$STATUS" -ne 0 ]; then
          # 124 means the 600 second timeout fired.
          echo "ERROR: Benchmark failed with exit code $STATUS"
          exit 1
        fi
        # Fail if process dumped core
        if grep -q "dumped core" "$LOG"; then
          echo "ERROR: Benchmark crashed with core dump"
          exit 1
        fi
        # Fail if unsupported format
        if grep -qi "unknown format\|unsupported format\|not supported" "$LOG"; then
          echo "ERROR: Format ${{ matrix.format }} not supported by this build"
          exit 1
        fi
    - name: Upload benchmark logs
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: tpcds-benchmark-logs-optimization-${{ matrix.format }}-${{ matrix.mode }}-${{ matrix.table }}
        path: benchmark-results/tpcds_${{ matrix.format }}_${{ matrix.table }}_${{ matrix.mode }}.log
        retention-days: 30
        if-no-files-found: ignore
# Collects the per-job benchmark logs, turns them into a JSON summary and an
# HTML report, and publishes everything as the "benchmark-results" artifact.
# Runs even when benchmark jobs failed so partial results are still reported.
results-aggregation:
  name: Aggregate Results
  runs-on: ubuntu-22.04
  needs: [tpch-benchmark-suite, tpch-optimization-benchmarks, tpcds-benchmark-suite, tpcds-optimization-benchmarks]
  if: always()
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Download all benchmark artifacts
      uses: actions/download-artifact@v4
      with:
        # Only the log artifacts are needed; without a filter the build
        # binaries (tpch-benchmark-<config>) would be downloaded as well.
        pattern: "*-benchmark-logs-*"
        path: all-results
    - name: Prepare benchmark results directory
      run: |
        # all-results is created here too so that find does not fail when no
        # log artifact was produced.
        mkdir -p benchmark-results all-results
        # NOTE(review): the suite jobs and the "baseline" optimization jobs
        # write identically named logs (e.g. parquet_lineitem_baseline.log),
        # so one copy overwrites the other here - confirm this is intended.
        find all-results -name "*.log" -exec cp {} benchmark-results/ \;
    - name: Generate summary report
      run: |
        # Best effort: a parser failure must not fail the job, but it must not
        # leave an empty or truncated ci_summary.json behind either, because
        # the following steps treat the file's existence as "summary is valid".
        SUMMARY_TMP="$(mktemp)"
        if python3 scripts/parse_benchmark_logs.py benchmark-results > "$SUMMARY_TMP"; then
          mv "$SUMMARY_TMP" benchmark-results/ci_summary.json
        else
          echo "WARNING: parse_benchmark_logs.py failed, no summary generated"
          rm -f "$SUMMARY_TMP"
        fi
    - name: Generate HTML visualization
      if: always()
      run: |
        if [ -f benchmark-results/ci_summary.json ]; then
          python3 scripts/visualize_benchmark_results.py benchmark-results/ci_summary.json benchmark-results/report.html
        else
          echo "No summary JSON found, skipping visualization"
        fi
    - name: Upload aggregated results
      uses: actions/upload-artifact@v4
      with:
        name: benchmark-results
        path: |
          benchmark-results/*.log
          benchmark-results/*.json
          benchmark-results/*.html
        retention-days: 30
        if-no-files-found: ignore
    - name: Print summary
      if: always()
      run: |
        if [ -f benchmark-results/ci_summary.json ]; then
          echo "=== Benchmark Summary ==="
          python3 -m json.tool benchmark-results/ci_summary.json || cat benchmark-results/ci_summary.json
        else
          echo "No summary generated (logs may not exist yet)"
        fi
# Single gate job: succeeds only when every build-matrix job succeeded.
# Always runs so that a failed or cancelled build is reported as a failure
# rather than leaving this check skipped.
status-check:
  name: Status Check
  runs-on: ubuntu-22.04
  needs:
    - build-matrix
  if: always()
  steps:
    - name: Check build status
      run: |
        BUILD_RESULT="${{ needs.build-matrix.result }}"
        # Guard clause: anything other than "success" fails the check.
        if [ "$BUILD_RESULT" != "success" ]; then
          echo "✗ Some builds or tests failed"
          echo "Build status: $BUILD_RESULT"
          exit 1
        fi
        echo "✓ All builds and tests passed"
        exit 0