85 changes: 85 additions & 0 deletions .github/workflows/pr_benchmark_check.yml
@@ -0,0 +1,85 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Lightweight CI for benchmark-only changes - verifies compilation and linting
# without running full test suites

name: PR Benchmark Check

concurrency:
  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
  cancel-in-progress: true

on:
  push:
    paths:
      - "native/core/benches/**"
      - "native/spark-expr/benches/**"
      - "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
  pull_request:
    paths:
      - "native/core/benches/**"
      - "native/spark-expr/benches/**"
      - "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
  workflow_dispatch:

env:
  RUST_VERSION: stable

jobs:
  benchmark-check:
    name: Benchmark Compile & Lint Check
    runs-on: ubuntu-latest
    container:
      image: amd64/rust
    steps:
      - uses: actions/checkout@v6

      - name: Setup Rust & Java toolchain
        uses: ./.github/actions/setup-builder
        with:
          rust-version: ${{ env.RUST_VERSION }}
          jdk-version: 17

      - name: Check Cargo fmt
        run: |
          cd native
          cargo fmt --all -- --check --color=never

      - name: Check Cargo clippy
        run: |
          cd native
          cargo clippy --color=never --all-targets --workspace -- -D warnings

      - name: Check benchmark compilation
        run: |
          cd native
          cargo check --benches

      - name: Cache Maven dependencies
        uses: actions/cache@v4
        with:
          path: |
            ~/.m2/repository
            /root/.m2/repository
          key: ${{ runner.os }}-benchmark-maven-${{ hashFiles('**/pom.xml') }}
          restore-keys: |
            ${{ runner.os }}-benchmark-maven-

      - name: Check Scala compilation and linting
        run: |
          ./mvnw -B compile test-compile scalafix:scalafix -Dscalafix.mode=CHECK -Psemanticdb -DskipTests
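
The same checks can be reproduced locally before opening a PR (a minimal sketch reusing the commands from the workflow steps above):

    cd native
    cargo fmt --all -- --check --color=never
    cargo clippy --color=never --all-targets --workspace -- -D warnings
    cargo check --benches
    cd ..
    ./mvnw -B compile test-compile scalafix:scalafix -Dscalafix.mode=CHECK -Psemanticdb -DskipTests
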
6 changes: 6 additions & 0 deletions .github/workflows/pr_build_linux.yml
@@ -27,11 +27,17 @@ on:
- "doc/**"
- "docs/**"
- "**.md"
- "native/core/benches/**"
- "native/spark-expr/benches/**"
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
pull_request:
paths-ignore:
- "doc/**"
- "docs/**"
- "**.md"
- "native/core/benches/**"
- "native/spark-expr/benches/**"
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
# manual trigger
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
workflow_dispatch:
6 changes: 6 additions & 0 deletions .github/workflows/pr_build_macos.yml
@@ -27,11 +27,17 @@ on:
- "doc/**"
- "docs/**"
- "**.md"
- "native/core/benches/**"
- "native/spark-expr/benches/**"
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
pull_request:
paths-ignore:
- "doc/**"
- "docs/**"
- "**.md"
- "native/core/benches/**"
- "native/spark-expr/benches/**"
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
# manual trigger
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
workflow_dispatch:
10 changes: 10 additions & 0 deletions .github/workflows/spark_sql_test.yml
@@ -27,11 +27,17 @@ on:
- "doc/**"
- "docs/**"
- "**.md"
- "native/core/benches/**"
- "native/spark-expr/benches/**"
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
pull_request:
paths-ignore:
- "doc/**"
- "docs/**"
- "**.md"
- "native/core/benches/**"
- "native/spark-expr/benches/**"
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
# manual trigger
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
workflow_dispatch:
@@ -59,6 +65,10 @@ jobs:
- {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
- {name: "sql_hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"}
- {name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"}
# Skip sql_hive-1 for Spark 4.0 due to https://github.com/apache/datafusion-comet/issues/2946
exclude:
- spark-version: {short: '4.0', full: '4.0.1', java: 17}
module: {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
fail-fast: false
name: spark-sql-${{ matrix.module.name }}/${{ matrix.os }}/spark-${{ matrix.spark-version.full }}/java-${{ matrix.spark-version.java }}
runs-on: ${{ matrix.os }}
spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala
@@ -35,13 +35,13 @@ case class StringExprConfig(
query: String,
extraCometConfigs: Map[String, String] = Map.empty)

// spotless:off
/**
* Benchmark to measure performance of Comet string expressions. To run this benchmark:
* `SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometStringExpressionBenchmark`
* {{{
* SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometStringExpressionBenchmark
* }}}
* Results will be written to "spark/benchmarks/CometStringExpressionBenchmark-**results.txt".
*/
// spotless:on
object CometStringExpressionBenchmark extends CometBenchmarkBase {

/**
@@ -50,7 +50,7 @@ object CometStringExpressionBenchmark extends CometBenchmarkBase {
def runStringExprBenchmark(config: StringExprConfig, values: Int): Unit = {
withTempPath { dir =>
withTempTable("parquetV1Table") {
prepareTable(dir, spark.sql(s"SELECT REPEAT(CAST(value AS STRING), 100) AS c1 FROM $tbl"))
prepareTable(dir, spark.sql(s"SELECT REPEAT(CAST(value AS STRING), 20) AS c1 FROM $tbl"))

val extraConfigs =
Map(CometConf.COMET_CASE_CONVERSION_ENABLED.key -> "true") ++ config.extraCometConfigs
@@ -62,23 +62,36 @@ object CometStringExpressionBenchmark extends CometBenchmarkBase {

// Configuration for all string expression benchmarks
private val stringExpressions = List(
StringExprConfig("Substring", "select substring(c1, 1, 100) from parquetV1Table"),
StringExprConfig("ascii", "select ascii(c1) from parquetV1Table"),
StringExprConfig("bitLength", "select bit_length(c1) from parquetV1Table"),
StringExprConfig("octet_length", "select octet_length(c1) from parquetV1Table"),
StringExprConfig("upper", "select upper(c1) from parquetV1Table"),
StringExprConfig("lower", "select lower(c1) from parquetV1Table"),
StringExprConfig("bit_length", "select bit_length(c1) from parquetV1Table"),
StringExprConfig("chr", "select chr(c1) from parquetV1Table"),
StringExprConfig("concat", "select concat(c1, c1) from parquetV1Table"),
StringExprConfig("concat_ws", "select concat_ws(' ', c1, c1) from parquetV1Table"),
StringExprConfig("contains", "select contains(c1, '123') from parquetV1Table"),
StringExprConfig("endswith", "select endswith(c1, '9') from parquetV1Table"),
StringExprConfig("initCap", "select initCap(c1) from parquetV1Table"),
StringExprConfig("trim", "select trim(c1) from parquetV1Table"),
StringExprConfig("concatws", "select concat_ws(' ', c1, c1) from parquetV1Table"),
StringExprConfig("instr", "select instr(c1, '123') from parquetV1Table"),
StringExprConfig("length", "select length(c1) from parquetV1Table"),
StringExprConfig("like", "select c1 like '%123%' from parquetV1Table"),
StringExprConfig("lower", "select lower(c1) from parquetV1Table"),
StringExprConfig("lpad", "select lpad(c1, 150, 'x') from parquetV1Table"),
StringExprConfig("ltrim", "select ltrim(c1) from parquetV1Table"),
StringExprConfig("octet_length", "select octet_length(c1) from parquetV1Table"),
StringExprConfig(
"regexp_replace",
"select regexp_replace(c1, '[0-9]', 'X') from parquetV1Table"),
StringExprConfig("repeat", "select repeat(c1, 3) from parquetV1Table"),
StringExprConfig("reverse", "select reverse(c1) from parquetV1Table"),
StringExprConfig("instr", "select instr(c1, '123') from parquetV1Table"),
StringExprConfig("replace", "select replace(c1, '123', 'ab') from parquetV1Table"),
StringExprConfig("reverse", "select reverse(c1) from parquetV1Table"),
StringExprConfig("rlike", "select c1 rlike '[0-9]+' from parquetV1Table"),
StringExprConfig("rpad", "select rpad(c1, 150, 'x') from parquetV1Table"),
StringExprConfig("rtrim", "select rtrim(c1) from parquetV1Table"),
StringExprConfig("space", "select space(2) from parquetV1Table"),
StringExprConfig("translate", "select translate(c1, '123456', 'aBcDeF') from parquetV1Table"))
StringExprConfig("startswith", "select startswith(c1, '1') from parquetV1Table"),
StringExprConfig("substring", "select substring(c1, 1, 100) from parquetV1Table"),
StringExprConfig("translate", "select translate(c1, '123456', 'aBcDeF') from parquetV1Table"),
StringExprConfig("trim", "select trim(c1) from parquetV1Table"),
StringExprConfig("upper", "select upper(c1) from parquetV1Table"))

override def runCometBenchmark(mainArgs: Array[String]): Unit = {
val values = 1024 * 1024;