From 1212c4bd1d2ef89a0e2414d10fe037421fa87a6e Mon Sep 17 00:00:00 2001 From: Philip Moore Date: Wed, 31 Dec 2025 15:26:14 -0500 Subject: [PATCH 1/5] Completed GizmoSQL c6a.4xlarge benchmark --- gizmosql/benchmark.sh | 69 +++++++++++++++++++ gizmosql/create.sql | 108 ++++++++++++++++++++++++++++++ gizmosql/load.sql | 7 ++ gizmosql/queries.sql | 43 ++++++++++++ gizmosql/results/c6a.4xlarge.json | 57 ++++++++++++++++ gizmosql/run.sh | 32 +++++++++ gizmosql/template.json | 13 ++++ 7 files changed, 329 insertions(+) create mode 100755 gizmosql/benchmark.sh create mode 100644 gizmosql/create.sql create mode 100644 gizmosql/load.sql create mode 100644 gizmosql/queries.sql create mode 100644 gizmosql/results/c6a.4xlarge.json create mode 100755 gizmosql/run.sh create mode 100644 gizmosql/template.json diff --git a/gizmosql/benchmark.sh b/gizmosql/benchmark.sh new file mode 100755 index 000000000..cbd1b6906 --- /dev/null +++ b/gizmosql/benchmark.sh @@ -0,0 +1,69 @@ +#!/bin/bash + +# Server setup Install +sudo apt install docker.io -y +sudo usermod -aG docker ${USER} +sudo systemctl start docker + +# You must log out, then back into the VM at this point for Docker to work... 
+ +# Run the GizmoSQL server in the background with Docker +docker run --name gizmosql \ + --detach \ + --rm \ + --tty \ + --init \ + --publish 31337:31337 \ + --pull always \ + --mount type=bind,source=/nfs_data,target=/opt/gizmosql/data \ + --env GIZMOSQL_USERNAME=clickbench \ + --env GIZMOSQL_PASSWORD=clickbench \ + --env DATABASE_FILENAME=/opt/gizmosql/data/clickbench.db \ + gizmodata/gizmosql:latest + +# Install Java and the GizmoSQLLine CLI client +sudo apt install openjdk-17-jre-headless -y +pushd /tmp +curl -L -o gizmosqlline https://github.com/gizmodata/gizmosqlline/releases/latest/download/gizmosqlline +chmod +x gizmosqlline +sudo mv gizmosqlline /usr/local/bin/ +popd + +# Create the table +gizmosqlline \ + -u 'jdbc:arrow-flight-sql://localhost:31337?useEncryption=true&disableCertificateVerification=true' \ + -n clickbench \ + -p clickbench \ + -f create.sql + +# Load the data +pushd /nfs_data +wget --continue --progress=dot:giga 'https://datasets.clickhouse.com/hits_compatible/hits.parquet' +popd + +echo -n "Load time: " +time gizmosqlline \ + -u 'jdbc:arrow-flight-sql://localhost:31337?useEncryption=true&disableCertificateVerification=true' \ + -n clickbench \ + -p clickbench \ + -f load.sql + +# Run the queries +script --quiet --command="./run.sh" log.txt + +# Remove carriage returns from the log +sed -i 's/\r$//' log.txt + +echo -n "Data size: " +wc -c /nfs_data/clickbench.db + +cat log.txt | \ + grep -E 'rows? selected \([0-9.]+ seconds\)|Killed|Segmentation' | \ + sed -E 's/.*rows? 
selected \(([0-9.]+) seconds\).*/\1/; s/.*(Killed|Segmentation).*/null/' | \ + awk '{ + if (NR % 3 == 1) printf "["; + if ($1 == "null") printf "null"; + else printf $1; + if (NR % 3 == 0) printf "],\n"; + else printf ", "; + }' diff --git a/gizmosql/create.sql b/gizmosql/create.sql new file mode 100644 index 000000000..4d23eaac6 --- /dev/null +++ b/gizmosql/create.sql @@ -0,0 +1,108 @@ +CREATE TABLE hits +( + WatchID BIGINT NOT NULL, + JavaEnable SMALLINT NOT NULL, + Title TEXT, + GoodEvent SMALLINT NOT NULL, + EventTime TIMESTAMP NOT NULL, + EventDate Date NOT NULL, + CounterID INTEGER NOT NULL, + ClientIP INTEGER NOT NULL, + RegionID INTEGER NOT NULL, + UserID BIGINT NOT NULL, + CounterClass SMALLINT NOT NULL, + OS SMALLINT NOT NULL, + UserAgent SMALLINT NOT NULL, + URL TEXT, + Referer TEXT, + IsRefresh SMALLINT NOT NULL, + RefererCategoryID SMALLINT NOT NULL, + RefererRegionID INTEGER NOT NULL, + URLCategoryID SMALLINT NOT NULL, + URLRegionID INTEGER NOT NULL, + ResolutionWidth SMALLINT NOT NULL, + ResolutionHeight SMALLINT NOT NULL, + ResolutionDepth SMALLINT NOT NULL, + FlashMajor SMALLINT NOT NULL, + FlashMinor SMALLINT NOT NULL, + FlashMinor2 TEXT, + NetMajor SMALLINT NOT NULL, + NetMinor SMALLINT NOT NULL, + UserAgentMajor SMALLINT NOT NULL, + UserAgentMinor VARCHAR(255) NOT NULL, + CookieEnable SMALLINT NOT NULL, + JavascriptEnable SMALLINT NOT NULL, + IsMobile SMALLINT NOT NULL, + MobilePhone SMALLINT NOT NULL, + MobilePhoneModel TEXT, + Params TEXT, + IPNetworkID INTEGER NOT NULL, + TraficSourceID SMALLINT NOT NULL, + SearchEngineID SMALLINT NOT NULL, + SearchPhrase TEXT, + AdvEngineID SMALLINT NOT NULL, + IsArtifical SMALLINT NOT NULL, + WindowClientWidth SMALLINT NOT NULL, + WindowClientHeight SMALLINT NOT NULL, + ClientTimeZone SMALLINT NOT NULL, + ClientEventTime TIMESTAMP NOT NULL, + SilverlightVersion1 SMALLINT NOT NULL, + SilverlightVersion2 SMALLINT NOT NULL, + SilverlightVersion3 INTEGER NOT NULL, + SilverlightVersion4 SMALLINT NOT NULL, + 
PageCharset TEXT, + CodeVersion INTEGER NOT NULL, + IsLink SMALLINT NOT NULL, + IsDownload SMALLINT NOT NULL, + IsNotBounce SMALLINT NOT NULL, + FUniqID BIGINT NOT NULL, + OriginalURL TEXT, + HID INTEGER NOT NULL, + IsOldCounter SMALLINT NOT NULL, + IsEvent SMALLINT NOT NULL, + IsParameter SMALLINT NOT NULL, + DontCountHits SMALLINT NOT NULL, + WithHash SMALLINT NOT NULL, + HitColor CHAR NOT NULL, + LocalEventTime TIMESTAMP NOT NULL, + Age SMALLINT NOT NULL, + Sex SMALLINT NOT NULL, + Income SMALLINT NOT NULL, + Interests SMALLINT NOT NULL, + Robotness SMALLINT NOT NULL, + RemoteIP INTEGER NOT NULL, + WindowName INTEGER NOT NULL, + OpenerName INTEGER NOT NULL, + HistoryLength SMALLINT NOT NULL, + BrowserLanguage TEXT, + BrowserCountry TEXT, + SocialNetwork TEXT, + SocialAction TEXT, + HTTPError SMALLINT NOT NULL, + SendTiming INTEGER NOT NULL, + DNSTiming INTEGER NOT NULL, + ConnectTiming INTEGER NOT NULL, + ResponseStartTiming INTEGER NOT NULL, + ResponseEndTiming INTEGER NOT NULL, + FetchTiming INTEGER NOT NULL, + SocialSourceNetworkID SMALLINT NOT NULL, + SocialSourcePage TEXT, + ParamPrice BIGINT NOT NULL, + ParamOrderID TEXT, + ParamCurrency TEXT, + ParamCurrencyID SMALLINT NOT NULL, + OpenstatServiceName TEXT, + OpenstatCampaignID TEXT, + OpenstatAdID TEXT, + OpenstatSourceID TEXT, + UTMSource TEXT, + UTMMedium TEXT, + UTMCampaign TEXT, + UTMContent TEXT, + UTMTerm TEXT, + FromTag TEXT, + HasGCLID SMALLINT NOT NULL, + RefererHash BIGINT NOT NULL, + URLHash BIGINT NOT NULL, + CLID INTEGER NOT NULL +); diff --git a/gizmosql/load.sql b/gizmosql/load.sql new file mode 100644 index 000000000..71d334df4 --- /dev/null +++ b/gizmosql/load.sql @@ -0,0 +1,7 @@ +INSERT INTO hits BY NAME +SELECT * REPLACE ( + make_date(EventDate) AS EventDate, + epoch_ms(EventTime * 1000) AS EventTime, + epoch_ms(ClientEventTime * 1000) AS ClientEventTime, + epoch_ms(LocalEventTime * 1000) AS LocalEventTime) +FROM read_parquet('/opt/gizmosql/data/hits.parquet', binary_as_string=True); 
diff --git a/gizmosql/queries.sql b/gizmosql/queries.sql new file mode 100644 index 000000000..b4115ee3a --- /dev/null +++ b/gizmosql/queries.sql @@ -0,0 +1,43 @@ +SELECT COUNT(*) FROM hits; +SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; +SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; +SELECT AVG(UserID) FROM hits; +SELECT COUNT(DISTINCT UserID) FROM hits; +SELECT COUNT(DISTINCT SearchPhrase) FROM hits; +SELECT MIN(EventDate), MAX(EventDate) FROM hits; +SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; +SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; +SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; +SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; +SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; 
+SELECT UserID FROM hits WHERE UserID = 435090932899640449; +SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; +SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; +SELECT CounterID, AVG(STRLEN(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(STRLEN(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), 
SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; +SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC 
LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; +SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; +SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' 
AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; +SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/gizmosql/results/c6a.4xlarge.json b/gizmosql/results/c6a.4xlarge.json new file mode 100644 index 000000000..0e540d0c4 --- /dev/null +++ b/gizmosql/results/c6a.4xlarge.json @@ -0,0 +1,57 @@ +{ + "system": "GizmoSQL", + "date": "2025-12-31", + "machine": "c6a.4xlarge", + "cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": ["C++","column-oriented","arrow-flight-sql","duckdb-backend","lukewarm-cold-run","tls-enabled"], + "load_time": 131.966, + "data_size": 26882879488, + "result": [ + [0.033, 0.011, 0.011], + [0.027, 0.012, 0.011], + [0.052, 0.028, 0.049], + [0.041, 0.037, 0.039], + [0.322, 0.293, 0.292], + [0.465, 0.444, 0.442], + [0.015, 0.011, 0.011], + [0.012, 0.012, 0.013], + [0.398, 0.394, 0.397], + [0.56, 0.557, 0.553], + [0.138, 0.132, 0.128], + [0.139, 0.139, 0.141], + [0.427, 0.41, 0.404], + [0.727, 0.713, 0.712], + [0.447, 0.441, 0.44], + [0.36, 0.356, 0.358], + [0.904, 0.872, 0.88], + [0.618, 0.627, 0.623], + [1.627, 1.579, 1.585], + [0.012, 0.01, 0.017], + [0.546, 0.536, 0.502], + [0.473, 0.48, 0.477], + [0.612, 0.524, 0.548], + [0.117, 0.113, 0.111], + [0.038, 0.035, 0.035], + [0.149, 0.143, 0.145], + [0.05, 0.031, 0.031], + [0.37, 0.368, 0.362], + [7.325, 7.068, 7.046], + [0.033, 0.053, 0.035], + [0.375, 0.371, 0.374], + [0.472, 0.459, 0.432], + [2.071, 4.712, 2.025], + [1.757, 1.723, 1.745], + [1.884, 1.856, 1.852], + [0.49, 0.487, 0.513], + [0.065, 0.04, 0.039], + [0.05, 0.013, 0.014], + [0.051, 0.016, 0.041], 
+ [0.107, 0.111, 0.091], + [0.026, 0.012, 0.053], + [0.015, 0.011, 0.023], + [0.019, 0.028, 0.022] + ] +} diff --git a/gizmosql/run.sh b/gizmosql/run.sh new file mode 100755 index 000000000..fd638faf5 --- /dev/null +++ b/gizmosql/run.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +TRIES=3 +TEMP_SQL_FILE="/tmp/benchmark_queries_$$.sql" + +# Read queries from file +mapfile -t queries < queries.sql + +# Create the combined SQL script with each query repeated TRIES times +> "${TEMP_SQL_FILE}" + +for query in "${queries[@]}"; do + # Add a comment to identify the query in the output + echo "-- Query: ${query}" >> "${TEMP_SQL_FILE}" + + # Repeat each query TRIES times + for i in $(seq 1 ${TRIES}); do + echo "${query}" >> "${TEMP_SQL_FILE}" + done +done + +# Execute all queries in one session (so authentication overhead is minimized) +echo "Running benchmark with $(wc -l < queries.sql) queries, ${TRIES} tries each..." + +gizmosqlline \ + -u 'jdbc:arrow-flight-sql://localhost:31337?useEncryption=true&disableCertificateVerification=true' \ + -n clickbench \ + -p clickbench \ + -f "${TEMP_SQL_FILE}" + +# Clean up +rm -f "${TEMP_SQL_FILE}" diff --git a/gizmosql/template.json b/gizmosql/template.json new file mode 100644 index 000000000..78f4e0a0e --- /dev/null +++ b/gizmosql/template.json @@ -0,0 +1,13 @@ +{ + "system": "GizmoSQL", + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": [ + "C++", + "column-oriented", + "arrow-flight-sql", + "duckdb", + "lukewarm-cold-run" + ] +} From 81e0d211774ae85f06dc8a2f25fd1e46ba7c29db Mon Sep 17 00:00:00 2001 From: Philip Moore Date: Sat, 24 Jan 2026 17:23:34 -0500 Subject: [PATCH 2/5] Updated per feedback on Pull Request. No longer using docker. 
--- gizmosql/benchmark.sh | 57 ++++++++++---------- gizmosql/create.sql | 2 +- gizmosql/load.sql | 2 +- gizmosql/queries.sql | 0 gizmosql/results/c6a.4xlarge.json | 88 +++++++++++++++---------------- gizmosql/run.sh | 35 ++++++++---- gizmosql/template.json | 0 gizmosql/util.sh | 39 ++++++++++++++ 8 files changed, 139 insertions(+), 84 deletions(-) mode change 100644 => 100755 gizmosql/create.sql mode change 100644 => 100755 gizmosql/load.sql mode change 100644 => 100755 gizmosql/queries.sql mode change 100644 => 100755 gizmosql/template.json create mode 100755 gizmosql/util.sh diff --git a/gizmosql/benchmark.sh b/gizmosql/benchmark.sh index cbd1b6906..1cbf2bbee 100755 --- a/gizmosql/benchmark.sh +++ b/gizmosql/benchmark.sh @@ -1,53 +1,54 @@ #!/bin/bash -# Server setup Install -sudo apt install docker.io -y -sudo usermod -aG docker ${USER} -sudo systemctl start docker +# Install requirements +sudo apt-get update -y +sudo apt install openjdk-17-jre-headless unzip netcat-openbsd -y -# You must log out, then back into the VM at this point for Docker to work... 
+# Detect architecture (maps x86_64->amd64, aarch64->arm64) +ARCH=$(uname -m) +if [ "$ARCH" = "x86_64" ]; then + ARCH="amd64" +elif [ "$ARCH" = "aarch64" ]; then + ARCH="arm64" +fi -# Run the GizmoSQL server in the background with Docker -docker run --name gizmosql \ - --detach \ - --rm \ - --tty \ - --init \ - --publish 31337:31337 \ - --pull always \ - --mount type=bind,source=/nfs_data,target=/opt/gizmosql/data \ - --env GIZMOSQL_USERNAME=clickbench \ - --env GIZMOSQL_PASSWORD=clickbench \ - --env DATABASE_FILENAME=/opt/gizmosql/data/clickbench.db \ - gizmodata/gizmosql:latest +# Server setup Install +curl -L -o gizmosql.zip "https://github.com/gizmodata/gizmosql/releases/latest/download/gizmosql_cli_linux_${ARCH}.zip" +unzip gizmosql.zip +sudo mv gizmosql_server gizmosql_client /usr/local/bin/ # Install Java and the GizmoSQLLine CLI client -sudo apt install openjdk-17-jre-headless -y pushd /tmp curl -L -o gizmosqlline https://github.com/gizmodata/gizmosqlline/releases/latest/download/gizmosqlline chmod +x gizmosqlline sudo mv gizmosqlline /usr/local/bin/ popd +# Source our env vars and utility functions for starting/stopping gizmosql server +. 
util.sh + +# Start the GizmoSQL server in the background +start_gizmosql + # Create the table gizmosqlline \ - -u 'jdbc:arrow-flight-sql://localhost:31337?useEncryption=true&disableCertificateVerification=true' \ - -n clickbench \ - -p clickbench \ + -u "${GIZMOSQL_SERVER_URI}" \ + -n "${GIZMOSQL_USERNAME}" \ + -p "${GIZMOSQL_PASSWORD}" \ -f create.sql # Load the data -pushd /nfs_data wget --continue --progress=dot:giga 'https://datasets.clickhouse.com/hits_compatible/hits.parquet' -popd echo -n "Load time: " time gizmosqlline \ - -u 'jdbc:arrow-flight-sql://localhost:31337?useEncryption=true&disableCertificateVerification=true' \ - -n clickbench \ - -p clickbench \ + -u "${GIZMOSQL_SERVER_URI}" \ + -n "${GIZMOSQL_USERNAME}" \ + -p "${GIZMOSQL_PASSWORD}" \ -f load.sql +stop_gizmosql + # Run the queries script --quiet --command="./run.sh" log.txt @@ -55,7 +56,7 @@ script --quiet --command="./run.sh" log.txt sed -i 's/\r$//' log.txt echo -n "Data size: " -wc -c /nfs_data/clickbench.db +wc -c clickbench.db cat log.txt | \ grep -E 'rows? 
selected \([0-9.]+ seconds\)|Killed|Segmentation' | \ diff --git a/gizmosql/create.sql b/gizmosql/create.sql old mode 100644 new mode 100755 index 4d23eaac6..77e583c88 --- a/gizmosql/create.sql +++ b/gizmosql/create.sql @@ -1,4 +1,4 @@ -CREATE TABLE hits +CREATE OR REPLACE TABLE hits ( WatchID BIGINT NOT NULL, JavaEnable SMALLINT NOT NULL, diff --git a/gizmosql/load.sql b/gizmosql/load.sql old mode 100644 new mode 100755 index 71d334df4..e384ab356 --- a/gizmosql/load.sql +++ b/gizmosql/load.sql @@ -4,4 +4,4 @@ SELECT * REPLACE ( epoch_ms(EventTime * 1000) AS EventTime, epoch_ms(ClientEventTime * 1000) AS ClientEventTime, epoch_ms(LocalEventTime * 1000) AS LocalEventTime) -FROM read_parquet('/opt/gizmosql/data/hits.parquet', binary_as_string=True); +FROM read_parquet('hits.parquet', binary_as_string=True); diff --git a/gizmosql/queries.sql b/gizmosql/queries.sql old mode 100644 new mode 100755 diff --git a/gizmosql/results/c6a.4xlarge.json b/gizmosql/results/c6a.4xlarge.json index 0e540d0c4..4005b23b9 100644 --- a/gizmosql/results/c6a.4xlarge.json +++ b/gizmosql/results/c6a.4xlarge.json @@ -8,50 +8,50 @@ "tuned": "no", "tags": ["C++","column-oriented","arrow-flight-sql","duckdb-backend","lukewarm-cold-run","tls-enabled"], "load_time": 131.966, - "data_size": 26882879488, + "data_size": 26924298240, "result": [ - [0.033, 0.011, 0.011], - [0.027, 0.012, 0.011], - [0.052, 0.028, 0.049], - [0.041, 0.037, 0.039], - [0.322, 0.293, 0.292], - [0.465, 0.444, 0.442], - [0.015, 0.011, 0.011], - [0.012, 0.012, 0.013], - [0.398, 0.394, 0.397], - [0.56, 0.557, 0.553], - [0.138, 0.132, 0.128], - [0.139, 0.139, 0.141], - [0.427, 0.41, 0.404], - [0.727, 0.713, 0.712], - [0.447, 0.441, 0.44], - [0.36, 0.356, 0.358], - [0.904, 0.872, 0.88], - [0.618, 0.627, 0.623], - [1.627, 1.579, 1.585], - [0.012, 0.01, 0.017], - [0.546, 0.536, 0.502], - [0.473, 0.48, 0.477], - [0.612, 0.524, 0.548], - [0.117, 0.113, 0.111], - [0.038, 0.035, 0.035], - [0.149, 0.143, 0.145], - [0.05, 0.031, 0.031], - 
[0.37, 0.368, 0.362], - [7.325, 7.068, 7.046], - [0.033, 0.053, 0.035], - [0.375, 0.371, 0.374], - [0.472, 0.459, 0.432], - [2.071, 4.712, 2.025], - [1.757, 1.723, 1.745], - [1.884, 1.856, 1.852], - [0.49, 0.487, 0.513], - [0.065, 0.04, 0.039], - [0.05, 0.013, 0.014], - [0.051, 0.016, 0.041], - [0.107, 0.111, 0.091], - [0.026, 0.012, 0.053], - [0.015, 0.011, 0.023], - [0.019, 0.028, 0.022] + [0.053, 0.008, 0.007], + [0.129, 0.012, 0.01], + [0.175, 0.029, 0.027], + [0.321, 0.039, 0.039], + [0.363, 0.261, 0.26], + [0.838, 0.423, 0.416], + [0.096, 0.015, 0.014], + [0.091, 0.016, 0.015], + [0.529, 0.358, 0.361], + [0.679, 0.521, 0.524], + [0.299, 0.14, 0.137], + [0.295, 0.147, 0.144], + [0.554, 0.404, 0.403], + [0.915, 0.696, 0.702], + [0.62, 0.43, 0.426], + [0.451, 0.324, 0.33], + [1.023, 0.832, 0.84], + [0.783, 0.562, 0.576], + [1.993, 1.517, 1.539], + [0.074, 0.014, 0.012], + [17.562, 0.483, 0.479], + [0.953, 0.493, 0.482], + [11.484, 0.531, 0.541], + [0.54, 0.125, 0.117], + [0.108, 0.04, 0.041], + [0.227, 0.149, 0.149], + [0.112, 0.037, 0.036], + [0.862, 0.374, 0.375], + [12.601, 7.034, 7.025], + [0.14, 0.053, 0.046], + [0.629, 0.345, 0.346], + [2.031, 0.424, 0.43], + [2.052, 1.713, 1.772], + [2.309, 1.704, 1.716], + [2.438, 1.841, 1.816], + [0.526, 0.416, 0.426], + [0.097, 0.047, 0.044], + [0.079, 0.021, 0.018], + [0.08, 0.023, 0.021], + [0.152, 0.074, 0.066], + [0.079, 0.02, 0.015], + [0.077, 0.02, 0.016], + [0.08, 0.022, 0.021] ] } diff --git a/gizmosql/run.sh b/gizmosql/run.sh index fd638faf5..ab99c0576 100755 --- a/gizmosql/run.sh +++ b/gizmosql/run.sh @@ -1,15 +1,29 @@ #!/bin/bash +# Source our env vars +. 
util.sh + TRIES=3 TEMP_SQL_FILE="/tmp/benchmark_queries_$$.sql" +# Ensure server is stopped on script exit +trap stop_gizmosql EXIT + # Read queries from file mapfile -t queries < queries.sql -# Create the combined SQL script with each query repeated TRIES times -> "${TEMP_SQL_FILE}" +echo "Running benchmark with ${#queries[@]} queries, ${TRIES} tries each..." for query in "${queries[@]}"; do + > "${TEMP_SQL_FILE}" + + echo "Clear Linux memory caches to ensure fair benchmark comparisons" + sync + echo 3 | sudo tee /proc/sys/vm/drop_caches > /dev/null + + # Start the GizmoSQL server + start_gizmosql + # Add a comment to identify the query in the output echo "-- Query: ${query}" >> "${TEMP_SQL_FILE}" @@ -17,16 +31,17 @@ for query in "${queries[@]}"; do for i in $(seq 1 ${TRIES}); do echo "${query}" >> "${TEMP_SQL_FILE}" done -done -# Execute all queries in one session (so authentication overhead is minimized) -echo "Running benchmark with $(wc -l < queries.sql) queries, ${TRIES} tries each..." 
+ # Execute the query script + gizmosqlline \ + -u "${GIZMOSQL_SERVER_URI}" \ + -n "${GIZMOSQL_USERNAME}" \ + -p "${GIZMOSQL_PASSWORD}" \ + -f "${TEMP_SQL_FILE}" -gizmosqlline \ - -u 'jdbc:arrow-flight-sql://localhost:31337?useEncryption=true&disableCertificateVerification=true' \ - -n clickbench \ - -p clickbench \ - -f "${TEMP_SQL_FILE}" + # Stop the server before next query + stop_gizmosql +done # Clean up rm -f "${TEMP_SQL_FILE}" diff --git a/gizmosql/template.json b/gizmosql/template.json old mode 100644 new mode 100755 diff --git a/gizmosql/util.sh b/gizmosql/util.sh new file mode 100755 index 000000000..076c5bac4 --- /dev/null +++ b/gizmosql/util.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# Variables +GIZMOSQL_SERVER_URI="jdbc:arrow-flight-sql://localhost:31337?useEncryption=false" +GIZMOSQL_USERNAME=clickbench +GIZMOSQL_PASSWORD=clickbench +PID_FILE="/tmp/gizmosql_server_$$.pid" + +# Function to start the GizmoSQL server +start_gizmosql() { + export GIZMOSQL_PASSWORD="${GIZMOSQL_PASSWORD}" + + nohup gizmosql_server \ + --username "${GIZMOSQL_USERNAME}" \ + --database-filename clickbench.db \ + --print-queries >> gizmosql_server.log 2>&1 & + + echo $! > "${PID_FILE}" + + # Wait for port 31337 to accept connections; return 1 if the server process dies first + echo "Waiting for gizmosql_server to start..." + while ! nc -z localhost 31337 2>/dev/null; do + kill -0 "$(cat "${PID_FILE}")" 2>/dev/null || { echo "gizmosql_server exited during startup; see gizmosql_server.log" >&2; return 1; }; sleep 1 + done + echo "gizmosql_server is ready (PID: $(cat "${PID_FILE}"))" +} + +# Function to stop the GizmoSQL server +stop_gizmosql() { + if [ -f "${PID_FILE}" ]; then + local pid=$(cat "${PID_FILE}") + if kill -0 "$pid" 2>/dev/null; then + echo "Stopping gizmosql_server (PID: $pid)..." 
+ kill "$pid" + wait "$pid" 2>/dev/null + fi + rm -f "${PID_FILE}" + fi +} From eeeb4faa7c6c4e925ca3d1b096a0b98aea195a7e Mon Sep 17 00:00:00 2001 From: Philip Moore Date: Sat, 24 Jan 2026 17:29:28 -0500 Subject: [PATCH 3/5] Fixed date stamp on results --- gizmosql/results/c6a.4xlarge.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gizmosql/results/c6a.4xlarge.json b/gizmosql/results/c6a.4xlarge.json index 4005b23b9..82ed5ad9d 100644 --- a/gizmosql/results/c6a.4xlarge.json +++ b/gizmosql/results/c6a.4xlarge.json @@ -1,6 +1,6 @@ { "system": "GizmoSQL", - "date": "2025-12-31", + "date": "2026-01-24", "machine": "c6a.4xlarge", "cluster_size": 1, "proprietary": "no", From ca98a84136c9013710b5bb4d810e1b4923ebb5a6 Mon Sep 17 00:00:00 2001 From: Philip Moore Date: Sat, 24 Jan 2026 17:31:35 -0500 Subject: [PATCH 4/5] Fixing tags in json to reflect that we do a cold run... --- gizmosql/results/c6a.4xlarge.json | 2 +- gizmosql/template.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gizmosql/results/c6a.4xlarge.json b/gizmosql/results/c6a.4xlarge.json index 82ed5ad9d..d87b57b1a 100644 --- a/gizmosql/results/c6a.4xlarge.json +++ b/gizmosql/results/c6a.4xlarge.json @@ -6,7 +6,7 @@ "proprietary": "no", "hardware": "cpu", "tuned": "no", - "tags": ["C++","column-oriented","arrow-flight-sql","duckdb-backend","lukewarm-cold-run","tls-enabled"], + "tags": ["C++","column-oriented","arrow-flight-sql","duckdb-backend","cold-run","tls-enabled"], "load_time": 131.966, "data_size": 26924298240, "result": [ diff --git a/gizmosql/template.json b/gizmosql/template.json index 78f4e0a0e..8d289eac0 100755 --- a/gizmosql/template.json +++ b/gizmosql/template.json @@ -8,6 +8,6 @@ "column-oriented", "arrow-flight-sql", "duckdb", - "lukewarm-cold-run" + "cold-run" ] } From 9a04d7ef645110812eeb0e41590df9e685f5f597 Mon Sep 17 00:00:00 2001 From: Philip Moore Date: Sat, 24 Jan 2026 17:34:15 -0500 Subject: [PATCH 5/5] More tag fixes (sorry) --- 
gizmosql/results/c6a.4xlarge.json | 2 +- gizmosql/template.json | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/gizmosql/results/c6a.4xlarge.json b/gizmosql/results/c6a.4xlarge.json index d87b57b1a..f1c26f491 100644 --- a/gizmosql/results/c6a.4xlarge.json +++ b/gizmosql/results/c6a.4xlarge.json @@ -6,7 +6,7 @@ "proprietary": "no", "hardware": "cpu", "tuned": "no", - "tags": ["C++","column-oriented","arrow-flight-sql","duckdb-backend","cold-run","tls-enabled"], + "tags": ["C++","column-oriented","arrow-flight-sql","duckdb-backend","cold-run","tls-disabled"], "load_time": 131.966, "data_size": 26924298240, "result": [ diff --git a/gizmosql/template.json b/gizmosql/template.json index 8d289eac0..396d16ded 100755 --- a/gizmosql/template.json +++ b/gizmosql/template.json @@ -7,7 +7,8 @@ "C++", "column-oriented", "arrow-flight-sql", - "duckdb", - "cold-run" + "duckdb-backend", + "cold-run", + "tls-disabled" ] }