Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ log.txt
load_out.txt
server.log
server.pid
server.endpoint
server.endpoint.tmp
arc_token.txt
data-size.txt
.doris_home
Expand Down
11 changes: 5 additions & 6 deletions hyper-parquet/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@
# Thin shim — actual flow is in lib/benchmark-common.sh.
export BENCH_DOWNLOAD_SCRIPT="download-hits-parquet-partitioned"
export BENCH_DURABLE=yes
export BENCH_RESTARTABLE=no
# Single-process engine: each query forks a fresh full-machine process with no
# shared scheduler across connections, so the concurrent-QPS test only
# oversubscribes RAM rather than measuring throughput. Skip it by default;
# override BENCH_CONCURRENT_DURATION to re-enable. See issue #946.
export BENCH_CONCURRENT_DURATION="${BENCH_CONCURRENT_DURATION:-0}"
# RESTARTABLE=yes: ./start now launches a persistent hyperd whose lifecycle
# matters, so the driver's cold cycle (stop -> wait_stopped -> drop_caches ->
# start) gives an honest cold try 1 while tries 2..N stay hot on the warm
# server. See issue #936.
export BENCH_RESTARTABLE=yes
exec ../lib/benchmark-common.sh
20 changes: 16 additions & 4 deletions hyper-parquet/check
Original file line number Diff line number Diff line change
@@ -1,12 +1,24 @@
#!/bin/bash
# Readiness probe for the persistent Hyper server. ./start publishes the
# server's connection descriptor to server.endpoint; this script connects
# through that descriptor and runs SELECT 1. A non-zero exit status means
# "not up yet". The benchmark driver polls this in a loop after ./start, and
# in the cold cycle it treats the probe starting to fail as the signal that
# the server is really stopped.
set -e

# shellcheck disable=SC1091
source myenv/bin/activate

# A missing or empty endpoint file means no server has been published.
if [ ! -s server.endpoint ]; then
  exit 1
fi
python3 - <<'PY'
from tableauhyperapi import HyperProcess, Telemetry, Connection
with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
with Connection(hyper.endpoint) as connection:
connection.execute_list_query("SELECT 1")
from tableauhyperapi import Connection, Endpoint

# The descriptor is the single line ./start wrote to server.endpoint.
with open("server.endpoint") as endpoint_file:
    conn_descriptor = endpoint_file.read().strip()

# Attach to the already-running server; any failure here propagates as a
# non-zero exit status of the probe.
server = Endpoint(connection_descriptor=conn_descriptor, user_agent="clickbench")
with Connection(server) as conn:
    conn.execute_list_query("SELECT 1")
PY
30 changes: 20 additions & 10 deletions hyper-parquet/query
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
#!/bin/bash
# Reads a SQL query from stdin, runs it via tableau hyperapi against the
# partitioned parquet files (registered as a temp external table from
# create.sql).
# Reads a SQL query from stdin, runs it once against the partitioned parquet
# files on the PERSISTENT Hyper server started by ./start (descriptor in
# server.endpoint). The temp external table is (re)created from create.sql
# before the timer starts, so its setup is not counted.
# Stdout: query result.
# Stderr: query runtime in fractional seconds on the last line.
#
# The benchmark driver calls this once per try (BENCH_TRIES). Because every
# call connects to the SAME long-lived server (and the parquet files stay in
# the OS page cache between tries), try 1 (right after the driver's
# stop/drop_caches/start cold cycle) is cold and tries 2..N are genuinely hot.
# See issue #936.
set -e

# shellcheck disable=SC1091
Expand All @@ -18,17 +25,20 @@ cat > "$query_file"
python3 - "$query_file" <<'PY'
import sys
import timeit
from tableauhyperapi import HyperProcess, Telemetry, Connection
from tableauhyperapi import Connection, Endpoint

with open(sys.argv[1]) as f:
query = f.read()

with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
with Connection(hyper.endpoint) as connection:
connection.execute_command(open("create.sql").read())
start = timeit.default_timer()
rows = connection.execute_list_query(query)
end = timeit.default_timer()
# Connection descriptor published by ./start.
with open("server.endpoint") as f:
    descriptor = f.read().strip()

# Read the external-table DDL through a context manager so the file handle is
# closed deterministically, and do it before connecting so none of this work
# can end up inside the timed region.
with open("create.sql") as f:
    create_sql = f.read()

endpoint = Endpoint(connection_descriptor=descriptor, user_agent="clickbench")
with Connection(endpoint) as connection:
    # (Re)create the temp external table for this connection; untimed setup.
    connection.execute_command(create_sql)
    # Only the query itself is measured.
    start = timeit.default_timer()
    rows = connection.execute_list_query(query)
    end = timeit.default_timer()

for r in rows:
print(r)
Expand Down
75 changes: 74 additions & 1 deletion hyper-parquet/start
Original file line number Diff line number Diff line change
@@ -1,2 +1,75 @@
#!/bin/bash
exit 0
# Launch ONE long-lived Hyper server (hyperd) and publish its connection
# descriptor to server.endpoint. Every ./query invocation then connects to
# this single persistent process instead of spawning its own.
#
# This is the fix for issue #936: the per-query benchmark driver calls
# ./query once per try (BENCH_TRIES), keeping a daemon-backed server warm so
# tries 2..N are genuinely hot. The previous Hyper ./query opened a brand-new
# HyperProcess on every call, so each "hot" try paid the cost of a fresh
# server. With a persistent server its caches stay warm across tries, and the
# driver's cold cycle (stop -> drop_caches -> start) still gives an honest
# cold try 1.
#
# Unlike hyper/, there is no hits.hyper to keep attached: the data is external
# parquet read through the OS page cache (which the driver preserves between
# tries), and the temp external table is connection-scoped so each ./query
# recreates it untimed.
#
# Files written: server.pid (supervisor PID), server.endpoint (descriptor),
#                server.log (supervisor stdout/stderr).
# Exit status:   0 once server.endpoint is published (or a server is already
#                running), 1 if the supervisor dies or 60s elapse first.
set -e

# shellcheck disable=SC1091
source myenv/bin/activate

# Already running? The pidfile + a live process is authoritative.
if [ -f server.pid ] && kill -0 "$(cat server.pid 2>/dev/null)" 2>/dev/null; then
  exit 0
fi

# Clean up stale artifacts from a previous (possibly crashed) server,
# including a half-written descriptor left behind before the atomic rename.
rm -f server.pid server.endpoint server.endpoint.tmp

# Background a supervisor that opens HyperProcess, writes the descriptor, then
# blocks until ./stop signals it. nohup so it survives this script exiting;
# $! is the supervisor PID we kill in ./stop.
nohup python3 - >server.log 2>&1 <<'PY' &
import os
import signal
import sys
from tableauhyperapi import HyperProcess, Telemetry


def _terminate(*_):
    # Raise SystemExit so the `with HyperProcess` block exits cleanly and
    # hyperd is shut down with us (it is terminated when its controlling
    # process exits).
    sys.exit(0)


signal.signal(signal.SIGTERM, _terminate)
signal.signal(signal.SIGINT, _terminate)

with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
    # Publish the descriptor atomically so ./check and ./query never read a
    # half-written file.
    with open("server.endpoint.tmp", "w") as f:
        f.write(hyper.endpoint.connection_descriptor)
    os.replace("server.endpoint.tmp", "server.endpoint")
    # Block until a signal arrives; loop so a stray signal can't tear the
    # server down (only the handler's sys.exit does).
    while True:
        signal.pause()
PY
supervisor_pid=$!
printf '%s\n' "$supervisor_pid" > server.pid

# Give the supervisor a moment to publish the endpoint. The benchmark driver
# also runs ./check in a loop afterwards, so this is just a fast-path / clean
# error rather than the authoritative readiness gate.
for ((attempt = 0; attempt < 60; attempt++)); do
  if [ -s server.endpoint ]; then
    exit 0
  fi
  # If the supervisor is already gone (e.g. the Python import or hyperd
  # startup failed) there is nothing left to wait for: report it right away
  # instead of sleeping out the full timeout, and drop the now-stale pidfile.
  if ! kill -0 "$supervisor_pid" 2>/dev/null; then
    echo "hyper-parquet: server exited before publishing server.endpoint (see server.log)" >&2
    rm -f server.pid server.endpoint.tmp
    exit 1
  fi
  sleep 1
done

echo "hyper-parquet: server did not publish server.endpoint within 60s" >&2
exit 1
22 changes: 22 additions & 0 deletions hyper-parquet/stop
Original file line number Diff line number Diff line change
@@ -1,2 +1,24 @@
#!/bin/bash
# Stop the persistent Hyper server started by ./start. SIGTERM the supervisor
# (see ./start); its handler exits the `with HyperProcess` block, which shuts
# down hyperd. Idempotent: a missing/stale pidfile is not an error.
#
# Files removed: server.pid, server.endpoint, server.endpoint.tmp.
# Exit status:   always 0.
set -e

if [ -f server.pid ]; then
  pid="$(cat server.pid 2>/dev/null || true)"
  # Only act on a strictly positive decimal PID. An empty, corrupt, "0" or
  # negative value must never reach kill: PID 0 addresses the caller's whole
  # process group, which would take the benchmark driver down with it.
  if [[ "$pid" =~ ^[1-9][0-9]*$ ]] && kill -0 "$pid" 2>/dev/null; then
    kill "$pid" 2>/dev/null || true
    # Wait for the supervisor (and thus hyperd) to actually exit so the
    # benchmark driver's drop_caches isn't defeated by pages still pinned
    # by a live mmap.
    for ((attempt = 0; attempt < 60; attempt++)); do
      kill -0 "$pid" 2>/dev/null || break
      sleep 1
    done
    # Escalate only if it is still alive after the grace period; signalling
    # unconditionally could hit an unrelated process that reused the PID.
    if kill -0 "$pid" 2>/dev/null; then
      kill -9 "$pid" 2>/dev/null || true
      # SIGKILL is delivered asynchronously; give it a short bounded window
      # so the process is really gone before we report "stopped".
      for ((attempt = 0; attempt < 10; attempt++)); do
        kill -0 "$pid" 2>/dev/null || break
        sleep 1
      done
    fi
  fi
fi

rm -f server.pid server.endpoint server.endpoint.tmp
exit 0
11 changes: 5 additions & 6 deletions hyper/benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@
# Thin shim — actual flow is in lib/benchmark-common.sh.
export BENCH_DOWNLOAD_SCRIPT="download-hits-csv"
export BENCH_DURABLE=yes
export BENCH_RESTARTABLE=no
# Single-process engine: each query forks a fresh full-machine process with no
# shared scheduler across connections, so the concurrent-QPS test only
# oversubscribes RAM rather than measuring throughput. Skip it by default;
# override BENCH_CONCURRENT_DURATION to re-enable. See issue #946.
export BENCH_CONCURRENT_DURATION="${BENCH_CONCURRENT_DURATION:-0}"
# RESTARTABLE=yes: ./start now launches a persistent hyperd whose lifecycle
# matters, so the driver's cold cycle (stop -> wait_stopped -> drop_caches ->
# start) gives an honest cold try 1 while tries 2..N stay hot on the warm
# server. See issue #936.
export BENCH_RESTARTABLE=yes
exec ../lib/benchmark-common.sh
20 changes: 16 additions & 4 deletions hyper/check
Original file line number Diff line number Diff line change
@@ -1,12 +1,24 @@
#!/bin/bash
# Readiness probe for the persistent Hyper server. ./start publishes the
# server's connection descriptor to server.endpoint; this script connects
# through that descriptor and runs SELECT 1. A non-zero exit status means
# "not up yet". The benchmark driver polls this in a loop after ./start, and
# in the cold cycle it treats the probe starting to fail as the signal that
# the server is really stopped.
set -e

# shellcheck disable=SC1091
source myenv/bin/activate

# A missing or empty endpoint file means no server has been published.
if [ ! -s server.endpoint ]; then
  exit 1
fi
python3 - <<'PY'
from tableauhyperapi import HyperProcess, Telemetry, Connection
with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
with Connection(hyper.endpoint) as connection:
connection.execute_list_query("SELECT 1")
from tableauhyperapi import Connection, Endpoint

# The descriptor is the single line ./start wrote to server.endpoint.
with open("server.endpoint") as endpoint_file:
    conn_descriptor = endpoint_file.read().strip()

# Attach to the already-running server; any failure here propagates as a
# non-zero exit status of the probe.
server = Endpoint(connection_descriptor=conn_descriptor, user_agent="clickbench")
with Connection(server) as conn:
    conn.execute_list_query("SELECT 1")
PY
17 changes: 12 additions & 5 deletions hyper/load
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
#!/bin/bash
# Create hits.hyper and COPY hits.csv into it, using the PERSISTENT Hyper
# server started by ./start (descriptor in server.endpoint). Loading through
# the already-running server avoids briefly running two hyperd instances
# (each of which would try to claim up to 80% of RAM) during the heavy COPY.
set -e

# shellcheck disable=SC1091
Expand All @@ -8,12 +12,15 @@ source myenv/bin/activate
rm -f hits.hyper

python3 - <<'PY'
from tableauhyperapi import HyperProcess, Telemetry, Connection, CreateMode
from tableauhyperapi import Connection, Endpoint, CreateMode

with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
with Connection(hyper.endpoint, 'hits.hyper', CreateMode.CREATE_AND_REPLACE) as connection:
connection.execute_command(open("create.sql").read())
connection.execute_command("copy hits from 'hits.csv' with (format csv)")
# Connection descriptor published by ./start.
with open("server.endpoint") as f:
    descriptor = f.read().strip()

# Read the table DDL through a context manager so the file handle is closed
# deterministically rather than whenever the interpreter collects it.
with open("create.sql") as f:
    create_sql = f.read()

endpoint = Endpoint(connection_descriptor=descriptor, user_agent="clickbench")
# CREATE_AND_REPLACE: start from a fresh hits.hyper database file on the
# already-running server, then create the table and bulk-load the CSV.
with Connection(endpoint, 'hits.hyper', CreateMode.CREATE_AND_REPLACE) as connection:
    connection.execute_command(create_sql)
    connection.execute_command("copy hits from 'hits.csv' with (format csv)")
PY

rm -f hits.csv
Expand Down
24 changes: 16 additions & 8 deletions hyper/query
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
#!/bin/bash
# Reads a SQL query from stdin, runs it via tableau hyperapi against
# hits.hyper.
# Reads a SQL query from stdin, runs it once against hits.hyper on the
# PERSISTENT Hyper server started by ./start (descriptor in server.endpoint).
# Stdout: query result.
# Stderr: query runtime in fractional seconds on the last line.
#
# The benchmark driver calls this once per try (BENCH_TRIES). Because every
# call connects to the SAME long-lived server, the buffer pool stays warm
# across tries: try 1 (right after the driver's stop/drop_caches/start cold
# cycle) is cold, tries 2..N are genuinely hot. See issue #936.
set -e

# shellcheck disable=SC1091
Expand All @@ -17,16 +22,19 @@ cat > "$query_file"
python3 - "$query_file" <<'PY'
import sys
import timeit
from tableauhyperapi import HyperProcess, Telemetry, Connection, CreateMode
from tableauhyperapi import Connection, Endpoint

with open(sys.argv[1]) as f:
query = f.read()

with HyperProcess(telemetry=Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU) as hyper:
with Connection(hyper.endpoint, 'hits.hyper', CreateMode.NONE) as connection:
start = timeit.default_timer()
rows = connection.execute_list_query(query)
end = timeit.default_timer()
# Pick up the descriptor that ./start published for the running server.
with open("server.endpoint") as endpoint_file:
    conn_descriptor = endpoint_file.read().strip()

# Attach to hits.hyper on the persistent server and time only the query call.
with Connection(
    Endpoint(connection_descriptor=conn_descriptor, user_agent="clickbench"),
    'hits.hyper',
) as connection:
    start = timeit.default_timer()
    rows = connection.execute_list_query(query)
    end = timeit.default_timer()

for r in rows:
print(r)
Expand Down
Loading