From 521569ffae0544098c612c44ad1ccbed33d9ee1e Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 10 Mar 2026 18:07:54 +0100 Subject: [PATCH 1/2] wait for nodes and crds, change to FQDN names as per integration test --- .../getting_started/getting_started.sh | 62 +++++++++++-------- .../getting_started/getting_started.sh.j2 | 62 +++++++++++-------- 2 files changed, 70 insertions(+), 54 deletions(-) diff --git a/docs/modules/druid/examples/getting_started/getting_started.sh b/docs/modules/druid/examples/getting_started/getting_started.sh index d4ced141..4a626ea3 100755 --- a/docs/modules/druid/examples/getting_started/getting_started.sh +++ b/docs/modules/druid/examples/getting_started/getting_started.sh @@ -20,6 +20,9 @@ then exit 1 fi +echo "Waiting for node(s) to be ready..." +kubectl wait node --all --for=condition=Ready --timeout=120s + cd "$(dirname "$0")" case "$1" in @@ -52,6 +55,10 @@ exit 1 ;; esac +# As of SDP 26.3 CRDs are managed by the operator not helm, so there should be an initial delay +# to allow the CRDs to be detected +sleep 10 + echo "Installing ZooKeeper from zookeeper.yaml" # tag::install-zookeeper[] kubectl apply -f zookeeper.yaml @@ -89,9 +96,9 @@ done echo "Awaiting HDFS rollout finish" # tag::watch-hdfs-rollout[] -kubectl rollout status --watch statefulset/simple-hdfs-datanode-default --timeout=300s -kubectl rollout status --watch statefulset/simple-hdfs-journalnode-default --timeout=300s -kubectl rollout status --watch statefulset/simple-hdfs-namenode-default --timeout=300s +kubectl rollout status --watch statefulset/simple-hdfs-datanode-default --timeout=600s +kubectl rollout status --watch statefulset/simple-hdfs-journalnode-default --timeout=600s +kubectl rollout status --watch statefulset/simple-hdfs-namenode-default --timeout=600s # end::watch-hdfs-rollout[] echo "Installing PostgreSQL for Druid" @@ -125,39 +132,36 @@ done echo "Awaiting Druid rollout finish" # tag::watch-druid-rollout[] -kubectl rollout status --watch statefulset/simple-druid-broker-default --timeout=300s -kubectl rollout status --watch statefulset/simple-druid-coordinator-default --timeout=300s -kubectl rollout status --watch statefulset/simple-druid-historical-default --timeout=300s -kubectl rollout status --watch statefulset/simple-druid-middlemanager-default --timeout=300s -kubectl rollout status --watch statefulset/simple-druid-router-default --timeout=300s +kubectl rollout status --watch statefulset/simple-druid-broker-default --timeout=600s +kubectl rollout status --watch statefulset/simple-druid-coordinator-default --timeout=600s +kubectl rollout status --watch statefulset/simple-druid-historical-default --timeout=600s +kubectl rollout status --watch statefulset/simple-druid-middlemanager-default --timeout=600s +kubectl rollout status --watch statefulset/simple-druid-router-default --timeout=600s # end::watch-druid-rollout[] -echo "Starting port-forwarding of port 9088" -# shellcheck disable=2069 # we want all output to be blackholed -# tag::port-forwarding[] -kubectl port-forward svc/simple-druid-router 9088 > /dev/null 2>&1 & -# end::port-forwarding[] -PORT_FORWARD_PID=$! -# shellcheck disable=2064 # we want the PID evaluated now, not at the time the trap is -trap "kill $PORT_FORWARD_PID" EXIT -sleep 5 +COORDINATOR="simple-druid-coordinator-default-headless.default.svc.cluster.local" +BROKER="simple-druid-broker-default-headless.default.svc.cluster.local" submit_job() { -# tag::submit-job[] -curl -s -k -X 'POST' -H 'Content-Type:application/json' -d @ingestion_spec.json https://localhost:9088/druid/indexer/v1/task -# end::submit-job[] + # tag::submit-job[] + kubectl exec simple-druid-coordinator-default-0 -i -- \ + curl -s -k -X POST -H 'Content-Type:application/json' --data-binary @- \ + "https://${COORDINATOR}:8281/druid/indexer/v1/task" < ingestion_spec.json + # end::submit-job[] } echo "Submitting job" task_id=$(submit_job | sed -e 's/.*":"\([^"]\+\).*/\1/g') request_job_status() { - curl -s -k "https://localhost:9088/druid/indexer/v1/task/${task_id}/status" | sed -e 's/.*statusCode":"\([^"]\+\).*/\1/g' + kubectl exec simple-druid-coordinator-default-0 -- \ + curl -s -k "https://${COORDINATOR}:8281/druid/indexer/v1/task/${task_id}/status" \ + | sed -e 's/.*statusCode":"\([^"]\+\).*/\1/g' } while [ "$(request_job_status)" == "RUNNING" ]; do echo "Task still running..." - sleep 5 + sleep 10 done task_status=$(request_job_status) @@ -170,18 +174,22 @@ else fi segment_load_status() { - curl -s -k https://localhost:9088/druid/coordinator/v1/loadstatus | sed -e 's/.*wikipedia":\([0-9\.]\+\).*/\1/g' + kubectl exec simple-druid-coordinator-default-0 -- \ + curl -s -k "https://${COORDINATOR}:8281/druid/coordinator/v1/loadstatus" \ + | sed -e 's/.*wikipedia":\([0-9\.]\+\).*/\1/g' } while [ "$(segment_load_status)" != "100.0" ]; do echo "Segments still loading..." - sleep 5 + sleep 10 done query_data() { -# tag::query-data[] -curl -s -k -X 'POST' -H 'Content-Type:application/json' -d @query.json https://localhost:9088/druid/v2/sql -# end::query-data[] + # tag::query-data[] + kubectl exec simple-druid-broker-default-0 -i -- \ + curl -s -k -X POST -H 'Content-Type:application/json' --data-binary @- \ + "https://${BROKER}:8282/druid/v2/sql" < query.json + # end::query-data[] } echo "Querying data..." diff --git a/docs/modules/druid/examples/getting_started/getting_started.sh.j2 b/docs/modules/druid/examples/getting_started/getting_started.sh.j2 index 8ca5a992..f94dce9a 100755 --- a/docs/modules/druid/examples/getting_started/getting_started.sh.j2 +++ b/docs/modules/druid/examples/getting_started/getting_started.sh.j2 @@ -20,6 +20,9 @@ then exit 1 fi +echo "Waiting for node(s) to be ready..." +kubectl wait node --all --for=condition=Ready --timeout=120s + cd "$(dirname "$0")" case "$1" in @@ -52,6 +55,10 @@ exit 1 ;; esac +# As of SDP 26.3 CRDs are managed by the operator not helm, so there should be an initial delay +# to allow the CRDs to be detected +sleep 10 + echo "Installing ZooKeeper from zookeeper.yaml" # tag::install-zookeeper[] kubectl apply -f zookeeper.yaml @@ -89,9 +96,9 @@ done echo "Awaiting HDFS rollout finish" # tag::watch-hdfs-rollout[] -kubectl rollout status --watch statefulset/simple-hdfs-datanode-default --timeout=300s -kubectl rollout status --watch statefulset/simple-hdfs-journalnode-default --timeout=300s -kubectl rollout status --watch statefulset/simple-hdfs-namenode-default --timeout=300s +kubectl rollout status --watch statefulset/simple-hdfs-datanode-default --timeout=600s +kubectl rollout status --watch statefulset/simple-hdfs-journalnode-default --timeout=600s +kubectl rollout status --watch statefulset/simple-hdfs-namenode-default --timeout=600s # end::watch-hdfs-rollout[] echo "Installing PostgreSQL for Druid" @@ -125,39 +132,36 @@ done echo "Awaiting Druid rollout finish" # tag::watch-druid-rollout[] -kubectl rollout status --watch statefulset/simple-druid-broker-default --timeout=300s -kubectl rollout status --watch statefulset/simple-druid-coordinator-default --timeout=300s -kubectl rollout status --watch statefulset/simple-druid-historical-default --timeout=300s -kubectl rollout status --watch statefulset/simple-druid-middlemanager-default --timeout=300s -kubectl rollout status --watch statefulset/simple-druid-router-default --timeout=300s +kubectl rollout status --watch statefulset/simple-druid-broker-default --timeout=600s +kubectl rollout status --watch statefulset/simple-druid-coordinator-default --timeout=600s +kubectl rollout status --watch statefulset/simple-druid-historical-default --timeout=600s +kubectl rollout status --watch statefulset/simple-druid-middlemanager-default --timeout=600s +kubectl rollout status --watch statefulset/simple-druid-router-default --timeout=600s # end::watch-druid-rollout[] -echo "Starting port-forwarding of port 9088" -# shellcheck disable=2069 # we want all output to be blackholed -# tag::port-forwarding[] -kubectl port-forward svc/simple-druid-router 9088 > /dev/null 2>&1 & -# end::port-forwarding[] -PORT_FORWARD_PID=$! -# shellcheck disable=2064 # we want the PID evaluated now, not at the time the trap is -trap "kill $PORT_FORWARD_PID" EXIT -sleep 5 +COORDINATOR="simple-druid-coordinator-default-headless.default.svc.cluster.local" +BROKER="simple-druid-broker-default-headless.default.svc.cluster.local" submit_job() { -# tag::submit-job[] -curl -s -k -X 'POST' -H 'Content-Type:application/json' -d @ingestion_spec.json https://localhost:9088/druid/indexer/v1/task -# end::submit-job[] + # tag::submit-job[] + kubectl exec simple-druid-coordinator-default-0 -i -- \ + curl -s -k -X POST -H 'Content-Type:application/json' --data-binary @- \ + "https://${COORDINATOR}:8281/druid/indexer/v1/task" < ingestion_spec.json + # end::submit-job[] } echo "Submitting job" task_id=$(submit_job | sed -e 's/.*":"\([^"]\+\).*/\1/g') request_job_status() { - curl -s -k "https://localhost:9088/druid/indexer/v1/task/${task_id}/status" | sed -e 's/.*statusCode":"\([^"]\+\).*/\1/g' + kubectl exec simple-druid-coordinator-default-0 -- \ + curl -s -k "https://${COORDINATOR}:8281/druid/indexer/v1/task/${task_id}/status" \ + | sed -e 's/.*statusCode":"\([^"]\+\).*/\1/g' } while [ "$(request_job_status)" == "RUNNING" ]; do echo "Task still running..." - sleep 5 + sleep 10 done task_status=$(request_job_status) @@ -170,18 +174,22 @@ else fi segment_load_status() { - curl -s -k https://localhost:9088/druid/coordinator/v1/loadstatus | sed -e 's/.*wikipedia":\([0-9\.]\+\).*/\1/g' + kubectl exec simple-druid-coordinator-default-0 -- \ + curl -s -k "https://${COORDINATOR}:8281/druid/coordinator/v1/loadstatus" \ + | sed -e 's/.*wikipedia":\([0-9\.]\+\).*/\1/g' } while [ "$(segment_load_status)" != "100.0" ]; do echo "Segments still loading..." - sleep 5 + sleep 10 done query_data() { -# tag::query-data[] -curl -s -k -X 'POST' -H 'Content-Type:application/json' -d @query.json https://localhost:9088/druid/v2/sql -# end::query-data[] + # tag::query-data[] + kubectl exec simple-druid-broker-default-0 -i -- \ + curl -s -k -X POST -H 'Content-Type:application/json' --data-binary @- \ + "https://${BROKER}:8282/druid/v2/sql" < query.json + # end::query-data[] } echo "Querying data..." From 589bbdb25a26efe3b9e4c9a9aff568902660b5be Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Wed, 11 Mar 2026 14:51:23 +0100 Subject: [PATCH 2/2] corrected docs, improved wait condition --- .../examples/getting_started/getting_started.sh | 5 ++--- .../examples/getting_started/getting_started.sh.j2 | 5 ++--- .../druid/pages/getting_started/first_steps.adoc | 12 +++++------- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/docs/modules/druid/examples/getting_started/getting_started.sh b/docs/modules/druid/examples/getting_started/getting_started.sh index 4a626ea3..7c082b18 100755 --- a/docs/modules/druid/examples/getting_started/getting_started.sh +++ b/docs/modules/druid/examples/getting_started/getting_started.sh @@ -55,9 +55,8 @@ exit 1 ;; esac -# As of SDP 26.3 CRDs are managed by the operator not helm, so there should be an initial delay -# to allow the CRDs to be detected -sleep 10 +# TODO: Remove once https://github.com/stackabletech/issues/issues/828 has been implemented (see that issue for details). +until kubectl get crd druidclusters.druid.stackable.tech >/dev/null 2>&1; do echo "Waiting for CRDs to be installed" && sleep 1; done echo "Installing ZooKeeper from zookeeper.yaml" # tag::install-zookeeper[] diff --git a/docs/modules/druid/examples/getting_started/getting_started.sh.j2 b/docs/modules/druid/examples/getting_started/getting_started.sh.j2 index f94dce9a..3f17b4eb 100755 --- a/docs/modules/druid/examples/getting_started/getting_started.sh.j2 +++ b/docs/modules/druid/examples/getting_started/getting_started.sh.j2 @@ -55,9 +55,8 @@ exit 1 ;; esac -# As of SDP 26.3 CRDs are managed by the operator not helm, so there should be an initial delay -# to allow the CRDs to be detected -sleep 10 +# TODO: Remove once https://github.com/stackabletech/issues/issues/828 has been implemented (see that issue for details). +until kubectl get crd druidclusters.druid.stackable.tech >/dev/null 2>&1; do echo "Waiting for CRDs to be installed" && sleep 1; done echo "Installing ZooKeeper from zookeeper.yaml" # tag::install-zookeeper[] diff --git a/docs/modules/druid/pages/getting_started/first_steps.adoc b/docs/modules/druid/pages/getting_started/first_steps.adoc index c1e3065b..7a7db01a 100644 --- a/docs/modules/druid/pages/getting_started/first_steps.adoc +++ b/docs/modules/druid/pages/getting_started/first_steps.adoc @@ -106,20 +106,18 @@ simple-hdfs-namenode-default 2/2 6m simple-zk-server-default 3/3 7m ---- -Ideally you use `stackablectl stacklet list` to find out the address the Druid router is reachable at and use that address. +=== Ingest example data -As an alternative, you can create a port-forward for the Druid Router: +Next, ingest some example data using the web interface. +You can either use `stackablectl stacklet list` to find out the address the Druid router is reachable at and use that address, or you can create a port-forward for the Druid Router: +[source,bash] ---- -include::example$getting_started/getting_started.sh[tag=port-forwarding] +kubectl port-forward svc/simple-druid-router 9088 > /dev/null 2>&1 & ---- -=== Ingest example data - -Next, ingest some example data using the web interface. If you prefer to use the command line instead, follow the instructions in the collapsed section below. - [#ingest-cmd-line] .Alternative: Using the command line [%collapsible]