From 9d1bbe72cb339f1cbe452a0130e63d1f840ab187 Mon Sep 17 00:00:00 2001 From: vivekr-splunk Date: Sat, 28 Feb 2026 22:51:13 -0800 Subject: [PATCH 1/6] feat: opt-in multi-container pod layout (init + sidecar) - Gate via SPLUNK_POD_ARCH=multi-container\n- Inject splunk-init and splunk-sidecar using RELATED_IMAGE_SPLUNK_INIT/SIDECAR\n- Rewire splunk probes to kubelet httpGet against sidecar pod health endpoints\n- Ensure only the splunk container receives splunk env/probes/resources --- pkg/splunk/enterprise/configuration.go | 189 +++++++++++++++++++ pkg/splunk/enterprise/multicontainer.go | 13 ++ pkg/splunk/enterprise/multicontainer_test.go | 107 +++++++++++ pkg/splunk/enterprise/names.go | 10 + 4 files changed, 319 insertions(+) create mode 100644 pkg/splunk/enterprise/multicontainer.go create mode 100644 pkg/splunk/enterprise/multicontainer_test.go diff --git a/pkg/splunk/enterprise/configuration.go b/pkg/splunk/enterprise/configuration.go index c9cc6838b..48fc43ac5 100644 --- a/pkg/splunk/enterprise/configuration.go +++ b/pkg/splunk/enterprise/configuration.go @@ -247,6 +247,10 @@ func getSplunkService(ctx context.Context, cr splcommon.MetaObject, spec *enterp // required for SHC bootstrap process; use services with heads when readiness is desired service.Spec.PublishNotReadyAddresses = true } + if isHeadless && isMultiContainerPodEnabled() { + // In multi-container mode the operator may need to reach the sidecar before the pod is Ready. + service.Spec.PublishNotReadyAddresses = true + } service.SetOwnerReferences(append(service.GetOwnerReferences(), splcommon.AsOwner(cr, true))) @@ -818,6 +822,9 @@ func updateSplunkPodTemplateWithConfig(ctx context.Context, client splcommon.Con // Add custom ports to splunk containers if spec.ServiceTemplate.Spec.Ports != nil { for idx := range podTemplateSpec.Spec.Containers { + if podTemplateSpec.Spec.Containers[idx].Name != "splunk" { + continue + } for _, p := range spec.ServiceTemplate.Spec.Ports { podTemplateSpec.Spec.Containers[idx].Ports = append(podTemplateSpec.Spec.Containers[idx].Ports, corev1.ContainerPort{ @@ -833,6 +840,9 @@ func updateSplunkPodTemplateWithConfig(ctx context.Context, client splcommon.Con if spec.Volumes != nil { podTemplateSpec.Spec.Volumes = append(podTemplateSpec.Spec.Volumes, spec.Volumes...) for idx := range podTemplateSpec.Spec.Containers { + if podTemplateSpec.Spec.Containers[idx].Name != "splunk" { + continue + } for v := range spec.Volumes { podTemplateSpec.Spec.Containers[idx].VolumeMounts = append(podTemplateSpec.Spec.Containers[idx].VolumeMounts, corev1.VolumeMount{ Name: spec.Volumes[v].Name, @@ -1087,9 +1097,188 @@ func updateSplunkPodTemplateWithConfig(ctx context.Context, client splcommon.Con env = removeDuplicateEnvVars(env) } + // Multi-container mode: inject init + sidecar, and rewire Splunk probes to HTTP so the main container can be distroless. + // This is strictly opt-in via SPLUNK_POD_ARCH to avoid changing legacy behavior and fixtures. + if isMultiContainerPodEnabled() { + // Ensure podTemplate annotations map is initialized (we may append later in other paths). + if podTemplateSpec.ObjectMeta.Annotations == nil { + podTemplateSpec.ObjectMeta.Annotations = make(map[string]string) + } + + // Sidecar health endpoints back Splunk probes via kubelet httpGet. + livenessProbe = &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/healthz/pod-live", + Port: intstr.FromInt(8080), + }, + }, + InitialDelaySeconds: 60, + TimeoutSeconds: 5, + PeriodSeconds: 10, + FailureThreshold: 3, + } + startupProbe = &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/healthz/pod-startup", + Port: intstr.FromInt(8080), + }, + }, + InitialDelaySeconds: 10, + TimeoutSeconds: 5, + PeriodSeconds: 10, + FailureThreshold: 30, + } + readinessProbe = &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/healthz/pod-ready", + Port: intstr.FromInt(8080), + }, + }, + InitialDelaySeconds: 10, + TimeoutSeconds: 5, + PeriodSeconds: 10, + FailureThreshold: 3, + } + + // Choose sidecar role. For SHC members we want strict gating until SHC join completes. + sidecarRole := instanceType.ToString() + if instanceType == SplunkSearchHead { + if strings.EqualFold(cr.GetObjectKind().GroupVersionKind().Kind, "SearchHeadCluster") { + sidecarRole = "shc-member" + } + } + + // Copy selected mounts from the Splunk container so the sidecar and init container see the same filesystem. + var splunkVM []corev1.VolumeMount + for i := range podTemplateSpec.Spec.Containers { + if podTemplateSpec.Spec.Containers[i].Name == "splunk" { + splunkVM = append([]corev1.VolumeMount(nil), podTemplateSpec.Spec.Containers[i].VolumeMounts...) + break + } + } + needMount := func(mountPath string) bool { + switch mountPath { + case "/opt/splunk/etc", "/opt/splunk/var", "/mnt/splunk-secrets": + return true + default: + // also propagate custom /mnt/ mounts + return strings.HasPrefix(mountPath, "/mnt/") + } + } + sharedMounts := make([]corev1.VolumeMount, 0, len(splunkVM)) + for _, m := range splunkVM { + if needMount(m.MountPath) { + sharedMounts = append(sharedMounts, m) + } + } + + // Inject sidecar container (if image provided). + if img := strings.TrimSpace(GetSplunkSidecarImage()); img != "" { + privileged := false + sc := corev1.Container{ + Name: "splunk-sidecar", + Image: img, + ImagePullPolicy: corev1.PullPolicy(spec.ImagePullPolicy), + Ports: []corev1.ContainerPort{ + {Name: "sidecar-http", ContainerPort: 8080, Protocol: corev1.ProtocolTCP}, + {Name: "sidecar-metrics", ContainerPort: 8081, Protocol: corev1.ProtocolTCP}, + }, + Env: []corev1.EnvVar{ + {Name: "SPLUNK_HOME", Value: "/opt/splunk"}, + {Name: "SPLUNK_ROLE", Value: sidecarRole}, + {Name: "WATCH_PATHS", Value: "/mnt/certificates,/mnt/splunk-secrets"}, + }, + VolumeMounts: sharedMounts, + LivenessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{Path: "/healthz/live", Port: intstr.FromInt(8080)}, + }, + PeriodSeconds: 10, + TimeoutSeconds: 5, + FailureThreshold: 3, + }, + ReadinessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{Path: "/healthz/ready", Port: intstr.FromInt(8080)}, + }, + PeriodSeconds: 10, + TimeoutSeconds: 5, + FailureThreshold: 3, + }, + SecurityContext: &corev1.SecurityContext{ + RunAsUser: &runAsUser, + RunAsNonRoot: &runAsNonRoot, + AllowPrivilegeEscalation: &[]bool{false}[0], + Capabilities: &corev1.Capabilities{ + Drop: []corev1.Capability{"ALL"}, + Add: []corev1.Capability{"NET_BIND_SERVICE"}, + }, + Privileged: &privileged, + SeccompProfile: &corev1.SeccompProfile{ + Type: corev1.SeccompProfileTypeRuntimeDefault, + }, + }, + } + + // Upsert by name. + found := false + for i := range podTemplateSpec.Spec.Containers { + if podTemplateSpec.Spec.Containers[i].Name == sc.Name { + podTemplateSpec.Spec.Containers[i] = sc + found = true + break + } + } + if !found { + podTemplateSpec.Spec.Containers = append(podTemplateSpec.Spec.Containers, sc) + } + } + + // Inject init container (if image provided). + if img := strings.TrimSpace(GetSplunkInitImage()); img != "" { + ic := corev1.Container{ + Name: "splunk-init", + Image: img, + ImagePullPolicy: corev1.PullPolicy(spec.ImagePullPolicy), + Env: []corev1.EnvVar{ + {Name: "SPLUNK_HOME", Value: "/opt/splunk"}, + {Name: "SPLUNK_ROLE", Value: role}, + // Reuse the same "defaults URL" logic to seed the init pipeline. + {Name: "SPLUNK_CONFIG_SOURCES", Value: splunkDefaults}, + }, + VolumeMounts: sharedMounts, + SecurityContext: &corev1.SecurityContext{ + RunAsUser: &runAsUser, + RunAsNonRoot: &runAsNonRoot, + SeccompProfile: &corev1.SeccompProfile{ + Type: corev1.SeccompProfileTypeRuntimeDefault, + }, + }, + } + + found := false + for i := range podTemplateSpec.Spec.InitContainers { + if podTemplateSpec.Spec.InitContainers[i].Name == ic.Name { + podTemplateSpec.Spec.InitContainers[i] = ic + found = true + break + } + } + if !found { + podTemplateSpec.Spec.InitContainers = append(podTemplateSpec.Spec.InitContainers, ic) + } + } + } + privileged := false // update each container in pod for idx := range podTemplateSpec.Spec.Containers { + if podTemplateSpec.Spec.Containers[idx].Name != "splunk" { + continue + } podTemplateSpec.Spec.Containers[idx].Resources = spec.Resources podTemplateSpec.Spec.Containers[idx].LivenessProbe = livenessProbe podTemplateSpec.Spec.Containers[idx].ReadinessProbe = readinessProbe diff --git a/pkg/splunk/enterprise/multicontainer.go b/pkg/splunk/enterprise/multicontainer.go new file mode 100644 index 000000000..bf954750b --- /dev/null +++ b/pkg/splunk/enterprise/multicontainer.go @@ -0,0 +1,13 @@ +package enterprise + +import ( + "os" + "strings" +) + +// isMultiContainerPodEnabled gates the new multi-container pod layout without changing CRDs. +// When disabled, the operator behaves exactly as before. +func isMultiContainerPodEnabled() bool { + v := strings.TrimSpace(os.Getenv("SPLUNK_POD_ARCH")) + return strings.EqualFold(v, "multi-container") || strings.EqualFold(v, "multicontainer") +} diff --git a/pkg/splunk/enterprise/multicontainer_test.go b/pkg/splunk/enterprise/multicontainer_test.go new file mode 100644 index 000000000..4be23919d --- /dev/null +++ b/pkg/splunk/enterprise/multicontainer_test.go @@ -0,0 +1,107 @@ +package enterprise + +import ( + "context" + "os" + "testing" + + enterpriseApi "github.com/splunk/splunk-operator/api/v4" + spltest "github.com/splunk/splunk-operator/pkg/splunk/test" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestUpdateSplunkPodTemplateWithConfig_MultiContainerInjectsInitSidecarAndHTTPProbes(t *testing.T) { + t.Setenv("SPLUNK_POD_ARCH", "multi-container") + t.Setenv("RELATED_IMAGE_SPLUNK_INIT", "test/splunk-init:latest") + t.Setenv("RELATED_IMAGE_SPLUNK_SIDECAR", "test/splunk-sidecar:latest") + + ctx := context.TODO() + client := spltest.NewMockClient() + + cr := &enterpriseApi.Standalone{ + ObjectMeta: metav1.ObjectMeta{ + Name: "stack1", + Namespace: "test", + }, + } + + pod := &corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{}, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "splunk", Image: "test/splunk:latest"}, + }, + }, + } + + spec := &enterpriseApi.CommonSplunkSpec{} + updateSplunkPodTemplateWithConfig(ctx, client, pod, cr, spec, SplunkStandalone, nil, "dummy-secret") + + // Init container injected. + foundInit := false + for _, c := range pod.Spec.InitContainers { + if c.Name == "splunk-init" { + foundInit = true + } + } + if !foundInit { + t.Fatalf("expected init container splunk-init to be injected") + } + + // Sidecar injected. + foundSidecar := false + for _, c := range pod.Spec.Containers { + if c.Name == "splunk-sidecar" { + foundSidecar = true + } + } + if !foundSidecar { + t.Fatalf("expected sidecar container splunk-sidecar to be injected") + } + + // Splunk probes should be HTTP GET, not exec scripts. + var splunk corev1.Container + ok := false + for _, c := range pod.Spec.Containers { + if c.Name == "splunk" { + splunk = c + ok = true + break + } + } + if !ok { + t.Fatalf("expected splunk container to exist") + } + if splunk.ReadinessProbe == nil || splunk.ReadinessProbe.HTTPGet == nil { + t.Fatalf("expected splunk readiness probe to be httpGet") + } + if splunk.ReadinessProbe.HTTPGet.Path != "/healthz/pod-ready" { + t.Fatalf("unexpected readiness path: %q", splunk.ReadinessProbe.HTTPGet.Path) + } + if splunk.LivenessProbe == nil || splunk.LivenessProbe.HTTPGet == nil { + t.Fatalf("expected splunk liveness probe to be httpGet") + } + if splunk.StartupProbe == nil || splunk.StartupProbe.HTTPGet == nil { + t.Fatalf("expected splunk startup probe to be httpGet") + } + + // Ensure we didn't clobber sidecar env with the splunk env set. + for _, c := range pod.Spec.Containers { + if c.Name != "splunk-sidecar" { + continue + } + for _, e := range c.Env { + if e.Name == "SPLUNK_DEFAULTS_URL" { + t.Fatalf("sidecar should not receive splunk container env SPLUNK_DEFAULTS_URL") + } + } + } + + // Cleanup for any other tests using os.Getenv directly. + _ = os.Unsetenv("SPLUNK_POD_ARCH") + _ = os.Unsetenv("RELATED_IMAGE_SPLUNK_INIT") + _ = os.Unsetenv("RELATED_IMAGE_SPLUNK_SIDECAR") +} diff --git a/pkg/splunk/enterprise/names.go b/pkg/splunk/enterprise/names.go index e49782f59..3972e6e30 100644 --- a/pkg/splunk/enterprise/names.go +++ b/pkg/splunk/enterprise/names.go @@ -314,6 +314,16 @@ func GetSplunkImage(specImage string) string { return name } +// GetSplunkInitImage returns the docker image to use for the init container in multi-container mode. +func GetSplunkInitImage() string { + return os.Getenv("RELATED_IMAGE_SPLUNK_INIT") +} + +// GetSplunkSidecarImage returns the docker image to use for the sidecar container in multi-container mode. +func GetSplunkSidecarImage() string { + return os.Getenv("RELATED_IMAGE_SPLUNK_SIDECAR") +} + // GetPortName uses a template to enrich a port name with protocol information for usage with mesh services func GetPortName(port string, protocol string) string { return fmt.Sprintf(portNameTemplateStr, protocol, port) From 60d3df364bb4ca581aadecf5ccb7227284e6a0d3 Mon Sep 17 00:00:00 2001 From: vivekr-splunk Date: Sat, 28 Feb 2026 22:54:43 -0800 Subject: [PATCH 2/6] fix: align multi-container probe timings with operator defaults --- pkg/splunk/enterprise/configuration.go | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pkg/splunk/enterprise/configuration.go b/pkg/splunk/enterprise/configuration.go index 48fc43ac5..18313de75 100644 --- a/pkg/splunk/enterprise/configuration.go +++ b/pkg/splunk/enterprise/configuration.go @@ -1113,10 +1113,10 @@ func updateSplunkPodTemplateWithConfig(ctx context.Context, client splcommon.Con Port: intstr.FromInt(8080), }, }, - InitialDelaySeconds: 60, - TimeoutSeconds: 5, - PeriodSeconds: 10, - FailureThreshold: 3, + InitialDelaySeconds: livenessProbeDefaultDelaySec, + TimeoutSeconds: livenessProbeTimeoutSec, + PeriodSeconds: livenessProbePeriodSec, + FailureThreshold: livenessProbeFailureThreshold, } startupProbe = &corev1.Probe{ ProbeHandler: corev1.ProbeHandler{ @@ -1125,10 +1125,10 @@ func updateSplunkPodTemplateWithConfig(ctx context.Context, client splcommon.Con Port: intstr.FromInt(8080), }, }, - InitialDelaySeconds: 10, - TimeoutSeconds: 5, - PeriodSeconds: 10, - FailureThreshold: 30, + InitialDelaySeconds: startupProbeDefaultDelaySec, + TimeoutSeconds: startupProbeTimeoutSec, + PeriodSeconds: startupProbePeriodSec, + FailureThreshold: startupProbeFailureThreshold, } readinessProbe = &corev1.Probe{ ProbeHandler: corev1.ProbeHandler{ @@ -1137,10 +1137,10 @@ func updateSplunkPodTemplateWithConfig(ctx context.Context, client splcommon.Con Port: intstr.FromInt(8080), }, }, - InitialDelaySeconds: 10, - TimeoutSeconds: 5, - PeriodSeconds: 10, - FailureThreshold: 3, + InitialDelaySeconds: readinessProbeDefaultDelaySec, + TimeoutSeconds: readinessProbeTimeoutSec, + PeriodSeconds: readinessProbePeriodSec, + FailureThreshold: readinessProbeFailureThreshold, } // Choose sidecar role. For SHC members we want strict gating until SHC join completes. From 8af5f6d23823741c4b0e4a83f53221ed1cfaea5f Mon Sep 17 00:00:00 2001 From: vivekr-splunk Date: Sat, 28 Feb 2026 23:06:25 -0800 Subject: [PATCH 3/6] dev: add skaffold + ECR overlay for operator - Add skaffold config with ecr-vivek profile (linux/amd64)\n- Add kustomize overlay to avoid make/sed placeholders and enable multi-container pods\n- Add ECR login/repo helper scripts\n- Document workflows --- .gitignore | 3 +- aws/ecr_ensure_repo.sh | 25 +++++++++++ aws/ecr_login.sh | 19 ++++++++ config/skaffold-ecr-vivek/kustomization.yaml | 14 ++++++ .../skaffold_env_patch.yaml | 28 ++++++++++++ docs/CONTRIBUTING.md | 33 ++++++++++++++ skaffold.yaml | 45 +++++++++++++++++++ 7 files changed, 166 insertions(+), 1 deletion(-) create mode 100755 aws/ecr_ensure_repo.sh create mode 100755 aws/ecr_login.sh create mode 100644 config/skaffold-ecr-vivek/kustomization.yaml create mode 100644 config/skaffold-ecr-vivek/skaffold_env_patch.yaml create mode 100644 skaffold.yaml diff --git a/.gitignore b/.gitignore index 4846768ad..f3e39095f 100644 --- a/.gitignore +++ b/.gitignore @@ -99,4 +99,5 @@ bundle_*/ test/secret/*.log kubeconfig .devcontainer/devcontainer.json -kuttl-artifacts/* \ No newline at end of file +kuttl-artifacts/* +.skaffold/ diff --git a/aws/ecr_ensure_repo.sh b/aws/ecr_ensure_repo.sh new file mode 100755 index 000000000..3ab2b4946 --- /dev/null +++ b/aws/ecr_ensure_repo.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +set -euo pipefail + +region="${AWS_REGION:-us-west-2}" +repo="${1:-}" + +if [[ -z "${repo}" ]]; then + echo "usage: $0 " + echo "example: $0 vivek/splunk-operator" + exit 2 +fi + +if ! command -v aws >/dev/null 2>&1; then + echo "aws CLI not found" + exit 1 +fi + +if aws ecr describe-repositories --region "${region}" --repository-names "${repo}" >/dev/null 2>&1; then + echo "exists: ${repo}" + exit 0 +fi + +aws ecr create-repository --region "${region}" --repository-name "${repo}" >/dev/null +echo "created: ${repo}" + diff --git a/aws/ecr_login.sh b/aws/ecr_login.sh new file mode 100755 index 000000000..3bf3da928 --- /dev/null +++ b/aws/ecr_login.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +set -euo pipefail + +region="${AWS_REGION:-us-west-2}" +account_id="${AWS_ACCOUNT_ID:-667741767953}" +registry="${account_id}.dkr.ecr.${region}.amazonaws.com" + +if ! command -v aws >/dev/null 2>&1; then + echo "aws CLI not found" + exit 1 +fi +if ! command -v docker >/dev/null 2>&1; then + echo "docker not found" + exit 1 +fi + +aws ecr get-login-password --region "${region}" | docker login --username AWS --password-stdin "${registry}" +echo "logged in: ${registry}" + diff --git a/config/skaffold-ecr-vivek/kustomization.yaml b/config/skaffold-ecr-vivek/kustomization.yaml new file mode 100644 index 000000000..e85265f97 --- /dev/null +++ b/config/skaffold-ecr-vivek/kustomization.yaml @@ -0,0 +1,14 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: +- ../default + +# Override operator manager image to ECR so Skaffold can build/push and deploy without `make deploy`. +images: +- name: docker.io/splunk/splunk-operator + newName: 667741767953.dkr.ecr.us-west-2.amazonaws.com/vivek/splunk-operator + newTag: latest + +patchesStrategicMerge: +- skaffold_env_patch.yaml diff --git a/config/skaffold-ecr-vivek/skaffold_env_patch.yaml b/config/skaffold-ecr-vivek/skaffold_env_patch.yaml new file mode 100644 index 000000000..a5029d507 --- /dev/null +++ b/config/skaffold-ecr-vivek/skaffold_env_patch.yaml @@ -0,0 +1,28 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: splunk-operator-controller-manager + namespace: splunk-operator +spec: + template: + spec: + containers: + - name: manager + env: + # These values are normally injected by `make deploy` via sed placeholders. + # In Skaffold workflows we set concrete defaults here so `skaffold dev` works end-to-end. + - name: WATCH_NAMESPACE + value: "" + - name: RELATED_IMAGE_SPLUNK_ENTERPRISE + value: docker.io/splunk/splunk + - name: SPLUNK_GENERAL_TERMS + # Update if your org requires a different SGT acceptance string. + value: "--accept-sgt-current-at-splunk-com" + + # Multi-container pod orchestration (distroless Splunk + init + sidecar). + - name: SPLUNK_POD_ARCH + value: "multi-container" + - name: RELATED_IMAGE_SPLUNK_INIT + value: "667741767953.dkr.ecr.us-west-2.amazonaws.com/splunk-init:latest" + - name: RELATED_IMAGE_SPLUNK_SIDECAR + value: "667741767953.dkr.ecr.us-west-2.amazonaws.com/splunk-sidecar:latest" diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 7e3c7531f..5c455ef24 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -176,6 +176,39 @@ We can always use improvements to our documentation! Anyone can contribute to th You can also edit documentation files directly in the GitHub web interface, without creating a local copy. This can be convenient for small typos or grammar fixes. +## Skaffold (Dev + CI/CD) + +This fork supports Skaffold-based build/push/deploy loops for the operator manager image. + +### ECR + EKS + +1. Authenticate Docker to ECR (adjust via `AWS_ACCOUNT_ID` / `AWS_REGION` if needed): + +```bash +./aws/ecr_login.sh +./aws/ecr_ensure_repo.sh vivek/splunk-operator +``` + +2. Deploy to a kubecontext (example: `vivek-ipv6-splunk-20260227`): + +```bash +skaffold dev -p ecr-vivek --kube-context vivek-ipv6-splunk-20260227 +``` + +Notes: +- Profile `ecr-vivek` deploys `config/skaffold-ecr-vivek` which sets concrete env values (no `make deploy` placeholder substitution required). +- Multi-container pods are enabled via `SPLUNK_POD_ARCH=multi-container`. +- Update init/sidecar image envs in `config/skaffold-ecr-vivek/skaffold_env_patch.yaml` when publishing new images. + +### Make Deploy (Same Overlay) + +If you prefer `make deploy`, you can use the same overlay: + +```bash +make docker-buildx IMG=667741767953.dkr.ecr.us-west-2.amazonaws.com/vivek/splunk-operator:dev PLATFORMS=linux/amd64 +make deploy ENVIRONMENT=skaffold-ecr-vivek IMG=667741767953.dkr.ecr.us-west-2.amazonaws.com/vivek/splunk-operator:dev +``` + ## Maintainers If you need help, tag one of the active maintainers of this project in a post or comment. We'll do our best to reach out to you as quickly as we can. diff --git a/skaffold.yaml b/skaffold.yaml new file mode 100644 index 000000000..fac05d62e --- /dev/null +++ b/skaffold.yaml @@ -0,0 +1,45 @@ +apiVersion: skaffold/v4beta13 +kind: Config +metadata: + name: splunk-operator + +build: + tagPolicy: + gitCommit: {} + local: + # Default profile is local-only (good for kind/minikube). + push: false + useBuildkit: true + artifacts: + # The operator deployment renders to docker.io/splunk/splunk-operator by default (config/default). + # The ecr-vivek profile switches both the artifact image and the kustomize overlay to ECR. + - image: docker.io/splunk/splunk-operator + context: . + docker: + dockerfile: Dockerfile + +manifests: + kustomize: + paths: + - config/default + +deploy: + kubectl: {} + +profiles: +- name: ecr-vivek + build: + local: + push: true + useBuildkit: true + artifacts: + - image: 667741767953.dkr.ecr.us-west-2.amazonaws.com/vivek/splunk-operator + context: . + docker: + dockerfile: Dockerfile + platforms: + - linux/amd64 + manifests: + kustomize: + paths: + - config/skaffold-ecr-vivek From 775ff16fcf4850c3b5c3b2c351e27ecee77a64b6 Mon Sep 17 00:00:00 2001 From: vivekr-splunk Date: Sat, 28 Feb 2026 23:07:25 -0800 Subject: [PATCH 4/6] dev: make default skaffold workflow runnable without make deploy - Add config/skaffold overlay to replace make/sed placeholder envs\n- Point default skaffold manifests to config/skaffold --- config/skaffold/kustomization.yaml | 9 +++++++++ config/skaffold/skaffold_env_patch.yaml | 25 +++++++++++++++++++++++++ skaffold.yaml | 2 +- 3 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 config/skaffold/kustomization.yaml create mode 100644 config/skaffold/skaffold_env_patch.yaml diff --git a/config/skaffold/kustomization.yaml b/config/skaffold/kustomization.yaml new file mode 100644 index 000000000..6bb72c1a3 --- /dev/null +++ b/config/skaffold/kustomization.yaml @@ -0,0 +1,9 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: +- ../default + +patchesStrategicMerge: +- skaffold_env_patch.yaml + diff --git a/config/skaffold/skaffold_env_patch.yaml b/config/skaffold/skaffold_env_patch.yaml new file mode 100644 index 000000000..3beaa73e4 --- /dev/null +++ b/config/skaffold/skaffold_env_patch.yaml @@ -0,0 +1,25 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: splunk-operator-controller-manager + namespace: splunk-operator +spec: + template: + spec: + containers: + - name: manager + env: + - name: WATCH_NAMESPACE + value: "" + - name: RELATED_IMAGE_SPLUNK_ENTERPRISE + value: docker.io/splunk/splunk + - name: SPLUNK_GENERAL_TERMS + value: "--accept-sgt-current-at-splunk-com" + + - name: SPLUNK_POD_ARCH + value: "multi-container" + - name: RELATED_IMAGE_SPLUNK_INIT + value: "667741767953.dkr.ecr.us-west-2.amazonaws.com/splunk-init:latest" + - name: RELATED_IMAGE_SPLUNK_SIDECAR + value: "667741767953.dkr.ecr.us-west-2.amazonaws.com/splunk-sidecar:latest" + diff --git a/skaffold.yaml b/skaffold.yaml index fac05d62e..c73babefa 100644 --- a/skaffold.yaml +++ b/skaffold.yaml @@ -21,7 +21,7 @@ build: manifests: kustomize: paths: - - config/default + - config/skaffold deploy: kubectl: {} From f1786f006a6cf5fdf4d829c4fba7d2c0aaf1323f Mon Sep 17 00:00:00 2001 From: vivekr-splunk Date: Sat, 28 Feb 2026 23:13:33 -0800 Subject: [PATCH 5/6] test: allow integration tests to deploy operator via multi-container overlay - Parameterize operator ENVIRONMENT/NAMESPACE for test harness\n- Avoid hardcoding RELATED_IMAGE_SPLUNK_ENTERPRISE in skaffold overlays --- .../skaffold-ecr-vivek/skaffold_env_patch.yaml | 2 -- config/skaffold/skaffold_env_patch.yaml | 3 --- test/deploy-operator.sh | 17 +++++++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/config/skaffold-ecr-vivek/skaffold_env_patch.yaml b/config/skaffold-ecr-vivek/skaffold_env_patch.yaml index a5029d507..16718d3b0 100644 --- a/config/skaffold-ecr-vivek/skaffold_env_patch.yaml +++ b/config/skaffold-ecr-vivek/skaffold_env_patch.yaml @@ -13,8 +13,6 @@ spec: # In Skaffold workflows we set concrete defaults here so `skaffold dev` works end-to-end. - name: WATCH_NAMESPACE value: "" - - name: RELATED_IMAGE_SPLUNK_ENTERPRISE - value: docker.io/splunk/splunk - name: SPLUNK_GENERAL_TERMS # Update if your org requires a different SGT acceptance string. value: "--accept-sgt-current-at-splunk-com" diff --git a/config/skaffold/skaffold_env_patch.yaml b/config/skaffold/skaffold_env_patch.yaml index 3beaa73e4..6ab2d4f5a 100644 --- a/config/skaffold/skaffold_env_patch.yaml +++ b/config/skaffold/skaffold_env_patch.yaml @@ -11,8 +11,6 @@ spec: env: - name: WATCH_NAMESPACE value: "" - - name: RELATED_IMAGE_SPLUNK_ENTERPRISE - value: docker.io/splunk/splunk - name: SPLUNK_GENERAL_TERMS value: "--accept-sgt-current-at-splunk-com" @@ -22,4 +20,3 @@ spec: value: "667741767953.dkr.ecr.us-west-2.amazonaws.com/splunk-init:latest" - name: RELATED_IMAGE_SPLUNK_SIDECAR value: "667741767953.dkr.ecr.us-west-2.amazonaws.com/splunk-sidecar:latest" - diff --git a/test/deploy-operator.sh b/test/deploy-operator.sh index c14447fe7..b3daa35de 100644 --- a/test/deploy-operator.sh +++ b/test/deploy-operator.sh @@ -5,6 +5,11 @@ topdir=${scriptdir}/.. source ${scriptdir}/env.sh +# Allow callers (CI/dev) to select which kustomize overlay to use for operator deployment. +# Default keeps legacy behavior. +: "${OPERATOR_ENVIRONMENT:=debug}" +: "${OPERATOR_NAMESPACE:=splunk-operator}" + # Check if exactly 2 arguments are supplied if [ "$#" -ne 2 ]; then echo "Error: Exactly 2 arguments are required." @@ -37,17 +42,17 @@ elif [ "${CLUSTER_WIDE}" != "true" ]; then bin/kustomize build config/crd | kubectl create -f - else echo "Installing enterprise operator from ${PRIVATE_SPLUNK_OPERATOR_IMAGE} using enterprise image from ${PRIVATE_SPLUNK_ENTERPRISE_IMAGE}..." - make deploy IMG=${PRIVATE_SPLUNK_OPERATOR_IMAGE} SPLUNK_ENTERPRISE_IMAGE=${PRIVATE_SPLUNK_ENTERPRISE_IMAGE} SPLUNK_GENERAL_TERMS="--accept-sgt-current-at-splunk-com" WATCH_NAMESPACE="" ENVIRONMENT=debug + make deploy IMG=${PRIVATE_SPLUNK_OPERATOR_IMAGE} NAMESPACE=${OPERATOR_NAMESPACE} SPLUNK_ENTERPRISE_IMAGE=${PRIVATE_SPLUNK_ENTERPRISE_IMAGE} SPLUNK_GENERAL_TERMS="--accept-sgt-current-at-splunk-com" WATCH_NAMESPACE="" ENVIRONMENT=${OPERATOR_ENVIRONMENT} fi if [ $? -ne 0 ]; then echo "Unable to install the operator. Exiting..." - kubectl describe pod -n splunk-operator + kubectl describe pod -n "${OPERATOR_NAMESPACE}" exit 1 fi echo "Dumping operator config here..." -kubectl describe deployment splunk-operator-controller-manager -n splunk-operator +kubectl describe deployment splunk-operator-controller-manager -n "${OPERATOR_NAMESPACE}" if [ "${CLUSTER_WIDE}" == "true" ]; then @@ -55,18 +60,18 @@ if [ "${CLUSTER_WIDE}" == "true" ]; then # sleep before checking for deployment, in slow clusters deployment call may not even started # in those cases, kubectl will fail with error: no matching resources found sleep 2 - kubectl wait --for=condition=ready pod -l control-plane=controller-manager --timeout=600s -n splunk-operator + kubectl wait --for=condition=ready pod -l control-plane=controller-manager --timeout=600s -n "${OPERATOR_NAMESPACE}" if [ $? -ne 0 ]; then echo "kubectl get pods -n kube-system ---" kubectl get pods -n kube-system echo "kubectl get deployement ebs-csi-controller -n kube-system ---" kubectl get deployement ebs-csi-controller -n kube-system echo "kubectl describe pvc -n splunk-operator ---" - kubectl describe pvc -n splunk-operator + kubectl describe pvc -n "${OPERATOR_NAMESPACE}" echo "kubectl describe pv ---" kubectl describe pv echo "kubectl describe pod -n splunk-operator ---" - kubectl describe pod -n splunk-operator + kubectl describe pod -n "${OPERATOR_NAMESPACE}" echo "Operator installation not ready..." exit 1 fi From c3dea30389078e929a9d8b599038496661a657bc Mon Sep 17 00:00:00 2001 From: vivekr-splunk Date: Sun, 1 Mar 2026 09:25:05 -0800 Subject: [PATCH 6/6] testenv: improve EKS multi-container e2e (IRSA, exec, S3 auth) --- pkg/splunk/enterprise/util.go | 13 +++++++-- test/deploy-operator.sh | 0 test/env.sh | 15 +++++++++++ test/get-private-registry-enterprise.sh | 11 ++++++-- test/get-private-registry-operator.sh | 9 ++++++- test/testenv/appframework_utils.go | 16 ++++++++++- test/testenv/deployment.go | 18 +++++++++++++ test/testenv/s3utils.go | 9 ++++++- test/testenv/testcaseenv.go | 8 ++++++ test/testenv/testenv.go | 36 ++++++++++++++++++++----- test/testenv/util.go | 36 +++++++++++++++++++++++-- test/trigger-tests.sh | 0 12 files changed, 156 insertions(+), 15 deletions(-) mode change 100644 => 100755 test/deploy-operator.sh mode change 100644 => 100755 test/env.sh mode change 100644 => 100755 test/trigger-tests.sh diff --git a/pkg/splunk/enterprise/util.go b/pkg/splunk/enterprise/util.go index cc48f69a7..42153afbb 100644 --- a/pkg/splunk/enterprise/util.go +++ b/pkg/splunk/enterprise/util.go @@ -291,10 +291,16 @@ func ReconcileCRSpecificConfigMap(ctx context.Context, client splcommon.Controll configMap.SetOwnerReferences(append(configMap.GetOwnerReferences(), splcommon.AsOwner(cr, true))) err = client.Create(ctx, configMap) if err != nil { + // Reconcile can be re-entered quickly; if another loop created the ConfigMap after our Get() + // returned NotFound, treat AlreadyExists as success and continue. + if k8serrors.IsAlreadyExists(err) { + scopedLog.Info("ConfigMap already exists; continuing", "configmap", configMapName) + return nil + } scopedLog.Error(err, "Failed to create config map") return err } - scopedLog.Info("Created new config map with ManualUpdate set to 'on'") + scopedLog.Info("Created new config map with manualUpdate set to 'off'") return nil } scopedLog.Error(err, "Failed to get config map") @@ -302,6 +308,9 @@ func ReconcileCRSpecificConfigMap(ctx context.Context, client splcommon.Controll } // Check if the ManualUpdate field exists + if configMap.Data == nil { + configMap.Data = map[string]string{} + } if _, exists := configMap.Data["manualUpdate"]; !exists { configMap.Data["manualUpdate"] = "off" err = client.Update(ctx, configMap) @@ -309,7 +318,7 @@ func ReconcileCRSpecificConfigMap(ctx context.Context, client splcommon.Controll scopedLog.Error(err, "Failed to update config map with manualUpdate field") return err } - scopedLog.Info("Updated config map with manualUpdate set to 'on'") + scopedLog.Info("Updated config map with manualUpdate set to 'off'") } return nil diff --git a/test/deploy-operator.sh b/test/deploy-operator.sh old mode 100644 new mode 100755 diff --git a/test/env.sh b/test/env.sh old mode 100644 new mode 100755 index 942b91e85..a364344ab --- a/test/env.sh +++ b/test/env.sh @@ -59,6 +59,14 @@ : "${DEPLOYMENT_TYPE:=manifest}" : "${TEST_CLUSTER_PLATFORM:=eks}" +# Go test helpers read TEST_CLUSTER_PLATFORM via os.Getenv(), so it must be exported. +export TEST_CLUSTER_PLATFORM + +# Multi-container pod layout defaults for tests. +# The per-testcase operator deployment (when -cluster-wide=false) picks these up from the test process env. +: "${SPLUNK_POD_ARCH:=multi-container}" +export SPLUNK_POD_ARCH + # Docker registry to use to push the test images to and pull from in the cluster if [ -z "${PRIVATE_REGISTRY}" ]; then case ${CLUSTER_PROVIDER} in @@ -90,3 +98,10 @@ if [ -z "${PRIVATE_REGISTRY}" ]; then ;; esac fi + +# Images used by the operator when SPLUNK_POD_ARCH=multi-container. +# Default to images in the same registry used for operator/splunk images. +: "${RELATED_IMAGE_SPLUNK_INIT:=${PRIVATE_REGISTRY}/splunk-init:latest}" +: "${RELATED_IMAGE_SPLUNK_SIDECAR:=${PRIVATE_REGISTRY}/splunk-sidecar:latest}" +export RELATED_IMAGE_SPLUNK_INIT +export RELATED_IMAGE_SPLUNK_SIDECAR diff --git a/test/get-private-registry-enterprise.sh b/test/get-private-registry-enterprise.sh index 32d44cb2a..0d33009f5 100644 --- a/test/get-private-registry-enterprise.sh +++ b/test/get-private-registry-enterprise.sh @@ -7,6 +7,13 @@ source ${scriptdir}/env.sh PRIVATE_SPLUNK_ENTERPRISE_IMAGE=${SPLUNK_ENTERPRISE_IMAGE} +# We deploy to EKS amd64; on arm64 dev machines, force pulls to amd64 so the +# integration harness can still pull/tag/push images locally. +DOCKER_PULL_PLATFORM=() +case "$(uname -m)" in + arm64|aarch64) DOCKER_PULL_PLATFORM=(--platform=linux/amd64) ;; +esac + # if we are using private registry, we need to pull, tag and push images to it if [ -n "${PRIVATE_REGISTRY}" ]; then @@ -18,14 +25,14 @@ if [ -n "${PRIVATE_REGISTRY}" ]; then echo "check if image exists, docker manifest inspect $PRIVATE_SPLUNK_ENTERPRISE_IMAGE" if docker manifest inspect "$PRIVATE_SPLUNK_ENTERPRISE_IMAGE" > /dev/null 2>&1; then echo "Image $PRIVATE_SPLUNK_ENTERPRISE_IMAGE exists on the remote repository." - docker pull ${PRIVATE_SPLUNK_ENTERPRISE_IMAGE} + docker pull "${DOCKER_PULL_PLATFORM[@]}" ${PRIVATE_SPLUNK_ENTERPRISE_IMAGE} if [ $? -ne 0 ]; then echo "Unable to pull ${PRIVATE_SPLUNK_ENTERPRISE_IMAGE}. Exiting..." exit 1 fi else echo "Image $PRIVATE_SPLUNK_ENTERPRISE_IMAGE does not exist on the remote repository." - docker pull ${SPLUNK_ENTERPRISE_IMAGE} + docker pull "${DOCKER_PULL_PLATFORM[@]}" ${SPLUNK_ENTERPRISE_IMAGE} if [ $? -ne 0 ]; then echo "Unable to pull ${SPLUNK_ENTERPRISE_IMAGE}. Exiting..." exit 1 diff --git a/test/get-private-registry-operator.sh b/test/get-private-registry-operator.sh index 1f033ac55..1e047e0dc 100644 --- a/test/get-private-registry-operator.sh +++ b/test/get-private-registry-operator.sh @@ -7,6 +7,13 @@ source ${scriptdir}/env.sh PRIVATE_SPLUNK_OPERATOR_IMAGE=${SPLUNK_OPERATOR_IMAGE} +# We deploy to EKS amd64; on arm64 dev machines, force pulls to amd64 so the +# integration harness can still pull/tag images locally. +DOCKER_PULL_PLATFORM=() +case "$(uname -m)" in + arm64|aarch64) DOCKER_PULL_PLATFORM=(--platform=linux/amd64) ;; +esac + # if we are using private registry, we need to pull, tag and push images to it if [ -n "${PRIVATE_REGISTRY}" ]; then echo "Using private registry at ${PRIVATE_REGISTRY}" @@ -16,7 +23,7 @@ if [ -n "${PRIVATE_REGISTRY}" ]; then # Don't pull splunk operator if exists locally since we maybe building it locally if [ -z $(docker images -q ${PRIVATE_SPLUNK_OPERATOR_IMAGE}) ]; then echo "Doesn't exist, pulling ${PRIVATE_SPLUNK_OPERATOR_IMAGE}..." - docker pull ${PRIVATE_SPLUNK_OPERATOR_IMAGE} + docker pull "${DOCKER_PULL_PLATFORM[@]}" ${PRIVATE_SPLUNK_OPERATOR_IMAGE} if [ $? -ne 0 ]; then echo "Unable to pull ${SPLUNK_OPERATOR_IMAGE}. Exiting..." exit 1 diff --git a/test/testenv/appframework_utils.go b/test/testenv/appframework_utils.go index e9879679b..2301d5cbb 100644 --- a/test/testenv/appframework_utils.go +++ b/test/testenv/appframework_utils.go @@ -392,7 +392,21 @@ func GenerateAppFrameworkSpec(ctx context.Context, testenvInstance *TestCaseEnv, // Create App framework volume switch ClusterProvider { case "eks": - volumeSpec = []enterpriseApi.VolumeSpec{GenerateIndexVolumeSpec(volumeName, GetS3Endpoint(), testenvInstance.GetIndexSecretName(), "aws", "s3", GetDefaultS3Region())} + // If explicit credentials aren't provided, omit SecretRef so the operator + // uses IAM role based auth (e.g. IRSA / node role). + accessKey := os.Getenv("TEST_S3_ACCESS_KEY_ID") + if accessKey == "" { + accessKey = os.Getenv("AWS_ACCESS_KEY_ID") + } + secretKey := os.Getenv("TEST_S3_SECRET_ACCESS_KEY") + if secretKey == "" { + secretKey = os.Getenv("AWS_SECRET_ACCESS_KEY") + } + secretRef := testenvInstance.GetIndexSecretName() + if accessKey == "" || secretKey == "" { + secretRef = "" + } + volumeSpec = []enterpriseApi.VolumeSpec{GenerateIndexVolumeSpec(volumeName, GetS3Endpoint(), secretRef, "aws", "s3", GetDefaultS3Region())} case "azure": managedID := os.Getenv("AZURE_MANAGED_ID_ENABLED") if managedID == "false" { diff --git a/test/testenv/deployment.go b/test/testenv/deployment.go index e639a9513..563b01454 100644 --- a/test/testenv/deployment.go +++ b/test/testenv/deployment.go @@ -229,6 +229,24 @@ func (d *Deployment) PodExecCommand(ctx context.Context, podName string, cmd []s Stderr: true, TTY: tty, } + + // Multi-container Splunk pods (splunk + sidecar) require an explicit container + // name for exec. Default to the main Splunk container when present. + if option.Container == "" { + if len(pod.Spec.Containers) == 1 { + option.Container = pod.Spec.Containers[0].Name + } else { + for _, c := range pod.Spec.Containers { + if c.Name == "splunk" { + option.Container = "splunk" + break + } + } + if option.Container == "" && len(pod.Spec.Containers) > 0 { + option.Container = pod.Spec.Containers[0].Name + } + } + } if stdin == "" { option.Stdin = false } diff --git a/test/testenv/s3utils.go b/test/testenv/s3utils.go index a7309fd30..2c578ad4c 100644 --- a/test/testenv/s3utils.go +++ b/test/testenv/s3utils.go @@ -21,7 +21,14 @@ import ( // Set Global Variables var ( - ClusterProvider = os.Getenv("TEST_CLUSTER_PLATFORM") + // Historically, tests used TEST_CLUSTER_PLATFORM. Some runners set CLUSTER_PROVIDER. + // Support both to reduce footguns. + ClusterProvider = func() string { + if v := strings.TrimSpace(os.Getenv("TEST_CLUSTER_PLATFORM")); v != "" { + return v + } + return strings.TrimSpace(os.Getenv("CLUSTER_PROVIDER")) + }() ) // Set S3 Variables diff --git a/test/testenv/testcaseenv.go b/test/testenv/testcaseenv.go index cb3c8a107..064fdf775 100644 --- a/test/testenv/testcaseenv.go +++ b/test/testenv/testcaseenv.go @@ -531,6 +531,14 @@ func (testenv *TestCaseEnv) createIndexSecret() error { secretKey = os.Getenv("AWS_SECRET_ACCESS_KEY") } + // If explicit credentials aren't provided, skip creating the secret. + // The operator supports IAM role based auth (e.g. IRSA / node role) when SecretRef is empty. + if accessKey == "" || secretKey == "" { + testenv.Log.Info("Skipping creation of s3 index secret object (no access/secret key in env). Use IAM role based auth instead.", + "secretName", secretName) + return nil + } + data := map[string][]byte{"s3_access_key": []byte(accessKey), "s3_secret_key": []byte(secretKey)} secret := newSecretSpec(ns, secretName, data) diff --git a/test/testenv/testenv.go b/test/testenv/testenv.go index 06fe304d4..22c0df6b0 100644 --- a/test/testenv/testenv.go +++ b/test/testenv/testenv.go @@ -20,6 +20,7 @@ import ( "fmt" "net" "os" + "strings" "time" "sigs.k8s.io/controller-runtime/pkg/metrics/server" @@ -133,7 +134,9 @@ const ( ) var ( - metricsHost = "0.0.0.0" + // Bind the per-testenv controller-runtime manager metrics to loopback to avoid collisions + // with other long-running local test processes and to avoid exposing local ports. + metricsHost = "127.0.0.1" metricsPort = 8383 specifiedOperatorImage = defaultOperatorImage specifiedSplunkImage = defaultSplunkImage @@ -263,9 +266,9 @@ func NewTestEnv(name, commitHash, operatorImage, splunkImage, licenseFilePath st testenv.kubeAPIServer = cfg.Host testenv.Log.Info("Using kube-apiserver\n", "kube-apiserver", cfg.Host) - suiteConfig, _ := ginkgo.GinkgoConfiguration() - - metricsAddr := fmt.Sprintf("%s:%d", metricsHost, metricsPort+suiteConfig.ParallelProcess) + // Use an ephemeral loopback port to avoid collisions when multiple suites + // run concurrently (e.g. `ginkgo -r`) or when previous test processes linger. + metricsAddr := fmt.Sprintf("%s:%d", metricsHost, 0) kubeManager, err := manager.New(cfg, manager.Options{ Metrics: server.Options{ @@ -334,15 +337,36 @@ func (testenv *TestEnv) popCleanupFunc() (cleanupFunc, error) { // Create a service account config func newServiceAccount(ns string, serviceAccountName string) *corev1.ServiceAccount { + annotations := map[string]string{} + if roleArn := getEKSIRSARoleArn(); roleArn != "" { + // IRSA: allow operator/test pods to access AWS APIs (S3 for AppFramework/SmartStore) without static secrets. + annotations["eks.amazonaws.com/role-arn"] = roleArn + } + new := corev1.ServiceAccount{ TypeMeta: metav1.TypeMeta{ Kind: "ServiceAccount", }, ObjectMeta: metav1.ObjectMeta{ - Name: serviceAccountName, - Namespace: ns, + Name: serviceAccountName, + Namespace: ns, + Annotations: annotations, }, } return &new } + +func getEKSIRSARoleArn() string { + if strings.TrimSpace(ClusterProvider) != "eks" { + return "" + } + + // TEST_IRSA_ROLE_ARN is the preferred name; keep backwards/alt names for convenience. + for _, k := range []string{"TEST_IRSA_ROLE_ARN", "TEST_EKS_IRSA_ROLE_ARN"} { + if v := strings.TrimSpace(os.Getenv(k)); v != "" { + return v + } + } + return "" +} diff --git a/test/testenv/util.go b/test/testenv/util.go index 366ea3668..4af7340e7 100644 --- a/test/testenv/util.go +++ b/test/testenv/util.go @@ -131,6 +131,8 @@ func newLicenseManager(name, ns, licenseConfigMapName, splunkImage string) *ente Spec: enterpriseApi.LicenseManagerSpec{ CommonSplunkSpec: enterpriseApi.CommonSplunkSpec{ + EtcVolumeStorageConfig: enterpriseApi.StorageClassSpec{StorageClassName: DefaultStorageClassName}, + VarVolumeStorageConfig: enterpriseApi.StorageClassSpec{StorageClassName: DefaultStorageClassName}, Volumes: []corev1.Volume{ { Name: "licenses", @@ -169,6 +171,8 @@ func newLicenseMaster(name, ns, licenseConfigMapName, splunkImage string) *enter Spec: enterpriseApiV3.LicenseMasterSpec{ CommonSplunkSpec: enterpriseApi.CommonSplunkSpec{ + EtcVolumeStorageConfig: enterpriseApi.StorageClassSpec{StorageClassName: DefaultStorageClassName}, + VarVolumeStorageConfig: enterpriseApi.StorageClassSpec{StorageClassName: DefaultStorageClassName}, Volumes: []corev1.Volume{ { Name: "licenses", @@ -607,6 +611,11 @@ func newPVC(name, ns, storage, storageClassName string) (*corev1.PersistentVolum func newOperator(name, ns, account, operatorImageAndTag, splunkEnterpriseImageAndTag string) *appsv1.Deployment { var replicas int32 = 1 + // Enable multi-container pod layout for tests when requested. Tests still work without it, but our goal + // is to validate the init + sidecar layout end-to-end. + podArch := strings.TrimSpace(os.Getenv("SPLUNK_POD_ARCH")) + initImg := strings.TrimSpace(os.Getenv("RELATED_IMAGE_SPLUNK_INIT")) + sidecarImg := strings.TrimSpace(os.Getenv("RELATED_IMAGE_SPLUNK_SIDECAR")) operator := appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ @@ -637,7 +646,7 @@ func newOperator(name, ns, account, operatorImageAndTag, splunkEnterpriseImageAn Name: name, Image: operatorImageAndTag, ImagePullPolicy: "Always", - Env: []corev1.EnvVar{ + Env: append([]corev1.EnvVar{ { Name: "WATCH_NAMESPACE", ValueFrom: &corev1.EnvVarSource{ @@ -662,7 +671,7 @@ func newOperator(name, ns, account, operatorImageAndTag, splunkEnterpriseImageAn Name: "SPLUNK_GENERAL_TERMS", Value: "--accept-sgt-current-at-splunk-com", }, - }, + }, buildMultiContainerEnv(podArch, initImg, sidecarImg)...), }, }, }, @@ -673,6 +682,20 @@ func newOperator(name, ns, account, operatorImageAndTag, splunkEnterpriseImageAn return &operator } +func buildMultiContainerEnv(podArch, initImg, sidecarImg string) []corev1.EnvVar { + var out []corev1.EnvVar + if podArch != "" { + out = append(out, corev1.EnvVar{Name: "SPLUNK_POD_ARCH", Value: podArch}) + } + if initImg != "" { + out = append(out, corev1.EnvVar{Name: "RELATED_IMAGE_SPLUNK_INIT", Value: initImg}) + } + if sidecarImg != "" { + out = append(out, corev1.EnvVar{Name: "RELATED_IMAGE_SPLUNK_SIDECAR", Value: sidecarImg}) + } + return out +} + // newStandaloneWithLM creates and initializes CR for Standalone Kind with License Manager func newStandaloneWithLM(name, ns, licenseManagerName, splunkImage string) *enterpriseApi.Standalone { @@ -779,6 +802,15 @@ func newMonitoringConsoleSpec(name, ns, LicenseManagerRef, splunkImage string) * // newMonitoringConsoleSpecWithGivenSpec returns MC Spec with given name, namespace and Spec func newMonitoringConsoleSpecWithGivenSpec(name string, ns string, spec enterpriseApi.MonitoringConsoleSpec) *enterpriseApi.MonitoringConsole { + // Many tests pass a custom spec that doesn't include StorageClassName, but the EKS + // test clusters we run against do not have a default StorageClass. Ensure PVCs bind. + if spec.EtcVolumeStorageConfig.StorageClassName == "" && !spec.EtcVolumeStorageConfig.EphemeralStorage { + spec.EtcVolumeStorageConfig.StorageClassName = DefaultStorageClassName + } + if spec.VarVolumeStorageConfig.StorageClassName == "" && !spec.VarVolumeStorageConfig.EphemeralStorage { + spec.VarVolumeStorageConfig.StorageClassName = DefaultStorageClassName + } + mcSpec := enterpriseApi.MonitoringConsole{ TypeMeta: metav1.TypeMeta{ Kind: "MonitoringConsole", diff --git a/test/trigger-tests.sh b/test/trigger-tests.sh old mode 100644 new mode 100755