From e52bf044b1b75fcbf185e809eed5e5ec8bd62874 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20P=C5=82oski?= Date: Wed, 25 Mar 2026 21:05:03 +0100 Subject: [PATCH 01/36] Initial version of postgres cluster and postgres database CRs Adds PostgresCluster, PostgresClusterClass, and PostgresDatabase CRDs with controllers, RBAC, CRD manifests, and samples. Integrates cloudnative-pg v1.28.0 operator dependency. Bumps controller-runtime to v0.22.4 and k8s deps to v0.34.2 for compatibility. --- .tool-versions | 1 + CLAUDE.md | 26 + PROJECT | 27 + api/v4/common_types.go | 108 +- api/v4/postgrescluster_types.go | 207 ++++ api/v4/postgresclusterclass_types.go | 204 ++++ api/v4/postgresdatabase_types.go | 96 ++ api/v4/zz_generated.deepcopy.go | 664 +++++++++++ ...ise.splunk.com_postgresclusterclasses.yaml | 332 ++++++ ...nterprise.splunk.com_postgresclusters.yaml | 477 ++++++++ ...terprise.splunk.com_postgresdatabases.yaml | 267 +++++ cmd/main.go | 17 + ...ise.splunk.com_postgresclusterclasses.yaml | 326 ++++++ ...nterprise.splunk.com_postgresclusters.yaml | 469 ++++++++ ...terprise.splunk.com_postgresdatabases.yaml | 259 +++++ config/crd/kustomization.yaml | 3 + config/rbac/postgrescluster_admin_role.yaml | 27 + config/rbac/postgrescluster_editor_role.yaml | 33 + config/rbac/postgrescluster_viewer_role.yaml | 29 + .../rbac/postgresclusterclass_admin_role.yaml | 27 + .../postgresclusterclass_editor_role.yaml | 33 + .../postgresclusterclass_viewer_role.yaml | 29 + config/rbac/postgresdatabase_admin_role.yaml | 27 + config/rbac/postgresdatabase_editor_role.yaml | 33 + config/rbac/postgresdatabase_viewer_role.yaml | 29 + config/rbac/role.yaml | 35 + ...enterprise_v4_postgrescluster_default.yaml | 12 + .../enterprise_v4_postgrescluster_dev.yaml | 28 + ...nterprise_v4_postgresclusterclass_dev.yaml | 39 + ...terprise_v4_postgresclusterclass_prod.yaml | 80 ++ .../enterprise_v4_postgresdatabase.yaml | 18 + config/samples/kustomization.yaml | 3 + .../database/01-invalid-duplicate-names.yaml | 14 + .../02-invalid-immutability-update.yaml | 19 + .../database/03-invalid-deletion-policy.yaml | 11 + .../database/04-invalid-missing-fields.yaml | 11 + go.mod | 87 +- go.sum | 190 ++-- .../controller/postgrescluster_controller.go | 179 +++ .../postgrescluster_controller_test.go | 84 ++ .../controller/postgresdatabase_controller.go | 115 ++ .../postgresdatabase_controller_test.go | 84 ++ internal/controller/suite_test.go | 175 +-- pkg/postgresql/cluster/core/cluster.go | 1006 +++++++++++++++++ pkg/postgresql/cluster/core/types.go | 102 ++ .../database/adapter/db_repository.go | 80 ++ pkg/postgresql/database/core/database.go | 941 +++++++++++++++ pkg/postgresql/database/core/ports.go | 10 + pkg/postgresql/database/core/types.go | 94 ++ pkg/splunk/common/names.go | 3 + pkg/splunk/test/controller.go | 10 + pkg/splunk/util/util.go | 2 +- pkg/splunk/util/util_test.go | 2 +- test/connect-to-postgres-cluster.sh | 121 ++ test/postgrescluster-retain-upgrade-flow.sh | 356 ++++++ test/testenv/deployment.go | 4 +- test/testenv/ingest_utils.go | 2 +- 57 files changed, 7357 insertions(+), 310 deletions(-) create mode 100644 .tool-versions create mode 100644 CLAUDE.md create mode 100644 api/v4/postgrescluster_types.go create mode 100644 api/v4/postgresclusterclass_types.go create mode 100644 api/v4/postgresdatabase_types.go create mode 100644 bundle/manifests/enterprise.splunk.com_postgresclusterclasses.yaml create mode 100644 bundle/manifests/enterprise.splunk.com_postgresclusters.yaml create mode 100644 
bundle/manifests/enterprise.splunk.com_postgresdatabases.yaml create mode 100644 config/crd/bases/enterprise.splunk.com_postgresclusterclasses.yaml create mode 100644 config/crd/bases/enterprise.splunk.com_postgresclusters.yaml create mode 100644 config/crd/bases/enterprise.splunk.com_postgresdatabases.yaml create mode 100644 config/rbac/postgrescluster_admin_role.yaml create mode 100644 config/rbac/postgrescluster_editor_role.yaml create mode 100644 config/rbac/postgrescluster_viewer_role.yaml create mode 100644 config/rbac/postgresclusterclass_admin_role.yaml create mode 100644 config/rbac/postgresclusterclass_editor_role.yaml create mode 100644 config/rbac/postgresclusterclass_viewer_role.yaml create mode 100644 config/rbac/postgresdatabase_admin_role.yaml create mode 100644 config/rbac/postgresdatabase_editor_role.yaml create mode 100644 config/rbac/postgresdatabase_viewer_role.yaml create mode 100644 config/samples/enterprise_v4_postgrescluster_default.yaml create mode 100644 config/samples/enterprise_v4_postgrescluster_dev.yaml create mode 100644 config/samples/enterprise_v4_postgresclusterclass_dev.yaml create mode 100644 config/samples/enterprise_v4_postgresclusterclass_prod.yaml create mode 100644 config/samples/enterprise_v4_postgresdatabase.yaml create mode 100644 config/samples/validation-tests/database/01-invalid-duplicate-names.yaml create mode 100644 config/samples/validation-tests/database/02-invalid-immutability-update.yaml create mode 100644 config/samples/validation-tests/database/03-invalid-deletion-policy.yaml create mode 100644 config/samples/validation-tests/database/04-invalid-missing-fields.yaml create mode 100644 internal/controller/postgrescluster_controller.go create mode 100644 internal/controller/postgrescluster_controller_test.go create mode 100644 internal/controller/postgresdatabase_controller.go create mode 100644 internal/controller/postgresdatabase_controller_test.go create mode 100644 pkg/postgresql/cluster/core/cluster.go create mode 100644 pkg/postgresql/cluster/core/types.go create mode 100644 pkg/postgresql/database/adapter/db_repository.go create mode 100644 pkg/postgresql/database/core/database.go create mode 100644 pkg/postgresql/database/core/ports.go create mode 100644 pkg/postgresql/database/core/types.go create mode 100755 test/connect-to-postgres-cluster.sh create mode 100755 test/postgrescluster-retain-upgrade-flow.sh diff --git a/.tool-versions b/.tool-versions new file mode 100644 index 000000000..1527bb834 --- /dev/null +++ b/.tool-versions @@ -0,0 +1 @@ +golang 1.25.5 \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000..de84839de --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,26 @@ +# Claude Code – Project Ground Rules + +## Role & Expertise +- You are a **Go expert** and a **Kubernetes controller/operator expert**. +- You write **small, clean, unit-testable functions**. +- Comments explain **why**, not what. Avoid restating the code in prose. + +## Code Style +- Keep functions focused and short — each should do one thing. +- Prefer explicit error handling with descriptive context (e.g. `fmt.Errorf("reconciling roles: %w", err)`). +- Avoid deep nesting; use early returns. + +## Reconciler / Operator Design +- The **reconciler is the main orchestration flow**. All state modifications are coordinated here. +- We build state **incrementally**: each major step updates state and requeues (`ctrl.Result{RequeueAfter: ...}`). 
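+
+  A minimal sketch of this requeue-per-step pattern (hypothetical status field and helper, not the project's actual reconciler):
+
+  ```go
+  // One step per reconcile: do the work, persist status, then requeue for the next step.
+  if !cluster.Status.RolesReady {
+      if err := r.reconcileRoles(ctx, &cluster); err != nil {
+          return ctrl.Result{}, fmt.Errorf("reconciling roles: %w", err)
+      }
+      cluster.Status.RolesReady = true
+      if err := r.Status().Update(ctx, &cluster); err != nil {
+          return ctrl.Result{}, err
+      }
+      return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
+  }
+  ```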
+- Every operation must be **idempotent** — safe to run multiple times with the same outcome.
+- Follow **Kubernetes controller best practices**:
+  - Use `SSA` (Server-Side Apply) where appropriate.
+  - Emit `Events` for meaningful state transitions.
+  - Use `Status` conditions to reflect progress and errors.
+  - Respect finalizers for cleanup logic.
+
+## Testing
+- New logic should be accompanied by unit tests.
+- Prefer table-driven tests.
+- Mock external dependencies (k8s client, DB connections) via interfaces.
diff --git a/PROJECT b/PROJECT
index e87979069..9acd416fe 100644
--- a/PROJECT
+++ b/PROJECT
@@ -140,4 +140,31 @@ resources:
   kind: ObjectStorage
   path: github.com/splunk/splunk-operator/api/v4
   version: v4
+- api:
+    crdVersion: v1
+    namespaced: true
+  controller: true
+  domain: splunk.com
+  group: enterprise
+  kind: PostgresCluster
+  path: github.com/splunk/splunk-operator/api/v4
+  version: v4
+- api:
+    crdVersion: v1
+    namespaced: true
+  controller: true
+  domain: splunk.com
+  group: enterprise
+  kind: PostgresClusterClass
+  path: github.com/splunk/splunk-operator/api/v4
+  version: v4
+- api:
+    crdVersion: v1
+    namespaced: true
+  controller: true
+  domain: splunk.com
+  group: enterprise
+  kind: PostgresDatabase
+  path: github.com/splunk/splunk-operator/api/v4
+  version: v4
 version: "3"
diff --git a/api/v4/common_types.go b/api/v4/common_types.go
index e53317075..5bba9c0cd 100644
--- a/api/v4/common_types.go
+++ b/api/v4/common_types.go
@@ -91,15 +91,12 @@ type Spec struct {
 	// Image to use for Splunk pod containers (overrides RELATED_IMAGE_SPLUNK_ENTERPRISE environment variables)
 	Image string `json:"image"`
 
-	// Sets pull policy for all images ("Always", "Never", or the default: "IfNotPresent")
-	// +kubebuilder:validation:Enum=Always;IfNotPresent;Never
-	// +kubebuilder:default=IfNotPresent
-	// +optional
-	ImagePullPolicy string `json:"imagePullPolicy,omitempty"`
+	// Sets pull policy for all images (either "Always" or the default: "IfNotPresent")
+	// +kubebuilder:validation:Enum=Always;IfNotPresent
+	ImagePullPolicy string `json:"imagePullPolicy"`
 
 	// Name of Scheduler to use for pod placement (defaults to “default-scheduler”)
-	// +optional
-	SchedulerName string `json:"schedulerName,omitempty"`
+	SchedulerName string `json:"schedulerName"`
 
 	// Kubernetes Affinity rules that control how pods are assigned to particular nodes.
 	Affinity corev1.Affinity `json:"affinity"`
@@ -140,7 +137,7 @@ const (
 	// PhaseTerminating means a custom resource is in the process of being removed
 
-	// PhaseError means an error occured with custom resource management
+	// PhaseError means an error occurred with custom resource management
 	PhaseError Phase = "Error"
 )
@@ -167,16 +164,13 @@ type CommonSplunkSpec struct {
 	Spec `json:",inline"`
 
 	// Storage configuration for /opt/splunk/etc volume
-	// +optional
-	EtcVolumeStorageConfig StorageClassSpec `json:"etcVolumeStorageConfig,omitempty"`
+	EtcVolumeStorageConfig StorageClassSpec `json:"etcVolumeStorageConfig"`
 
 	// Storage configuration for /opt/splunk/var volume
-	// +optional
-	VarVolumeStorageConfig StorageClassSpec `json:"varVolumeStorageConfig,omitempty"`
+	VarVolumeStorageConfig StorageClassSpec `json:"varVolumeStorageConfig"`
 
 	// List of one or more Kubernetes volumes. These will be mounted in all pod containers as /mnt/
-	// +optional
-	Volumes []corev1.Volume `json:"volumes,omitempty"`
+	Volumes []corev1.Volume `json:"volumes"`
 
 	// Inline map of default.yml overrides used to initialize the environment
 	Defaults string `json:"defaults"`
@@ -216,12 +210,10 @@ type CommonSplunkSpec struct {
 	// ServiceAccount is the service account used by the pods deployed by the CRD.
 	// If not specified uses the default serviceAccount for the namespace as per
 	// https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/#use-the-default-service-account-to-access-the-api-server
-	// +optional
-	ServiceAccount string `json:"serviceAccount,omitempty"`
+	ServiceAccount string `json:"serviceAccount"`
 
 	// ExtraEnv refers to extra environment variables to be passed to the Splunk instance containers
 	// WARNING: Setting environment variables used by Splunk or Ansible will affect Splunk installation and operation
-	// +optional
 	ExtraEnv []corev1.EnvVar `json:"extraEnv,omitempty"`
 
 	// ReadinessInitialDelaySeconds defines initialDelaySeconds(See https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes) for Readiness probe
@@ -235,64 +227,45 @@ type CommonSplunkSpec struct {
 	LivenessInitialDelaySeconds int32 `json:"livenessInitialDelaySeconds"`
 
 	// LivenessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command
-	// +optional
-	// +kubebuilder:default:={"initialDelaySeconds":30,"timeoutSeconds":30,"periodSeconds":30,"failureThreshold":3}
 	LivenessProbe *Probe `json:"livenessProbe,omitempty"`
 
 	// ReadinessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes
-	// +optional
-	// +kubebuilder:default:={"initialDelaySeconds":10,"timeoutSeconds":5,"periodSeconds":5,"failureThreshold":3}
 	ReadinessProbe *Probe `json:"readinessProbe,omitempty"`
 
 	// StartupProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes
-	// +optional
-	// +kubebuilder:default:={"initialDelaySeconds":40,"timeoutSeconds":30,"periodSeconds":30,"failureThreshold":12}
 	StartupProbe *Probe `json:"startupProbe,omitempty"`
 
 	// Sets imagePullSecrets if image is being pulled from a private registry.
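+	// For illustration (hypothetical secret name), entries take the pod-spec shape:
+	//   imagePullSecrets: [{name: regcred}]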
 	// See https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
-	// +optional
 	ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"`
 }
 
 // StorageClassSpec defines storage class configuration
-// +kubebuilder:validation:XValidation:rule="!(size(self.storageClassName) > 0 && self.ephemeralStorage == true)",message="storageClassName and ephemeralStorage are mutually exclusive"
-// +kubebuilder:validation:XValidation:rule="!(size(self.storageCapacity) > 0 && self.ephemeralStorage == true)",message="storageCapacity and ephemeralStorage are mutually exclusive"
 type StorageClassSpec struct {
 	// Name of StorageClass to use for persistent volume claims
-	// +optional
-	StorageClassName string `json:"storageClassName,omitempty"`
+	StorageClassName string `json:"storageClassName"`
 
-	// Storage capacity to request persistent volume claims (default="10Gi" for etc and "100Gi" for var)
-	// +optional
-	StorageCapacity string `json:"storageCapacity,omitempty"`
+	// Storage capacity to request persistent volume claims (default="10Gi" for etc and "100Gi" for var)
+	StorageCapacity string `json:"storageCapacity"`
 
 	// If true, ephemeral (emptyDir) storage will be used
+	// default false
 	// +optional
-	// +kubebuilder:default=false
-	EphemeralStorage bool `json:"ephemeralStorage,omitempty"`
+	EphemeralStorage bool `json:"ephemeralStorage"`
 }
 
 // SmartStoreSpec defines Splunk indexes and remote storage volume configuration
 type SmartStoreSpec struct {
 	// List of remote storage volumes
-	// +optional
-	// +listType=map
-	// +listMapKey=name
 	VolList []VolumeSpec `json:"volumes,omitempty"`
 
 	// List of Splunk indexes
-	// +optional
-	// +listType=map
-	// +listMapKey=name
 	IndexList []IndexSpec `json:"indexes,omitempty"`
 
 	// Default configuration for indexes
-	// +optional
 	Defaults IndexConfDefaultsSpec `json:"defaults,omitempty"`
 
 	// Defines Cache manager settings
-	// +optional
 	CacheManagerConf CacheManagerSpec `json:"cacheManager,omitempty"`
 }
@@ -301,23 +274,18 @@ type CacheManagerSpec struct {
 	IndexAndCacheManagerCommonSpec `json:",inline"`
 
 	// Eviction policy to use
-	// +optional
 	EvictionPolicy string `json:"evictionPolicy,omitempty"`
 
 	// Max cache size per partition
-	// +optional
 	MaxCacheSizeMB uint `json:"maxCacheSize,omitempty"`
 
 	// Additional size beyond 'minFreeSize' before eviction kicks in
-	// +optional
 	EvictionPaddingSizeMB uint `json:"evictionPadding,omitempty"`
 
 	// Maximum number of buckets that can be downloaded from remote storage in parallel
-	// +optional
 	MaxConcurrentDownloads uint `json:"maxConcurrentDownloads,omitempty"`
 
 	// Maximum number of buckets that can be uploaded to remote storage in parallel
-	// +optional
 	MaxConcurrentUploads uint `json:"maxConcurrentUploads,omitempty"`
 }
@@ -327,42 +295,30 @@ type IndexConfDefaultsSpec struct {
 }
 
 // VolumeSpec defines remote volume config
-// +kubebuilder:validation:XValidation:rule="self.provider != 'aws' || size(self.region) > 0",message="region is required when provider is aws"
 type VolumeSpec struct {
 	// Remote volume name
-	// +kubebuilder:validation:Required
-	// +kubebuilder:validation:MinLength=1
 	Name string `json:"name"`
 
 	// Remote volume URI
-	// +kubebuilder:validation:Required
-	// +kubebuilder:validation:MinLength=1
 	Endpoint string `json:"endpoint"`
 
 	// Remote volume path
-	// +kubebuilder:validation:Required
-	// +kubebuilder:validation:MinLength=1
 	Path string `json:"path"`
 
 	// Secret object name
-	// +optional
-	SecretRef string `json:"secretRef,omitempty"`
+	SecretRef string `json:"secretRef"`
 
 	// Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.
-	// +kubebuilder:validation:Enum=s3;blob;gcs
 	Type string `json:"storageType"`
 
 	// App Package Remote Store provider. Supported values: aws, minio, azure, gcp.
-	// +optional
-	// +kubebuilder:validation:Enum=aws;minio;azure;gcp
-	Provider string `json:"provider,omitempty"`
+	Provider string `json:"provider"`
 
-	// Region of the remote storage volume where apps reside. Required for aws, optional for azure and gcp.
-	// +optional
-	Region string `json:"region,omitempty"`
+	// Region of the remote storage volume where apps reside. Used for aws, if provided. Not used for minio and azure.
+	Region string `json:"region"`
 }
 
-// VolumeAndTypeSpec used to add any custom varaibles for volume implementation
+// VolumeAndTypeSpec used to add any custom variables for volume implementation
 type VolumeAndTypeSpec struct {
 	VolumeSpec `json:",inline"`
 }
@@ -370,12 +326,9 @@ type VolumeAndTypeSpec struct {
 // IndexSpec defines Splunk index name and storage path
 type IndexSpec struct {
 	// Splunk index name
-	// +kubebuilder:validation:Required
-	// +kubebuilder:validation:MinLength=1
 	Name string `json:"name"`
 
 	// Index location relative to the remote volume path
-	// +optional
 	RemotePath string `json:"remotePath,omitempty"`
 
 	IndexAndCacheManagerCommonSpec `json:",inline"`
@@ -387,26 +340,21 @@ type IndexSpec struct {
 
 type IndexAndGlobalCommonSpec struct {
 
 	// Remote Volume name
-	// +optional
 	VolName string `json:"volumeName,omitempty"`
 
 	// MaxGlobalDataSizeMB defines the maximum amount of space for warm and cold buckets of an index
-	// +optional
 	MaxGlobalDataSizeMB uint `json:"maxGlobalDataSizeMB,omitempty"`
 
 	// MaxGlobalRawDataSizeMB defines the maximum amount of cumulative space for warm and cold buckets of an index
-	// +optional
 	MaxGlobalRawDataSizeMB uint `json:"maxGlobalRawDataSizeMB,omitempty"`
 }
 
 // IndexAndCacheManagerCommonSpec defines configurations that can be configured at index level or at server level
 type IndexAndCacheManagerCommonSpec struct {
 	// Time period relative to the bucket's age, during which the bucket is protected from cache eviction
-	// +optional
 	HotlistRecencySecs uint `json:"hotlistRecencySecs,omitempty"`
 
 	// Time period relative to the bucket's age, during which the bloom filter file is protected from cache eviction
-	// +optional
 	HotlistBloomFilterRecencyHours uint `json:"hotlistBloomFilterRecencyHours,omitempty"`
 }
@@ -427,9 +375,8 @@ type AppSourceDefaultSpec struct {
 
 // PremiumAppsProps represents properties for premium apps such as ES
 type PremiumAppsProps struct {
-	// Type: enterpriseSecurity for now, can accomodate itsi etc.. later
+	// Type: enterpriseSecurity for now, can accommodate itsi etc. later
 	// +optional
-	// +kubebuilder:validation:Enum=enterpriseSecurity
 	Type string `json:"type,omitempty"`
 
 	// Enterprise Security App defaults
@@ -456,13 +403,9 @@ type EsDefaults struct {
 
 // AppSourceSpec defines list of App package (*.spl, *.tgz) locations on remote volumes
 type AppSourceSpec struct {
 	// Logical name for the set of apps placed in this location. Logical name must be unique to the appRepo
-	// +kubebuilder:validation:Required
-	// +kubebuilder:validation:MinLength=1
 	Name string `json:"name"`
 
 	// Location relative to the volume path
-	// +kubebuilder:validation:Required
-	// +kubebuilder:validation:MinLength=1
 	Location string `json:"location"`
 
 	AppSourceDefaultSpec `json:",inline"`
@@ -480,18 +423,17 @@ type AppFrameworkSpec struct {
 	// 1. If no value or 0 is specified then it means periodic polling is disabled.
 	// 2. If anything less than min is specified then we set it to 1 min.
 	// 3. If anything more than the max value is specified then we set it to 1 day.
-	// +optional
 	AppsRepoPollInterval int64 `json:"appsRepoPollIntervalSeconds,omitempty"`
 
 	// App installation period within a reconcile. Apps will be installed during this period before the next reconcile is attempted.
 	// Note: Do not change this setting unless instructed to do so by Splunk Support
-	// +optional
+	// +kubebuilder:validation:Optional
 	// +kubebuilder:validation:Minimum:=30
 	// +kubebuilder:default:=90
 	SchedulerYieldInterval uint64 `json:"appInstallPeriodSeconds,omitempty"`
 
 	// Maximum number of retries to install Apps
-	// +optional
+	// +kubebuilder:validation:Optional
 	// +kubebuilder:validation:Minimum:=0
 	// +kubebuilder:default:=2
 	PhaseMaxRetries uint32 `json:"installMaxRetries,omitempty"`
@@ -500,13 +442,9 @@ type AppFrameworkSpec struct {
 	VolList []VolumeSpec `json:"volumes,omitempty"`
 
 	// List of App sources on remote storage
-	// +optional
-	// +listType=map
-	// +listMapKey=name
 	AppSources []AppSourceSpec `json:"appSources,omitempty"`
 
 	// Maximum number of apps that can be downloaded at the same time
-	// +optional
 	MaxConcurrentAppDownloads uint64 `json:"maxConcurrentAppDownloads,omitempty"`
 }
@@ -545,7 +483,7 @@ type AppSrcDeployInfo struct {
 type BundlePushStageType int
 
 const (
-	// BundlePushUninitialized indicates bundle push never happend
+	// BundlePushUninitialized indicates bundle push never happened
 	BundlePushUninitialized BundlePushStageType = iota
 	// BundlePushPending waiting for all the apps to be copied to the Pod
 	BundlePushPending
diff --git a/api/v4/postgrescluster_types.go b/api/v4/postgrescluster_types.go
new file mode 100644
index 000000000..6ddb14c9d
--- /dev/null
+++ b/api/v4/postgrescluster_types.go
@@ -0,0 +1,207 @@
+/*
+Copyright 2026.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v4
+
+import (
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// ManagedRole represents a PostgreSQL role to be created and managed in the cluster.
+type ManagedRole struct {
+	// Name of the role/user to create.
+	// +kubebuilder:validation:Required
+	// +kubebuilder:validation:MinLength=1
+	// +kubebuilder:validation:MaxLength=63
+	Name string `json:"name"`
+
+	// PasswordSecretRef references a Secret and the key within it containing the password for this role.
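+	// A minimal sketch of the expected shape (hypothetical secret name):
+	//
+	//	passwordSecretRef:
+	//	  name: app-user-credentials
+	//	  key: password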
+	// +optional
+	PasswordSecretRef *corev1.SecretKeySelector `json:"passwordSecretRef,omitempty"`
+
+	// Exists controls whether the role should be present (true) or absent (false) in PostgreSQL.
+	// +kubebuilder:default=true
+	// +optional
+	Exists bool `json:"exists,omitempty"`
+}
+
+// PostgresClusterSpec defines the desired state of PostgresCluster.
+// Validation rules ensure immutability of Class, and that Storage and PostgresVersion can only be set once and cannot be removed or downgraded.
+// +kubebuilder:validation:XValidation:rule="!has(oldSelf.postgresVersion) || (has(self.postgresVersion) && int(self.postgresVersion.split('.')[0]) >= int(oldSelf.postgresVersion.split('.')[0]))",messageExpression="!has(self.postgresVersion) ? 'postgresVersion cannot be removed once set (was: ' + oldSelf.postgresVersion + ')' : 'postgresVersion major version cannot be downgraded (from: ' + oldSelf.postgresVersion + ', to: ' + self.postgresVersion + ')'"
+// +kubebuilder:validation:XValidation:rule="!has(oldSelf.storage) || (has(self.storage) && quantity(self.storage).compareTo(quantity(oldSelf.storage)) >= 0)",messageExpression="!has(self.storage) ? 'storage cannot be removed once set (was: ' + string(oldSelf.storage) + ')' : 'storage size cannot be decreased (from: ' + string(oldSelf.storage) + ', to: ' + string(self.storage) + ')'"
+// +kubebuilder:validation:XValidation:rule="!has(self.connectionPoolerConfig)",message="connectionPoolerConfig cannot be overridden on PostgresCluster"
+type PostgresClusterSpec struct {
+	// Class names the PostgresClusterClass to provision from.
+	// This field is IMMUTABLE after creation.
+	// +kubebuilder:validation:Required
+	// +kubebuilder:validation:MinLength=1
+	// +kubebuilder:validation:XValidation:rule="self == oldSelf",message="class is immutable"
+	Class string `json:"class"`
+
+	// Storage overrides the storage size from ClusterClass.
+	// Example: "5Gi"
+	// +optional
+	Storage *resource.Quantity `json:"storage,omitempty"`
+
+	// Instances overrides the number of PostgreSQL instances from ClusterClass.
+	// +optional
+	// +kubebuilder:validation:Minimum=1
+	// +kubebuilder:validation:Maximum=10
+	Instances *int32 `json:"instances,omitempty"`
+
+	// PostgresVersion is the PostgreSQL version (major or major.minor).
+	// Examples: "18" (latest 18.x), "18.1" (specific minor), "17", "16"
+	// +kubebuilder:validation:Pattern=`^[0-9]+(\.[0-9]+)?$`
+	// +optional
+	PostgresVersion *string `json:"postgresVersion,omitempty"`
+
+	// Resources overrides CPU/memory resources from ClusterClass.
+	// +optional
+	Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
+
+	// PostgreSQLConfig overrides PostgreSQL engine parameters from ClusterClass.
+	// Maps to postgresql.conf settings.
+	// Defaults to an empty map so consumers never have to nil-check.
+	// Example: {"shared_buffers": "128MB", "log_min_duration_statement": "500ms"}
+	// +optional
+	// +kubebuilder:default={}
+	PostgreSQLConfig map[string]string `json:"postgresqlConfig,omitempty"`
+
+	// PgHBA contains pg_hba.conf host-based authentication rules.
+	// Defines client authentication and connection security (cluster-wide).
+	// Defaults to an empty list so consumers never have to nil-check.
+	// Example: ["hostssl all all 0.0.0.0/0 scram-sha-256"]
+	// +optional
+	// +kubebuilder:default={}
+	PgHBA []string `json:"pgHBA,omitempty"`
+
+	// ConnectionPoolerEnabled controls whether PgBouncer connection pooling is deployed for this cluster.
+	// When set, takes precedence over the class-level connectionPoolerEnabled value.
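+	// For illustration: a class with connectionPoolerEnabled=true and a cluster that
+	// sets connectionPoolerEnabled=false ends up with pooling disabled for that cluster.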
+	// +kubebuilder:default=false
+	// +optional
+	ConnectionPoolerEnabled *bool `json:"connectionPoolerEnabled,omitempty"`
+
+	// ConnectionPoolerConfig overrides pooler settings and only takes effect when connection pooling is enabled.
+	// NOTE: spec-level validation currently rejects this field on PostgresCluster; pooler configuration comes from the class.
+	// +optional
+	ConnectionPoolerConfig *ConnectionPoolerConfig `json:"connectionPoolerConfig,omitempty"`
+
+	// ManagedRoles contains PostgreSQL roles that should be created in the cluster.
+	// This field supports Server-Side Apply with per-role granularity, allowing
+	// multiple PostgresDatabase controllers to manage different roles independently.
+	// +optional
+	// +listType=map
+	// +listMapKey=name
+	ManagedRoles []ManagedRole `json:"managedRoles,omitempty"`
+
+	// ClusterDeletionPolicy controls the deletion behavior of the underlying CNPG Cluster when the PostgresCluster is deleted.
+	// +kubebuilder:validation:Enum=Delete;Retain
+	// +kubebuilder:default=Retain
+	// +optional
+	ClusterDeletionPolicy *string `json:"clusterDeletionPolicy,omitempty"`
+}
+
+// PostgresClusterResources defines references to Kubernetes resources related to the PostgresCluster, such as ConfigMaps and Secrets.
+type PostgresClusterResources struct {
+	// ConfigMapRef references the ConfigMap with connection endpoints.
+	// Contains: CLUSTER_ENDPOINTS, POOLER_ENDPOINTS (if connection pooler enabled)
+	// +optional
+	ConfigMapRef *corev1.LocalObjectReference `json:"configMapRef,omitempty"`
+
+	// SuperUserSecretRef references the Secret key holding the superuser credentials.
+	// +optional
+	SuperUserSecretRef *corev1.SecretKeySelector `json:"secretRef,omitempty"`
+}
+
+// PostgresClusterStatus defines the observed state of PostgresCluster.
+type PostgresClusterStatus struct {
+	// Phase represents the current phase of the PostgresCluster.
+	// Values: "Pending", "Provisioning", "Failed", "Ready", "Deleting"
+	// +optional
+	Phase *string `json:"phase,omitempty"`
+
+	// Conditions represent the latest available observations of the PostgresCluster's state.
+	// +optional
+	Conditions []metav1.Condition `json:"conditions,omitempty"`
+
+	// ProvisionerRef contains reference to the provisioner resource managing this PostgresCluster.
+	// Right now, only CNPG is supported.
+	// +optional
+	ProvisionerRef *corev1.ObjectReference `json:"provisionerRef,omitempty"`
+
+	// ConnectionPoolerStatus contains the observed state of the connection pooler.
+	// Only populated when connection pooler is enabled in the PostgresClusterClass.
+	// +optional
+	ConnectionPoolerStatus *ConnectionPoolerStatus `json:"connectionPoolerStatus,omitempty"`
+
+	// ManagedRolesStatus tracks the reconciliation status of managed roles.
+	// +optional
+	ManagedRolesStatus *ManagedRolesStatus `json:"managedRolesStatus,omitempty"`
+
+	// Resources contains references to related Kubernetes resources like ConfigMaps and Secrets.
+	// +optional
+	Resources *PostgresClusterResources `json:"resources,omitempty"`
+}
+
+// ManagedRolesStatus tracks the state of managed PostgreSQL roles.
+type ManagedRolesStatus struct {
+	// Reconciled contains roles that have been successfully created and are ready.
+	// +optional
+	Reconciled []string `json:"reconciled,omitempty"`
+
+	// Pending contains roles that are being created but not yet ready.
+	// +optional
+	Pending []string `json:"pending,omitempty"`
+
+	// Failed contains roles that failed to reconcile with error messages.
+	// +optional
+	Failed map[string]string `json:"failed,omitempty"`
+}
+
+// ConnectionPoolerStatus contains the observed state of the connection pooler.
+type ConnectionPoolerStatus struct {
+	// Enabled indicates whether pooler is active for this cluster.
+ Enabled bool `json:"enabled"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:resource:scope=Namespaced +// +kubebuilder:printcolumn:name="Class",type=string,JSONPath=`.spec.class` +// +kubebuilder:printcolumn:name="Phase",type=string,JSONPath=`.status.phase` +// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` + +// PostgresCluster is the Schema for the postgresclusters API. +type PostgresCluster struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec PostgresClusterSpec `json:"spec,omitempty"` + Status PostgresClusterStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// PostgresClusterList contains a list of PostgresCluster. +type PostgresClusterList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []PostgresCluster `json:"items"` +} + +func init() { + SchemeBuilder.Register(&PostgresCluster{}, &PostgresClusterList{}) +} diff --git a/api/v4/postgresclusterclass_types.go b/api/v4/postgresclusterclass_types.go new file mode 100644 index 000000000..9945ec669 --- /dev/null +++ b/api/v4/postgresclusterclass_types.go @@ -0,0 +1,204 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v4 + +import ( + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// +kubebuilder:validation:XValidation:rule="!has(self.cnpg) || self.provisioner == 'postgresql.cnpg.io'",message="cnpg config can only be set when provisioner is postgresql.cnpg.io" +// +kubebuilder:validation:XValidation:rule="!has(self.config) || !has(self.config.connectionPoolerEnabled) || !self.config.connectionPoolerEnabled || (has(self.cnpg) && has(self.cnpg.connectionPooler))",message="cnpg.connectionPooler must be set when config.connectionPoolerEnabled is true" +// PostgresClusterClassSpec defines the desired state of PostgresClusterClass. +// PostgresClusterClass is immutable after creation - it serves as a template for Cluster CRs. +type PostgresClusterClassSpec struct { + // Provisioner identifies which database provisioner to use. + // Currently supported: "postgresql.cnpg.io" (CloudNativePG) + // +kubebuilder:validation:Required + // +kubebuilder:validation:Enum=postgresql.cnpg.io + Provisioner string `json:"provisioner"` + + // PostgresClusterConfig contains cluster-level configuration. + // These settings apply to PostgresCluster infrastructure. + // Can be overridden in PostgresCluster CR. + // +kubebuilder:default={} + // +optional + Config *PostgresClusterClassConfig `json:"config,omitempty"` + + // CNPG contains CloudNativePG-specific configuration and policies. + // Only used when Provisioner is "postgresql.cnpg.io" + // These settings CANNOT be overridden in PostgresCluster CR (platform policy). + // +optional + CNPG *CNPGConfig `json:"cnpg,omitempty"` +} + +// PostgresClusterClassConfig contains provider-agnostic cluster configuration. 
+// These fields define PostgresCluster infrastructure and can be overridden in PostgresCluster CR.
+type PostgresClusterClassConfig struct {
+	// Instances is the number of database instances (1 primary + N replicas).
+	// Single instance (1) is suitable for development.
+	// High availability requires at least 3 instances (1 primary + 2 replicas).
+	// +kubebuilder:validation:Minimum=1
+	// +kubebuilder:validation:Maximum=10
+	// +kubebuilder:default=1
+	// +optional
+	Instances *int32 `json:"instances,omitempty"`
+
+	// Storage is the size of persistent volume for each instance.
+	// Cannot be decreased after cluster creation (Kubernetes persistent volumes cannot be shrunk).
+	// Recommended minimum: 10Gi for production viability.
+	// Example: "50Gi", "100Gi", "1Ti"
+	// +kubebuilder:default="50Gi"
+	// +optional
+	Storage *resource.Quantity `json:"storage,omitempty"`
+
+	// PostgresVersion is the PostgreSQL version (major or major.minor).
+	// Examples: "18" (latest 18.x), "18.1" (specific minor), "17", "16"
+	// +kubebuilder:validation:Pattern=`^[0-9]+(\.[0-9]+)?$`
+	// +kubebuilder:default="18"
+	// +optional
+	PostgresVersion *string `json:"postgresVersion,omitempty"`
+
+	// Resources defines CPU and memory requests/limits per instance.
+	// All instances in the cluster have the same resources.
+	// +optional
+	Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
+
+	// PostgreSQLConfig contains PostgreSQL engine configuration parameters.
+	// Maps to postgresql.conf settings (cluster-wide).
+	// Example: {"max_connections": "200", "shared_buffers": "2GB"}
+	// +optional
+	PostgreSQLConfig map[string]string `json:"postgresqlConfig,omitempty"`
+
+	// PgHBA contains pg_hba.conf host-based authentication rules.
+	// Defines client authentication and connection security (cluster-wide).
+	// Example: ["hostssl all all 0.0.0.0/0 scram-sha-256"]
+	// +optional
+	PgHBA []string `json:"pgHBA,omitempty"`
+
+	// ConnectionPoolerEnabled controls whether PgBouncer connection pooling is deployed.
+	// When true, creates RW and RO pooler deployments for clusters using this class.
+	// Can be overridden in PostgresCluster CR.
+	// +kubebuilder:default=false
+	// +optional
+	ConnectionPoolerEnabled *bool `json:"connectionPoolerEnabled,omitempty"`
+}
+
+// ConnectionPoolerMode defines the PgBouncer connection pooling strategy.
+// +kubebuilder:validation:Enum=session;transaction;statement
+type ConnectionPoolerMode string
+
+const (
+	// ConnectionPoolerModeSession assigns a connection for the entire client session (most compatible).
+	ConnectionPoolerModeSession ConnectionPoolerMode = "session"
+
+	// ConnectionPoolerModeTransaction returns the connection after each transaction (recommended).
+	ConnectionPoolerModeTransaction ConnectionPoolerMode = "transaction"
+
+	// ConnectionPoolerModeStatement returns the connection after each statement (limited compatibility).
+	ConnectionPoolerModeStatement ConnectionPoolerMode = "statement"
+)
+
+// ConnectionPoolerConfig defines PgBouncer connection pooler configuration.
+// When enabled, creates RW and RO pooler deployments for clusters using this class.
+type ConnectionPoolerConfig struct {
+	// Instances is the number of PgBouncer pod replicas.
+	// Higher values provide better availability and load distribution.
+	// +kubebuilder:validation:Minimum=1
+	// +kubebuilder:validation:Maximum=10
+	// +kubebuilder:default=3
+	// +optional
+	Instances *int32 `json:"instances,omitempty"`
+
+	// Mode defines the connection pooling strategy.
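+	// For illustration: "transaction" gives the best connection reuse but drops
+	// per-session state (e.g. SET settings) between transactions, while "session"
+	// remains the most compatible choice for clients that rely on such state.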
+ // +kubebuilder:default="transaction" + // +optional + Mode *ConnectionPoolerMode `json:"mode,omitempty"` + + // Config contains PgBouncer configuration parameters. + // Passed directly to CNPG Pooler spec.pgbouncer.parameters. + // See: https://cloudnative-pg.io/docs/1.28/connection_pooling/#pgbouncer-configuration-options + // +optional + Config map[string]string `json:"config,omitempty"` +} + +// CNPGConfig contains CloudNativePG-specific configuration. +// These fields control CNPG operator behavior and enforce platform policies. +// Cannot be overridden in Cluster CR. +type CNPGConfig struct { + // PrimaryUpdateMethod determines how the primary instance is updated. + // "restart" - tolerate brief downtime (suitable for development) + // "switchover" - minimal downtime via automated failover (production-grade) + // + // NOTE: When using "switchover", ensure clusterConfig.instances > 1. + // Switchover requires at least one replica to fail over to. + // +kubebuilder:validation:Enum=restart;switchover + // +kubebuilder:default=switchover + // +optional + PrimaryUpdateMethod *string `json:"primaryUpdateMethod,omitempty"` + + // ConnectionPooler contains PgBouncer connection pooler configuration. + // When enabled, creates RW and RO pooler deployments for clusters using this class. + // +optional + ConnectionPooler *ConnectionPoolerConfig `json:"connectionPooler,omitempty"` +} + +// PostgresClusterClassStatus defines the observed state of PostgresClusterClass. +type PostgresClusterClassStatus struct { + // Conditions represent the latest available observations of the PostgresClusterClass state. + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty"` + + // Phase represents the current phase of the PostgresClusterClass. + // Valid phases: "Ready", "Invalid" + // +optional + Phase *string `json:"phase,omitempty"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:resource:scope=Cluster +// +kubebuilder:printcolumn:name="Provisioner",type=string,JSONPath=`.spec.provisioner` +// +kubebuilder:printcolumn:name="Instances",type=integer,JSONPath=`.spec.postgresClusterConfig.instances` +// +kubebuilder:printcolumn:name="Storage",type=string,JSONPath=`.spec.postgresClusterConfig.storage` +// +kubebuilder:printcolumn:name="Version",type=string,JSONPath=`.spec.postgresClusterConfig.postgresVersion` +// +kubebuilder:printcolumn:name="Phase",type=string,JSONPath=`.status.phase` +// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` + +// PostgresClusterClass is the Schema for the postgresclusterclasses API. +// PostgresClusterClass defines a reusable template and policy for postgres cluster provisioning. +type PostgresClusterClass struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec PostgresClusterClassSpec `json:"spec,omitempty"` + Status PostgresClusterClassStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// PostgresClusterClassList contains a list of PostgresClusterClass. 
+type PostgresClusterClassList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []PostgresClusterClass `json:"items"` +} + +func init() { + SchemeBuilder.Register(&PostgresClusterClass{}, &PostgresClusterClassList{}) +} diff --git a/api/v4/postgresdatabase_types.go b/api/v4/postgresdatabase_types.go new file mode 100644 index 000000000..edab619b0 --- /dev/null +++ b/api/v4/postgresdatabase_types.go @@ -0,0 +1,96 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v4 + +import ( + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// PostgresDatabaseSpec defines the desired state of PostgresDatabase. +// +kubebuilder:validation:XValidation:rule="self.clusterRef == oldSelf.clusterRef",message="clusterRef is immutable" +type PostgresDatabaseSpec struct { + // Reference to Postgres Cluster managed by postgresCluster controller + // +kubebuilder:validation:Required + ClusterRef corev1.LocalObjectReference `json:"clusterRef"` + + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinItems=1 + // +kubebuilder:validation:MaxItems=10 + // +kubebuilder:validation:XValidation:rule="self.all(x, self.filter(y, y.name == x.name).size() == 1)",message="database names must be unique" + Databases []DatabaseDefinition `json:"databases"` +} + +type DatabaseDefinition struct { + // +kubebuilder:validation:Required + // +kubebuilder:validation:MaxLength=30 + Name string `json:"name"` + Extensions []string `json:"extensions,omitempty"` + // +kubebuilder:validation:Enum=Delete;Retain + // +kubebuilder:default=Delete + DeletionPolicy string `json:"deletionPolicy,omitempty"` +} + +type DatabaseInfo struct { + Name string `json:"name"` + Ready bool `json:"ready"` + DatabaseRef *corev1.LocalObjectReference `json:"databaseRef,omitempty"` + AdminUserSecretRef *corev1.SecretKeySelector `json:"adminUserSecretRef,omitempty"` + RWUserSecretRef *corev1.SecretKeySelector `json:"rwUserSecretRef,omitempty"` + ConfigMapRef *corev1.LocalObjectReference `json:"configMap,omitempty"` +} + +// PostgresDatabaseStatus defines the observed state of PostgresDatabase. +type PostgresDatabaseStatus struct { + // +optional + Phase *string `json:"phase,omitempty"` + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty"` + // +optional + Databases []DatabaseInfo `json:"databases,omitempty"` + // ObservedGeneration represents the .metadata.generation that the status was set based upon. + // +optional + ObservedGeneration *int64 `json:"observedGeneration,omitempty"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="Cluster",type=string,JSONPath=`.spec.clusterRef.name` +// +kubebuilder:printcolumn:name="Phase",type=string,JSONPath=`.status.phase` +// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` + +// PostgresDatabase is the Schema for the postgresdatabases API. 
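+// An illustrative manifest (hypothetical names):
+//
+//	apiVersion: enterprise.splunk.com/v4
+//	kind: PostgresDatabase
+//	metadata:
+//	  name: app-databases
+//	spec:
+//	  clusterRef:
+//	    name: dev-cluster
+//	  databases:
+//	    - name: appdb
+//	      extensions: ["pgcrypto"]
+//	      deletionPolicy: Retain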
+type PostgresDatabase struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec PostgresDatabaseSpec `json:"spec,omitempty"` + Status PostgresDatabaseStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// PostgresDatabaseList contains a list of PostgresDatabase. +type PostgresDatabaseList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []PostgresDatabase `json:"items"` +} + +func init() { + SchemeBuilder.Register(&PostgresDatabase{}, &PostgresDatabaseList{}) +} diff --git a/api/v4/zz_generated.deepcopy.go b/api/v4/zz_generated.deepcopy.go index 7ae136536..d9535fb93 100644 --- a/api/v4/zz_generated.deepcopy.go +++ b/api/v4/zz_generated.deepcopy.go @@ -22,6 +22,7 @@ package v4 import ( "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ) @@ -180,6 +181,31 @@ func (in *BundlePushTracker) DeepCopy() *BundlePushTracker { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CNPGConfig) DeepCopyInto(out *CNPGConfig) { + *out = *in + if in.PrimaryUpdateMethod != nil { + in, out := &in.PrimaryUpdateMethod, &out.PrimaryUpdateMethod + *out = new(string) + **out = **in + } + if in.ConnectionPooler != nil { + in, out := &in.ConnectionPooler, &out.ConnectionPooler + *out = new(ConnectionPoolerConfig) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CNPGConfig. +func (in *CNPGConfig) DeepCopy() *CNPGConfig { + if in == nil { + return nil + } + out := new(CNPGConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *CacheManagerSpec) DeepCopyInto(out *CacheManagerSpec) { *out = *in @@ -355,6 +381,108 @@ func (in *CommonSplunkSpec) DeepCopy() *CommonSplunkSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ConnectionPoolerConfig) DeepCopyInto(out *ConnectionPoolerConfig) { + *out = *in + if in.Instances != nil { + in, out := &in.Instances, &out.Instances + *out = new(int32) + **out = **in + } + if in.Mode != nil { + in, out := &in.Mode, &out.Mode + *out = new(ConnectionPoolerMode) + **out = **in + } + if in.Config != nil { + in, out := &in.Config, &out.Config + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ConnectionPoolerConfig. +func (in *ConnectionPoolerConfig) DeepCopy() *ConnectionPoolerConfig { + if in == nil { + return nil + } + out := new(ConnectionPoolerConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ConnectionPoolerStatus) DeepCopyInto(out *ConnectionPoolerStatus) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ConnectionPoolerStatus. +func (in *ConnectionPoolerStatus) DeepCopy() *ConnectionPoolerStatus { + if in == nil { + return nil + } + out := new(ConnectionPoolerStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *DatabaseDefinition) DeepCopyInto(out *DatabaseDefinition) { + *out = *in + if in.Extensions != nil { + in, out := &in.Extensions, &out.Extensions + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DatabaseDefinition. +func (in *DatabaseDefinition) DeepCopy() *DatabaseDefinition { + if in == nil { + return nil + } + out := new(DatabaseDefinition) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DatabaseInfo) DeepCopyInto(out *DatabaseInfo) { + *out = *in + if in.DatabaseRef != nil { + in, out := &in.DatabaseRef, &out.DatabaseRef + *out = new(v1.LocalObjectReference) + **out = **in + } + if in.AdminUserSecretRef != nil { + in, out := &in.AdminUserSecretRef, &out.AdminUserSecretRef + *out = new(v1.SecretKeySelector) + (*in).DeepCopyInto(*out) + } + if in.RWUserSecretRef != nil { + in, out := &in.RWUserSecretRef, &out.RWUserSecretRef + *out = new(v1.SecretKeySelector) + (*in).DeepCopyInto(*out) + } + if in.ConfigMapRef != nil { + in, out := &in.ConfigMapRef, &out.ConfigMapRef + *out = new(v1.LocalObjectReference) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DatabaseInfo. +func (in *DatabaseInfo) DeepCopy() *DatabaseInfo { + if in == nil { + return nil + } + out := new(DatabaseInfo) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *EsDefaults) DeepCopyInto(out *EsDefaults) { *out = *in @@ -742,6 +870,58 @@ func (in *LicenseManagerStatus) DeepCopy() *LicenseManagerStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ManagedRole) DeepCopyInto(out *ManagedRole) { + *out = *in + if in.PasswordSecretRef != nil { + in, out := &in.PasswordSecretRef, &out.PasswordSecretRef + *out = new(v1.SecretKeySelector) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ManagedRole. +func (in *ManagedRole) DeepCopy() *ManagedRole { + if in == nil { + return nil + } + out := new(ManagedRole) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ManagedRolesStatus) DeepCopyInto(out *ManagedRolesStatus) { + *out = *in + if in.Reconciled != nil { + in, out := &in.Reconciled, &out.Reconciled + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Pending != nil { + in, out := &in.Pending, &out.Pending + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Failed != nil { + in, out := &in.Failed, &out.Failed + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ManagedRolesStatus. +func (in *ManagedRolesStatus) DeepCopy() *ManagedRolesStatus { + if in == nil { + return nil + } + out := new(ManagedRolesStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *MonitoringConsole) DeepCopyInto(out *MonitoringConsole) { *out = *in @@ -946,6 +1126,490 @@ func (in *PhaseInfo) DeepCopy() *PhaseInfo { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PostgresCluster) DeepCopyInto(out *PostgresCluster) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresCluster. +func (in *PostgresCluster) DeepCopy() *PostgresCluster { + if in == nil { + return nil + } + out := new(PostgresCluster) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *PostgresCluster) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PostgresClusterClass) DeepCopyInto(out *PostgresClusterClass) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresClusterClass. +func (in *PostgresClusterClass) DeepCopy() *PostgresClusterClass { + if in == nil { + return nil + } + out := new(PostgresClusterClass) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *PostgresClusterClass) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PostgresClusterClassConfig) DeepCopyInto(out *PostgresClusterClassConfig) { + *out = *in + if in.Instances != nil { + in, out := &in.Instances, &out.Instances + *out = new(int32) + **out = **in + } + if in.Storage != nil { + in, out := &in.Storage, &out.Storage + x := (*in).DeepCopy() + *out = &x + } + if in.PostgresVersion != nil { + in, out := &in.PostgresVersion, &out.PostgresVersion + *out = new(string) + **out = **in + } + if in.Resources != nil { + in, out := &in.Resources, &out.Resources + *out = new(v1.ResourceRequirements) + (*in).DeepCopyInto(*out) + } + if in.PostgreSQLConfig != nil { + in, out := &in.PostgreSQLConfig, &out.PostgreSQLConfig + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.PgHBA != nil { + in, out := &in.PgHBA, &out.PgHBA + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.ConnectionPoolerEnabled != nil { + in, out := &in.ConnectionPoolerEnabled, &out.ConnectionPoolerEnabled + *out = new(bool) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresClusterClassConfig. +func (in *PostgresClusterClassConfig) DeepCopy() *PostgresClusterClassConfig { + if in == nil { + return nil + } + out := new(PostgresClusterClassConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *PostgresClusterClassList) DeepCopyInto(out *PostgresClusterClassList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]PostgresClusterClass, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresClusterClassList. +func (in *PostgresClusterClassList) DeepCopy() *PostgresClusterClassList { + if in == nil { + return nil + } + out := new(PostgresClusterClassList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *PostgresClusterClassList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PostgresClusterClassSpec) DeepCopyInto(out *PostgresClusterClassSpec) { + *out = *in + if in.Config != nil { + in, out := &in.Config, &out.Config + *out = new(PostgresClusterClassConfig) + (*in).DeepCopyInto(*out) + } + if in.CNPG != nil { + in, out := &in.CNPG, &out.CNPG + *out = new(CNPGConfig) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresClusterClassSpec. +func (in *PostgresClusterClassSpec) DeepCopy() *PostgresClusterClassSpec { + if in == nil { + return nil + } + out := new(PostgresClusterClassSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PostgresClusterClassStatus) DeepCopyInto(out *PostgresClusterClassStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]metav1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.Phase != nil { + in, out := &in.Phase, &out.Phase + *out = new(string) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresClusterClassStatus. +func (in *PostgresClusterClassStatus) DeepCopy() *PostgresClusterClassStatus { + if in == nil { + return nil + } + out := new(PostgresClusterClassStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PostgresClusterList) DeepCopyInto(out *PostgresClusterList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]PostgresCluster, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresClusterList. +func (in *PostgresClusterList) DeepCopy() *PostgresClusterList { + if in == nil { + return nil + } + out := new(PostgresClusterList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *PostgresClusterList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PostgresClusterResources) DeepCopyInto(out *PostgresClusterResources) { + *out = *in + if in.ConfigMapRef != nil { + in, out := &in.ConfigMapRef, &out.ConfigMapRef + *out = new(v1.LocalObjectReference) + **out = **in + } + if in.SuperUserSecretRef != nil { + in, out := &in.SuperUserSecretRef, &out.SuperUserSecretRef + *out = new(v1.SecretKeySelector) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresClusterResources. +func (in *PostgresClusterResources) DeepCopy() *PostgresClusterResources { + if in == nil { + return nil + } + out := new(PostgresClusterResources) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PostgresClusterSpec) DeepCopyInto(out *PostgresClusterSpec) { + *out = *in + if in.Storage != nil { + in, out := &in.Storage, &out.Storage + x := (*in).DeepCopy() + *out = &x + } + if in.Instances != nil { + in, out := &in.Instances, &out.Instances + *out = new(int32) + **out = **in + } + if in.PostgresVersion != nil { + in, out := &in.PostgresVersion, &out.PostgresVersion + *out = new(string) + **out = **in + } + if in.Resources != nil { + in, out := &in.Resources, &out.Resources + *out = new(v1.ResourceRequirements) + (*in).DeepCopyInto(*out) + } + if in.PostgreSQLConfig != nil { + in, out := &in.PostgreSQLConfig, &out.PostgreSQLConfig + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.PgHBA != nil { + in, out := &in.PgHBA, &out.PgHBA + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.ConnectionPoolerEnabled != nil { + in, out := &in.ConnectionPoolerEnabled, &out.ConnectionPoolerEnabled + *out = new(bool) + **out = **in + } + if in.ConnectionPoolerConfig != nil { + in, out := &in.ConnectionPoolerConfig, &out.ConnectionPoolerConfig + *out = new(ConnectionPoolerConfig) + (*in).DeepCopyInto(*out) + } + if in.ManagedRoles != nil { + in, out := &in.ManagedRoles, &out.ManagedRoles + *out = make([]ManagedRole, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.ClusterDeletionPolicy != nil { + in, out := &in.ClusterDeletionPolicy, &out.ClusterDeletionPolicy + *out = new(string) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresClusterSpec. +func (in *PostgresClusterSpec) DeepCopy() *PostgresClusterSpec { + if in == nil { + return nil + } + out := new(PostgresClusterSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *PostgresClusterStatus) DeepCopyInto(out *PostgresClusterStatus) { + *out = *in + if in.Phase != nil { + in, out := &in.Phase, &out.Phase + *out = new(string) + **out = **in + } + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]metav1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.ProvisionerRef != nil { + in, out := &in.ProvisionerRef, &out.ProvisionerRef + *out = new(v1.ObjectReference) + **out = **in + } + if in.ConnectionPoolerStatus != nil { + in, out := &in.ConnectionPoolerStatus, &out.ConnectionPoolerStatus + *out = new(ConnectionPoolerStatus) + **out = **in + } + if in.ManagedRolesStatus != nil { + in, out := &in.ManagedRolesStatus, &out.ManagedRolesStatus + *out = new(ManagedRolesStatus) + (*in).DeepCopyInto(*out) + } + if in.Resources != nil { + in, out := &in.Resources, &out.Resources + *out = new(PostgresClusterResources) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresClusterStatus. +func (in *PostgresClusterStatus) DeepCopy() *PostgresClusterStatus { + if in == nil { + return nil + } + out := new(PostgresClusterStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PostgresDatabase) DeepCopyInto(out *PostgresDatabase) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresDatabase. +func (in *PostgresDatabase) DeepCopy() *PostgresDatabase { + if in == nil { + return nil + } + out := new(PostgresDatabase) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *PostgresDatabase) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PostgresDatabaseList) DeepCopyInto(out *PostgresDatabaseList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]PostgresDatabase, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresDatabaseList. +func (in *PostgresDatabaseList) DeepCopy() *PostgresDatabaseList { + if in == nil { + return nil + } + out := new(PostgresDatabaseList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *PostgresDatabaseList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
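An aside on why these generated copies matter: controller-runtime serves pointers into a shared informer cache, so a reconciler must deep-copy before mutating. A minimal sketch under that assumption (the function name and phase value are illustrative, not part of this patch):

```go
package controller

import (
	"context"

	"k8s.io/utils/ptr"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"

	enterpriseApi "github.com/splunk/splunk-operator/api/v4"
)

// markProvisioning fetches a PostgresCluster and flips its status phase.
// DeepCopy (generated above) duplicates pointer fields such as Status.Phase,
// so the cached object is never mutated in place.
func markProvisioning(ctx context.Context, c client.Client, req ctrl.Request) error {
	cluster := &enterpriseApi.PostgresCluster{}
	if err := c.Get(ctx, req.NamespacedName, cluster); err != nil {
		return client.IgnoreNotFound(err)
	}
	updated := cluster.DeepCopy()
	updated.Status.Phase = ptr.To("Provisioning") // Status.Phase is *string
	return c.Status().Update(ctx, updated)
}
```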
+func (in *PostgresDatabaseSpec) DeepCopyInto(out *PostgresDatabaseSpec) {
+	*out = *in
+	out.ClusterRef = in.ClusterRef
+	if in.Databases != nil {
+		in, out := &in.Databases, &out.Databases
+		*out = make([]DatabaseDefinition, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresDatabaseSpec.
+func (in *PostgresDatabaseSpec) DeepCopy() *PostgresDatabaseSpec {
+	if in == nil {
+		return nil
+	}
+	out := new(PostgresDatabaseSpec)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *PostgresDatabaseStatus) DeepCopyInto(out *PostgresDatabaseStatus) {
+	*out = *in
+	if in.Phase != nil {
+		in, out := &in.Phase, &out.Phase
+		*out = new(string)
+		**out = **in
+	}
+	if in.Conditions != nil {
+		in, out := &in.Conditions, &out.Conditions
+		*out = make([]metav1.Condition, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+	if in.Databases != nil {
+		in, out := &in.Databases, &out.Databases
+		*out = make([]DatabaseInfo, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+	if in.ObservedGeneration != nil {
+		in, out := &in.ObservedGeneration, &out.ObservedGeneration
+		*out = new(int64)
+		**out = **in
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresDatabaseStatus.
+func (in *PostgresDatabaseStatus) DeepCopy() *PostgresDatabaseStatus {
+	if in == nil {
+		return nil
+	}
+	out := new(PostgresDatabaseStatus)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *PremiumAppsProps) DeepCopyInto(out *PremiumAppsProps) {
 	*out = *in
diff --git a/bundle/manifests/enterprise.splunk.com_postgresclusterclasses.yaml b/bundle/manifests/enterprise.splunk.com_postgresclusterclasses.yaml
new file mode 100644
index 000000000..b564ca757
--- /dev/null
+++ b/bundle/manifests/enterprise.splunk.com_postgresclusterclasses.yaml
@@ -0,0 +1,332 @@
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.18.0
+  creationTimestamp: null
+  labels:
+    name: splunk-operator
+  name: postgresclusterclasses.enterprise.splunk.com
+spec:
+  group: enterprise.splunk.com
+  names:
+    kind: PostgresClusterClass
+    listKind: PostgresClusterClassList
+    plural: postgresclusterclasses
+    singular: postgresclusterclass
+  scope: Cluster
+  versions:
+  - additionalPrinterColumns:
+    - jsonPath: .spec.provisioner
+      name: Provisioner
+      type: string
+    - jsonPath: .spec.config.instances
+      name: Instances
+      type: integer
+    - jsonPath: .spec.config.storage
+      name: Storage
+      type: string
+    - jsonPath: .spec.config.postgresVersion
+      name: Version
+      type: string
+    - jsonPath: .status.phase
+      name: Phase
+      type: string
+    - jsonPath: .metadata.creationTimestamp
+      name: Age
+      type: date
+    name: v4
+    schema:
+      openAPIV3Schema:
+        description: |-
+          PostgresClusterClass is the Schema for the postgresclusterclasses API.
+          PostgresClusterClass defines a reusable template and policy for postgres cluster provisioning.
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: |-
+              PostgresClusterClassSpec defines the desired state of PostgresClusterClass.
+              PostgresClusterClass is immutable after creation - it serves as a template for Cluster CRs.
+            properties:
+              cnpg:
+                description: |-
+                  CNPG contains CloudNativePG-specific configuration and policies.
+                  Only used when Provisioner is "postgresql.cnpg.io".
+                  These settings CANNOT be overridden in PostgresCluster CR (platform policy).
+                properties:
+                  connectionPooler:
+                    description: |-
+                      ConnectionPooler contains PgBouncer connection pooler configuration.
+                      When enabled, creates RW and RO pooler deployments for clusters using this class.
+                    properties:
+                      config:
+                        additionalProperties:
+                          type: string
+                        description: |-
+                          Config contains PgBouncer configuration parameters.
+                          Passed directly to CNPG Pooler spec.pgbouncer.parameters.
+                          See: https://cloudnative-pg.io/docs/1.28/connection_pooling/#pgbouncer-configuration-options
+                        type: object
+                      instances:
+                        default: 3
+                        description: |-
+                          Instances is the number of PgBouncer pod replicas.
+                          Higher values provide better availability and load distribution.
+                        format: int32
+                        maximum: 10
+                        minimum: 1
+                        type: integer
+                      mode:
+                        default: transaction
+                        description: Mode defines the connection pooling strategy.
+                        enum:
+                        - session
+                        - transaction
+                        - statement
+                        type: string
+                    type: object
+                  primaryUpdateMethod:
+                    default: switchover
+                    description: |-
+                      PrimaryUpdateMethod determines how the primary instance is updated.
+                      "restart" - tolerate brief downtime (suitable for development)
+                      "switchover" - minimal downtime via automated failover (production-grade)
+
+                      NOTE: When using "switchover", ensure config.instances > 1.
+                      Switchover requires at least one replica to fail over to.
+                    enum:
+                    - restart
+                    - switchover
+                    type: string
+                type: object
+              config:
+                default: {}
+                description: |-
+                  PostgresClusterConfig contains cluster-level configuration.
+                  These settings apply to PostgresCluster infrastructure.
+                  Can be overridden in PostgresCluster CR.
+                properties:
+                  connectionPoolerEnabled:
+                    default: false
+                    description: |-
+                      ConnectionPoolerEnabled controls whether PgBouncer connection pooling is deployed.
+                      When true, creates RW and RO pooler deployments for clusters using this class.
+                      Can be overridden in PostgresCluster CR.
+                    type: boolean
+                  instances:
+                    default: 1
+                    description: |-
+                      Instances is the number of database instances (1 primary + N replicas).
+                      Single instance (1) is suitable for development.
+                      High availability requires at least 3 instances (1 primary + 2 replicas).
+                    format: int32
+                    maximum: 10
+                    minimum: 1
+                    type: integer
+                  pgHBA:
+                    description: |-
+                      PgHBA contains pg_hba.conf host-based authentication rules.
+                      Defines client authentication and connection security (cluster-wide).
+ Example: ["hostssl all all 0.0.0.0/0 scram-sha-256"] + items: + type: string + type: array + postgresVersion: + default: "18" + description: |- + PostgresVersion is the PostgreSQL version (major or major.minor). + Examples: "18" (latest 18.x), "18.1" (specific minor), "17", "16" + pattern: ^[0-9]+(\.[0-9]+)?$ + type: string + postgresqlConfig: + additionalProperties: + type: string + description: |- + PostgreSQLConfig contains PostgreSQL engine configuration parameters. + Maps to postgresql.conf settings (cluster-wide). + Example: {"max_connections": "200", "shared_buffers": "2GB"} + type: object + resources: + description: |- + Resources defines CPU and memory requests/limits per instance. + All instances in the cluster have the same resources. + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + This field depends on the + DynamicResourceAllocation feature gate. + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + storage: + anyOf: + - type: integer + - type: string + default: 50Gi + description: |- + Storage is the size of persistent volume for each instance. + Cannot be decreased after cluster creation (PostgreSQL limitation). + Recommended minimum: 10Gi for production viability. + Example: "50Gi", "100Gi", "1Ti" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + provisioner: + description: |- + Provisioner identifies which database provisioner to use. 
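The config block above is a set of defaults that a PostgresCluster may override field by field ("Can be overridden in PostgresCluster CR"). One plausible way a controller could resolve the effective value, sketched with a hypothetical helper that is not part of this patch:

```go
package core

// resolve implements "CR override wins, otherwise class default": both sides
// are optional pointers, matching the generated types for instances,
// postgresVersion, storage, and connectionPoolerEnabled.
func resolve[T any](override, classDefault *T) *T {
	if override != nil {
		return override
	}
	return classDefault
}
```

Usage would look like `instances := resolve(cluster.Spec.Instances, class.Spec.Config.Instances)`, leaving nil only when neither side sets the field.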
+                Currently supported: "postgresql.cnpg.io" (CloudNativePG)
+              enum:
+              - postgresql.cnpg.io
+              type: string
+          required:
+          - provisioner
+          type: object
+          x-kubernetes-validations:
+          - message: cnpg config can only be set when provisioner is postgresql.cnpg.io
+            rule: '!has(self.cnpg) || self.provisioner == ''postgresql.cnpg.io'''
+          - message: cnpg.connectionPooler must be set when config.connectionPoolerEnabled
+              is true
+            rule: '!has(self.config) || !has(self.config.connectionPoolerEnabled)
+              || !self.config.connectionPoolerEnabled || (has(self.cnpg) && has(self.cnpg.connectionPooler))'
+        status:
+          description: PostgresClusterClassStatus defines the observed state of
+            PostgresClusterClass.
+          properties:
+            conditions:
+              description: Conditions represent the latest available observations
+                of the PostgresClusterClass state.
+              items:
+                description: Condition contains details for one aspect of the current
+                  state of this API Resource.
+                properties:
+                  lastTransitionTime:
+                    description: |-
+                      lastTransitionTime is the last time the condition transitioned from one status to another.
+                      This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+                    format: date-time
+                    type: string
+                  message:
+                    description: |-
+                      message is a human readable message indicating details about the transition.
+                      This may be an empty string.
+                    maxLength: 32768
+                    type: string
+                  observedGeneration:
+                    description: |-
+                      observedGeneration represents the .metadata.generation that the condition was set based upon.
+                      For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                      with respect to the current state of the instance.
+                    format: int64
+                    minimum: 0
+                    type: integer
+                  reason:
+                    description: |-
+                      reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                      Producers of specific condition types may define expected values and meanings for this field,
+                      and whether the values are considered a guaranteed API.
+                      The value should be a CamelCase string.
+                      This field may not be empty.
+                    maxLength: 1024
+                    minLength: 1
+                    pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                    type: string
+                  status:
+                    description: status of the condition, one of True, False, Unknown.
+                    enum:
+                    - "True"
+                    - "False"
+                    - Unknown
+                    type: string
+                  type:
+                    description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                    maxLength: 316
+                    pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                    type: string
+                required:
+                - lastTransitionTime
+                - message
+                - reason
+                - status
+                - type
+                type: object
+              type: array
+            phase:
+              description: |-
+                Phase represents the current phase of the PostgresClusterClass.
+ Valid phases: "Ready", "Invalid" + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} +status: + acceptedNames: + kind: "" + plural: "" + conditions: null + storedVersions: null diff --git a/bundle/manifests/enterprise.splunk.com_postgresclusters.yaml b/bundle/manifests/enterprise.splunk.com_postgresclusters.yaml new file mode 100644 index 000000000..abc6ddfd0 --- /dev/null +++ b/bundle/manifests/enterprise.splunk.com_postgresclusters.yaml @@ -0,0 +1,477 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.18.0 + creationTimestamp: null + labels: + name: splunk-operator + name: postgresclusters.enterprise.splunk.com +spec: + group: enterprise.splunk.com + names: + kind: PostgresCluster + listKind: PostgresClusterList + plural: postgresclusters + singular: postgrescluster + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.class + name: Class + type: string + - jsonPath: .status.phase + name: Phase + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v4 + schema: + openAPIV3Schema: + description: PostgresCluster is the Schema for the postgresclusters API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + PostgresClusterSpec defines the desired state of PostgresCluster. + Validation rules ensure immutability of Class, and that Storage and PostgresVersion can only be set once and cannot be removed or downgraded. + properties: + class: + description: This field is IMMUTABLE after creation. + minLength: 1 + type: string + x-kubernetes-validations: + - message: class is immutable + rule: self == oldSelf + clusterDeletionPolicy: + default: Retain + description: ClusterDeletionPolicy controls the deletion behavior + of the underlying CNPG Cluster when the PostgresCluster is deleted. + enum: + - Delete + - Retain + type: string + connectionPoolerConfig: + description: Only takes effect when connection pooling is enabled. + properties: + config: + additionalProperties: + type: string + description: |- + Config contains PgBouncer configuration parameters. + Passed directly to CNPG Pooler spec.pgbouncer.parameters. + See: https://cloudnative-pg.io/docs/1.28/connection_pooling/#pgbouncer-configuration-options + type: object + instances: + default: 3 + description: |- + Instances is the number of PgBouncer pod replicas. + Higher values provide better availability and load distribution. + format: int32 + maximum: 10 + minimum: 1 + type: integer + mode: + default: transaction + description: Mode defines the connection pooling strategy. 
+                    enum:
+                    - session
+                    - transaction
+                    - statement
+                    type: string
+                type: object
+              connectionPoolerEnabled:
+                default: false
+                description: |-
+                  ConnectionPoolerEnabled controls whether PgBouncer connection pooling is deployed for this cluster.
+                  When set, takes precedence over the class-level connectionPoolerEnabled value.
+                type: boolean
+              instances:
+                description: Instances overrides the number of PostgreSQL instances
+                  from ClusterClass.
+                format: int32
+                maximum: 10
+                minimum: 1
+                type: integer
+              managedRoles:
+                description: |-
+                  ManagedRoles contains PostgreSQL roles that should be created in the cluster.
+                  This field supports Server-Side Apply with per-role granularity, allowing
+                  multiple PostgresDatabase controllers to manage different roles independently.
+                items:
+                  description: ManagedRole represents a PostgreSQL role to be created
+                    and managed in the cluster.
+                  properties:
+                    exists:
+                      default: true
+                      description: Exists controls whether the role should be present
+                        (true) or absent (false) in PostgreSQL.
+                      type: boolean
+                    name:
+                      description: Name of the role/user to create.
+                      maxLength: 63
+                      minLength: 1
+                      type: string
+                    passwordSecretRef:
+                      description: PasswordSecretRef references a Secret and the key
+                        within it containing the password for this role.
+                      properties:
+                        key:
+                          description: The key of the secret to select from. Must
+                            be a valid secret key.
+                          type: string
+                        name:
+                          default: ""
+                          description: |-
+                            Name of the referent.
+                            This field is effectively required, but due to backwards compatibility is
+                            allowed to be empty. Instances of this type with an empty value here are
+                            almost certainly wrong.
+                            More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+                          type: string
+                        optional:
+                          description: Specify whether the Secret or its key must
+                            be defined
+                          type: boolean
+                      required:
+                      - key
+                      type: object
+                      x-kubernetes-map-type: atomic
+                  required:
+                  - name
+                  type: object
+                type: array
+                x-kubernetes-list-map-keys:
+                - name
+                x-kubernetes-list-type: map
+              pgHBA:
+                default: []
+                description: |-
+                  PgHBA contains pg_hba.conf host-based authentication rules.
+                  Defines client authentication and connection security (cluster-wide).
+                  Maps to pg_hba.conf settings.
+                  Default empty array prevents panic.
+                  Example: ["hostssl all all 0.0.0.0/0 scram-sha-256"]
+                items:
+                  type: string
+                type: array
+              postgresVersion:
+                description: |-
+                  PostgresVersion is the PostgreSQL version (major or major.minor).
+                  Examples: "18" (latest 18.x), "18.1" (specific minor), "17", "16"
+                pattern: ^[0-9]+(\.[0-9]+)?$
+                type: string
+              postgresqlConfig:
+                additionalProperties:
+                  type: string
+                default: {}
+                description: |-
+                  PostgreSQLConfig overrides PostgreSQL engine parameters from ClusterClass.
+                  Maps to postgresql.conf settings.
+                  Default empty map prevents panic.
+                  Example: {"shared_buffers": "128MB", "log_min_duration_statement": "500ms"}
+                type: object
+              resources:
+                description: Resources overrides CPU/memory resources from ClusterClass.
+                properties:
+                  claims:
+                    description: |-
+                      Claims lists the names of resources, defined in spec.resourceClaims,
+                      that are used by this container.
+
+                      This field depends on the
+                      DynamicResourceAllocation feature gate.
+
+                      This field is immutable. It can only be set for containers.
+                    items:
+                      description: ResourceClaim references one entry in PodSpec.ResourceClaims.
+                      properties:
+                        name:
+                          description: |-
+                            Name must match the name of one entry in pod.spec.resourceClaims of
+                            the Pod where this field is used.
It makes that resource available + inside a container. + type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + storage: + anyOf: + - type: integer + - type: string + description: |- + Storage overrides the storage size from ClusterClass. + Example: "5Gi" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - class + type: object + x-kubernetes-validations: + - messageExpression: '!has(self.postgresVersion) ? ''postgresVersion cannot + be removed once set (was: '' + oldSelf.postgresVersion + '')'' : ''postgresVersion + major version cannot be downgraded (from: '' + oldSelf.postgresVersion + + '', to: '' + self.postgresVersion + '')''' + rule: '!has(oldSelf.postgresVersion) || (has(self.postgresVersion) && + int(self.postgresVersion.split(''.'')[0]) >= int(oldSelf.postgresVersion.split(''.'')[0]))' + - messageExpression: '!has(self.storage) ? ''storage cannot be removed + once set (was: '' + string(oldSelf.storage) + '')'' : ''storage size + cannot be decreased (from: '' + string(oldSelf.storage) + '', to: + '' + string(self.storage) + '')''' + rule: '!has(oldSelf.storage) || (has(self.storage) && quantity(self.storage).compareTo(quantity(oldSelf.storage)) + >= 0)' + - message: connectionPoolerConfig cannot be overridden on PostgresCluster + rule: '!has(self.connectionPoolerConfig)' + status: + description: PostgresClusterStatus defines the observed state of PostgresCluster. + properties: + conditions: + description: Conditions represent the latest available observations + of the PostgresCluster's state. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. 
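The storage rule above leans on CEL's quantity() so that "50Gi" versus "100Gi" compares numerically rather than lexically. The same comparison in Go, shown only to illustrate the semantics (the function is hypothetical; the API server enforces the real rule):

```go
package core

import "k8s.io/apimachinery/pkg/api/resource"

// storageShrinks reports whether an update would decrease the volume size,
// mirroring quantity(self.storage).compareTo(quantity(oldSelf.storage)) >= 0
// from the CRD validation: Cmp < 0 means the new size is smaller.
func storageShrinks(oldSize, newSize string) bool {
	oldQ := resource.MustParse(oldSize) // e.g. "100Gi"
	newQ := resource.MustParse(newSize) // e.g. "50Gi"
	return newQ.Cmp(oldQ) < 0
}
```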
+ This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + connectionPoolerStatus: + description: |- + ConnectionPoolerStatus contains the observed state of the connection pooler. + Only populated when connection pooler is enabled in the PostgresClusterClass. + properties: + enabled: + description: Enabled indicates whether pooler is active for this + cluster. + type: boolean + type: object + managedRolesStatus: + description: ManagedRolesStatus tracks the reconciliation status of + managed roles. + properties: + failed: + additionalProperties: + type: string + description: Failed contains roles that failed to reconcile with + error messages. + type: object + pending: + description: Pending contains roles that are being created but + not yet ready. + items: + type: string + type: array + reconciled: + description: Reconciled contains roles that have been successfully + created and are ready. + items: + type: string + type: array + type: object + phase: + description: |- + Phase represents the current phase of the PostgresCluster. + Values: "Pending", "Provisioning", "Failed", "Ready", "Deleting" + type: string + provisionerRef: + description: |- + ProvisionerRef contains reference to the provisioner resource managing this PostgresCluster. + Right now, only CNPG is supported. + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: |- + If referring to a piece of an object instead of an entire object, this string + should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. + For example, if the object reference is to a container within a pod, this would take on a value like: + "spec.containers{name}" (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" (container with + index 2 in this pod). This syntax is chosen only to have some well-defined way of + referencing a part of an object. + type: string + kind: + description: |- + Kind of the referent. 
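Because spec.managedRoles is a listType=map keyed on name, several owners can each apply their own entry and the server merges them; managedRolesStatus above then reports reconciliation per role. A sketch of how a PostgresDatabase controller might claim one role via Server-Side Apply; the field-owner string is hypothetical and the target PostgresCluster is assumed to already exist:

```go
package controller

import (
	"context"

	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// applyManagedRole sends a partial object containing only the one role this
// owner manages; SSA merges it into spec.managedRoles without disturbing
// entries owned by other controllers.
func applyManagedRole(ctx context.Context, c client.Client, ns, clusterName, role string) error {
	obj := &unstructured.Unstructured{Object: map[string]interface{}{
		"apiVersion": "enterprise.splunk.com/v4",
		"kind":       "PostgresCluster",
		"metadata":   map[string]interface{}{"name": clusterName, "namespace": ns},
		"spec": map[string]interface{}{
			"managedRoles": []interface{}{
				map[string]interface{}{"name": role, "exists": true},
			},
		},
	}}
	return c.Patch(ctx, obj, client.Apply,
		client.FieldOwner("postgresdatabase-controller"), client.ForceOwnership)
}
```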
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + namespace: + description: |- + Namespace of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ + type: string + resourceVersion: + description: |- + Specific resourceVersion to which this reference is made, if any. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency + type: string + uid: + description: |- + UID of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids + type: string + type: object + x-kubernetes-map-type: atomic + resources: + description: Resources contains references to related Kubernetes resources + like ConfigMaps and Secrets. + properties: + configMapRef: + description: |- + ConfigMapRef references the ConfigMap with connection endpoints. + Contains: CLUSTER_ENDPOINTS, POOLER_ENDPOINTS (if connection pooler enabled) + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + secretRef: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must be + a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
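status.resources is how consumers discover connection details without knowing the provisioner's naming conventions. A sketch of following the ConfigMap reference; the CLUSTER_ENDPOINTS key comes from the description above, while the function itself is illustrative:

```go
package adapter

import (
	"context"
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"

	enterpriseApi "github.com/splunk/splunk-operator/api/v4"
)

// clusterEndpoints resolves the endpoints ConfigMap published in status.
// Until the cluster reconciles, the reference may be nil, so callers must
// tolerate "not published yet" and retry.
func clusterEndpoints(ctx context.Context, c client.Client, pc *enterpriseApi.PostgresCluster) (string, error) {
	res := pc.Status.Resources
	if res == nil || res.ConfigMapRef == nil {
		return "", fmt.Errorf("cluster %s: endpoints ConfigMap not published yet", pc.Name)
	}
	cm := &corev1.ConfigMap{}
	key := types.NamespacedName{Namespace: pc.Namespace, Name: res.ConfigMapRef.Name}
	if err := c.Get(ctx, key, cm); err != nil {
		return "", fmt.Errorf("reading endpoints ConfigMap: %w", err)
	}
	return cm.Data["CLUSTER_ENDPOINTS"], nil
}
```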
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key must be + defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + type: object + served: true + storage: true + subresources: + status: {} +status: + acceptedNames: + kind: "" + plural: "" + conditions: null + storedVersions: null diff --git a/bundle/manifests/enterprise.splunk.com_postgresdatabases.yaml b/bundle/manifests/enterprise.splunk.com_postgresdatabases.yaml new file mode 100644 index 000000000..badbc70b8 --- /dev/null +++ b/bundle/manifests/enterprise.splunk.com_postgresdatabases.yaml @@ -0,0 +1,267 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.18.0 + creationTimestamp: null + labels: + name: splunk-operator + name: postgresdatabases.enterprise.splunk.com +spec: + group: enterprise.splunk.com + names: + kind: PostgresDatabase + listKind: PostgresDatabaseList + plural: postgresdatabases + singular: postgresdatabase + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.clusterRef.name + name: Cluster + type: string + - jsonPath: .status.phase + name: Phase + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v4 + schema: + openAPIV3Schema: + description: PostgresDatabase is the Schema for the postgresdatabases API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: PostgresDatabaseSpec defines the desired state of PostgresDatabase. + properties: + clusterRef: + description: Reference to Postgres Cluster managed by postgresCluster + controller + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + databases: + items: + properties: + deletionPolicy: + default: Delete + enum: + - Delete + - Retain + type: string + extensions: + items: + type: string + type: array + name: + maxLength: 30 + type: string + required: + - name + type: object + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-validations: + - message: database names must be unique + rule: self.all(x, self.filter(y, y.name == x.name).size() == 1) + required: + - clusterRef + - databases + type: object + x-kubernetes-validations: + - message: clusterRef is immutable + rule: self.clusterRef == oldSelf.clusterRef + status: + description: PostgresDatabaseStatus defines the observed state of PostgresDatabase. + properties: + conditions: + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + databases: + items: + properties: + adminUserSecretRef: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + configMap: + description: |- + LocalObjectReference contains enough information to let you locate the + referenced object inside the same namespace. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + databaseRef: + description: |- + LocalObjectReference contains enough information to let you locate the + referenced object inside the same namespace. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + name: + type: string + ready: + type: boolean + rwUserSecretRef: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: array + observedGeneration: + description: ObservedGeneration represents the .metadata.generation + that the status was set based upon. 
+              format: int64
+              type: integer
+            phase:
+              type: string
+          type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
+status:
+  acceptedNames:
+    kind: ""
+    plural: ""
+  conditions: null
+  storedVersions: null
diff --git a/cmd/main.go b/cmd/main.go
index 3c9e223cc..9b960c2bc 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -53,6 +53,8 @@ import (
 	enterpriseApiV3 "github.com/splunk/splunk-operator/api/v3"
 	enterpriseApi "github.com/splunk/splunk-operator/api/v4"
 	"github.com/splunk/splunk-operator/internal/controller"
+
+	cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
 	//+kubebuilder:scaffold:imports
 	//extapi "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
 )
@@ -66,6 +68,7 @@ func init() {
 	utilruntime.Must(clientgoscheme.AddToScheme(scheme))
 	utilruntime.Must(enterpriseApi.AddToScheme(scheme))
 	utilruntime.Must(enterpriseApiV3.AddToScheme(scheme))
+	utilruntime.Must(cnpgv1.AddToScheme(scheme))
 	//+kubebuilder:scaffold:scheme
 	//utilruntime.Must(extapi.AddToScheme(scheme))
 }
@@ -279,6 +282,20 @@ func main() {
 		setupLog.Error(err, "unable to create controller", "controller", "Telemetry")
 		os.Exit(1)
 	}
+	if err := (&controller.PostgresDatabaseReconciler{
+		Client: mgr.GetClient(),
+		Scheme: mgr.GetScheme(),
+	}).SetupWithManager(mgr); err != nil {
+		setupLog.Error(err, "unable to create controller", "controller", "PostgresDatabase")
+		os.Exit(1)
+	}
+	if err := (&controller.PostgresClusterReconciler{
+		Client: mgr.GetClient(),
+		Scheme: mgr.GetScheme(),
+	}).SetupWithManager(mgr); err != nil {
+		setupLog.Error(err, "unable to create controller", "controller", "PostgresCluster")
+		os.Exit(1)
+	}
 
 	// Setup centralized validation webhook server (opt-in via ENABLE_VALIDATION_WEBHOOK env var, defaults to false)
 	enableWebhooks := os.Getenv("ENABLE_VALIDATION_WEBHOOK")
diff --git a/config/crd/bases/enterprise.splunk.com_postgresclusterclasses.yaml b/config/crd/bases/enterprise.splunk.com_postgresclusterclasses.yaml
new file mode 100644
index 000000000..70ef3536b
--- /dev/null
+++ b/config/crd/bases/enterprise.splunk.com_postgresclusterclasses.yaml
@@ -0,0 +1,326 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.18.0
+  name: postgresclusterclasses.enterprise.splunk.com
+spec:
+  group: enterprise.splunk.com
+  names:
+    kind: PostgresClusterClass
+    listKind: PostgresClusterClassList
+    plural: postgresclusterclasses
+    singular: postgresclusterclass
+  scope: Cluster
+  versions:
+  - additionalPrinterColumns:
+    - jsonPath: .spec.provisioner
+      name: Provisioner
+      type: string
+    - jsonPath: .spec.config.instances
+      name: Instances
+      type: integer
+    - jsonPath: .spec.config.storage
+      name: Storage
+      type: string
+    - jsonPath: .spec.config.postgresVersion
+      name: Version
+      type: string
+    - jsonPath: .status.phase
+      name: Phase
+      type: string
+    - jsonPath: .metadata.creationTimestamp
+      name: Age
+      type: date
+    name: v4
+    schema:
+      openAPIV3Schema:
+        description: |-
+          PostgresClusterClass is the Schema for the postgresclusterclasses API.
+          PostgresClusterClass defines a reusable template and policy for postgres cluster provisioning.
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: |-
+              PostgresClusterClassSpec defines the desired state of PostgresClusterClass.
+              PostgresClusterClass is immutable after creation - it serves as a template for Cluster CRs.
+            properties:
+              cnpg:
+                description: |-
+                  CNPG contains CloudNativePG-specific configuration and policies.
+                  Only used when Provisioner is "postgresql.cnpg.io".
+                  These settings CANNOT be overridden in PostgresCluster CR (platform policy).
+                properties:
+                  connectionPooler:
+                    description: |-
+                      ConnectionPooler contains PgBouncer connection pooler configuration.
+                      When enabled, creates RW and RO pooler deployments for clusters using this class.
+                    properties:
+                      config:
+                        additionalProperties:
+                          type: string
+                        description: |-
+                          Config contains PgBouncer configuration parameters.
+                          Passed directly to CNPG Pooler spec.pgbouncer.parameters.
+                          See: https://cloudnative-pg.io/docs/1.28/connection_pooling/#pgbouncer-configuration-options
+                        type: object
+                      instances:
+                        default: 3
+                        description: |-
+                          Instances is the number of PgBouncer pod replicas.
+                          Higher values provide better availability and load distribution.
+                        format: int32
+                        maximum: 10
+                        minimum: 1
+                        type: integer
+                      mode:
+                        default: transaction
+                        description: Mode defines the connection pooling strategy.
+                        enum:
+                        - session
+                        - transaction
+                        - statement
+                        type: string
+                    type: object
+                  primaryUpdateMethod:
+                    default: switchover
+                    description: |-
+                      PrimaryUpdateMethod determines how the primary instance is updated.
+                      "restart" - tolerate brief downtime (suitable for development)
+                      "switchover" - minimal downtime via automated failover (production-grade)
+
+                      NOTE: When using "switchover", ensure config.instances > 1.
+                      Switchover requires at least one replica to fail over to.
+                    enum:
+                    - restart
+                    - switchover
+                    type: string
+                type: object
+              config:
+                default: {}
+                description: |-
+                  PostgresClusterConfig contains cluster-level configuration.
+                  These settings apply to PostgresCluster infrastructure.
+                  Can be overridden in PostgresCluster CR.
+                properties:
+                  connectionPoolerEnabled:
+                    default: false
+                    description: |-
+                      ConnectionPoolerEnabled controls whether PgBouncer connection pooling is deployed.
+                      When true, creates RW and RO pooler deployments for clusters using this class.
+                      Can be overridden in PostgresCluster CR.
+                    type: boolean
+                  instances:
+                    default: 1
+                    description: |-
+                      Instances is the number of database instances (1 primary + N replicas).
+                      Single instance (1) is suitable for development.
+                      High availability requires at least 3 instances (1 primary + 2 replicas).
+                    format: int32
+                    maximum: 10
+                    minimum: 1
+                    type: integer
+                  pgHBA:
+                    description: |-
+                      PgHBA contains pg_hba.conf host-based authentication rules.
+                      Defines client authentication and connection security (cluster-wide).
+                      Example: ["hostssl all all 0.0.0.0/0 scram-sha-256"]
+                    items:
+                      type: string
+                    type: array
+                  postgresVersion:
+                    default: "18"
+                    description: |-
+                      PostgresVersion is the PostgreSQL version (major or major.minor).
+ Examples: "18" (latest 18.x), "18.1" (specific minor), "17", "16" + pattern: ^[0-9]+(\.[0-9]+)?$ + type: string + postgresqlConfig: + additionalProperties: + type: string + description: |- + PostgreSQLConfig contains PostgreSQL engine configuration parameters. + Maps to postgresql.conf settings (cluster-wide). + Example: {"max_connections": "200", "shared_buffers": "2GB"} + type: object + resources: + description: |- + Resources defines CPU and memory requests/limits per instance. + All instances in the cluster have the same resources. + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + This field depends on the + DynamicResourceAllocation feature gate. + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + storage: + anyOf: + - type: integer + - type: string + default: 50Gi + description: |- + Storage is the size of persistent volume for each instance. + Cannot be decreased after cluster creation (PostgreSQL limitation). + Recommended minimum: 10Gi for production viability. + Example: "50Gi", "100Gi", "1Ti" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + provisioner: + description: |- + Provisioner identifies which database provisioner to use. 
+ Currently supported: "postgresql.cnpg.io" (CloudNativePG) + enum: + - postgresql.cnpg.io + type: string + required: + - provisioner + type: object + x-kubernetes-validations: + - message: cnpg config can only be set when provisioner is postgresql.cnpg.io + rule: '!has(self.cnpg) || self.provisioner == ''postgresql.cnpg.io''' + - message: cnpg.connectionPooler must be set when config.connectionPoolerEnabled + is true + rule: '!has(self.config) || !has(self.config.connectionPoolerEnabled) + || !self.config.connectionPoolerEnabled || (has(self.cnpg) && has(self.cnpg.connectionPooler))' + status: + description: PostgresClusterClassStatus defines the observed state of + PostgresClusterClass. + properties: + conditions: + description: Conditions represent the latest available observations + of the PostgresClusterClass state. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + phase: + description: |- + Phase represents the current phase of the PostgresClusterClass. 
+ Valid phases: "Ready", "Invalid" + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/bases/enterprise.splunk.com_postgresclusters.yaml b/config/crd/bases/enterprise.splunk.com_postgresclusters.yaml new file mode 100644 index 000000000..14ba142d6 --- /dev/null +++ b/config/crd/bases/enterprise.splunk.com_postgresclusters.yaml @@ -0,0 +1,469 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.18.0 + name: postgresclusters.enterprise.splunk.com +spec: + group: enterprise.splunk.com + names: + kind: PostgresCluster + listKind: PostgresClusterList + plural: postgresclusters + singular: postgrescluster + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.class + name: Class + type: string + - jsonPath: .status.phase + name: Phase + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v4 + schema: + openAPIV3Schema: + description: PostgresCluster is the Schema for the postgresclusters API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + PostgresClusterSpec defines the desired state of PostgresCluster. + Validation rules ensure immutability of Class, and that Storage and PostgresVersion can only be set once and cannot be removed or downgraded. + properties: + class: + description: This field is IMMUTABLE after creation. + minLength: 1 + type: string + x-kubernetes-validations: + - message: class is immutable + rule: self == oldSelf + clusterDeletionPolicy: + default: Retain + description: ClusterDeletionPolicy controls the deletion behavior + of the underlying CNPG Cluster when the PostgresCluster is deleted. + enum: + - Delete + - Retain + type: string + connectionPoolerConfig: + description: Only takes effect when connection pooling is enabled. + properties: + config: + additionalProperties: + type: string + description: |- + Config contains PgBouncer configuration parameters. + Passed directly to CNPG Pooler spec.pgbouncer.parameters. + See: https://cloudnative-pg.io/docs/1.28/connection_pooling/#pgbouncer-configuration-options + type: object + instances: + default: 3 + description: |- + Instances is the number of PgBouncer pod replicas. + Higher values provide better availability and load distribution. + format: int32 + maximum: 10 + minimum: 1 + type: integer + mode: + default: transaction + description: Mode defines the connection pooling strategy. + enum: + - session + - transaction + - statement + type: string + type: object + connectionPoolerEnabled: + default: false + description: |- + ConnectionPoolerEnabled controls whether PgBouncer connection pooling is deployed for this cluster. 
+ When set, takes precedence over the class-level connectionPoolerEnabled value. + type: boolean + instances: + description: Instances overrides the number of PostgreSQL instances + from ClusterClass. + format: int32 + maximum: 10 + minimum: 1 + type: integer + managedRoles: + description: |- + ManagedRoles contains PostgreSQL roles that should be created in the cluster. + This field supports Server-Side Apply with per-role granularity, allowing + multiple PostgresDatabase controllers to manage different roles independently. + items: + description: ManagedRole represents a PostgreSQL role to be created + and managed in the cluster. + properties: + exists: + default: true + description: Exists controls whether the role should be present + (true) or absent (false) in PostgreSQL. + type: boolean + name: + description: Name of the role/user to create. + maxLength: 63 + minLength: 1 + type: string + passwordSecretRef: + description: PasswordSecretRef references a Secret and the key + within it containing the password for this role. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + pgHBA: + default: [] + description: |- + PgHBA contains pg_hba.conf host-based authentication rules. + Defines client authentication and connection security (cluster-wide). + Maps to pg_hba.conf settings. + Default empty array prevents panic. + Example: ["hostssl all all 0.0.0.0/0 scram-sha-256"] + items: + type: string + type: array + postgresVersion: + description: |- + PostgresVersion is the PostgreSQL version (major or major.minor). + Examples: "18" (latest 18.x), "18.1" (specific minor), "17", "16" + pattern: ^[0-9]+(\.[0-9]+)?$ + type: string + postgresqlConfig: + additionalProperties: + type: string + default: {} + description: |- + PostgreSQL overrides PostgreSQL engine parameters from ClusterClass. + Maps to postgresql.conf settings. + Default empty map prevents panic. + Example: {"shared_buffers": "128MB", "log_min_duration_statement": "500ms"} + type: object + resources: + description: Resources overrides CPU/memory resources from ClusterClass. + properties: + claims: + description: |- + Claims lists the names of resources, defined in spec.resourceClaims, + that are used by this container. + + This field depends on the + DynamicResourceAllocation feature gate. + + This field is immutable. It can only be set for containers. + items: + description: ResourceClaim references one entry in PodSpec.ResourceClaims. + properties: + name: + description: |- + Name must match the name of one entry in pod.spec.resourceClaims of + the Pod where this field is used. It makes that resource available + inside a container. + type: string + request: + description: |- + Request is the name chosen for a request in the referenced claim. + If empty, everything from the claim is made available, otherwise + only the result of this request. 
+ type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + storage: + anyOf: + - type: integer + - type: string + description: |- + Storage overrides the storage size from ClusterClass. + Example: "5Gi" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + required: + - class + type: object + x-kubernetes-validations: + - messageExpression: '!has(self.postgresVersion) ? ''postgresVersion cannot + be removed once set (was: '' + oldSelf.postgresVersion + '')'' : ''postgresVersion + major version cannot be downgraded (from: '' + oldSelf.postgresVersion + + '', to: '' + self.postgresVersion + '')''' + rule: '!has(oldSelf.postgresVersion) || (has(self.postgresVersion) && + int(self.postgresVersion.split(''.'')[0]) >= int(oldSelf.postgresVersion.split(''.'')[0]))' + - messageExpression: '!has(self.storage) ? ''storage cannot be removed + once set (was: '' + string(oldSelf.storage) + '')'' : ''storage size + cannot be decreased (from: '' + string(oldSelf.storage) + '', to: + '' + string(self.storage) + '')''' + rule: '!has(oldSelf.storage) || (has(self.storage) && quantity(self.storage).compareTo(quantity(oldSelf.storage)) + >= 0)' + - message: connectionPoolerConfig cannot be overridden on PostgresCluster + rule: '!has(self.connectionPoolerConfig)' + status: + description: PostgresClusterStatus defines the observed state of PostgresCluster. + properties: + conditions: + description: Conditions represent the latest available observations + of the PostgresCluster's state. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. 
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + connectionPoolerStatus: + description: |- + ConnectionPoolerStatus contains the observed state of the connection pooler. + Only populated when connection pooler is enabled in the PostgresClusterClass. + properties: + enabled: + description: Enabled indicates whether pooler is active for this + cluster. + type: boolean + type: object + managedRolesStatus: + description: ManagedRolesStatus tracks the reconciliation status of + managed roles. + properties: + failed: + additionalProperties: + type: string + description: Failed contains roles that failed to reconcile with + error messages. + type: object + pending: + description: Pending contains roles that are being created but + not yet ready. + items: + type: string + type: array + reconciled: + description: Reconciled contains roles that have been successfully + created and are ready. + items: + type: string + type: array + type: object + phase: + description: |- + Phase represents the current phase of the PostgresCluster. + Values: "Pending", "Provisioning", "Failed", "Ready", "Deleting" + type: string + provisionerRef: + description: |- + ProvisionerRef contains reference to the provisioner resource managing this PostgresCluster. + Right now, only CNPG is supported. + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: |- + If referring to a piece of an object instead of an entire object, this string + should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. + For example, if the object reference is to a container within a pod, this would take on a value like: + "spec.containers{name}" (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" (container with + index 2 in this pod). This syntax is chosen only to have some well-defined way of + referencing a part of an object. + type: string + kind: + description: |- + Kind of the referent. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + namespace: + description: |- + Namespace of the referent. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ + type: string + resourceVersion: + description: |- + Specific resourceVersion to which this reference is made, if any. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency + type: string + uid: + description: |- + UID of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids + type: string + type: object + x-kubernetes-map-type: atomic + resources: + description: Resources contains references to related Kubernetes resources + like ConfigMaps and Secrets. + properties: + configMapRef: + description: |- + ConfigMapRef references the ConfigMap with connection endpoints. + Contains: CLUSTER_ENDPOINTS, POOLER_ENDPOINTS (if connection pooler enabled) + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + secretRef: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must be + a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key must be + defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/bases/enterprise.splunk.com_postgresdatabases.yaml b/config/crd/bases/enterprise.splunk.com_postgresdatabases.yaml new file mode 100644 index 000000000..d8df534d3 --- /dev/null +++ b/config/crd/bases/enterprise.splunk.com_postgresdatabases.yaml @@ -0,0 +1,259 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.18.0 + name: postgresdatabases.enterprise.splunk.com +spec: + group: enterprise.splunk.com + names: + kind: PostgresDatabase + listKind: PostgresDatabaseList + plural: postgresdatabases + singular: postgresdatabase + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.clusterRef.name + name: Cluster + type: string + - jsonPath: .status.phase + name: Phase + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v4 + schema: + openAPIV3Schema: + description: PostgresDatabase is the Schema for the postgresdatabases API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
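
The `status.resources` references above are what application workloads consume; a sketch of a pod wiring the published ConfigMap key and credential Secret into env vars (object names are hypothetical and would in practice be read from the PostgresCluster's status):

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: psql-client
spec:
  containers:
    - name: client
      image: postgres:18                             # illustrative image
      command: ["sleep", "infinity"]
      env:
        - name: PGHOST
          valueFrom:
            configMapKeyRef:
              name: postgresql-cluster-dev-endpoints # hypothetical; from status.resources.configMapRef
              key: CLUSTER_ENDPOINTS                 # key documented in the schema above
        - name: PGPASSWORD
          valueFrom:
            secretKeyRef:
              name: postgresql-cluster-dev-app       # hypothetical; from status.resources.secretRef
              key: password                          # hypothetical; the selector carries the real key
```
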
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: PostgresDatabaseSpec defines the desired state of PostgresDatabase. + properties: + clusterRef: + description: Reference to Postgres Cluster managed by postgresCluster + controller + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + databases: + items: + properties: + deletionPolicy: + default: Delete + enum: + - Delete + - Retain + type: string + extensions: + items: + type: string + type: array + name: + maxLength: 30 + type: string + required: + - name + type: object + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-validations: + - message: database names must be unique + rule: self.all(x, self.filter(y, y.name == x.name).size() == 1) + required: + - clusterRef + - databases + type: object + x-kubernetes-validations: + - message: clusterRef is immutable + rule: self.clusterRef == oldSelf.clusterRef + status: + description: PostgresDatabaseStatus defines the observed state of PostgresDatabase. + properties: + conditions: + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + databases: + items: + properties: + adminUserSecretRef: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + configMap: + description: |- + LocalObjectReference contains enough information to let you locate the + referenced object inside the same namespace. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + databaseRef: + description: |- + LocalObjectReference contains enough information to let you locate the + referenced object inside the same namespace. + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + name: + type: string + ready: + type: boolean + rwUserSecretRef: + description: SecretKeySelector selects a key of a Secret. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + type: array + observedGeneration: + description: ObservedGeneration represents the .metadata.generation + that the status was set based upon. 
+ format: int64 + type: integer + phase: + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index 21dd480ce..648316baf 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -13,6 +13,9 @@ resources: - bases/enterprise.splunk.com_ingestorclusters.yaml - bases/enterprise.splunk.com_queues.yaml - bases/enterprise.splunk.com_objectstorages.yaml +- bases/enterprise.splunk.com_postgresdatabases.yaml +- bases/enterprise.splunk.com_postgresclusterclasses.yaml +- bases/enterprise.splunk.com_postgresclusters.yaml #+kubebuilder:scaffold:crdkustomizeresource diff --git a/config/rbac/postgrescluster_admin_role.yaml b/config/rbac/postgrescluster_admin_role.yaml new file mode 100644 index 000000000..bb3f2e06b --- /dev/null +++ b/config/rbac/postgrescluster_admin_role.yaml @@ -0,0 +1,27 @@ +# This rule is not used by the project splunk-operator itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants full permissions ('*') over enterprise.splunk.com. +# This role is intended for users authorized to modify roles and bindings within the cluster, +# enabling them to delegate specific permissions to other users or groups as needed. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: splunk-operator + app.kubernetes.io/managed-by: kustomize + name: postgrescluster-admin-role +rules: +- apiGroups: + - enterprise.splunk.com + resources: + - postgresclusters + verbs: + - '*' +- apiGroups: + - enterprise.splunk.com + resources: + - postgresclusters/status + verbs: + - get diff --git a/config/rbac/postgrescluster_editor_role.yaml b/config/rbac/postgrescluster_editor_role.yaml new file mode 100644 index 000000000..13884ce4b --- /dev/null +++ b/config/rbac/postgrescluster_editor_role.yaml @@ -0,0 +1,33 @@ +# This rule is not used by the project splunk-operator itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants permissions to create, update, and delete resources within the enterprise.splunk.com. +# This role is intended for users who need to manage these resources +# but should not control RBAC or manage permissions for others. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: splunk-operator + app.kubernetes.io/managed-by: kustomize + name: postgrescluster-editor-role +rules: +- apiGroups: + - enterprise.splunk.com + resources: + - postgresclusters + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - enterprise.splunk.com + resources: + - postgresclusters/status + verbs: + - get diff --git a/config/rbac/postgrescluster_viewer_role.yaml b/config/rbac/postgrescluster_viewer_role.yaml new file mode 100644 index 000000000..0474151b3 --- /dev/null +++ b/config/rbac/postgrescluster_viewer_role.yaml @@ -0,0 +1,29 @@ +# This rule is not used by the project splunk-operator itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants read-only access to enterprise.splunk.com resources. +# This role is intended for users who need visibility into these resources +# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing. 
+ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: splunk-operator + app.kubernetes.io/managed-by: kustomize + name: postgrescluster-viewer-role +rules: +- apiGroups: + - enterprise.splunk.com + resources: + - postgresclusters + verbs: + - get + - list + - watch +- apiGroups: + - enterprise.splunk.com + resources: + - postgresclusters/status + verbs: + - get diff --git a/config/rbac/postgresclusterclass_admin_role.yaml b/config/rbac/postgresclusterclass_admin_role.yaml new file mode 100644 index 000000000..d16defdd6 --- /dev/null +++ b/config/rbac/postgresclusterclass_admin_role.yaml @@ -0,0 +1,27 @@ +# This rule is not used by the project splunk-operator itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants full permissions ('*') over enterprise.splunk.com. +# This role is intended for users authorized to modify roles and bindings within the cluster, +# enabling them to delegate specific permissions to other users or groups as needed. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: splunk-operator + app.kubernetes.io/managed-by: kustomize + name: postgresclusterclass-admin-role +rules: +- apiGroups: + - enterprise.splunk.com + resources: + - postgresclusterclasses + verbs: + - '*' +- apiGroups: + - enterprise.splunk.com + resources: + - postgresclusterclasses/status + verbs: + - get diff --git a/config/rbac/postgresclusterclass_editor_role.yaml b/config/rbac/postgresclusterclass_editor_role.yaml new file mode 100644 index 000000000..a634510ff --- /dev/null +++ b/config/rbac/postgresclusterclass_editor_role.yaml @@ -0,0 +1,33 @@ +# This rule is not used by the project splunk-operator itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants permissions to create, update, and delete resources within the enterprise.splunk.com. +# This role is intended for users who need to manage these resources +# but should not control RBAC or manage permissions for others. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: splunk-operator + app.kubernetes.io/managed-by: kustomize + name: postgresclusterclass-editor-role +rules: +- apiGroups: + - enterprise.splunk.com + resources: + - postgresclusterclasses + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - enterprise.splunk.com + resources: + - postgresclusterclasses/status + verbs: + - get diff --git a/config/rbac/postgresclusterclass_viewer_role.yaml b/config/rbac/postgresclusterclass_viewer_role.yaml new file mode 100644 index 000000000..4da318ff2 --- /dev/null +++ b/config/rbac/postgresclusterclass_viewer_role.yaml @@ -0,0 +1,29 @@ +# This rule is not used by the project splunk-operator itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants read-only access to enterprise.splunk.com resources. +# This role is intended for users who need visibility into these resources +# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing. 
+ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: splunk-operator + app.kubernetes.io/managed-by: kustomize + name: postgresclusterclass-viewer-role +rules: +- apiGroups: + - enterprise.splunk.com + resources: + - postgresclusterclasses + verbs: + - get + - list + - watch +- apiGroups: + - enterprise.splunk.com + resources: + - postgresclusterclasses/status + verbs: + - get diff --git a/config/rbac/postgresdatabase_admin_role.yaml b/config/rbac/postgresdatabase_admin_role.yaml new file mode 100644 index 000000000..b98548d5c --- /dev/null +++ b/config/rbac/postgresdatabase_admin_role.yaml @@ -0,0 +1,27 @@ +# This rule is not used by the project splunk-operator itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants full permissions ('*') over enterprise.splunk.com. +# This role is intended for users authorized to modify roles and bindings within the cluster, +# enabling them to delegate specific permissions to other users or groups as needed. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: splunk-operator + app.kubernetes.io/managed-by: kustomize + name: postgresdatabase-admin-role +rules: +- apiGroups: + - enterprise.splunk.com + resources: + - postgresdatabases + verbs: + - '*' +- apiGroups: + - enterprise.splunk.com + resources: + - postgresdatabases/status + verbs: + - get diff --git a/config/rbac/postgresdatabase_editor_role.yaml b/config/rbac/postgresdatabase_editor_role.yaml new file mode 100644 index 000000000..21891af10 --- /dev/null +++ b/config/rbac/postgresdatabase_editor_role.yaml @@ -0,0 +1,33 @@ +# This rule is not used by the project splunk-operator itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants permissions to create, update, and delete resources within the enterprise.splunk.com. +# This role is intended for users who need to manage these resources +# but should not control RBAC or manage permissions for others. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: splunk-operator + app.kubernetes.io/managed-by: kustomize + name: postgresdatabase-editor-role +rules: +- apiGroups: + - enterprise.splunk.com + resources: + - postgresdatabases + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - enterprise.splunk.com + resources: + - postgresdatabases/status + verbs: + - get diff --git a/config/rbac/postgresdatabase_viewer_role.yaml b/config/rbac/postgresdatabase_viewer_role.yaml new file mode 100644 index 000000000..702fab391 --- /dev/null +++ b/config/rbac/postgresdatabase_viewer_role.yaml @@ -0,0 +1,29 @@ +# This rule is not used by the project splunk-operator itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants read-only access to enterprise.splunk.com resources. +# This role is intended for users who need visibility into these resources +# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing. 
+ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: splunk-operator + app.kubernetes.io/managed-by: kustomize + name: postgresdatabase-viewer-role +rules: +- apiGroups: + - enterprise.splunk.com + resources: + - postgresdatabases + verbs: + - get + - list + - watch +- apiGroups: + - enterprise.splunk.com + resources: + - postgresdatabases/status + verbs: + - get diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 7873f18e1..d676ac24a 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -55,6 +55,8 @@ rules: - licensemasters - monitoringconsoles - objectstorages + - postgresclusters + - postgresdatabases - queues - searchheadclusters - standalones @@ -77,6 +79,8 @@ rules: - licensemasters/finalizers - monitoringconsoles/finalizers - objectstorages/finalizers + - postgresclusters/finalizers + - postgresdatabases/finalizers - queues/finalizers - searchheadclusters/finalizers - standalones/finalizers @@ -93,6 +97,8 @@ rules: - licensemasters/status - monitoringconsoles/status - objectstorages/status + - postgresclusters/status + - postgresdatabases/status - queues/status - searchheadclusters/status - standalones/status @@ -100,3 +106,32 @@ rules: - get - patch - update +- apiGroups: + - enterprise.splunk.com + resources: + - postgresclusterclasses + verbs: + - get + - list + - watch +- apiGroups: + - postgresql.cnpg.io + resources: + - clusters + - databases + - poolers + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - postgresql.cnpg.io + resources: + - clusters/status + - poolers/status + verbs: + - get diff --git a/config/samples/enterprise_v4_postgrescluster_default.yaml b/config/samples/enterprise_v4_postgrescluster_default.yaml new file mode 100644 index 000000000..6669aceb2 --- /dev/null +++ b/config/samples/enterprise_v4_postgrescluster_default.yaml @@ -0,0 +1,12 @@ +# This is a sample PostgresCluster manifest with default values for all fields. +# Defaults are inherited from the ClusterClass "postgresql-dev" (see enterprise_v4_clusterclass_dev.yaml) and can be overridden here. +apiVersion: enterprise.splunk.com/v4 +kind: PostgresCluster +metadata: + labels: + app.kubernetes.io/name: splunk-operator + app.kubernetes.io/managed-by: kustomize + name: postgresql-cluster-dev +spec: + class: postgresql-dev + diff --git a/config/samples/enterprise_v4_postgrescluster_dev.yaml b/config/samples/enterprise_v4_postgrescluster_dev.yaml new file mode 100644 index 000000000..b5c6b8700 --- /dev/null +++ b/config/samples/enterprise_v4_postgrescluster_dev.yaml @@ -0,0 +1,28 @@ +# Sample PostgresCluster using Postgres-dev ClusterClass with overriding defaults +# This sample demonstrates how to override default values from the ClusterClass "postgresql-dev" (see enterprise_v4_clusterclass_dev.yaml) in a PostgresCluster manifest. +# Overrides include changing storage, changing PostgreSQL version, and modifying resources. +apiVersion: enterprise.splunk.com/v4 +kind: PostgresCluster +metadata: + labels: + app.kubernetes.io/name: splunk-operator + app.kubernetes.io/managed-by: kustomize + name: postgresql-cluster-dev +spec: + # Reference the ClusterClass to inherit defaults - this is required, immutable, and must match the name of an existing ClusterClass + class: postgresql-dev + clusterDeletionPolicy: Retain + instances: 3 + # Storage and PostgreSQL version are overridden from the ClusterClass defaults. 
Validation rules on the PostgresCluster resource will prevent removing these fields or setting them to lower values than the original overrides. + storage: 1Gi + postgresVersion: "15.10" + resources: + requests: + cpu: "250m" + memory: "512Mi" + limits: + cpu: "500m" + memory: "1Gi" + # Enable connection pooler for this cluster + # Takes precedence over the class-level connectionPoolerEnabled value + connectionPoolerEnabled: true \ No newline at end of file diff --git a/config/samples/enterprise_v4_postgresclusterclass_dev.yaml b/config/samples/enterprise_v4_postgresclusterclass_dev.yaml new file mode 100644 index 000000000..a9846e36c --- /dev/null +++ b/config/samples/enterprise_v4_postgresclusterclass_dev.yaml @@ -0,0 +1,39 @@ +--- +# Development PostgresClusterClass +# Minimal configuration for local development and testing +apiVersion: enterprise.splunk.com/v4 +kind: PostgresClusterClass +metadata: + name: postgresql-dev +spec: + provisioner: postgresql.cnpg.io + + config: + # Single instance - no HA (suitable for development) + instances: 1 + + # Small storage for development + storage: 10Gi + + # Latest PostgreSQL 18 + postgresVersion: "18" + + # Minimal resources + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1" + memory: "2Gi" + connectionPoolerEnabled: true + + cnpg: + # Restart method - tolerate downtime in dev + primaryUpdateMethod: restart + connectionPooler: + instances: 2 + mode: transaction + config: + max_client_conn: "100" + diff --git a/config/samples/enterprise_v4_postgresclusterclass_prod.yaml b/config/samples/enterprise_v4_postgresclusterclass_prod.yaml new file mode 100644 index 000000000..56d9f232a --- /dev/null +++ b/config/samples/enterprise_v4_postgresclusterclass_prod.yaml @@ -0,0 +1,80 @@ +--- +# Production PostgresClusterClass +# Full configuration with HA, security, and tuned PostgreSQL settings +apiVersion: enterprise.splunk.com/v4 +kind: PostgresClusterClass +metadata: + name: postgresql-prod +spec: + provisioner: postgresql.cnpg.io + + config: + # High availability - 1 primary + 2 replicas + instances: 3 + + # Production storage + storage: 100Gi + + # PostgreSQL 18.1 (specific minor version) + postgresVersion: "18.1" + + # Production-grade resources + resources: + requests: + cpu: "2" + memory: "8Gi" + limits: + cpu: "4" + memory: "16Gi" + + # Tuned PostgreSQL configuration for OLTP workload + postgresqlConfig: + # Connection settings + max_connections: "200" + + # Memory settings (based on 8GB RAM) + shared_buffers: "2GB" + effective_cache_size: "6GB" + maintenance_work_mem: "512MB" + work_mem: "20MB" + + # WAL settings + wal_buffers: "16MB" + min_wal_size: "1GB" + max_wal_size: "4GB" + + # Query tuning + random_page_cost: "1.1" # SSD optimization + effective_io_concurrency: "200" + + # Logging + log_destination: "stderr" + logging_collector: "on" + log_min_duration_statement: "1000" # Log queries > 1s + + # Secure pg_hba configuration + pgHBA: + # Reject all non-SSL connections + - "hostnossl all all 0.0.0.0/0 reject" + # Require SSL + password authentication + - "hostssl all all 0.0.0.0/0 scram-sha-256" + + # Enable connection pooler for clusters using this class + connectionPoolerEnabled: true + + cnpg: + # Switchover method - minimal downtime via automated failover + primaryUpdateMethod: switchover + + # Connection pooler configuration (PgBouncer) + connectionPooler: + # Number of PgBouncer pod replicas + instances: 3 + # Pooling mode + mode: transaction + # PgBouncer configuration parameters + config: + # Maximum number of 
client connections allowed + max_client_conn: "100" + # Default number of server connections per user/database pair + default_pool_size: "20" diff --git a/config/samples/enterprise_v4_postgresdatabase.yaml b/config/samples/enterprise_v4_postgresdatabase.yaml new file mode 100644 index 000000000..874393548 --- /dev/null +++ b/config/samples/enterprise_v4_postgresdatabase.yaml @@ -0,0 +1,18 @@ +apiVersion: enterprise.splunk.com/v4 +kind: PostgresDatabase +metadata: + name: splunk-databases + # namespace: default +spec: + clusterRef: + name: postgresql-cluster-dev + databases: + - name: kvstore + extensions: + - pg_stat_statements + - pgcrypto + deletionPolicy: Delete + - name: analytics + extensions: + - pg_trgm + deletionPolicy: Delete \ No newline at end of file diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml index 34c05ab05..b2d13b188 100644 --- a/config/samples/kustomization.yaml +++ b/config/samples/kustomization.yaml @@ -16,4 +16,7 @@ resources: - enterprise_v4_ingestorcluster.yaml - enterprise_v4_queue.yaml - enterprise_v4_objectstorage.yaml +- enterprise_v4_postgresdatabase.yaml +- enterprise_v4_postgresclusterclass_dev.yaml +- enterprise_v4_postgrescluster_default.yaml #+kubebuilder:scaffold:manifestskustomizesamples diff --git a/config/samples/validation-tests/database/01-invalid-duplicate-names.yaml b/config/samples/validation-tests/database/01-invalid-duplicate-names.yaml new file mode 100644 index 000000000..95cd4d19b --- /dev/null +++ b/config/samples/validation-tests/database/01-invalid-duplicate-names.yaml @@ -0,0 +1,14 @@ +apiVersion: enterprise.splunk.com/v4 +kind: PostgresDatabase +metadata: + name: test-duplicate-names + namespace: default +spec: + clusterRef: + name: postgres-cluster + databases: + - name: kvstore + extensions: + - pg_stat_statements + - name: analytics + - name: kvstore # DUPLICATE! Should fail with: "database names must be unique" diff --git a/config/samples/validation-tests/database/02-invalid-immutability-update.yaml b/config/samples/validation-tests/database/02-invalid-immutability-update.yaml new file mode 100644 index 000000000..73dfb300b --- /dev/null +++ b/config/samples/validation-tests/database/02-invalid-immutability-update.yaml @@ -0,0 +1,19 @@ +apiVersion: enterprise.splunk.com/v4 +kind: PostgresDatabase +metadata: + name: test-postgresdatabase + namespace: default +spec: + clusterRef: + name: different-cluster # CHANGED! Should fail with: "clusterRef is immutable" + databases: + - name: kvstore + extensions: + - pg_stat_statements + - pgcrypto + deletionPolicy: Retain + - name: analytics + extensions: + - pg_trgm + deletionPolicy: Delete + - name: metrics diff --git a/config/samples/validation-tests/database/03-invalid-deletion-policy.yaml b/config/samples/validation-tests/database/03-invalid-deletion-policy.yaml new file mode 100644 index 000000000..bb911e88c --- /dev/null +++ b/config/samples/validation-tests/database/03-invalid-deletion-policy.yaml @@ -0,0 +1,11 @@ +apiVersion: enterprise.splunk.com/v4 +kind: PostgresDatabase +metadata: + name: test-invalid-policy + namespace: default +spec: + clusterRef: + name: postgres-cluster + databases: + - name: kvstore + deletionPolicy: Archive # INVALID! 
Only "Delete" or "Retain" allowed diff --git a/config/samples/validation-tests/database/04-invalid-missing-fields.yaml b/config/samples/validation-tests/database/04-invalid-missing-fields.yaml new file mode 100644 index 000000000..c0376eb21 --- /dev/null +++ b/config/samples/validation-tests/database/04-invalid-missing-fields.yaml @@ -0,0 +1,11 @@ +apiVersion: enterprise.splunk.com/v4 +kind: PostgresDatabase +metadata: + name: test-missing-fields + namespace: default +spec: + # Missing clusterRef - REQUIRED field! + databases: + - name: kvstore + - extensions: # Missing name - REQUIRED in DatabaseDefinition! + - pg_stat_statements diff --git a/go.mod b/go.mod index 3615f95ed..2f9b00cc6 100644 --- a/go.mod +++ b/go.mod @@ -13,26 +13,30 @@ require ( github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.85 github.com/aws/aws-sdk-go-v2/service/s3 v1.84.1 github.com/aws/aws-sdk-go-v2/service/sqs v1.42.21 + github.com/cloudnative-pg/cloudnative-pg v1.28.0 github.com/go-logr/logr v1.4.3 github.com/google/go-cmp v0.7.0 github.com/google/uuid v1.6.0 + github.com/jackc/pgx/v5 v5.8.0 github.com/joho/godotenv v1.5.1 github.com/minio/minio-go/v7 v7.0.16 github.com/onsi/ginkgo v1.16.5 github.com/onsi/ginkgo/v2 v2.28.1 github.com/onsi/gomega v1.39.1 github.com/pkg/errors v0.9.1 - github.com/prometheus/client_golang v1.22.0 + github.com/prometheus/client_golang v1.23.2 + github.com/sethvargo/go-password v0.3.1 github.com/stretchr/testify v1.11.1 github.com/wk8/go-ordered-map/v2 v2.1.7 - go.uber.org/zap v1.27.0 + go.uber.org/zap v1.27.1 google.golang.org/api v0.155.0 - k8s.io/api v0.33.0 - k8s.io/apiextensions-apiserver v0.33.0 - k8s.io/apimachinery v0.33.0 - k8s.io/client-go v0.33.0 + k8s.io/api v0.34.2 + k8s.io/apiextensions-apiserver v0.34.2 + k8s.io/apimachinery v0.34.2 + k8s.io/client-go v0.34.2 k8s.io/kubectl v0.26.2 - sigs.k8s.io/controller-runtime v0.21.0 + k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 + sigs.k8s.io/controller-runtime v0.22.4 ) require ( @@ -64,26 +68,40 @@ require ( github.com/buger/jsonparser v1.1.1 // indirect github.com/cenkalti/backoff/v5 v5.0.3 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/cloudnative-pg/barman-cloud v0.3.3 // indirect + github.com/cloudnative-pg/cnpg-i v0.3.0 // indirect + github.com/cloudnative-pg/machinery v0.3.1 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/dustin/go-humanize v1.0.1 // indirect - github.com/emicklei/go-restful/v3 v3.11.0 // indirect + github.com/emicklei/go-restful/v3 v3.13.0 // indirect github.com/evanphx/json-patch/v5 v5.9.11 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect - github.com/fsnotify/fsnotify v1.7.0 // indirect - github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/fsnotify/fsnotify v1.9.0 // indirect + github.com/fxamacker/cbor/v2 v2.9.0 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-logr/zapr v1.3.0 // indirect - github.com/go-openapi/jsonpointer v0.21.0 // indirect - github.com/go-openapi/jsonreference v0.20.2 // indirect - github.com/go-openapi/swag v0.23.0 // indirect + github.com/go-openapi/jsonpointer v0.22.0 // indirect + github.com/go-openapi/jsonreference v0.21.1 // indirect + github.com/go-openapi/swag v0.24.1 // indirect + github.com/go-openapi/swag/cmdutils v0.24.0 // indirect + github.com/go-openapi/swag/conv v0.24.0 // indirect + github.com/go-openapi/swag/fileutils v0.24.0 // indirect + github.com/go-openapi/swag/jsonname v0.24.0 // indirect + github.com/go-openapi/swag/jsonutils v0.24.0 
// indirect + github.com/go-openapi/swag/loading v0.24.0 // indirect + github.com/go-openapi/swag/mangling v0.24.0 // indirect + github.com/go-openapi/swag/netutils v0.24.0 // indirect + github.com/go-openapi/swag/stringutils v0.24.0 // indirect + github.com/go-openapi/swag/typeutils v0.24.0 // indirect + github.com/go-openapi/swag/yamlutils v0.24.0 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang-jwt/jwt/v5 v5.2.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/google/btree v1.1.3 // indirect - github.com/google/cel-go v0.23.2 // indirect - github.com/google/gnostic-models v0.6.9 // indirect + github.com/google/cel-go v0.26.0 // indirect + github.com/google/gnostic-models v0.7.0 // indirect github.com/google/pprof v0.0.0-20260115054156-294ebfa9ad83 // indirect github.com/google/s2a-go v0.1.7 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect @@ -91,35 +109,40 @@ require ( github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/jackc/pgpassfile v1.0.0 // indirect + github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/compress v1.18.0 // indirect github.com/klauspost/cpuid v1.3.1 // indirect + github.com/kubernetes-csi/external-snapshotter/client/v8 v8.4.0 // indirect github.com/kylelemons/godebug v1.1.0 // indirect - github.com/mailru/easyjson v0.7.7 // indirect + github.com/lib/pq v1.10.9 // indirect + github.com/mailru/easyjson v0.9.0 // indirect github.com/minio/md5-simd v1.1.0 // indirect github.com/minio/sha256-simd v0.1.1 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/moby/spdystream v0.5.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect - github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/prometheus/client_model v0.6.1 // indirect - github.com/prometheus/common v0.62.0 // indirect - github.com/prometheus/procfs v0.15.1 // indirect + github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.86.2 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + github.com/prometheus/common v0.66.1 // indirect + github.com/prometheus/procfs v0.16.1 // indirect github.com/rs/xid v1.2.1 // indirect github.com/sirupsen/logrus v1.9.3 // indirect - github.com/spf13/cobra v1.8.1 // indirect - github.com/spf13/pflag v1.0.5 // indirect + github.com/spf13/cobra v1.10.1 // indirect + github.com/spf13/pflag v1.0.10 // indirect github.com/stoewer/go-strcase v1.3.0 // indirect github.com/stretchr/objx v0.5.2 // indirect github.com/x448/float16 v0.8.4 // indirect go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect - go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0 // 
indirect + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 // indirect go.opentelemetry.io/otel v1.40.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.40.0 // indirect @@ -129,6 +152,7 @@ require ( go.opentelemetry.io/otel/trace v1.40.0 // indirect go.opentelemetry.io/proto/otlp v1.9.0 // indirect go.uber.org/multierr v1.11.0 // indirect + go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/crypto v0.47.0 // indirect golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect @@ -139,7 +163,7 @@ require ( golang.org/x/sys v0.40.0 // indirect golang.org/x/term v0.39.0 // indirect golang.org/x/text v0.33.0 // indirect - golang.org/x/time v0.9.0 // indirect + golang.org/x/time v0.12.0 // indirect golang.org/x/tools v0.41.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/genproto v0.0.0-20240123012728-ef4313101c80 // indirect @@ -147,18 +171,17 @@ require ( google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409 // indirect google.golang.org/grpc v1.78.0 // indirect google.golang.org/protobuf v1.36.11 // indirect - gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect + gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.66.4 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/apiserver v0.33.0 // indirect - k8s.io/component-base v0.33.0 // indirect + k8s.io/apiserver v0.34.2 // indirect + k8s.io/component-base v0.34.2 // indirect k8s.io/klog/v2 v2.130.1 // indirect - k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect - k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 // indirect + k8s.io/kube-openapi v0.0.0-20250905212525-66792eed8611 // indirect sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect - sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect + sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect sigs.k8s.io/randfill v1.0.0 // indirect - sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect - sigs.k8s.io/yaml v1.4.0 // indirect + sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect + sigs.k8s.io/yaml v1.6.0 // indirect ) diff --git a/go.sum b/go.sum index f4c6dae6b..9ec3df9aa 100644 --- a/go.sum +++ b/go.sum @@ -82,11 +82,18 @@ github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cloudnative-pg/barman-cloud v0.3.3 h1:EEcjeV+IUivDpmyF/H/XGY1pGaKJ5LS5MYeB6wgGcak= +github.com/cloudnative-pg/barman-cloud v0.3.3/go.mod h1:5CM4MncAxAjnqxjDt0I5E/oVd7gsMLL0/o/wQ+vUSgs= +github.com/cloudnative-pg/cloudnative-pg v1.28.0 h1:vkv0a0ewDSfJOPJrsyUr4uczsxheReAWf/k171V0Dm0= +github.com/cloudnative-pg/cloudnative-pg v1.28.0/go.mod h1:209fkRR6m0vXUVQ9Q498eAPQqN2UlXECbXXtpGsZz3I= +github.com/cloudnative-pg/cnpg-i v0.3.0 h1:5ayNOG5x68lU70IVbHDZQrv5p+bErCJ0mqRmOpW2jjE= +github.com/cloudnative-pg/cnpg-i v0.3.0/go.mod h1:VOIWWXcJ1RyioK+elR2DGOa4cBA6K+6UQgx05aZmH+g= +github.com/cloudnative-pg/machinery v0.3.1 h1:KtPA6EwELTUNisCMLiFYkK83GU9606rkGQhDJGPB8Yw= +github.com/cloudnative-pg/machinery v0.3.1/go.mod h1:jebuqKxZAbrRKDEEpVCIDMKW+FbWtB9Kf/hb2kMUu9o= 
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/xds/go v0.0.0-20251022180443-0feb69152e9f h1:Y8xYupdHxryycyPlc9Y+bSQAYZnetRJ70VMVKm5CKI0= github.com/cncf/xds/go v0.0.0-20251022180443-0feb69152e9f/go.mod h1:HlzOvOjVBOfTGSRXRyY0OiCS/3J1akRGQQpRO/7zyF4= -github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= @@ -94,8 +101,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8Yc github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= -github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= -github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes= +github.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -105,18 +112,18 @@ github.com/envoyproxy/go-control-plane/envoy v1.35.0/go.mod h1:09qwbGVuSWWAyN5t/ github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8= github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU= -github.com/evanphx/json-patch v4.12.0+incompatible h1:4onqiflcdA9EOZ4RxV643DvftH5pOlLGNtQ5lPWQu84= -github.com/evanphx/json-patch v4.12.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= +github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= -github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= -github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= -github.com/fxamacker/cbor/v2 
v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= -github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= +github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= github.com/gkampitakis/ciinfo v0.3.2 h1:JcuOPk8ZU7nZQjdUhctuhQofk7BGHuIy0c9Ez8BNhXs= github.com/gkampitakis/ciinfo v0.3.2/go.mod h1:1NIwaOcFChN4fa/B0hEBdAb6npDlFL8Bwx4dfRLRqAo= github.com/gkampitakis/go-diff v1.3.2 h1:Qyn0J9XJSDTgnsgHRdz9Zp24RaJeKMUHg2+PDZZdC4M= @@ -130,14 +137,34 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= -github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= -github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= -github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= -github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= -github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= -github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= -github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= -github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-openapi/jsonpointer v0.22.0 h1:TmMhghgNef9YXxTu1tOopo+0BGEytxA+okbry0HjZsM= +github.com/go-openapi/jsonpointer v0.22.0/go.mod h1:xt3jV88UtExdIkkL7NloURjRQjbeUgcxFblMjq2iaiU= +github.com/go-openapi/jsonreference v0.21.1 h1:bSKrcl8819zKiOgxkbVNRUBIr6Wwj9KYrDbMjRs0cDA= +github.com/go-openapi/jsonreference v0.21.1/go.mod h1:PWs8rO4xxTUqKGu+lEvvCxD5k2X7QYkKAepJyCmSTT8= +github.com/go-openapi/swag v0.24.1 h1:DPdYTZKo6AQCRqzwr/kGkxJzHhpKxZ9i/oX0zag+MF8= +github.com/go-openapi/swag v0.24.1/go.mod h1:sm8I3lCPlspsBBwUm1t5oZeWZS0s7m/A+Psg0ooRU0A= +github.com/go-openapi/swag/cmdutils v0.24.0 h1:KlRCffHwXFI6E5MV9n8o8zBRElpY4uK4yWyAMWETo9I= +github.com/go-openapi/swag/cmdutils v0.24.0/go.mod h1:uxib2FAeQMByyHomTlsP8h1TtPd54Msu2ZDU/H5Vuf8= +github.com/go-openapi/swag/conv v0.24.0 h1:ejB9+7yogkWly6pnruRX45D1/6J+ZxRu92YFivx54ik= +github.com/go-openapi/swag/conv v0.24.0/go.mod h1:jbn140mZd7EW2g8a8Y5bwm8/Wy1slLySQQ0ND6DPc2c= +github.com/go-openapi/swag/fileutils v0.24.0 h1:U9pCpqp4RUytnD689Ek/N1d2N/a//XCeqoH508H5oak= +github.com/go-openapi/swag/fileutils v0.24.0/go.mod h1:3SCrCSBHyP1/N+3oErQ1gP+OX1GV2QYFSnrTbzwli90= +github.com/go-openapi/swag/jsonname v0.24.0 h1:2wKS9bgRV/xB8c62Qg16w4AUiIrqqiniJFtZGi3dg5k= +github.com/go-openapi/swag/jsonname v0.24.0/go.mod h1:GXqrPzGJe611P7LG4QB9JKPtUZ7flE4DOVechNaDd7Q= +github.com/go-openapi/swag/jsonutils v0.24.0 h1:F1vE1q4pg1xtO3HTyJYRmEuJ4jmIp2iZ30bzW5XgZts= +github.com/go-openapi/swag/jsonutils v0.24.0/go.mod h1:vBowZtF5Z4DDApIoxcIVfR8v0l9oq5PpYRUuteVu6f0= +github.com/go-openapi/swag/loading v0.24.0 h1:ln/fWTwJp2Zkj5DdaX4JPiddFC5CHQpvaBKycOlceYc= +github.com/go-openapi/swag/loading v0.24.0/go.mod 
h1:gShCN4woKZYIxPxbfbyHgjXAhO61m88tmjy0lp/LkJk= +github.com/go-openapi/swag/mangling v0.24.0 h1:PGOQpViCOUroIeak/Uj/sjGAq9LADS3mOyjznmHy2pk= +github.com/go-openapi/swag/mangling v0.24.0/go.mod h1:Jm5Go9LHkycsz0wfoaBDkdc4CkpuSnIEf62brzyCbhc= +github.com/go-openapi/swag/netutils v0.24.0 h1:Bz02HRjYv8046Ycg/w80q3g9QCWeIqTvlyOjQPDjD8w= +github.com/go-openapi/swag/netutils v0.24.0/go.mod h1:WRgiHcYTnx+IqfMCtu0hy9oOaPR0HnPbmArSRN1SkZM= +github.com/go-openapi/swag/stringutils v0.24.0 h1:i4Z/Jawf9EvXOLUbT97O0HbPUja18VdBxeadyAqS1FM= +github.com/go-openapi/swag/stringutils v0.24.0/go.mod h1:5nUXB4xA0kw2df5PRipZDslPJgJut+NjL7D25zPZ/4w= +github.com/go-openapi/swag/typeutils v0.24.0 h1:d3szEGzGDf4L2y1gYOSSLeK6h46F+zibnEas2Jm/wIw= +github.com/go-openapi/swag/typeutils v0.24.0/go.mod h1:q8C3Kmk/vh2VhpCLaoR2MVWOGP8y7Jc8l82qCTd1DYI= +github.com/go-openapi/swag/yamlutils v0.24.0 h1:bhw4894A7Iw6ne+639hsBNRHg9iZg/ISrOVr+sJGp4c= +github.com/go-openapi/swag/yamlutils v0.24.0/go.mod h1:DpKv5aYuaGm/sULePoeiG8uwMpZSfReo1HR3Ik0yaG8= github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= @@ -166,17 +193,16 @@ github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= -github.com/google/cel-go v0.23.2 h1:UdEe3CvQh3Nv+E/j9r1Y//WO0K0cSyD7/y0bzyLIMI4= -github.com/google/cel-go v0.23.2/go.mod h1:52Pb6QsDbC5kvgxvZhiL9QX1oZEkcUF/ZqaPx1J5Wwo= -github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw= -github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw= +github.com/google/cel-go v0.26.0 h1:DPGjXackMpJWH680oGY4lZhYjIameYmR+/6RBdDGmaI= +github.com/google/cel-go v0.26.0/go.mod h1:A9O8OU9rdvrK5MQyrqfIxo1a0u4g3sF8KB6PUIaryMM= +github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= +github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -204,6 +230,14 @@ github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7/go.mod h1:lW34nIZuQ8UDPdkon5fm github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= 
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= +github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= +github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= +github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= +github.com/jackc/pgx/v5 v5.8.0 h1:TYPDoleBBme0xGSAX3/+NujXXtpZn9HBONkQC7IEZSo= +github.com/jackc/pgx/v5 v5.8.0/go.mod h1:QVeDInX2m9VyzvNeiCJVjCkNFqzsNb43204HshNSZKw= +github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= +github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= @@ -223,17 +257,20 @@ github.com/klauspost/cpuid v1.2.3/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgo github.com/klauspost/cpuid v1.3.1 h1:5JNjFYYQrZeKRJ0734q51WCEEn2huer72Dc7K+R/b6s= github.com/klauspost/cpuid v1.3.1/go.mod h1:bYW4mA6ZgKPob1/Dlai2LviZJO7KGI3uoWLd42rAQw4= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kubernetes-csi/external-snapshotter/client/v8 v8.4.0 h1:bMqrb3UHgHbP+PW9VwiejfDJU1R0PpXVZNMdeH8WYKI= +github.com/kubernetes-csi/external-snapshotter/client/v8 v8.4.0/go.mod h1:E3vdYxHj2C2q6qo8/Da4g7P+IcwqRZyy3gJBzYybV9Y= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= -github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= -github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= +github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= +github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= +github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= github.com/maruel/natural v1.1.1 h1:Hja7XhhmvEFhcByqDoHz9QZbkWey+COd9xWfCfn1ioo= github.com/maruel/natural v1.1.1/go.mod h1:v+Rfd79xlw1AgVBjbO0BEQmptqb5HvL/k9GRHB7ZKEg= github.com/mfridman/tparse v0.18.0 h1:wh6dzOKaIwkUGyKgOntDW4liXSo37qg5AXbIhkMV3vE= @@ -253,8 +290,9 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 
v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= -github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= @@ -281,29 +319,34 @@ github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q= -github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.86.2 h1:VRXUgbGmpmjZgFYiUnTwlC+JjfCUs5KKFsorJhI1ZKQ= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.86.2/go.mod h1:nPk0OteXBkbT0CRCa2oZQL1jRLW6RJ2fuIijHypeJdk= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= -github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io= -github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I= -github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= -github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= +github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= +github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= +github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/rs/xid v1.2.1 h1:mhH9Nq+C1fY2l1XIpgxIiUOfNpRBYH1kKcr+qfKgjRc= github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 
+github.com/sethvargo/go-password v0.3.1 h1:WqrLTjo7X6AcVYfC6R7GtSyuUQR9hGyAj/f1PYQZCJU= +github.com/sethvargo/go-password v0.3.1/go.mod h1:rXofC1zT54N7R8K/h1WDUdkf9BOx5OptoxrMBcrXzvs= github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= -github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= -github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= +github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0= +github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= +github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs= github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -321,6 +364,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/thoas/go-funk v0.9.3 h1:7+nAEx3kn5ZJcnDm2Bh23N2yOtweO14bi//dvRtgLpw= +github.com/thoas/go-funk v0.9.3/go.mod h1:+IWnUfUmFO1+WVYQWQtIJHeRRdaIyyYglZN7xzUPe4Q= github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= @@ -339,8 +384,8 @@ go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0 h1:PS8wXpbyaDJQ2VDHHncMe9Vct0Zn1fEjpsjrLxGJoSc= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.58.0/go.mod h1:HDBUsEjOuRC0EzKZ1bSaRGZWUBAzo+MhAcUUORSr4D0= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0 h1:x7wzEgXfnzJcHDwStJT+mxOz4etr2EcexjqhBvmoakw= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0/go.mod h1:rg+RlpR5dKwaS95IyyZqj5Wd4E13lk/msnTS0Xl9lJM= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 h1:yd02MEjBdJkG3uabWP9apV+OuWRIXGDuJEUJbOHmCFU= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0/go.mod 
h1:umTcuxiv1n/s/S6/c2AT/g2CQ7u5C59sHDNmfSwgz7Q= go.opentelemetry.io/otel v1.40.0 h1:oA5YeOcpRTXq6NN7frwmwFR0Cn3RhTVZvXsP4duvCms= @@ -363,8 +408,10 @@ go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= -go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= -go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +go.uber.org/zap v1.27.1 h1:08RqriUEv8+ArZRYSTXy1LeBScaMpVSTBhCeaZYfMYc= +go.uber.org/zap v1.27.1/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= +go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= @@ -430,8 +477,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= -golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= -golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= @@ -489,8 +536,8 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= -gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo= +gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= @@ -508,36 +555,35 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod 
h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -k8s.io/api v0.33.0 h1:yTgZVn1XEe6opVpP1FylmNrIFWuDqe2H0V8CT5gxfIU= -k8s.io/api v0.33.0/go.mod h1:CTO61ECK/KU7haa3qq8sarQ0biLq2ju405IZAd9zsiM= -k8s.io/apiextensions-apiserver v0.33.0 h1:d2qpYL7Mngbsc1taA4IjJPRJ9ilnsXIrndH+r9IimOs= -k8s.io/apiextensions-apiserver v0.33.0/go.mod h1:VeJ8u9dEEN+tbETo+lFkwaaZPg6uFKLGj5vyNEwwSzc= -k8s.io/apimachinery v0.33.0 h1:1a6kHrJxb2hs4t8EE5wuR/WxKDwGN1FKH3JvDtA0CIQ= -k8s.io/apimachinery v0.33.0/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM= -k8s.io/apiserver v0.33.0 h1:QqcM6c+qEEjkOODHppFXRiw/cE2zP85704YrQ9YaBbc= -k8s.io/apiserver v0.33.0/go.mod h1:EixYOit0YTxt8zrO2kBU7ixAtxFce9gKGq367nFmqI8= -k8s.io/client-go v0.33.0 h1:UASR0sAYVUzs2kYuKn/ZakZlcs2bEHaizrrHUZg0G98= -k8s.io/client-go v0.33.0/go.mod h1:kGkd+l/gNGg8GYWAPr0xF1rRKvVWvzh9vmZAMXtaKOg= -k8s.io/component-base v0.33.0 h1:Ot4PyJI+0JAD9covDhwLp9UNkUja209OzsJ4FzScBNk= -k8s.io/component-base v0.33.0/go.mod h1:aXYZLbw3kihdkOPMDhWbjGCO6sg+luw554KP51t8qCU= +k8s.io/api v0.34.2 h1:fsSUNZhV+bnL6Aqrp6O7lMTy6o5x2C4XLjnh//8SLYY= +k8s.io/api v0.34.2/go.mod h1:MMBPaWlED2a8w4RSeanD76f7opUoypY8TFYkSM+3XHw= +k8s.io/apiextensions-apiserver v0.34.2 h1:WStKftnGeoKP4AZRz/BaAAEJvYp4mlZGN0UCv+uvsqo= +k8s.io/apiextensions-apiserver v0.34.2/go.mod h1:398CJrsgXF1wytdaanynDpJ67zG4Xq7yj91GrmYN2SE= +k8s.io/apimachinery v0.34.2 h1:zQ12Uk3eMHPxrsbUJgNF8bTauTVR2WgqJsTmwTE/NW4= +k8s.io/apimachinery v0.34.2/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw= +k8s.io/apiserver v0.34.2 h1:2/yu8suwkmES7IzwlehAovo8dDE07cFRC7KMDb1+MAE= +k8s.io/apiserver v0.34.2/go.mod h1:gqJQy2yDOB50R3JUReHSFr+cwJnL8G1dzTA0YLEqAPI= +k8s.io/client-go v0.34.2 h1:Co6XiknN+uUZqiddlfAjT68184/37PS4QAzYvQvDR8M= +k8s.io/client-go v0.34.2/go.mod h1:2VYDl1XXJsdcAxw7BenFslRQX28Dxz91U9MWKjX97fE= +k8s.io/component-base v0.34.2 h1:HQRqK9x2sSAsd8+R4xxRirlTjowsg6fWCPwWYeSvogQ= +k8s.io/component-base v0.34.2/go.mod h1:9xw2FHJavUHBFpiGkZoKuYZ5pdtLKe97DEByaA+hHbM= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= -k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff h1:/usPimJzUKKu+m+TE36gUyGcf03XZEP0ZIKgKj35LS4= -k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8= +k8s.io/kube-openapi v0.0.0-20250905212525-66792eed8611 h1:o4oKOsvSymDkZRsMAPZU7bRdwL+lPOK5VS10Dr1D6eg= +k8s.io/kube-openapi v0.0.0-20250905212525-66792eed8611/go.mod h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ= k8s.io/kubectl v0.26.2 h1:SMPB4j48eVFxsYluBq3VLyqXtE6b72YnszkbTAtFye4= k8s.io/kubectl v0.26.2/go.mod h1:KYWOXSwp2BrDn3kPeoU/uKzKtdqvhK1dgZGd0+no4cM= -k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 h1:M3sRQVHv7vB20Xc2ybTt7ODCeFj6JSWYFzOFnYeS6Ro= -k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 h1:SjGebBtkBqHFOli+05xYbK8YF1Dzkbzn+gDM4X9T4Ck= +k8s.io/utils v0.0.0-20251002143259-bc988d571ff4/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 h1:jpcvIRr3GLoUoEKRkHKSmGjxb6lWwrBlJsXc+eUYQHM= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= -sigs.k8s.io/controller-runtime v0.21.0 h1:CYfjpEuicjUecRk+KAeyYh+ouUBn4llGyDYytIGcJS8= -sigs.k8s.io/controller-runtime v0.21.0/go.mod h1:OSg14+F65eWqIu4DceX7k/+QRAbTTvxeQSNSOQpukWM= -sigs.k8s.io/json 
v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8= -sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo= -sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/controller-runtime v0.22.4 h1:GEjV7KV3TY8e+tJ2LCTxUTanW4z/FmNB7l327UfMq9A= +sigs.k8s.io/controller-runtime v0.22.4/go.mod h1:+QX1XUpTXN4mLoblf4tqr5CQcyHPAki2HLXqQMY6vh8= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= -sigs.k8s.io/structured-merge-diff/v4 v4.6.0 h1:IUA9nvMmnKWcj5jl84xn+T5MnlZKThmUW1TdblaLVAc= -sigs.k8s.io/structured-merge-diff/v4 v4.6.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps= -sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= -sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= +sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= +sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/internal/controller/postgrescluster_controller.go b/internal/controller/postgrescluster_controller.go new file mode 100644 index 000000000..dfa1f7eaf --- /dev/null +++ b/internal/controller/postgrescluster_controller.go @@ -0,0 +1,179 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + + cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" + enterprisev4 "github.com/splunk/splunk-operator/api/v4" + clustercore "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/equality" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/predicate" +) + +const ( + ClusterTotalWorker int = 2 +) + +// PostgresClusterReconciler reconciles PostgresCluster resources. 
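+// A deliberately thin primary adapter: Reconcile below forwards straight to
+// clustercore.PostgresClusterService, keeping all orchestration in
+// pkg/postgresql/cluster/core where it can be unit-tested without a manager.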
+type PostgresClusterReconciler struct {
+ client.Client
+ Scheme *runtime.Scheme
+}
+
+// +kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresclusters,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresclusters/status,verbs=get;update;patch
+// +kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresclusters/finalizers,verbs=update
+// +kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresclusterclasses,verbs=get;list;watch
+// +kubebuilder:rbac:groups=postgresql.cnpg.io,resources=clusters,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=postgresql.cnpg.io,resources=clusters/status,verbs=get
+// +kubebuilder:rbac:groups=postgresql.cnpg.io,resources=poolers,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=postgresql.cnpg.io,resources=poolers/status,verbs=get
+
+func (r *PostgresClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+ return clustercore.PostgresClusterService(ctx, r.Client, r.Scheme, req)
+}
+
+// SetupWithManager registers the controller and owned resource watches.
+func (r *PostgresClusterReconciler) SetupWithManager(mgr ctrl.Manager) error {
+ return ctrl.NewControllerManagedBy(mgr).
+ For(&enterprisev4.PostgresCluster{}, builder.WithPredicates(postgresClusterPredicator())).
+ Owns(&cnpgv1.Cluster{}, builder.WithPredicates(cnpgClusterPredicator())).
+ Owns(&cnpgv1.Pooler{}, builder.WithPredicates(cnpgPoolerPredicator())).
+ Owns(&corev1.Secret{}, builder.WithPredicates(secretPredicator())).
+ Owns(&corev1.ConfigMap{}, builder.WithPredicates(configMapPredicator())).
+ Named("postgresCluster").
+ WithOptions(controller.Options{
+ MaxConcurrentReconciles: ClusterTotalWorker,
+ }).
+ Complete(r)
+}
+
+func deletionTimestampChanged(oldObj, newObj metav1.Object) bool {
+ return !equality.Semantic.DeepEqual(oldObj.GetDeletionTimestamp(), newObj.GetDeletionTimestamp())
+}
+
+func ownerReferencesChanged(oldObj, newObj metav1.Object) bool {
+ return !equality.Semantic.DeepEqual(oldObj.GetOwnerReferences(), newObj.GetOwnerReferences())
+}
+
+// postgresClusterPredicator triggers on generation changes, deletion, and finalizer transitions.
+func postgresClusterPredicator() predicate.Predicate {
+ return predicate.Funcs{
+ CreateFunc: func(event.CreateEvent) bool { return true },
+ DeleteFunc: func(event.DeleteEvent) bool { return true },
+ UpdateFunc: func(e event.UpdateEvent) bool {
+ oldObj, oldOK := e.ObjectOld.(*enterprisev4.PostgresCluster)
+ newObj, newOK := e.ObjectNew.(*enterprisev4.PostgresCluster)
+ if !oldOK || !newOK {
+ return true
+ }
+ if oldObj.Generation != newObj.Generation {
+ return true
+ }
+ if deletionTimestampChanged(oldObj, newObj) {
+ return true
+ }
+ // Finalizer changes indicate registration or deletion; always reconcile.
+ return controllerutil.ContainsFinalizer(oldObj, clustercore.PostgresClusterFinalizerName) !=
+ controllerutil.ContainsFinalizer(newObj, clustercore.PostgresClusterFinalizerName)
+ },
+ GenericFunc: func(event.GenericEvent) bool { return false },
+ }
+}
+
+// cnpgClusterPredicator triggers only on phase changes or owner reference changes.
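+// This filters out routine CNPG status churn (conditions, timestamps, and the
+// like); only a phase transition, e.g. into cnpgv1.PhaseHealthy, or a changed
+// owner reference is interesting to the reconciler.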
+func cnpgClusterPredicator() predicate.Predicate { + return predicate.Funcs{ + CreateFunc: func(event.CreateEvent) bool { return true }, + DeleteFunc: func(event.DeleteEvent) bool { return true }, + UpdateFunc: func(e event.UpdateEvent) bool { + oldObj, oldOK := e.ObjectOld.(*cnpgv1.Cluster) + newObj, newOK := e.ObjectNew.(*cnpgv1.Cluster) + if !oldOK || !newOK { + return true + } + return oldObj.Status.Phase != newObj.Status.Phase || + ownerReferencesChanged(oldObj, newObj) + }, + GenericFunc: func(event.GenericEvent) bool { return false }, + } +} + +// cnpgPoolerPredicator triggers only on instance count changes. +func cnpgPoolerPredicator() predicate.Predicate { + return predicate.Funcs{ + CreateFunc: func(event.CreateEvent) bool { return true }, + DeleteFunc: func(event.DeleteEvent) bool { return true }, + UpdateFunc: func(e event.UpdateEvent) bool { + oldObj, oldOK := e.ObjectOld.(*cnpgv1.Pooler) + newObj, newOK := e.ObjectNew.(*cnpgv1.Pooler) + if !oldOK || !newOK { + return true + } + return oldObj.Status.Instances != newObj.Status.Instances + }, + GenericFunc: func(event.GenericEvent) bool { return false }, + } +} + +// secretPredicator triggers only on owner reference changes. +func secretPredicator() predicate.Predicate { + return predicate.Funcs{ + CreateFunc: func(event.CreateEvent) bool { return true }, + DeleteFunc: func(event.DeleteEvent) bool { return true }, + UpdateFunc: func(e event.UpdateEvent) bool { + oldObj, oldOK := e.ObjectOld.(*corev1.Secret) + newObj, newOK := e.ObjectNew.(*corev1.Secret) + if !oldOK || !newOK { + return true + } + return ownerReferencesChanged(oldObj, newObj) + }, + GenericFunc: func(event.GenericEvent) bool { return false }, + } +} + +// configMapPredicator triggers on data, label, annotation, or owner reference changes. +func configMapPredicator() predicate.Predicate { + return predicate.Funcs{ + CreateFunc: func(event.CreateEvent) bool { return true }, + DeleteFunc: func(event.DeleteEvent) bool { return true }, + UpdateFunc: func(e event.UpdateEvent) bool { + oldObj, oldOK := e.ObjectOld.(*corev1.ConfigMap) + newObj, newOK := e.ObjectNew.(*corev1.ConfigMap) + if !oldOK || !newOK { + return true + } + return !equality.Semantic.DeepEqual(oldObj.Data, newObj.Data) || + !equality.Semantic.DeepEqual(oldObj.Labels, newObj.Labels) || + !equality.Semantic.DeepEqual(oldObj.Annotations, newObj.Annotations) || + ownerReferencesChanged(oldObj, newObj) + }, + GenericFunc: func(event.GenericEvent) bool { return false }, + } +} diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go new file mode 100644 index 000000000..c0f3493d9 --- /dev/null +++ b/internal/controller/postgrescluster_controller_test.go @@ -0,0 +1,84 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + enterprisev4 "github.com/splunk/splunk-operator/api/v4" +) + +var _ = Describe("PostgresCluster Controller", func() { + Context("When reconciling a resource", func() { + const resourceName = "test-resource" + + ctx := context.Background() + + typeNamespacedName := types.NamespacedName{ + Name: resourceName, + Namespace: "default", // TODO(user):Modify as needed + } + postgresCluster := &enterprisev4.PostgresCluster{} + + BeforeEach(func() { + By("creating the custom resource for the Kind PostgresCluster") + err := k8sClient.Get(ctx, typeNamespacedName, postgresCluster) + if err != nil && errors.IsNotFound(err) { + resource := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: resourceName, + Namespace: "default", + }, + // TODO(user): Specify other spec details if needed. + } + Expect(k8sClient.Create(ctx, resource)).To(Succeed()) + } + }) + + AfterEach(func() { + // TODO(user): Cleanup logic after each test, like removing the resource instance. + resource := &enterprisev4.PostgresCluster{} + err := k8sClient.Get(ctx, typeNamespacedName, resource) + Expect(err).NotTo(HaveOccurred()) + + By("Cleanup the specific resource instance PostgresCluster") + Expect(k8sClient.Delete(ctx, resource)).To(Succeed()) + }) + It("should successfully reconcile the resource", func() { + By("Reconciling the created resource") + controllerReconciler := &PostgresClusterReconciler{ + Client: k8sClient, + Scheme: k8sClient.Scheme(), + } + + _, err := controllerReconciler.Reconcile(ctx, reconcile.Request{ + NamespacedName: typeNamespacedName, + }) + Expect(err).NotTo(HaveOccurred()) + // TODO(user): Add more specific assertions depending on your controller's reconciliation logic. + // Example: If you expect a certain status condition after reconciliation, verify it here. + }) + }) +}) diff --git a/internal/controller/postgresdatabase_controller.go b/internal/controller/postgresdatabase_controller.go new file mode 100644 index 000000000..40faa3eb3 --- /dev/null +++ b/internal/controller/postgresdatabase_controller.go @@ -0,0 +1,115 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package controller + +import ( + "context" + "reflect" + + cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" + enterprisev4 "github.com/splunk/splunk-operator/api/v4" + dbadapter "github.com/splunk/splunk-operator/pkg/postgresql/database/adapter" + dbcore "github.com/splunk/splunk-operator/pkg/postgresql/database/core" + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/predicate" +) + +// PostgresDatabaseReconciler reconciles a PostgresDatabase object. +type PostgresDatabaseReconciler struct { + client.Client + Scheme *runtime.Scheme +} + +const ( + DatabaseTotalWorker int = 2 +) + +//+kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresdatabases,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresdatabases/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresdatabases/finalizers,verbs=update +//+kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresclusters,verbs=get;list;watch +//+kubebuilder:rbac:groups=postgresql.cnpg.io,resources=clusters,verbs=get;list;watch;patch +//+kubebuilder:rbac:groups=postgresql.cnpg.io,resources=databases,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;delete +//+kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;delete + +func (r *PostgresDatabaseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + logger := log.FromContext(ctx) + + postgresDB := &enterprisev4.PostgresDatabase{} + if err := r.Get(ctx, req.NamespacedName, postgresDB); err != nil { + if apierrors.IsNotFound(err) { + logger.Info("PostgresDatabase resource not found, ignoring") + return ctrl.Result{}, nil + } + return ctrl.Result{}, err + } + return dbcore.PostgresDatabaseService(ctx, r.Client, r.Scheme, postgresDB, dbadapter.NewDBRepository) +} + +// SetupWithManager sets up the controller with the Manager. +func (r *PostgresDatabaseReconciler) SetupWithManager(mgr ctrl.Manager) error { + if err := mgr.GetFieldIndexer().IndexField( + context.Background(), + &cnpgv1.Database{}, + ".metadata.controller", + func(obj client.Object) []string { + owner := metav1.GetControllerOf(obj) + if owner == nil { + return nil + } + if owner.APIVersion != enterprisev4.GroupVersion.String() || owner.Kind != "PostgresDatabase" { + return nil + } + return []string{owner.Name} + }, + ); err != nil { + return err + } + return ctrl.NewControllerManagedBy(mgr). + For(&enterprisev4.PostgresDatabase{}, builder.WithPredicates( + predicate.Or( + predicate.GenerationChangedPredicate{}, + predicate.Funcs{ + UpdateFunc: func(e event.UpdateEvent) bool { + return !reflect.DeepEqual( + e.ObjectOld.GetFinalizers(), + e.ObjectNew.GetFinalizers(), + ) + }, + }, + ), + )). + Owns(&cnpgv1.Database{}). + Owns(&corev1.Secret{}). + Owns(&corev1.ConfigMap{}). + Named("postgresdatabase"). + WithOptions(controller.Options{ + MaxConcurrentReconciles: DatabaseTotalWorker, + }). 
+ Complete(r) +} diff --git a/internal/controller/postgresdatabase_controller_test.go b/internal/controller/postgresdatabase_controller_test.go new file mode 100644 index 000000000..4e0589cad --- /dev/null +++ b/internal/controller/postgresdatabase_controller_test.go @@ -0,0 +1,84 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + enterprisev4 "github.com/splunk/splunk-operator/api/v4" +) + +var _ = Describe("Database Controller", func() { + Context("When reconciling a resource", func() { + const resourceName = "test-resource" + + ctx := context.Background() + + typeNamespacedName := types.NamespacedName{ + Name: resourceName, + Namespace: "default", // TODO(user):Modify as needed + } + database := &enterprisev4.PostgresDatabase{} + + BeforeEach(func() { + By("creating the custom resource for the Kind Database") + err := k8sClient.Get(ctx, typeNamespacedName, database) + if err != nil && errors.IsNotFound(err) { + resource := &enterprisev4.PostgresDatabase{ + ObjectMeta: metav1.ObjectMeta{ + Name: resourceName, + Namespace: "default", + }, + // TODO(user): Specify other spec details if needed. + } + Expect(k8sClient.Create(ctx, resource)).To(Succeed()) + } + }) + + AfterEach(func() { + // TODO(user): Cleanup logic after each test, like removing the resource instance. + resource := &enterprisev4.PostgresDatabase{} + err := k8sClient.Get(ctx, typeNamespacedName, resource) + Expect(err).NotTo(HaveOccurred()) + + By("Cleanup the specific resource instance Database") + Expect(k8sClient.Delete(ctx, resource)).To(Succeed()) + }) + It("should successfully reconcile the resource", func() { + By("Reconciling the created resource") + controllerReconciler := &PostgresDatabaseReconciler{ + Client: k8sClient, + Scheme: k8sClient.Scheme(), + } + + _, err := controllerReconciler.Reconcile(ctx, reconcile.Request{ + NamespacedName: typeNamespacedName, + }) + Expect(err).NotTo(HaveOccurred()) + // TODO(user): Add more specific assertions depending on your controller's reconciliation logic. + // Example: If you expect a certain status condition after reconciliation, verify it here. + }) + }) +}) diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index 142a8720c..94db6a730 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -1,5 +1,5 @@ /* -Copyright (c) 2018-2022 Splunk Inc. All rights reserved. +Copyright 2026. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,156 +18,99 @@ package controller import ( "context" - "fmt" + "os" "path/filepath" "testing" - "time" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" - "go.uber.org/zap/zapcore" + + "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/envtest" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" - clientgoscheme "k8s.io/client-go/kubernetes/scheme" - ctrl "sigs.k8s.io/controller-runtime" - - enterpriseApiV3 "github.com/splunk/splunk-operator/api/v3" - enterpriseApi "github.com/splunk/splunk-operator/api/v4" - //+kubebuilder:scaffold:imports + enterprisev4 "github.com/splunk/splunk-operator/api/v4" + // +kubebuilder:scaffold:imports ) -var cfg *rest.Config -var k8sClient client.Client -var testEnv *envtest.Environment -var k8sManager ctrl.Manager +// These tests use Ginkgo (BDD-style Go testing framework). Refer to +// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. -func TestAPIs(t *testing.T) { +var ( + ctx context.Context + cancel context.CancelFunc + testEnv *envtest.Environment + cfg *rest.Config + k8sClient client.Client +) + +func TestControllers(t *testing.T) { RegisterFailHandler(Fail) RunSpecs(t, "Controller Suite") } -var _ = BeforeSuite(func(ctx context.Context) { - opts := zap.Options{ - Development: true, - TimeEncoder: zapcore.RFC3339NanoTimeEncoder, - } - logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true), zap.UseFlagOptions(&opts))) +var _ = BeforeSuite(func() { + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) - By("bootstrapping test environment") + ctx, cancel = context.WithCancel(context.TODO()) + + var err error + err = enterprisev4.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + // +kubebuilder:scaffold:scheme + By("bootstrapping test environment") testEnv = &envtest.Environment{ CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, ErrorIfCRDPathMissing: true, } - var err error + // Retrieve the first found binary directory to allow running tests from IDEs + if getFirstFoundEnvTestBinaryDir() != "" { + testEnv.BinaryAssetsDirectory = getFirstFoundEnvTestBinaryDir() + } // cfg is defined in this file globally. 
cfg, err = testEnv.Start() Expect(err).NotTo(HaveOccurred()) Expect(cfg).NotTo(BeNil()) - err = enterpriseApi.AddToScheme(clientgoscheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - - err = enterpriseApiV3.AddToScheme(clientgoscheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - - err = enterpriseApi.AddToScheme(clientgoscheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - - err = enterpriseApi.AddToScheme(clientgoscheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - - err = enterpriseApiV3.AddToScheme(clientgoscheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - - err = enterpriseApi.AddToScheme(clientgoscheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - - //+kubebuilder:scaffold:scheme - - // Create New Manager for controller - k8sManager, err = ctrl.NewManager(cfg, ctrl.Options{ - Scheme: clientgoscheme.Scheme, - }) - Expect(err).ToNot(HaveOccurred()) - if err := (&ClusterManagerReconciler{ - Client: k8sManager.GetClient(), - Scheme: k8sManager.GetScheme(), - }).SetupWithManager(k8sManager); err != nil { - Expect(err).NotTo(HaveOccurred()) - } - if err := (&ClusterMasterReconciler{ - Client: k8sManager.GetClient(), - Scheme: k8sManager.GetScheme(), - }).SetupWithManager(k8sManager); err != nil { - Expect(err).NotTo(HaveOccurred()) - } - if err := (&IndexerClusterReconciler{ - Client: k8sManager.GetClient(), - Scheme: k8sManager.GetScheme(), - }).SetupWithManager(k8sManager); err != nil { - Expect(err).NotTo(HaveOccurred()) - } - if err := (&IngestorClusterReconciler{ - Client: k8sManager.GetClient(), - Scheme: k8sManager.GetScheme(), - }).SetupWithManager(k8sManager); err != nil { - Expect(err).NotTo(HaveOccurred()) - } - if err := (&LicenseManagerReconciler{ - Client: k8sManager.GetClient(), - Scheme: k8sManager.GetScheme(), - }).SetupWithManager(k8sManager); err != nil { - Expect(err).NotTo(HaveOccurred()) - } - if err := (&LicenseMasterReconciler{ - Client: k8sManager.GetClient(), - Scheme: k8sManager.GetScheme(), - }).SetupWithManager(k8sManager); err != nil { - Expect(err).NotTo(HaveOccurred()) - } - if err := (&MonitoringConsoleReconciler{ - Client: k8sManager.GetClient(), - Scheme: k8sManager.GetScheme(), - }).SetupWithManager(k8sManager); err != nil { - Expect(err).NotTo(HaveOccurred()) - } - if err := (&SearchHeadClusterReconciler{ - Client: k8sManager.GetClient(), - Scheme: k8sManager.GetScheme(), - }).SetupWithManager(k8sManager); err != nil { - Expect(err).NotTo(HaveOccurred()) - } - if err := (&StandaloneReconciler{ - Client: k8sManager.GetClient(), - Scheme: k8sManager.GetScheme(), - }).SetupWithManager(k8sManager); err != nil { - Expect(err).NotTo(HaveOccurred()) - } - - go func() { - err = k8sManager.Start(ctrl.SetupSignalHandler()) - fmt.Printf("error %v", err.Error()) - Expect(err).ToNot(HaveOccurred()) - }() - - Expect(err).ToNot(HaveOccurred()) - - k8sClient, err = client.New(cfg, client.Options{Scheme: clientgoscheme.Scheme}) + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) Expect(err).NotTo(HaveOccurred()) Expect(k8sClient).NotTo(BeNil()) - -}, NodeTimeout(time.Second*500)) +}) var _ = AfterSuite(func() { By("tearing down the test environment") - testEnv.Stop() + cancel() + err := testEnv.Stop() + Expect(err).NotTo(HaveOccurred()) }) + +// getFirstFoundEnvTestBinaryDir locates the first binary in the specified path. +// ENVTEST-based tests depend on specific binaries, usually located in paths set by +// controller-runtime. 
When running tests directly (e.g., via an IDE) without using +// Makefile targets, the 'BinaryAssetsDirectory' must be explicitly configured. +// +// This function streamlines the process by finding the required binaries, similar to +// setting the 'KUBEBUILDER_ASSETS' environment variable. To ensure the binaries are +// properly set up, run 'make setup-envtest' beforehand. +func getFirstFoundEnvTestBinaryDir() string { + basePath := filepath.Join("..", "..", "bin", "k8s") + entries, err := os.ReadDir(basePath) + if err != nil { + logf.Log.Error(err, "Failed to read directory", "path", basePath) + return "" + } + for _, entry := range entries { + if entry.IsDir() { + return filepath.Join(basePath, entry.Name()) + } + } + return "" +} diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go new file mode 100644 index 000000000..66622d8ad --- /dev/null +++ b/pkg/postgresql/cluster/core/cluster.go @@ -0,0 +1,1006 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package core + +import ( + "context" + "fmt" + + cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" + password "github.com/sethvargo/go-password/password" + enterprisev4 "github.com/splunk/splunk-operator/api/v4" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/equality" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + log "sigs.k8s.io/controller-runtime/pkg/log" +) + +// PostgresClusterService is the application service entry point called by the primary adapter (reconciler). +func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtime.Scheme, req ctrl.Request) (ctrl.Result, error) { + logger := log.FromContext(ctx) + logger.Info("Reconciling PostgresCluster", "name", req.Name, "namespace", req.Namespace) + + var cnpgCluster *cnpgv1.Cluster + var poolerEnabled bool + var postgresSecretName string + secret := &corev1.Secret{} + + // 1. Fetch the PostgresCluster instance, stop if not found. 
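+ // A NotFound is terminal rather than an error: the CR is gone, its owned
+ // objects are released through garbage collection, and there is nothing
+ // left to reconcile.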
+ postgresCluster := &enterprisev4.PostgresCluster{} + if err := c.Get(ctx, req.NamespacedName, postgresCluster); err != nil { + if apierrors.IsNotFound(err) { + logger.Info("PostgresCluster deleted, skipping reconciliation") + return ctrl.Result{}, nil + } + logger.Error(err, "Unable to fetch PostgresCluster") + return ctrl.Result{}, err + } + if postgresCluster.Status.Resources == nil { + postgresCluster.Status.Resources = &enterprisev4.PostgresClusterResources{} + } + + updateStatus := func(conditionType conditionTypes, status metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileClusterPhases) error { + return setStatus(ctx, c, postgresCluster, conditionType, status, reason, message, phase) + } + + // Finalizer handling must come before any other processing. + if err := handleFinalizer(ctx, c, scheme, postgresCluster, secret); err != nil { + if apierrors.IsNotFound(err) { + logger.Info("PostgresCluster already deleted, skipping finalizer update") + return ctrl.Result{}, nil + } + logger.Error(err, "Failed to handle finalizer") + if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterDeleteFailed, + fmt.Sprintf("Failed to delete resources during cleanup: %v", err), failedClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, err + } + if postgresCluster.GetDeletionTimestamp() != nil { + logger.Info("PostgresCluster is being deleted, cleanup complete") + return ctrl.Result{}, nil + } + + // Add finalizer if not present. + if !controllerutil.ContainsFinalizer(postgresCluster, PostgresClusterFinalizerName) { + controllerutil.AddFinalizer(postgresCluster, PostgresClusterFinalizerName) + if err := c.Update(ctx, postgresCluster); err != nil { + if apierrors.IsConflict(err) { + logger.Info("Conflict while adding finalizer, will retry on next reconcile") + return ctrl.Result{Requeue: true}, nil + } + logger.Error(err, "Failed to add finalizer to PostgresCluster") + return ctrl.Result{}, fmt.Errorf("failed to add finalizer: %w", err) + } + logger.Info("Finalizer added successfully") + return ctrl.Result{}, nil + } + + // 2. Load the referenced PostgresClusterClass. + clusterClass := &enterprisev4.PostgresClusterClass{} + if err := c.Get(ctx, client.ObjectKey{Name: postgresCluster.Spec.Class}, clusterClass); err != nil { + logger.Error(err, "Unable to fetch referenced PostgresClusterClass", "className", postgresCluster.Spec.Class) + if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterClassNotFound, + fmt.Sprintf("ClusterClass %s not found: %v", postgresCluster.Spec.Class, err), failedClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, err + } + + // 3. Merge PostgresClusterSpec on top of PostgresClusterClass defaults. + mergedConfig, err := getMergedConfig(clusterClass, postgresCluster) + if err != nil { + logger.Error(err, "Failed to merge PostgresCluster configuration") + if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonInvalidConfiguration, + fmt.Sprintf("Failed to merge configuration: %v", err), failedClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, err + } + + // 4. Resolve or derive the superuser secret name. 
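+ // Status is consulted first so that a cluster re-created after a
+ // Retain-policy deletion re-adopts its original secret (and password)
+ // rather than generating a fresh one.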
+ if postgresCluster.Status.Resources != nil && postgresCluster.Status.Resources.SuperUserSecretRef != nil { + postgresSecretName = postgresCluster.Status.Resources.SuperUserSecretRef.Name + logger.Info("Using existing secret from status", "name", postgresSecretName) + } else { + postgresSecretName = fmt.Sprintf("%s%s", postgresCluster.Name, defaultSecretSuffix) + logger.Info("Generating new secret name", "name", postgresSecretName) + } + + secretExists, secretErr := clusterSecretExists(ctx, c, postgresCluster.Namespace, postgresSecretName, secret) + if secretErr != nil { + logger.Error(secretErr, "Failed to check if PostgresCluster secret exists", "name", postgresSecretName) + if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonUserSecretFailed, + fmt.Sprintf("Failed to check secret existence: %v", secretErr), failedClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, secretErr + } + if !secretExists { + logger.Info("Creating PostgresCluster secret", "name", postgresSecretName) + if err := ensureClusterSecret(ctx, c, scheme, postgresCluster, postgresSecretName, secret); err != nil { + logger.Error(err, "Failed to ensure PostgresCluster secret", "name", postgresSecretName) + if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonUserSecretFailed, + fmt.Sprintf("Failed to generate PostgresCluster secret: %v", err), failedClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, err + } + if err := c.Status().Update(ctx, postgresCluster); err != nil { + if apierrors.IsConflict(err) { + logger.Info("Conflict after secret creation, will requeue") + return ctrl.Result{Requeue: true}, nil + } + logger.Error(err, "Failed to update status after secret creation") + return ctrl.Result{}, err + } + logger.Info("SuperUserSecretRef persisted to status") + } + + // Re-attach ownerRef if it was stripped (e.g. by a Retain-policy deletion of a previous cluster). + hasOwnerRef, ownerRefErr := controllerutil.HasOwnerReference(secret.GetOwnerReferences(), postgresCluster, scheme) + if ownerRefErr != nil { + logger.Error(ownerRefErr, "Failed to check owner reference on Secret") + return ctrl.Result{}, fmt.Errorf("failed to check owner reference on secret: %w", ownerRefErr) + } + if secretExists && !hasOwnerRef { + logger.Info("Connecting existing secret to PostgresCluster by adding owner reference", "name", postgresSecretName) + originalSecret := secret.DeepCopy() + if err := ctrl.SetControllerReference(postgresCluster, secret, scheme); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to set controller reference on existing secret: %w", err) + } + if err := patchObject(ctx, c, originalSecret, secret, "Secret"); err != nil { + logger.Error(err, "Failed to patch existing secret with controller reference") + if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonSuperUserSecretFailed, + fmt.Sprintf("Failed to patch existing secret: %v", err), failedClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, err + } + logger.Info("Existing secret linked successfully") + } + + if postgresCluster.Status.Resources.SuperUserSecretRef == nil { + postgresCluster.Status.Resources.SuperUserSecretRef = &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: postgresSecretName}, + Key: secretKeyPassword, + } + } + + // 5. Build desired CNPG Cluster spec. 
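+ // The desired spec is rebuilt from the merged config on every pass; step 7
+ // compares normalized copies of the desired and live specs so that fields
+ // defaulted elsewhere (e.g. by CNPG) do not register as perpetual drift.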
+ desiredSpec := buildCNPGClusterSpec(mergedConfig, postgresSecretName) + + // 6. Fetch existing CNPG Cluster or create it. + existingCNPG := &cnpgv1.Cluster{} + err = c.Get(ctx, types.NamespacedName{Name: postgresCluster.Name, Namespace: postgresCluster.Namespace}, existingCNPG) + switch { + case apierrors.IsNotFound(err): + logger.Info("CNPG Cluster not found, creating", "name", postgresCluster.Name) + newCluster := buildCNPGCluster(scheme, postgresCluster, mergedConfig, postgresSecretName) + if err := c.Create(ctx, newCluster); err != nil { + logger.Error(err, "Failed to create CNPG Cluster") + if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterBuildFailed, + fmt.Sprintf("Failed to create CNPG Cluster: %v", err), failedClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, err + } + if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterBuildSucceeded, + "CNPG Cluster created", pendingClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + logger.Info("CNPG Cluster created successfully, requeueing for status update", "name", postgresCluster.Name) + return ctrl.Result{RequeueAfter: retryDelay}, nil + case err != nil: + logger.Error(err, "Failed to get CNPG Cluster") + if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterGetFailed, + fmt.Sprintf("Failed to get CNPG Cluster: %v", err), failedClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, err + } + + // 7. Patch CNPG Cluster spec if drift detected. + cnpgCluster = existingCNPG + currentNormalized := normalizeCNPGClusterSpec(cnpgCluster.Spec, mergedConfig.Spec.PostgreSQLConfig) + desiredNormalized := normalizeCNPGClusterSpec(desiredSpec, mergedConfig.Spec.PostgreSQLConfig) + + if !equality.Semantic.DeepEqual(currentNormalized, desiredNormalized) { + logger.Info("Detected drift in CNPG Cluster spec, patching", "name", cnpgCluster.Name) + originalCluster := cnpgCluster.DeepCopy() + cnpgCluster.Spec = desiredSpec + + switch patchErr := patchObject(ctx, c, originalCluster, cnpgCluster, "CNPGCluster"); { + case apierrors.IsConflict(patchErr): + logger.Info("Conflict occurred while updating CNPG Cluster, requeueing", "name", cnpgCluster.Name) + return ctrl.Result{Requeue: true}, nil + case patchErr != nil: + logger.Error(patchErr, "Failed to patch CNPG Cluster", "name", cnpgCluster.Name) + if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterPatchFailed, + fmt.Sprintf("Failed to patch CNPG Cluster: %v", patchErr), failedClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, patchErr + default: + logger.Info("CNPG Cluster patched successfully, requeueing for status update", "name", cnpgCluster.Name) + return ctrl.Result{RequeueAfter: retryDelay}, nil + } + } + + // 7a. Reconcile ManagedRoles. + if err := reconcileManagedRoles(ctx, c, postgresCluster, cnpgCluster); err != nil { + logger.Error(err, "Failed to reconcile managed roles") + if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonManagedRolesFailed, + fmt.Sprintf("Failed to reconcile managed roles: %v", err), failedClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, err + } + + // 7b. Reconcile Connection Pooler. 
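+ // Pooler handling is a four-way state machine: disabled -> delete poolers
+ // and clear the condition; enabled but missing -> create once the CNPG
+ // cluster is healthy; created but not ready -> requeue; ready -> sync
+ // endpoint status.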
+ poolerEnabled = mergedConfig.Spec.ConnectionPoolerEnabled != nil && *mergedConfig.Spec.ConnectionPoolerEnabled + switch { + case !poolerEnabled: + if err := deleteConnectionPoolers(ctx, c, postgresCluster); err != nil { + logger.Error(err, "Failed to delete connection poolers") + if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, + fmt.Sprintf("Failed to delete connection poolers: %v", err), failedClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, err + } + postgresCluster.Status.ConnectionPoolerStatus = nil + meta.RemoveStatusCondition(&postgresCluster.Status.Conditions, string(poolerReady)) + + case !poolerExists(ctx, c, postgresCluster, readWriteEndpoint) || !poolerExists(ctx, c, postgresCluster, readOnlyEndpoint): + if mergedConfig.CNPG == nil || mergedConfig.CNPG.ConnectionPooler == nil { + logger.Info("Connection pooler enabled but no config found in class or cluster spec, skipping", + "class", postgresCluster.Spec.Class, "cluster", postgresCluster.Name) + if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerConfigMissing, + fmt.Sprintf("Connection pooler is enabled but no config found in class %q or cluster %q", + postgresCluster.Spec.Class, postgresCluster.Name), failedClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, nil + } + if cnpgCluster.Status.Phase != cnpgv1.PhaseHealthy { + logger.Info("CNPG Cluster not healthy yet, pending pooler creation", "clusterPhase", cnpgCluster.Status.Phase) + if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonCNPGClusterNotHealthy, + "Waiting for CNPG cluster to become healthy before creating poolers", pendingClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{RequeueAfter: retryDelay}, nil + } + if err := createOrUpdateConnectionPoolers(ctx, c, scheme, postgresCluster, mergedConfig, cnpgCluster); err != nil { + logger.Error(err, "Failed to reconcile connection pooler") + if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, + fmt.Sprintf("Failed to reconcile connection pooler: %v", err), failedClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, err + } + logger.Info("Connection Poolers created, requeueing to check readiness") + if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerCreating, + "Connection poolers are being provisioned", provisioningClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{RequeueAfter: retryDelay}, nil + + case !arePoolersReady(ctx, c, postgresCluster): + logger.Info("Connection Poolers are not ready yet, requeueing") + if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerCreating, + "Connection poolers are being provisioned", pendingClusterPhase); statusErr != nil { + if apierrors.IsConflict(statusErr) { + logger.Info("Conflict updating pooler status, will requeue") + return ctrl.Result{Requeue: true}, nil + } + } + return ctrl.Result{RequeueAfter: retryDelay}, nil + + default: + if err := syncPoolerStatus(ctx, c, postgresCluster); err != nil { + logger.Error(err, "Failed to sync pooler status") + if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, + fmt.Sprintf("Failed to sync pooler 
status: %v", err), failedClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, err + } + } + + // 8. Reconcile ConfigMap when CNPG cluster is healthy. + if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy { + logger.Info("CNPG Cluster is ready, reconciling ConfigMap for connection details") + desiredCM, err := generateConfigMap(ctx, c, scheme, postgresCluster, cnpgCluster, postgresSecretName) + if err != nil { + logger.Error(err, "Failed to generate ConfigMap") + if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonConfigMapFailed, + fmt.Sprintf("Failed to generate ConfigMap: %v", err), failedClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, err + } + cm := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: desiredCM.Name, Namespace: desiredCM.Namespace}} + createOrUpdateResult, err := controllerutil.CreateOrUpdate(ctx, c, cm, func() error { + cm.Data = desiredCM.Data + cm.Annotations = desiredCM.Annotations + cm.Labels = desiredCM.Labels + if !metav1.IsControlledBy(cm, postgresCluster) { + if err := ctrl.SetControllerReference(postgresCluster, cm, scheme); err != nil { + return fmt.Errorf("set controller reference failed: %w", err) + } + } + return nil + }) + if err != nil { + logger.Error(err, "Failed to reconcile ConfigMap", "name", desiredCM.Name) + if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonConfigMapFailed, + fmt.Sprintf("Failed to reconcile ConfigMap: %v", err), failedClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, err + } + switch createOrUpdateResult { + case controllerutil.OperationResultCreated: + logger.Info("ConfigMap created", "name", desiredCM.Name) + case controllerutil.OperationResultUpdated: + logger.Info("ConfigMap updated", "name", desiredCM.Name) + default: + logger.Info("ConfigMap unchanged", "name", desiredCM.Name) + } + if postgresCluster.Status.Resources.ConfigMapRef == nil { + postgresCluster.Status.Resources.ConfigMapRef = &corev1.LocalObjectReference{Name: desiredCM.Name} + } + } + + // 9. Final status sync. + if err := syncStatus(ctx, c, postgresCluster, cnpgCluster); err != nil { + logger.Error(err, "Failed to sync status") + if apierrors.IsConflict(err) { + logger.Info("Conflict during status update, will requeue") + return ctrl.Result{Requeue: true}, nil + } + return ctrl.Result{}, fmt.Errorf("failed to sync status: %w", err) + } + if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy && arePoolersReady(ctx, c, postgresCluster) { + logger.Info("Poolers are ready, syncing pooler status") + _ = syncPoolerStatus(ctx, c, postgresCluster) + } + logger.Info("Reconciliation complete") + return ctrl.Result{}, nil +} + +// getMergedConfig overlays PostgresCluster spec on top of the class defaults. +// Class values are used only where the cluster spec is silent. +func getMergedConfig(class *enterprisev4.PostgresClusterClass, cluster *enterprisev4.PostgresCluster) (*MergedConfig, error) { + result := cluster.Spec.DeepCopy() + + // Config is optional on the class — apply defaults only when provided. 
+ if defaults := class.Spec.Config; defaults != nil { + if result.Instances == nil { + result.Instances = defaults.Instances + } + if result.PostgresVersion == nil { + result.PostgresVersion = defaults.PostgresVersion + } + if result.Resources == nil { + result.Resources = defaults.Resources + } + if result.Storage == nil { + result.Storage = defaults.Storage + } + if len(result.PostgreSQLConfig) == 0 { + result.PostgreSQLConfig = defaults.PostgreSQLConfig + } + if len(result.PgHBA) == 0 { + result.PgHBA = defaults.PgHBA + } + } + + if result.Instances == nil || result.PostgresVersion == nil || result.Storage == nil { + return nil, fmt.Errorf("invalid configuration for class %s: instances, postgresVersion and storage are required", class.Name) + } + if result.PostgreSQLConfig == nil { + result.PostgreSQLConfig = make(map[string]string) + } + if result.PgHBA == nil { + result.PgHBA = make([]string, 0) + } + if result.Resources == nil { + result.Resources = &corev1.ResourceRequirements{} + } + + return &MergedConfig{Spec: result, CNPG: class.Spec.CNPG}, nil +} + +// buildCNPGClusterSpec builds the desired CNPG ClusterSpec. +// IMPORTANT: any field added here must also appear in normalizeCNPGClusterSpec, +// otherwise spec drift will be silently ignored. +func buildCNPGClusterSpec(cfg *MergedConfig, secretName string) cnpgv1.ClusterSpec { + return cnpgv1.ClusterSpec{ + ImageName: fmt.Sprintf("ghcr.io/cloudnative-pg/postgresql:%s", *cfg.Spec.PostgresVersion), + Instances: int(*cfg.Spec.Instances), + PostgresConfiguration: cnpgv1.PostgresConfiguration{ + Parameters: cfg.Spec.PostgreSQLConfig, + PgHBA: cfg.Spec.PgHBA, + }, + SuperuserSecret: &cnpgv1.LocalObjectReference{Name: secretName}, + EnableSuperuserAccess: ptr.To(true), + Bootstrap: &cnpgv1.BootstrapConfiguration{ + InitDB: &cnpgv1.BootstrapInitDB{ + Database: defaultDatabaseName, + Owner: superUsername, + Secret: &cnpgv1.LocalObjectReference{Name: secretName}, + }, + }, + StorageConfiguration: cnpgv1.StorageConfiguration{ + Size: cfg.Spec.Storage.String(), + }, + Resources: *cfg.Spec.Resources, + } +} + +func buildCNPGCluster(scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, secretName string) *cnpgv1.Cluster { + cnpg := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{Name: cluster.Name, Namespace: cluster.Namespace}, + Spec: buildCNPGClusterSpec(cfg, secretName), + } + ctrl.SetControllerReference(cluster, cnpg, scheme) + return cnpg +} + +func normalizeCNPGClusterSpec(spec cnpgv1.ClusterSpec, customDefinedParameters map[string]string) normalizedCNPGClusterSpec { + normalized := normalizedCNPGClusterSpec{ + ImageName: spec.ImageName, + Instances: spec.Instances, + StorageSize: spec.StorageConfiguration.Size, + Resources: spec.Resources, + } + if len(customDefinedParameters) > 0 { + normalized.CustomDefinedParameters = make(map[string]string) + for k := range customDefinedParameters { + normalized.CustomDefinedParameters[k] = spec.PostgresConfiguration.Parameters[k] + } + } + if len(spec.PostgresConfiguration.PgHBA) > 0 { + normalized.PgHBA = spec.PostgresConfiguration.PgHBA + } + if spec.Bootstrap != nil && spec.Bootstrap.InitDB != nil { + normalized.DefaultDatabase = spec.Bootstrap.InitDB.Database + normalized.Owner = spec.Bootstrap.InitDB.Owner + } + return normalized +} + +// reconcileManagedRoles synchronizes ManagedRoles from PostgresCluster spec to CNPG Cluster managed.roles. 
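+// Desired roles are built in spec order and compared with
+// equality.Semantic.DeepEqual, so slice order is significant and must stay
+// stable to avoid spurious patches.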
+func reconcileManagedRoles(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster) error { + logger := log.FromContext(ctx) + + if len(cluster.Spec.ManagedRoles) == 0 { + logger.Info("No managed roles to reconcile") + return nil + } + + desiredRoles := make([]cnpgv1.RoleConfiguration, 0, len(cluster.Spec.ManagedRoles)) + for _, role := range cluster.Spec.ManagedRoles { + r := cnpgv1.RoleConfiguration{ + Name: role.Name, + Ensure: cnpgv1.EnsureAbsent, + } + // Exists bool replaces the old Ensure string enum ("present"/"absent"). + if role.Exists { + r.Ensure = cnpgv1.EnsurePresent + r.Login = true + } + if role.PasswordSecretRef != nil { + // Pass only the secret name to CNPG — CNPG always reads the "password" key. + r.PasswordSecret = &cnpgv1.LocalObjectReference{Name: role.PasswordSecretRef.LocalObjectReference.Name} + } + desiredRoles = append(desiredRoles, r) + } + + var currentRoles []cnpgv1.RoleConfiguration + if cnpgCluster.Spec.Managed != nil { + currentRoles = cnpgCluster.Spec.Managed.Roles + } + + if equality.Semantic.DeepEqual(currentRoles, desiredRoles) { + logger.Info("CNPG Cluster roles already match desired state, no update needed") + return nil + } + + logger.Info("CNPG Cluster roles differ from desired state, updating", + "currentCount", len(currentRoles), "desiredCount", len(desiredRoles)) + + originalCluster := cnpgCluster.DeepCopy() + if cnpgCluster.Spec.Managed == nil { + cnpgCluster.Spec.Managed = &cnpgv1.ManagedConfiguration{} + } + cnpgCluster.Spec.Managed.Roles = desiredRoles + + if err := c.Patch(ctx, cnpgCluster, client.MergeFrom(originalCluster)); err != nil { + return fmt.Errorf("failed to patch CNPG Cluster with managed roles: %w", err) + } + logger.Info("Successfully updated CNPG Cluster with managed roles", "roleCount", len(desiredRoles)) + return nil +} + +func poolerResourceName(clusterName, poolerType string) string { + return fmt.Sprintf("%s%s%s", clusterName, defaultPoolerSuffix, poolerType) +} + +func poolerExists(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, poolerType string) bool { + pooler := &cnpgv1.Pooler{} + err := c.Get(ctx, types.NamespacedName{ + Name: poolerResourceName(cluster.Name, poolerType), + Namespace: cluster.Namespace, + }, pooler) + if apierrors.IsNotFound(err) { + return false + } + if err != nil { + log.FromContext(ctx).Error(err, "Failed to check pooler existence", "type", poolerType) + return false + } + return true +} + +func arePoolersReady(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster) bool { + rwPooler := &cnpgv1.Pooler{} + rwErr := c.Get(ctx, types.NamespacedName{ + Name: poolerResourceName(cluster.Name, readWriteEndpoint), + Namespace: cluster.Namespace, + }, rwPooler) + + roPooler := &cnpgv1.Pooler{} + roErr := c.Get(ctx, types.NamespacedName{ + Name: poolerResourceName(cluster.Name, readOnlyEndpoint), + Namespace: cluster.Namespace, + }, roPooler) + + return isPoolerReady(rwPooler, rwErr) && isPoolerReady(roPooler, roErr) +} + +// isPoolerReady checks if a pooler has all instances scheduled. +// CNPG PoolerStatus only tracks scheduled instances, not ready pods. 
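+// An unset spec.instances counts as one desired instance, which is also
+// CNPG's default for poolers.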
+func isPoolerReady(pooler *cnpgv1.Pooler, err error) bool { + if err != nil { + return false + } + desired := int32(1) + if pooler.Spec.Instances != nil { + desired = *pooler.Spec.Instances + } + return pooler.Status.Instances >= desired +} + +func poolerInstanceCount(p *cnpgv1.Pooler) (desired, scheduled int32) { + desired = 1 + if p.Spec.Instances != nil { + desired = *p.Spec.Instances + } + return desired, p.Status.Instances +} + +// createOrUpdateConnectionPoolers creates RW and RO poolers if they don't exist. +func createOrUpdateConnectionPoolers(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, cnpgCluster *cnpgv1.Cluster) error { + if err := createConnectionPooler(ctx, c, scheme, cluster, cfg, cnpgCluster, readWriteEndpoint); err != nil { + return fmt.Errorf("failed to reconcile RW pooler: %w", err) + } + if err := createConnectionPooler(ctx, c, scheme, cluster, cfg, cnpgCluster, readOnlyEndpoint); err != nil { + return fmt.Errorf("failed to reconcile RO pooler: %w", err) + } + return nil +} + +func createConnectionPooler(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerType string) error { + poolerName := poolerResourceName(cluster.Name, poolerType) + existing := &cnpgv1.Pooler{} + err := c.Get(ctx, types.NamespacedName{Name: poolerName, Namespace: cluster.Namespace}, existing) + if err == nil { + return nil // already exists + } + if !apierrors.IsNotFound(err) { + return err + } + log.FromContext(ctx).Info("Creating CNPG Pooler", "name", poolerName, "type", poolerType) + return c.Create(ctx, buildCNPGPooler(scheme, cluster, cfg, cnpgCluster, poolerType)) +} + +func buildCNPGPooler(scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerType string) *cnpgv1.Pooler { + pc := cfg.CNPG.ConnectionPooler + instances := *pc.Instances + mode := cnpgv1.PgBouncerPoolMode(*pc.Mode) + pooler := &cnpgv1.Pooler{ + ObjectMeta: metav1.ObjectMeta{Name: poolerResourceName(cluster.Name, poolerType), Namespace: cluster.Namespace}, + Spec: cnpgv1.PoolerSpec{ + Cluster: cnpgv1.LocalObjectReference{Name: cnpgCluster.Name}, + Instances: &instances, + Type: cnpgv1.PoolerType(poolerType), + PgBouncer: &cnpgv1.PgBouncerSpec{ + PoolMode: mode, + Parameters: pc.Config, + }, + }, + } + ctrl.SetControllerReference(cluster, pooler, scheme) + return pooler +} + +// deleteConnectionPoolers removes RW and RO poolers if they exist. +func deleteConnectionPoolers(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster) error { + logger := log.FromContext(ctx) + for _, poolerType := range []string{readWriteEndpoint, readOnlyEndpoint} { + poolerName := poolerResourceName(cluster.Name, poolerType) + if !poolerExists(ctx, c, cluster, poolerType) { + continue + } + pooler := &cnpgv1.Pooler{} + if err := c.Get(ctx, types.NamespacedName{Name: poolerName, Namespace: cluster.Namespace}, pooler); err != nil { + if apierrors.IsNotFound(err) { + continue + } + return fmt.Errorf("failed to get pooler %s: %w", poolerName, err) + } + logger.Info("Deleting CNPG Pooler", "name", poolerName) + if err := c.Delete(ctx, pooler); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("failed to delete pooler %s: %w", poolerName, err) + } + } + return nil +} + +// syncPoolerStatus populates ConnectionPoolerStatus and the PoolerReady condition. 
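+// A Get error for either pooler propagates to the caller, which maps it onto
+// the PoolerReady condition and retries.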
+func syncPoolerStatus(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster) error { + rwPooler := &cnpgv1.Pooler{} + if err := c.Get(ctx, types.NamespacedName{ + Name: poolerResourceName(cluster.Name, readWriteEndpoint), + Namespace: cluster.Namespace, + }, rwPooler); err != nil { + return err + } + + roPooler := &cnpgv1.Pooler{} + if err := c.Get(ctx, types.NamespacedName{ + Name: poolerResourceName(cluster.Name, readOnlyEndpoint), + Namespace: cluster.Namespace, + }, roPooler); err != nil { + return err + } + + cluster.Status.ConnectionPoolerStatus = &enterprisev4.ConnectionPoolerStatus{Enabled: true} + rwDesired, rwScheduled := poolerInstanceCount(rwPooler) + roDesired, roScheduled := poolerInstanceCount(roPooler) + + return setStatus(ctx, c, cluster, poolerReady, metav1.ConditionTrue, reasonAllInstancesReady, + fmt.Sprintf("%s: %d/%d, %s: %d/%d", readWriteEndpoint, rwScheduled, rwDesired, readOnlyEndpoint, roScheduled, roDesired), + readyClusterPhase) +} + +// syncStatus maps CNPG Cluster state to PostgresCluster status. +func syncStatus(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster) error { + cluster.Status.ProvisionerRef = &corev1.ObjectReference{ + APIVersion: "postgresql.cnpg.io/v1", + Kind: "Cluster", + Namespace: cnpgCluster.Namespace, + Name: cnpgCluster.Name, + UID: cnpgCluster.UID, + } + + var phase reconcileClusterPhases + var condStatus metav1.ConditionStatus + var reason conditionReasons + var message string + + switch cnpgCluster.Status.Phase { + case cnpgv1.PhaseHealthy: + phase, condStatus, reason, message = readyClusterPhase, metav1.ConditionTrue, reasonCNPGClusterHealthy, "Cluster is up and running" + case cnpgv1.PhaseFirstPrimary, cnpgv1.PhaseCreatingReplica, cnpgv1.PhaseWaitingForInstancesToBeActive: + phase, condStatus, reason = provisioningClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioning + message = fmt.Sprintf("CNPG cluster provisioning: %s", cnpgCluster.Status.Phase) + case cnpgv1.PhaseSwitchover: + phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGSwitchover, "Cluster changing primary node" + case cnpgv1.PhaseFailOver: + phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGFailingOver, "Pod missing, need to change primary" + case cnpgv1.PhaseInplacePrimaryRestart, cnpgv1.PhaseInplaceDeletePrimaryRestart: + phase, condStatus, reason = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGRestarting + message = fmt.Sprintf("CNPG cluster restarting: %s", cnpgCluster.Status.Phase) + case cnpgv1.PhaseUpgrade, cnpgv1.PhaseMajorUpgrade, cnpgv1.PhaseUpgradeDelayed, cnpgv1.PhaseOnlineUpgrading: + phase, condStatus, reason = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGUpgrading + message = fmt.Sprintf("CNPG cluster upgrading: %s", cnpgCluster.Status.Phase) + case cnpgv1.PhaseApplyingConfiguration: + phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGApplyingConfig, "Configuration change is being applied" + case cnpgv1.PhaseReplicaClusterPromotion: + phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGPromoting, "Replica is being promoted to primary" + case cnpgv1.PhaseWaitingForUser: + phase, condStatus, reason, message = failedClusterPhase, metav1.ConditionFalse, reasonCNPGWaitingForUser, "Action from the user is required" + case cnpgv1.PhaseUnrecoverable: + phase, condStatus, reason, 
message = failedClusterPhase, metav1.ConditionFalse, reasonCNPGUnrecoverable, "Cluster failed, needs manual intervention" + case cnpgv1.PhaseCannotCreateClusterObjects: + phase, condStatus, reason, message = failedClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioningFailed, "Cluster resources cannot be created" + case cnpgv1.PhaseUnknownPlugin, cnpgv1.PhaseFailurePlugin: + phase, condStatus, reason = failedClusterPhase, metav1.ConditionFalse, reasonCNPGPluginError + message = fmt.Sprintf("CNPG plugin error: %s", cnpgCluster.Status.Phase) + case cnpgv1.PhaseImageCatalogError, cnpgv1.PhaseArchitectureBinaryMissing: + phase, condStatus, reason = failedClusterPhase, metav1.ConditionFalse, reasonCNPGImageError + message = fmt.Sprintf("CNPG image error: %s", cnpgCluster.Status.Phase) + case "": + phase, condStatus, reason, message = pendingClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioning, "CNPG cluster is pending creation" + default: + phase, condStatus, reason = provisioningClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioning + message = fmt.Sprintf("CNPG cluster phase: %s", cnpgCluster.Status.Phase) + } + + return setStatus(ctx, c, cluster, clusterReady, condStatus, reason, message, phase) +} + +// setStatus sets the phase, condition and persists the status. +func setStatus(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, condType conditionTypes, status metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileClusterPhases) error { + p := string(phase) + cluster.Status.Phase = &p + meta.SetStatusCondition(&cluster.Status.Conditions, metav1.Condition{ + Type: string(condType), + Status: status, + Reason: string(reason), + Message: message, + ObservedGeneration: cluster.Generation, + }) + if err := c.Status().Update(ctx, cluster); err != nil { + return fmt.Errorf("failed to update PostgresCluster status: %w", err) + } + return nil +} + +// generateConfigMap builds a ConfigMap with connection details for the PostgresCluster. 
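+// For a cluster "example-pg" in namespace "demo" (hypothetical names), the
+// data includes CLUSTER_RW_ENDPOINT=example-pg-rw.demo and, once both poolers
+// exist, CLUSTER_POOLER_RW_ENDPOINT=example-pg-pooler-rw.demo.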
+func generateConfigMap(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster, secretName string) (*corev1.ConfigMap, error) { + cmName := fmt.Sprintf("%s%s", cluster.Name, defaultConfigMapSuffix) + if cluster.Status.Resources != nil && cluster.Status.Resources.ConfigMapRef != nil { + cmName = cluster.Status.Resources.ConfigMapRef.Name + } + + data := map[string]string{ + "CLUSTER_RW_ENDPOINT": fmt.Sprintf("%s-rw.%s", cnpgCluster.Name, cnpgCluster.Namespace), + "CLUSTER_RO_ENDPOINT": fmt.Sprintf("%s-ro.%s", cnpgCluster.Name, cnpgCluster.Namespace), + "CLUSTER_R_ENDPOINT": fmt.Sprintf("%s-r.%s", cnpgCluster.Name, cnpgCluster.Namespace), + "DEFAULT_CLUSTER_PORT": defaultPort, + "SUPER_USER_NAME": superUsername, + "SUPER_USER_SECRET_REF": secretName, + } + if poolerExists(ctx, c, cluster, readWriteEndpoint) && poolerExists(ctx, c, cluster, readOnlyEndpoint) { + data["CLUSTER_POOLER_RW_ENDPOINT"] = fmt.Sprintf("%s.%s", poolerResourceName(cnpgCluster.Name, readWriteEndpoint), cnpgCluster.Namespace) + data["CLUSTER_POOLER_RO_ENDPOINT"] = fmt.Sprintf("%s.%s", poolerResourceName(cnpgCluster.Name, readOnlyEndpoint), cnpgCluster.Namespace) + } + + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: cmName, + Namespace: cluster.Namespace, + Labels: map[string]string{"app.kubernetes.io/managed-by": "postgrescluster-controller"}, + }, + Data: data, + } + if err := ctrl.SetControllerReference(cluster, cm, scheme); err != nil { + return nil, fmt.Errorf("failed to set controller reference: %w", err) + } + return cm, nil +} + +// ensureClusterSecret creates the superuser secret if it doesn't exist and persists the ref to status. +func ensureClusterSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, secretName string, secret *corev1.Secret) error { + err := c.Get(ctx, types.NamespacedName{Name: secretName, Namespace: cluster.Namespace}, secret) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + if apierrors.IsNotFound(err) { + pw, err := generatePassword() + if err != nil { + return err + } + newSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: secretName, Namespace: cluster.Namespace}, + StringData: map[string]string{"username": superUsername, "password": pw}, + Type: corev1.SecretTypeOpaque, + } + if err := ctrl.SetControllerReference(cluster, newSecret, scheme); err != nil { + return err + } + if err := c.Create(ctx, newSecret); err != nil { + return err + } + } + if cluster.Status.Resources == nil { + cluster.Status.Resources = &enterprisev4.PostgresClusterResources{} + } + cluster.Status.Resources.SuperUserSecretRef = &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: secretName}, + Key: secretKeyPassword, + } + return nil +} + +func clusterSecretExists(ctx context.Context, c client.Client, namespace, name string, secret *corev1.Secret) (bool, error) { + err := c.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, secret) + if apierrors.IsNotFound(err) { + return false, nil + } + return err == nil, err +} + +// deleteCNPGCluster deletes the CNPG Cluster if it exists. 
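+// A nil argument and NotFound errors are both treated as success, keeping the
+// finalizer path idempotent across requeues.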
+func deleteCNPGCluster(ctx context.Context, c client.Client, cnpgCluster *cnpgv1.Cluster) error {
+    logger := log.FromContext(ctx)
+    if cnpgCluster == nil {
+        logger.Info("CNPG Cluster not found, skipping deletion")
+        return nil
+    }
+    logger.Info("Deleting CNPG Cluster", "name", cnpgCluster.Name)
+    if err := c.Delete(ctx, cnpgCluster); err != nil && !apierrors.IsNotFound(err) {
+        return fmt.Errorf("failed to delete CNPG Cluster: %w", err)
+    }
+    return nil
+}
+
+// handleFinalizer processes deletion cleanup: removes poolers, then deletes or orphans the CNPG Cluster
+// based on ClusterDeletionPolicy, then removes the finalizer.
+func handleFinalizer(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, secret *corev1.Secret) error {
+    logger := log.FromContext(ctx)
+    if cluster.GetDeletionTimestamp() == nil {
+        logger.Info("PostgresCluster not marked for deletion, skipping finalizer logic")
+        return nil
+    }
+    if !controllerutil.ContainsFinalizer(cluster, PostgresClusterFinalizerName) {
+        logger.Info("Finalizer not present on PostgresCluster, skipping finalizer logic")
+        return nil
+    }
+
+    cnpgCluster := &cnpgv1.Cluster{}
+    err := c.Get(ctx, types.NamespacedName{Name: cluster.Name, Namespace: cluster.Namespace}, cnpgCluster)
+    if err != nil {
+        if apierrors.IsNotFound(err) {
+            cnpgCluster = nil
+            logger.Info("CNPG cluster not found during cleanup")
+        } else {
+            return fmt.Errorf("failed to fetch CNPG cluster during cleanup: %w", err)
+        }
+    }
+    logger.Info("Processing finalizer cleanup for PostgresCluster")
+
+    if err := deleteConnectionPoolers(ctx, c, cluster); err != nil {
+        logger.Error(err, "Failed to delete connection poolers during cleanup")
+        return fmt.Errorf("failed to delete connection poolers: %w", err)
+    }
+
+    // Dereference *string — empty string falls through to default (unknown policy).
+    policy := ""
+    if cluster.Spec.ClusterDeletionPolicy != nil {
+        policy = *cluster.Spec.ClusterDeletionPolicy
+    }
+
+    switch policy {
+    case clusterDeletionPolicyDelete:
+        logger.Info("ClusterDeletionPolicy is 'Delete', deleting CNPG Cluster and associated resources")
+        if cnpgCluster != nil {
+            if err := deleteCNPGCluster(ctx, c, cnpgCluster); err != nil {
+                logger.Error(err, "Failed to delete CNPG Cluster during finalizer cleanup")
+                return fmt.Errorf("failed to delete CNPG Cluster during finalizer cleanup: %w", err)
+            }
+        } else {
+            logger.Info("CNPG Cluster not found, skipping deletion")
+        }
+
+    case clusterDeletionPolicyRetain:
+        logger.Info("ClusterDeletionPolicy is 'Retain', removing owner references to orphan CNPG Cluster")
+        if cnpgCluster != nil {
+            originalCNPG := cnpgCluster.DeepCopy()
+            refRemoved, err := removeOwnerRef(scheme, cluster, cnpgCluster)
+            if err != nil {
+                return fmt.Errorf("failed to remove owner reference from CNPG cluster: %w", err)
+            }
+            if !refRemoved {
+                logger.Info("Owner reference already removed from CNPG Cluster, skipping patch")
+            } else {
+                if err := patchObject(ctx, c, originalCNPG, cnpgCluster, "CNPGCluster"); err != nil {
+                    return fmt.Errorf("failed to patch CNPG cluster after removing owner reference: %w", err)
+                }
+                logger.Info("Removed owner reference from CNPG Cluster")
+            }
+        }
+
+        // Remove owner reference from the superuser Secret to prevent cascading deletion.
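+        // The retained secret keeps its derived name, so a replacement
+        // PostgresCluster with the same name re-adopts it via the ownerRef
+        // re-attach step in the main reconcile flow.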
+        if cluster.Status.Resources != nil && cluster.Status.Resources.SuperUserSecretRef != nil {
+            secretName := cluster.Status.Resources.SuperUserSecretRef.Name
+            if err := c.Get(ctx, types.NamespacedName{Name: secretName, Namespace: cluster.Namespace}, secret); err != nil {
+                if !apierrors.IsNotFound(err) {
+                    logger.Error(err, "Failed to fetch Secret during cleanup")
+                    return fmt.Errorf("failed to fetch secret during cleanup: %w", err)
+                }
+                logger.Info("Secret not found, skipping owner reference removal", "secret", secretName)
+            } else {
+                originalSecret := secret.DeepCopy()
+                refRemoved, err := removeOwnerRef(scheme, cluster, secret)
+                if err != nil {
+                    return fmt.Errorf("failed to remove owner reference from Secret: %w", err)
+                }
+                if refRemoved {
+                    if err := patchObject(ctx, c, originalSecret, secret, "Secret"); err != nil {
+                        return fmt.Errorf("failed to patch Secret after removing owner reference: %w", err)
+                    }
+                    logger.Info("Removed owner reference from Secret")
+                }
+            }
+        }
+
+    default:
+        logger.Info("Unknown ClusterDeletionPolicy", "policy", policy)
+    }
+
+    controllerutil.RemoveFinalizer(cluster, PostgresClusterFinalizerName)
+    if err := c.Update(ctx, cluster); err != nil {
+        if apierrors.IsNotFound(err) {
+            logger.Info("PostgresCluster already deleted, skipping finalizer update")
+            return nil
+        }
+        logger.Error(err, "Failed to remove finalizer from PostgresCluster")
+        return fmt.Errorf("failed to remove finalizer: %w", err)
+    }
+    logger.Info("Finalizer removed, cleanup complete")
+    return nil
+}
+
+func removeOwnerRef(scheme *runtime.Scheme, owner, obj client.Object) (bool, error) {
+    hasRef, err := controllerutil.HasOwnerReference(obj.GetOwnerReferences(), owner, scheme)
+    if err != nil {
+        return false, fmt.Errorf("failed to check owner reference: %w", err)
+    }
+    if !hasRef {
+        return false, nil
+    }
+    if err := controllerutil.RemoveOwnerReference(owner, obj, scheme); err != nil {
+        return false, fmt.Errorf("failed to remove owner reference: %w", err)
+    }
+    return true, nil
+}
+
+// patchObject patches obj from original; treats NotFound as a no-op.
+func patchObject(ctx context.Context, c client.Client, original, obj client.Object, kind objectKind) error {
+    logger := log.FromContext(ctx)
+    if err := c.Patch(ctx, obj, client.MergeFrom(original)); err != nil {
+        if apierrors.IsNotFound(err) {
+            logger.Info("Object not found, skipping patch", "kind", kind, "name", obj.GetName())
+            return nil
+        }
+        return fmt.Errorf("failed to patch %s object: %w", kind, err)
+    }
+    logger.Info("Patched object successfully", "kind", kind, "name", obj.GetName())
+    return nil
+}
+
+func generatePassword() (string, error) {
+    const (
+        length  = 32
+        digits  = 8
+        symbols = 0
+    )
+    return password.Generate(length, digits, symbols, false, true)
+}
diff --git a/pkg/postgresql/cluster/core/types.go b/pkg/postgresql/cluster/core/types.go
new file mode 100644
index 000000000..19886fd73
--- /dev/null
+++ b/pkg/postgresql/cluster/core/types.go
@@ -0,0 +1,102 @@
+package core
+
+import (
+    "time"
+
+    enterprisev4 "github.com/splunk/splunk-operator/api/v4"
+    corev1 "k8s.io/api/core/v1"
+)
+
+// normalizedCNPGClusterSpec is a subset of cnpgv1.ClusterSpec fields used for drift detection.
+// Only fields we set in buildCNPGClusterSpec are included — CNPG-injected defaults are excluded
+// to avoid false-positive drift on every reconcile.
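+// CustomDefinedParameters holds only the keys the user set, with values read
+// back from the live spec, so parameters CNPG injects on its own never
+// register as drift.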
+type normalizedCNPGClusterSpec struct { + ImageName string + Instances int + CustomDefinedParameters map[string]string + PgHBA []string + DefaultDatabase string + Owner string + StorageSize string + Resources corev1.ResourceRequirements +} + +// MergedConfig is the resolved configuration after overlaying PostgresCluster on PostgresClusterClass defaults. +type MergedConfig struct { + Spec *enterprisev4.PostgresClusterSpec + CNPG *enterprisev4.CNPGConfig +} + +type reconcileClusterPhases string +type conditionTypes string +type conditionReasons string +type objectKind string + +const ( + retryDelay = time.Second * 15 + + readOnlyEndpoint string = "ro" + readWriteEndpoint string = "rw" + + defaultDatabaseName string = "postgres" + superUsername string = "postgres" + defaultPort string = "5432" + + secretKeyPassword string = "password" + defaultSecretSuffix string = "-secret" + defaultPoolerSuffix string = "-pooler-" + defaultConfigMapSuffix string = "-configmap" + + clusterDeletionPolicyDelete string = "Delete" + clusterDeletionPolicyRetain string = "Retain" + + // PostgresClusterFinalizerName is exported so the primary adapter (controller) can + // reference it in event predicates without duplicating the string. + PostgresClusterFinalizerName string = "postgresclusters.enterprise.splunk.com/finalizer" + + // cluster phases + readyClusterPhase reconcileClusterPhases = "Ready" + pendingClusterPhase reconcileClusterPhases = "Pending" + provisioningClusterPhase reconcileClusterPhases = "Provisioning" + configuringClusterPhase reconcileClusterPhases = "Configuring" + failedClusterPhase reconcileClusterPhases = "Failed" + + // condition types + clusterReady conditionTypes = "ClusterReady" + poolerReady conditionTypes = "PoolerReady" + + // condition reasons — clusterReady + reasonClusterClassNotFound conditionReasons = "ClusterClassNotFound" + reasonManagedRolesFailed conditionReasons = "ManagedRolesReconciliationFailed" + reasonClusterBuildFailed conditionReasons = "ClusterBuildFailed" + reasonClusterBuildSucceeded conditionReasons = "ClusterBuildSucceeded" + reasonClusterGetFailed conditionReasons = "ClusterGetFailed" + reasonClusterPatchFailed conditionReasons = "ClusterPatchFailed" + reasonInvalidConfiguration conditionReasons = "InvalidConfiguration" + reasonConfigMapFailed conditionReasons = "ConfigMapReconciliationFailed" + reasonUserSecretFailed conditionReasons = "UserSecretReconciliationFailed" + reasonSuperUserSecretFailed conditionReasons = "SuperUserSecretFailed" + reasonClusterDeleteFailed conditionReasons = "ClusterDeleteFailed" + + // condition reasons — poolerReady + reasonPoolerReconciliationFailed conditionReasons = "PoolerReconciliationFailed" + reasonPoolerConfigMissing conditionReasons = "PoolerConfigMissing" + reasonPoolerCreating conditionReasons = "PoolerCreating" + reasonAllInstancesReady conditionReasons = "AllInstancesReady" + + // condition reasons — CNPG cluster phase mapping + reasonCNPGClusterNotHealthy conditionReasons = "CNPGClusterNotHealthy" + reasonCNPGClusterHealthy conditionReasons = "CNPGClusterHealthy" + reasonCNPGProvisioning conditionReasons = "CNPGClusterProvisioning" + reasonCNPGSwitchover conditionReasons = "CNPGSwitchover" + reasonCNPGFailingOver conditionReasons = "CNPGFailingOver" + reasonCNPGRestarting conditionReasons = "CNPGRestarting" + reasonCNPGUpgrading conditionReasons = "CNPGUpgrading" + reasonCNPGApplyingConfig conditionReasons = "CNPGApplyingConfiguration" + reasonCNPGPromoting conditionReasons = "CNPGPromoting" + 
reasonCNPGWaitingForUser conditionReasons = "CNPGWaitingForUser" + reasonCNPGUnrecoverable conditionReasons = "CNPGUnrecoverable" + reasonCNPGProvisioningFailed conditionReasons = "CNPGProvisioningFailed" + reasonCNPGPluginError conditionReasons = "CNPGPluginError" + reasonCNPGImageError conditionReasons = "CNPGImageError" +) diff --git a/pkg/postgresql/database/adapter/db_repository.go b/pkg/postgresql/database/adapter/db_repository.go new file mode 100644 index 000000000..0b23f685c --- /dev/null +++ b/pkg/postgresql/database/adapter/db_repository.go @@ -0,0 +1,80 @@ +// Package adapter contains driven adapters for the PostgresDatabase domain. +// Each adapter implements a port defined in core/ports.go. +package adapter + +import ( + "context" + "fmt" + "time" + + dbcore "github.com/splunk/splunk-operator/pkg/postgresql/database/core" + + "github.com/jackc/pgx/v5" +) + +const ( + superUsername = "postgres" + postgresPort = "5432" + dbConnectTimeout = 10 * time.Second +) + +// pgDBRepository is the pgx-backed adapter for the core.DBRepo port. +// It owns the full connection lifecycle: open on construction, close on ExecGrants return. +type pgDBRepository struct { + conn *pgx.Conn +} + +// ExecGrants applies all privilege grants needed for the RW role on a single database. +// GRANT ON ALL TABLES/SEQUENCES covers existing objects; ALTER DEFAULT PRIVILEGES covers +// future ones created by the admin role (e.g. via migrations). +func (r *pgDBRepository) ExecGrants(ctx context.Context, dbName string) error { + defer r.conn.Close(context.Background()) + + adminRole := dbName + "_admin" + rwRole := dbName + "_rw" + + tx, err := r.conn.Begin(ctx) + if err != nil { + return fmt.Errorf("beginning transaction: %w", err) + } + + // Identifiers cannot be parameterised in PostgreSQL — fmt.Sprintf is correct here. + // Role names are generated internally by our own functions, never from user input. + stmts := []string{ + fmt.Sprintf("GRANT CONNECT ON DATABASE %s TO %s", dbName, rwRole), + fmt.Sprintf("GRANT USAGE ON SCHEMA public TO %s", rwRole), + fmt.Sprintf("GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO %s", rwRole), + fmt.Sprintf("GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO %s", rwRole), + fmt.Sprintf("ALTER DEFAULT PRIVILEGES FOR ROLE %s IN SCHEMA public GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO %s", adminRole, rwRole), + fmt.Sprintf("ALTER DEFAULT PRIVILEGES FOR ROLE %s IN SCHEMA public GRANT USAGE, SELECT ON SEQUENCES TO %s", adminRole, rwRole), + } + + for _, stmt := range stmts { + if _, err := tx.Exec(ctx, stmt); err != nil { + return fmt.Errorf("executing grant %q: %w", stmt, err) + } + } + + return tx.Commit(ctx) +} + +// NewDBRepository opens a direct superuser connection, bypassing any pooler. +// PgBouncer in transaction mode blocks DDL; password is set on the config +// struct to avoid URL-encoding issues with special characters. 
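+// The returned repo is single-use: ExecGrants closes the connection when it
+// returns, so callers construct a fresh repo per database.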
+func NewDBRepository(ctx context.Context, host, dbName, password string) (dbcore.DBRepo, error) { + cfg, err := pgx.ParseConfig(fmt.Sprintf( + "postgres://%s@%s:%s/%s?sslmode=require&connect_timeout=%d", + superUsername, host, postgresPort, dbName, + int(dbConnectTimeout.Seconds()), + )) + if err != nil { + return nil, fmt.Errorf("parsing connection config for %s/%s: %w", host, dbName, err) + } + cfg.Password = password + + conn, err := pgx.ConnectConfig(ctx, cfg) + if err != nil { + return nil, fmt.Errorf("connecting to %s/%s: %w", host, dbName, err) + } + return &pgDBRepository{conn: conn}, nil +} diff --git a/pkg/postgresql/database/core/database.go b/pkg/postgresql/database/core/database.go new file mode 100644 index 000000000..1ae2227d7 --- /dev/null +++ b/pkg/postgresql/database/core/database.go @@ -0,0 +1,941 @@ +package core + +import ( + "context" + "encoding/json" + stderrors "errors" + "fmt" + "slices" + "strings" + + cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" + "github.com/sethvargo/go-password/password" + enterprisev4 "github.com/splunk/splunk-operator/api/v4" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +// NewDBRepoFunc constructs a DBRepo adapter for the given host and database. +// Injected by the controller so the core never imports the pgx adapter directly. +type NewDBRepoFunc func(ctx context.Context, host, dbName, password string) (DBRepo, error) + +// PostgresDatabaseService is the application service entry point called by the primary adapter (reconciler). +// newDBRepo is injected to keep the core free of pgx imports. +func PostgresDatabaseService( + ctx context.Context, + c client.Client, + scheme *runtime.Scheme, + postgresDB *enterprisev4.PostgresDatabase, + newDBRepo NewDBRepoFunc, +) (ctrl.Result, error) { + logger := log.FromContext(ctx) + logger.Info("Reconciling PostgresDatabase", "name", postgresDB.Name, "namespace", postgresDB.Namespace) + + updateStatus := func(conditionType conditionTypes, conditionStatus metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileDBPhases) error { + return setStatus(ctx, c, postgresDB, conditionType, conditionStatus, reason, message, phase) + } + + // Finalizer: cleanup on deletion, register on creation. + if postgresDB.GetDeletionTimestamp() != nil { + if err := handleDeletion(ctx, c, postgresDB); err != nil { + logger.Error(err, "Cleanup failed for PostgresDatabase") + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + } + if !controllerutil.ContainsFinalizer(postgresDB, postgresDatabaseFinalizerName) { + controllerutil.AddFinalizer(postgresDB, postgresDatabaseFinalizerName) + if err := c.Update(ctx, postgresDB); err != nil { + logger.Error(err, "Failed to add finalizer to PostgresDatabase") + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + } + + // ObservedGeneration equality means all phases completed on the current spec — nothing to do. 
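+    // Status updates do not bump metadata.generation, so a finished reconcile
+    // stays skipped until the spec itself changes again.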
+ if postgresDB.Status.ObservedGeneration != nil && *postgresDB.Status.ObservedGeneration == postgresDB.Generation { + logger.Info("Spec unchanged and all phases complete, skipping") + return ctrl.Result{}, nil + } + + // Phase: ClusterValidation + cluster, clusterStatus, err := ensureClusterReady(ctx, c, postgresDB) + if err != nil { + if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterInfoFetchFailed, + "Can't reach Cluster CR due to transient errors", pendingDBPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, err + } + logger.Info("Cluster validation done", "clusterName", postgresDB.Spec.ClusterRef.Name, "status", clusterStatus) + + switch clusterStatus { + case ClusterNotFound: + if err := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterNotFound, "Cluster CR not found", pendingDBPhase); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: clusterNotFoundRetryDelay}, nil + + case ClusterNotReady, ClusterNoProvisionerRef: + if err := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterProvisioning, "Cluster is not in ready state yet", pendingDBPhase); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: retryDelay}, nil + + case ClusterReady: + if err := updateStatus(clusterReady, metav1.ConditionTrue, reasonClusterAvailable, "Cluster is operational", provisioningDBPhase); err != nil { + return ctrl.Result{}, err + } + } + + // Phase: RoleConflictCheck — verify no other SSA field manager already owns our roles. + roleConflicts := getRoleConflicts(postgresDB, cluster) + if len(roleConflicts) > 0 { + conflictMsg := fmt.Sprintf("Role conflict: %s. "+ + "If you deleted a previous PostgresDatabase, recreate it with the original name to re-adopt the orphaned resources.", + strings.Join(roleConflicts, ", ")) + logger.Error(nil, conflictMsg) + if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonRoleConflict, conflictMsg, failedDBPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, nil + } + + // We need the CNPG Cluster directly because PostgresCluster status does not yet + // surface managed role reconciliation state. + cnpgCluster := &cnpgv1.Cluster{} + if err := c.Get(ctx, types.NamespacedName{ + Name: cluster.Status.ProvisionerRef.Name, + Namespace: cluster.Status.ProvisionerRef.Namespace, + }, cnpgCluster); err != nil { + logger.Error(err, "Failed to fetch CNPG Cluster") + return ctrl.Result{}, err + } + + // Phase: CredentialProvisioning — secrets must exist before roles are patched. + // CNPG rejects a PasswordSecretRef pointing at a missing secret. + if err := reconcileUserSecrets(ctx, c, scheme, postgresDB); err != nil { + if statusErr := updateStatus(secretsReady, metav1.ConditionFalse, reasonSecretsCreationFailed, + fmt.Sprintf("Failed to reconcile user secrets: %v", err), provisioningDBPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, err + } + if err := updateStatus(secretsReady, metav1.ConditionTrue, reasonSecretsCreated, + fmt.Sprintf("All secrets provisioned for %d databases", len(postgresDB.Spec.Databases)), provisioningDBPhase); err != nil { + return ctrl.Result{}, err + } + + // Phase: ConnectionMetadata — ConfigMaps carry connection info consumers need as soon + // as databases are ready, so they are created alongside secrets. 
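+    // Credentials stay in the per-role Secrets created above; the ConfigMaps
+    // carry non-secret connection data only.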
+ endpoints := resolveClusterEndpoints(cluster, cnpgCluster, postgresDB.Namespace) + if err := reconcileRoleConfigMaps(ctx, c, scheme, postgresDB, endpoints); err != nil { + if statusErr := updateStatus(configMapsReady, metav1.ConditionFalse, reasonConfigMapsCreationFailed, + fmt.Sprintf("Failed to reconcile ConfigMaps: %v", err), provisioningDBPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, err + } + if err := updateStatus(configMapsReady, metav1.ConditionTrue, reasonConfigMapsCreated, + fmt.Sprintf("All ConfigMaps provisioned for %d databases", len(postgresDB.Spec.Databases)), provisioningDBPhase); err != nil { + return ctrl.Result{}, err + } + + // Phase: RoleProvisioning + desiredUsers := getDesiredUsers(postgresDB) + actualRoles := getUsersInClusterSpec(cluster) + var missing []string + for _, role := range desiredUsers { + if !slices.Contains(actualRoles, role) { + missing = append(missing, role) + } + } + + if len(missing) > 0 { + logger.Info("User spec changed, patching CNPG Cluster", "missing", missing) + if err := patchManagedRoles(ctx, c, postgresDB, cluster); err != nil { + logger.Error(err, "Failed to patch users in CNPG Cluster") + return ctrl.Result{}, err + } + if err := updateStatus(rolesReady, metav1.ConditionFalse, reasonWaitingForCNPG, + fmt.Sprintf("Waiting for %d roles to be reconciled", len(desiredUsers)), provisioningDBPhase); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: retryDelay}, nil + } + + notReadyRoles, err := verifyRolesReady(ctx, desiredUsers, cnpgCluster) + if err != nil { + if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonUsersCreationFailed, + fmt.Sprintf("Role creation failed: %v", err), failedDBPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, err + } + if len(notReadyRoles) > 0 { + if err := updateStatus(rolesReady, metav1.ConditionFalse, reasonWaitingForCNPG, + fmt.Sprintf("Waiting for roles to be reconciled: %v", notReadyRoles), provisioningDBPhase); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: retryDelay}, nil + } + if err := updateStatus(rolesReady, metav1.ConditionTrue, reasonUsersAvailable, + fmt.Sprintf("All %d users in PostgreSQL", len(desiredUsers)), provisioningDBPhase); err != nil { + return ctrl.Result{}, err + } + + // Phase: DatabaseProvisioning + if err := reconcileCNPGDatabases(ctx, c, scheme, postgresDB, cluster); err != nil { + logger.Error(err, "Failed to reconcile CNPG Databases") + return ctrl.Result{}, err + } + + notReadyDBs, err := verifyDatabasesReady(ctx, c, postgresDB) + if err != nil { + logger.Error(err, "Failed to verify database status") + return ctrl.Result{}, err + } + if len(notReadyDBs) > 0 { + if err := updateStatus(databasesReady, metav1.ConditionFalse, reasonWaitingForCNPG, + fmt.Sprintf("Waiting for databases to be ready: %v", notReadyDBs), provisioningDBPhase); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: retryDelay}, nil + } + if err := updateStatus(databasesReady, metav1.ConditionTrue, reasonDatabasesAvailable, + fmt.Sprintf("All %d databases ready", len(postgresDB.Spec.Databases)), readyDBPhase); err != nil { + return ctrl.Result{}, err + } + + // Phase: RWRolePrivileges + // Skipped when no new databases are detected — ALTER DEFAULT PRIVILEGES covers tables + // added by migrations on existing databases. 
Re-runs for all databases when a new one + // is added (idempotent for existing ones, required for the new one). + if hasNewDatabases(postgresDB) { + // Read from our own status — we created this secret and wrote the SecretKeySelector + // (name + key) when the cluster was provisioned. This avoids depending on CNPG's + // spec field and makes the key explicit. + if cluster.Status.Resources == nil || cluster.Status.Resources.SuperUserSecretRef == nil { + return ctrl.Result{}, fmt.Errorf("PostgresCluster %s has no superuser secret ref in status", cluster.Name) + } + superSecretRef := cluster.Status.Resources.SuperUserSecretRef + superSecret := &corev1.Secret{} + if err := c.Get(ctx, types.NamespacedName{ + Name: superSecretRef.Name, + Namespace: postgresDB.Namespace, + }, superSecret); err != nil { + return ctrl.Result{}, fmt.Errorf("fetching superuser secret %s: %w", superSecretRef.Name, err) + } + pw, ok := superSecret.Data[superSecretRef.Key] + if !ok { + return ctrl.Result{}, fmt.Errorf("superuser secret %s missing %q key", superSecretRef.Name, superSecretRef.Key) + } + + dbNames := make([]string, 0, len(postgresDB.Spec.Databases)) + for _, dbSpec := range postgresDB.Spec.Databases { + dbNames = append(dbNames, dbSpec.Name) + } + + if err := reconcileRWRolePrivileges(ctx, endpoints.RWHost, string(pw), dbNames, newDBRepo); err != nil { + if statusErr := updateStatus(privilegesReady, metav1.ConditionFalse, reasonPrivilegesGrantFailed, + fmt.Sprintf("Failed to grant RW role privileges: %v", err), provisioningDBPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } + return ctrl.Result{}, err + } + if err := updateStatus(privilegesReady, metav1.ConditionTrue, reasonPrivilegesGranted, + fmt.Sprintf("RW role privileges granted for all %d databases", len(postgresDB.Spec.Databases)), readyDBPhase); err != nil { + return ctrl.Result{}, err + } + } + + postgresDB.Status.Databases = populateDatabaseStatus(postgresDB) + postgresDB.Status.ObservedGeneration = &postgresDB.Generation + + if err := c.Status().Update(ctx, postgresDB); err != nil { + if errors.IsConflict(err) { + return ctrl.Result{Requeue: true}, nil + } + return ctrl.Result{}, fmt.Errorf("persisting final status: %w", err) + } + + logger.Info("All phases complete") + return ctrl.Result{}, nil +} + +// reconcileRWRolePrivileges calls the DBRepo port for each database. +// Errors are collected so all databases are attempted before returning. +func reconcileRWRolePrivileges( + ctx context.Context, + rwHost, superPassword string, + dbNames []string, + newDBRepo NewDBRepoFunc, +) error { + logger := log.FromContext(ctx) + var errs []error + for _, dbName := range dbNames { + repo, err := newDBRepo(ctx, rwHost, dbName, superPassword) + if err != nil { + logger.Error(err, "Failed to connect to database", "database", dbName) + errs = append(errs, fmt.Errorf("database %s: %w", dbName, err)) + continue + } + if err := repo.ExecGrants(ctx, dbName); err != nil { + logger.Error(err, "Failed to grant RW role privileges", "database", dbName) + errs = append(errs, fmt.Errorf("database %s: %w", dbName, err)) + continue + } + logger.Info("RW role privileges granted", "database", dbName, "rwRole", rwRoleName(dbName)) + } + return stderrors.Join(errs...) 
+} + +func ensureClusterReady(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase) (*enterprisev4.PostgresCluster, clusterReadyStatus, error) { + logger := log.FromContext(ctx) + cluster := &enterprisev4.PostgresCluster{} + if err := c.Get(ctx, types.NamespacedName{Name: postgresDB.Spec.ClusterRef.Name, Namespace: postgresDB.Namespace}, cluster); err != nil { + if errors.IsNotFound(err) { + return nil, ClusterNotFound, nil + } + logger.Error(err, "Failed to fetch Cluster", "name", postgresDB.Spec.ClusterRef.Name) + return nil, ClusterNotReady, err + } + if cluster.Status.Phase == nil || *cluster.Status.Phase != string(ClusterReady) { + return cluster, ClusterNotReady, nil + } + if cluster.Status.ProvisionerRef == nil { + return cluster, ClusterNoProvisionerRef, nil + } + return cluster, ClusterReady, nil +} + +func getDesiredUsers(postgresDB *enterprisev4.PostgresDatabase) []string { + users := make([]string, 0, len(postgresDB.Spec.Databases)*2) + for _, dbSpec := range postgresDB.Spec.Databases { + users = append(users, adminRoleName(dbSpec.Name), rwRoleName(dbSpec.Name)) + } + return users +} + +func getUsersInClusterSpec(cluster *enterprisev4.PostgresCluster) []string { + users := make([]string, 0, len(cluster.Spec.ManagedRoles)) + for _, role := range cluster.Spec.ManagedRoles { + users = append(users, role.Name) + } + return users +} + +func getRoleConflicts(postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster) []string { + myManager := fieldManagerName(postgresDB.Name) + desired := make(map[string]struct{}, len(postgresDB.Spec.Databases)*2) + for _, dbSpec := range postgresDB.Spec.Databases { + desired[adminRoleName(dbSpec.Name)] = struct{}{} + desired[rwRoleName(dbSpec.Name)] = struct{}{} + } + roleOwners := managedRoleOwners(cluster.ManagedFields) + var conflicts []string + for roleName := range desired { + if owner, exists := roleOwners[roleName]; exists && owner != myManager { + conflicts = append(conflicts, fmt.Sprintf("%s (owned by %s)", roleName, owner)) + } + } + return conflicts +} + +func managedRoleOwners(managedFields []metav1.ManagedFieldsEntry) map[string]string { + owners := make(map[string]string) + for _, mf := range managedFields { + if mf.FieldsV1 == nil { + continue + } + for _, name := range parseRoleNames(mf.FieldsV1.Raw) { + owners[name] = mf.Manager + } + } + return owners +} + +func parseRoleNames(raw []byte) []string { + var fields map[string]any + if err := json.Unmarshal(raw, &fields); err != nil { + return nil + } + spec, _ := fields["f:spec"].(map[string]any) + roles, _ := spec["f:managedRoles"].(map[string]any) + var names []string + for key := range roles { + var k struct{ Name string } + if err := json.Unmarshal([]byte(strings.TrimPrefix(key, "k:")), &k); err == nil && k.Name != "" { + names = append(names, k.Name) + } + } + return names +} + +func patchManagedRoles(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster) error { + logger := log.FromContext(ctx) + allRoles := make([]enterprisev4.ManagedRole, 0, len(postgresDB.Spec.Databases)*2) + for _, dbSpec := range postgresDB.Spec.Databases { + allRoles = append(allRoles, + enterprisev4.ManagedRole{ + Name: adminRoleName(dbSpec.Name), + Exists: true, + PasswordSecretRef: &corev1.SecretKeySelector{LocalObjectReference: corev1.LocalObjectReference{Name: roleSecretName(postgresDB.Name, dbSpec.Name, secretRoleAdmin)}, + Key: secretKeyPassword}, + }, + enterprisev4.ManagedRole{ + Name: 
rwRoleName(dbSpec.Name), + Exists: true, + PasswordSecretRef: &corev1.SecretKeySelector{LocalObjectReference: corev1.LocalObjectReference{Name: roleSecretName(postgresDB.Name, dbSpec.Name, secretRoleRW)}, + Key: secretKeyPassword}, + }) + } + rolePatch := &unstructured.Unstructured{ + Object: map[string]any{ + "apiVersion": cluster.APIVersion, + "kind": cluster.Kind, + "metadata": map[string]any{"name": cluster.Name, "namespace": cluster.Namespace}, + "spec": map[string]any{"managedRoles": allRoles}, + }, + } + fieldManager := fieldManagerName(postgresDB.Name) + if err := c.Patch(ctx, rolePatch, client.Apply, client.FieldOwner(fieldManager)); err != nil { + logger.Error(err, "Failed to add users to PostgresCluster", "postgresDatabase", postgresDB.Name) + return fmt.Errorf("patching managed roles for PostgresDatabase %s: %w", postgresDB.Name, err) + } + logger.Info("Users added to PostgresCluster via SSA", "postgresDatabase", postgresDB.Name, "roleCount", len(allRoles)) + return nil +} + +func verifyRolesReady(ctx context.Context, expectedUsers []string, cnpgCluster *cnpgv1.Cluster) ([]string, error) { + logger := log.FromContext(ctx) + if cnpgCluster.Status.ManagedRolesStatus.CannotReconcile != nil { + for _, userName := range expectedUsers { + if errs, exists := cnpgCluster.Status.ManagedRolesStatus.CannotReconcile[userName]; exists { + return nil, fmt.Errorf("user %s reconciliation failed: %v", userName, errs) + } + } + } + reconciled := cnpgCluster.Status.ManagedRolesStatus.ByStatus[cnpgv1.RoleStatusReconciled] + var notReady []string + for _, userName := range expectedUsers { + if !slices.Contains(reconciled, userName) { + notReady = append(notReady, userName) + } + } + if len(notReady) > 0 { + logger.Info("Users not reconciled yet", "pending", notReady) + } + return notReady, nil +} + +func reconcileCNPGDatabases(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster) error { + logger := log.FromContext(ctx) + for _, dbSpec := range postgresDB.Spec.Databases { + cnpgDBName := cnpgDatabaseName(postgresDB.Name, dbSpec.Name) + reclaimPolicy := cnpgv1.DatabaseReclaimDelete + if dbSpec.DeletionPolicy == deletionPolicyRetain { + reclaimPolicy = cnpgv1.DatabaseReclaimRetain + } + cnpgDB := &cnpgv1.Database{ + ObjectMeta: metav1.ObjectMeta{Name: cnpgDBName, Namespace: postgresDB.Namespace}, + } + _, err := controllerutil.CreateOrUpdate(ctx, c, cnpgDB, func() error { + cnpgDB.Spec = cnpgv1.DatabaseSpec{ + Name: dbSpec.Name, + Owner: adminRoleName(dbSpec.Name), + ClusterRef: corev1.LocalObjectReference{Name: cluster.Status.ProvisionerRef.Name}, + ReclaimPolicy: reclaimPolicy, + } + reAdopting := cnpgDB.Annotations[annotationRetainedFrom] == postgresDB.Name + if reAdopting { + logger.Info("Re-adopting orphaned CNPG Database", "name", cnpgDBName) + delete(cnpgDB.Annotations, annotationRetainedFrom) + } + if cnpgDB.CreationTimestamp.IsZero() || reAdopting { + return controllerutil.SetControllerReference(postgresDB, cnpgDB, scheme) + } + return nil + }) + if err != nil { + return fmt.Errorf("reconciling CNPG Database %s: %w", cnpgDBName, err) + } + } + return nil +} + +func verifyDatabasesReady(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase) ([]string, error) { + var notReady []string + for _, dbSpec := range postgresDB.Spec.Databases { + cnpgDBName := cnpgDatabaseName(postgresDB.Name, dbSpec.Name) + cnpgDB := &cnpgv1.Database{} + if err := c.Get(ctx, types.NamespacedName{Name: 
cnpgDBName, Namespace: postgresDB.Namespace}, cnpgDB); err != nil { + return nil, fmt.Errorf("getting CNPG Database %s: %w", cnpgDBName, err) + } + if cnpgDB.Status.Applied == nil || !*cnpgDB.Status.Applied { + notReady = append(notReady, dbSpec.Name) + } + } + return notReady, nil +} + +func setStatus(ctx context.Context, c client.Client, db *enterprisev4.PostgresDatabase, conditionType conditionTypes, conditionStatus metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileDBPhases) error { + meta.SetStatusCondition(&db.Status.Conditions, metav1.Condition{ + Type: string(conditionType), + Status: conditionStatus, + Reason: string(reason), + Message: message, + ObservedGeneration: db.Generation, + }) + p := string(phase) + db.Status.Phase = &p + return c.Status().Update(ctx, db) +} + +func buildDeletionPlan(databases []enterprisev4.DatabaseDefinition) deletionPlan { + var plan deletionPlan + for _, db := range databases { + if db.DeletionPolicy == deletionPolicyRetain { + plan.retained = append(plan.retained, db) + } else { + plan.deleted = append(plan.deleted, db) + } + } + return plan +} + +func handleDeletion(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase) error { + plan := buildDeletionPlan(postgresDB.Spec.Databases) + if err := orphanRetainedResources(ctx, c, postgresDB, plan.retained); err != nil { + return err + } + if err := deleteRemovedResources(ctx, c, postgresDB, plan.deleted); err != nil { + return err + } + if err := cleanupManagedRoles(ctx, c, postgresDB, plan); err != nil { + return err + } + controllerutil.RemoveFinalizer(postgresDB, postgresDatabaseFinalizerName) + if err := c.Update(ctx, postgresDB); err != nil { + if errors.IsNotFound(err) { + return nil + } + return fmt.Errorf("removing finalizer: %w", err) + } + log.FromContext(ctx).Info("Cleanup complete", "name", postgresDB.Name, "retained", len(plan.retained), "deleted", len(plan.deleted)) + return nil +} + +func orphanRetainedResources(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, retained []enterprisev4.DatabaseDefinition) error { + if err := orphanCNPGDatabases(ctx, c, postgresDB, retained); err != nil { + return err + } + if err := orphanConfigMaps(ctx, c, postgresDB, retained); err != nil { + return err + } + return orphanSecrets(ctx, c, postgresDB, retained) +} + +func deleteRemovedResources(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, deleted []enterprisev4.DatabaseDefinition) error { + if err := deleteCNPGDatabases(ctx, c, postgresDB, deleted); err != nil { + return err + } + if err := deleteConfigMaps(ctx, c, postgresDB, deleted); err != nil { + return err + } + return deleteSecrets(ctx, c, postgresDB, deleted) +} + +func cleanupManagedRoles(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, plan deletionPlan) error { + if len(plan.deleted) == 0 { + return nil + } + cluster := &enterprisev4.PostgresCluster{} + if err := c.Get(ctx, types.NamespacedName{Name: postgresDB.Spec.ClusterRef.Name, Namespace: postgresDB.Namespace}, cluster); err != nil { + if !errors.IsNotFound(err) { + return fmt.Errorf("getting PostgresCluster for role cleanup: %w", err) + } + log.FromContext(ctx).Info("PostgresCluster already deleted, skipping role cleanup") + return nil + } + return patchManagedRolesOnDeletion(ctx, c, postgresDB, cluster, plan.retained) +} + +func orphanCNPGDatabases(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, databases 
[]enterprisev4.DatabaseDefinition) error { + logger := log.FromContext(ctx) + for _, dbSpec := range databases { + name := cnpgDatabaseName(postgresDB.Name, dbSpec.Name) + db := &cnpgv1.Database{} + if err := c.Get(ctx, types.NamespacedName{Name: name, Namespace: postgresDB.Namespace}, db); err != nil { + if errors.IsNotFound(err) { + continue + } + return fmt.Errorf("getting CNPG Database %s for orphaning: %w", name, err) + } + if db.Annotations[annotationRetainedFrom] == postgresDB.Name { + continue + } + stripOwnerReference(db, postgresDB.UID) + if db.Annotations == nil { + db.Annotations = make(map[string]string) + } + db.Annotations[annotationRetainedFrom] = postgresDB.Name + if err := c.Update(ctx, db); err != nil { + return fmt.Errorf("orphaning CNPG Database %s: %w", name, err) + } + logger.Info("Orphaned CNPG Database CR", "name", name) + } + return nil +} + +func orphanConfigMaps(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, databases []enterprisev4.DatabaseDefinition) error { + logger := log.FromContext(ctx) + for _, dbSpec := range databases { + name := configMapName(postgresDB.Name, dbSpec.Name) + cm := &corev1.ConfigMap{} + if err := c.Get(ctx, types.NamespacedName{Name: name, Namespace: postgresDB.Namespace}, cm); err != nil { + if errors.IsNotFound(err) { + continue + } + return fmt.Errorf("getting ConfigMap %s for orphaning: %w", name, err) + } + if cm.Annotations[annotationRetainedFrom] == postgresDB.Name { + continue + } + stripOwnerReference(cm, postgresDB.UID) + if cm.Annotations == nil { + cm.Annotations = make(map[string]string) + } + cm.Annotations[annotationRetainedFrom] = postgresDB.Name + if err := c.Update(ctx, cm); err != nil { + return fmt.Errorf("orphaning ConfigMap %s: %w", name, err) + } + logger.Info("Orphaned ConfigMap", "name", name) + } + return nil +} + +func orphanSecrets(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, databases []enterprisev4.DatabaseDefinition) error { + logger := log.FromContext(ctx) + for _, dbSpec := range databases { + for _, role := range []string{secretRoleAdmin, secretRoleRW} { + name := roleSecretName(postgresDB.Name, dbSpec.Name, role) + secret := &corev1.Secret{} + if err := c.Get(ctx, types.NamespacedName{Name: name, Namespace: postgresDB.Namespace}, secret); err != nil { + if errors.IsNotFound(err) { + continue + } + return fmt.Errorf("getting Secret %s for orphaning: %w", name, err) + } + if secret.Annotations[annotationRetainedFrom] == postgresDB.Name { + continue + } + stripOwnerReference(secret, postgresDB.UID) + if secret.Annotations == nil { + secret.Annotations = make(map[string]string) + } + secret.Annotations[annotationRetainedFrom] = postgresDB.Name + if err := c.Update(ctx, secret); err != nil { + return fmt.Errorf("orphaning Secret %s: %w", name, err) + } + logger.Info("Orphaned Secret", "name", name) + } + } + return nil +} + +func deleteCNPGDatabases(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, databases []enterprisev4.DatabaseDefinition) error { + logger := log.FromContext(ctx) + for _, dbSpec := range databases { + name := cnpgDatabaseName(postgresDB.Name, dbSpec.Name) + db := &cnpgv1.Database{ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: postgresDB.Namespace}} + if err := c.Delete(ctx, db); err != nil { + if errors.IsNotFound(err) { + continue + } + return fmt.Errorf("deleting CNPG Database %s: %w", name, err) + } + logger.Info("Deleted CNPG Database CR", "name", name) + } + return nil +} + 
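+// Illustrative sketch (an assumption-labeled example, not part of the reconciler):
+// stripOwnerReference, defined below, drops only the owner reference whose UID
+// matches, which is what the orphaning helpers above rely on to leave any other
+// owners intact. A minimal unit test could look like this:
+//
+//	func TestStripOwnerReference(t *testing.T) {
+//		secret := &corev1.Secret{}
+//		secret.SetOwnerReferences([]metav1.OwnerReference{
+//			{Kind: "PostgresDatabase", Name: "db-a", UID: types.UID("uid-a")},
+//			{Kind: "PostgresCluster", Name: "keep", UID: types.UID("uid-b")},
+//		})
+//		stripOwnerReference(secret, types.UID("uid-a"))
+//		if refs := secret.GetOwnerReferences(); len(refs) != 1 || refs[0].UID != "uid-b" {
+//			t.Fatalf("expected only uid-b to remain, got %+v", refs)
+//		}
+//	}
+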
+func deleteConfigMaps(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, databases []enterprisev4.DatabaseDefinition) error { + logger := log.FromContext(ctx) + for _, dbSpec := range databases { + name := configMapName(postgresDB.Name, dbSpec.Name) + cm := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: postgresDB.Namespace}} + if err := c.Delete(ctx, cm); err != nil { + if errors.IsNotFound(err) { + continue + } + return fmt.Errorf("deleting ConfigMap %s: %w", name, err) + } + logger.Info("Deleted ConfigMap", "name", name) + } + return nil +} + +func deleteSecrets(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, databases []enterprisev4.DatabaseDefinition) error { + logger := log.FromContext(ctx) + for _, dbSpec := range databases { + for _, role := range []string{secretRoleAdmin, secretRoleRW} { + name := roleSecretName(postgresDB.Name, dbSpec.Name, role) + secret := &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: postgresDB.Namespace}} + if err := c.Delete(ctx, secret); err != nil { + if errors.IsNotFound(err) { + continue + } + return fmt.Errorf("deleting Secret %s: %w", name, err) + } + logger.Info("Deleted Secret", "name", name) + } + } + return nil +} + +func buildRetainedRoles(postgresDBName string, retained []enterprisev4.DatabaseDefinition) []enterprisev4.ManagedRole { + roles := make([]enterprisev4.ManagedRole, 0, len(retained)*2) + for _, dbSpec := range retained { + roles = append(roles, + enterprisev4.ManagedRole{ + Name: adminRoleName(dbSpec.Name), + Exists: true, + PasswordSecretRef: &corev1.SecretKeySelector{LocalObjectReference: corev1.LocalObjectReference{Name: roleSecretName(postgresDBName, dbSpec.Name, secretRoleAdmin)}, + Key: secretKeyPassword}, + }, + enterprisev4.ManagedRole{ + Name: rwRoleName(dbSpec.Name), + Exists: true, + PasswordSecretRef: &corev1.SecretKeySelector{LocalObjectReference: corev1.LocalObjectReference{Name: roleSecretName(postgresDBName, dbSpec.Name, secretRoleRW)}, + Key: secretKeyPassword}, + }, + ) + } + return roles +} + +func patchManagedRolesOnDeletion(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster, retained []enterprisev4.DatabaseDefinition) error { + roles := buildRetainedRoles(postgresDB.Name, retained) + rolePatch := &unstructured.Unstructured{ + Object: map[string]any{ + "apiVersion": cluster.APIVersion, + "kind": cluster.Kind, + "metadata": map[string]any{"name": cluster.Name, "namespace": cluster.Namespace}, + "spec": map[string]any{"managedRoles": roles}, + }, + } + if err := c.Patch(ctx, rolePatch, client.Apply, client.FieldOwner(fieldManagerName(postgresDB.Name))); err != nil { + return fmt.Errorf("patching managed roles on deletion: %w", err) + } + log.FromContext(ctx).Info("Patched managed roles on deletion", "postgresDatabase", postgresDB.Name, "retainedRoles", len(roles)) + return nil +} + +func stripOwnerReference(obj metav1.Object, ownerUID types.UID) { + refs := obj.GetOwnerReferences() + filtered := make([]metav1.OwnerReference, 0, len(refs)) + for _, ref := range refs { + if ref.UID != ownerUID { + filtered = append(filtered, ref) + } + } + obj.SetOwnerReferences(filtered) +} + +func adoptResource(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, obj client.Object) error { + annotations := obj.GetAnnotations() + delete(annotations, annotationRetainedFrom) + obj.SetAnnotations(annotations) + if 
err := controllerutil.SetControllerReference(postgresDB, obj, scheme); err != nil { + return err + } + return c.Update(ctx, obj) +} + +func reconcileUserSecrets(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase) error { + for _, dbSpec := range postgresDB.Spec.Databases { + if err := ensureSecret(ctx, c, scheme, postgresDB, adminRoleName(dbSpec.Name), roleSecretName(postgresDB.Name, dbSpec.Name, secretRoleAdmin)); err != nil { + return err + } + if err := ensureSecret(ctx, c, scheme, postgresDB, rwRoleName(dbSpec.Name), roleSecretName(postgresDB.Name, dbSpec.Name, secretRoleRW)); err != nil { + return err + } + } + return nil +} + +func ensureSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, roleName, secretName string) error { + secret, err := getSecret(ctx, c, postgresDB.Namespace, secretName) + if err != nil { + return err + } + logger := log.FromContext(ctx) + switch { + case secret == nil: + logger.Info("Creating missing user secret", "name", secretName) + return createUserSecret(ctx, c, scheme, postgresDB, roleName, secretName) + case secret.Annotations[annotationRetainedFrom] == postgresDB.Name: + logger.Info("Re-adopting orphaned secret", "name", secretName) + return adoptResource(ctx, c, scheme, postgresDB, secret) + } + return nil +} + +func getSecret(ctx context.Context, c client.Client, namespace, name string) (*corev1.Secret, error) { + secret := &corev1.Secret{} + err := c.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, secret) + if errors.IsNotFound(err) { + return nil, nil + } + if err != nil { + return nil, err + } + return secret, nil +} + +func createUserSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, roleName, secretName string) error { + pw, err := generatePassword() + if err != nil { + return err + } + secret := buildPasswordSecret(postgresDB, secretName, roleName, pw) + if err := controllerutil.SetControllerReference(postgresDB, secret, scheme); err != nil { + return fmt.Errorf("setting owner reference on Secret %s: %w", secretName, err) + } + if err := c.Create(ctx, secret); err != nil { + if errors.IsAlreadyExists(err) { + return nil + } + return err + } + return nil +} + +func buildPasswordSecret(postgresDB *enterprisev4.PostgresDatabase, secretName, roleName, pw string) *corev1.Secret { + return &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: postgresDB.Namespace, + Labels: map[string]string{labelManagedBy: "splunk-operator", labelCNPGReload: "true"}, + }, + Data: map[string][]byte{"username": []byte(roleName), secretKeyPassword: []byte(pw)}, + } +} + +func reconcileRoleConfigMaps(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, endpoints clusterEndpoints) error { + logger := log.FromContext(ctx) + for _, dbSpec := range postgresDB.Spec.Databases { + cmName := configMapName(postgresDB.Name, dbSpec.Name) + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: cmName, + Namespace: postgresDB.Namespace, + Labels: map[string]string{labelManagedBy: "splunk-operator"}, + }, + } + _, err := controllerutil.CreateOrUpdate(ctx, c, cm, func() error { + cm.Data = buildDatabaseConfigMapBody(dbSpec.Name, endpoints) + reAdopting := cm.Annotations[annotationRetainedFrom] == postgresDB.Name + if reAdopting { + logger.Info("Re-adopting orphaned ConfigMap", "name", cmName) + 
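+				// Dropping the retained-from marker here and re-setting the controller
+				// reference below is what completes re-adoption of a retained ConfigMap.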
delete(cm.Annotations, annotationRetainedFrom) + } + if cm.CreationTimestamp.IsZero() || reAdopting { + return controllerutil.SetControllerReference(postgresDB, cm, scheme) + } + return nil + }) + if err != nil { + return fmt.Errorf("reconciling ConfigMap %s: %w", cmName, err) + } + } + return nil +} + +func buildDatabaseConfigMapBody(dbName string, endpoints clusterEndpoints) map[string]string { + data := map[string]string{ + "dbname": dbName, + "port": postgresPort, + "rw-host": endpoints.RWHost, + "ro-host": endpoints.ROHost, + "admin-user": adminRoleName(dbName), + "rw-user": rwRoleName(dbName), + } + if endpoints.PoolerRWHost != "" { + data["pooler-rw-host"] = endpoints.PoolerRWHost + } + if endpoints.PoolerROHost != "" { + data["pooler-ro-host"] = endpoints.PoolerROHost + } + return data +} + +func resolveClusterEndpoints(cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster, namespace string) clusterEndpoints { + // FQDN so consumers in other namespaces can resolve without extra config. + endpoints := clusterEndpoints{ + RWHost: fmt.Sprintf("%s.%s.svc.cluster.local", cnpgCluster.Status.WriteService, namespace), + ROHost: fmt.Sprintf("%s.%s.svc.cluster.local", cnpgCluster.Status.ReadService, namespace), + } + if cluster.Status.ConnectionPoolerStatus != nil && cluster.Status.ConnectionPoolerStatus.Enabled { + endpoints.PoolerRWHost = fmt.Sprintf("%s-pooler-%s.%s.svc.cluster.local", cnpgCluster.Name, readWriteEndpoint, namespace) + endpoints.PoolerROHost = fmt.Sprintf("%s-pooler-%s.%s.svc.cluster.local", cnpgCluster.Name, readOnlyEndpoint, namespace) + } + return endpoints +} + +func populateDatabaseStatus(postgresDB *enterprisev4.PostgresDatabase) []enterprisev4.DatabaseInfo { + databases := make([]enterprisev4.DatabaseInfo, 0, len(postgresDB.Spec.Databases)) + for _, dbSpec := range postgresDB.Spec.Databases { + databases = append(databases, enterprisev4.DatabaseInfo{ + Name: dbSpec.Name, + Ready: true, + AdminUserSecretRef: &corev1.SecretKeySelector{LocalObjectReference: corev1.LocalObjectReference{Name: roleSecretName(postgresDB.Name, dbSpec.Name, secretRoleAdmin)}, Key: secretKeyPassword}, + RWUserSecretRef: &corev1.SecretKeySelector{LocalObjectReference: corev1.LocalObjectReference{Name: roleSecretName(postgresDB.Name, dbSpec.Name, secretRoleRW)}, Key: secretKeyPassword}, + ConfigMapRef: &corev1.LocalObjectReference{Name: configMapName(postgresDB.Name, dbSpec.Name)}, + }) + } + return databases +} + +func hasNewDatabases(postgresDB *enterprisev4.PostgresDatabase) bool { + existing := make(map[string]bool, len(postgresDB.Status.Databases)) + for _, dbInfo := range postgresDB.Status.Databases { + existing[dbInfo.Name] = true + } + for _, dbSpec := range postgresDB.Spec.Databases { + if !existing[dbSpec.Name] { + return true + } + } + return false +} + +// Naming helpers — single source of truth shared by creation and status wiring. 
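+//
+// Illustrative example (hypothetical names): a PostgresDatabase "orders-db" with a
+// database entry "billing" yields roles "billing_admin" and "billing_rw", CNPG
+// Database "orders-db-billing", Secrets "orders-db-billing-admin" and
+// "orders-db-billing-rw", ConfigMap "orders-db-billing-config", and SSA field
+// manager "postgresdatabase-orders-db".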
+func fieldManagerName(postgresDBName string) string { return fieldManagerPrefix + postgresDBName } +func adminRoleName(dbName string) string { return dbName + "_admin" } +func rwRoleName(dbName string) string { return dbName + "_rw" } +func cnpgDatabaseName(postgresDBName, dbName string) string { + return fmt.Sprintf("%s-%s", postgresDBName, dbName) +} +func roleSecretName(postgresDBName, dbName, role string) string { + return fmt.Sprintf("%s-%s-%s", postgresDBName, dbName, role) +} +func configMapName(postgresDBName, dbName string) string { + return fmt.Sprintf("%s-%s-config", postgresDBName, dbName) +} + +// generatePassword uses crypto/rand (via sethvargo/go-password) — predictable passwords +// are unacceptable for credentials that protect live database access. +func generatePassword() (string, error) { + return password.Generate(passwordLength, passwordDigits, passwordSymbols, false, true) +} diff --git a/pkg/postgresql/database/core/ports.go b/pkg/postgresql/database/core/ports.go new file mode 100644 index 000000000..0ee71bfe4 --- /dev/null +++ b/pkg/postgresql/database/core/ports.go @@ -0,0 +1,10 @@ +package core + +import "context" + +// DBRepo is the port for all direct database operations that require a +// superuser connection, bypassing any connection pooler. +// Adapters implementing this port live in adapter/. +type DBRepo interface { + ExecGrants(ctx context.Context, dbName string) error +} diff --git a/pkg/postgresql/database/core/types.go b/pkg/postgresql/database/core/types.go new file mode 100644 index 000000000..0d1fa116a --- /dev/null +++ b/pkg/postgresql/database/core/types.go @@ -0,0 +1,94 @@ +package core + +import ( + "time" + + enterprisev4 "github.com/splunk/splunk-operator/api/v4" +) + +type reconcileDBPhases string +type conditionTypes string +type conditionReasons string +type clusterReadyStatus string + +const ( + retryDelay = time.Second * 15 + clusterNotFoundRetryDelay = time.Second * 30 + + postgresPort string = "5432" + + readOnlyEndpoint string = "ro" + readWriteEndpoint string = "rw" + + deletionPolicyRetain string = "Retain" + + postgresDatabaseFinalizerName string = "postgresdatabases.enterprise.splunk.com/finalizer" + annotationRetainedFrom string = "enterprise.splunk.com/retained-from" + + fieldManagerPrefix string = "postgresdatabase-" + + secretRoleAdmin string = "admin" + secretRoleRW string = "rw" + secretKeyPassword string = "password" + + labelManagedBy string = "app.kubernetes.io/managed-by" + labelCNPGReload string = "cnpg.io/reload" + + // Password generation — no symbols for PostgreSQL connection string compatibility. 
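+	// Illustrative: Generate(32, 8, 0, false, true) yields a 32-character password
+	// containing exactly 8 digits and no symbols, with repeats allowed.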
+ passwordLength = 32 + passwordDigits = 8 + passwordSymbols = 0 + + // DB reconcile phases + readyDBPhase reconcileDBPhases = "Ready" + pendingDBPhase reconcileDBPhases = "Pending" + provisioningDBPhase reconcileDBPhases = "Provisioning" + failedDBPhase reconcileDBPhases = "Failed" + + // condition types + clusterReady conditionTypes = "ClusterReady" + rolesReady conditionTypes = "RolesReady" + databasesReady conditionTypes = "DatabasesReady" + secretsReady conditionTypes = "SecretsReady" + configMapsReady conditionTypes = "ConfigMapsReady" + privilegesReady conditionTypes = "PrivilegesReady" + + // condition reasons + reasonClusterNotFound conditionReasons = "ClusterNotFound" + reasonClusterProvisioning conditionReasons = "ClusterProvisioning" + reasonClusterInfoFetchFailed conditionReasons = "ClusterInfoFetchNotPossible" + reasonClusterAvailable conditionReasons = "ClusterAvailable" + reasonDatabasesAvailable conditionReasons = "DatabasesAvailable" + reasonSecretsCreated conditionReasons = "SecretsCreated" + reasonSecretsCreationFailed conditionReasons = "SecretsCreationFailed" + reasonWaitingForCNPG conditionReasons = "WaitingForCNPG" + reasonUsersCreationFailed conditionReasons = "UsersCreationFailed" + reasonUsersAvailable conditionReasons = "UsersAvailable" + reasonRoleConflict conditionReasons = "RoleConflict" + reasonConfigMapsCreationFailed conditionReasons = "ConfigMapsCreationFailed" + reasonConfigMapsCreated conditionReasons = "ConfigMapsCreated" + reasonPrivilegesGranted conditionReasons = "PrivilegesGranted" + reasonPrivilegesGrantFailed conditionReasons = "PrivilegesGrantFailed" + + // ClusterReady sentinel values returned by ensureClusterReady. + // Exported so the controller adapter can switch on them if needed. + ClusterNotFound clusterReadyStatus = "NotFound" + ClusterNotReady clusterReadyStatus = "NotReady" + ClusterNoProvisionerRef clusterReadyStatus = "NoProvisionerRef" + ClusterReady clusterReadyStatus = "Ready" +) + +// clusterEndpoints holds fully-resolved connection hostnames for a cluster. +// PoolerRWHost and PoolerROHost are empty when connection pooling is disabled. +type clusterEndpoints struct { + RWHost string + ROHost string + PoolerRWHost string + PoolerROHost string +} + +// deletionPlan separates databases by their DeletionPolicy for the cleanup workflow. +type deletionPlan struct { + retained []enterprisev4.DatabaseDefinition + deleted []enterprisev4.DatabaseDefinition +} diff --git a/pkg/splunk/common/names.go b/pkg/splunk/common/names.go index cc70de668..7d1c5d4ca 100644 --- a/pkg/splunk/common/names.go +++ b/pkg/splunk/common/names.go @@ -108,6 +108,9 @@ const ( // MockClientInduceErrorDelete represents an error for delete Api MockClientInduceErrorDelete = "mockClientDeleteError" + // MockClientInduceErrorApply represents an error for apply Api (controller-runtime v0.22+ / k8s v0.34+) + MockClientInduceErrorApply = "mockClientApplyError" + // Rerr represents a random error strting Rerr = "randomError" ) diff --git a/pkg/splunk/test/controller.go b/pkg/splunk/test/controller.go index 6d43fa149..0274b63ad 100644 --- a/pkg/splunk/test/controller.go +++ b/pkg/splunk/test/controller.go @@ -504,6 +504,16 @@ func (c MockClient) Status() client.StatusWriter { return c.StatusWriter } +// Apply applies the given apply configuration to the mock client's state. +// Required by client.Client in controller-runtime v0.22+ (k8s v0.34+). 
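+//
+// Illustrative usage in a unit test (a sketch; assumes the package's NewMockClient
+// constructor, which initializes InduceErrorKind):
+//
+//	c := NewMockClient()
+//	c.InduceErrorKind[splcommon.MockClientInduceErrorApply] = errors.New("boom")
+//	err := c.Apply(context.TODO(), nil) // err is the induced "boom" error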
+func (c MockClient) Apply(ctx context.Context, obj runtime.ApplyConfiguration, opts ...client.ApplyOption) error {
+	if value, ok := c.InduceErrorKind[splcommon.MockClientInduceErrorApply]; ok && value != nil {
+		return value
+	}
+	c.Calls["Apply"] = append(c.Calls["Apply"], MockFuncCall{CTX: ctx})
+	return nil
+}
+
 // ResetCalls resets the function call tracker
 func (c *MockClient) ResetCalls() {
 	c.Calls = make(map[string][]MockFuncCall)
diff --git a/pkg/splunk/util/util.go b/pkg/splunk/util/util.go
index df1252f25..5d6b4a214 100644
--- a/pkg/splunk/util/util.go
+++ b/pkg/splunk/util/util.go
@@ -211,7 +211,7 @@ func PodExecCommand(ctx context.Context, c splcommon.ControllerClient, podName s
 			return "", "", err
 		}
 	}
-	restClient, err := podExecRESTClientForGVK(gvk, false, restConfig, serializer.NewCodecFactory(scheme.Scheme), http.DefaultClient)
+	restClient, err := podExecRESTClientForGVK(gvk, false, false, restConfig, serializer.NewCodecFactory(scheme.Scheme), http.DefaultClient)
 	if err != nil {
 		return "", "", err
 	}
diff --git a/pkg/splunk/util/util_test.go b/pkg/splunk/util/util_test.go
index 19c4b27df..b42d9442d 100644
--- a/pkg/splunk/util/util_test.go
+++ b/pkg/splunk/util/util_test.go
@@ -48,7 +48,7 @@ var fakePodExecGetConfig = func() (*rest.Config, error) {
 }
 
 // Faking RESTClientForGVK
-var fakePodExecRESTClientForGVK = func(gvk schema.GroupVersionKind, isUnstructured bool, baseConfig *rest.Config, codecs serializer.CodecFactory, client *http.Client) (rest.Interface, error) {
+var fakePodExecRESTClientForGVK = func(gvk schema.GroupVersionKind, forceDisableProtoBuf bool, isUnstructured bool, baseConfig *rest.Config, codecs serializer.CodecFactory, client *http.Client) (rest.Interface, error) {
 	return &fakeRestInterface{}, errors.New("fakeerror")
 }
 
diff --git a/test/connect-to-postgres-cluster.sh b/test/connect-to-postgres-cluster.sh
new file mode 100755
index 000000000..5f45e92d2
--- /dev/null
+++ b/test/connect-to-postgres-cluster.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+# filepath: test/connect-to-postgres-cluster.sh
+
+set -e
+
+# Color output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+# Default values
+NAMESPACE="${NAMESPACE:-default}"
+POSTGRES_CLUSTER_NAME="${1:-}"
+
+if [ -z "$POSTGRES_CLUSTER_NAME" ]; then
+    echo -e "${RED}Error: PostgresCluster name is required${NC}"
+    echo "Usage: $0 <postgres-cluster-name> [namespace]"
+    echo "Example: $0 my-postgres-cluster default"
+    exit 1
+fi
+
+if [ -n "$2" ]; then
+    NAMESPACE="$2"
+fi
+
+echo -e "${YELLOW}Connecting to PostgresCluster: $POSTGRES_CLUSTER_NAME in namespace: $NAMESPACE${NC}"
+
+# Get ConfigMap name from PostgresCluster status
+CONFIGMAP_NAME=$(kubectl get postgrescluster "$POSTGRES_CLUSTER_NAME" -n "$NAMESPACE" \
+    -o jsonpath='{.status.resources.configMapRef.name}' 2>/dev/null)
+
+if [ -z "$CONFIGMAP_NAME" ]; then
+    echo -e "${RED}Error: ConfigMap reference not found in PostgresCluster status${NC}"
+    echo "Make sure the PostgresCluster is ready and the ConfigMap has been created"
+    exit 1
+fi
+
+# Get Secret name from PostgresCluster status
+SECRET_NAME=$(kubectl get postgrescluster "$POSTGRES_CLUSTER_NAME" -n "$NAMESPACE" \
+    -o jsonpath='{.status.resources.secretRef.name}' 2>/dev/null)
+
+if [ -z "$SECRET_NAME" ]; then
+    echo -e "${RED}Error: Secret reference not found in PostgresCluster status${NC}"
+    echo "Make sure the PostgresCluster is ready and the Secret has been created"
+    exit 1
+fi
+
+echo -e "${GREEN}Found ConfigMap: $CONFIGMAP_NAME${NC}"
+echo -e "${GREEN}Found Secret: $SECRET_NAME${NC}"
+
+# Extract connection details from the ConfigMap (it exposes uppercase keys)
+echo -e "\n${YELLOW}Extracting connection details...${NC}"
+DB_PORT=$(kubectl get configmap "$CONFIGMAP_NAME" -n "$NAMESPACE" -o jsonpath='{.data.DEFAULT_CLUSTER_PORT}')
+DB_USER=$(kubectl get configmap "$CONFIGMAP_NAME" -n "$NAMESPACE" -o jsonpath='{.data.SUPER_USER_NAME}')
+RW_SERVICE_FQDN=$(kubectl get configmap "$CONFIGMAP_NAME" -n "$NAMESPACE" -o jsonpath='{.data.CLUSTER_RW_ENDPOINT}')
+RO_SERVICE_FQDN=$(kubectl get configmap "$CONFIGMAP_NAME" -n "$NAMESPACE" -o jsonpath='{.data.CLUSTER_RO_ENDPOINT}')
+R_SERVICE_FQDN=$(kubectl get configmap "$CONFIGMAP_NAME" -n "$NAMESPACE" -o jsonpath='{.data.CLUSTER_R_ENDPOINT}')
+
+# Extract just the service name (first part before the dot)
+RW_SERVICE=$(echo "$RW_SERVICE_FQDN" | cut -d'.' -f1)
+RO_SERVICE=$(echo "$RO_SERVICE_FQDN" | cut -d'.' -f1)
+R_SERVICE=$(echo "$R_SERVICE_FQDN" | cut -d'.' -f1)
+
+# Extract password from Secret
+DB_PASSWORD=$(kubectl get secret "$SECRET_NAME" -n "$NAMESPACE" -o jsonpath='{.data.password}' | base64 -d)
+
+# Get the database name from the CNPG cluster's bootstrap config; fall back to 'postgres' if unset
+DB_NAME=$(kubectl get cluster "$POSTGRES_CLUSTER_NAME" -n "$NAMESPACE" -o jsonpath='{.spec.bootstrap.initdb.database}' 2>/dev/null || true)
+DB_NAME="${DB_NAME:-postgres}"
+
+echo -e "${GREEN}Connection Details:${NC}"
+echo "  RW Service: $RW_SERVICE_FQDN"
+echo "  RO Service: $RO_SERVICE_FQDN"
+echo "  R Service: $R_SERVICE_FQDN"
+echo "  Port: $DB_PORT"
+echo "  Database: $DB_NAME"
+echo "  User: $DB_USER"
+
+# Check if psql is installed
+if ! command -v psql &> /dev/null; then
+    echo -e "\n${YELLOW}psql client not found. Using kubectl run with postgres image...${NC}"
+
+    echo -e "${YELLOW}Creating temporary pod for connection test...${NC}"
+
+    kubectl run postgres-client-test \
+        --rm -i --tty \
+        --image=postgres:16 \
+        --restart=Never \
+        --namespace="$NAMESPACE" \
+        --env="PGPASSWORD=$DB_PASSWORD" \
+        -- psql -h "$RW_SERVICE_FQDN" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME"
+else
+    # Use port-forward for local connection
+    echo -e "\n${YELLOW}Setting up port-forward to PostgreSQL service...${NC}"
+
+    # Kill any existing port-forward on 5432
+    pkill -f "kubectl.*port-forward.*$RW_SERVICE" 2>/dev/null || true
+
+    # Start port-forward in background (use service name only, not FQDN)
+    kubectl port-forward -n "$NAMESPACE" "service/$RW_SERVICE" 5432:$DB_PORT > /dev/null 2>&1 &
+    PORT_FORWARD_PID=$!
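+
+    # Note (illustrative): with psql >= 12, exporting PGGSSENCMODE=disable is
+    # equivalent to the gssencmode=disable URI parameter used at the end of this branch.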
+
+    # Cleanup function
+    cleanup() {
+        echo -e "\n${YELLOW}Cleaning up port-forward...${NC}"
+        kill $PORT_FORWARD_PID 2>/dev/null || true
+    }
+    trap cleanup EXIT
+
+    # Wait for port-forward to be ready
+    echo -e "${YELLOW}Waiting for port-forward to be ready...${NC}"
+    sleep 3
+
+    echo -e "${GREEN}Connecting to PostgreSQL...${NC}"
+    echo -e "${YELLOW}Password: $DB_PASSWORD${NC}\n"
+
+    # Use connection string format which is more reliable
+    # Disable GSSAPI and use password authentication only
+    PGPASSWORD="$DB_PASSWORD" psql "postgresql://$DB_USER@localhost:5432/$DB_NAME?gssencmode=disable" \
+        || PGPASSWORD="$DB_PASSWORD" psql -h localhost -p 5432 -U "$DB_USER" -d "$DB_NAME" --no-psqlrc
+fi
\ No newline at end of file
diff --git a/test/postgrescluster-retain-upgrade-flow.sh b/test/postgrescluster-retain-upgrade-flow.sh
new file mode 100755
index 000000000..69124c536
--- /dev/null
+++ b/test/postgrescluster-retain-upgrade-flow.sh
@@ -0,0 +1,356 @@
+#!/usr/bin/env bash
+# Prerequisite: run `make install` and `make run` in a separate terminal so the operator is running while this test executes.
+# This test verifies that when a PostgresCluster with clusterDeletionPolicy=Retain is deleted, the underlying CNPG Cluster and superuser Secret are not deleted and can be re-attached to a new PostgresCluster with the same name (simulating a major version upgrade flow where the cluster needs to be recreated).
+# Then run: NAMESPACE=your-namespace UPGRADE_POSTGRES_VERSION=16 ./test/postgrescluster-retain-upgrade-flow.sh
+
+
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+TEST_DIR="$ROOT_DIR/test"
+SAMPLES_DIR="$ROOT_DIR/config/samples"
+
+CLUSTER_MANIFEST="${CLUSTER_MANIFEST:-$SAMPLES_DIR/enterprise_v4_postgrescluster_dev.yaml}"
+DATABASE_MANIFEST="${DATABASE_MANIFEST:-$SAMPLES_DIR/enterprise_v4_postgresdatabase.yaml}"
+CONNECT_SCRIPT="${CONNECT_SCRIPT:-$TEST_DIR/connect-to-postgres-cluster.sh}"
+UPGRADE_POSTGRES_VERSION="${UPGRADE_POSTGRES_VERSION:-16}"
+POLL_INTERVAL="${POLL_INTERVAL:-5}"
+TIMEOUT_SECONDS="${TIMEOUT_SECONDS:-900}"
+REQUIRE_POSTGRESDATABASE_READY="${REQUIRE_POSTGRESDATABASE_READY:-0}"
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+
+log() {
+    echo -e "${YELLOW}[$(date '+%Y-%m-%d %H:%M:%S')] $*${NC}"
+}
+
+pass() {
+    echo -e "${GREEN}[PASS] $*${NC}"
+}
+
+fail() {
+    echo -e "${RED}[FAIL] $*${NC}" >&2
+    exit 1
+}
+
+require_file() {
+    local path="$1"
+    [[ -f "$path" ]] || fail "Required file not found: $path"
+}
+
+require_command() {
+    local cmd="$1"
+    command -v "$cmd" >/dev/null 2>&1 || fail "Required command not found: $cmd"
+}
+
+current_namespace() {
+    local ns
+    ns="$(kubectl config view --minify --output 'jsonpath={..namespace}' 2>/dev/null || true)"
+    if [[ -z "$ns" ]]; then
+        ns="default"
+    fi
+    printf '%s' "$ns"
+}
+
+preflight_namespace() {
+    local deletion_ts phase
+    deletion_ts="$(kubectl get ns "$NAMESPACE" -o jsonpath='{.metadata.deletionTimestamp}' 2>/dev/null || true)"
+    phase="$(kubectl get ns "$NAMESPACE" -o jsonpath='{.status.phase}' 2>/dev/null || true)"
+    if [[ -n "$deletion_ts" || "$phase" == "Terminating" ]]; then
+        fail "Namespace $NAMESPACE is terminating (deletionTimestamp=$deletion_ts phase=$phase). Use a non-terminating namespace."
+ fi +} + +preflight_cluster_dns() { + local host + host="${CLUSTER_NAME}-rw.${NAMESPACE}.svc.cluster.local" + if getent hosts "$host" >/dev/null 2>&1; then + return 0 + fi + + log "Cluster DNS name is not resolvable from this machine: $host" + log "This does not block local connection tests (we use kubectl port-forward), but it blocks PostgresDatabase DB-connection/privilege phases when the operator runs out-of-cluster (make run)." + log "Fix: run the operator in-cluster or use telepresence/kubefwd to get cluster DNS/networking on your machine." + + SKIP_POSTGRESDATABASE_READY_CHECK=1 + if [[ "$REQUIRE_POSTGRESDATABASE_READY" == "1" ]]; then + fail "PostgresDatabase readiness required (REQUIRE_POSTGRESDATABASE_READY=1) but cluster DNS is not available." + fi + + log "Continuing with degraded PostgresDatabase checks (readiness will not be required)." +} + +resource_exists() { + local resource="$1" + local name="$2" + kubectl get "$resource" "$name" -n "$NAMESPACE" >/dev/null 2>&1 +} + +jsonpath_value() { + local resource="$1" + local name="$2" + local jsonpath="$3" + kubectl get "$resource" "$name" -n "$NAMESPACE" -o "jsonpath=${jsonpath}" 2>/dev/null +} + +wait_for_jsonpath() { + local resource="$1" + local name="$2" + local jsonpath="$3" + local expected="$4" + local timeout="${5:-$TIMEOUT_SECONDS}" + local deadline=$((SECONDS + timeout)) + local value="" + + while (( SECONDS < deadline )); do + value="$(jsonpath_value "$resource" "$name" "$jsonpath" || true)" + if [[ "$value" == "$expected" ]]; then + pass "$resource/$name reached ${jsonpath}=${expected}" + return 0 + fi + sleep "$POLL_INTERVAL" + done + + fail "Timed out waiting for $resource/$name to reach ${jsonpath}=${expected}. Last value: ${value:-}" +} + +wait_for_contains() { + local resource="$1" + local name="$2" + local jsonpath="$3" + local expected_substring="$4" + local timeout="${5:-$TIMEOUT_SECONDS}" + local deadline=$((SECONDS + timeout)) + local value="" + + while (( SECONDS < deadline )); do + value="$(jsonpath_value "$resource" "$name" "$jsonpath" || true)" + if [[ "$value" == *"$expected_substring"* ]]; then + pass "$resource/$name contains ${expected_substring} in ${jsonpath}" + return 0 + fi + sleep "$POLL_INTERVAL" + done + + fail "Timed out waiting for $resource/$name to contain ${expected_substring} in ${jsonpath}. Last value: ${value:-}" +} + +wait_for_absence() { + local resource="$1" + local name="$2" + local timeout="${3:-$TIMEOUT_SECONDS}" + local deadline=$((SECONDS + timeout)) + + while (( SECONDS < deadline )); do + if ! 
resource_exists "$resource" "$name"; then + pass "$resource/$name is absent" + return 0 + fi + sleep "$POLL_INTERVAL" + done + + fail "Timed out waiting for $resource/$name to be deleted" +} + +wait_for_presence() { + local resource="$1" + local name="$2" + local timeout="${3:-$TIMEOUT_SECONDS}" + local deadline=$((SECONDS + timeout)) + + while (( SECONDS < deadline )); do + if resource_exists "$resource" "$name"; then + pass "$resource/$name exists" + return 0 + fi + sleep "$POLL_INTERVAL" + done + + fail "Timed out waiting for $resource/$name to exist" +} + +wait_for_owner_reference() { + local resource="$1" + local name="$2" + local owner_kind="$3" + local owner_name="$4" + local owner_uid="$5" + local timeout="${6:-$TIMEOUT_SECONDS}" + local deadline=$((SECONDS + timeout)) + local owners="" + local expected="${owner_kind}:${owner_name}:${owner_uid}" + + while (( SECONDS < deadline )); do + owners="$(jsonpath_value "$resource" "$name" '{range .metadata.ownerReferences[*]}{.kind}:{.name}:{.uid}{"\n"}{end}' || true)" + if [[ "$owners" == *"$expected"* ]]; then + pass "$resource/$name is owned by ${owner_kind}/${owner_name}" + return 0 + fi + sleep "$POLL_INTERVAL" + done + + fail "Timed out waiting for $resource/$name to be owned by ${owner_kind}/${owner_name}. Owners: ${owners:-}" +} + +run_connection_check() { + log "Checking superuser connection with $CONNECT_SCRIPT" + printf 'SELECT current_user;\n\\q\n' | bash "$CONNECT_SCRIPT" "$CLUSTER_NAME" "$NAMESPACE" + pass "Superuser connection succeeded" +} + +patch_cluster() { + local deletion_policy="$1" + local pooler_enabled="$2" + kubectl patch postgrescluster "$CLUSTER_NAME" -n "$NAMESPACE" --type merge \ + -p "{\"spec\":{\"clusterDeletionPolicy\":\"${deletion_policy}\",\"connectionPoolerEnabled\":${pooler_enabled}}}" >/dev/null +} + +apply_upgraded_cluster_manifest() { + local tmp_manifest + tmp_manifest="$(mktemp)" + + sed \ + -e "s/^\([[:space:]]*clusterDeletionPolicy:\).*/\1 Retain/" \ + -e "s/^\([[:space:]]*postgresVersion:\).*/\1 \"${UPGRADE_POSTGRES_VERSION}\"/" \ + "$CLUSTER_MANIFEST" > "$tmp_manifest" + + kubectl apply -n "$NAMESPACE" -f "$tmp_manifest" >/dev/null + rm -f "$tmp_manifest" +} + +assert_cluster_ready() { + wait_for_jsonpath postgrescluster "$CLUSTER_NAME" '{.status.phase}' 'Ready' + wait_for_jsonpath postgrescluster "$CLUSTER_NAME" '{.status.conditions[?(@.type=="ClusterReady")].status}' 'True' + wait_for_jsonpath postgrescluster "$CLUSTER_NAME" '{.status.conditions[?(@.type=="ConfigMapReady")].status}' 'True' +} + +assert_database_created() { + wait_for_presence postgresdatabase "$DATABASE_NAME" + for db in "${DATABASES[@]}"; do + wait_for_presence databases.postgresql.cnpg.io "${DATABASE_NAME}-${db}" + done + pass "PostgresDatabase CR exists and CNPG Database CRs are present" +} + +assert_database_ready() { + if [[ "${SKIP_POSTGRESDATABASE_READY_CHECK:-0}" == "1" ]]; then + assert_database_created + return 0 + fi + wait_for_jsonpath postgresdatabase "$DATABASE_NAME" '{.status.phase}' 'Ready' + wait_for_jsonpath postgresdatabase "$DATABASE_NAME" '{.status.observedGeneration}' \ + "$(jsonpath_value postgresdatabase "$DATABASE_NAME" '{.metadata.generation}')" +} + +record_cluster_artifacts() { + SUPERUSER_SECRET_NAME="$(jsonpath_value postgrescluster "$CLUSTER_NAME" '{.status.resources.secretRef.name}')" + CONFIGMAP_NAME="$(jsonpath_value postgrescluster "$CLUSTER_NAME" '{.status.resources.configMapRef.name}')" + + [[ -n "$SUPERUSER_SECRET_NAME" ]] || fail "PostgresCluster status.resources.secretRef.name is empty" 
+ [[ -n "$CONFIGMAP_NAME" ]] || fail "PostgresCluster status.resources.configMapRef.name is empty" +} + +cleanup_database_cr() { + if resource_exists postgresdatabase "$DATABASE_NAME"; then + log "Deleting PostgresDatabase/$DATABASE_NAME to leave the namespace clean" + kubectl delete postgresdatabase "$DATABASE_NAME" -n "$NAMESPACE" --wait=false >/dev/null + wait_for_absence postgresdatabase "$DATABASE_NAME" + fi +} + +require_command kubectl +require_file "$CLUSTER_MANIFEST" +require_file "$DATABASE_MANIFEST" +require_file "$CONNECT_SCRIPT" + +NAMESPACE="${NAMESPACE:-$(current_namespace)}" +CLUSTER_NAME="${CLUSTER_NAME:-$(kubectl create --dry-run=client -f "$CLUSTER_MANIFEST" -o jsonpath='{.metadata.name}')}" +DATABASE_NAME="${DATABASE_NAME:-$(kubectl create --dry-run=client -f "$DATABASE_MANIFEST" -o jsonpath='{.metadata.name}')}" +DATABASES_STR="$(kubectl create --dry-run=client -f "$DATABASE_MANIFEST" -o jsonpath='{range .spec.databases[*]}{.name}{" "}{end}')" +read -r -a DATABASES <<< "${DATABASES_STR:-}" +RW_POOLER_NAME="${CLUSTER_NAME}-pooler-rw" +RO_POOLER_NAME="${CLUSTER_NAME}-pooler-ro" + +log "Using namespace: $NAMESPACE" +log "Cluster manifest: $CLUSTER_MANIFEST" +log "Database manifest: $DATABASE_MANIFEST" +log "Upgrade target postgresVersion: $UPGRADE_POSTGRES_VERSION" + +preflight_namespace +preflight_cluster_dns + +log "1. Creating PostgresCluster from sample manifest" +kubectl apply -n "$NAMESPACE" -f "$CLUSTER_MANIFEST" + +log "2. Creating PostgresDatabase from sample manifest" +kubectl apply -n "$NAMESPACE" -f "$DATABASE_MANIFEST" + +log "3. Waiting for PostgresCluster and PostgresDatabase to become ready" +assert_cluster_ready +assert_database_ready +record_cluster_artifacts +pass "PostgresCluster and PostgresDatabase were created successfully" + +log "4. Verifying superuser connection to PostgresCluster" +run_connection_check + +log "5. Setting clusterDeletionPolicy=Retain and connectionPoolerEnabled=false" +patch_cluster "Retain" "false" +wait_for_jsonpath postgrescluster "$CLUSTER_NAME" '{.spec.clusterDeletionPolicy}' 'Retain' +wait_for_jsonpath postgrescluster "$CLUSTER_NAME" '{.spec.connectionPoolerEnabled}' 'false' +wait_for_absence pooler.postgresql.cnpg.io "$RW_POOLER_NAME" +wait_for_absence pooler.postgresql.cnpg.io "$RO_POOLER_NAME" +assert_cluster_ready + +log "6. Setting connectionPoolerEnabled=true and waiting for poolers" +patch_cluster "Retain" "true" +wait_for_jsonpath postgrescluster "$CLUSTER_NAME" '{.spec.connectionPoolerEnabled}' 'true' +wait_for_presence pooler.postgresql.cnpg.io "$RW_POOLER_NAME" +wait_for_presence pooler.postgresql.cnpg.io "$RO_POOLER_NAME" +wait_for_jsonpath postgrescluster "$CLUSTER_NAME" '{.status.conditions[?(@.type=="PoolerReady")].status}' 'True' +assert_cluster_ready + +log "7. Deleting PostgresCluster with retention enabled" +kubectl delete postgrescluster "$CLUSTER_NAME" -n "$NAMESPACE" --wait=false >/dev/null +wait_for_absence postgrescluster "$CLUSTER_NAME" +wait_for_presence cluster.postgresql.cnpg.io "$CLUSTER_NAME" +wait_for_presence secret "$SUPERUSER_SECRET_NAME" +pass "CNPG cluster and superuser secret remained after PostgresCluster deletion" + +log "8. Recreating PostgresCluster with a major version upgrade" +apply_upgraded_cluster_manifest +wait_for_presence postgrescluster "$CLUSTER_NAME" +wait_for_contains cluster.postgresql.cnpg.io "$CLUSTER_NAME" '{.spec.imageName}' ":${UPGRADE_POSTGRES_VERSION}" +assert_cluster_ready +record_cluster_artifacts + +log "9. 
Checking that retained resources were re-attached to the recreated PostgresCluster" +POSTGRES_CLUSTER_UID="$(jsonpath_value postgrescluster "$CLUSTER_NAME" '{.metadata.uid}')" +wait_for_owner_reference cluster.postgresql.cnpg.io "$CLUSTER_NAME" "PostgresCluster" "$CLUSTER_NAME" "$POSTGRES_CLUSTER_UID" +wait_for_owner_reference secret "$SUPERUSER_SECRET_NAME" "PostgresCluster" "$CLUSTER_NAME" "$POSTGRES_CLUSTER_UID" + +log "10. Verifying superuser connection after recreate/upgrade" +run_connection_check + +log "11. Setting clusterDeletionPolicy=Delete" +kubectl patch postgrescluster "$CLUSTER_NAME" -n "$NAMESPACE" --type merge \ + -p '{"spec":{"clusterDeletionPolicy":"Delete"}}' >/dev/null +wait_for_jsonpath postgrescluster "$CLUSTER_NAME" '{.spec.clusterDeletionPolicy}' 'Delete' + +log "12. Deleting the PostgresCluster" +kubectl delete postgrescluster "$CLUSTER_NAME" -n "$NAMESPACE" --wait=false >/dev/null +wait_for_absence postgrescluster "$CLUSTER_NAME" + +log "13. Checking that no cluster leftovers remain" +cleanup_database_cr +wait_for_absence cluster.postgresql.cnpg.io "$CLUSTER_NAME" +wait_for_absence pooler.postgresql.cnpg.io "$RW_POOLER_NAME" +wait_for_absence pooler.postgresql.cnpg.io "$RO_POOLER_NAME" +wait_for_absence secret "$SUPERUSER_SECRET_NAME" +wait_for_absence configmap "$CONFIGMAP_NAME" +pass "No PostgresCluster leftovers remain in namespace $NAMESPACE" + +log "Flow finished successfully" diff --git a/test/testenv/deployment.go b/test/testenv/deployment.go index e639a9513..cb3624c99 100644 --- a/test/testenv/deployment.go +++ b/test/testenv/deployment.go @@ -217,7 +217,7 @@ func (d *Deployment) PodExecCommand(ctx context.Context, podName string, cmd []s return "", "", err } //FIXME - restClient, err := apiutil.RESTClientForGVK(gvk, false, restConfig, serializer.NewCodecFactory(scheme.Scheme), http.DefaultClient) + restClient, err := apiutil.RESTClientForGVK(gvk, false, false, restConfig, serializer.NewCodecFactory(scheme.Scheme), http.DefaultClient) if err != nil { return "", "", err } @@ -264,7 +264,7 @@ func (d *Deployment) OperatorPodExecCommand(ctx context.Context, podName string, return "", "", err } //FIXME - restClient, err := apiutil.RESTClientForGVK(gvk, false, restConfig, serializer.NewCodecFactory(scheme.Scheme), http.DefaultClient) + restClient, err := apiutil.RESTClientForGVK(gvk, false, false, restConfig, serializer.NewCodecFactory(scheme.Scheme), http.DefaultClient) if err != nil { return "", "", err } diff --git a/test/testenv/ingest_utils.go b/test/testenv/ingest_utils.go index 2c0403b1e..d4606ef31 100644 --- a/test/testenv/ingest_utils.go +++ b/test/testenv/ingest_utils.go @@ -187,7 +187,7 @@ func CopyFileToPod(ctx context.Context, podName string, srcPath string, destPath if err != nil { return "", "", err } - restClient, err := apiutil.RESTClientForGVK(gvk, false, restConfig, serializer.NewCodecFactory(scheme.Scheme), http.DefaultClient) + restClient, err := apiutil.RESTClientForGVK(gvk, false, false, restConfig, serializer.NewCodecFactory(scheme.Scheme), http.DefaultClient) if err != nil { return "", "", err } From f445129ac0982b636126fbe166e91616f0beb3d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20P=C5=82oski?= Date: Wed, 25 Mar 2026 21:23:30 +0100 Subject: [PATCH 02/36] Refresh crds and bundles with new version of controller-gen --- .gitignore | 3 +- .tool-versions | 1 - CLAUDE.md | 26 - api/v4/common_types.go | 108 ++- bundle.Dockerfile | 7 +- ...enterprise.splunk.com_clustermanagers.yaml | 346 ++++++++- 
.../enterprise.splunk.com_clustermasters.yaml | 346 ++++++++- ...enterprise.splunk.com_indexerclusters.yaml | 433 +++++++++-- ...nterprise.splunk.com_ingestorclusters.yaml | 284 ++++++- ...enterprise.splunk.com_licensemanagers.yaml | 284 ++++++- .../enterprise.splunk.com_licensemasters.yaml | 284 ++++++- ...erprise.splunk.com_monitoringconsoles.yaml | 568 ++++++++++++-- ...ise.splunk.com_postgresclusterclasses.yaml | 2 + .../enterprise.splunk.com_queues.yaml | 30 +- ...erprise.splunk.com_searchheadclusters.yaml | 571 +++++++++++++-- .../enterprise.splunk.com_standalones.yaml | 693 ++++++++++++++++-- ...splunk-operator.clusterserviceversion.yaml | 161 +++- bundle/metadata/annotations.yaml | 7 +- ...enterprise.splunk.com_clustermanagers.yaml | 169 ++++- .../enterprise.splunk.com_clustermasters.yaml | 169 ++++- ...enterprise.splunk.com_indexerclusters.yaml | 338 ++++++++- ...nterprise.splunk.com_ingestorclusters.yaml | 169 ++++- ...enterprise.splunk.com_licensemanagers.yaml | 169 ++++- .../enterprise.splunk.com_licensemasters.yaml | 169 ++++- ...erprise.splunk.com_monitoringconsoles.yaml | 338 ++++++++- ...erprise.splunk.com_searchheadclusters.yaml | 340 ++++++++- .../enterprise.splunk.com_standalones.yaml | 338 ++++++++- ...splunk-operator.clusterserviceversion.yaml | 17 + config/rbac/postgrescluster_admin_role.yaml | 27 - .../rbac/postgresclusterclass_admin_role.yaml | 27 - config/rbac/postgresdatabase_admin_role.yaml | 27 - internal/controller/suite_test.go | 175 +++-- test/postgrescluster-retain-upgrade-flow.sh | 356 --------- 33 files changed, 5825 insertions(+), 1157 deletions(-) delete mode 100644 .tool-versions delete mode 100644 CLAUDE.md delete mode 100644 config/rbac/postgrescluster_admin_role.yaml delete mode 100644 config/rbac/postgresclusterclass_admin_role.yaml delete mode 100644 config/rbac/postgresdatabase_admin_role.yaml delete mode 100755 test/postgrescluster-retain-upgrade-flow.sh diff --git a/.gitignore b/.gitignore index 5de8f6d85..050b59d70 100644 --- a/.gitignore +++ b/.gitignore @@ -100,4 +100,5 @@ bundle_*/ test/secret/*.log kubeconfig .devcontainer/devcontainer.json -kuttl-artifacts/* \ No newline at end of file +kuttl-artifacts/* +.tool-versions \ No newline at end of file diff --git a/.tool-versions b/.tool-versions deleted file mode 100644 index 1527bb834..000000000 --- a/.tool-versions +++ /dev/null @@ -1 +0,0 @@ -golang 1.25.5 \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index de84839de..000000000 --- a/CLAUDE.md +++ /dev/null @@ -1,26 +0,0 @@ -# Claude Code – Project Ground Rules - -## Role & Expertise -- You are a **Go expert** and a **Kubernetes controller/operator expert**. -- You write **small, clean, unit-testable functions**. -- Comments explain **why**, not what. Avoid restating the code in prose. - -## Code Style -- Keep functions focused and short — each should do one thing. -- Prefer explicit error handling with descriptive context (e.g. `fmt.Errorf("reconciling roles: %w", err)`). -- Avoid deep nesting; use early returns. - -## Reconciler / Operator Design -- The **reconciler is the main orchestration flow**. All state modifications are coordinated here. -- We build state **incrementally**: each major step updates state and requeues (`ctrl.Result{RequeueAfter: ...}`). -- Every operation must be **idempotent** — safe to run multiple times with the same outcome. -- Follow **Kubernetes controller best practices**: - - Use `SSA` (Server-Side Apply) where appropriate. 
- - Emit `Events` for meaningful state transitions. - - Use `Status` conditions to reflect progress and errors. - - Respect finalizers for cleanup logic. - -## Testing -- New logic should be accompanied by unit tests. -- Prefer table-driven tests. -- Mock external dependencies (k8s client, DB connections) via interfaces. diff --git a/api/v4/common_types.go b/api/v4/common_types.go index 5bba9c0cd..e53317075 100644 --- a/api/v4/common_types.go +++ b/api/v4/common_types.go @@ -91,12 +91,15 @@ type Spec struct { // Image to use for Splunk pod containers (overrides RELATED_IMAGE_SPLUNK_ENTERPRISE environment variables) Image string `json:"image"` - // Sets pull policy for all images (either “Always” or the default: “IfNotPresent”) - // +kubebuilder:validation:Enum=Always;IfNotPresent - ImagePullPolicy string `json:"imagePullPolicy"` + // Sets pull policy for all images ("Always", "Never", or the default: "IfNotPresent") + // +kubebuilder:validation:Enum=Always;IfNotPresent;Never + // +kubebuilder:default=IfNotPresent + // +optional + ImagePullPolicy string `json:"imagePullPolicy,omitempty"` // Name of Scheduler to use for pod placement (defaults to “default-scheduler”) - SchedulerName string `json:"schedulerName"` + // +optional + SchedulerName string `json:"schedulerName,omitempty"` // Kubernetes Affinity rules that control how pods are assigned to particular nodes. Affinity corev1.Affinity `json:"affinity"` @@ -137,7 +140,7 @@ const ( // PhaseTerminating means a custom resource is in the process of being removed PhaseTerminating Phase = "Terminating" - // PhaseError means an error occured with custom resource management + // PhaseError means an error occurred with custom resource management PhaseError Phase = "Error" ) @@ -164,13 +167,16 @@ type CommonSplunkSpec struct { Spec `json:",inline"` // Storage configuration for /opt/splunk/etc volume - EtcVolumeStorageConfig StorageClassSpec `json:"etcVolumeStorageConfig"` + // +optional + EtcVolumeStorageConfig StorageClassSpec `json:"etcVolumeStorageConfig,omitempty"` // Storage configuration for /opt/splunk/var volume - VarVolumeStorageConfig StorageClassSpec `json:"varVolumeStorageConfig"` + // +optional + VarVolumeStorageConfig StorageClassSpec `json:"varVolumeStorageConfig,omitempty"` // List of one or more Kubernetes volumes. These will be mounted in all pod containers as as /mnt/ - Volumes []corev1.Volume `json:"volumes"` + // +optional + Volumes []corev1.Volume `json:"volumes,omitempty"` // Inline map of default.yml overrides used to initialize the environment Defaults string `json:"defaults"` @@ -210,10 +216,12 @@ type CommonSplunkSpec struct { // ServiceAccount is the service account used by the pods deployed by the CRD. 
// If not specified uses the default serviceAccount for the namespace as per // https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/#use-the-default-service-account-to-access-the-api-server - ServiceAccount string `json:"serviceAccount"` + // +optional + ServiceAccount string `json:"serviceAccount,omitempty"` // ExtraEnv refers to extra environment variables to be passed to the Splunk instance containers // WARNING: Setting environment variables used by Splunk or Ansible will affect Splunk installation and operation + // +optional ExtraEnv []corev1.EnvVar `json:"extraEnv,omitempty"` // ReadinessInitialDelaySeconds defines initialDelaySeconds(See https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes) for Readiness probe @@ -227,45 +235,64 @@ type CommonSplunkSpec struct { LivenessInitialDelaySeconds int32 `json:"livenessInitialDelaySeconds"` // LivenessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command + // +optional + // +kubebuilder:default:={"initialDelaySeconds":30,"timeoutSeconds":30,"periodSeconds":30,"failureThreshold":3} LivenessProbe *Probe `json:"livenessProbe,omitempty"` // ReadinessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes + // +optional + // +kubebuilder:default:={"initialDelaySeconds":10,"timeoutSeconds":5,"periodSeconds":5,"failureThreshold":3} ReadinessProbe *Probe `json:"readinessProbe,omitempty"` // StartupProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes + // +optional + // +kubebuilder:default:={"initialDelaySeconds":40,"timeoutSeconds":30,"periodSeconds":30,"failureThreshold":12} StartupProbe *Probe `json:"startupProbe,omitempty"` // Sets imagePullSecrets if image is being pulled from a private registry. 
// See https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ + // +optional ImagePullSecrets []corev1.LocalObjectReference `json:"imagePullSecrets,omitempty"` } // StorageClassSpec defines storage class configuration +// +kubebuilder:validation:XValidation:rule="!(size(self.storageClassName) > 0 && self.ephemeralStorage == true)",message="storageClassName and ephemeralStorage are mutually exclusive" +// +kubebuilder:validation:XValidation:rule="!(size(self.storageCapacity) > 0 && self.ephemeralStorage == true)",message="storageCapacity and ephemeralStorage are mutually exclusive" type StorageClassSpec struct { // Name of StorageClass to use for persistent volume claims - StorageClassName string `json:"storageClassName"` + // +optional + StorageClassName string `json:"storageClassName,omitempty"` - // Storage capacity to request persistent volume claims (default=”10Gi” for etc and "100Gi" for var) - StorageCapacity string `json:"storageCapacity"` + // Storage capacity to request persistent volume claims (default="10Gi" for etc and "100Gi" for var) + // +optional + StorageCapacity string `json:"storageCapacity,omitempty"` // If true, ephemeral (emptyDir) storage will be used - // default false // +optional - EphemeralStorage bool `json:"ephemeralStorage"` + // +kubebuilder:default=false + EphemeralStorage bool `json:"ephemeralStorage,omitempty"` } // SmartStoreSpec defines Splunk indexes and remote storage volume configuration type SmartStoreSpec struct { // List of remote storage volumes + // +optional + // +listType=map + // +listMapKey=name VolList []VolumeSpec `json:"volumes,omitempty"` // List of Splunk indexes + // +optional + // +listType=map + // +listMapKey=name IndexList []IndexSpec `json:"indexes,omitempty"` // Default configuration for indexes + // +optional Defaults IndexConfDefaultsSpec `json:"defaults,omitempty"` // Defines Cache manager settings + // +optional CacheManagerConf CacheManagerSpec `json:"cacheManager,omitempty"` } @@ -274,18 +301,23 @@ type CacheManagerSpec struct { IndexAndCacheManagerCommonSpec `json:",inline"` // Eviction policy to use + // +optional EvictionPolicy string `json:"evictionPolicy,omitempty"` // Max cache size per partition + // +optional MaxCacheSizeMB uint `json:"maxCacheSize,omitempty"` // Additional size beyond 'minFreeSize' before eviction kicks in + // +optional EvictionPaddingSizeMB uint `json:"evictionPadding,omitempty"` // Maximum number of buckets that can be downloaded from remote storage in parallel + // +optional MaxConcurrentDownloads uint `json:"maxConcurrentDownloads,omitempty"` // Maximum number of buckets that can be uploaded to remote storage in parallel + // +optional MaxConcurrentUploads uint `json:"maxConcurrentUploads,omitempty"` } @@ -295,30 +327,42 @@ type IndexConfDefaultsSpec struct { } // VolumeSpec defines remote volume config +// +kubebuilder:validation:XValidation:rule="self.provider != 'aws' || size(self.region) > 0",message="region is required when provider is aws" type VolumeSpec struct { // Remote volume name + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinLength=1 Name string `json:"name"` // Remote volume URI + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinLength=1 Endpoint string `json:"endpoint"` // Remote volume path + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinLength=1 Path string `json:"path"` // Secret object name - SecretRef string `json:"secretRef"` + // +optional + SecretRef string 
`json:"secretRef,omitempty"` // Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp. + // +kubebuilder:validation:Enum=s3;blob;gcs Type string `json:"storageType"` // App Package Remote Store provider. Supported values: aws, minio, azure, gcp. - Provider string `json:"provider"` + // +optional + // +kubebuilder:validation:Enum=aws;minio;azure;gcp + Provider string `json:"provider,omitempty"` - // Region of the remote storage volume where apps reside. Used for aws, if provided. Not used for minio and azure. - Region string `json:"region"` + // Region of the remote storage volume where apps reside. Required for aws, optional for azure and gcp. + // +optional + Region string `json:"region,omitempty"` } -// VolumeAndTypeSpec used to add any custom varaibles for volume implementation +// VolumeAndTypeSpec used to add any custom variables for volume implementation type VolumeAndTypeSpec struct { VolumeSpec `json:",inline"` } @@ -326,9 +370,12 @@ type VolumeAndTypeSpec struct { // IndexSpec defines Splunk index name and storage path type IndexSpec struct { // Splunk index name + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinLength=1 Name string `json:"name"` // Index location relative to the remote volume path + // +optional RemotePath string `json:"remotePath,omitempty"` IndexAndCacheManagerCommonSpec `json:",inline"` @@ -340,21 +387,26 @@ type IndexSpec struct { type IndexAndGlobalCommonSpec struct { // Remote Volume name + // +optional VolName string `json:"volumeName,omitempty"` // MaxGlobalDataSizeMB defines the maximum amount of space for warm and cold buckets of an index + // +optional MaxGlobalDataSizeMB uint `json:"maxGlobalDataSizeMB,omitempty"` // MaxGlobalDataSizeMB defines the maximum amount of cumulative space for warm and cold buckets of an index + // +optional MaxGlobalRawDataSizeMB uint `json:"maxGlobalRawDataSizeMB,omitempty"` } // IndexAndCacheManagerCommonSpec defines configurations that can be configured at index level or at server level type IndexAndCacheManagerCommonSpec struct { // Time period relative to the bucket's age, during which the bucket is protected from cache eviction + // +optional HotlistRecencySecs uint `json:"hotlistRecencySecs,omitempty"` // Time period relative to the bucket's age, during which the bloom filter file is protected from cache eviction + // +optional HotlistBloomFilterRecencyHours uint `json:"hotlistBloomFilterRecencyHours,omitempty"` } @@ -375,8 +427,9 @@ type AppSourceDefaultSpec struct { // PremiumAppsProps represents properties for premium apps such as ES type PremiumAppsProps struct { - // Type: enterpriseSecurity for now, can accomodate itsi etc.. later + // Type: enterpriseSecurity for now, can accommodate itsi etc.. later // +optional + // +kubebuilder:validation:Enum=enterpriseSecurity Type string `json:"type,omitempty"` // Enterpreise Security App defaults @@ -403,9 +456,13 @@ type EsDefaults struct { // AppSourceSpec defines list of App package (*.spl, *.tgz) locations on remote volumes type AppSourceSpec struct { // Logical name for the set of apps placed in this location. 
Logical name must be unique to the appRepo + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinLength=1 Name string `json:"name"` // Location relative to the volume path + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinLength=1 Location string `json:"location"` AppSourceDefaultSpec `json:",inline"` @@ -423,17 +480,18 @@ type AppFrameworkSpec struct { // 1. If no value or 0 is specified then it means periodic polling is disabled. // 2. If anything less than min is specified then we set it to 1 min. // 3. If anything more than the max value is specified then we set it to 1 day. + // +optional AppsRepoPollInterval int64 `json:"appsRepoPollIntervalSeconds,omitempty"` // App installation period within a reconcile. Apps will be installed during this period before the next reconcile is attempted. // Note: Do not change this setting unless instructed to do so by Splunk Support - // +kubebuilder:validation:Optional + // +optional // +kubebuilder:validation:Minimum:=30 // +kubebuilder:default:=90 SchedulerYieldInterval uint64 `json:"appInstallPeriodSeconds,omitempty"` // Maximum number of retries to install Apps - // +kubebuilder:validation:Optional + // +optional // +kubebuilder:validation:Minimum:=0 // +kubebuilder:default:=2 PhaseMaxRetries uint32 `json:"installMaxRetries,omitempty"` @@ -442,9 +500,13 @@ type AppFrameworkSpec struct { VolList []VolumeSpec `json:"volumes,omitempty"` // List of App sources on remote storage + // +optional + // +listType=map + // +listMapKey=name AppSources []AppSourceSpec `json:"appSources,omitempty"` // Maximum number of apps that can be downloaded at same time + // +optional MaxConcurrentAppDownloads uint64 `json:"maxConcurrentAppDownloads,omitempty"` } @@ -483,7 +545,7 @@ type AppSrcDeployInfo struct { type BundlePushStageType int const ( - // BundlePushUninitialized indicates bundle push never happend + // BundlePushUninitialized indicates bundle push never happened BundlePushUninitialized BundlePushStageType = iota // BundlePushPending waiting for all the apps to be copied to the Pod BundlePushPending diff --git a/bundle.Dockerfile b/bundle.Dockerfile index c16e98425..7a08487c3 100644 --- a/bundle.Dockerfile +++ b/bundle.Dockerfile @@ -5,11 +5,10 @@ LABEL operators.operatorframework.io.bundle.mediatype.v1=registry+v1 LABEL operators.operatorframework.io.bundle.manifests.v1=manifests/ LABEL operators.operatorframework.io.bundle.metadata.v1=metadata/ LABEL operators.operatorframework.io.bundle.package.v1=splunk-operator -LABEL operators.operatorframework.io.bundle.channels.v1=stable -LABEL operators.operatorframework.io.bundle.channel.default.v1: stable -LABEL operators.operatorframework.io.metrics.builder=operator-sdk-v1.39.0 +LABEL operators.operatorframework.io.bundle.channels.v1=alpha +LABEL operators.operatorframework.io.metrics.builder=operator-sdk-v1.42.0 LABEL operators.operatorframework.io.metrics.mediatype.v1=metrics+v1 -LABEL operators.operatorframework.io.metrics.project_layout=go.kubebuilder.io/v3 +LABEL operators.operatorframework.io.metrics.project_layout=go.kubebuilder.io/v4 # Labels for testing. 
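The CEL rules added to StorageClassSpec above are enforced by the API server at admission, before the operator ever reconciles the object. A minimal sketch of a spec the new rules reject, assuming the existing etcVolumeStorageConfig field name from CommonSplunkSpec (the kind and metadata here are illustrative only):

  apiVersion: enterprise.splunk.com/v4
  kind: Standalone
  metadata:
    name: example
  spec:
    etcVolumeStorageConfig:
      # Rejected server-side with "storageClassName and ephemeralStorage
      # are mutually exclusive" per the new x-kubernetes-validations rule.
      storageClassName: gp2
      ephemeralStorage: true

Dropping either field makes the object admissible; since ephemeralStorage now defaults to false, an existing storageClassName/storageCapacity pair is unaffected by the change.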
LABEL operators.operatorframework.io.test.mediatype.v1=scorecard+v1 diff --git a/bundle/manifests/enterprise.splunk.com_clustermanagers.yaml b/bundle/manifests/enterprise.splunk.com_clustermanagers.yaml index caf564122..4f191f44a 100644 --- a/bundle/manifests/enterprise.splunk.com_clustermanagers.yaml +++ b/bundle/manifests/enterprise.splunk.com_clustermanagers.yaml @@ -651,8 +651,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. for each node that meets all of the scheduling requirements (resource request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -1014,10 +1014,12 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when scope @@ -1042,6 +1044,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -1053,8 +1057,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -1092,6 +1102,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -1122,21 +1134,28 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where apps - reside. Used for aws, if provided. Not used for minio - and azure. + reside. Required for aws, optional for azure and gcp. type: string secretRef: description: Secret object name @@ -1145,8 +1164,19 @@ spec: description: 'Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.'
+ enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object clusterManagerRef: @@ -1256,19 +1286,25 @@ spec: description: Storage configuration for /opt/splunk/etc volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' extraEnv: description: |- ExtraEnv refers to extra environment variables to be passed to the Splunk instance containers @@ -1278,7 +1314,9 @@ spec: a Container. properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -1336,6 +1374,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. 
+ type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -1394,12 +1469,13 @@ spec: environment variables) type: string imagePullPolicy: + default: IfNotPresent description: 'Sets pull policy for all images ("Always", "Never", or the default: "IfNotPresent")' enum: - Always - - Never - IfNotPresent + - Never type: string imagePullSecrets: description: |- @@ -1521,6 +1597,11 @@ spec: minimum: 0 type: integer livenessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 30 description: LivenessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command properties: failureThreshold: @@ -1597,6 +1678,11 @@ spec: minimum: 0 type: integer readinessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 5 description: ReadinessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes properties: failureThreshold: @@ -1629,7 +1715,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -2275,6 +2361,7 @@ spec: type: integer name: description: Splunk index name + minLength: 1 type: string remotePath: description: Index location relative to the remote volume @@ -2283,8 +2370,13 @@ spec: volumeName: description: Remote Volume name type: string + required: + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map volumes: description: List of remote storage volumes items: @@ -2292,21 +2384,28 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where apps - reside. Used for aws, if provided. Not used for minio - and azure. + reside. Required for aws, optional for azure and gcp. type: string secretRef: description: Secret object name @@ -2315,11 +2414,30 @@ spec: description: 'Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' 
+ enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map type: object startupProbe: + default: + failureThreshold: 12 + initialDelaySeconds: 40 + periodSeconds: 30 + timeoutSeconds: 30 description: StartupProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes properties: failureThreshold: @@ -2561,19 +2679,25 @@ spec: description: Storage configuration for /opt/splunk/var volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' volumes: description: List of one or more Kubernetes volumes. These will be mounted in all pod containers as as /mnt/ @@ -3248,15 +3372,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. - If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -3438,12 +3560,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. 
- More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -3522,7 +3642,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -3942,6 +4062,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. If these coincide + with a certificate rotation, it is possible that the private key and leaf + certificate you read may not correspond to each other. Your application + will need to check for this condition, and re-read until they are + consistent. + + The named signer controls chooses the format of the certificate it + issues; consult the signer implementation's documentation to learn how to + use the certificates it issues. + properties: + certificateChainPath: + description: |- + Write the certificate chain at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. + + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). + + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. 
+ type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). + + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. + `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. + type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -4076,7 +4301,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. - More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- @@ -4386,11 +4610,13 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when @@ -4416,6 +4642,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -4427,8 +4655,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -4467,6 +4701,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -4497,21 +4733,29 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where - apps reside. Used for aws, if provided. Not used for - minio and azure. + apps reside. Required for aws, optional for azure + and gcp. 
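Because that region requirement is expressed as an x-kubernetes-validations rule on each volume item, an aws volume without a region is now rejected at admission rather than surfacing later as an S3 client error. A sketch of a conforming entry, using the json field names from VolumeSpec above (the smartstore wrapper key assumes the existing CommonSplunkSpec layout):

  smartstore:
    volumes:
      - name: remote-store
        endpoint: https://s3-us-west-2.amazonaws.com
        path: example-bucket/smartstore
        secretRef: s3-secret
        storageType: s3    # enum: s3, blob, gcs
        provider: aws      # enum: aws, minio, azure, gcp
        region: us-west-2  # omitting this with provider: aws trips the CEL rule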
type: string secretRef: description: Secret object name @@ -4521,8 +4765,19 @@ spec: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' + enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object appSrcDeployStatus: @@ -4750,6 +5005,7 @@ spec: type: integer name: description: Splunk index name + minLength: 1 type: string remotePath: description: Index location relative to the remote volume @@ -4758,8 +5014,13 @@ spec: volumeName: description: Remote Volume name type: string + required: + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map volumes: description: List of remote storage volumes items: @@ -4767,21 +5028,28 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where apps - reside. Used for aws, if provided. Not used for minio - and azure. + reside. Required for aws, optional for azure and gcp. type: string secretRef: description: Secret object name @@ -4790,9 +5058,23 @@ spec: description: 'Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' + enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map type: object telAppInstalled: description: Telemetry App installation flag diff --git a/bundle/manifests/enterprise.splunk.com_clustermasters.yaml b/bundle/manifests/enterprise.splunk.com_clustermasters.yaml index 24743e927..d4d231d0c 100644 --- a/bundle/manifests/enterprise.splunk.com_clustermasters.yaml +++ b/bundle/manifests/enterprise.splunk.com_clustermasters.yaml @@ -647,8 +647,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. for each node that meets all of the scheduling requirements (resource request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -1010,10 +1010,12 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location.
Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when scope @@ -1038,6 +1040,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -1049,8 +1053,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -1088,6 +1098,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -1118,21 +1130,28 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where apps - reside. Used for aws, if provided. Not used for minio - and azure. + reside. Required for aws, optional for azure and gcp. type: string secretRef: description: Secret object name @@ -1141,8 +1160,19 @@ spec: description: 'Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' + enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object clusterManagerRef: @@ -1252,19 +1282,25 @@ spec: description: Storage configuration for /opt/splunk/etc volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' extraEnv: description: |- ExtraEnv refers to extra environment variables to be passed to the Splunk instance containers @@ -1274,7 +1310,9 @@ spec: a Container. properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -1332,6 +1370,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. 
+ properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -1390,12 +1465,13 @@ spec: environment variables) type: string imagePullPolicy: + default: IfNotPresent description: 'Sets pull policy for all images ("Always", "Never", or the default: "IfNotPresent")' enum: - Always - - Never - IfNotPresent + - Never type: string imagePullSecrets: description: |- @@ -1517,6 +1593,11 @@ spec: minimum: 0 type: integer livenessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 30 description: LivenessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command properties: failureThreshold: @@ -1593,6 +1674,11 @@ spec: minimum: 0 type: integer readinessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 5 description: ReadinessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes properties: failureThreshold: @@ -1625,7 +1711,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -2271,6 +2357,7 @@ spec: type: integer name: description: Splunk index name + minLength: 1 type: string remotePath: description: Index location relative to the remote volume @@ -2279,8 +2366,13 @@ spec: volumeName: description: Remote Volume name type: string + required: + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map volumes: description: List of remote storage volumes items: @@ -2288,21 +2380,28 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where apps - reside. Used for aws, if provided. 
Not used for minio - and azure. + reside. Required for aws, optional for azure and gcp. type: string secretRef: description: Secret object name @@ -2311,11 +2410,30 @@ spec: description: 'Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' + enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map type: object startupProbe: + default: + failureThreshold: 12 + initialDelaySeconds: 40 + periodSeconds: 30 + timeoutSeconds: 30 description: StartupProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes properties: failureThreshold: @@ -2557,19 +2675,25 @@ spec: description: Storage configuration for /opt/splunk/var volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' volumes: description: List of one or more Kubernetes volumes. These will be mounted in all pod containers as as /mnt/ @@ -3244,15 +3368,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. - If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). 
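One behavioral note on the probe defaults introduced in these hunks: CRD defaulting substitutes the whole default object only when the field is omitted entirely, so a spec that says nothing about probes is persisted with exactly the values shown, while a spec that sets any single probe field must spell out the rest (as far as this diff shows, the Probe schema carries no per-field defaults). A sketch of what the API server stores for a spec that omits both probes:

  spec:
    livenessProbe:
      initialDelaySeconds: 30
      timeoutSeconds: 30
      periodSeconds: 30
      failureThreshold: 3
    startupProbe:
      initialDelaySeconds: 40
      timeoutSeconds: 30
      periodSeconds: 30
      failureThreshold: 12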
type: string volumeMode: description: |- @@ -3434,12 +3556,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -3518,7 +3638,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -3938,6 +4058,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. If these coincide + with a certificate rotation, it is possible that the private key and leaf + certificate you read may not correspond to each other. Your application + will need to check for this condition, and re-read until they are + consistent. + + The named signer controls chooses the format of the certificate it + issues; consult the signer implementation's documentation to learn how to + use the certificates it issues. + properties: + certificateChainPath: + description: |- + Write the certificate chain at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. 
+ + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). + + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. + type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). + + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. + `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. + type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -4072,7 +4297,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. - More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- @@ -4382,11 +4606,13 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when @@ -4412,6 +4638,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -4423,8 +4651,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -4463,6 +4697,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. 
later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -4493,21 +4729,29 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where - apps reside. Used for aws, if provided. Not used for - minio and azure. + apps reside. Required for aws, optional for azure + and gcp. type: string secretRef: description: Secret object name @@ -4517,8 +4761,19 @@ spec: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' + enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object appSrcDeployStatus: @@ -4743,6 +4998,7 @@ spec: type: integer name: description: Splunk index name + minLength: 1 type: string remotePath: description: Index location relative to the remote volume @@ -4751,8 +5007,13 @@ spec: volumeName: description: Remote Volume name type: string + required: + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map volumes: description: List of remote storage volumes items: @@ -4760,21 +5021,28 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where apps - reside. Used for aws, if provided. Not used for minio - and azure. + reside. Required for aws, optional for azure and gcp. type: string secretRef: description: Secret object name @@ -4783,9 +5051,23 @@ spec: description: 'Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' + enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map type: object telAppInstalled: description: Telemetry App installation flag diff --git a/bundle/manifests/enterprise.splunk.com_indexerclusters.yaml b/bundle/manifests/enterprise.splunk.com_indexerclusters.yaml index 51682e450..86121b919 100644 --- a/bundle/manifests/enterprise.splunk.com_indexerclusters.yaml +++ b/bundle/manifests/enterprise.splunk.com_indexerclusters.yaml @@ -654,8 +654,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. 
for each node that meets all of the scheduling requirements (resource request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -1104,19 +1104,25 @@ spec: description: Storage configuration for /opt/splunk/etc volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' extraEnv: description: |- ExtraEnv refers to extra environment variables to be passed to the Splunk instance containers @@ -1126,7 +1132,9 @@ spec: a Container. properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -1184,6 +1192,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing the env file.
+ type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -1242,12 +1287,13 @@ spec: environment variables) type: string imagePullPolicy: + default: IfNotPresent description: 'Sets pull policy for all images ("Always", "Never", or the default: "IfNotPresent")' enum: - Always - - Never - IfNotPresent + - Never type: string imagePullSecrets: description: |- @@ -1369,6 +1415,11 @@ spec: minimum: 0 type: integer livenessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 30 description: LivenessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command properties: failureThreshold: @@ -1445,6 +1496,11 @@ spec: minimum: 0 type: integer readinessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 5 description: ReadinessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes properties: failureThreshold: @@ -1482,7 +1538,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -2056,6 +2112,11 @@ spec: type: object type: object startupProbe: + default: + failureThreshold: 12 + initialDelaySeconds: 40 + periodSeconds: 30 + timeoutSeconds: 30 description: StartupProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes properties: failureThreshold: @@ -2297,19 +2358,25 @@ spec: description: Storage configuration for /opt/splunk/var volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' volumes: description: List of one or more Kubernetes volumes. These will be mounted in all pod containers as as /mnt/ @@ -2984,15 +3051,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. 
- If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -3174,12 +3239,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -3258,7 +3321,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -3678,6 +3741,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. If these coincide + with a certificate rotation, it is possible that the private key and leaf + certificate you read may not correspond to each other. Your application + will need to check for this condition, and re-read until they are + consistent. 
+ + The named signer controls chooses the format of the certificate it + issues; consult the signer implementation's documentation to learn how to + use the certificates it issues. + properties: + certificateChainPath: + description: |- + Write the certificate chain at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. + + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). + + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. + type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). + + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. + `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. + type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -3812,7 +3980,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. - More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- @@ -4840,8 +5007,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. 
for each node that meets all of the scheduling requirements (resource request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -5290,19 +5457,25 @@ spec: description: Storage configuration for /opt/splunk/etc volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' extraEnv: description: |- ExtraEnv refers to extra environment variables to be passed to the Splunk instance containers @@ -5312,7 +5485,9 @@ spec: a Container. properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -5370,6 +5545,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. 
+ type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -5428,12 +5640,13 @@ spec: environment variables) type: string imagePullPolicy: + default: IfNotPresent description: 'Sets pull policy for all images ("Always", "Never", or the default: "IfNotPresent")' enum: - Always - - Never - IfNotPresent + - Never type: string imagePullSecrets: description: |- @@ -5555,6 +5768,11 @@ spec: minimum: 0 type: integer livenessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 30 description: LivenessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command properties: failureThreshold: @@ -5717,6 +5935,11 @@ spec: minimum: 0 type: integer readinessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 5 description: ReadinessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes properties: failureThreshold: @@ -5742,8 +5965,7 @@ spec: type: integer type: object replicas: - description: Number of search head pods; a search head cluster will - be created if > 1 + description: Number of indexer cluster peers format: int32 type: integer resources: @@ -5754,7 +5976,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -6328,6 +6550,11 @@ spec: type: object type: object startupProbe: + default: + failureThreshold: 12 + initialDelaySeconds: 40 + periodSeconds: 30 + timeoutSeconds: 30 description: StartupProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes properties: failureThreshold: @@ -6569,19 +6796,25 @@ spec: description: Storage configuration for /opt/splunk/var volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' volumes: description: List of one or more Kubernetes volumes. These will be mounted in all pod containers as as /mnt/ @@ -7256,15 +7489,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. 
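Editor's note: the new `fileKeyRef` env source added above (alpha, behind the EnvFiles feature gate) lets `extraEnv` pull a single key out of an env file mounted from a volume. A minimal sketch with hypothetical names:

```yaml
extraEnv:
  - name: SPLUNK_EXTRA_SETTING        # hypothetical variable name
    valueFrom:
      fileKeyRef:
        volumeName: config-files      # volume mount holding the env file
        path: app.env                 # relative path inside that volume
        key: EXTRA_SETTING            # key within the env file; '=' is not allowed
        optional: true                # skip the variable if the file or key is missing
```

Note the matching relaxation on `name`: any printable ASCII except '=' is now accepted, not only C_IDENTIFIERs.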
This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. - If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -7446,12 +7677,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -7530,7 +7759,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -7950,6 +8179,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. 
If these coincide + with a certificate rotation, it is possible that the private key and leaf + certificate you read may not correspond to each other. Your application + will need to check for this condition, and re-read until they are + consistent. + + The named signer controls chooses the format of the certificate it + issues; consult the signer implementation's documentation to learn how to + use the certificates it issues. + properties: + certificateChainPath: + description: |- + Write the certificate chain at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. + + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). + + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. + type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). + + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. + `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. + type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -8084,7 +8418,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. 
- More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- diff --git a/bundle/manifests/enterprise.splunk.com_ingestorclusters.yaml b/bundle/manifests/enterprise.splunk.com_ingestorclusters.yaml index 0481e4a83..1e359a3ac 100644 --- a/bundle/manifests/enterprise.splunk.com_ingestorclusters.yaml +++ b/bundle/manifests/enterprise.splunk.com_ingestorclusters.yaml @@ -647,8 +647,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. for each node that meets all of the scheduling requirements (resource request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -1010,10 +1010,12 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when scope @@ -1038,6 +1040,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -1049,8 +1053,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -1088,6 +1098,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -1118,21 +1130,28 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where apps - reside. Used for aws, if provided. Not used for minio - and azure. + reside. Required for aws, optional for azure and gcp. type: string secretRef: description: Secret object name @@ -1141,8 +1160,19 @@ spec: description: 'Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' 
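Editor's note: taken together, the app framework tightening above (required `name`/`location`, `x-kubernetes-list-type: map` keyed on `name`, the provider/storageType enums, and the aws-region rule) shapes an app repo block like the following sketch; the bucket, secret, and app-source names are illustrative, and the block is assumed to sit under the CR's `appRepo` field:

```yaml
appRepo:
  appsRepoPollIntervalSeconds: 600
  appSources:
    - name: networkApps            # map key: entries now merge by name on apply
      location: networkAppsLoc/    # required alongside name
      volumeName: volume_app_repo
      scope: local
  volumes:
    - name: volume_app_repo
      storageType: s3              # must be one of s3 | blob | gcs
      provider: aws                # must be one of aws | minio | azure | gcp
      region: us-west-2            # required when provider is aws (CEL rule above)
      endpoint: https://s3.us-west-2.amazonaws.com
      path: bucket-app-framework/
      secretRef: s3-secret
```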
+ enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object clusterManagerRef: @@ -1252,19 +1282,25 @@ spec: description: Storage configuration for /opt/splunk/etc volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' extraEnv: description: |- ExtraEnv refers to extra environment variables to be passed to the Splunk instance containers @@ -1274,7 +1310,9 @@ spec: a Container. properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -1332,6 +1370,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. 
+ type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -1390,12 +1465,13 @@ spec: environment variables) type: string imagePullPolicy: + default: IfNotPresent description: 'Sets pull policy for all images ("Always", "Never", or the default: "IfNotPresent")' enum: - Always - - Never - IfNotPresent + - Never type: string imagePullSecrets: description: |- @@ -1517,6 +1593,11 @@ spec: minimum: 0 type: integer livenessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 30 description: LivenessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command properties: failureThreshold: @@ -1679,6 +1760,11 @@ spec: minimum: 0 type: integer readinessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 5 description: ReadinessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes properties: failureThreshold: @@ -1717,7 +1803,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -2291,6 +2377,11 @@ spec: type: object type: object startupProbe: + default: + failureThreshold: 12 + initialDelaySeconds: 40 + periodSeconds: 30 + timeoutSeconds: 30 description: StartupProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes properties: failureThreshold: @@ -2532,19 +2623,25 @@ spec: description: Storage configuration for /opt/splunk/var volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' volumes: description: List of one or more Kubernetes volumes. These will be mounted in all pod containers as as /mnt/ @@ -3219,15 +3316,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. 
- If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -3409,12 +3504,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -3493,7 +3586,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -3913,6 +4006,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. If these coincide + with a certificate rotation, it is possible that the private key and leaf + certificate you read may not correspond to each other. Your application + will need to check for this condition, and re-read until they are + consistent. 
+ + The named signer controls chooses the format of the certificate it + issues; consult the signer implementation's documentation to learn how to + use the certificates it issues. + properties: + certificateChainPath: + description: |- + Write the certificate chain at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. + + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). + + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. + type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). + + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. + `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. + type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -4047,7 +4245,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. - More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- @@ -4366,11 +4563,13 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. 
Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when @@ -4396,6 +4595,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -4407,8 +4608,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -4447,6 +4654,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -4477,21 +4686,29 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where - apps reside. Used for aws, if provided. Not used for - minio and azure. + apps reside. Required for aws, optional for azure + and gcp. type: string secretRef: description: Secret object name @@ -4501,8 +4718,19 @@ spec: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' + enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object appSrcDeployStatus: diff --git a/bundle/manifests/enterprise.splunk.com_licensemanagers.yaml b/bundle/manifests/enterprise.splunk.com_licensemanagers.yaml index 25f8b3e1b..889c044b4 100644 --- a/bundle/manifests/enterprise.splunk.com_licensemanagers.yaml +++ b/bundle/manifests/enterprise.splunk.com_licensemanagers.yaml @@ -641,8 +641,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. for each node that meets all of the scheduling requirements (resource request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -1004,10 +1004,12 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when scope @@ -1032,6 +1034,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. 
later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -1043,8 +1047,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -1082,6 +1092,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -1112,21 +1124,28 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where apps - reside. Used for aws, if provided. Not used for minio - and azure. + reside. Required for aws, optional for azure and gcp. type: string secretRef: description: Secret object name @@ -1135,8 +1154,19 @@ spec: description: 'Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' + enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object clusterManagerRef: @@ -1246,19 +1276,25 @@ spec: description: Storage configuration for /opt/splunk/etc volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' extraEnv: description: |- ExtraEnv refers to extra environment variables to be passed to the Splunk instance containers @@ -1268,7 +1304,9 @@ spec: a Container. properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -1326,6 +1364,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. 
+ During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -1384,12 +1459,13 @@ spec: environment variables) type: string imagePullPolicy: + default: IfNotPresent description: 'Sets pull policy for all images ("Always", "Never", or the default: "IfNotPresent")' enum: - Always - - Never - IfNotPresent + - Never type: string imagePullSecrets: description: |- @@ -1511,6 +1587,11 @@ spec: minimum: 0 type: integer livenessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 30 description: LivenessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command properties: failureThreshold: @@ -1587,6 +1668,11 @@ spec: minimum: 0 type: integer readinessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 5 description: ReadinessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes properties: failureThreshold: @@ -1619,7 +1705,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. 
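Editor's note: several common-spec fields above now default at the CRD level: `imagePullPolicy` becomes IfNotPresent when omitted, and an omitted probe object materializes with the listed values. One caveat worth a sketch: structural-schema defaulting applies to the whole object, so a partially specified probe replaces the default rather than merging with it:

```yaml
# Omitting livenessProbe yields the CRD default:
#   {failureThreshold: 3, initialDelaySeconds: 30, periodSeconds: 30, timeoutSeconds: 30}

# Setting any probe field supplies the whole object; the remaining
# keys are NOT filled in from the default above:
livenessProbe:
  initialDelaySeconds: 60
```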
@@ -2193,6 +2279,11 @@ spec: type: object type: object startupProbe: + default: + failureThreshold: 12 + initialDelaySeconds: 40 + periodSeconds: 30 + timeoutSeconds: 30 description: StartupProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes properties: failureThreshold: @@ -2434,19 +2525,25 @@ spec: description: Storage configuration for /opt/splunk/var volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' volumes: description: List of one or more Kubernetes volumes. These will be mounted in all pod containers as as /mnt/ @@ -3121,15 +3218,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. - If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -3311,12 +3406,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. 
type: string path: description: |- @@ -3395,7 +3488,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -3815,6 +3908,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. If these coincide + with a certificate rotation, it is possible that the private key and leaf + certificate you read may not correspond to each other. Your application + will need to check for this condition, and re-read until they are + consistent. + + The named signer controls chooses the format of the certificate it + issues; consult the signer implementation's documentation to learn how to + use the certificates it issues. + properties: + certificateChainPath: + description: |- + Write the certificate chain at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. + + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). + + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. + type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. 
When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). + + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. + `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. + type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -3949,7 +4147,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. - More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- @@ -4260,11 +4457,13 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when @@ -4290,6 +4489,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -4301,8 +4502,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -4341,6 +4548,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -4371,21 +4580,29 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where - apps reside. Used for aws, if provided. Not used for - minio and azure. + apps reside. Required for aws, optional for azure + and gcp. type: string secretRef: description: Secret object name @@ -4395,8 +4612,19 @@ spec: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' 
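Editor's note: the aws-region rule repeats across these CRDs; a quick negative sketch of what it now blocks (all values illustrative):

```yaml
# Rejected at admission: provider is aws but region is empty.
# The API server surfaces the rule's message:
#   "region is required when provider is aws"
volumes:
  - name: volume_app_repo
    storageType: s3
    provider: aws
    region: ""                  # fails: size(self.region) > 0 is false
    endpoint: https://s3.us-east-1.amazonaws.com
    path: bucket-app-framework/
    secretRef: s3-secret
```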
+ enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object appSrcDeployStatus: diff --git a/bundle/manifests/enterprise.splunk.com_licensemasters.yaml b/bundle/manifests/enterprise.splunk.com_licensemasters.yaml index 4687c7109..ab649b9d0 100644 --- a/bundle/manifests/enterprise.splunk.com_licensemasters.yaml +++ b/bundle/manifests/enterprise.splunk.com_licensemasters.yaml @@ -636,8 +636,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. for each node that meets all of the scheduling requirements (resource request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -999,10 +999,12 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when scope @@ -1027,6 +1029,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -1038,8 +1042,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -1077,6 +1087,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -1107,21 +1119,28 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where apps - reside. Used for aws, if provided. Not used for minio - and azure. + reside. Required for aws, optional for azure and gcp. type: string secretRef: description: Secret object name @@ -1130,8 +1149,19 @@ spec: description: 'Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' 
+ enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object clusterManagerRef: @@ -1241,19 +1271,25 @@ spec: description: Storage configuration for /opt/splunk/etc volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' extraEnv: description: |- ExtraEnv refers to extra environment variables to be passed to the Splunk instance containers @@ -1263,7 +1299,9 @@ spec: a Container. properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -1321,6 +1359,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. 
+ type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -1379,12 +1454,13 @@ spec: environment variables) type: string imagePullPolicy: + default: IfNotPresent description: 'Sets pull policy for all images ("Always", "Never", or the default: "IfNotPresent")' enum: - Always - - Never - IfNotPresent + - Never type: string imagePullSecrets: description: |- @@ -1506,6 +1582,11 @@ spec: minimum: 0 type: integer livenessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 30 description: LivenessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command properties: failureThreshold: @@ -1582,6 +1663,11 @@ spec: minimum: 0 type: integer readinessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 5 description: ReadinessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes properties: failureThreshold: @@ -1614,7 +1700,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -2188,6 +2274,11 @@ spec: type: object type: object startupProbe: + default: + failureThreshold: 12 + initialDelaySeconds: 40 + periodSeconds: 30 + timeoutSeconds: 30 description: StartupProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes properties: failureThreshold: @@ -2429,19 +2520,25 @@ spec: description: Storage configuration for /opt/splunk/var volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' volumes: description: List of one or more Kubernetes volumes. These will be mounted in all pod containers as as /mnt/ @@ -3116,15 +3213,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. 
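`fileKeyRef` is a new upstream `EnvVarSource` pulled in by regenerating against the newer k8s API; per its description it is gated behind the alpha EnvFiles feature and reads an env var from a key inside an env file mounted in a volume. A hedged sketch of how it would appear under `extraEnv` (the volume, file, and key names are hypothetical):

```yaml
extraEnv:
  - name: DB_PASSWORD
    valueFrom:
      fileKeyRef:
        volumeName: app-config  # hypothetical volume mount holding the env file
        path: app.env           # relative path inside that volume
        key: DB_PASSWORD        # key within the env file
```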
- If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -3306,12 +3401,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -3390,7 +3483,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -3810,6 +3903,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. If these coincide + with a certificate rotation, it is possible that the private key and leaf + certificate you read may not correspond to each other. Your application + will need to check for this condition, and re-read until they are + consistent. 
+ + The named signer controls chooses the format of the certificate it + issues; consult the signer implementation's documentation to learn how to + use the certificates it issues. + properties: + certificateChainPath: + description: |- + Write the certificate chain at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. + + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). + + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. + type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). + + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. + `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. + type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -3944,7 +4142,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. - More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- @@ -4255,11 +4452,13 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. 
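`podCertificate` is likewise new upstream API (alpha): the kubelet generates a private key, files a PodCertificateRequest with the named signer, and writes the issued credentials into the projected volume. A minimal sketch using the single-file credential bundle; the signer name is hypothetical:

```yaml
volumes:
  - name: workload-tls
    projected:
      sources:
        - podCertificate:
            signerName: example.com/internal-ca    # hypothetical signer
            keyType: ECDSAP256                     # one of the documented key types
            credentialBundlePath: credentials.pem  # PKCS#8 key + chain, readable in one atomic read
```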
Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when @@ -4285,6 +4484,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -4296,8 +4497,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -4336,6 +4543,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -4366,21 +4575,29 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where - apps reside. Used for aws, if provided. Not used for - minio and azure. + apps reside. Required for aws, optional for azure + and gcp. type: string secretRef: description: Secret object name @@ -4390,8 +4607,19 @@ spec: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' + enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object appSrcDeployStatus: diff --git a/bundle/manifests/enterprise.splunk.com_monitoringconsoles.yaml b/bundle/manifests/enterprise.splunk.com_monitoringconsoles.yaml index c8954a274..7d5487622 100644 --- a/bundle/manifests/enterprise.splunk.com_monitoringconsoles.yaml +++ b/bundle/manifests/enterprise.splunk.com_monitoringconsoles.yaml @@ -643,8 +643,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. for each node that meets all of the scheduling requirements (resource request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -1006,10 +1006,12 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when scope @@ -1034,6 +1036,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. 
later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -1045,8 +1049,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -1084,6 +1094,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -1114,21 +1126,28 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where apps - reside. Used for aws, if provided. Not used for minio - and azure. + reside. Required for aws, optional for azure and gcp. type: string secretRef: description: Secret object name @@ -1137,8 +1156,19 @@ spec: description: 'Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' + enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object clusterManagerRef: @@ -1248,19 +1278,25 @@ spec: description: Storage configuration for /opt/splunk/etc volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' extraEnv: description: |- ExtraEnv refers to extra environment variables to be passed to the Splunk instance containers @@ -1270,7 +1306,9 @@ spec: a Container. properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -1328,6 +1366,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. 
+ During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -1386,12 +1461,13 @@ spec: environment variables) type: string imagePullPolicy: + default: IfNotPresent description: 'Sets pull policy for all images ("Always", "Never", or the default: "IfNotPresent")' enum: - Always - - Never - IfNotPresent + - Never type: string imagePullSecrets: description: |- @@ -1513,6 +1589,11 @@ spec: minimum: 0 type: integer livenessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 30 description: LivenessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command properties: failureThreshold: @@ -1589,6 +1670,11 @@ spec: minimum: 0 type: integer readinessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 5 description: ReadinessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes properties: failureThreshold: @@ -1621,7 +1707,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. 
@@ -2195,6 +2281,11 @@ spec: type: object type: object startupProbe: + default: + failureThreshold: 12 + initialDelaySeconds: 40 + periodSeconds: 30 + timeoutSeconds: 30 description: StartupProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes properties: failureThreshold: @@ -2436,19 +2527,25 @@ spec: description: Storage configuration for /opt/splunk/var volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' volumes: description: List of one or more Kubernetes volumes. These will be mounted in all pod containers as as /mnt/ @@ -3123,15 +3220,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. - If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -3313,12 +3408,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. 
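With the `default:` blocks added above, a spec that omits the probe stanzas is now persisted by the API server with schema-level defaults, presumably so the operator no longer needs to inject them at reconcile time. The persisted shape, using the values from this hunk:

```yaml
livenessProbe:
  failureThreshold: 3
  initialDelaySeconds: 30
  periodSeconds: 30
  timeoutSeconds: 30
readinessProbe:
  failureThreshold: 3
  initialDelaySeconds: 10
  periodSeconds: 5
  timeoutSeconds: 5
startupProbe:
  failureThreshold: 12
  initialDelaySeconds: 40
  periodSeconds: 30
  timeoutSeconds: 30
```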
type: string path: description: |- @@ -3397,7 +3490,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -3817,6 +3910,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. If these coincide + with a certificate rotation, it is possible that the private key and leaf + certificate you read may not correspond to each other. Your application + will need to check for this condition, and re-read until they are + consistent. + + The named signer controls chooses the format of the certificate it + issues; consult the signer implementation's documentation to learn how to + use the certificates it issues. + properties: + certificateChainPath: + description: |- + Write the certificate chain at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. + + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). + + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. + type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. 
When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). + + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. + `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. + type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -3951,7 +4149,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. - More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- @@ -4261,11 +4458,13 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when @@ -4291,6 +4490,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -4302,8 +4503,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -4342,6 +4549,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -4372,21 +4581,29 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where - apps reside. Used for aws, if provided. Not used for - minio and azure. + apps reside. Required for aws, optional for azure + and gcp. type: string secretRef: description: Secret object name @@ -4396,8 +4613,19 @@ spec: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' 
+ enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object appSrcDeployStatus: @@ -5174,8 +5402,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. for each node that meets all of the scheduling requirements (resource request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -5537,10 +5765,12 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when scope @@ -5565,6 +5795,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -5576,8 +5808,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -5615,6 +5853,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -5645,21 +5885,28 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where apps - reside. Used for aws, if provided. Not used for minio - and azure. + reside. Required for aws, optional for azure and gcp. type: string secretRef: description: Secret object name @@ -5668,8 +5915,19 @@ spec: description: 'Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' 
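Marking `appSources` above with `x-kubernetes-list-type: map` keyed on `name` changes its merge semantics: server-side apply merges entries per `name` instead of replacing the whole array, and the API server rejects duplicate keys. A sketch (the app source names are hypothetical):

```yaml
appSources:
  - name: networkApps  # merge key; a second entry reusing this name is rejected
    location: network/
  - name: searchApps
    location: search/
```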
+ enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object clusterManagerRef: @@ -5779,19 +6037,25 @@ spec: description: Storage configuration for /opt/splunk/etc volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' extraEnv: description: |- ExtraEnv refers to extra environment variables to be passed to the Splunk instance containers @@ -5801,7 +6065,9 @@ spec: a Container. properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -5859,6 +6125,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. 
+ type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -5917,12 +6220,13 @@ spec: environment variables) type: string imagePullPolicy: + default: IfNotPresent description: 'Sets pull policy for all images ("Always", "Never", or the default: "IfNotPresent")' enum: - Always - - Never - IfNotPresent + - Never type: string imagePullSecrets: description: |- @@ -6044,6 +6348,11 @@ spec: minimum: 0 type: integer livenessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 30 description: LivenessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command properties: failureThreshold: @@ -6120,6 +6429,11 @@ spec: minimum: 0 type: integer readinessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 5 description: ReadinessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes properties: failureThreshold: @@ -6152,7 +6466,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -6726,6 +7040,11 @@ spec: type: object type: object startupProbe: + default: + failureThreshold: 12 + initialDelaySeconds: 40 + periodSeconds: 30 + timeoutSeconds: 30 description: StartupProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes properties: failureThreshold: @@ -6967,19 +7286,25 @@ spec: description: Storage configuration for /opt/splunk/var volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' volumes: description: List of one or more Kubernetes volumes. These will be mounted in all pod containers as as /mnt/ @@ -7654,15 +7979,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. 
- If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -7844,12 +8167,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -7928,7 +8249,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -8348,6 +8669,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. If these coincide + with a certificate rotation, it is possible that the private key and leaf + certificate you read may not correspond to each other. Your application + will need to check for this condition, and re-read until they are + consistent. 
+ + The named signer controls chooses the format of the certificate it + issues; consult the signer implementation's documentation to learn how to + use the certificates it issues. + properties: + certificateChainPath: + description: |- + Write the certificate chain at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. + + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). + + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. + type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). + + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. + `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. + type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -8482,7 +8908,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. - More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- @@ -8792,11 +9217,13 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. 
Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when @@ -8822,6 +9249,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -8833,8 +9262,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -8873,6 +9308,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -8903,21 +9340,29 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where - apps reside. Used for aws, if provided. Not used for - minio and azure. + apps reside. Required for aws, optional for azure + and gcp. type: string secretRef: description: Secret object name @@ -8927,8 +9372,19 @@ spec: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' + enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object appSrcDeployStatus: diff --git a/bundle/manifests/enterprise.splunk.com_postgresclusterclasses.yaml b/bundle/manifests/enterprise.splunk.com_postgresclusterclasses.yaml index b564ca757..9c9d19e35 100644 --- a/bundle/manifests/enterprise.splunk.com_postgresclusterclasses.yaml +++ b/bundle/manifests/enterprise.splunk.com_postgresclusterclasses.yaml @@ -240,6 +240,8 @@ spec: description: |- Provisioner identifies which database provisioner to use. Currently supported: "postgresql.cnpg.io" (CloudNativePG) + enum: + - postgresql.cnpg.io type: string required: - provisioner diff --git a/bundle/manifests/enterprise.splunk.com_queues.yaml b/bundle/manifests/enterprise.splunk.com_queues.yaml index 90dca1f99..8939f7d05 100644 --- a/bundle/manifests/enterprise.splunk.com_queues.yaml +++ b/bundle/manifests/enterprise.splunk.com_queues.yaml @@ -84,34 +84,20 @@ spec: volumes: description: List of remote storage volumes items: - description: VolumeSpec defines remote volume config + description: SQSVolumeSpec defines a volume reference for SQS + queue authentication properties: - endpoint: - description: Remote volume URI - type: string name: description: Remote volume name - type: string - path: - description: Remote volume path - type: string - provider: - description: 'App Package Remote Store provider. Supported - values: aws, minio, azure, gcp.' - type: string - region: - description: Region of the remote storage volume where apps - reside. Used for aws, if provided. Not used for minio - and azure. + minLength: 1 type: string secretRef: - description: Secret object name - type: string - storageType: - description: 'Remote Storage type. 
Supported values: s3, - blob, gcs. s3 works with aws or minio providers, whereas - blob works with azure provider, gcs works for gcp.' + description: Remote volume secret ref + minLength: 1 type: string + required: + - name + - secretRef type: object type: array required: diff --git a/bundle/manifests/enterprise.splunk.com_searchheadclusters.yaml b/bundle/manifests/enterprise.splunk.com_searchheadclusters.yaml index 4c9359abe..d87e80b39 100644 --- a/bundle/manifests/enterprise.splunk.com_searchheadclusters.yaml +++ b/bundle/manifests/enterprise.splunk.com_searchheadclusters.yaml @@ -649,8 +649,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. for each node that meets all of the scheduling requirements (resource request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -1012,10 +1012,12 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when scope @@ -1040,6 +1042,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -1051,8 +1055,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -1090,6 +1100,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -1120,21 +1132,28 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where apps - reside. Used for aws, if provided. Not used for minio - and azure. + reside. Required for aws, optional for azure and gcp. type: string secretRef: description: Secret object name @@ -1143,8 +1162,19 @@ spec: description: 'Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' 
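The queues hunk above swaps the generic remote-store `VolumeSpec` for the narrower `SQSVolumeSpec`: just `name` and `secretRef`, both required and non-empty. A sketch of the resulting shape (names are hypothetical):

```yaml
volumes:
  - name: sqs-auth              # hypothetical
    secretRef: sqs-credentials  # hypothetical Secret holding the queue credentials
```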
+ enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object clusterManagerRef: @@ -1254,19 +1284,25 @@ spec: description: Storage configuration for /opt/splunk/etc volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' extraEnv: description: |- ExtraEnv refers to extra environment variables to be passed to the Splunk instance containers @@ -1276,7 +1312,9 @@ spec: a Container. properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -1334,6 +1372,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. 
+ type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -1392,12 +1467,13 @@ spec: environment variables) type: string imagePullPolicy: + default: IfNotPresent description: 'Sets pull policy for all images ("Always", "Never", or the default: "IfNotPresent")' enum: - Always - - Never - IfNotPresent + - Never type: string imagePullSecrets: description: |- @@ -1519,6 +1595,11 @@ spec: minimum: 0 type: integer livenessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 30 description: LivenessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command properties: failureThreshold: @@ -1595,6 +1676,11 @@ spec: minimum: 0 type: integer readinessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 5 description: ReadinessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes properties: failureThreshold: @@ -1632,7 +1718,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -2206,6 +2292,11 @@ spec: type: object type: object startupProbe: + default: + failureThreshold: 12 + initialDelaySeconds: 40 + periodSeconds: 30 + timeoutSeconds: 30 description: StartupProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes properties: failureThreshold: @@ -2447,19 +2538,25 @@ spec: description: Storage configuration for /opt/splunk/var volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' volumes: description: List of one or more Kubernetes volumes. These will be mounted in all pod containers as as /mnt/ @@ -3134,15 +3231,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. 
- If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -3324,12 +3419,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -3408,7 +3501,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -3828,6 +3921,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. If these coincide + with a certificate rotation, it is possible that the private key and leaf + certificate you read may not correspond to each other. Your application + will need to check for this condition, and re-read until they are + consistent. 
+ + The named signer controls chooses the format of the certificate it + issues; consult the signer implementation's documentation to learn how to + use the certificates it issues. + properties: + certificateChainPath: + description: |- + Write the certificate chain at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. + + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). + + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. + type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). + + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. + `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. + type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -3962,7 +4160,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. - More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- @@ -4284,11 +4481,13 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. 
Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when @@ -4314,6 +4513,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -4325,8 +4526,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -4365,6 +4572,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -4395,21 +4604,29 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where - apps reside. Used for aws, if provided. Not used for - minio and azure. + apps reside. Required for aws, optional for azure + and gcp. type: string secretRef: description: Secret object name @@ -4419,8 +4636,19 @@ spec: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' + enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object appSrcDeployStatus: @@ -5267,8 +5495,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. for each node that meets all of the scheduling requirements (resource request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -5630,10 +5858,12 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when scope @@ -5658,6 +5888,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -5669,8 +5901,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. 
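The x-kubernetes-validations blocks added above move two invariants into CEL at admission time: emptyDir (ephemeral) storage cannot be combined with storageClassName or storageCapacity, and the app-framework volume list is now a map keyed by name with required fields. A minimal sketch of a spec fragment the storage rule rejects — the kind and values are illustrative, since the same storage schema is embedded in several of these CRDs:

    apiVersion: enterprise.splunk.com/v4
    kind: SearchHeadCluster
    metadata:
      name: example-shc
    spec:
      etcVolumeStorageConfig:
        ephemeralStorage: true
        storageClassName: gp2  # rejected at apply time with
                               # "storageClassName and ephemeralStorage are mutually exclusive"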
@@ -5708,6 +5946,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -5738,21 +5978,28 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where apps - reside. Used for aws, if provided. Not used for minio - and azure. + reside. Required for aws, optional for azure and gcp. type: string secretRef: description: Secret object name @@ -5761,8 +6008,19 @@ spec: description: 'Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' + enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object clusterManagerRef: @@ -6074,7 +6332,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -6129,19 +6387,25 @@ spec: description: Storage configuration for /opt/splunk/etc volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' extraEnv: description: |- ExtraEnv refers to extra environment variables to be passed to the Splunk instance containers @@ -6151,7 +6415,9 @@ spec: a Container. properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -6209,6 +6475,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. 
+ type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -6267,12 +6570,13 @@ spec: environment variables) type: string imagePullPolicy: + default: IfNotPresent description: 'Sets pull policy for all images ("Always", "Never", or the default: "IfNotPresent")' enum: - Always - - Never - IfNotPresent + - Never type: string imagePullSecrets: description: |- @@ -6394,6 +6698,11 @@ spec: minimum: 0 type: integer livenessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 30 description: LivenessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command properties: failureThreshold: @@ -6470,6 +6779,11 @@ spec: minimum: 0 type: integer readinessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 5 description: ReadinessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes properties: failureThreshold: @@ -6495,6 +6809,7 @@ spec: type: integer type: object replicas: + default: 3 description: Number of search head pods; a search head cluster will be created if > 1 format: int32 @@ -6507,7 +6822,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. 
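The probe, replica, and imagePullPolicy defaults added in this hunk are structural-schema defaults: the API server fills them in only when the field is entirely absent, and a probe object that sets even one subfield suppresses the object-level default rather than merging with it. A sketch of the round-trip, assuming the search head cluster CRD (object name illustrative):

    apiVersion: enterprise.splunk.com/v4
    kind: SearchHeadCluster
    metadata:
      name: defaults-demo
    spec: {}
    # Reading the object back after apply shows the schema defaults, e.g.:
    #   spec:
    #     replicas: 3
    #     imagePullPolicy: IfNotPresent
    #     livenessProbe:
    #       failureThreshold: 3
    #       initialDelaySeconds: 30
    #       periodSeconds: 30
    #       timeoutSeconds: 30
    #     readinessProbe:
    #       failureThreshold: 3
    #       initialDelaySeconds: 10
    #       periodSeconds: 5
    #       timeoutSeconds: 5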
@@ -7081,6 +7396,11 @@ spec: type: object type: object startupProbe: + default: + failureThreshold: 12 + initialDelaySeconds: 40 + periodSeconds: 30 + timeoutSeconds: 30 description: StartupProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes properties: failureThreshold: @@ -7322,19 +7642,25 @@ spec: description: Storage configuration for /opt/splunk/var volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' volumes: description: List of one or more Kubernetes volumes. These will be mounted in all pod containers as as /mnt/ @@ -8009,15 +8335,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. - If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -8199,12 +8523,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. 
type: string path: description: |- @@ -8283,7 +8605,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -8703,6 +9025,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. If these coincide + with a certificate rotation, it is possible that the private key and leaf + certificate you read may not correspond to each other. Your application + will need to check for this condition, and re-read until they are + consistent. + + The named signer controls chooses the format of the certificate it + issues; consult the signer implementation's documentation to learn how to + use the certificates it issues. + properties: + certificateChainPath: + description: |- + Write the certificate chain at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. + + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). + + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. + type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. 
When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). + + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. + `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. + type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -8837,7 +9264,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. - More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- @@ -9159,11 +9585,13 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when @@ -9189,6 +9617,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -9200,8 +9630,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -9240,6 +9676,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -9270,21 +9708,29 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where - apps reside. Used for aws, if provided. Not used for - minio and azure. + apps reside. Required for aws, optional for azure + and gcp. type: string secretRef: description: Secret object name @@ -9294,8 +9740,19 @@ spec: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' 
+ enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object appSrcDeployStatus: diff --git a/bundle/manifests/enterprise.splunk.com_standalones.yaml b/bundle/manifests/enterprise.splunk.com_standalones.yaml index 60c549249..16457a549 100644 --- a/bundle/manifests/enterprise.splunk.com_standalones.yaml +++ b/bundle/manifests/enterprise.splunk.com_standalones.yaml @@ -644,8 +644,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. for each node that meets all of the scheduling requirements (resource request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -1007,10 +1007,12 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when scope @@ -1035,6 +1037,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -1046,8 +1050,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -1085,6 +1095,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -1115,21 +1127,28 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where apps - reside. Used for aws, if provided. Not used for minio - and azure. + reside. Required for aws, optional for azure and gcp. type: string secretRef: description: Secret object name @@ -1138,8 +1157,19 @@ spec: description: 'Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' 
+ enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object clusterManagerRef: @@ -1249,19 +1279,25 @@ spec: description: Storage configuration for /opt/splunk/etc volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' extraEnv: description: |- ExtraEnv refers to extra environment variables to be passed to the Splunk instance containers @@ -1271,7 +1307,9 @@ spec: a Container. properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -1329,6 +1367,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. 
+ type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -1387,12 +1462,13 @@ spec: environment variables) type: string imagePullPolicy: + default: IfNotPresent description: 'Sets pull policy for all images ("Always", "Never", or the default: "IfNotPresent")' enum: - Always - - Never - IfNotPresent + - Never type: string imagePullSecrets: description: |- @@ -1514,6 +1590,11 @@ spec: minimum: 0 type: integer livenessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 30 description: LivenessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command properties: failureThreshold: @@ -1590,6 +1671,11 @@ spec: minimum: 0 type: integer readinessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 5 description: ReadinessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes properties: failureThreshold: @@ -1626,7 +1712,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -2272,6 +2358,7 @@ spec: type: integer name: description: Splunk index name + minLength: 1 type: string remotePath: description: Index location relative to the remote volume @@ -2280,8 +2367,13 @@ spec: volumeName: description: Remote Volume name type: string + required: + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map volumes: description: List of remote storage volumes items: @@ -2289,21 +2381,28 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where apps - reside. Used for aws, if provided. Not used for minio - and azure. + reside. Required for aws, optional for azure and gcp. type: string secretRef: description: Secret object name @@ -2312,11 +2411,30 @@ spec: description: 'Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' 
+ enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map type: object startupProbe: + default: + failureThreshold: 12 + initialDelaySeconds: 40 + periodSeconds: 30 + timeoutSeconds: 30 description: StartupProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes properties: failureThreshold: @@ -2558,19 +2676,25 @@ spec: description: Storage configuration for /opt/splunk/var volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' volumes: description: List of one or more Kubernetes volumes. These will be mounted in all pod containers as as /mnt/ @@ -3245,15 +3369,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. - If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -3435,12 +3557,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. 
- More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -3519,7 +3639,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -3939,6 +4059,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. If these coincide + with a certificate rotation, it is possible that the private key and leaf + certificate you read may not correspond to each other. Your application + will need to check for this condition, and re-read until they are + consistent. + + The named signer controls chooses the format of the certificate it + issues; consult the signer implementation's documentation to learn how to + use the certificates it issues. + properties: + certificateChainPath: + description: |- + Write the certificate chain at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. + + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). + + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. 
+ type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). + + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. + `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. + type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -4073,7 +4298,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. - More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- @@ -4384,11 +4608,13 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when @@ -4414,6 +4640,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -4425,8 +4653,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -4465,6 +4699,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -4495,21 +4731,29 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where - apps reside. Used for aws, if provided. Not used for - minio and azure. + apps reside. Required for aws, optional for azure + and gcp. 
type: string secretRef: description: Secret object name @@ -4519,8 +4763,19 @@ spec: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' + enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object appSrcDeployStatus: @@ -4742,6 +4997,7 @@ spec: type: integer name: description: Splunk index name + minLength: 1 type: string remotePath: description: Index location relative to the remote volume @@ -4750,8 +5006,13 @@ spec: volumeName: description: Remote Volume name type: string + required: + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map volumes: description: List of remote storage volumes items: @@ -4759,21 +5020,28 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where apps - reside. Used for aws, if provided. Not used for minio - and azure. + reside. Required for aws, optional for azure and gcp. type: string secretRef: description: Secret object name @@ -4782,9 +5050,23 @@ spec: description: 'Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' + enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map type: object telAppInstalled: description: Telemetry App installation flag @@ -5419,8 +5701,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. for each node that meets all of the scheduling requirements (resource request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -5782,10 +6064,12 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when scope @@ -5810,6 +6094,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. 
later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -5821,8 +6107,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -5860,6 +6152,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -5890,21 +6184,28 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where apps - reside. Used for aws, if provided. Not used for minio - and azure. + reside. Required for aws, optional for azure and gcp. type: string secretRef: description: Secret object name @@ -5913,8 +6214,19 @@ spec: description: 'Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' + enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object clusterManagerRef: @@ -6024,19 +6336,25 @@ spec: description: Storage configuration for /opt/splunk/etc volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' extraEnv: description: |- ExtraEnv refers to extra environment variables to be passed to the Splunk instance containers @@ -6046,7 +6364,9 @@ spec: a Container. properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -6104,6 +6424,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. 
+ During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -6162,12 +6519,13 @@ spec: environment variables) type: string imagePullPolicy: + default: IfNotPresent description: 'Sets pull policy for all images ("Always", "Never", or the default: "IfNotPresent")' enum: - Always - - Never - IfNotPresent + - Never type: string imagePullSecrets: description: |- @@ -6289,6 +6647,11 @@ spec: minimum: 0 type: integer livenessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 30 + timeoutSeconds: 30 description: LivenessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-a-liveness-command properties: failureThreshold: @@ -6365,6 +6728,11 @@ spec: minimum: 0 type: integer readinessProbe: + default: + failureThreshold: 3 + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 5 description: ReadinessProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes properties: failureThreshold: @@ -6390,6 +6758,7 @@ spec: type: integer type: object replicas: + default: 1 description: Number of standalone pods format: int32 type: integer @@ -6401,7 +6770,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -7047,6 +7416,7 @@ spec: type: integer name: description: Splunk index name + minLength: 1 type: string remotePath: description: Index location relative to the remote volume @@ -7055,8 +7425,13 @@ spec: volumeName: description: Remote Volume name type: string + required: + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map volumes: description: List of remote storage volumes items: @@ -7064,21 +7439,28 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where apps - reside. Used for aws, if provided. Not used for minio - and azure. + reside. Required for aws, optional for azure and gcp. 
type: string secretRef: description: Secret object name @@ -7087,11 +7469,30 @@ spec: description: 'Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' + enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map type: object startupProbe: + default: + failureThreshold: 12 + initialDelaySeconds: 40 + periodSeconds: 30 + timeoutSeconds: 30 description: StartupProbe as defined in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes properties: failureThreshold: @@ -7333,19 +7734,25 @@ spec: description: Storage configuration for /opt/splunk/var volume properties: ephemeralStorage: - description: |- - If true, ephemeral (emptyDir) storage will be used - default false + default: false + description: If true, ephemeral (emptyDir) storage will be used type: boolean storageCapacity: description: Storage capacity to request persistent volume claims - (default=”10Gi” for etc and "100Gi" for var) + (default="10Gi" for etc and "100Gi" for var) type: string storageClassName: description: Name of StorageClass to use for persistent volume claims type: string type: object + x-kubernetes-validations: + - message: storageClassName and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageClassName) > 0 && self.ephemeralStorage + == true)' + - message: storageCapacity and ephemeralStorage are mutually exclusive + rule: '!(size(self.storageCapacity) > 0 && self.ephemeralStorage + == true)' volumes: description: List of one or more Kubernetes volumes. These will be mounted in all pod containers as as /mnt/ @@ -8020,15 +8427,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. - If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -8210,12 +8615,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. 
Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -8294,7 +8697,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -8714,6 +9117,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. If these coincide + with a certificate rotation, it is possible that the private key and leaf + certificate you read may not correspond to each other. Your application + will need to check for this condition, and re-read until they are + consistent. + + The named signer controls chooses the format of the certificate it + issues; consult the signer implementation's documentation to learn how to + use the certificates it issues. + properties: + certificateChainPath: + description: |- + Write the certificate chain at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. + + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). 
+ + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. + type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). + + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. + `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. + type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -8848,7 +9356,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. - More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- @@ -9159,11 +9666,13 @@ spec: properties: location: description: Location relative to the volume path + minLength: 1 type: string name: description: Logical name for the set of apps placed in this location. Logical name must be unique to the appRepo + minLength: 1 type: string premiumAppsProps: description: Properties for premium apps, fill in when @@ -9189,6 +9698,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -9200,8 +9711,14 @@ spec: volumeName: description: Remote Storage Volume name type: string + required: + - location + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map appsRepoPollIntervalSeconds: description: |- Interval in seconds to check the Remote Storage for App changes. @@ -9240,6 +9757,8 @@ spec: type: description: 'Type: enterpriseSecurity for now, can accommodate itsi etc.. later' + enum: + - enterpriseSecurity type: string type: object scope: @@ -9270,21 +9789,29 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' 
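[Note on the podCertificate block above: it is not API surface added by this patch; it enters these CRDs because the pod template embeds the upstream Kubernetes projected-volume schema, where PodCertificate is an alpha feature. A minimal sketch of the projection that schema admits, with a placeholder signer name:

volumes:
  - name: client-identity
    projected:
      sources:
        - podCertificate:
            signerName: example.com/tls-signer   # placeholder; required
            keyType: ECDSAP256                   # required; one of the values listed above
            credentialBundlePath: credbundle.pem # single PEM file: key first, then chain
            maxExpirationSeconds: 86400          # default if omitted; min 3600, max 7862400

Reading the single bundle avoids the key/chain skew the description warns about when keyPath and certificateChainPath are read as two separate files.]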
+ enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where - apps reside. Used for aws, if provided. Not used for - minio and azure. + apps reside. Required for aws, optional for azure + and gcp. type: string secretRef: description: Secret object name @@ -9294,8 +9821,19 @@ spec: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' + enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array type: object appSrcDeployStatus: @@ -9520,6 +10058,7 @@ spec: type: integer name: description: Splunk index name + minLength: 1 type: string remotePath: description: Index location relative to the remote volume @@ -9528,8 +10067,13 @@ spec: volumeName: description: Remote Volume name type: string + required: + - name type: object type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map volumes: description: List of remote storage volumes items: @@ -9537,21 +10081,28 @@ spec: properties: endpoint: description: Remote volume URI + minLength: 1 type: string name: description: Remote volume name + minLength: 1 type: string path: description: Remote volume path + minLength: 1 type: string provider: description: 'App Package Remote Store provider. Supported values: aws, minio, azure, gcp.' + enum: + - aws + - minio + - azure + - gcp type: string region: description: Region of the remote storage volume where apps - reside. Used for aws, if provided. Not used for minio - and azure. + reside. Required for aws, optional for azure and gcp. type: string secretRef: description: Secret object name @@ -9560,9 +10111,23 @@ spec: description: 'Remote Storage type. Supported values: s3, blob, gcs. s3 works with aws or minio providers, whereas blob works with azure provider, gcs works for gcp.' 
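[Note on the appSources hunks above: name and location gain minLength and required markers, names become list-map keys, and premiumAppsProps.type is restricted to the single enum value enterpriseSecurity. A sketch of an app framework fragment that satisfies the new rules; the appRepo nesting is assumed from the surrounding schema, names are illustrative:

appRepo:
  appSources:
    - name: security-apps          # list-map key: unique, required, non-empty
      location: apps/security/     # required, non-empty
      premiumAppsProps:
        type: enterpriseSecurity   # only value the new enum admits
  volumes:
    - name: app-bucket
      endpoint: https://storage.googleapis.com
      path: my-bucket/apps
      provider: gcp                # region optional for gcp under the CEL rule
      storageType: gcs
]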
+ enum: + - s3 + - blob + - gcs type: string + required: + - endpoint + - name + - path type: object + x-kubernetes-validations: + - message: region is required when provider is aws + rule: self.provider != 'aws' || size(self.region) > 0 type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map type: object telAppInstalled: description: Telemetry App installation flag diff --git a/bundle/manifests/splunk-operator.clusterserviceversion.yaml b/bundle/manifests/splunk-operator.clusterserviceversion.yaml index 113078cbc..841068555 100644 --- a/bundle/manifests/splunk-operator.clusterserviceversion.yaml +++ b/bundle/manifests/splunk-operator.clusterserviceversion.yaml @@ -69,7 +69,7 @@ metadata: "name": "clustermanager-sample", "namespace": "splunk-operator" }, - "spec": {} + "spec": null }, { "apiVersion": "enterprise.splunk.com/v4", @@ -130,7 +130,7 @@ metadata: "name": "monitoringconsole-sample", "namespace": "splunk-operator" }, - "spec": {} + "spec": null }, { "apiVersion": "enterprise.splunk.com/v4", @@ -141,7 +141,91 @@ metadata: ], "name": "objectstorage-sample" }, - "spec": {} + "spec": { + "provider": "s3", + "s3": { + "path": "bucket/path" + } + } + }, + { + "apiVersion": "enterprise.splunk.com/v4", + "kind": "PostgresCluster", + "metadata": { + "labels": { + "app.kubernetes.io/managed-by": "kustomize", + "app.kubernetes.io/name": "splunk-operator" + }, + "name": "postgresql-cluster-dev" + }, + "spec": { + "class": "postgresql-dev" + } + }, + { + "apiVersion": "enterprise.splunk.com/v4", + "kind": "PostgresClusterClass", + "metadata": { + "name": "postgresql-dev" + }, + "spec": { + "cnpg": { + "connectionPooler": { + "config": { + "max_client_conn": "100" + }, + "instances": 2, + "mode": "transaction" + }, + "primaryUpdateMethod": "restart" + }, + "config": { + "connectionPoolerEnabled": true, + "instances": 1, + "postgresVersion": "18", + "resources": { + "limits": { + "cpu": "1", + "memory": "2Gi" + }, + "requests": { + "cpu": "500m", + "memory": "1Gi" + } + }, + "storage": "10Gi" + }, + "provisioner": "postgresql.cnpg.io" + } + }, + { + "apiVersion": "enterprise.splunk.com/v4", + "kind": "PostgresDatabase", + "metadata": { + "name": "splunk-databases" + }, + "spec": { + "clusterRef": { + "name": "postgresql-cluster-dev" + }, + "databases": [ + { + "deletionPolicy": "Delete", + "extensions": [ + "pg_stat_statements", + "pgcrypto" + ], + "name": "kvstore" + }, + { + "deletionPolicy": "Delete", + "extensions": [ + "pg_trgm" + ], + "name": "analytics" + } + ] + } }, { "apiVersion": "enterprise.splunk.com/v4", @@ -152,7 +236,14 @@ metadata: ], "name": "queue-sample" }, - "spec": {} + "spec": { + "provider": "sqs", + "sqs": { + "authRegion": "us-west-2", + "dlq": "dlq", + "name": "queue" + } + } }, { "apiVersion": "enterprise.splunk.com/v4", @@ -180,13 +271,13 @@ metadata: "name": "standalone-sample", "namespace": "splunk-operator" }, - "spec": {} + "spec": null } ] capabilities: Seamless Upgrades categories: Big Data, Logging & Tracing, Monitoring, Security, AI/Machine Learning containerImage: splunk/splunk-operator@sha256:c4e0d314622699496f675760aad314520d050a66627fdf33e1e21fa28ca85d50 - createdAt: "2026-03-02T17:02:29Z" + createdAt: "2026-03-25T20:21:54Z" description: The Splunk Operator for Kubernetes enables you to quickly and easily deploy Splunk Enterprise on your choice of private or public cloud provider. 
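[Note on the new alm-examples above: they encode the intended binding chain for the Postgres CRs. A PostgresClusterClass carries provisioner-level policy, a PostgresCluster selects it by name via spec.class, and a PostgresDatabase targets the cluster via spec.clusterRef. The same chain as plain YAML, with values copied from the JSON samples and non-essential fields elided:

apiVersion: enterprise.splunk.com/v4
kind: PostgresClusterClass
metadata:
  name: postgresql-dev
spec:
  provisioner: postgresql.cnpg.io
  config:
    instances: 1
    postgresVersion: "18"
    storage: 10Gi
---
apiVersion: enterprise.splunk.com/v4
kind: PostgresCluster
metadata:
  name: postgresql-cluster-dev
spec:
  class: postgresql-dev            # name of the class above
---
apiVersion: enterprise.splunk.com/v4
kind: PostgresDatabase
metadata:
  name: splunk-databases
spec:
  clusterRef:
    name: postgresql-cluster-dev   # name of the cluster above
  databases:
    - name: kvstore
      deletionPolicy: Delete
]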
The Operator simplifies scaling and management of Splunk Enterprise by automating @@ -269,6 +360,23 @@ spec: kind: ObjectStorage name: objectstorages.enterprise.splunk.com version: v4 + - description: |- + PostgresClusterClass is the Schema for the postgresclusterclasses API. + PostgresClusterClass defines a reusable template and policy for postgres cluster provisioning. + displayName: Postgres Cluster Class + kind: PostgresClusterClass + name: postgresclusterclasses.enterprise.splunk.com + version: v4 + - description: PostgresCluster is the Schema for the postgresclusters API. + displayName: Postgres Cluster + kind: PostgresCluster + name: postgresclusters.enterprise.splunk.com + version: v4 + - description: PostgresDatabase is the Schema for the postgresdatabases API. + displayName: Postgres Database + kind: PostgresDatabase + name: postgresdatabases.enterprise.splunk.com + version: v4 - description: Queue is the Schema for the queues API displayName: Queue kind: Queue @@ -371,6 +479,8 @@ spec: - licensemasters - monitoringconsoles - objectstorages + - postgresclusters + - postgresdatabases - queues - searchheadclusters - standalones @@ -393,6 +503,8 @@ spec: - licensemasters/finalizers - monitoringconsoles/finalizers - objectstorages/finalizers + - postgresclusters/finalizers + - postgresdatabases/finalizers - queues/finalizers - searchheadclusters/finalizers - standalones/finalizers @@ -409,6 +521,8 @@ spec: - licensemasters/status - monitoringconsoles/status - objectstorages/status + - postgresclusters/status + - postgresdatabases/status - queues/status - searchheadclusters/status - standalones/status @@ -416,6 +530,35 @@ spec: - get - patch - update + - apiGroups: + - enterprise.splunk.com + resources: + - postgresclusterclasses + verbs: + - get + - list + - watch + - apiGroups: + - postgresql.cnpg.io + resources: + - clusters + - databases + - poolers + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - postgresql.cnpg.io + resources: + - clusters/status + - poolers/status + verbs: + - get - apiGroups: - authentication.k8s.io resources: @@ -465,7 +608,7 @@ spec: fieldRef: fieldPath: metadata.annotations['olm.targetNamespaces'] - name: RELATED_IMAGE_SPLUNK_ENTERPRISE - value: docker.io/splunk/splunk:10.2.0 + value: docker.io/splunk/splunk - name: OPERATOR_NAME value: splunk-operator - name: SPLUNK_GENERAL_TERMS @@ -474,7 +617,7 @@ spec: valueFrom: fieldRef: fieldPath: metadata.name - image: docker.io/splunk/splunk-operator:3.1.0 + image: controller:latest imagePullPolicy: Always livenessProbe: httpGet: @@ -587,7 +730,7 @@ spec: name: Splunk Inc. 
url: www.splunk.com relatedImages: - - image: docker.io/splunk/splunk:10.2.0 + - image: docker.io/splunk/splunk name: splunk-enterprise replaces: splunk-operator.v3.0.0 version: 3.1.0 diff --git a/bundle/metadata/annotations.yaml b/bundle/metadata/annotations.yaml index 890fd61c7..87b0891ef 100644 --- a/bundle/metadata/annotations.yaml +++ b/bundle/metadata/annotations.yaml @@ -4,11 +4,10 @@ annotations: operators.operatorframework.io.bundle.manifests.v1: manifests/ operators.operatorframework.io.bundle.metadata.v1: metadata/ operators.operatorframework.io.bundle.package.v1: splunk-operator - operators.operatorframework.io.bundle.channels.v1: stable - operators.operatorframework.io.bundle.channel.default.v1: stable - operators.operatorframework.io.metrics.builder: operator-sdk-v1.31.0 + operators.operatorframework.io.bundle.channels.v1: alpha + operators.operatorframework.io.metrics.builder: operator-sdk-v1.42.0 operators.operatorframework.io.metrics.mediatype.v1: metrics+v1 - operators.operatorframework.io.metrics.project_layout: go.kubebuilder.io/v3 + operators.operatorframework.io.metrics.project_layout: go.kubebuilder.io/v4 # Annotations for testing. operators.operatorframework.io.test.mediatype.v1: scorecard+v1 diff --git a/config/crd/bases/enterprise.splunk.com_clustermanagers.yaml b/config/crd/bases/enterprise.splunk.com_clustermanagers.yaml index a19a639ae..d6d40d56a 100644 --- a/config/crd/bases/enterprise.splunk.com_clustermanagers.yaml +++ b/config/crd/bases/enterprise.splunk.com_clustermanagers.yaml @@ -639,8 +639,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. for each node that meets all of the scheduling requirements (resource request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -1302,7 +1302,9 @@ spec: a Container. properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -1360,6 +1362,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. 
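[Note on the fileKeyRef blocks above: like podCertificate, they arrive via the regenerated upstream PodSpec schema (alpha EnvFiles feature gate) rather than from this patch's own API changes. A sketch of the env wiring the schema describes, with placeholder names:

env:
  - name: DB_PASSWORD
    valueFrom:
      fileKeyRef:
        volumeName: app-config        # volume that carries the env file
        path: config/app.env          # relative path inside that volume
        key: db_password              # key in the env file; limited to 128 chars in Alpha
        optional: false               # pod creation fails if the file or key is missing
]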
+ type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -1664,7 +1703,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -3321,15 +3360,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. - If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -3511,12 +3548,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -3595,7 +3630,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -4015,6 +4050,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. 
Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. If these coincide + with a certificate rotation, it is possible that the private key and leaf + certificate you read may not correspond to each other. Your application + will need to check for this condition, and re-read until they are + consistent. + + The named signer controls chooses the format of the certificate it + issues; consult the signer implementation's documentation to learn how to + use the certificates it issues. + properties: + certificateChainPath: + description: |- + Write the certificate chain at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. + + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). + + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. + type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). 
+ + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. + `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. + type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -4149,7 +4289,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. - More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- diff --git a/config/crd/bases/enterprise.splunk.com_clustermasters.yaml b/config/crd/bases/enterprise.splunk.com_clustermasters.yaml index 77b835376..adc19b9d7 100644 --- a/config/crd/bases/enterprise.splunk.com_clustermasters.yaml +++ b/config/crd/bases/enterprise.splunk.com_clustermasters.yaml @@ -635,8 +635,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. for each node that meets all of the scheduling requirements (resource request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -1298,7 +1298,9 @@ spec: a Container. properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -1356,6 +1358,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. 
+ type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -1660,7 +1699,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -3317,15 +3356,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. - If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -3507,12 +3544,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -3591,7 +3626,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -4011,6 +4046,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. 
+ + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. If these coincide + with a certificate rotation, it is possible that the private key and leaf + certificate you read may not correspond to each other. Your application + will need to check for this condition, and re-read until they are + consistent. + + The named signer controls chooses the format of the certificate it + issues; consult the signer implementation's documentation to learn how to + use the certificates it issues. + properties: + certificateChainPath: + description: |- + Write the certificate chain at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. + + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). + + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. + type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). + + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. 
+ `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. + type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -4145,7 +4285,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. - More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- diff --git a/config/crd/bases/enterprise.splunk.com_indexerclusters.yaml b/config/crd/bases/enterprise.splunk.com_indexerclusters.yaml index 8ae972d7c..bf9f312d1 100644 --- a/config/crd/bases/enterprise.splunk.com_indexerclusters.yaml +++ b/config/crd/bases/enterprise.splunk.com_indexerclusters.yaml @@ -642,8 +642,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. for each node that meets all of the scheduling requirements (resource request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -1120,7 +1120,9 @@ spec: a Container. properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -1178,6 +1180,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -1487,7 +1526,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. 
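[Note on the recurring description change above ("This field depends on the DynamicResourceAllocation feature gate"): it refers to the claims list under container resources, whose entries must name a claim declared in the pod's spec.resourceClaims. A minimal sketch under that assumption, with placeholder claim names:

spec:
  resourceClaims:
    - name: shared-gpu
      resourceClaimName: gpu-claim    # pre-provisioned ResourceClaim (placeholder)
  containers:
    - name: app
      resources:
        claims:
          - name: shared-gpu          # must match an entry in spec.resourceClaims
]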
- This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -3000,15 +3039,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. - If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -3190,12 +3227,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -3274,7 +3309,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -3694,6 +3729,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. 
The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. If these coincide + with a certificate rotation, it is possible that the private key and leaf + certificate you read may not correspond to each other. Your application + will need to check for this condition, and re-read until they are + consistent. + + The named signer controls chooses the format of the certificate it + issues; consult the signer implementation's documentation to learn how to + use the certificates it issues. + properties: + certificateChainPath: + description: |- + Write the certificate chain at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. + + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). + + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. + type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). + + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. + `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. 
+ type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -3828,7 +3968,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. - More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- @@ -4856,8 +4995,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. for each node that meets all of the scheduling requirements (resource request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -5334,7 +5473,9 @@ spec: a Container. properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -5392,6 +5533,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -5786,7 +5964,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -7299,15 +7477,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. 
An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. - If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -7489,12 +7665,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -7573,7 +7747,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -7993,6 +8167,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. 
If these coincide + with a certificate rotation, it is possible that the private key and leaf + certificate you read may not correspond to each other. Your application + will need to check for this condition, and re-read until they are + consistent. + + The named signer controls chooses the format of the certificate it + issues; consult the signer implementation's documentation to learn how to + use the certificates it issues. + properties: + certificateChainPath: + description: |- + Write the certificate chain at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. + + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). + + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. + type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). + + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. + `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. + type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -8127,7 +8406,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. 
- More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- diff --git a/config/crd/bases/enterprise.splunk.com_ingestorclusters.yaml b/config/crd/bases/enterprise.splunk.com_ingestorclusters.yaml index 44238ca7a..a7d91abaa 100644 --- a/config/crd/bases/enterprise.splunk.com_ingestorclusters.yaml +++ b/config/crd/bases/enterprise.splunk.com_ingestorclusters.yaml @@ -635,8 +635,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. for each node that meets all of the scheduling requirements (resource request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -1298,7 +1298,9 @@ spec: a Container. properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -1356,6 +1358,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -1752,7 +1791,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -3265,15 +3304,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. 
An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. - If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -3455,12 +3492,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -3539,7 +3574,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -3959,6 +3994,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. 
If these coincide
+ with a certificate rotation, it is possible that the private key and leaf
+ certificate you read may not correspond to each other. Your application
+ will need to check for this condition, and re-read until they are
+ consistent.
+
+ The named signer chooses the format of the certificate it
+ issues; consult the signer implementation's documentation to learn how to
+ use the certificates it issues.
+ properties:
+   certificateChainPath:
+     description: |-
+       Write the certificate chain at this path in the projected volume.
+
+       Most applications should use credentialBundlePath. When using keyPath
+       and certificateChainPath, your application needs to check that the key
+       and leaf certificate are consistent, because it is possible to read the
+       files mid-rotation.
+     type: string
+   credentialBundlePath:
+     description: |-
+       Write the credential bundle at this path in the projected volume.
+
+       The credential bundle is a single file that contains multiple PEM blocks.
+       The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private
+       key.
+
+       The remaining blocks are CERTIFICATE blocks, containing the issued
+       certificate chain from the signer (leaf and any intermediates).
+
+       Using credentialBundlePath lets your Pod's application code make a single
+       atomic read that retrieves a consistent key and certificate chain. If you
+       project them to separate files, your application code will need to
+       additionally check that the leaf certificate was issued to the key.
+     type: string
+   keyPath:
+     description: |-
+       Write the key at this path in the projected volume.
+
+       Most applications should use credentialBundlePath. When using keyPath
+       and certificateChainPath, your application needs to check that the key
+       and leaf certificate are consistent, because it is possible to read the
+       files mid-rotation.
+     type: string
+   keyType:
+     description: |-
+       The type of keypair Kubelet will generate for the pod.
+
+       Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384",
+       "ECDSAP521", and "ED25519".
+     type: string
+   maxExpirationSeconds:
+     description: |-
+       maxExpirationSeconds is the maximum lifetime permitted for the
+       certificate.
+
+       Kubelet copies this value verbatim into the PodCertificateRequests it
+       generates for this projection.
+
+       If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver
+       will reject values shorter than 3600 (1 hour). The maximum allowable
+       value is 7862400 (91 days).
+
+       The signer implementation is then free to issue a certificate with any
+       lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600
+       seconds (1 hour). This constraint is enforced by kube-apiserver.
+       `kubernetes.io` signers will never issue certificates with a lifetime
+       longer than 24 hours.
+     format: int32
+     type: integer
+   signerName:
+     description: Kubelet's generated CSRs will be
+       addressed to this signer.
+     type: string
+ required:
+ - keyType
+ - signerName
+ type: object
secret:
  description: secret information about the secret
    data to project
@@ -4093,7 +4233,6 @@ spec:
  description: |-
    rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported.
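For the keyPath/certificateChainPath case, the re-read loop the description asks for can be sketched as follows. This illustrates the documented contract only; the paths, retry count, and backoff are assumptions, not kubelet or client-go behavior:

```go
// Package keycheck sketches the consistency check required when the key and
// the certificate chain are projected as separate files: re-read until the
// leaf certificate's public key matches the private key.
package keycheck

import (
	"crypto"
	"crypto/x509"
	"encoding/pem"
	"errors"
	"os"
	"time"
)

func LoadConsistent(keyPath, chainPath string) (crypto.Signer, *x509.Certificate, error) {
	for attempt := 0; attempt < 10; attempt++ {
		keyPEM, err := os.ReadFile(keyPath)
		if err != nil {
			return nil, nil, err
		}
		chainPEM, err := os.ReadFile(chainPath)
		if err != nil {
			return nil, nil, err
		}
		keyBlock, _ := pem.Decode(keyPEM)
		leafBlock, _ := pem.Decode(chainPEM) // the leaf certificate comes first
		if keyBlock == nil || leafBlock == nil {
			return nil, nil, errors.New("malformed PEM input")
		}
		keyAny, err := x509.ParsePKCS8PrivateKey(keyBlock.Bytes)
		if err != nil {
			return nil, nil, err
		}
		leaf, err := x509.ParseCertificate(leafBlock.Bytes)
		if err != nil {
			return nil, nil, err
		}
		signer, ok := keyAny.(crypto.Signer)
		if !ok {
			return nil, nil, errors.New("unsupported key type")
		}
		// RSA, ECDSA, and Ed25519 public keys all implement Equal.
		pub, ok := leaf.PublicKey.(interface{ Equal(crypto.PublicKey) bool })
		if ok && pub.Equal(signer.Public()) {
			return signer, leaf, nil // key and leaf belong together
		}
		time.Sleep(200 * time.Millisecond) // likely read mid-rotation; retry
	}
	return nil, nil, errors.New("key and leaf certificate never became consistent")
}
```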
- More info: https://examples.k8s.io/volumes/rbd/README.md
  properties:
    fsType:
      description: |-
diff --git a/config/crd/bases/enterprise.splunk.com_licensemanagers.yaml b/config/crd/bases/enterprise.splunk.com_licensemanagers.yaml
index b65b6beaf..bedc18ed2 100644
--- a/config/crd/bases/enterprise.splunk.com_licensemanagers.yaml
+++ b/config/crd/bases/enterprise.splunk.com_licensemanagers.yaml
@@ -629,8 +629,8 @@ spec:
  most preferred is the one with the greatest sum of weights, i.e.
  for each node that meets all of the scheduling requirements (resource
  request, requiredDuringScheduling anti-affinity expressions, etc.),
- compute a sum by iterating through the elements of this field and adding
- "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the
+ compute a sum by iterating through the elements of this field and adding
+ "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the
  node(s) with the highest sum are the most preferred.
  items:
    description: The weights of all of the matched WeightedPodAffinityTerm
@@ -1292,7 +1292,9 @@ spec:
  a Container.
  properties:
    name:
-     description: Name of the environment variable. Must be a C_IDENTIFIER.
+     description: |-
+       Name of the environment variable.
+       May consist of any printable ASCII characters except '='.
      type: string
    value:
      description: |-
@@ -1350,6 +1352,43 @@ spec:
  - fieldPath
  type: object
  x-kubernetes-map-type: atomic
+ fileKeyRef:
+   description: |-
+     FileKeyRef selects a key of the env file.
+     Requires the EnvFiles feature gate to be enabled.
+   properties:
+     key:
+       description: |-
+         The key within the env file. An invalid key will prevent the pod from starting.
+         The keys defined within a source may consist of any printable ASCII characters except '='.
+         During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters.
+       type: string
+     optional:
+       default: false
+       description: |-
+         Specify whether the file or its key must be defined. If the file or key
+         does not exist, then the env var is not published.
+         If optional is set to true and the specified key does not exist,
+         the environment variable will not be set in the Pod's containers.
+
+         If optional is set to false and the specified key does not exist,
+         an error will be returned during Pod creation.
+       type: boolean
+     path:
+       description: |-
+         The path within the volume from which to select the file.
+         Must be relative and may not contain the '..' path or start with '..'.
+       type: string
+     volumeName:
+       description: The name of the volume mount containing
+         the env file.
+       type: string
+   required:
+   - key
+   - path
+   - volumeName
+   type: object
+   x-kubernetes-map-type: atomic
  resourceFieldRef:
    description: |-
      Selects a resource of the container: only resources limits and requests
@@ -1654,7 +1693,7 @@ spec:
  Claims lists the names of resources, defined in spec.resourceClaims,
  that are used by this container.
- This is an alpha field and requires enabling the
+ This field depends on the
  DynamicResourceAllocation feature gate.
  This field is immutable. It can only be set for containers.
@@ -3167,15 +3206,13 @@ spec:
  volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim.
  If specified, the CSI driver will create or update the volume with the attributes defined
  in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName,
- it can be changed after the claim is created.
An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. - If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -3357,12 +3394,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -3441,7 +3476,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -3861,6 +3896,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. 
If these coincide
+ with a certificate rotation, it is possible that the private key and leaf
+ certificate you read may not correspond to each other. Your application
+ will need to check for this condition, and re-read until they are
+ consistent.
+
+ The named signer chooses the format of the certificate it
+ issues; consult the signer implementation's documentation to learn how to
+ use the certificates it issues.
+ properties:
+   certificateChainPath:
+     description: |-
+       Write the certificate chain at this path in the projected volume.
+
+       Most applications should use credentialBundlePath. When using keyPath
+       and certificateChainPath, your application needs to check that the key
+       and leaf certificate are consistent, because it is possible to read the
+       files mid-rotation.
+     type: string
+   credentialBundlePath:
+     description: |-
+       Write the credential bundle at this path in the projected volume.
+
+       The credential bundle is a single file that contains multiple PEM blocks.
+       The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private
+       key.
+
+       The remaining blocks are CERTIFICATE blocks, containing the issued
+       certificate chain from the signer (leaf and any intermediates).
+
+       Using credentialBundlePath lets your Pod's application code make a single
+       atomic read that retrieves a consistent key and certificate chain. If you
+       project them to separate files, your application code will need to
+       additionally check that the leaf certificate was issued to the key.
+     type: string
+   keyPath:
+     description: |-
+       Write the key at this path in the projected volume.
+
+       Most applications should use credentialBundlePath. When using keyPath
+       and certificateChainPath, your application needs to check that the key
+       and leaf certificate are consistent, because it is possible to read the
+       files mid-rotation.
+     type: string
+   keyType:
+     description: |-
+       The type of keypair Kubelet will generate for the pod.
+
+       Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384",
+       "ECDSAP521", and "ED25519".
+     type: string
+   maxExpirationSeconds:
+     description: |-
+       maxExpirationSeconds is the maximum lifetime permitted for the
+       certificate.
+
+       Kubelet copies this value verbatim into the PodCertificateRequests it
+       generates for this projection.
+
+       If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver
+       will reject values shorter than 3600 (1 hour). The maximum allowable
+       value is 7862400 (91 days).
+
+       The signer implementation is then free to issue a certificate with any
+       lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600
+       seconds (1 hour). This constraint is enforced by kube-apiserver.
+       `kubernetes.io` signers will never issue certificates with a lifetime
+       longer than 24 hours.
+     format: int32
+     type: integer
+   signerName:
+     description: Kubelet's generated CSRs will be
+       addressed to this signer.
+     type: string
+ required:
+ - keyType
+ - signerName
+ type: object
secret:
  description: secret information about the secret
    data to project
@@ -3995,7 +4135,6 @@ spec:
  description: |-
    rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported.
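As a cross-check of the maxExpirationSeconds bounds quoted above (default 86400, minimum 3600, maximum 7862400), the documented kube-apiserver defaulting and validation reduce to something like this sketch; it mirrors the described behavior and is not the apiserver implementation:

```go
// Package podcert sketches the documented defaulting and bounds for
// maxExpirationSeconds on a podCertificate projection.
package podcert

import "fmt"

const (
	minExpirationSeconds     = 3600    // 1 hour: shorter values are rejected
	defaultExpirationSeconds = 86400   // 24 hours: applied when the field is omitted
	maxExpirationSecondsCap  = 7862400 // 91 days: the maximum allowable value
)

// NormalizeMaxExpiration applies the default and enforces the documented limits.
func NormalizeMaxExpiration(v *int32) (int32, error) {
	if v == nil {
		return defaultExpirationSeconds, nil
	}
	switch {
	case *v < minExpirationSeconds:
		return 0, fmt.Errorf("maxExpirationSeconds %d is below the 3600s (1 hour) minimum", *v)
	case *v > maxExpirationSecondsCap:
		return 0, fmt.Errorf("maxExpirationSeconds %d exceeds the 7862400s (91 day) maximum", *v)
	}
	return *v, nil
}
```

The signer may still issue a certificate shorter than the normalized value, but per the text never shorter than 3600 seconds.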
- More info: https://examples.k8s.io/volumes/rbd/README.md
  properties:
    fsType:
      description: |-
diff --git a/config/crd/bases/enterprise.splunk.com_licensemasters.yaml b/config/crd/bases/enterprise.splunk.com_licensemasters.yaml
index c2a96b9e7..03563e31d 100644
--- a/config/crd/bases/enterprise.splunk.com_licensemasters.yaml
+++ b/config/crd/bases/enterprise.splunk.com_licensemasters.yaml
@@ -624,8 +624,8 @@ spec:
  most preferred is the one with the greatest sum of weights, i.e.
  for each node that meets all of the scheduling requirements (resource
  request, requiredDuringScheduling anti-affinity expressions, etc.),
- compute a sum by iterating through the elements of this field and adding
- "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the
+ compute a sum by iterating through the elements of this field and adding
+ "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the
  node(s) with the highest sum are the most preferred.
  items:
    description: The weights of all of the matched WeightedPodAffinityTerm
@@ -1287,7 +1287,9 @@ spec:
  a Container.
  properties:
    name:
-     description: Name of the environment variable. Must be a C_IDENTIFIER.
+     description: |-
+       Name of the environment variable.
+       May consist of any printable ASCII characters except '='.
      type: string
    value:
      description: |-
@@ -1345,6 +1347,43 @@ spec:
  - fieldPath
  type: object
  x-kubernetes-map-type: atomic
+ fileKeyRef:
+   description: |-
+     FileKeyRef selects a key of the env file.
+     Requires the EnvFiles feature gate to be enabled.
+   properties:
+     key:
+       description: |-
+         The key within the env file. An invalid key will prevent the pod from starting.
+         The keys defined within a source may consist of any printable ASCII characters except '='.
+         During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters.
+       type: string
+     optional:
+       default: false
+       description: |-
+         Specify whether the file or its key must be defined. If the file or key
+         does not exist, then the env var is not published.
+         If optional is set to true and the specified key does not exist,
+         the environment variable will not be set in the Pod's containers.
+
+         If optional is set to false and the specified key does not exist,
+         an error will be returned during Pod creation.
+       type: boolean
+     path:
+       description: |-
+         The path within the volume from which to select the file.
+         Must be relative and may not contain the '..' path or start with '..'.
+       type: string
+     volumeName:
+       description: The name of the volume mount containing
+         the env file.
+       type: string
+   required:
+   - key
+   - path
+   - volumeName
+   type: object
+   x-kubernetes-map-type: atomic
  resourceFieldRef:
    description: |-
      Selects a resource of the container: only resources limits and requests
@@ -1649,7 +1688,7 @@ spec:
  Claims lists the names of resources, defined in spec.resourceClaims,
  that are used by this container.
- This is an alpha field and requires enabling the
+ This field depends on the
  DynamicResourceAllocation feature gate.
  This field is immutable. It can only be set for containers.
@@ -3162,15 +3201,13 @@ spec:
  volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim.
  If specified, the CSI driver will create or update the volume with the attributes defined
  in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName,
- it can be changed after the claim is created.
An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. - If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -3352,12 +3389,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -3436,7 +3471,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -3856,6 +3891,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. 
If these coincide
+ with a certificate rotation, it is possible that the private key and leaf
+ certificate you read may not correspond to each other. Your application
+ will need to check for this condition, and re-read until they are
+ consistent.
+
+ The named signer chooses the format of the certificate it
+ issues; consult the signer implementation's documentation to learn how to
+ use the certificates it issues.
+ properties:
+   certificateChainPath:
+     description: |-
+       Write the certificate chain at this path in the projected volume.
+
+       Most applications should use credentialBundlePath. When using keyPath
+       and certificateChainPath, your application needs to check that the key
+       and leaf certificate are consistent, because it is possible to read the
+       files mid-rotation.
+     type: string
+   credentialBundlePath:
+     description: |-
+       Write the credential bundle at this path in the projected volume.
+
+       The credential bundle is a single file that contains multiple PEM blocks.
+       The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private
+       key.
+
+       The remaining blocks are CERTIFICATE blocks, containing the issued
+       certificate chain from the signer (leaf and any intermediates).
+
+       Using credentialBundlePath lets your Pod's application code make a single
+       atomic read that retrieves a consistent key and certificate chain. If you
+       project them to separate files, your application code will need to
+       additionally check that the leaf certificate was issued to the key.
+     type: string
+   keyPath:
+     description: |-
+       Write the key at this path in the projected volume.
+
+       Most applications should use credentialBundlePath. When using keyPath
+       and certificateChainPath, your application needs to check that the key
+       and leaf certificate are consistent, because it is possible to read the
+       files mid-rotation.
+     type: string
+   keyType:
+     description: |-
+       The type of keypair Kubelet will generate for the pod.
+
+       Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384",
+       "ECDSAP521", and "ED25519".
+     type: string
+   maxExpirationSeconds:
+     description: |-
+       maxExpirationSeconds is the maximum lifetime permitted for the
+       certificate.
+
+       Kubelet copies this value verbatim into the PodCertificateRequests it
+       generates for this projection.
+
+       If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver
+       will reject values shorter than 3600 (1 hour). The maximum allowable
+       value is 7862400 (91 days).
+
+       The signer implementation is then free to issue a certificate with any
+       lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600
+       seconds (1 hour). This constraint is enforced by kube-apiserver.
+       `kubernetes.io` signers will never issue certificates with a lifetime
+       longer than 24 hours.
+     format: int32
+     type: integer
+   signerName:
+     description: Kubelet's generated CSRs will be
+       addressed to this signer.
+     type: string
+ required:
+ - keyType
+ - signerName
+ type: object
secret:
  description: secret information about the secret
    data to project
@@ -3990,7 +4130,6 @@ spec:
  description: |-
    rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported.
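The fileKeyRef field added in these hunks amounts to a keyed lookup in a KEY=VALUE env file, with `optional` deciding whether a missing file or key is an error. A hedged sketch of that documented behavior follows; it is an illustration only, not kubelet's parser, and omits key validation and the 128-character alpha-stage limit:

```go
// Package envfile sketches the documented fileKeyRef semantics: look up a
// key in a KEY=VALUE file, honoring the optional flag.
package envfile

import (
	"bufio"
	"fmt"
	"os"
	"strings"
)

// LookupKey returns (value, found). When optional is true, a missing file or
// key means the env var is simply not published; otherwise it is an error.
func LookupKey(path, key string, optional bool) (string, bool, error) {
	f, err := os.Open(path)
	if err != nil {
		if optional {
			return "", false, nil
		}
		return "", false, fmt.Errorf("env file %q: %w", path, err)
	}
	defer f.Close()

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		k, v, ok := strings.Cut(sc.Text(), "=")
		if ok && k == key {
			return v, true, nil
		}
	}
	if err := sc.Err(); err != nil {
		return "", false, err
	}
	if optional {
		return "", false, nil
	}
	return "", false, fmt.Errorf("key %q not defined in %q", key, path)
}
```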
- More info: https://examples.k8s.io/volumes/rbd/README.md
  properties:
    fsType:
      description: |-
diff --git a/config/crd/bases/enterprise.splunk.com_monitoringconsoles.yaml b/config/crd/bases/enterprise.splunk.com_monitoringconsoles.yaml
index f5142a14f..2a77ad99e 100644
--- a/config/crd/bases/enterprise.splunk.com_monitoringconsoles.yaml
+++ b/config/crd/bases/enterprise.splunk.com_monitoringconsoles.yaml
@@ -631,8 +631,8 @@ spec:
  most preferred is the one with the greatest sum of weights, i.e.
  for each node that meets all of the scheduling requirements (resource
  request, requiredDuringScheduling anti-affinity expressions, etc.),
- compute a sum by iterating through the elements of this field and adding
- "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the
+ compute a sum by iterating through the elements of this field and adding
+ "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the
  node(s) with the highest sum are the most preferred.
  items:
    description: The weights of all of the matched WeightedPodAffinityTerm
@@ -1294,7 +1294,9 @@ spec:
  a Container.
  properties:
    name:
-     description: Name of the environment variable. Must be a C_IDENTIFIER.
+     description: |-
+       Name of the environment variable.
+       May consist of any printable ASCII characters except '='.
      type: string
    value:
      description: |-
@@ -1352,6 +1354,43 @@ spec:
  - fieldPath
  type: object
  x-kubernetes-map-type: atomic
+ fileKeyRef:
+   description: |-
+     FileKeyRef selects a key of the env file.
+     Requires the EnvFiles feature gate to be enabled.
+   properties:
+     key:
+       description: |-
+         The key within the env file. An invalid key will prevent the pod from starting.
+         The keys defined within a source may consist of any printable ASCII characters except '='.
+         During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters.
+       type: string
+     optional:
+       default: false
+       description: |-
+         Specify whether the file or its key must be defined. If the file or key
+         does not exist, then the env var is not published.
+         If optional is set to true and the specified key does not exist,
+         the environment variable will not be set in the Pod's containers.
+
+         If optional is set to false and the specified key does not exist,
+         an error will be returned during Pod creation.
+       type: boolean
+     path:
+       description: |-
+         The path within the volume from which to select the file.
+         Must be relative and may not contain the '..' path or start with '..'.
+       type: string
+     volumeName:
+       description: The name of the volume mount containing
+         the env file.
+       type: string
+   required:
+   - key
+   - path
+   - volumeName
+   type: object
+   x-kubernetes-map-type: atomic
  resourceFieldRef:
    description: |-
      Selects a resource of the container: only resources limits and requests
@@ -1656,7 +1695,7 @@ spec:
  Claims lists the names of resources, defined in spec.resourceClaims,
  that are used by this container.
- This is an alpha field and requires enabling the
+ This field depends on the
  DynamicResourceAllocation feature gate.
  This field is immutable. It can only be set for containers.
@@ -3169,15 +3208,13 @@ spec:
  volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim.
  If specified, the CSI driver will create or update the volume with the attributes defined
  in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName,
- it can be changed after the claim is created.
An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. - If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -3359,12 +3396,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -3443,7 +3478,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -3863,6 +3898,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. 
If these coincide
+ with a certificate rotation, it is possible that the private key and leaf
+ certificate you read may not correspond to each other. Your application
+ will need to check for this condition, and re-read until they are
+ consistent.
+
+ The named signer chooses the format of the certificate it
+ issues; consult the signer implementation's documentation to learn how to
+ use the certificates it issues.
+ properties:
+   certificateChainPath:
+     description: |-
+       Write the certificate chain at this path in the projected volume.
+
+       Most applications should use credentialBundlePath. When using keyPath
+       and certificateChainPath, your application needs to check that the key
+       and leaf certificate are consistent, because it is possible to read the
+       files mid-rotation.
+     type: string
+   credentialBundlePath:
+     description: |-
+       Write the credential bundle at this path in the projected volume.
+
+       The credential bundle is a single file that contains multiple PEM blocks.
+       The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private
+       key.
+
+       The remaining blocks are CERTIFICATE blocks, containing the issued
+       certificate chain from the signer (leaf and any intermediates).
+
+       Using credentialBundlePath lets your Pod's application code make a single
+       atomic read that retrieves a consistent key and certificate chain. If you
+       project them to separate files, your application code will need to
+       additionally check that the leaf certificate was issued to the key.
+     type: string
+   keyPath:
+     description: |-
+       Write the key at this path in the projected volume.
+
+       Most applications should use credentialBundlePath. When using keyPath
+       and certificateChainPath, your application needs to check that the key
+       and leaf certificate are consistent, because it is possible to read the
+       files mid-rotation.
+     type: string
+   keyType:
+     description: |-
+       The type of keypair Kubelet will generate for the pod.
+
+       Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384",
+       "ECDSAP521", and "ED25519".
+     type: string
+   maxExpirationSeconds:
+     description: |-
+       maxExpirationSeconds is the maximum lifetime permitted for the
+       certificate.
+
+       Kubelet copies this value verbatim into the PodCertificateRequests it
+       generates for this projection.
+
+       If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver
+       will reject values shorter than 3600 (1 hour). The maximum allowable
+       value is 7862400 (91 days).
+
+       The signer implementation is then free to issue a certificate with any
+       lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600
+       seconds (1 hour). This constraint is enforced by kube-apiserver.
+       `kubernetes.io` signers will never issue certificates with a lifetime
+       longer than 24 hours.
+     format: int32
+     type: integer
+   signerName:
+     description: Kubelet's generated CSRs will be
+       addressed to this signer.
+     type: string
+ required:
+ - keyType
+ - signerName
+ type: object
secret:
  description: secret information about the secret
    data to project
@@ -3997,7 +4137,6 @@ spec:
  description: |-
    rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported.
- More info: https://examples.k8s.io/volumes/rbd/README.md
  properties:
    fsType:
      description: |-
@@ -5251,8 +5390,8 @@ spec:
  most preferred is the one with the greatest sum of weights, i.e.
  for each node that meets all of the scheduling requirements (resource
  request, requiredDuringScheduling anti-affinity expressions, etc.),
- compute a sum by iterating through the elements of this field and adding
- "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the
+ compute a sum by iterating through the elements of this field and adding
+ "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the
  node(s) with the highest sum are the most preferred.
  items:
    description: The weights of all of the matched WeightedPodAffinityTerm
@@ -5914,7 +6053,9 @@ spec:
  a Container.
  properties:
    name:
-     description: Name of the environment variable. Must be a C_IDENTIFIER.
+     description: |-
+       Name of the environment variable.
+       May consist of any printable ASCII characters except '='.
      type: string
    value:
      description: |-
@@ -5972,6 +6113,43 @@ spec:
  - fieldPath
  type: object
  x-kubernetes-map-type: atomic
+ fileKeyRef:
+   description: |-
+     FileKeyRef selects a key of the env file.
+     Requires the EnvFiles feature gate to be enabled.
+   properties:
+     key:
+       description: |-
+         The key within the env file. An invalid key will prevent the pod from starting.
+         The keys defined within a source may consist of any printable ASCII characters except '='.
+         During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters.
+       type: string
+     optional:
+       default: false
+       description: |-
+         Specify whether the file or its key must be defined. If the file or key
+         does not exist, then the env var is not published.
+         If optional is set to true and the specified key does not exist,
+         the environment variable will not be set in the Pod's containers.
+
+         If optional is set to false and the specified key does not exist,
+         an error will be returned during Pod creation.
+       type: boolean
+     path:
+       description: |-
+         The path within the volume from which to select the file.
+         Must be relative and may not contain the '..' path or start with '..'.
+       type: string
+     volumeName:
+       description: The name of the volume mount containing
+         the env file.
+       type: string
+   required:
+   - key
+   - path
+   - volumeName
+   type: object
+   x-kubernetes-map-type: atomic
  resourceFieldRef:
    description: |-
      Selects a resource of the container: only resources limits and requests
@@ -6276,7 +6454,7 @@ spec:
  Claims lists the names of resources, defined in spec.resourceClaims,
  that are used by this container.
- This is an alpha field and requires enabling the
+ This field depends on the
  DynamicResourceAllocation feature gate.
  This field is immutable. It can only be set for containers.
@@ -7789,15 +7967,13 @@ spec:
  volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim.
  If specified, the CSI driver will create or update the volume with the attributes defined
  in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName,
- it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass
- will be applied to the claim but it's not allowed to reset this field to empty string once it is set.
- If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass
- will be set by the persistentvolume controller if it exists.
+ it can be changed after the claim is created. An empty string or nil value indicates that no
+ VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state,
+ this field can be reset to its previous value (including nil) to cancel the modification.
  If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be
  set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource
  exists.
  More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/
- (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default).
  type: string
volumeMode:
  description: |-
@@ -7979,12 +8155,10 @@ spec:
  description: |-
    glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime.
    Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported.
- More info: https://examples.k8s.io/volumes/glusterfs/README.md
  properties:
    endpoints:
-     description: |-
-       endpoints is the endpoint name that details Glusterfs topology.
-       More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod
+     description: endpoints is the endpoint name that details
+       Glusterfs topology.
      type: string
    path:
      description: |-
@@ -8063,7 +8237,7 @@ spec:
  description: |-
    iscsi represents an ISCSI Disk resource that is attached to a
    kubelet's host machine and then exposed to the pod.
-   More info: https://examples.k8s.io/volumes/iscsi/README.md
+   More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi
  properties:
    chapAuthDiscovery:
      description: chapAuthDiscovery defines whether support iSCSI
@@ -8483,6 +8657,111 @@ spec:
  type: array
  x-kubernetes-list-type: atomic
  type: object
+ podCertificate:
+   description: |-
+     Projects an auto-rotating credential bundle (private key and certificate
+     chain) that the pod can use either as a TLS client or server.
+
+     Kubelet generates a private key and uses it to send a
+     PodCertificateRequest to the named signer. Once the signer approves the
+     request and issues a certificate chain, Kubelet writes the key and
+     certificate chain to the pod filesystem. The pod does not start until
+     certificates have been issued for each podCertificate projected volume
+     source in its spec.
+
+     Kubelet will begin trying to rotate the certificate at the time indicated
+     by the signer using the PodCertificateRequest.Status.BeginRefreshAt
+     timestamp.
+
+     Kubelet can write a single file, indicated by the credentialBundlePath
+     field, or separate files, indicated by the keyPath and
+     certificateChainPath fields.
+
+     The credential bundle is a single file in PEM format. The first PEM
+     entry is the private key (in PKCS#8 format), and the remaining PEM
+     entries are the certificate chain issued by the signer (typically,
+     signers will return their certificate chain in leaf-to-root order).
+
+     Prefer using the credential bundle format, since your application code
+     can read it atomically. If you use keyPath and certificateChainPath,
+     your application must make two separate file reads. If these coincide
+     with a certificate rotation, it is possible that the private key and leaf
+     certificate you read may not correspond to each other. Your application
+     will need to check for this condition, and re-read until they are
+     consistent.
+
+     The named signer chooses the format of the certificate it
+     issues; consult the signer implementation's documentation to learn how to
+     use the certificates it issues.
+   properties:
+     certificateChainPath:
+       description: |-
+         Write the certificate chain at this path in the projected volume.
+ + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. + + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). + + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. + type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). + + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. + `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. + type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -8617,7 +8896,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. - More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- diff --git a/config/crd/bases/enterprise.splunk.com_searchheadclusters.yaml b/config/crd/bases/enterprise.splunk.com_searchheadclusters.yaml index 4f4674b32..2128ce7d6 100644 --- a/config/crd/bases/enterprise.splunk.com_searchheadclusters.yaml +++ b/config/crd/bases/enterprise.splunk.com_searchheadclusters.yaml @@ -637,8 +637,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. 
for each node that meets all of the scheduling requirements (resource
  request, requiredDuringScheduling anti-affinity expressions, etc.),
- compute a sum by iterating through the elements of this field and adding
- "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the
+ compute a sum by iterating through the elements of this field and adding
+ "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the
  node(s) with the highest sum are the most preferred.
  items:
    description: The weights of all of the matched WeightedPodAffinityTerm
@@ -1300,7 +1300,9 @@ spec:
  a Container.
  properties:
    name:
-     description: Name of the environment variable. Must be a C_IDENTIFIER.
+     description: |-
+       Name of the environment variable.
+       May consist of any printable ASCII characters except '='.
      type: string
    value:
      description: |-
@@ -1358,6 +1360,43 @@ spec:
  - fieldPath
  type: object
  x-kubernetes-map-type: atomic
+ fileKeyRef:
+   description: |-
+     FileKeyRef selects a key of the env file.
+     Requires the EnvFiles feature gate to be enabled.
+   properties:
+     key:
+       description: |-
+         The key within the env file. An invalid key will prevent the pod from starting.
+         The keys defined within a source may consist of any printable ASCII characters except '='.
+         During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters.
+       type: string
+     optional:
+       default: false
+       description: |-
+         Specify whether the file or its key must be defined. If the file or key
+         does not exist, then the env var is not published.
+         If optional is set to true and the specified key does not exist,
+         the environment variable will not be set in the Pod's containers.
+
+         If optional is set to false and the specified key does not exist,
+         an error will be returned during Pod creation.
+       type: boolean
+     path:
+       description: |-
+         The path within the volume from which to select the file.
+         Must be relative and may not contain the '..' path or start with '..'.
+       type: string
+     volumeName:
+       description: The name of the volume mount containing
+         the env file.
+       type: string
+   required:
+   - key
+   - path
+   - volumeName
+   type: object
+   x-kubernetes-map-type: atomic
  resourceFieldRef:
    description: |-
      Selects a resource of the container: only resources limits and requests
@@ -1667,7 +1706,7 @@ spec:
  Claims lists the names of resources, defined in spec.resourceClaims,
  that are used by this container.
- This is an alpha field and requires enabling the
+ This field depends on the
  DynamicResourceAllocation feature gate.
  This field is immutable. It can only be set for containers.
@@ -3180,15 +3219,13 @@ spec:
  volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim.
  If specified, the CSI driver will create or update the volume with the attributes defined
  in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName,
- it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass
- will be applied to the claim but it's not allowed to reset this field to empty string once it is set.
- If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass
- will be set by the persistentvolume controller if it exists.
+ it can be changed after the claim is created. An empty string or nil value indicates that no
+ VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state,
+ this field can be reset to its previous value (including nil) to cancel the modification.
  If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be
  set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource
  exists.
  More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/
- (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default).
  type: string
volumeMode:
  description: |-
@@ -3370,12 +3407,10 @@ spec:
  description: |-
    glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime.
    Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported.
- More info: https://examples.k8s.io/volumes/glusterfs/README.md
  properties:
    endpoints:
-     description: |-
-       endpoints is the endpoint name that details Glusterfs topology.
-       More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod
+     description: endpoints is the endpoint name that details
+       Glusterfs topology.
      type: string
    path:
      description: |-
@@ -3454,7 +3489,7 @@ spec:
  description: |-
    iscsi represents an ISCSI Disk resource that is attached to a
    kubelet's host machine and then exposed to the pod.
-   More info: https://examples.k8s.io/volumes/iscsi/README.md
+   More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi
  properties:
    chapAuthDiscovery:
      description: chapAuthDiscovery defines whether support iSCSI
@@ -3874,6 +3909,111 @@ spec:
  type: array
  x-kubernetes-list-type: atomic
  type: object
+ podCertificate:
+   description: |-
+     Projects an auto-rotating credential bundle (private key and certificate
+     chain) that the pod can use either as a TLS client or server.
+
+     Kubelet generates a private key and uses it to send a
+     PodCertificateRequest to the named signer. Once the signer approves the
+     request and issues a certificate chain, Kubelet writes the key and
+     certificate chain to the pod filesystem. The pod does not start until
+     certificates have been issued for each podCertificate projected volume
+     source in its spec.
+
+     Kubelet will begin trying to rotate the certificate at the time indicated
+     by the signer using the PodCertificateRequest.Status.BeginRefreshAt
+     timestamp.
+
+     Kubelet can write a single file, indicated by the credentialBundlePath
+     field, or separate files, indicated by the keyPath and
+     certificateChainPath fields.
+
+     The credential bundle is a single file in PEM format. The first PEM
+     entry is the private key (in PKCS#8 format), and the remaining PEM
+     entries are the certificate chain issued by the signer (typically,
+     signers will return their certificate chain in leaf-to-root order).
+
+     Prefer using the credential bundle format, since your application code
+     can read it atomically. If you use keyPath and certificateChainPath,
+     your application must make two separate file reads. If these coincide
+     with a certificate rotation, it is possible that the private key and leaf
+     certificate you read may not correspond to each other. Your application
+     will need to check for this condition, and re-read until they are
+     consistent.
+
+     The named signer chooses the format of the certificate it
+     issues; consult the signer implementation's documentation to learn how to
+     use the certificates it issues.
+   properties:
+     certificateChainPath:
+       description: |-
+         Write the certificate chain at this path in the projected volume.
+
+         Most applications should use credentialBundlePath. When using keyPath
+         and certificateChainPath, your application needs to check that the key
+         and leaf certificate are consistent, because it is possible to read the
+         files mid-rotation.
+       type: string
+     credentialBundlePath:
+       description: |-
+         Write the credential bundle at this path in the projected volume.
+
+         The credential bundle is a single file that contains multiple PEM blocks.
+         The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private
+         key.
+
+         The remaining blocks are CERTIFICATE blocks, containing the issued
+         certificate chain from the signer (leaf and any intermediates).
+
+         Using credentialBundlePath lets your Pod's application code make a single
+         atomic read that retrieves a consistent key and certificate chain. If you
+         project them to separate files, your application code will need to
+         additionally check that the leaf certificate was issued to the key.
+       type: string
+     keyPath:
+       description: |-
+         Write the key at this path in the projected volume.
+
+         Most applications should use credentialBundlePath. When using keyPath
+         and certificateChainPath, your application needs to check that the key
+         and leaf certificate are consistent, because it is possible to read the
+         files mid-rotation.
+       type: string
+     keyType:
+       description: |-
+         The type of keypair Kubelet will generate for the pod.
+
+         Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384",
+         "ECDSAP521", and "ED25519".
+       type: string
+     maxExpirationSeconds:
+       description: |-
+         maxExpirationSeconds is the maximum lifetime permitted for the
+         certificate.
+
+         Kubelet copies this value verbatim into the PodCertificateRequests it
+         generates for this projection.
+
+         If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver
+         will reject values shorter than 3600 (1 hour). The maximum allowable
+         value is 7862400 (91 days).
+
+         The signer implementation is then free to issue a certificate with any
+         lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600
+         seconds (1 hour). This constraint is enforced by kube-apiserver.
+         `kubernetes.io` signers will never issue certificates with a lifetime
+         longer than 24 hours.
+       format: int32
+       type: integer
+     signerName:
+       description: Kubelet's generated CSRs will be
+         addressed to this signer.
+       type: string
+   required:
+   - keyType
+   - signerName
+   type: object
  secret:
    description: secret information about the secret
      data to project
@@ -4008,7 +4148,6 @@ spec:
  description: |-
    rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported.
- More info: https://examples.k8s.io/volumes/rbd/README.md
  properties:
    fsType:
      description: |-
@@ -5344,8 +5483,8 @@ spec:
  most preferred is the one with the greatest sum of weights, i.e.
  for each node that meets all of the scheduling requirements (resource
  request, requiredDuringScheduling anti-affinity expressions, etc.),
- compute a sum by iterating through the elements of this field and adding
- "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the
+ compute a sum by iterating through the elements of this field and adding
+ "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the
  node(s) with the highest sum are the most preferred.
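Note the scoring rule restored in these hunks is additive: each matching WeightedPodAffinityTerm adds its weight to the node's sum, and the node(s) with the highest sum are the most preferred. A toy Go sketch of that rule, with the matching predicate standing in for full podAffinityTerm evaluation (which is assumed, not implemented here):

```go
// Package affinity sketches the preferredDuringScheduling scoring described
// above: matching terms add their weight, and the highest total wins.
package affinity

type weightedTerm struct {
	weight  int32
	matches func(node string) bool // stand-in for real podAffinityTerm matching
}

// preferenceScore sums the weights of every term that matches the node.
func preferenceScore(node string, terms []weightedTerm) int32 {
	var sum int32
	for _, t := range terms {
		if t.matches(node) {
			sum += t.weight // weights are added, never subtracted
		}
	}
	return sum
}

// mostPreferred returns the node(s) with the highest score.
func mostPreferred(nodes []string, terms []weightedTerm) []string {
	var best []string
	bestScore := int32(-1)
	for _, n := range nodes {
		switch s := preferenceScore(n, terms); {
		case s > bestScore:
			bestScore, best = s, []string{n}
		case s == bestScore:
			best = append(best, n)
		}
	}
	return best
}
```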
items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -6181,7 +6320,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -6264,7 +6403,9 @@ spec: a Container. properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -6322,6 +6463,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -6632,7 +6810,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -8145,15 +8323,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. - If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. 
If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -8335,12 +8511,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -8419,7 +8593,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -8839,6 +9013,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. If these coincide + with a certificate rotation, it is possible that the private key and leaf + certificate you read may not correspond to each other. Your application + will need to check for this condition, and re-read until they are + consistent. + + The named signer controls chooses the format of the certificate it + issues; consult the signer implementation's documentation to learn how to + use the certificates it issues. + properties: + certificateChainPath: + description: |- + Write the certificate chain at this path in the projected volume. + + Most applications should use credentialBundlePath. 
When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. + + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). + + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. + type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). + + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. + `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. + type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -8973,7 +9252,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. - More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- diff --git a/config/crd/bases/enterprise.splunk.com_standalones.yaml b/config/crd/bases/enterprise.splunk.com_standalones.yaml index 8c8c5035c..5f36c74d1 100644 --- a/config/crd/bases/enterprise.splunk.com_standalones.yaml +++ b/config/crd/bases/enterprise.splunk.com_standalones.yaml @@ -632,8 +632,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. 
for each node that meets all of the scheduling requirements (resource request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -1295,7 +1295,9 @@ spec: a Container. properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -1353,6 +1355,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -1661,7 +1700,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -3318,15 +3357,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. - If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. 
If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -3508,12 +3545,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -3592,7 +3627,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -4012,6 +4047,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. If these coincide + with a certificate rotation, it is possible that the private key and leaf + certificate you read may not correspond to each other. Your application + will need to check for this condition, and re-read until they are + consistent. + + The named signer controls chooses the format of the certificate it + issues; consult the signer implementation's documentation to learn how to + use the certificates it issues. + properties: + certificateChainPath: + description: |- + Write the certificate chain at this path in the projected volume. 
+ + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. + + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). + + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. + type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). + + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. + `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. + type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -4146,7 +4286,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. - More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- @@ -5550,8 +5689,8 @@ spec: most preferred is the one with the greatest sum of weights, i.e. for each node that meets all of the scheduling requirements (resource request, requiredDuringScheduling anti-affinity expressions, etc.), - compute a sum by iterating through the elements of this field and adding - "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the node(s) with the highest sum are the most preferred. items: description: The weights of all of the matched WeightedPodAffinityTerm @@ -6213,7 +6352,9 @@ spec: a Container. 
properties: name: - description: Name of the environment variable. Must be a C_IDENTIFIER. + description: |- + Name of the environment variable. + May consist of any printable ASCII characters except '='. type: string value: description: |- @@ -6271,6 +6412,43 @@ spec: - fieldPath type: object x-kubernetes-map-type: atomic + fileKeyRef: + description: |- + FileKeyRef selects a key of the env file. + Requires the EnvFiles feature gate to be enabled. + properties: + key: + description: |- + The key within the env file. An invalid key will prevent the pod from starting. + The keys defined within a source may consist of any printable ASCII characters except '='. + During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters. + type: string + optional: + default: false + description: |- + Specify whether the file or its key must be defined. If the file or key + does not exist, then the env var is not published. + If optional is set to true and the specified key does not exist, + the environment variable will not be set in the Pod's containers. + + If optional is set to false and the specified key does not exist, + an error will be returned during Pod creation. + type: boolean + path: + description: |- + The path within the volume from which to select the file. + Must be relative and may not contain the '..' path or start with '..'. + type: string + volumeName: + description: The name of the volume mount containing + the env file. + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic resourceFieldRef: description: |- Selects a resource of the container: only resources limits and requests @@ -6580,7 +6758,7 @@ spec: Claims lists the names of resources, defined in spec.resourceClaims, that are used by this container. - This is an alpha field and requires enabling the + This field depends on the DynamicResourceAllocation feature gate. This field is immutable. It can only be set for containers. @@ -8237,15 +8415,13 @@ spec: volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. If specified, the CSI driver will create or update the volume with the attributes defined in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, - it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass - will be applied to the claim but it's not allowed to reset this field to empty string once it is set. - If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass - will be set by the persistentvolume controller if it exists. + it can be changed after the claim is created. An empty string or nil value indicates that no + VolumeAttributesClass will be applied to the claim. If the claim enters an Infeasible error state, + this field can be reset to its previous value (including nil) to cancel the modification. If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource exists. More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ - (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). type: string volumeMode: description: |- @@ -8427,12 +8603,10 @@ spec: description: |- glusterfs represents a Glusterfs mount on the host that shares a pod's lifetime. 
Deprecated: Glusterfs is deprecated and the in-tree glusterfs type is no longer supported. - More info: https://examples.k8s.io/volumes/glusterfs/README.md properties: endpoints: - description: |- - endpoints is the endpoint name that details Glusterfs topology. - More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod + description: endpoints is the endpoint name that details + Glusterfs topology. type: string path: description: |- @@ -8511,7 +8685,7 @@ spec: description: |- iscsi represents an ISCSI Disk resource that is attached to a kubelet's host machine and then exposed to the pod. - More info: https://examples.k8s.io/volumes/iscsi/README.md + More info: https://kubernetes.io/docs/concepts/storage/volumes/#iscsi properties: chapAuthDiscovery: description: chapAuthDiscovery defines whether support iSCSI @@ -8931,6 +9105,111 @@ spec: type: array x-kubernetes-list-type: atomic type: object + podCertificate: + description: |- + Projects an auto-rotating credential bundle (private key and certificate + chain) that the pod can use either as a TLS client or server. + + Kubelet generates a private key and uses it to send a + PodCertificateRequest to the named signer. Once the signer approves the + request and issues a certificate chain, Kubelet writes the key and + certificate chain to the pod filesystem. The pod does not start until + certificates have been issued for each podCertificate projected volume + source in its spec. + + Kubelet will begin trying to rotate the certificate at the time indicated + by the signer using the PodCertificateRequest.Status.BeginRefreshAt + timestamp. + + Kubelet can write a single file, indicated by the credentialBundlePath + field, or separate files, indicated by the keyPath and + certificateChainPath fields. + + The credential bundle is a single file in PEM format. The first PEM + entry is the private key (in PKCS#8 format), and the remaining PEM + entries are the certificate chain issued by the signer (typically, + signers will return their certificate chain in leaf-to-root order). + + Prefer using the credential bundle format, since your application code + can read it atomically. If you use keyPath and certificateChainPath, + your application must make two separate file reads. If these coincide + with a certificate rotation, it is possible that the private key and leaf + certificate you read may not correspond to each other. Your application + will need to check for this condition, and re-read until they are + consistent. + + The named signer controls chooses the format of the certificate it + issues; consult the signer implementation's documentation to learn how to + use the certificates it issues. + properties: + certificateChainPath: + description: |- + Write the certificate chain at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + credentialBundlePath: + description: |- + Write the credential bundle at this path in the projected volume. + + The credential bundle is a single file that contains multiple PEM blocks. + The first PEM block is a PRIVATE KEY block, containing a PKCS#8 private + key. + + The remaining blocks are CERTIFICATE blocks, containing the issued + certificate chain from the signer (leaf and any intermediates). 
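Given that layout, consuming the bundle is one file read followed by a PEM walk: the first block must be the PKCS#8 private key, and every subsequent CERTIFICATE block joins the chain. A minimal sketch using only the Go standard library; the mount path is hypothetical and depends on how the projected volume is mounted:

    package main

    import (
        "crypto/x509"
        "encoding/pem"
        "fmt"
        "os"
    )

    func main() {
        // Hypothetical path: the real one is determined by the pod's
        // volumeMount plus the configured credentialBundlePath.
        data, err := os.ReadFile("/var/run/credentials/bundle.pem")
        if err != nil {
            panic(err)
        }
        // First PEM block: the PKCS#8 private key.
        block, rest := pem.Decode(data)
        if block == nil || block.Type != "PRIVATE KEY" {
            panic("bundle must begin with a PRIVATE KEY block")
        }
        key, err := x509.ParsePKCS8PrivateKey(block.Bytes)
        if err != nil {
            panic(err)
        }
        // Remaining blocks: the certificate chain, leaf first.
        var chain []*x509.Certificate
        for block, rest = pem.Decode(rest); block != nil; block, rest = pem.Decode(rest) {
            cert, err := x509.ParseCertificate(block.Bytes)
            if err != nil {
                panic(err)
            }
            chain = append(chain, cert)
        }
        fmt.Printf("loaded a %T key and %d certificates\n", key, len(chain))
    }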
+ + Using credentialBundlePath lets your Pod's application code make a single + atomic read that retrieves a consistent key and certificate chain. If you + project them to separate files, your application code will need to + additionally check that the leaf certificate was issued to the key. + type: string + keyPath: + description: |- + Write the key at this path in the projected volume. + + Most applications should use credentialBundlePath. When using keyPath + and certificateChainPath, your application needs to check that the key + and leaf certificate are consistent, because it is possible to read the + files mid-rotation. + type: string + keyType: + description: |- + The type of keypair Kubelet will generate for the pod. + + Valid values are "RSA3072", "RSA4096", "ECDSAP256", "ECDSAP384", + "ECDSAP521", and "ED25519". + type: string + maxExpirationSeconds: + description: |- + maxExpirationSeconds is the maximum lifetime permitted for the + certificate. + + Kubelet copies this value verbatim into the PodCertificateRequests it + generates for this projection. + + If omitted, kube-apiserver will set it to 86400(24 hours). kube-apiserver + will reject values shorter than 3600 (1 hour). The maximum allowable + value is 7862400 (91 days). + + The signer implementation is then free to issue a certificate with any + lifetime *shorter* than MaxExpirationSeconds, but no shorter than 3600 + seconds (1 hour). This constraint is enforced by kube-apiserver. + `kubernetes.io` signers will never issue certificates with a lifetime + longer than 24 hours. + format: int32 + type: integer + signerName: + description: Kubelet's generated CSRs will be + addressed to this signer. + type: string + required: + - keyType + - signerName + type: object secret: description: secret information about the secret data to project @@ -9065,7 +9344,6 @@ spec: description: |- rbd represents a Rados Block Device mount on the host that shares a pod's lifetime. Deprecated: RBD is deprecated and the in-tree rbd type is no longer supported. - More info: https://examples.k8s.io/volumes/rbd/README.md properties: fsType: description: |- diff --git a/config/manifests/bases/splunk-operator.clusterserviceversion.yaml b/config/manifests/bases/splunk-operator.clusterserviceversion.yaml index 3fa109139..f96e1158b 100644 --- a/config/manifests/bases/splunk-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/splunk-operator.clusterserviceversion.yaml @@ -68,6 +68,23 @@ spec: kind: ObjectStorage name: objectstorages.enterprise.splunk.com version: v4 + - description: |- + PostgresClusterClass is the Schema for the postgresclusterclasses API. + PostgresClusterClass defines a reusable template and policy for postgres cluster provisioning. + displayName: Postgres Cluster Class + kind: PostgresClusterClass + name: postgresclusterclasses.enterprise.splunk.com + version: v4 + - description: PostgresCluster is the Schema for the postgresclusters API. + displayName: Postgres Cluster + kind: PostgresCluster + name: postgresclusters.enterprise.splunk.com + version: v4 + - description: PostgresDatabase is the Schema for the postgresdatabases API. 
+ displayName: Postgres Database + kind: PostgresDatabase + name: postgresdatabases.enterprise.splunk.com + version: v4 - description: Queue is the Schema for the queues API displayName: Queue kind: Queue diff --git a/config/rbac/postgrescluster_admin_role.yaml b/config/rbac/postgrescluster_admin_role.yaml deleted file mode 100644 index bb3f2e06b..000000000 --- a/config/rbac/postgrescluster_admin_role.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# This rule is not used by the project splunk-operator itself. -# It is provided to allow the cluster admin to help manage permissions for users. -# -# Grants full permissions ('*') over enterprise.splunk.com. -# This role is intended for users authorized to modify roles and bindings within the cluster, -# enabling them to delegate specific permissions to other users or groups as needed. - -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - labels: - app.kubernetes.io/name: splunk-operator - app.kubernetes.io/managed-by: kustomize - name: postgrescluster-admin-role -rules: -- apiGroups: - - enterprise.splunk.com - resources: - - postgresclusters - verbs: - - '*' -- apiGroups: - - enterprise.splunk.com - resources: - - postgresclusters/status - verbs: - - get diff --git a/config/rbac/postgresclusterclass_admin_role.yaml b/config/rbac/postgresclusterclass_admin_role.yaml deleted file mode 100644 index d16defdd6..000000000 --- a/config/rbac/postgresclusterclass_admin_role.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# This rule is not used by the project splunk-operator itself. -# It is provided to allow the cluster admin to help manage permissions for users. -# -# Grants full permissions ('*') over enterprise.splunk.com. -# This role is intended for users authorized to modify roles and bindings within the cluster, -# enabling them to delegate specific permissions to other users or groups as needed. - -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - labels: - app.kubernetes.io/name: splunk-operator - app.kubernetes.io/managed-by: kustomize - name: postgresclusterclass-admin-role -rules: -- apiGroups: - - enterprise.splunk.com - resources: - - postgresclusterclasses - verbs: - - '*' -- apiGroups: - - enterprise.splunk.com - resources: - - postgresclusterclasses/status - verbs: - - get diff --git a/config/rbac/postgresdatabase_admin_role.yaml b/config/rbac/postgresdatabase_admin_role.yaml deleted file mode 100644 index b98548d5c..000000000 --- a/config/rbac/postgresdatabase_admin_role.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# This rule is not used by the project splunk-operator itself. -# It is provided to allow the cluster admin to help manage permissions for users. -# -# Grants full permissions ('*') over enterprise.splunk.com. -# This role is intended for users authorized to modify roles and bindings within the cluster, -# enabling them to delegate specific permissions to other users or groups as needed. 
- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - labels: - app.kubernetes.io/name: splunk-operator - app.kubernetes.io/managed-by: kustomize - name: postgresdatabase-admin-role -rules: -- apiGroups: - - enterprise.splunk.com - resources: - - postgresdatabases - verbs: - - '*' -- apiGroups: - - enterprise.splunk.com - resources: - - postgresdatabases/status - verbs: - - get diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index 94db6a730..142a8720c 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -1,5 +1,5 @@ /* -Copyright 2026. +Copyright (c) 2018-2022 Splunk Inc. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,99 +18,156 @@ package controller import ( "context" - "os" + "fmt" "path/filepath" "testing" + "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - - "k8s.io/client-go/kubernetes/scheme" + "go.uber.org/zap/zapcore" "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" - enterprisev4 "github.com/splunk/splunk-operator/api/v4" - // +kubebuilder:scaffold:imports -) - -// These tests use Ginkgo (BDD-style Go testing framework). Refer to -// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" -var ( - ctx context.Context - cancel context.CancelFunc - testEnv *envtest.Environment - cfg *rest.Config - k8sClient client.Client + enterpriseApiV3 "github.com/splunk/splunk-operator/api/v3" + enterpriseApi "github.com/splunk/splunk-operator/api/v4" + //+kubebuilder:scaffold:imports ) -func TestControllers(t *testing.T) { +var cfg *rest.Config +var k8sClient client.Client +var testEnv *envtest.Environment +var k8sManager ctrl.Manager + +func TestAPIs(t *testing.T) { RegisterFailHandler(Fail) RunSpecs(t, "Controller Suite") } -var _ = BeforeSuite(func() { - logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) - - ctx, cancel = context.WithCancel(context.TODO()) - - var err error - err = enterprisev4.AddToScheme(scheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - - // +kubebuilder:scaffold:scheme +var _ = BeforeSuite(func(ctx context.Context) { + opts := zap.Options{ + Development: true, + TimeEncoder: zapcore.RFC3339NanoTimeEncoder, + } + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true), zap.UseFlagOptions(&opts))) By("bootstrapping test environment") + testEnv = &envtest.Environment{ CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, ErrorIfCRDPathMissing: true, } - // Retrieve the first found binary directory to allow running tests from IDEs - if getFirstFoundEnvTestBinaryDir() != "" { - testEnv.BinaryAssetsDirectory = getFirstFoundEnvTestBinaryDir() - } + var err error // cfg is defined in this file globally. 
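+	// testEnv.Start() boots a standalone kube-apiserver and etcd from the
+	// envtest binaries (see 'make setup-envtest' / KUBEBUILDER_ASSETS); the
+	// returned rest.Config is shared by the manager and the test client below.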
 	cfg, err = testEnv.Start()
 	Expect(err).NotTo(HaveOccurred())
 	Expect(cfg).NotTo(BeNil())
 
-	k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme})
-	Expect(err).NotTo(HaveOccurred())
-	Expect(k8sClient).NotTo(BeNil())
-})
-
-var _ = AfterSuite(func() {
-	By("tearing down the test environment")
-	cancel()
-	err := testEnv.Stop()
-	Expect(err).NotTo(HaveOccurred())
-})
-
-// getFirstFoundEnvTestBinaryDir locates the first binary in the specified path.
-// ENVTEST-based tests depend on specific binaries, usually located in paths set by
-// controller-runtime. When running tests directly (e.g., via an IDE) without using
-// Makefile targets, the 'BinaryAssetsDirectory' must be explicitly configured.
-//
-// This function streamlines the process by finding the required binaries, similar to
-// setting the 'KUBEBUILDER_ASSETS' environment variable. To ensure the binaries are
-// properly set up, run 'make setup-envtest' beforehand.
-func getFirstFoundEnvTestBinaryDir() string {
-	basePath := filepath.Join("..", "..", "bin", "k8s")
-	entries, err := os.ReadDir(basePath)
-	if err != nil {
-		logf.Log.Error(err, "Failed to read directory", "path", basePath)
-		return ""
-	}
-	for _, entry := range entries {
-		if entry.IsDir() {
-			return filepath.Join(basePath, entry.Name())
-		}
-	}
-	return ""
-}
+	// Register each API group exactly once; AddToScheme is idempotent, so
+	// repeated registrations are redundant.
+	err = enterpriseApi.AddToScheme(clientgoscheme.Scheme)
+	Expect(err).NotTo(HaveOccurred())
+
+	err = enterpriseApiV3.AddToScheme(clientgoscheme.Scheme)
+	Expect(err).NotTo(HaveOccurred())
+
+	//+kubebuilder:scaffold:scheme
+
+	// Create a new manager hosting all controllers under test
+	k8sManager, err = ctrl.NewManager(cfg, ctrl.Options{
+		Scheme: clientgoscheme.Scheme,
+	})
+	Expect(err).ToNot(HaveOccurred())
+	if err := (&ClusterManagerReconciler{
+		Client: k8sManager.GetClient(),
+		Scheme: k8sManager.GetScheme(),
+	}).SetupWithManager(k8sManager); err != nil {
+		Expect(err).NotTo(HaveOccurred())
+	}
+	if err := (&ClusterMasterReconciler{
+		Client: k8sManager.GetClient(),
+		Scheme: k8sManager.GetScheme(),
+	}).SetupWithManager(k8sManager); err != nil {
+		Expect(err).NotTo(HaveOccurred())
+	}
+	if err := (&IndexerClusterReconciler{
+		Client: k8sManager.GetClient(),
+		Scheme: k8sManager.GetScheme(),
+	}).SetupWithManager(k8sManager); err != nil {
+		Expect(err).NotTo(HaveOccurred())
+	}
+	if err := (&IngestorClusterReconciler{
+		Client: k8sManager.GetClient(),
+		Scheme: k8sManager.GetScheme(),
+	}).SetupWithManager(k8sManager); err != nil {
+		Expect(err).NotTo(HaveOccurred())
+	}
+	if err := (&LicenseManagerReconciler{
+		Client: k8sManager.GetClient(),
+		Scheme: k8sManager.GetScheme(),
+	}).SetupWithManager(k8sManager); err != nil {
+		Expect(err).NotTo(HaveOccurred())
+	}
+	if err := (&LicenseMasterReconciler{
+		Client: k8sManager.GetClient(),
+		Scheme: k8sManager.GetScheme(),
+	}).SetupWithManager(k8sManager); err != nil {
+		Expect(err).NotTo(HaveOccurred())
+	}
+	if err := (&MonitoringConsoleReconciler{
+		Client: k8sManager.GetClient(),
+		Scheme: k8sManager.GetScheme(),
+	}).SetupWithManager(k8sManager); err != nil {
+		Expect(err).NotTo(HaveOccurred())
+	}
+	if err := (&SearchHeadClusterReconciler{
+		Client: k8sManager.GetClient(),
+		Scheme: k8sManager.GetScheme(),
+	}).SetupWithManager(k8sManager); err != nil {
+		Expect(err).NotTo(HaveOccurred())
+	}
+	if err := (&StandaloneReconciler{
+		Client: k8sManager.GetClient(),
+		Scheme: k8sManager.GetScheme(),
+	}).SetupWithManager(k8sManager); err != nil {
+		Expect(err).NotTo(HaveOccurred())
+	}
+
+	go func() {
+		// Recover so a failed assertion in this goroutine does not crash the
+		// process; shadow err to avoid a data race with the suite goroutine,
+		// and never call err.Error() on a possibly-nil error.
+		defer GinkgoRecover()
+		err := k8sManager.Start(ctrl.SetupSignalHandler())
+		if err != nil {
+			fmt.Printf("manager error: %v\n", err)
+		}
+		Expect(err).ToNot(HaveOccurred())
+	}()
+
+	k8sClient, err = client.New(cfg, client.Options{Scheme: clientgoscheme.Scheme})
+	Expect(err).NotTo(HaveOccurred())
+	Expect(k8sClient).NotTo(BeNil())
+
+}, NodeTimeout(time.Second*500))
+
+var _ = AfterSuite(func() {
+	By("tearing down the test environment")
+	Expect(testEnv.Stop()).To(Succeed())
+})
diff --git a/test/postgrescluster-retain-upgrade-flow.sh b/test/postgrescluster-retain-upgrade-flow.sh
deleted file mode 100755
index 69124c536..000000000
--- a/test/postgrescluster-retain-upgrade-flow.sh
+++ /dev/null
@@ -1,356 +0,0 @@
-#!/usr/bin/env bash
-# Run `make install` and `make run` in a separate terminal so the operator is running while this test executes.
-# This test verifies that when a PostgresCluster with clusterDeletionPolicy=Retain is deleted, the underlying CNPG Cluster and superuser Secret are not deleted and can be re-attached to a new PostgresCluster with the same name (simulating a major version upgrade flow where the cluster needs to be recreated).
-# Then run: NAMESPACE=your-namespace UPGRADE_POSTGRES_VERSION=16 ./test/postgrescluster-retain-upgrade-flow.sh
-
-
-set -euo pipefail
-
-ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
-TEST_DIR="$ROOT_DIR/test"
-SAMPLES_DIR="$ROOT_DIR/config/samples"
-
-CLUSTER_MANIFEST="${CLUSTER_MANIFEST:-$SAMPLES_DIR/enterprise_v4_postgrescluster_dev.yaml}"
-DATABASE_MANIFEST="${DATABASE_MANIFEST:-$SAMPLES_DIR/enterprise_v4_postgresdatabase.yaml}"
-CONNECT_SCRIPT="${CONNECT_SCRIPT:-$TEST_DIR/connect-to-postgres-cluster.sh}"
-UPGRADE_POSTGRES_VERSION="${UPGRADE_POSTGRES_VERSION:-16}"
-POLL_INTERVAL="${POLL_INTERVAL:-5}"
-TIMEOUT_SECONDS="${TIMEOUT_SECONDS:-900}"
-REQUIRE_POSTGRESDATABASE_READY="${REQUIRE_POSTGRESDATABASE_READY:-0}"
-
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-NC='\033[0m'
-
-log() {
-    echo -e "${YELLOW}[$(date '+%Y-%m-%d %H:%M:%S')] $*${NC}"
-}
-
-pass() {
-    echo -e "${GREEN}[PASS] $*${NC}"
-}
-
-fail() {
-    echo -e "${RED}[FAIL] $*${NC}" >&2
-    exit 1
-}
-
-require_file() {
-    local path="$1"
-    [[ -f "$path" ]] || fail "Required file not found: $path"
-}
-
-require_command() {
-    local cmd="$1"
-    command -v "$cmd" >/dev/null 2>&1 || fail "Required command not found: $cmd"
-}
-
-current_namespace() {
-    local ns
-    ns="$(kubectl config view --minify --output 'jsonpath={..namespace}' 2>/dev/null || true)"
-    if [[ -z "$ns" ]]; then
-        ns="default"
-    fi
-    printf '%s' "$ns"
-}
-
-preflight_namespace() {
-    local deletion_ts phase
-    deletion_ts="$(kubectl get ns "$NAMESPACE" -o jsonpath='{.metadata.deletionTimestamp}' 2>/dev/null || true)"
-    phase="$(kubectl get ns "$NAMESPACE" -o jsonpath='{.status.phase}' 2>/dev/null || true)"
-    if [[ -n "$deletion_ts" || "$phase" == "Terminating" ]]; then
-        fail "Namespace $NAMESPACE is terminating (deletionTimestamp=$deletion_ts phase=$phase). Use a non-terminating namespace."
- fi -} - -preflight_cluster_dns() { - local host - host="${CLUSTER_NAME}-rw.${NAMESPACE}.svc.cluster.local" - if getent hosts "$host" >/dev/null 2>&1; then - return 0 - fi - - log "Cluster DNS name is not resolvable from this machine: $host" - log "This does not block local connection tests (we use kubectl port-forward), but it blocks PostgresDatabase DB-connection/privilege phases when the operator runs out-of-cluster (make run)." - log "Fix: run the operator in-cluster or use telepresence/kubefwd to get cluster DNS/networking on your machine." - - SKIP_POSTGRESDATABASE_READY_CHECK=1 - if [[ "$REQUIRE_POSTGRESDATABASE_READY" == "1" ]]; then - fail "PostgresDatabase readiness required (REQUIRE_POSTGRESDATABASE_READY=1) but cluster DNS is not available." - fi - - log "Continuing with degraded PostgresDatabase checks (readiness will not be required)." -} - -resource_exists() { - local resource="$1" - local name="$2" - kubectl get "$resource" "$name" -n "$NAMESPACE" >/dev/null 2>&1 -} - -jsonpath_value() { - local resource="$1" - local name="$2" - local jsonpath="$3" - kubectl get "$resource" "$name" -n "$NAMESPACE" -o "jsonpath=${jsonpath}" 2>/dev/null -} - -wait_for_jsonpath() { - local resource="$1" - local name="$2" - local jsonpath="$3" - local expected="$4" - local timeout="${5:-$TIMEOUT_SECONDS}" - local deadline=$((SECONDS + timeout)) - local value="" - - while (( SECONDS < deadline )); do - value="$(jsonpath_value "$resource" "$name" "$jsonpath" || true)" - if [[ "$value" == "$expected" ]]; then - pass "$resource/$name reached ${jsonpath}=${expected}" - return 0 - fi - sleep "$POLL_INTERVAL" - done - - fail "Timed out waiting for $resource/$name to reach ${jsonpath}=${expected}. Last value: ${value:-}" -} - -wait_for_contains() { - local resource="$1" - local name="$2" - local jsonpath="$3" - local expected_substring="$4" - local timeout="${5:-$TIMEOUT_SECONDS}" - local deadline=$((SECONDS + timeout)) - local value="" - - while (( SECONDS < deadline )); do - value="$(jsonpath_value "$resource" "$name" "$jsonpath" || true)" - if [[ "$value" == *"$expected_substring"* ]]; then - pass "$resource/$name contains ${expected_substring} in ${jsonpath}" - return 0 - fi - sleep "$POLL_INTERVAL" - done - - fail "Timed out waiting for $resource/$name to contain ${expected_substring} in ${jsonpath}. Last value: ${value:-}" -} - -wait_for_absence() { - local resource="$1" - local name="$2" - local timeout="${3:-$TIMEOUT_SECONDS}" - local deadline=$((SECONDS + timeout)) - - while (( SECONDS < deadline )); do - if ! 
resource_exists "$resource" "$name"; then - pass "$resource/$name is absent" - return 0 - fi - sleep "$POLL_INTERVAL" - done - - fail "Timed out waiting for $resource/$name to be deleted" -} - -wait_for_presence() { - local resource="$1" - local name="$2" - local timeout="${3:-$TIMEOUT_SECONDS}" - local deadline=$((SECONDS + timeout)) - - while (( SECONDS < deadline )); do - if resource_exists "$resource" "$name"; then - pass "$resource/$name exists" - return 0 - fi - sleep "$POLL_INTERVAL" - done - - fail "Timed out waiting for $resource/$name to exist" -} - -wait_for_owner_reference() { - local resource="$1" - local name="$2" - local owner_kind="$3" - local owner_name="$4" - local owner_uid="$5" - local timeout="${6:-$TIMEOUT_SECONDS}" - local deadline=$((SECONDS + timeout)) - local owners="" - local expected="${owner_kind}:${owner_name}:${owner_uid}" - - while (( SECONDS < deadline )); do - owners="$(jsonpath_value "$resource" "$name" '{range .metadata.ownerReferences[*]}{.kind}:{.name}:{.uid}{"\n"}{end}' || true)" - if [[ "$owners" == *"$expected"* ]]; then - pass "$resource/$name is owned by ${owner_kind}/${owner_name}" - return 0 - fi - sleep "$POLL_INTERVAL" - done - - fail "Timed out waiting for $resource/$name to be owned by ${owner_kind}/${owner_name}. Owners: ${owners:-}" -} - -run_connection_check() { - log "Checking superuser connection with $CONNECT_SCRIPT" - printf 'SELECT current_user;\n\\q\n' | bash "$CONNECT_SCRIPT" "$CLUSTER_NAME" "$NAMESPACE" - pass "Superuser connection succeeded" -} - -patch_cluster() { - local deletion_policy="$1" - local pooler_enabled="$2" - kubectl patch postgrescluster "$CLUSTER_NAME" -n "$NAMESPACE" --type merge \ - -p "{\"spec\":{\"clusterDeletionPolicy\":\"${deletion_policy}\",\"connectionPoolerEnabled\":${pooler_enabled}}}" >/dev/null -} - -apply_upgraded_cluster_manifest() { - local tmp_manifest - tmp_manifest="$(mktemp)" - - sed \ - -e "s/^\([[:space:]]*clusterDeletionPolicy:\).*/\1 Retain/" \ - -e "s/^\([[:space:]]*postgresVersion:\).*/\1 \"${UPGRADE_POSTGRES_VERSION}\"/" \ - "$CLUSTER_MANIFEST" > "$tmp_manifest" - - kubectl apply -n "$NAMESPACE" -f "$tmp_manifest" >/dev/null - rm -f "$tmp_manifest" -} - -assert_cluster_ready() { - wait_for_jsonpath postgrescluster "$CLUSTER_NAME" '{.status.phase}' 'Ready' - wait_for_jsonpath postgrescluster "$CLUSTER_NAME" '{.status.conditions[?(@.type=="ClusterReady")].status}' 'True' - wait_for_jsonpath postgrescluster "$CLUSTER_NAME" '{.status.conditions[?(@.type=="ConfigMapReady")].status}' 'True' -} - -assert_database_created() { - wait_for_presence postgresdatabase "$DATABASE_NAME" - for db in "${DATABASES[@]}"; do - wait_for_presence databases.postgresql.cnpg.io "${DATABASE_NAME}-${db}" - done - pass "PostgresDatabase CR exists and CNPG Database CRs are present" -} - -assert_database_ready() { - if [[ "${SKIP_POSTGRESDATABASE_READY_CHECK:-0}" == "1" ]]; then - assert_database_created - return 0 - fi - wait_for_jsonpath postgresdatabase "$DATABASE_NAME" '{.status.phase}' 'Ready' - wait_for_jsonpath postgresdatabase "$DATABASE_NAME" '{.status.observedGeneration}' \ - "$(jsonpath_value postgresdatabase "$DATABASE_NAME" '{.metadata.generation}')" -} - -record_cluster_artifacts() { - SUPERUSER_SECRET_NAME="$(jsonpath_value postgrescluster "$CLUSTER_NAME" '{.status.resources.secretRef.name}')" - CONFIGMAP_NAME="$(jsonpath_value postgrescluster "$CLUSTER_NAME" '{.status.resources.configMapRef.name}')" - - [[ -n "$SUPERUSER_SECRET_NAME" ]] || fail "PostgresCluster status.resources.secretRef.name is empty" 
- [[ -n "$CONFIGMAP_NAME" ]] || fail "PostgresCluster status.resources.configMapRef.name is empty" -} - -cleanup_database_cr() { - if resource_exists postgresdatabase "$DATABASE_NAME"; then - log "Deleting PostgresDatabase/$DATABASE_NAME to leave the namespace clean" - kubectl delete postgresdatabase "$DATABASE_NAME" -n "$NAMESPACE" --wait=false >/dev/null - wait_for_absence postgresdatabase "$DATABASE_NAME" - fi -} - -require_command kubectl -require_file "$CLUSTER_MANIFEST" -require_file "$DATABASE_MANIFEST" -require_file "$CONNECT_SCRIPT" - -NAMESPACE="${NAMESPACE:-$(current_namespace)}" -CLUSTER_NAME="${CLUSTER_NAME:-$(kubectl create --dry-run=client -f "$CLUSTER_MANIFEST" -o jsonpath='{.metadata.name}')}" -DATABASE_NAME="${DATABASE_NAME:-$(kubectl create --dry-run=client -f "$DATABASE_MANIFEST" -o jsonpath='{.metadata.name}')}" -DATABASES_STR="$(kubectl create --dry-run=client -f "$DATABASE_MANIFEST" -o jsonpath='{range .spec.databases[*]}{.name}{" "}{end}')" -read -r -a DATABASES <<< "${DATABASES_STR:-}" -RW_POOLER_NAME="${CLUSTER_NAME}-pooler-rw" -RO_POOLER_NAME="${CLUSTER_NAME}-pooler-ro" - -log "Using namespace: $NAMESPACE" -log "Cluster manifest: $CLUSTER_MANIFEST" -log "Database manifest: $DATABASE_MANIFEST" -log "Upgrade target postgresVersion: $UPGRADE_POSTGRES_VERSION" - -preflight_namespace -preflight_cluster_dns - -log "1. Creating PostgresCluster from sample manifest" -kubectl apply -n "$NAMESPACE" -f "$CLUSTER_MANIFEST" - -log "2. Creating PostgresDatabase from sample manifest" -kubectl apply -n "$NAMESPACE" -f "$DATABASE_MANIFEST" - -log "3. Waiting for PostgresCluster and PostgresDatabase to become ready" -assert_cluster_ready -assert_database_ready -record_cluster_artifacts -pass "PostgresCluster and PostgresDatabase were created successfully" - -log "4. Verifying superuser connection to PostgresCluster" -run_connection_check - -log "5. Setting clusterDeletionPolicy=Retain and connectionPoolerEnabled=false" -patch_cluster "Retain" "false" -wait_for_jsonpath postgrescluster "$CLUSTER_NAME" '{.spec.clusterDeletionPolicy}' 'Retain' -wait_for_jsonpath postgrescluster "$CLUSTER_NAME" '{.spec.connectionPoolerEnabled}' 'false' -wait_for_absence pooler.postgresql.cnpg.io "$RW_POOLER_NAME" -wait_for_absence pooler.postgresql.cnpg.io "$RO_POOLER_NAME" -assert_cluster_ready - -log "6. Setting connectionPoolerEnabled=true and waiting for poolers" -patch_cluster "Retain" "true" -wait_for_jsonpath postgrescluster "$CLUSTER_NAME" '{.spec.connectionPoolerEnabled}' 'true' -wait_for_presence pooler.postgresql.cnpg.io "$RW_POOLER_NAME" -wait_for_presence pooler.postgresql.cnpg.io "$RO_POOLER_NAME" -wait_for_jsonpath postgrescluster "$CLUSTER_NAME" '{.status.conditions[?(@.type=="PoolerReady")].status}' 'True' -assert_cluster_ready - -log "7. Deleting PostgresCluster with retention enabled" -kubectl delete postgrescluster "$CLUSTER_NAME" -n "$NAMESPACE" --wait=false >/dev/null -wait_for_absence postgrescluster "$CLUSTER_NAME" -wait_for_presence cluster.postgresql.cnpg.io "$CLUSTER_NAME" -wait_for_presence secret "$SUPERUSER_SECRET_NAME" -pass "CNPG cluster and superuser secret remained after PostgresCluster deletion" - -log "8. Recreating PostgresCluster with a major version upgrade" -apply_upgraded_cluster_manifest -wait_for_presence postgrescluster "$CLUSTER_NAME" -wait_for_contains cluster.postgresql.cnpg.io "$CLUSTER_NAME" '{.spec.imageName}' ":${UPGRADE_POSTGRES_VERSION}" -assert_cluster_ready -record_cluster_artifacts - -log "9. 
Checking that retained resources were re-attached to the recreated PostgresCluster"
-POSTGRES_CLUSTER_UID="$(jsonpath_value postgrescluster "$CLUSTER_NAME" '{.metadata.uid}')"
-wait_for_owner_reference cluster.postgresql.cnpg.io "$CLUSTER_NAME" "PostgresCluster" "$CLUSTER_NAME" "$POSTGRES_CLUSTER_UID"
-wait_for_owner_reference secret "$SUPERUSER_SECRET_NAME" "PostgresCluster" "$CLUSTER_NAME" "$POSTGRES_CLUSTER_UID"
-
-log "10. Verifying superuser connection after recreate/upgrade"
-run_connection_check
-
-log "11. Setting clusterDeletionPolicy=Delete"
-kubectl patch postgrescluster "$CLUSTER_NAME" -n "$NAMESPACE" --type merge \
-    -p '{"spec":{"clusterDeletionPolicy":"Delete"}}' >/dev/null
-wait_for_jsonpath postgrescluster "$CLUSTER_NAME" '{.spec.clusterDeletionPolicy}' 'Delete'
-
-log "12. Deleting the PostgresCluster"
-kubectl delete postgrescluster "$CLUSTER_NAME" -n "$NAMESPACE" --wait=false >/dev/null
-wait_for_absence postgrescluster "$CLUSTER_NAME"
-
-log "13. Checking that no cluster leftovers remain"
-cleanup_database_cr
-wait_for_absence cluster.postgresql.cnpg.io "$CLUSTER_NAME"
-wait_for_absence pooler.postgresql.cnpg.io "$RW_POOLER_NAME"
-wait_for_absence pooler.postgresql.cnpg.io "$RO_POOLER_NAME"
-wait_for_absence secret "$SUPERUSER_SECRET_NAME"
-wait_for_absence configmap "$CONFIGMAP_NAME"
-pass "No PostgresCluster leftovers remain in namespace $NAMESPACE"
-
-log "Flow finished successfully"

From 53d78dd454b13258b9225caa4cbf451c4674e236 Mon Sep 17 00:00:00 2001
From: Kamil Ubych <56136249+limak9182@users.noreply.github.com>
Date: Thu, 26 Mar 2026 10:15:47 +0100
Subject: [PATCH 03/36] unit tests for postgres cluster helpers (#1789)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Database CR POC
* Adjust packages and test interfaces so tests pass with the latest packages forced by cnpg
* Reshape cluster class CRD
* Apply PR suggestions
* Remove class controller
* Configure database CRD
* Change CR name. Apply PR suggestions
* PostgresCluster controller with base functionality
* Database connection pooler types (#1696)
* addition of connection pooler types
* moved ConnectionPooler to CNPGConfig
* created connectionPoolerEnabled in cluster config
* delete connectionpooler_types.go
* fix validation logic for storage and postgresVersion: remove semver check, add messages, extend description
* Adding initial database creation
* Add user create validation
* Implement SSA on the postgres cluster CR level
* fix infinite loop on reconciliation
* Removed redundant comment.
* Resolve merge conflicts
* reconciliation logic for pgbouncer (#1713)
* Add secrets management for roles and its use in cluster
* Apply PR suggestions
* postgres database finalizer (#1750)
* Add configmaps and secret handling for PostgresCluster
* Add finalizer support to PostgresCluster controller
* Add support for config map generation in database controller (#1759)
* Add support for config map generation
* Simplify code and create-update pattern
* database finalizer - leave-resources approach
* fix comment
* simplify classifyRoles
* naming fix
* comments fix
* patchManagedRolesOnDeletion simplified
* naming fix
* simplify reconciling-secrets logic
* config map simplification
* simplify readopting logic
* refactor secrets reconciliation
* refactor handleDeletion function
* simplify patchManagedRolesOnDeletion
* simplify configMap and Secret, fix finalizer logic
* checking SSA conflicts in field manager
* comment patchManagedRolesOnDeletion func
* Add support to apply RW role privileges to the database
* Use vendor to pass internal libs
* unit tests for postgres cluster helpers
* Use pgx instead of internal lib
* Fix reconciliation storm for postgresCluster
* move to assert
* arePoolersReady logic separation
* test rename
* moving the test file to pkg
* AAA adjustments
* poolers simplification reversed

---------

Co-authored-by: Michał Płoski
Co-authored-by: dpishchenkov
Co-authored-by: Dmytro Pishchenkov
---
 pkg/postgresql/cluster/core/cluster.go        |   53 +-
 .../cluster/core/cluster_unit_test.go         | 1139 +++++++++++++++++
 2 files changed, 1170 insertions(+), 22 deletions(-)
 create mode 100644 pkg/postgresql/cluster/core/cluster_unit_test.go

diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go
index 66622d8ad..3459101a4 100644
--- a/pkg/postgresql/cluster/core/cluster.go
+++ b/pkg/postgresql/cluster/core/cluster.go
@@ -308,7 +308,19 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim
 		}
 		return ctrl.Result{RequeueAfter: retryDelay}, nil
 
-	case !arePoolersReady(ctx, c, postgresCluster):
+	case func() bool {
+		rwPooler := &cnpgv1.Pooler{}
+		rwErr := c.Get(ctx, types.NamespacedName{
+			Name:      poolerResourceName(postgresCluster.Name, readWriteEndpoint),
+			Namespace: postgresCluster.Namespace,
+		}, rwPooler)
+		roPooler := &cnpgv1.Pooler{}
+		roErr := c.Get(ctx, types.NamespacedName{
+			Name:      poolerResourceName(postgresCluster.Name, readOnlyEndpoint),
+			Namespace: postgresCluster.Namespace,
+		}, roPooler)
+		return rwErr != nil || roErr != nil || !arePoolersReady(rwPooler, roPooler)
+	}():
 		logger.Info("Connection Poolers are not ready yet, requeueing")
 		if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerCreating,
 			"Connection poolers are being provisioned", pendingClusterPhase); statusErr != nil {
@@ -384,9 +396,21 @@
 		}
 		return ctrl.Result{}, fmt.Errorf("failed to sync status: %w", err)
 	}
-	if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy && arePoolersReady(ctx, c, postgresCluster) {
-		logger.Info("Poolers are ready, syncing pooler status")
-		_ = syncPoolerStatus(ctx, c, postgresCluster)
+	if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy {
+		rwPooler := &cnpgv1.Pooler{}
+		rwErr := c.Get(ctx, types.NamespacedName{
+			Name:      poolerResourceName(postgresCluster.Name, readWriteEndpoint),
+			Namespace: postgresCluster.Namespace,
+		}, rwPooler)
+		roPooler := &cnpgv1.Pooler{}
+		roErr := c.Get(ctx, types.NamespacedName{
+			Name: 
@@ -384,9 +396,21 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim
 		}
 		return ctrl.Result{}, fmt.Errorf("failed to sync status: %w", err)
 	}
-	if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy && arePoolersReady(ctx, c, postgresCluster) {
-		logger.Info("Poolers are ready, syncing pooler status")
-		_ = syncPoolerStatus(ctx, c, postgresCluster)
+	if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy {
+		rwPooler := &cnpgv1.Pooler{}
+		rwErr := c.Get(ctx, types.NamespacedName{
+			Name:      poolerResourceName(postgresCluster.Name, readWriteEndpoint),
+			Namespace: postgresCluster.Namespace,
+		}, rwPooler)
+		roPooler := &cnpgv1.Pooler{}
+		roErr := c.Get(ctx, types.NamespacedName{
+			Name:      poolerResourceName(postgresCluster.Name, readOnlyEndpoint),
+			Namespace: postgresCluster.Namespace,
+		}, roPooler)
+		if rwErr == nil && roErr == nil && arePoolersReady(rwPooler, roPooler) {
+			logger.Info("Poolers are ready, syncing pooler status")
+			_ = syncPoolerStatus(ctx, c, postgresCluster)
+		}
 	}
 	logger.Info("Reconciliation complete")
 	return ctrl.Result{}, nil
@@ -567,28 +591,13 @@ func poolerExists(ctx context.Context, c client.Client, cluster *enterprisev4.Po
 	return true
 }
 
-func arePoolersReady(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster) bool {
-	rwPooler := &cnpgv1.Pooler{}
-	rwErr := c.Get(ctx, types.NamespacedName{
-		Name:      poolerResourceName(cluster.Name, readWriteEndpoint),
-		Namespace: cluster.Namespace,
-	}, rwPooler)
-
-	roPooler := &cnpgv1.Pooler{}
-	roErr := c.Get(ctx, types.NamespacedName{
-		Name:      poolerResourceName(cluster.Name, readOnlyEndpoint),
-		Namespace: cluster.Namespace,
-	}, roPooler)
-
-	return isPoolerReady(rwPooler, rwErr) && isPoolerReady(roPooler, roErr)
+func arePoolersReady(rwPooler, roPooler *cnpgv1.Pooler) bool {
+	return isPoolerReady(rwPooler) && isPoolerReady(roPooler)
 }
 
 // isPoolerReady checks if a pooler has all instances scheduled.
 // CNPG PoolerStatus only tracks scheduled instances, not ready pods.
-func isPoolerReady(pooler *cnpgv1.Pooler, err error) bool {
-	if err != nil {
-		return false
-	}
+func isPoolerReady(pooler *cnpgv1.Pooler) bool {
 	desired := int32(1)
 	if pooler.Spec.Instances != nil {
 		desired = *pooler.Spec.Instances
diff --git a/pkg/postgresql/cluster/core/cluster_unit_test.go b/pkg/postgresql/cluster/core/cluster_unit_test.go
new file mode 100644
index 000000000..e87173afb
--- /dev/null
+++ b/pkg/postgresql/cluster/core/cluster_unit_test.go
@@ -0,0 +1,1139 @@
+package core
+
+import (
+	"context"
+	"testing"
+
+	cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
+	enterprisev4 "github.com/splunk/splunk-operator/api/v4"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/utils/ptr"
+	client "sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+)
+
+func TestPoolerResourceName(t *testing.T) {
+	tests := []struct {
+		name        string
+		clusterName string
+		poolerType  string
+		expected    string
+	}{
+		{
+			name:        "read-write pooler",
+			clusterName: "my-cluster",
+			poolerType:  "rw",
+			expected:    "my-cluster-pooler-rw",
+		},
+		{
+			name:        "cluster name with mixed case and alphanumeric suffix",
+			clusterName: "My-Cluster-12x2f",
+			poolerType:  "rw",
+			expected:    "My-Cluster-12x2f-pooler-rw",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := poolerResourceName(tt.clusterName, tt.poolerType)
+
+			assert.Equal(t, tt.expected, got)
+		})
+	}
+}
+
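+// isPoolerReady treats a nil Spec.Instances as a desired count of one,
+// matching CNPG's default, and defines readiness as "scheduled instances
+// meet the desired count". The cases below pin both the defaulting and
+// the comparison.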
+func TestIsPoolerReady(t *testing.T) {
+	tests := []struct {
+		name     string
+		pooler   *cnpgv1.Pooler
+		expected bool
+	}{
+		{
+			name: "nil instances defaults desired to 1, zero scheduled means not ready",
+			pooler: &cnpgv1.Pooler{
+				Status: cnpgv1.PoolerStatus{Instances: 0},
+			},
+			expected: false,
+		},
+		{
+			name: "nil instances defaults desired to 1, one scheduled means ready",
+			pooler: &cnpgv1.Pooler{
+				Status: cnpgv1.PoolerStatus{Instances: 1},
+			},
+			expected: true,
+		},
+		{
+			name: "scheduled meets desired",
+			pooler: &cnpgv1.Pooler{
+				Spec:   cnpgv1.PoolerSpec{Instances: ptr.To(int32(3))},
+				Status: cnpgv1.PoolerStatus{Instances: 3},
+			},
+			expected: true,
+		},
+		{
+			name: "scheduled below desired",
+			pooler: &cnpgv1.Pooler{
+				Spec:   cnpgv1.PoolerSpec{Instances: ptr.To(int32(3))},
+				Status: cnpgv1.PoolerStatus{Instances: 2},
+			},
+			expected: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := isPoolerReady(tt.pooler)
+
+			assert.Equal(t, tt.expected, got)
+		})
+	}
+}
+
+func TestNormalizeCNPGClusterSpec(t *testing.T) {
+	tests := []struct {
+		name                    string
+		spec                    cnpgv1.ClusterSpec
+		customDefinedParameters map[string]string
+		expected                normalizedCNPGClusterSpec
+	}{
+		{
+			name: "basic fields are copied",
+			spec: cnpgv1.ClusterSpec{
+				ImageName:            "ghcr.io/cloudnative-pg/postgresql:18",
+				Instances:            3,
+				StorageConfiguration: cnpgv1.StorageConfiguration{Size: "10Gi"},
+			},
+			customDefinedParameters: nil,
+			expected: normalizedCNPGClusterSpec{
+				ImageName:   "ghcr.io/cloudnative-pg/postgresql:18",
+				Instances:   3,
+				StorageSize: "10Gi",
+			},
+		},
+		{
+			name: "CNPG-injected parameters are excluded from comparison",
+			spec: cnpgv1.ClusterSpec{
+				ImageName: "img:18",
+				Instances: 1,
+				PostgresConfiguration: cnpgv1.PostgresConfiguration{
+					Parameters: map[string]string{
+						"shared_buffers":  "256MB",
+						"max_connections": "200",
+						"cnpg_injected":   "should-not-appear",
+					},
+				},
+			},
+			customDefinedParameters: map[string]string{
+				"shared_buffers":  "256MB",
+				"max_connections": "200",
+			},
+			expected: normalizedCNPGClusterSpec{
+				ImageName: "img:18",
+				Instances: 1,
+				CustomDefinedParameters: map[string]string{
+					"shared_buffers":  "256MB",
+					"max_connections": "200",
+				},
+			},
+		},
+		{
+			name: "empty custom params does not populate CustomDefinedParameters",
+			spec: cnpgv1.ClusterSpec{
+				ImageName: "img:18",
+				Instances: 1,
+				PostgresConfiguration: cnpgv1.PostgresConfiguration{
+					Parameters: map[string]string{"cnpg_injected": "val"},
+				},
+			},
+			customDefinedParameters: map[string]string{},
+			expected: normalizedCNPGClusterSpec{
+				ImageName: "img:18",
+				Instances: 1,
+			},
+		},
+		{
+			name: "PgHBA included when non-empty",
+			spec: cnpgv1.ClusterSpec{
+				ImageName: "img:18",
+				Instances: 1,
+				PostgresConfiguration: cnpgv1.PostgresConfiguration{
+					PgHBA: []string{"hostssl all all 0.0.0.0/0 scram-sha-256"},
+				},
+			},
+			expected: normalizedCNPGClusterSpec{
+				ImageName: "img:18",
+				Instances: 1,
+				PgHBA:     []string{"hostssl all all 0.0.0.0/0 scram-sha-256"},
+			},
+		},
+		{
+			name: "empty PgHBA is excluded",
+			spec: cnpgv1.ClusterSpec{
+				ImageName: "img:18",
+				Instances: 1,
+				PostgresConfiguration: cnpgv1.PostgresConfiguration{
+					PgHBA: []string{},
+				},
+			},
+			expected: normalizedCNPGClusterSpec{
+				ImageName: "img:18",
+				Instances: 1,
+			},
+		},
+		{
+			name: "bootstrap populates database and owner",
+			spec: cnpgv1.ClusterSpec{
+				ImageName: "img:18",
+				Instances: 1,
+				Bootstrap: &cnpgv1.BootstrapConfiguration{
+					InitDB: &cnpgv1.BootstrapInitDB{
+						Database: "mydb",
+						Owner:    "admin",
+					},
+				},
+			},
+			expected: normalizedCNPGClusterSpec{
+				ImageName:       "img:18",
+				Instances:       1,
+				DefaultDatabase: "mydb",
+				Owner:           "admin",
+			},
+		},
+		{
+			name: "nil bootstrap leaves database and owner empty",
+			spec: cnpgv1.ClusterSpec{
+				ImageName: "img:18",
+				Instances: 1,
+			},
+			expected: normalizedCNPGClusterSpec{
+				ImageName: "img:18",
+				Instances: 1,
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := normalizeCNPGClusterSpec(tt.spec, tt.customDefinedParameters)
+
+			assert.Equal(t, tt.expected, got)
+		})
+	}
+}
+
+func TestGetMergedConfig(t *testing.T) {
+	classInstances := int32(1)
+	classVersion := "17"
+	classStorage := resource.MustParse("50Gi")
+	baseClass := &enterprisev4.PostgresClusterClass{
+		ObjectMeta: metav1.ObjectMeta{Name: "standard"},
+		Spec: enterprisev4.PostgresClusterClassSpec{
+			Config: &enterprisev4.PostgresClusterClassConfig{
+				Instances:        &classInstances,
+				PostgresVersion:  &classVersion,
+				Storage:          &classStorage,
+				Resources:        &corev1.ResourceRequirements{},
+				PostgreSQLConfig: map[string]string{"shared_buffers": "128MB"},
+				PgHBA:            []string{"host all all 0.0.0.0/0 md5"},
+			},
+			CNPG: &enterprisev4.CNPGConfig{PrimaryUpdateMethod: ptr.To("switchover")},
+		},
+	}
+
+	t.Run("cluster spec overrides class defaults", func(t *testing.T) {
+		overrideInstances := int32(5)
+		overrideVersion := "18"
+		overrideStorage := resource.MustParse("100Gi")
+		cluster := &enterprisev4.PostgresCluster{
+			Spec: enterprisev4.PostgresClusterSpec{
+				Instances:        &overrideInstances,
+				PostgresVersion:  &overrideVersion,
+				Storage:          &overrideStorage,
+				PostgreSQLConfig: map[string]string{"max_connections": "200"},
+				PgHBA:            []string{"hostssl all all 0.0.0.0/0 scram-sha-256"},
+			},
+		}
+
+		cfg, err := getMergedConfig(baseClass, cluster)
+
+		require.NoError(t, err)
+		assert.Equal(t, int32(5), *cfg.Spec.Instances)
+		assert.Equal(t, "18", *cfg.Spec.PostgresVersion)
+		assert.Equal(t, "100Gi", cfg.Spec.Storage.String())
+		assert.Equal(t, "200", cfg.Spec.PostgreSQLConfig["max_connections"])
+		assert.Equal(t, "hostssl all all 0.0.0.0/0 scram-sha-256", cfg.Spec.PgHBA[0])
+	})
+
+	t.Run("class defaults fill in nil cluster fields", func(t *testing.T) {
+		cluster := &enterprisev4.PostgresCluster{
+			Spec: enterprisev4.PostgresClusterSpec{},
+		}
+
+		cfg, err := getMergedConfig(baseClass, cluster)
+
+		require.NoError(t, err)
+		assert.Equal(t, int32(1), *cfg.Spec.Instances)
+		assert.Equal(t, "17", *cfg.Spec.PostgresVersion)
+		assert.Equal(t, "50Gi", cfg.Spec.Storage.String())
+		assert.Equal(t, "128MB", cfg.Spec.PostgreSQLConfig["shared_buffers"])
+	})
+
+	t.Run("returns error when required fields missing from both", func(t *testing.T) {
+		emptyClass := &enterprisev4.PostgresClusterClass{
+			ObjectMeta: metav1.ObjectMeta{Name: "empty"},
+			Spec:       enterprisev4.PostgresClusterClassSpec{},
+		}
+		cluster := &enterprisev4.PostgresCluster{
+			Spec: enterprisev4.PostgresClusterSpec{},
+		}
+
+		_, err := getMergedConfig(emptyClass, cluster)
+
+		require.Error(t, err)
+	})
+
+	t.Run("CNPG config comes from class not cluster", func(t *testing.T) {
+		cluster := &enterprisev4.PostgresCluster{
+			Spec: enterprisev4.PostgresClusterSpec{},
+		}
+
+		cfg, err := getMergedConfig(baseClass, cluster)
+
+		require.NoError(t, err)
+		require.NotNil(t, cfg.CNPG)
+		assert.Equal(t, "switchover", *cfg.CNPG.PrimaryUpdateMethod)
+	})
+
+	t.Run("nil maps and slices initialized to safe zero values", func(t *testing.T) {
+		classWithNoMaps := &enterprisev4.PostgresClusterClass{
+			ObjectMeta: metav1.ObjectMeta{Name: "minimal"},
+			Spec: enterprisev4.PostgresClusterClassSpec{
+				Config: &enterprisev4.PostgresClusterClassConfig{
+					Instances:       &classInstances,
+					PostgresVersion: &classVersion,
+					Storage:         &classStorage,
+				},
+			},
+		}
+		cluster := &enterprisev4.PostgresCluster{
+			Spec: enterprisev4.PostgresClusterSpec{},
+		}
+
+		cfg, err := getMergedConfig(classWithNoMaps, cluster)
+
+		require.NoError(t, err)
+		assert.NotNil(t, cfg.Spec.PostgreSQLConfig)
+		assert.NotNil(t, cfg.Spec.PgHBA)
+		assert.NotNil(t, cfg.Spec.Resources)
+	})
+}
+
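+// Merge precedence, in one line: a field set on the cluster spec wins over
+// the class default. A field missing from both sides is an error for the
+// required trio (instances, version, storage) and a safe zero value for
+// maps and slices, as the cases above pin down.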
+func TestBuildCNPGClusterSpec(t *testing.T) {
+	version := "18"
+	instances := int32(3)
+	storage := resource.MustParse("50Gi")
+	cfg := &MergedConfig{
+		Spec: &enterprisev4.PostgresClusterSpec{
+			PostgresVersion: &version,
+			Instances:       &instances,
+			Storage:         &storage,
+			PostgreSQLConfig: map[string]string{
+				"shared_buffers":  "256MB",
+				"max_connections": "200",
+			},
+			PgHBA: []string{
+				"hostssl all all 0.0.0.0/0 scram-sha-256",
+				"host replication all 10.0.0.0/8 md5",
+			},
+			Resources: &corev1.ResourceRequirements{},
+		},
+	}
+
+	spec := buildCNPGClusterSpec(cfg, "my-secret")
+
+	assert.Equal(t, "ghcr.io/cloudnative-pg/postgresql:18", spec.ImageName)
+	assert.Equal(t, 3, spec.Instances)
+	require.NotNil(t, spec.SuperuserSecret)
+	assert.Equal(t, "my-secret", spec.SuperuserSecret.Name)
+	assert.Equal(t, "my-secret", spec.Bootstrap.InitDB.Secret.Name)
+	require.NotNil(t, spec.EnableSuperuserAccess)
+	assert.True(t, *spec.EnableSuperuserAccess)
+	assert.Equal(t, "postgres", spec.Bootstrap.InitDB.Database)
+	assert.Equal(t, "postgres", spec.Bootstrap.InitDB.Owner)
+	assert.Equal(t, "50Gi", spec.StorageConfiguration.Size)
+	assert.Equal(t, "256MB", spec.PostgresConfiguration.Parameters["shared_buffers"])
+	assert.Equal(t, "200", spec.PostgresConfiguration.Parameters["max_connections"])
+	require.Len(t, spec.PostgresConfiguration.PgHBA, 2)
+	assert.Equal(t, "hostssl all all 0.0.0.0/0 scram-sha-256", spec.PostgresConfiguration.PgHBA[0])
+	assert.Equal(t, "host replication all 10.0.0.0/8 md5", spec.PostgresConfiguration.PgHBA[1])
+}
+
+func TestBuildCNPGPooler(t *testing.T) {
+	scheme := runtime.NewScheme()
+	enterprisev4.AddToScheme(scheme)
+	cnpgv1.AddToScheme(scheme)
+
+	poolerInstances := int32(3)
+	poolerMode := enterprisev4.ConnectionPoolerModeTransaction
+	postgresCluster := &enterprisev4.PostgresCluster{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "my-cluster",
+			Namespace: "db-ns",
+			UID:       "test-uid",
+		},
+	}
+	cnpgCluster := &cnpgv1.Cluster{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "my-cluster",
+		},
+	}
+	cfg := &MergedConfig{
+		CNPG: &enterprisev4.CNPGConfig{
+			ConnectionPooler: &enterprisev4.ConnectionPoolerConfig{
+				Instances: &poolerInstances,
+				Mode:      &poolerMode,
+				Config:    map[string]string{"default_pool_size": "25"},
+			},
+		},
+	}
+
+	t.Run("rw pooler", func(t *testing.T) {
+		pooler := buildCNPGPooler(scheme, postgresCluster, cfg, cnpgCluster, "rw")
+
+		assert.Equal(t, "my-cluster-pooler-rw", pooler.Name)
+		assert.Equal(t, "db-ns", pooler.Namespace)
+		assert.Equal(t, "my-cluster", pooler.Spec.Cluster.Name)
+		require.NotNil(t, pooler.Spec.Instances)
+		assert.Equal(t, int32(3), *pooler.Spec.Instances)
+		assert.Equal(t, cnpgv1.PoolerType("rw"), pooler.Spec.Type)
+		assert.Equal(t, cnpgv1.PgBouncerPoolMode("transaction"), pooler.Spec.PgBouncer.PoolMode)
+		assert.Equal(t, "25", pooler.Spec.PgBouncer.Parameters["default_pool_size"])
+		require.Len(t, pooler.OwnerReferences, 1)
+		assert.Equal(t, "test-uid", string(pooler.OwnerReferences[0].UID))
+	})
+
+	t.Run("ro pooler", func(t *testing.T) {
+		pooler := buildCNPGPooler(scheme, postgresCluster, cfg, cnpgCluster, "ro")
+
+		assert.Equal(t, "my-cluster-pooler-ro", pooler.Name)
+		assert.Equal(t, cnpgv1.PoolerType("ro"), pooler.Spec.Type)
+	})
+}
+
+func TestBuildCNPGCluster(t *testing.T) {
+	scheme := runtime.NewScheme()
+	enterprisev4.AddToScheme(scheme)
+	cnpgv1.AddToScheme(scheme)
+
+	instances := int32(3)
+	version := "18"
+	storage := resource.MustParse("50Gi")
+	postgresCluster := &enterprisev4.PostgresCluster{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "my-cluster",
+			Namespace: "db-ns",
+			UID:       "pg-uid",
+		},
+	}
+	cfg := &MergedConfig{
+		Spec: &enterprisev4.PostgresClusterSpec{
+			Instances:        &instances,
+			PostgresVersion:  &version,
+			Storage:          &storage,
+			PostgreSQLConfig: map[string]string{},
+			PgHBA:            []string{},
+			Resources:        &corev1.ResourceRequirements{},
+		},
+	}
+
+	cluster := buildCNPGCluster(scheme, postgresCluster, cfg, "my-secret")
+
+	assert.Equal(t, "my-cluster", cluster.Name)
+	assert.Equal(t, "db-ns", cluster.Namespace)
+	require.Len(t, cluster.OwnerReferences, 1)
+	assert.Equal(t, "pg-uid", string(cluster.OwnerReferences[0].UID))
+	assert.Equal(t, 3, cluster.Spec.Instances)
+}
+
+func TestClusterSecretExists(t *testing.T) {
+	scheme := runtime.NewScheme()
+	corev1.AddToScheme(scheme)
+
+	tests := []struct {
+		name           string
+		objects        []client.Object
+		secretName     string
+		expectedExists bool
+	}{
+		{
+			name: "returns true when secret exists",
+			objects: []client.Object{
+				&corev1.Secret{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      "my-secret",
+						Namespace: "default",
+					},
+				},
+			},
+			secretName:     "my-secret",
+			expectedExists: true,
+		},
+		{
+			name: "returns false when secret not found",
+			objects: []client.Object{
+				&corev1.Secret{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      "other-secret",
+						Namespace: "default",
+					},
+				},
+			},
+			secretName:     "missing-secret",
+			expectedExists: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.objects...).Build()
+			secret := &corev1.Secret{}
+
+			exists, err := clusterSecretExists(context.Background(), c, "default", tt.secretName, secret)
+
+			require.NoError(t, err)
+			assert.Equal(t, tt.expectedExists, exists)
+		})
+	}
+}
+
+func TestRemoveOwnerRef(t *testing.T) {
+	scheme := runtime.NewScheme()
+	corev1.AddToScheme(scheme)
+	enterprisev4.AddToScheme(scheme)
+
+	owner := &enterprisev4.PostgresCluster{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "my-cluster",
+			Namespace: "default",
+			UID:       "owner-uid",
+		},
+	}
+
+	otherOwnerRef := metav1.OwnerReference{
+		APIVersion: "apps/v1",
+		Kind:       "Deployment",
+		Name:       "other-owner",
+		UID:        "other-uid",
+	}
+	ourOwnerRef := metav1.OwnerReference{
+		APIVersion: "enterprise.splunk.com/v4",
+		Kind:       "PostgresCluster",
+		Name:       "my-cluster",
+		UID:        "owner-uid",
+	}
+
+	tests := []struct {
+		name            string
+		ownerRefs       []metav1.OwnerReference
+		expectedRemoved bool
+		expectedRefsLen int
+	}{
+		{
+			name:            "returns false when owner ref not present",
+			ownerRefs:       nil,
+			expectedRemoved: false,
+			expectedRefsLen: 0,
+		},
+		{
+			name:            "removes owner ref and returns true",
+			ownerRefs:       []metav1.OwnerReference{ourOwnerRef},
+			expectedRemoved: true,
+			expectedRefsLen: 0,
+		},
+		{
+			name:            "removes only our owner ref and keeps others",
+			ownerRefs:       []metav1.OwnerReference{otherOwnerRef, ourOwnerRef},
+			expectedRemoved: true,
+			expectedRefsLen: 1,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			secret := &corev1.Secret{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:            "my-secret",
+					Namespace:       "default",
+					OwnerReferences: tt.ownerRefs,
+				},
+			}
+
+			removed, err := removeOwnerRef(scheme, owner, secret)
+
+			require.NoError(t, err)
+			assert.Equal(t, tt.expectedRemoved, removed)
+			assert.Len(t, secret.GetOwnerReferences(), tt.expectedRefsLen)
+		})
+	}
+}
+
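+// removeOwnerRef is what the Retain deletion policy relies on: dropping the
+// PostgresCluster owner reference detaches a child from garbage collection
+// while leaving any foreign owner references (the Deployment ref above)
+// untouched.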
+func TestPatchObject(t *testing.T) {
+	scheme := runtime.NewScheme()
+	corev1.AddToScheme(scheme)
+
+	t.Run("patches object successfully", func(t *testing.T) {
+		existing := &corev1.Secret{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "my-secret",
+				Namespace: "default",
+			},
+			Data: map[string][]byte{"key": []byte("old-value")},
+		}
+		c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(existing).Build()
+		original := existing.DeepCopy()
+		existing.Data["key"] = []byte("new-value")
+
+		err := patchObject(context.Background(), c, original, existing, "Secret")
+
+		require.NoError(t, err)
+		patched := &corev1.Secret{}
+		require.NoError(t, c.Get(context.Background(), client.ObjectKeyFromObject(existing), patched))
+		assert.Equal(t, "new-value", string(patched.Data["key"]))
+	})
+
+	t.Run("returns nil when object not found", func(t *testing.T) {
+		c := fake.NewClientBuilder().WithScheme(scheme).Build()
+		original := &corev1.Secret{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "deleted-secret",
+				Namespace: "default",
+			},
+		}
+		modified := original.DeepCopy()
+		modified.Data = map[string][]byte{"key": []byte("value")}
+
+		err := patchObject(context.Background(), c, original, modified, "Secret")
+
+		assert.NoError(t, err)
+	})
+}
+
+func TestDeleteCNPGCluster(t *testing.T) {
+	scheme := runtime.NewScheme()
+	cnpgv1.AddToScheme(scheme)
+
+	tests := []struct {
+		name    string
+		objects []client.Object
+		cluster *cnpgv1.Cluster
+	}{
+		{
+			name: "deletes existing cluster",
+			objects: []client.Object{
+				&cnpgv1.Cluster{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      "my-cluster",
+						Namespace: "default",
+					},
+				},
+			},
+			cluster: &cnpgv1.Cluster{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "my-cluster",
+					Namespace: "default",
+				},
+			},
+		},
+		{
+			name: "already deleted cluster returns nil",
+			cluster: &cnpgv1.Cluster{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "gone-cluster",
+					Namespace: "default",
+				},
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.objects...).Build()
+
+			err := deleteCNPGCluster(context.Background(), c, tt.cluster)
+
+			require.NoError(t, err)
+		})
+	}
+}
+
+func TestPoolerExists(t *testing.T) {
+	scheme := runtime.NewScheme()
+	cnpgv1.AddToScheme(scheme)
+	enterprisev4.AddToScheme(scheme)
+
+	cluster := &enterprisev4.PostgresCluster{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "my-cluster",
+			Namespace: "default",
+		},
+	}
+
+	tests := []struct {
+		name     string
+		objects  []client.Object
+		expected bool
+	}{
+		{
+			name: "returns true when pooler exists",
+			objects: []client.Object{
+				&cnpgv1.Pooler{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      "my-cluster-pooler-rw",
+						Namespace: "default",
+					},
+				},
+			},
+			expected: true,
+		},
+		{
+			name: "returns false when given pooler is not found",
+			objects: []client.Object{
+				&cnpgv1.Pooler{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      "my-cluster-pooler-ro",
+						Namespace: "default",
+					},
+				},
+			},
+			expected: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.objects...).Build()
+
+			got := poolerExists(context.Background(), c, cluster, "rw")
+
+			assert.Equal(t, tt.expected, got)
+		})
+	}
+}
+
+func TestDeleteConnectionPoolers(t *testing.T) {
+	scheme := runtime.NewScheme()
+	cnpgv1.AddToScheme(scheme)
+	enterprisev4.AddToScheme(scheme)
+
+	cluster := &enterprisev4.PostgresCluster{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "my-cluster",
+			Namespace: "default",
+		},
+	}
+
+	rwPooler := &cnpgv1.Pooler{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "my-cluster-pooler-rw",
+			Namespace: "default",
+		},
+	}
+	roPooler := &cnpgv1.Pooler{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "my-cluster-pooler-ro",
+			Namespace: "default",
+		},
+	}
+
+	t.Run("deletes both poolers when they exist", func(t *testing.T) {
+		c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(rwPooler.DeepCopy(), roPooler.DeepCopy()).Build()
+
+		err := deleteConnectionPoolers(context.Background(), c, cluster)
+
+		require.NoError(t, err)
+		assert.True(t, apierrors.IsNotFound(c.Get(context.Background(), client.ObjectKey{Name: "my-cluster-pooler-rw", Namespace: "default"}, &cnpgv1.Pooler{})))
+		assert.True(t, apierrors.IsNotFound(c.Get(context.Background(), client.ObjectKey{Name: "my-cluster-pooler-ro", Namespace: "default"}, &cnpgv1.Pooler{})))
+	})
+
+	t.Run("no-op when no poolers exist", func(t *testing.T) {
+		c := fake.NewClientBuilder().WithScheme(scheme).Build()
+
+		err := deleteConnectionPoolers(context.Background(), c, cluster)
+
+		require.NoError(t, err)
+	})
+}
+
+func TestEnsureClusterSecret(t *testing.T) {
+	scheme := runtime.NewScheme()
+	corev1.AddToScheme(scheme)
+	enterprisev4.AddToScheme(scheme)
+
+	t.Run("creates secret with credentials and owner reference", func(t *testing.T) {
+		c := fake.NewClientBuilder().WithScheme(scheme).Build()
+		cluster := &enterprisev4.PostgresCluster{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "my-cluster",
+				Namespace: "default",
+				UID:       "cluster-uid",
+			},
+		}
+
+		err := ensureClusterSecret(context.Background(), c, scheme, cluster, "my-secret", &corev1.Secret{})
+
+		require.NoError(t, err)
+		secret := &corev1.Secret{}
+		require.NoError(t, c.Get(context.Background(), client.ObjectKey{Name: "my-secret", Namespace: "default"}, secret))
+		assert.Equal(t, "my-secret", secret.Name)
+		assert.Equal(t, "default", secret.Namespace)
+		assert.Equal(t, corev1.SecretTypeOpaque, secret.Type)
+		require.Len(t, secret.OwnerReferences, 1)
+		assert.Equal(t, "cluster-uid", string(secret.OwnerReferences[0].UID))
+	})
+
+	t.Run("no-op when secret already exists", func(t *testing.T) {
+		existing := &corev1.Secret{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "my-secret",
+				Namespace: "default",
+			},
+			StringData: map[string]string{"username": "existing-user"},
+		}
+		c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(existing).Build()
+		cluster := &enterprisev4.PostgresCluster{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "my-cluster",
+				Namespace: "default",
+				UID:       "cluster-uid",
+			},
+		}
+
+		err := ensureClusterSecret(context.Background(), c, scheme, cluster, "my-secret", &corev1.Secret{})
+
+		require.NoError(t, err)
+	})
+}
+
+func TestArePoolersReady(t *testing.T) {
+	makePooler := func(desired, actual int32) *cnpgv1.Pooler {
+		return &cnpgv1.Pooler{
+			Spec:   cnpgv1.PoolerSpec{Instances: ptr.To(desired)},
+			Status: cnpgv1.PoolerStatus{Instances: actual},
+		}
+	}
+
+	tests := []struct {
+		name     string
+		rw       *cnpgv1.Pooler
+		ro       *cnpgv1.Pooler
+		expected bool
+	}{
+		{
+			name:     "returns true when both poolers are ready",
+			rw:       makePooler(2, 2),
+			ro:       makePooler(2, 2),
+			expected: true,
+		},
+		{
+			name:     "returns false when rw pooler not ready",
+			rw:       makePooler(2, 0),
+			ro:       makePooler(2, 2),
+			expected: false,
+		},
+		{
+			name:     "returns false when ro pooler not ready",
+			rw:       makePooler(2, 2),
+			ro:       makePooler(2, 1),
+			expected: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := arePoolersReady(tt.rw, tt.ro)
+
+			assert.Equal(t, tt.expected, got)
+		})
+	}
+}
+
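+// createConnectionPooler is create-if-absent: an existing pooler is left
+// untouched even when the merged config differs, which the second case
+// below pins down (the pre-created pooler keeps Instances=1).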
+func TestCreateConnectionPooler(t *testing.T) {
+	scheme := runtime.NewScheme()
+	corev1.AddToScheme(scheme)
+	cnpgv1.AddToScheme(scheme)
+	enterprisev4.AddToScheme(scheme)
+
+	poolerInstances := int32(2)
+	poolerMode := enterprisev4.ConnectionPoolerModeTransaction
+	cluster := &enterprisev4.PostgresCluster{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "my-cluster",
+			Namespace: "default",
+			UID:       "cluster-uid",
+		},
+	}
+	cnpg := &cnpgv1.Cluster{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "my-cluster",
+			Namespace: "default",
+		},
+	}
+	cfg := &MergedConfig{
+		CNPG: &enterprisev4.CNPGConfig{
+			ConnectionPooler: &enterprisev4.ConnectionPoolerConfig{
+				Instances: &poolerInstances,
+				Mode:      &poolerMode,
+				Config:    map[string]string{"default_pool_size": "25"},
+			},
+		},
+	}
+
+	tests := []struct {
+		name            string
+		objects         []client.Object
+		expectInstances int32
+	}{
+		{
+			name:            "creates pooler when it does not exist",
+			objects:         nil,
+			expectInstances: 2,
+		},
+		{
+			name: "no-op when pooler already exists",
+			objects: []client.Object{
+				&cnpgv1.Pooler{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      "my-cluster-pooler-rw",
+						Namespace: "default",
+					},
+					Spec: cnpgv1.PoolerSpec{Instances: ptr.To(int32(1))},
+				},
+			},
+			expectInstances: 1,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.objects...).Build()
+
+			err := createConnectionPooler(context.Background(), c, scheme, cluster.DeepCopy(), cfg, cnpg, "rw")
+
+			require.NoError(t, err)
+			fetched := &cnpgv1.Pooler{}
+			require.NoError(t, c.Get(context.Background(), client.ObjectKey{Name: "my-cluster-pooler-rw", Namespace: "default"}, fetched))
+			require.NotNil(t, fetched.Spec.Instances)
+			assert.Equal(t, tt.expectInstances, *fetched.Spec.Instances)
+		})
+	}
+}
+
+func TestGenerateConfigMap(t *testing.T) {
+	scheme := runtime.NewScheme()
+	corev1.AddToScheme(scheme)
+	cnpgv1.AddToScheme(scheme)
+	enterprisev4.AddToScheme(scheme)
+
+	cluster := &enterprisev4.PostgresCluster{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "my-cluster",
+			Namespace: "default",
+			UID:       "cluster-uid",
+		},
+	}
+	cnpgCluster := &cnpgv1.Cluster{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "my-cluster",
+			Namespace: "default",
+		},
+	}
+
+	t.Run("base endpoints without poolers", func(t *testing.T) {
+		c := fake.NewClientBuilder().WithScheme(scheme).Build()
+		cm, err := generateConfigMap(context.Background(), c, scheme, cluster.DeepCopy(), cnpgCluster, "my-secret")
+
+		require.NoError(t, err)
+		assert.Equal(t, "my-cluster-configmap", cm.Name)
+		assert.Equal(t, "default", cm.Namespace)
+		assert.Equal(t, "my-cluster-rw.default", cm.Data["CLUSTER_RW_ENDPOINT"])
+		assert.Equal(t, "my-cluster-ro.default", cm.Data["CLUSTER_RO_ENDPOINT"])
+		assert.Equal(t, "my-cluster-r.default", cm.Data["CLUSTER_R_ENDPOINT"])
+		assert.Equal(t, "5432", cm.Data["DEFAULT_CLUSTER_PORT"])
+		assert.Equal(t, "postgres", cm.Data["SUPER_USER_NAME"])
+		assert.Equal(t, "my-secret", cm.Data["SUPER_USER_SECRET_REF"])
+		assert.NotContains(t, cm.Data, "CLUSTER_POOLER_RW_ENDPOINT")
+		require.Len(t, cm.OwnerReferences, 1)
+		assert.Equal(t, "cluster-uid", string(cm.OwnerReferences[0].UID))
+	})
+
+	t.Run("includes pooler endpoints when poolers exist", func(t *testing.T) {
+		rwPooler := &cnpgv1.Pooler{
+			ObjectMeta: metav1.ObjectMeta{Name: "my-cluster-pooler-rw", Namespace: "default"},
+		}
+		roPooler := &cnpgv1.Pooler{
+			ObjectMeta: metav1.ObjectMeta{Name: "my-cluster-pooler-ro", Namespace: "default"},
+		}
+		c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(rwPooler, roPooler).Build()
+		cm, err := generateConfigMap(context.Background(), c, scheme, cluster.DeepCopy(), cnpgCluster, "my-secret")
+
+		require.NoError(t, err)
+		assert.Equal(t, "my-cluster-pooler-rw.default", cm.Data["CLUSTER_POOLER_RW_ENDPOINT"])
+		assert.Equal(t, "my-cluster-pooler-ro.default", cm.Data["CLUSTER_POOLER_RO_ENDPOINT"])
+	})
+
+	t.Run("uses existing configmap name from status", func(t *testing.T) {
+		c := fake.NewClientBuilder().WithScheme(scheme).Build()
+		pg := cluster.DeepCopy()
+		pg.Status.Resources = &enterprisev4.PostgresClusterResources{
+			ConfigMapRef: &corev1.LocalObjectReference{Name: "custom-configmap"},
+		}
+
+		cm, err := generateConfigMap(context.Background(), c, scheme, pg, cnpgCluster, "my-secret")
+
+		require.NoError(t, err)
+		assert.Equal(t, "custom-configmap", cm.Name)
+	})
+}
+
+func TestPoolerInstanceCount(t *testing.T) {
+	tests := []struct {
+		name              string
+		pooler            *cnpgv1.Pooler
+		expectedDesired   int32
+		expectedScheduled int32
+	}{
+		{
+			name: "nil instances defaults desired to 1",
+			pooler: &cnpgv1.Pooler{
+				Status: cnpgv1.PoolerStatus{Instances: 3},
+			},
+			expectedDesired:   1,
+			expectedScheduled: 3,
+		},
+		{
+			name: "explicit instances returns spec value",
+			pooler: &cnpgv1.Pooler{
+				Spec:   cnpgv1.PoolerSpec{Instances: ptr.To(int32(5))},
+				Status: cnpgv1.PoolerStatus{Instances: 2},
+			},
+			expectedDesired:   5,
+			expectedScheduled: 2,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			desired, scheduled := poolerInstanceCount(tt.pooler)
+
+			assert.Equal(t, tt.expectedDesired, desired)
+			assert.Equal(t, tt.expectedScheduled, scheduled)
+		})
+	}
+}
+
+func TestGeneratePassword(t *testing.T) {
+	pw, err := generatePassword()
+
+	require.NoError(t, err)
+	assert.Len(t, pw, 32)
+
+	t.Run("generates unique passwords", func(t *testing.T) {
+		pw2, err := generatePassword()
+
+		require.NoError(t, err)
+		assert.NotEqual(t, pw, pw2)
+	})
+}
+
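+// The uniqueness assertion above is probabilistic, but for any
+// crypto-random source a collision between two 32-character passwords is
+// astronomically unlikely, so the test is stable in practice.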
+func TestCreateOrUpdateConnectionPoolers(t *testing.T) {
+	scheme := runtime.NewScheme()
+	corev1.AddToScheme(scheme)
+	cnpgv1.AddToScheme(scheme)
+	enterprisev4.AddToScheme(scheme)
+
+	poolerInstances := int32(2)
+	poolerMode := enterprisev4.ConnectionPoolerModeTransaction
+	cluster := &enterprisev4.PostgresCluster{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "my-cluster",
+			Namespace: "default",
+			UID:       "cluster-uid",
+		},
+	}
+	cnpgCluster := &cnpgv1.Cluster{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "my-cluster",
+			Namespace: "default",
+		},
+	}
+	cfg := &MergedConfig{
+		CNPG: &enterprisev4.CNPGConfig{
+			ConnectionPooler: &enterprisev4.ConnectionPoolerConfig{
+				Instances: &poolerInstances,
+				Mode:      &poolerMode,
+				Config:    map[string]string{"default_pool_size": "25"},
+			},
+		},
+	}
+
+	expectedPoolerSpec := func(poolerType string) cnpgv1.PoolerSpec {
+		return cnpgv1.PoolerSpec{
+			Cluster:   cnpgv1.LocalObjectReference{Name: "my-cluster"},
+			Instances: ptr.To(int32(2)),
+			Type:      cnpgv1.PoolerType(poolerType),
+			PgBouncer: &cnpgv1.PgBouncerSpec{
+				PoolMode:   cnpgv1.PgBouncerPoolMode("transaction"),
+				Parameters: map[string]string{"default_pool_size": "25"},
+			},
+		}
+	}
+
+	t.Run("creates both rw and ro poolers", func(t *testing.T) {
+		c := fake.NewClientBuilder().WithScheme(scheme).Build()
+
+		err := createOrUpdateConnectionPoolers(context.Background(), c, scheme, cluster.DeepCopy(), cfg, cnpgCluster)
+
+		require.NoError(t, err)
+
+		rw := &cnpgv1.Pooler{}
+		require.NoError(t, c.Get(context.Background(), client.ObjectKey{Name: "my-cluster-pooler-rw", Namespace: "default"}, rw))
+		assert.Equal(t, expectedPoolerSpec("rw"), rw.Spec)
+		require.Len(t, rw.OwnerReferences, 1)
+		assert.Equal(t, "cluster-uid", string(rw.OwnerReferences[0].UID))
+
+		ro := &cnpgv1.Pooler{}
+		require.NoError(t, c.Get(context.Background(), client.ObjectKey{Name: "my-cluster-pooler-ro", Namespace: "default"}, ro))
+		assert.Equal(t, expectedPoolerSpec("ro"), ro.Spec)
+		require.Len(t, ro.OwnerReferences, 1)
+		assert.Equal(t, "cluster-uid", string(ro.OwnerReferences[0].UID))
+	})
+
+	t.Run("no-op when both poolers already exist", func(t *testing.T) {
+		existing := []client.Object{
+			&cnpgv1.Pooler{
+				ObjectMeta: metav1.ObjectMeta{Name: "my-cluster-pooler-rw", Namespace: "default"},
+				Spec:       cnpgv1.PoolerSpec{Instances: ptr.To(int32(1))},
+			},
+			&cnpgv1.Pooler{
+				ObjectMeta: metav1.ObjectMeta{Name: "my-cluster-pooler-ro", Namespace: "default"},
+				Spec:       cnpgv1.PoolerSpec{Instances: ptr.To(int32(1))},
+			},
+		}
+		c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(existing...).Build()
+
+		err := createOrUpdateConnectionPoolers(context.Background(), c, scheme, cluster.DeepCopy(), cfg, cnpgCluster)
+
+		require.NoError(t, err)
+		rw := &cnpgv1.Pooler{}
+		require.NoError(t, c.Get(context.Background(), client.ObjectKey{Name: "my-cluster-pooler-rw", Namespace: "default"}, rw))
+		assert.Equal(t, int32(1), *rw.Spec.Instances)
+		ro := &cnpgv1.Pooler{}
+		require.NoError(t, c.Get(context.Background(), client.ObjectKey{Name: "my-cluster-pooler-ro", Namespace: "default"}, ro))
+		assert.Equal(t, int32(1), *ro.Spec.Instances)
+	})
+}

From 7ea2f8c750a3b6cfd52c63956290c5dee464762b Mon Sep 17 00:00:00 2001
From: Jakub Koterba
Date: Wed, 25 Mar 2026 21:07:35 +0100
Subject: [PATCH 04/36] Added needed infra setup + pgcluster integration test
 base

---
 .../controller/postgrescluster_controller.go |   2 +-
 .../postgrescluster_controller_test.go       | 263 +++++++++++++++---
 internal/controller/suite_test.go            | 207 +++++++-------
 3 files changed, 320 insertions(+), 152 deletions(-)

diff --git a/internal/controller/postgrescluster_controller.go b/internal/controller/postgrescluster_controller.go
index dfa1f7eaf..6a2181afc 100644
--- a/internal/controller/postgrescluster_controller.go
+++ b/internal/controller/postgrescluster_controller.go
@@ -142,6 +142,6 @@ func cnpgPoolerPredicator() predicate.Predicate {
 }
 
-// secretPredicator triggers only on owner reference changes.
+// secretPredicator filters Secret events to trigger reconciles on creation, deletion, or owner reference changes.
 func secretPredicator() predicate.Predicate {
 	return predicate.Funcs{
 		CreateFunc: func(event.CreateEvent) bool { return true },
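Only CreateFunc is visible in this hunk; a minimal sketch of the owner-reference comparison an UpdateFunc for such a predicate would need, assuming controller-runtime's event package (ownerRefsChanged is a hypothetical helper, not shown in the patch):

func ownerRefsChanged(e event.UpdateEvent) bool {
	oldRefs := e.ObjectOld.GetOwnerReferences()
	newRefs := e.ObjectNew.GetOwnerReferences()
	// A length change always means an owner was added or removed.
	if len(oldRefs) != len(newRefs) {
		return true
	}
	// Otherwise compare by UID; order is stable for unmodified objects.
	for i := range oldRefs {
		if oldRefs[i].UID != newRefs[i].UID {
			return true
		}
	}
	return false
}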
"github.com/onsi/gomega" - "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/reconcile" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" enterprisev4 "github.com/splunk/splunk-operator/api/v4" + "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core" ) var _ = Describe("PostgresCluster Controller", func() { - Context("When reconciling a resource", func() { - const resourceName = "test-resource" - ctx := context.Background() + var ( + ctx context.Context + namespace string + clusterName string + className string + reconciler *PostgresClusterReconciler + req reconcile.Request + cnpg *cnpgv1.Cluster + ) + + BeforeEach(func() { + specLine := CurrentSpecReport().LeafNodeLocation.LineNumber + nameSuffix := fmt.Sprintf("%d-%d-%d", GinkgoParallelProcess(), GinkgoRandomSeed(), specLine) + + ctx = context.Background() + namespace = "default" + clusterName = "postgresql-cluster-dev-" + nameSuffix + className = "postgresql-dev-" + nameSuffix + cnpg = &cnpgv1.Cluster{} + + // Arrange: class defaults used by getMergedConfig() + postgresVersion := "15.10" + instances := int32(2) + storage := resource.MustParse("1Gi") + poolerEnabled := false + + class := &enterprisev4.PostgresClusterClass{ + ObjectMeta: metav1.ObjectMeta{Name: className}, + Spec: enterprisev4.PostgresClusterClassSpec{ + Provisioner: "postgresql.cnpg.io", + Config: &enterprisev4.PostgresClusterClassConfig{ + Instances: &instances, + Storage: &storage, + PostgresVersion: &postgresVersion, + ConnectionPoolerEnabled: &poolerEnabled, + }, + }, + } + Expect(k8sClient.Create(ctx, class)).To(Succeed()) + + pc := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: namespace}, + Spec: enterprisev4.PostgresClusterSpec{ + Class: className, + ClusterDeletionPolicy: &[]string{"Delete"}[0], + }, + } + Expect(k8sClient.Create(ctx, pc)).To(Succeed()) + + reconciler = &PostgresClusterReconciler{ + Client: k8sClient, + Scheme: k8sClient.Scheme(), + } + req = reconcile.Request{NamespacedName: types.NamespacedName{Name: clusterName, Namespace: namespace}} + }) + + JustBeforeEach(func() { + By("Reconciling the created resource") + result, err := reconciler.Reconcile(ctx, req) + Expect(err).NotTo(HaveOccurred()) + Expect(result.RequeueAfter).To(BeZero()) + }) - typeNamespacedName := types.NamespacedName{ - Name: resourceName, - Namespace: "default", // TODO(user):Modify as needed + AfterEach(func() { + By("Deleting PostgresCluster and letting reconcile run finalizer cleanup") + key := types.NamespacedName{Name: clusterName, Namespace: namespace} + pc := &enterprisev4.PostgresCluster{} + + // Best-effort delete (object might already be gone in some specs) + err := k8sClient.Get(ctx, key, pc) + if err == nil { + Expect(k8sClient.Delete(ctx, pc)).To(Succeed()) + } else { + Expect(apierrors.IsNotFound(err)).To(BeTrue()) } - postgresCluster := &enterprisev4.PostgresCluster{} - - BeforeEach(func() { - By("creating the custom resource for the Kind PostgresCluster") - err := k8sClient.Get(ctx, typeNamespacedName, postgresCluster) - if err != nil && errors.IsNotFound(err) { - resource := &enterprisev4.PostgresCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: resourceName, - Namespace: "default", - }, - // TODO(user): Specify other spec details if needed. 
-		typeNamespacedName := types.NamespacedName{
-			Name:      resourceName,
-			Namespace: "default", // TODO(user):Modify as needed
+	AfterEach(func() {
+		By("Deleting PostgresCluster and letting reconcile run finalizer cleanup")
+		key := types.NamespacedName{Name: clusterName, Namespace: namespace}
+		pc := &enterprisev4.PostgresCluster{}
+
+		// Best-effort delete (object might already be gone in some specs)
+		err := k8sClient.Get(ctx, key, pc)
+		if err == nil {
+			Expect(k8sClient.Delete(ctx, pc)).To(Succeed())
+		} else {
+			Expect(apierrors.IsNotFound(err)).To(BeTrue())
 		}
-		postgresCluster := &enterprisev4.PostgresCluster{}
-
-		BeforeEach(func() {
-			By("creating the custom resource for the Kind PostgresCluster")
-			err := k8sClient.Get(ctx, typeNamespacedName, postgresCluster)
-			if err != nil && errors.IsNotFound(err) {
-				resource := &enterprisev4.PostgresCluster{
-					ObjectMeta: metav1.ObjectMeta{
-						Name:      resourceName,
-						Namespace: "default",
-					},
-					// TODO(user): Specify other spec details if needed.
+
+		// Drive delete reconcile path until finalizer is removed and object disappears
+		Eventually(func() bool {
+			_, recErr := reconciler.Reconcile(ctx, req)
+			if recErr != nil {
+				// Some envtest runs may not have CNPG CRDs installed in the API server.
+				// In that case, remove finalizer directly so fixture teardown remains deterministic.
+				if meta.IsNoMatchError(recErr) {
+					current := &enterprisev4.PostgresCluster{}
+					getErr := k8sClient.Get(ctx, key, current)
+					if apierrors.IsNotFound(getErr) {
+						return true
+					}
+					if getErr != nil {
+						return false
+					}
+					controllerutil.RemoveFinalizer(current, core.PostgresClusterFinalizerName)
+					if err := k8sClient.Update(ctx, current); err != nil && !apierrors.IsNotFound(err) {
+						return false
+					}
+					if err := k8sClient.Delete(ctx, current); err != nil && !apierrors.IsNotFound(err) {
+						return false
+					}
+				} else {
+					return false
 				}
-				Expect(k8sClient.Create(ctx, resource)).To(Succeed())
 			}
-		})
+			getErr := k8sClient.Get(ctx, key, &enterprisev4.PostgresCluster{})
+			return apierrors.IsNotFound(getErr)
+		}, "10s", "500ms").Should(BeTrue())
 
-		AfterEach(func() {
-			// TODO(user): Cleanup logic after each test, like removing the resource instance.
-			resource := &enterprisev4.PostgresCluster{}
-			err := k8sClient.Get(ctx, typeNamespacedName, resource)
-			Expect(err).NotTo(HaveOccurred())
+		By("Cleaning up PostgresClusterClass fixture")
+		class := &enterprisev4.PostgresClusterClass{}
+		classKey := types.NamespacedName{Name: className} // cluster-scoped CR
+		err = k8sClient.Get(ctx, classKey, class)
+		if err == nil {
+			Expect(k8sClient.Delete(ctx, class)).To(Succeed())
+		} else {
+			Expect(apierrors.IsNotFound(err)).To(BeTrue())
+		}
+	})
 
-			By("Cleanup the specific resource instance PostgresCluster")
-			Expect(k8sClient.Delete(ctx, resource)).To(Succeed())
-		})
-		It("should successfully reconcile the resource", func() {
-			By("Reconciling the created resource")
-			controllerReconciler := &PostgresClusterReconciler{
-				Client: k8sClient,
-				Scheme: k8sClient.Scheme(),
-			}
+	Context("Happy path and convergence", func() {
+		pc := &enterprisev4.PostgresCluster{}
+		It("PC-01 creates managed resources and status refs", func() {
+			By("creating CNPG cluster via reconcile and awaiting healthy")
+			Eventually(func() error {
+				_, err := reconciler.Reconcile(ctx, req)
+				if err != nil {
+					return err
+				}
+				if err := k8sClient.Get(ctx, req.NamespacedName, cnpg); err != nil {
+					return err
+				}
+				cnpg.Status.Phase = cnpgv1.PhaseHealthy
+				return k8sClient.Status().Update(ctx, cnpg)
+			}, "10s", "250ms").Should(Succeed())
+
+			By("reconciling until managed resources are published in status")
+			Eventually(func() bool {
+				_, err := reconciler.Reconcile(ctx, req)
+				if err != nil {
+					return false
+				}
+				current := &enterprisev4.PostgresCluster{}
+				if err := k8sClient.Get(ctx, req.NamespacedName, current); err != nil {
+					return false
+				}
+				return current.Status.Resources != nil &&
+					current.Status.Resources.SuperUserSecretRef != nil &&
+					current.Status.Resources.ConfigMapRef != nil
+			}, "20s", "250ms").Should(BeTrue())
 
-			_, err := controllerReconciler.Reconcile(ctx, reconcile.Request{
-				NamespacedName: typeNamespacedName,
-			})
-			Expect(err).NotTo(HaveOccurred())
-			// TODO(user): Add more specific assertions depending on your controller's reconciliation logic.
-			// Example: If you expect a certain status condition after reconciliation, verify it here.
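+			// Converged at this point: everything below asserts the externally
+			// observable contract (finalizer, status refs, ownership, ConfigMap
+			// keys) rather than reconciler internals.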
+ By("asserting finalizer contract") + pc := &enterprisev4.PostgresCluster{} + Expect(k8sClient.Get(ctx, req.NamespacedName, pc)).To(Succeed()) + Expect(controllerutil.ContainsFinalizer(pc, core.PostgresClusterFinalizerName)).To(BeTrue()) + + By("asserting status references are published") + Expect(pc.Status.Resources).NotTo(BeNil()) + Expect(pc.Status.Resources.SuperUserSecretRef).NotTo(BeNil()) + Expect(pc.Status.Resources.ConfigMapRef).NotTo(BeNil()) + + By("asserting Secret ownership and existence") + secret := &corev1.Secret{} + Expect(k8sClient.Get(ctx, types.NamespacedName{ + Name: pc.Status.Resources.SuperUserSecretRef.Name, Namespace: namespace, + }, secret)).To(Succeed()) + Expect(metav1.IsControlledBy(secret, pc)).To(BeTrue()) + + By("asserting CNPG Cluster projection and ownership") + cnpg := &cnpgv1.Cluster{} + Expect(k8sClient.Get(ctx, req.NamespacedName, cnpg)).To(Succeed()) + Expect(metav1.IsControlledBy(cnpg, pc)).To(BeTrue()) + Expect(cnpg.Spec.Instances).To(Equal(2)) + Expect(cnpg.Spec.ImageName).To(ContainSubstring("postgresql:15.10")) + Expect(cnpg.Spec.StorageConfiguration.Size).To(Equal("1Gi")) + + By("asserting ConfigMap contract consumed by clients") + cm := &corev1.ConfigMap{} + Expect(k8sClient.Get(ctx, types.NamespacedName{ + Name: pc.Status.Resources.ConfigMapRef.Name, Namespace: namespace, + }, cm)).To(Succeed()) + Expect(metav1.IsControlledBy(cm, pc)).To(BeTrue()) + Expect(cm.Data).To(HaveKeyWithValue("DEFAULT_CLUSTER_PORT", "5432")) + Expect(cm.Data).To(HaveKey("SUPER_USER_SECRET_REF")) + Expect(cm.Data).To(HaveKey("CLUSTER_RW_ENDPOINT")) + }) + It("PC-02 adds finalizer on reconcile", func() { + Expect(k8sClient.Get(ctx, req.NamespacedName, pc)).To(Succeed()) + Expect(pc.ObjectMeta.Finalizers).To(ContainElement(core.PostgresClusterFinalizerName)) }) + It("PC-07 is idempotent across repeated reconciles", func() {}) }) + + Context("Deletion and finalizer", func() { + It("PC-03 Delete policy removes children and finalizer", func() {}) + It("PC-04 Retain policy preserves children and removes ownerRefs", func() {}) + }) + + Context("Failure and drift", func() { + It("PC-05 fails when PostgresClusterClass is missing", func() {}) + It("PC-06 restores drifted managed spec", func() {}) + }) + + Context("Predicates", func() { + It("PC-08 triggers on generation/finalizer/deletion changes", func() {}) + It("PC-09 ignores no-op updates", func() {}) + }) + + // Context("When reconciling a resource", func() { + + // It("should successfully reconcile the resource", func() { + // By("Reconciling the created resource") + // // controllerReconciler := &PostgresClusterReconciler{ + // // Client: k8sClient, + // // Scheme: k8sClient.Scheme(), + // // } + + // // _, err := controllerReconciler.Reconcile(ctx, reconcile.Request{ + // // NamespacedName: typeNamespacedName, + // // }) + // err := errors.New("test error") + // Expect(err).NotTo(HaveOccurred()) + + // }) + // }) }) diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index 142a8720c..30cb99f64 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -1,5 +1,5 @@ /* -Copyright (c) 2018-2022 Splunk Inc. All rights reserved. +Copyright 2026. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -19,155 +19,140 @@ package controller import ( "context" "fmt" + "os" "path/filepath" + "sort" "testing" - "time" + cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" . 
"github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - "go.uber.org/zap/zapcore" + + "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/envtest" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" - clientgoscheme "k8s.io/client-go/kubernetes/scheme" - ctrl "sigs.k8s.io/controller-runtime" - - enterpriseApiV3 "github.com/splunk/splunk-operator/api/v3" - enterpriseApi "github.com/splunk/splunk-operator/api/v4" - //+kubebuilder:scaffold:imports + enterprisev4 "github.com/splunk/splunk-operator/api/v4" + // +kubebuilder:scaffold:imports ) -var cfg *rest.Config -var k8sClient client.Client -var testEnv *envtest.Environment -var k8sManager ctrl.Manager +// These tests use Ginkgo (BDD-style Go testing framework). Refer to +// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. -func TestAPIs(t *testing.T) { +var ( + ctx context.Context + cancel context.CancelFunc + testEnv *envtest.Environment + cfg *rest.Config + k8sClient client.Client +) + +func TestControllers(t *testing.T) { RegisterFailHandler(Fail) RunSpecs(t, "Controller Suite") } -var _ = BeforeSuite(func(ctx context.Context) { - opts := zap.Options{ - Development: true, - TimeEncoder: zapcore.RFC3339NanoTimeEncoder, - } - logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true), zap.UseFlagOptions(&opts))) +var _ = BeforeSuite(func() { + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) - By("bootstrapping test environment") + ctx, cancel = context.WithCancel(context.TODO()) + + var err error + err = enterprisev4.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + err = cnpgv1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + // +kubebuilder:scaffold:scheme + By("bootstrapping test environment") + cnpgCRDDirectory, err := getCNPGCRDDirectory() + Expect(err).NotTo(HaveOccurred()) testEnv = &envtest.Environment{ - CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, + CRDDirectoryPaths: []string{ + filepath.Join("..", "..", "config", "crd", "bases"), + cnpgCRDDirectory, + }, ErrorIfCRDPathMissing: true, } - var err error + // Retrieve the first found binary directory to allow running tests from IDEs + if getFirstFoundEnvTestBinaryDir() != "" { + testEnv.BinaryAssetsDirectory = getFirstFoundEnvTestBinaryDir() + } // cfg is defined in this file globally. 
-var _ = BeforeSuite(func(ctx context.Context) {
-	opts := zap.Options{
-		Development: true,
-		TimeEncoder: zapcore.RFC3339NanoTimeEncoder,
-	}
-	logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true), zap.UseFlagOptions(&opts)))
+var _ = BeforeSuite(func() {
+	logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)))
 
-	By("bootstrapping test environment")
+	ctx, cancel = context.WithCancel(context.TODO())
+
+	var err error
+	err = enterprisev4.AddToScheme(scheme.Scheme)
+	Expect(err).NotTo(HaveOccurred())
+	err = cnpgv1.AddToScheme(scheme.Scheme)
+	Expect(err).NotTo(HaveOccurred())
+
+	// +kubebuilder:scaffold:scheme
 
+	By("bootstrapping test environment")
+	cnpgCRDDirectory, err := getCNPGCRDDirectory()
+	Expect(err).NotTo(HaveOccurred())
 	testEnv = &envtest.Environment{
-		CRDDirectoryPaths:     []string{filepath.Join("..", "..", "config", "crd", "bases")},
+		CRDDirectoryPaths: []string{
+			filepath.Join("..", "..", "config", "crd", "bases"),
+			cnpgCRDDirectory,
+		},
 		ErrorIfCRDPathMissing: true,
 	}
 
-	var err error
+	// Retrieve the first found binary directory to allow running tests from IDEs
+	if getFirstFoundEnvTestBinaryDir() != "" {
+		testEnv.BinaryAssetsDirectory = getFirstFoundEnvTestBinaryDir()
+	}
+
+	// cfg is defined in this file globally.
 	cfg, err = testEnv.Start()
 	Expect(err).NotTo(HaveOccurred())
 	Expect(cfg).NotTo(BeNil())
 
-	err = enterpriseApi.AddToScheme(clientgoscheme.Scheme)
-	Expect(err).NotTo(HaveOccurred())
-
-	err = enterpriseApiV3.AddToScheme(clientgoscheme.Scheme)
-	Expect(err).NotTo(HaveOccurred())
-
-	err = enterpriseApi.AddToScheme(clientgoscheme.Scheme)
-	Expect(err).NotTo(HaveOccurred())
-
-	err = enterpriseApi.AddToScheme(clientgoscheme.Scheme)
-	Expect(err).NotTo(HaveOccurred())
-
-	err = enterpriseApiV3.AddToScheme(clientgoscheme.Scheme)
+	k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme})
 	Expect(err).NotTo(HaveOccurred())
+	Expect(k8sClient).NotTo(BeNil())
+})
 
-	err = enterpriseApi.AddToScheme(clientgoscheme.Scheme)
+var _ = AfterSuite(func() {
+	By("tearing down the test environment")
+	cancel()
+	err := testEnv.Stop()
 	Expect(err).NotTo(HaveOccurred())
+})
 
-	//+kubebuilder:scaffold:scheme
-
-	// Create New Manager for controller
-	k8sManager, err = ctrl.NewManager(cfg, ctrl.Options{
-		Scheme: clientgoscheme.Scheme,
-	})
-	Expect(err).ToNot(HaveOccurred())
-	if err := (&ClusterManagerReconciler{
-		Client: k8sManager.GetClient(),
-		Scheme: k8sManager.GetScheme(),
-	}).SetupWithManager(k8sManager); err != nil {
-		Expect(err).NotTo(HaveOccurred())
+// getFirstFoundEnvTestBinaryDir locates the first binary in the specified path.
+// ENVTEST-based tests depend on specific binaries, usually located in paths set by
+// controller-runtime. When running tests directly (e.g., via an IDE) without using
+// Makefile targets, the 'BinaryAssetsDirectory' must be explicitly configured.
+//
+// This function streamlines the process by finding the required binaries, similar to
+// setting the 'KUBEBUILDER_ASSETS' environment variable. To ensure the binaries are
+// properly set up, run 'make setup-envtest' beforehand.
+func getFirstFoundEnvTestBinaryDir() string {
+	basePath := filepath.Join("..", "..", "bin", "k8s")
+	entries, err := os.ReadDir(basePath)
+	if err != nil {
+		logf.Log.Error(err, "Failed to read directory", "path", basePath)
+		return ""
 	}
-	if err := (&ClusterMasterReconciler{
-		Client: k8sManager.GetClient(),
-		Scheme: k8sManager.GetScheme(),
-	}).SetupWithManager(k8sManager); err != nil {
-		Expect(err).NotTo(HaveOccurred())
+	for _, entry := range entries {
+		if entry.IsDir() {
+			return filepath.Join(basePath, entry.Name())
+		}
 	}
-	if err := (&IndexerClusterReconciler{
-		Client: k8sManager.GetClient(),
-		Scheme: k8sManager.GetScheme(),
-	}).SetupWithManager(k8sManager); err != nil {
-		Expect(err).NotTo(HaveOccurred())
-	}
-	if err := (&IngestorClusterReconciler{
-		Client: k8sManager.GetClient(),
-		Scheme: k8sManager.GetScheme(),
-	}).SetupWithManager(k8sManager); err != nil {
-		Expect(err).NotTo(HaveOccurred())
-	}
-	if err := (&LicenseManagerReconciler{
-		Client: k8sManager.GetClient(),
-		Scheme: k8sManager.GetScheme(),
-	}).SetupWithManager(k8sManager); err != nil {
-		Expect(err).NotTo(HaveOccurred())
-	}
-	if err := (&LicenseMasterReconciler{
-		Client: k8sManager.GetClient(),
-		Scheme: k8sManager.GetScheme(),
-	}).SetupWithManager(k8sManager); err != nil {
-		Expect(err).NotTo(HaveOccurred())
+	return ""
+}
+
+func getCNPGCRDDirectory() (string, error) {
+	// Optional escape hatch for CI/local overrides.
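+	// Example override (hypothetical path; any checkout or vendored copy of
+	// the CNPG module works):
+	//
+	//	CNPG_CRD_DIR=./vendor/github.com/cloudnative-pg/cloudnative-pg/config/crd/bases \
+	//	  go test ./internal/controller/...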
+	if explicit := os.Getenv("CNPG_CRD_DIR"); explicit != "" {
+		return explicit, nil
 	}
-	if err := (&MonitoringConsoleReconciler{
-		Client: k8sManager.GetClient(),
-		Scheme: k8sManager.GetScheme(),
-	}).SetupWithManager(k8sManager); err != nil {
-		Expect(err).NotTo(HaveOccurred())
+
+	moduleRoot := os.Getenv("GOMODCACHE")
+	if moduleRoot == "" {
+		gopath := os.Getenv("GOPATH")
+		if gopath == "" {
+			home, err := os.UserHomeDir()
+			if err != nil {
+				return "", err
+			}
+			gopath = filepath.Join(home, "go")
+		}
+		moduleRoot = filepath.Join(gopath, "pkg", "mod")
 	}
-	if err := (&SearchHeadClusterReconciler{
-		Client: k8sManager.GetClient(),
-		Scheme: k8sManager.GetScheme(),
-	}).SetupWithManager(k8sManager); err != nil {
-		Expect(err).NotTo(HaveOccurred())
+
+	pattern := filepath.Join(moduleRoot, "github.com", "cloudnative-pg", "cloudnative-pg@*", "config", "crd", "bases")
+	matches, err := filepath.Glob(pattern)
+	if err != nil {
+		return "", err
 	}
-	if err := (&StandaloneReconciler{
-		Client: k8sManager.GetClient(),
-		Scheme: k8sManager.GetScheme(),
-	}).SetupWithManager(k8sManager); err != nil {
-		Expect(err).NotTo(HaveOccurred())
+	if len(matches) == 0 {
+		return "", fmt.Errorf("CNPG CRD directory not found; set CNPG_CRD_DIR or download github.com/cloudnative-pg/cloudnative-pg module")
 	}
-	go func() {
-		err = k8sManager.Start(ctrl.SetupSignalHandler())
-		fmt.Printf("error %v", err.Error())
-		Expect(err).ToNot(HaveOccurred())
-	}()
-
-	Expect(err).ToNot(HaveOccurred())
-
-	k8sClient, err = client.New(cfg, client.Options{Scheme: clientgoscheme.Scheme})
-	Expect(err).NotTo(HaveOccurred())
-	Expect(k8sClient).NotTo(BeNil())
-
-}, NodeTimeout(time.Second*500))
-
-var _ = AfterSuite(func() {
-	By("tearing down the test environment")
-	testEnv.Stop()
-})
+	sort.Strings(matches)
+	return matches[len(matches)-1], nil
+}

From 167c15b1bfedcf204454afe081db6278be3c2b0d Mon Sep 17 00:00:00 2001
From: dpishchenkov
Date: Tue, 24 Mar 2026 18:09:32 +0100
Subject: [PATCH 05/36] unit tests for postgres database

---
 pkg/postgresql/database/core/database.go      |  107 +-
 .../database/core/database_unit_test.go       | 1641 +++++++++++++++++
 2 files changed, 1692 insertions(+), 56 deletions(-)
 create mode 100644 pkg/postgresql/database/core/database_unit_test.go

diff --git a/pkg/postgresql/database/core/database.go b/pkg/postgresql/database/core/database.go
index 1ae2227d7..50c99beed 100644
--- a/pkg/postgresql/database/core/database.go
+++ b/pkg/postgresql/database/core/database.go
@@ -41,7 +41,7 @@ func PostgresDatabaseService(
 	logger.Info("Reconciling PostgresDatabase", "name", postgresDB.Name, "namespace", postgresDB.Namespace)
 
 	updateStatus := func(conditionType conditionTypes, conditionStatus metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileDBPhases) error {
-		return setStatus(ctx, c, postgresDB, conditionType, conditionStatus, reason, message, phase)
+		return persistStatus(ctx, c, postgresDB, conditionType, conditionStatus, reason, message, phase)
 	}
 
 	// Finalizer: cleanup on deletion, register on creation.
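The setStatus split introduced here, a pure applyStatus wrapped by a client-writing persistStatus (see the hunks below), is what lets status transitions be unit-tested without any Kubernetes client. A minimal sketch of such a test, assuming the unexported identifiers visible in these hunks plus testify and apimachinery's condition helpers (the test name is hypothetical):

func TestApplyStatusSetsPhaseAndCondition(t *testing.T) {
	db := &enterprisev4.PostgresDatabase{}

	// Pure mutation: no client, no API server.
	applyStatus(db, clusterReady, metav1.ConditionFalse,
		reasonClusterProvisioning, "Cluster is not in ready state yet", pendingDBPhase)

	require.NotNil(t, db.Status.Phase)
	assert.Equal(t, string(pendingDBPhase), *db.Status.Phase)
	cond := meta.FindStatusCondition(db.Status.Conditions, string(clusterReady))
	require.NotNil(t, cond)
	assert.Equal(t, metav1.ConditionFalse, cond.Status)
}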
@@ -68,23 +68,24 @@ func PostgresDatabaseService(
 	}
 
 	// Phase: ClusterValidation
-	cluster, clusterStatus, err := ensureClusterReady(ctx, c, postgresDB)
+	cluster, err := fetchCluster(ctx, c, postgresDB)
 	if err != nil {
+		if errors.IsNotFound(err) {
+			if err := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterNotFound, "Cluster CR not found", pendingDBPhase); err != nil {
+				return ctrl.Result{}, err
+			}
+			return ctrl.Result{RequeueAfter: clusterNotFoundRetryDelay}, nil
+		}
 		if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterInfoFetchFailed, "Can't reach Cluster CR due to transient errors", pendingDBPhase); statusErr != nil {
 			logger.Error(statusErr, "Failed to update status")
 		}
 		return ctrl.Result{}, err
 	}
+	clusterStatus := getClusterReadyStatus(cluster)
 	logger.Info("Cluster validation done", "clusterName", postgresDB.Spec.ClusterRef.Name, "status", clusterStatus)
 
 	switch clusterStatus {
-	case ClusterNotFound:
-		if err := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterNotFound, "Cluster CR not found", pendingDBPhase); err != nil {
-			return ctrl.Result{}, err
-		}
-		return ctrl.Result{RequeueAfter: clusterNotFoundRetryDelay}, nil
-
 	case ClusterNotReady, ClusterNoProvisionerRef:
 		if err := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterProvisioning, "Cluster is not in ready state yet", pendingDBPhase); err != nil {
 			return ctrl.Result{}, err
@@ -299,23 +300,27 @@ func reconcileRWRolePrivileges(
 	return stderrors.Join(errs...)
 }
 
-func ensureClusterReady(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase) (*enterprisev4.PostgresCluster, clusterReadyStatus, error) {
+func fetchCluster(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase) (*enterprisev4.PostgresCluster, error) {
 	logger := log.FromContext(ctx)
 	cluster := &enterprisev4.PostgresCluster{}
 	if err := c.Get(ctx, types.NamespacedName{Name: postgresDB.Spec.ClusterRef.Name, Namespace: postgresDB.Namespace}, cluster); err != nil {
 		if errors.IsNotFound(err) {
-			return nil, ClusterNotFound, nil
+			return nil, err
 		}
 		logger.Error(err, "Failed to fetch Cluster", "name", postgresDB.Spec.ClusterRef.Name)
-		return nil, ClusterNotReady, err
+		return nil, err
 	}
+	return cluster, nil
+}
+
+func getClusterReadyStatus(cluster *enterprisev4.PostgresCluster) clusterReadyStatus {
 	if cluster.Status.Phase == nil || *cluster.Status.Phase != string(ClusterReady) {
-		return cluster, ClusterNotReady, nil
+		return ClusterNotReady
 	}
 	if cluster.Status.ProvisionerRef == nil {
-		return cluster, ClusterNoProvisionerRef, nil
+		return ClusterNoProvisionerRef
 	}
-	return cluster, ClusterReady, nil
+	return ClusterReady
 }
 
 func getDesiredUsers(postgresDB *enterprisev4.PostgresDatabase) []string {
@@ -383,30 +388,8 @@ func parseRoleNames(raw []byte) []string {
 
 func patchManagedRoles(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster) error {
 	logger := log.FromContext(ctx)
-	allRoles := make([]enterprisev4.ManagedRole, 0, len(postgresDB.Spec.Databases)*2)
-	for _, dbSpec := range postgresDB.Spec.Databases {
-		allRoles = append(allRoles,
-			enterprisev4.ManagedRole{
-				Name:   adminRoleName(dbSpec.Name),
-				Exists: true,
-				PasswordSecretRef: &corev1.SecretKeySelector{LocalObjectReference: corev1.LocalObjectReference{Name: roleSecretName(postgresDB.Name, dbSpec.Name, secretRoleAdmin)},
-					Key: secretKeyPassword},
-			},
-			enterprisev4.ManagedRole{
-				Name:   rwRoleName(dbSpec.Name),
-				Exists: true,
PasswordSecretRef: &corev1.SecretKeySelector{LocalObjectReference: corev1.LocalObjectReference{Name: roleSecretName(postgresDB.Name, dbSpec.Name, secretRoleRW)}, - Key: secretKeyPassword}, - }) - } - rolePatch := &unstructured.Unstructured{ - Object: map[string]any{ - "apiVersion": cluster.APIVersion, - "kind": cluster.Kind, - "metadata": map[string]any{"name": cluster.Name, "namespace": cluster.Namespace}, - "spec": map[string]any{"managedRoles": allRoles}, - }, - } + allRoles := buildManagedRoles(postgresDB.Name, postgresDB.Spec.Databases) + rolePatch := buildManagedRolesPatch(cluster, allRoles) fieldManager := fieldManagerName(postgresDB.Name) if err := c.Patch(ctx, rolePatch, client.Apply, client.FieldOwner(fieldManager)); err != nil { logger.Error(err, "Failed to add users to PostgresCluster", "postgresDatabase", postgresDB.Name) @@ -442,20 +425,11 @@ func reconcileCNPGDatabases(ctx context.Context, c client.Client, scheme *runtim logger := log.FromContext(ctx) for _, dbSpec := range postgresDB.Spec.Databases { cnpgDBName := cnpgDatabaseName(postgresDB.Name, dbSpec.Name) - reclaimPolicy := cnpgv1.DatabaseReclaimDelete - if dbSpec.DeletionPolicy == deletionPolicyRetain { - reclaimPolicy = cnpgv1.DatabaseReclaimRetain - } cnpgDB := &cnpgv1.Database{ ObjectMeta: metav1.ObjectMeta{Name: cnpgDBName, Namespace: postgresDB.Namespace}, } _, err := controllerutil.CreateOrUpdate(ctx, c, cnpgDB, func() error { - cnpgDB.Spec = cnpgv1.DatabaseSpec{ - Name: dbSpec.Name, - Owner: adminRoleName(dbSpec.Name), - ClusterRef: corev1.LocalObjectReference{Name: cluster.Status.ProvisionerRef.Name}, - ReclaimPolicy: reclaimPolicy, - } + cnpgDB.Spec = buildCNPGDatabaseSpec(cluster.Status.ProvisionerRef.Name, dbSpec) reAdopting := cnpgDB.Annotations[annotationRetainedFrom] == postgresDB.Name if reAdopting { logger.Info("Re-adopting orphaned CNPG Database", "name", cnpgDBName) @@ -488,7 +462,12 @@ func verifyDatabasesReady(ctx context.Context, c client.Client, postgresDB *ente return notReady, nil } -func setStatus(ctx context.Context, c client.Client, db *enterprisev4.PostgresDatabase, conditionType conditionTypes, conditionStatus metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileDBPhases) error { +func persistStatus(ctx context.Context, c client.Client, db *enterprisev4.PostgresDatabase, conditionType conditionTypes, conditionStatus metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileDBPhases) error { + applyStatus(db, conditionType, conditionStatus, reason, message, phase) + return c.Status().Update(ctx, db) +} + +func applyStatus(db *enterprisev4.PostgresDatabase, conditionType conditionTypes, conditionStatus metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileDBPhases) { meta.SetStatusCondition(&db.Status.Conditions, metav1.Condition{ Type: string(conditionType), Status: conditionStatus, @@ -498,7 +477,6 @@ func setStatus(ctx context.Context, c client.Client, db *enterprisev4.PostgresDa }) p := string(phase) db.Status.Phase = &p - return c.Status().Update(ctx, db) } func buildDeletionPlan(databases []enterprisev4.DatabaseDefinition) deletionPlan { @@ -703,9 +681,9 @@ func deleteSecrets(ctx context.Context, c client.Client, postgresDB *enterprisev return nil } -func buildRetainedRoles(postgresDBName string, retained []enterprisev4.DatabaseDefinition) []enterprisev4.ManagedRole { - roles := make([]enterprisev4.ManagedRole, 0, len(retained)*2) - for _, dbSpec := range retained { +func buildManagedRoles(postgresDBName string, 
databases []enterprisev4.DatabaseDefinition) []enterprisev4.ManagedRole { + roles := make([]enterprisev4.ManagedRole, 0, len(databases)*2) + for _, dbSpec := range databases { roles = append(roles, enterprisev4.ManagedRole{ Name: adminRoleName(dbSpec.Name), @@ -724,9 +702,8 @@ func buildRetainedRoles(postgresDBName string, retained []enterprisev4.DatabaseD return roles } -func patchManagedRolesOnDeletion(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster, retained []enterprisev4.DatabaseDefinition) error { - roles := buildRetainedRoles(postgresDB.Name, retained) - rolePatch := &unstructured.Unstructured{ +func buildManagedRolesPatch(cluster *enterprisev4.PostgresCluster, roles []enterprisev4.ManagedRole) *unstructured.Unstructured { + return &unstructured.Unstructured{ Object: map[string]any{ "apiVersion": cluster.APIVersion, "kind": cluster.Kind, @@ -734,6 +711,11 @@ func patchManagedRolesOnDeletion(ctx context.Context, c client.Client, postgresD "spec": map[string]any{"managedRoles": roles}, }, } +} + +func patchManagedRolesOnDeletion(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster, retained []enterprisev4.DatabaseDefinition) error { + roles := buildManagedRoles(postgresDB.Name, retained) + rolePatch := buildManagedRolesPatch(cluster, roles) if err := c.Patch(ctx, rolePatch, client.Apply, client.FieldOwner(fieldManagerName(postgresDB.Name))); err != nil { return fmt.Errorf("patching managed roles on deletion: %w", err) } @@ -832,6 +814,19 @@ func buildPasswordSecret(postgresDB *enterprisev4.PostgresDatabase, secretName, } } +func buildCNPGDatabaseSpec(clusterName string, dbSpec enterprisev4.DatabaseDefinition) cnpgv1.DatabaseSpec { + reclaimPolicy := cnpgv1.DatabaseReclaimDelete + if dbSpec.DeletionPolicy == deletionPolicyRetain { + reclaimPolicy = cnpgv1.DatabaseReclaimRetain + } + return cnpgv1.DatabaseSpec{ + Name: dbSpec.Name, + Owner: adminRoleName(dbSpec.Name), + ClusterRef: corev1.LocalObjectReference{Name: clusterName}, + ReclaimPolicy: reclaimPolicy, + } +} + func reconcileRoleConfigMaps(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, endpoints clusterEndpoints) error { logger := log.FromContext(ctx) for _, dbSpec := range postgresDB.Spec.Databases { diff --git a/pkg/postgresql/database/core/database_unit_test.go b/pkg/postgresql/database/core/database_unit_test.go new file mode 100644 index 000000000..0bde24a16 --- /dev/null +++ b/pkg/postgresql/database/core/database_unit_test.go @@ -0,0 +1,1641 @@ +package core + +// The following functions are intentionally not tested directly here. 
+// Their business logic is covered by narrower helper tests where practical, +// and the remaining behavior is mostly controller-runtime orchestration: +// - PostgresDatabaseService +// - patchManagedRoles +// - reconcileCNPGDatabases +// - handleDeletion +// - orphanRetainedResources +// - deleteRemovedResources +// - cleanupManagedRoles + +import ( + "context" + "encoding/json" + "errors" + "testing" + "unicode" + + cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" + enterprisev4 "github.com/splunk/splunk-operator/api/v4" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/client/interceptor" +) + +// managedRolesFieldsRaw is a helper to construct the raw managed fields JSON for testing parseRoleNames and related functions. +func managedRolesFieldsRaw(t *testing.T, keys ...string) []byte { + t.Helper() + + managedRoles := make(map[string]any, len(keys)) + for _, key := range keys { + managedRoles[key] = map[string]any{} + } + + raw, err := json.Marshal(map[string]any{ + "f:spec": map[string]any{ + "f:managedRoles": managedRoles, + }, + }) + require.NoError(t, err) + + return raw +} + +type stubDBRepo struct { + execErr error + calls []string +} + +// ExecGrants is a stub implementation of the DBRepo interface that records calls and returns a predefined error. +func (r *stubDBRepo) ExecGrants(_ context.Context, dbName string) error { + r.calls = append(r.calls, dbName) + return r.execErr +} + +// boolPtr is a helper to get a pointer to a bool value, used for testing conditions with pointer fields. +func boolPtr(v bool) *bool { + return &v +} + +// strPtr is a helper to get a pointer to a string value, used for testing pointer string fields. +func strPtr(s string) *string { + return &s +} + +func databaseNames(defs []enterprisev4.DatabaseDefinition) []string { + names := make([]string, 0, len(defs)) + for _, def := range defs { + names = append(names, def.Name) + } + return names +} + +func assertGeneratedPassword(t *testing.T, got string, wantLength, wantDigits int) { + t.Helper() + + digitCount := 0 + for _, r := range got { + if unicode.IsDigit(r) { + digitCount++ + continue + } + + assert.Truef(t, unicode.IsLetter(r), "password contains unsupported rune %q", r) + } + + assert.Len(t, got, wantLength) + assert.Equal(t, wantDigits, digitCount) +} + +// testScheme constructs a runtime.Scheme with the necessary API types registered for testing. +func testScheme(t *testing.T) *runtime.Scheme { + t.Helper() + + scheme := runtime.NewScheme() + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + utilruntime.Must(enterprisev4.AddToScheme(scheme)) + utilruntime.Must(cnpgv1.AddToScheme(scheme)) + + return scheme +} + +// testClient constructs a fake client with the given scheme and initial objects for testing. +func testClient(t *testing.T, scheme *runtime.Scheme, objs ...client.Object) client.Client { + t.Helper() + + builder := fake.NewClientBuilder(). + WithScheme(scheme). + WithStatusSubresource(&enterprisev4.PostgresDatabase{}). + WithObjects(objs...) 
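+	// WithStatusSubresource mirrors the API server's status-subresource
+	// semantics for PostgresDatabase, so helpers that call c.Status().Update
+	// (persistStatus below) are exercised the same way they run in production.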
+ + return builder.Build() +} + +func TestGetDesiredUsers(t *testing.T) { + postgresDB := &enterprisev4.PostgresDatabase{ + Spec: enterprisev4.PostgresDatabaseSpec{ + Databases: []enterprisev4.DatabaseDefinition{ + {Name: "main_db"}, + {Name: "secondary_db"}, + }, + }, + } + want := []string{ + "main_db_admin", + "main_db_rw", + "secondary_db_admin", + "secondary_db_rw", + } + + got := getDesiredUsers(postgresDB) + + assert.Equal(t, want, got) +} + +func TestGetUsersInClusterSpec(t *testing.T) { + cluster := &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ManagedRoles: []enterprisev4.ManagedRole{ + {Name: "main_db_admin"}, + {Name: "main_db_rw"}, + }, + }, + } + want := []string{"main_db_admin", "main_db_rw"} + + got := getUsersInClusterSpec(cluster) + + assert.Equal(t, want, got) +} + +func TestParseRoleNames(t *testing.T) { + validKey, err := json.Marshal(map[string]string{"name": "main_db_admin"}) + require.NoError(t, err) + ignoredKey, err := json.Marshal(map[string]string{"other": "value"}) + require.NoError(t, err) + + tests := []struct { + name string + raw []byte + want []string + }{ + { + name: "extracts role names from managed roles fields", + raw: managedRolesFieldsRaw( + t, + "k:"+string(validKey), + "k:"+string(ignoredKey), + "plain-key", + ), + want: []string{"main_db_admin"}, + }, + { + name: "returns nil on invalid json", + raw: []byte(`{"f:spec"`), + want: nil, + }, + { + name: "returns empty when managed roles missing", + raw: []byte(`{"f:spec":{}}`), + want: nil, + }, + { + name: "returns empty when spec field is missing entirely", + raw: []byte(`{"f:metadata":{}}`), + want: nil, + }, + } + + for _, tst := range tests { + + t.Run(tst.name, func(t *testing.T) { + got := parseRoleNames(tst.raw) + + assert.ElementsMatch(t, tst.want, got) + }) + } +} + +func TestManagedRoleOwners(t *testing.T) { + roleKey, err := json.Marshal(map[string]string{"name": "main_db_admin"}) + require.NoError(t, err) + secondRoleKey, err := json.Marshal(map[string]string{"name": "main_db_rw"}) + require.NoError(t, err) + + managedFields := []metav1.ManagedFieldsEntry{ + {Manager: "ignored"}, + { + Manager: "postgresdatabase-other", + FieldsV1: &metav1.FieldsV1{ + Raw: managedRolesFieldsRaw( + t, + "k:"+string(roleKey), + "k:"+string(secondRoleKey), + ), + }, + }, + { + Manager: "postgresdatabase-newer", + FieldsV1: &metav1.FieldsV1{ + Raw: managedRolesFieldsRaw(t, "k:"+string(roleKey)), + }, + }, + } + want := map[string]string{ + "main_db_admin": "postgresdatabase-newer", + "main_db_rw": "postgresdatabase-other", + } + + got := managedRoleOwners(managedFields) + + assert.Equal(t, want, got) +} + +func TestGetRoleConflicts(t *testing.T) { + roleKey, err := json.Marshal(map[string]string{"name": "main_db_admin"}) + require.NoError(t, err) + sameOwnerKey, err := json.Marshal(map[string]string{"name": "main_db_rw"}) + require.NoError(t, err) + unrelatedKey, err := json.Marshal(map[string]string{"name": "audit_admin"}) + require.NoError(t, err) + + postgresDB := &enterprisev4.PostgresDatabase{ + ObjectMeta: metav1.ObjectMeta{Name: "primary"}, + Spec: enterprisev4.PostgresDatabaseSpec{ + Databases: []enterprisev4.DatabaseDefinition{{Name: "main_db"}}, + }, + } + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + ManagedFields: []metav1.ManagedFieldsEntry{ + { + Manager: "postgresdatabase-legacy", + FieldsV1: &metav1.FieldsV1{ + Raw: managedRolesFieldsRaw( + t, + "k:"+string(roleKey), + "k:"+string(unrelatedKey), + ), + }, + }, + { + Manager: 
fieldManagerName(postgresDB.Name), + FieldsV1: &metav1.FieldsV1{ + Raw: managedRolesFieldsRaw(t, "k:"+string(sameOwnerKey)), + }, + }, + }, + }, + } + want := []string{"main_db_admin (owned by postgresdatabase-legacy)"} + + got := getRoleConflicts(postgresDB, cluster) + + assert.ElementsMatch(t, want, got) +} + +func TestVerifyRolesReady(t *testing.T) { + tests := []struct { + name string + expectedUsers []string + cluster *cnpgv1.Cluster + wantNotReady []string + wantErr string + }{ + { + name: "returns error when a role cannot reconcile", + expectedUsers: []string{"main_db_admin", "main_db_rw"}, + cluster: &cnpgv1.Cluster{ + Status: cnpgv1.ClusterStatus{ + ManagedRolesStatus: cnpgv1.ManagedRoles{ + CannotReconcile: map[string][]string{ + "main_db_rw": {"reserved role"}, + }, + }, + }, + }, + wantErr: "user main_db_rw reconciliation failed: [reserved role]", + }, + { + name: "returns missing roles that are not reconciled yet", + expectedUsers: []string{"main_db_admin", "main_db_rw", "analytics_admin"}, + cluster: &cnpgv1.Cluster{ + Status: cnpgv1.ClusterStatus{ + ManagedRolesStatus: cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusReconciled: {"main_db_admin", "analytics_admin"}, + }, + }, + }, + }, + wantNotReady: []string{"main_db_rw"}, + }, + { + name: "returns pending reconciliation roles as not ready", + expectedUsers: []string{"main_db_admin", "main_db_rw"}, + cluster: &cnpgv1.Cluster{ + Status: cnpgv1.ClusterStatus{ + ManagedRolesStatus: cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusReconciled: {"main_db_admin"}, + cnpgv1.RoleStatusPendingReconciliation: {"main_db_rw"}, + }, + }, + }, + }, + wantNotReady: []string{"main_db_rw"}, + }, + { + name: "returns empty when all roles are reconciled", + expectedUsers: []string{"main_db_admin"}, + cluster: &cnpgv1.Cluster{ + Status: cnpgv1.ClusterStatus{ + ManagedRolesStatus: cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusReconciled: {"main_db_admin"}, + }, + }, + }, + }, + wantNotReady: nil, + }, + } + + for _, tst := range tests { + + t.Run(tst.name, func(t *testing.T) { + gotNotReady, err := verifyRolesReady(context.Background(), tst.expectedUsers, tst.cluster) + if tst.wantErr != "" { + require.Error(t, err) + assert.Equal(t, tst.wantErr, err.Error()) + return + } + require.NoError(t, err) + assert.Equal(t, tst.wantNotReady, gotNotReady) + }) + } +} + +func TestReconcileRWRolePrivileges(t *testing.T) { + tests := []struct { + name string + dbNames []string + newRepoErrs map[string]error + execErrs map[string]error + wantRepoCalls []string + wantExecCalls map[string][]string + wantErrContains []string + }{ + { + name: "returns nil when all databases succeed", + dbNames: []string{"payments", "analytics"}, + wantRepoCalls: []string{"payments", "analytics"}, + wantExecCalls: map[string][]string{ + "payments": {"payments"}, + "analytics": {"analytics"}, + }, + }, + { + name: "continues after repo creation and exec errors", + dbNames: []string{"payments", "analytics", "audit"}, + newRepoErrs: map[string]error{"payments": errors.New("connect failed")}, + execErrs: map[string]error{"analytics": errors.New("grant failed")}, + wantRepoCalls: []string{"payments", "analytics", "audit"}, + wantExecCalls: map[string][]string{ + "analytics": {"analytics"}, + "audit": {"audit"}, + }, + wantErrContains: []string{ + "database payments: connect failed", + "database analytics: grant failed", + }, + }, + } + + for _, tst := range tests { + 
t.Run(tst.name, func(t *testing.T) { + repos := make(map[string]*stubDBRepo, len(tst.dbNames)) + repoCalls := make([]string, 0, len(tst.dbNames)) + + for _, dbName := range tst.dbNames { + repos[dbName] = &stubDBRepo{execErr: tst.execErrs[dbName]} + } + + newDBRepo := func(_ context.Context, host, dbName, password string) (DBRepo, error) { + repoCalls = append(repoCalls, dbName) + if err := tst.newRepoErrs[dbName]; err != nil { + return nil, err + } + + return repos[dbName], nil + } + + err := reconcileRWRolePrivileges(context.Background(), "rw.example.internal", "supersecret", tst.dbNames, newDBRepo) + + assert.Equal(t, tst.wantRepoCalls, repoCalls) + for dbName, wantCalls := range tst.wantExecCalls { + assert.Equal(t, wantCalls, repos[dbName].calls) + } + + if len(tst.wantErrContains) == 0 { + assert.NoError(t, err) + return + } + + require.Error(t, err) + for _, wantMsg := range tst.wantErrContains { + assert.ErrorContains(t, err, wantMsg) + } + }) + } +} + +func TestGetClusterReadyStatus(t *testing.T) { + tests := []struct { + name string + cluster *enterprisev4.PostgresCluster + wantStatus clusterReadyStatus + }{ + { + name: "returns not ready when phase is nil", + cluster: &enterprisev4.PostgresCluster{}, + wantStatus: ClusterNotReady, + }, + { + name: "returns not ready when phase is not ready", + cluster: &enterprisev4.PostgresCluster{ + Status: enterprisev4.PostgresClusterStatus{ + Phase: strPtr("Provisioning"), + }, + }, + wantStatus: ClusterNotReady, + }, + { + name: "returns no provisioner ref when phase is ready but ref is missing", + cluster: &enterprisev4.PostgresCluster{ + Status: enterprisev4.PostgresClusterStatus{ + Phase: strPtr(string(ClusterReady)), + }, + }, + wantStatus: ClusterNoProvisionerRef, + }, + { + name: "returns ready when phase and provisioner ref are present", + cluster: &enterprisev4.PostgresCluster{ + Status: enterprisev4.PostgresClusterStatus{ + Phase: strPtr(string(ClusterReady)), + ProvisionerRef: &corev1.ObjectReference{Name: "cnpg-primary", Namespace: "dbs"}, + }, + }, + wantStatus: ClusterReady, + }, + } + + for _, tst := range tests { + t.Run(tst.name, func(t *testing.T) { + assert.Equal(t, tst.wantStatus, getClusterReadyStatus(tst.cluster)) + }) + } +} + +// Uses a fake client because fetching the referenced Cluster depends on API reads. +func TestFetchCluster(t *testing.T) { + scheme := testScheme(t) + + tests := []struct { + name string + cluster *enterprisev4.PostgresCluster + wantName string + wantErr string + wantAbsent bool + }{ + { + name: "returns not found when cluster is absent", + wantAbsent: true, + }, + { + name: "returns referenced cluster when present", + cluster: &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "primary", Namespace: "dbs"}, + }, + wantName: "primary", + }, + } + + for _, tst := range tests { + t.Run(tst.name, func(t *testing.T) { + postgresDB := &enterprisev4.PostgresDatabase{ + ObjectMeta: metav1.ObjectMeta{Name: "db", Namespace: "dbs"}, + Spec: enterprisev4.PostgresDatabaseSpec{ + ClusterRef: corev1.LocalObjectReference{Name: "primary"}, + }, + } + + var objs []client.Object + if tst.cluster != nil { + objs = append(objs, tst.cluster) + } + + c := testClient(t, scheme, objs...) 
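+			// fetchCluster returns the raw NotFound error rather than a
+			// sentinel status; the caller (PostgresDatabaseService) maps it
+			// to the ClusterNotFound condition itself.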
+
+			cluster, err := fetchCluster(context.Background(), c, postgresDB)
+
+			if tst.wantAbsent {
+				require.Error(t, err)
+				assert.True(t, apierrors.IsNotFound(err))
+				assert.Nil(t, cluster)
+				return
+			}
+
+			if tst.wantErr != "" {
+				require.Error(t, err)
+				assert.ErrorContains(t, err, tst.wantErr)
+				return
+			}
+
+			require.NoError(t, err)
+			require.NotNil(t, cluster)
+			assert.Equal(t, tst.wantName, cluster.Name)
+		})
+	}
+
+	t.Run("returns error on client failure", func(t *testing.T) {
+		postgresDB := &enterprisev4.PostgresDatabase{
+			ObjectMeta: metav1.ObjectMeta{Name: "db", Namespace: "dbs"},
+			Spec: enterprisev4.PostgresDatabaseSpec{
+				ClusterRef: corev1.LocalObjectReference{Name: "primary"},
+			},
+		}
+		c := fake.NewClientBuilder().
+			WithScheme(scheme).
+			WithInterceptorFuncs(interceptor.Funcs{
+				Get: func(_ context.Context, _ client.WithWatch, _ client.ObjectKey, _ client.Object, _ ...client.GetOption) error {
+					return errors.New("api unavailable")
+				},
+			}).
+			Build()
+
+		cluster, err := fetchCluster(context.Background(), c, postgresDB)
+
+		require.Error(t, err)
+		assert.Nil(t, cluster)
+		assert.ErrorContains(t, err, "api unavailable")
+	})
+}
+
+// Uses a fake client because the helper mutates status in-memory and persists it through the status subresource.
+func TestPersistStatus(t *testing.T) {
+	scheme := testScheme(t)
+	existing := &enterprisev4.PostgresDatabase{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:       "primary",
+			Namespace:  "dbs",
+			Generation: 7,
+		},
+	}
+	c := testClient(t, scheme, existing)
+	postgresDB := &enterprisev4.PostgresDatabase{}
+	require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: existing.Name, Namespace: existing.Namespace}, postgresDB))
+
+	err := persistStatus(
+		context.Background(),
+		c,
+		postgresDB,
+		clusterReady,
+		metav1.ConditionTrue,
+		reasonClusterAvailable,
+		"Cluster is operational",
+		provisioningDBPhase,
+	)
+
+	require.NoError(t, err)
+	require.NotNil(t, postgresDB.Status.Phase)
+	assert.Equal(t, string(provisioningDBPhase), *postgresDB.Status.Phase)
+	require.Len(t, postgresDB.Status.Conditions, 1)
+	assert.Equal(t, string(clusterReady), postgresDB.Status.Conditions[0].Type)
+	assert.Equal(t, metav1.ConditionTrue, postgresDB.Status.Conditions[0].Status)
+	assert.Equal(t, string(reasonClusterAvailable), postgresDB.Status.Conditions[0].Reason)
+	assert.Equal(t, "Cluster is operational", postgresDB.Status.Conditions[0].Message)
+	assert.Equal(t, postgresDB.Generation, postgresDB.Status.Conditions[0].ObservedGeneration)
+
+	got := &enterprisev4.PostgresDatabase{}
+	require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: postgresDB.Name, Namespace: postgresDB.Namespace}, got))
+	require.NotNil(t, got.Status.Phase)
+	assert.Equal(t, *postgresDB.Status.Phase, *got.Status.Phase)
+	require.Len(t, got.Status.Conditions, 1)
+	assert.Equal(t, postgresDB.Status.Conditions[0], got.Status.Conditions[0])
+}
+
+// Uses a fake client because readiness is determined from CNPG Database objects in the API.
+func TestVerifyDatabasesReady(t *testing.T) { + scheme := testScheme(t) + postgresDB := &enterprisev4.PostgresDatabase{ + ObjectMeta: metav1.ObjectMeta{Name: "primary", Namespace: "dbs"}, + Spec: enterprisev4.PostgresDatabaseSpec{ + Databases: []enterprisev4.DatabaseDefinition{ + {Name: "payments"}, + {Name: "analytics"}, + }, + }, + } + + tests := []struct { + name string + objects []client.Object + wantNotReady []string + wantErr string + }{ + { + name: "returns empty when all databases are applied", + objects: []client.Object{ + &cnpgv1.Database{ + ObjectMeta: metav1.ObjectMeta{Name: "primary-payments", Namespace: "dbs"}, + Status: cnpgv1.DatabaseStatus{Applied: boolPtr(true)}, + }, + &cnpgv1.Database{ + ObjectMeta: metav1.ObjectMeta{Name: "primary-analytics", Namespace: "dbs"}, + Status: cnpgv1.DatabaseStatus{Applied: boolPtr(true)}, + }, + }, + wantNotReady: nil, + }, + { + name: "returns names for databases that are not applied", + objects: []client.Object{ + &cnpgv1.Database{ + ObjectMeta: metav1.ObjectMeta{Name: "primary-payments", Namespace: "dbs"}, + Status: cnpgv1.DatabaseStatus{Applied: boolPtr(false)}, + }, + &cnpgv1.Database{ + ObjectMeta: metav1.ObjectMeta{Name: "primary-analytics", Namespace: "dbs"}, + }, + }, + wantNotReady: []string{"payments", "analytics"}, + }, + { + name: "returns error when a database is missing", + objects: []client.Object{ + &cnpgv1.Database{ + ObjectMeta: metav1.ObjectMeta{Name: "primary-payments", Namespace: "dbs"}, + Status: cnpgv1.DatabaseStatus{Applied: boolPtr(true)}, + }, + }, + wantErr: "getting CNPG Database primary-analytics", + }, + } + + for _, tst := range tests { + + t.Run(tst.name, func(t *testing.T) { + c := testClient(t, scheme, tst.objects...) + + got, err := verifyDatabasesReady(context.Background(), c, postgresDB) + + if tst.wantErr != "" { + require.Error(t, err) + assert.ErrorContains(t, err, tst.wantErr) + return + } + + require.NoError(t, err) + assert.Equal(t, tst.wantNotReady, got) + }) + } +} + +// Uses a fake client because the helper wraps Kubernetes get/not-found behavior. +func TestGetSecret(t *testing.T) { + scheme := testScheme(t) + + t.Run("returns secret when found", func(t *testing.T) { + existing := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "db-secret", Namespace: "dbs"}, + Data: map[string][]byte{secretKeyPassword: []byte("value")}, + } + c := testClient(t, scheme, existing) + + secret, err := getSecret(context.Background(), c, "dbs", "db-secret") + + require.NoError(t, err) + require.NotNil(t, secret) + assert.Equal(t, existing.Name, secret.Name) + assert.Equal(t, "value", string(secret.Data[secretKeyPassword])) + }) + + t.Run("returns nil nil when secret is absent", func(t *testing.T) { + c := testClient(t, scheme) + + secret, err := getSecret(context.Background(), c, "dbs", "missing") + + require.NoError(t, err) + assert.Nil(t, secret) + }) +} + +// Uses a fake client because adoption updates object metadata and persists it through the client. 
+func TestAdoptResource(t *testing.T) { + scheme := testScheme(t) + postgresDB := &enterprisev4.PostgresDatabase{ + TypeMeta: metav1.TypeMeta{ + APIVersion: enterprisev4.GroupVersion.String(), + Kind: "PostgresDatabase", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + UID: types.UID("postgresdb-uid"), + }, + } + configMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "primary-payments-config", + Namespace: "dbs", + Annotations: map[string]string{annotationRetainedFrom: "primary", "keep": "true"}, + }, + } + c := testClient(t, scheme, postgresDB, configMap) + + err := adoptResource(context.Background(), c, scheme, postgresDB, configMap) + + require.NoError(t, err) + + updated := &corev1.ConfigMap{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: configMap.Name, Namespace: configMap.Namespace}, updated)) + assert.Equal(t, "true", updated.Annotations["keep"]) + _, exists := updated.Annotations[annotationRetainedFrom] + assert.False(t, exists) + require.Len(t, updated.OwnerReferences, 1) + assert.Equal(t, postgresDB.UID, updated.OwnerReferences[0].UID) +} + +// Uses a fake client because these helpers mutate existing API objects during orphaning. +func TestOrphanResourceHelpers(t *testing.T) { + scheme := testScheme(t) + postgresDB := &enterprisev4.PostgresDatabase{ + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + UID: types.UID("postgresdb-uid"), + }, + } + databases := []enterprisev4.DatabaseDefinition{{Name: "payments"}} + + t.Run("orphanCNPGDatabases strips owner and adds retain annotation", func(t *testing.T) { + db := &cnpgv1.Database{ + ObjectMeta: metav1.ObjectMeta{ + Name: "primary-payments", + Namespace: "dbs", + OwnerReferences: []metav1.OwnerReference{ + {UID: postgresDB.UID, Name: postgresDB.Name}, + {UID: types.UID("other"), Name: "other"}, + }, + }, + } + c := testClient(t, scheme, db) + + require.NoError(t, orphanCNPGDatabases(context.Background(), c, postgresDB, databases)) + + updated := &cnpgv1.Database{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: db.Name, Namespace: db.Namespace}, updated)) + assert.Equal(t, postgresDB.Name, updated.Annotations[annotationRetainedFrom]) + require.Len(t, updated.OwnerReferences, 1) + assert.Equal(t, types.UID("other"), updated.OwnerReferences[0].UID) + }) + + t.Run("orphanConfigMaps skips not found", func(t *testing.T) { + c := testClient(t, scheme) + require.NoError(t, orphanConfigMaps(context.Background(), c, postgresDB, databases)) + }) + + t.Run("orphanSecrets skips already retained secret", func(t *testing.T) { + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "primary-payments-admin", + Namespace: "dbs", + Annotations: map[string]string{annotationRetainedFrom: postgresDB.Name}, + }, + } + c := testClient(t, scheme, secret) + + require.NoError(t, orphanSecrets(context.Background(), c, postgresDB, databases)) + + updated := &corev1.Secret{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: secret.Name, Namespace: secret.Namespace}, updated)) + assert.Equal(t, postgresDB.Name, updated.Annotations[annotationRetainedFrom]) + assert.Empty(t, updated.OwnerReferences) + assert.Equal(t, secret, updated) + }) +} + +// Uses a fake client because these helpers delete Kubernetes resources and must verify API state. 
+func TestDeleteResourceHelpers(t *testing.T) { + scheme := testScheme(t) + postgresDB := &enterprisev4.PostgresDatabase{ + ObjectMeta: metav1.ObjectMeta{Name: "primary", Namespace: "dbs"}, + } + databases := []enterprisev4.DatabaseDefinition{{Name: "payments"}} + + t.Run("deleteCNPGDatabases removes existing object", func(t *testing.T) { + db := &cnpgv1.Database{ObjectMeta: metav1.ObjectMeta{Name: "primary-payments", Namespace: "dbs"}} + c := testClient(t, scheme, db) + require.NoError(t, deleteCNPGDatabases(context.Background(), c, postgresDB, databases)) + }) + + t.Run("deleteConfigMaps ignores missing objects", func(t *testing.T) { + c := testClient(t, scheme) + require.NoError(t, deleteConfigMaps(context.Background(), c, postgresDB, databases)) + }) + + t.Run("deleteSecrets deletes admin and rw secrets", func(t *testing.T) { + admin := &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "primary-payments-admin", Namespace: "dbs"}} + rw := &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "primary-payments-rw", Namespace: "dbs"}} + c := testClient(t, scheme, admin, rw) + require.NoError(t, deleteSecrets(context.Background(), c, postgresDB, databases)) + }) +} + +func TestGeneratePassword(t *testing.T) { + wantLength := passwordLength + wantDigits := passwordDigits + + got, err := generatePassword() + + require.NoError(t, err) + assertGeneratedPassword(t, got, wantLength, wantDigits) +} + +// Uses a fake client because the helper creates Secret objects and persists owner references through the Kubernetes API. +func TestCreateUserSecret(t *testing.T) { + scheme := testScheme(t) + postgresDB := &enterprisev4.PostgresDatabase{ + TypeMeta: metav1.TypeMeta{ + APIVersion: enterprisev4.GroupVersion.String(), + Kind: "PostgresDatabase", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + UID: types.UID("postgresdb-uid"), + }, + } + + t.Run("creates secret with generated credentials", func(t *testing.T) { + roleName := "payments_admin" + secretName := "primary-payments-admin" + wantManagedBy := "splunk-operator" + wantReload := "true" + wantUsername := roleName + wantOwnerUID := postgresDB.UID + wantPasswordLength := passwordLength + wantPasswordDigits := passwordDigits + c := testClient(t, scheme) + + err := createUserSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) + + require.NoError(t, err) + + got := &corev1.Secret{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: secretName, Namespace: postgresDB.Namespace}, got)) + assert.Equal(t, secretName, got.Name) + assert.Equal(t, postgresDB.Namespace, got.Namespace) + assert.Equal(t, wantManagedBy, got.Labels[labelManagedBy]) + assert.Equal(t, wantReload, got.Labels[labelCNPGReload]) + assert.Equal(t, wantUsername, string(got.Data["username"])) + assertGeneratedPassword(t, string(got.Data[secretKeyPassword]), wantPasswordLength, wantPasswordDigits) + require.Len(t, got.OwnerReferences, 1) + assert.Equal(t, wantOwnerUID, got.OwnerReferences[0].UID) + }) + + t.Run("returns nil when secret already exists", func(t *testing.T) { + roleName := "payments_admin" + secretName := "primary-payments-admin" + wantUsername := roleName + wantPassword := "existing-password" + existing := buildPasswordSecret(postgresDB, secretName, wantUsername, wantPassword) + c := testClient(t, scheme, existing) + + err := createUserSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) + + require.NoError(t, err) + + got := &corev1.Secret{} + require.NoError(t, 
c.Get(context.Background(), types.NamespacedName{Name: secretName, Namespace: postgresDB.Namespace}, got)) + assert.Equal(t, wantUsername, string(got.Data["username"])) + assert.Equal(t, wantPassword, string(got.Data[secretKeyPassword])) + assert.Empty(t, got.OwnerReferences) + }) +} + +// Uses a fake client because the helper decides between get/create/adopt behavior based on Secret state in the API. +func TestEnsureSecret(t *testing.T) { + scheme := testScheme(t) + postgresDB := &enterprisev4.PostgresDatabase{ + TypeMeta: metav1.TypeMeta{ + APIVersion: enterprisev4.GroupVersion.String(), + Kind: "PostgresDatabase", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + UID: types.UID("postgresdb-uid"), + }, + } + + t.Run("creates missing secret", func(t *testing.T) { + roleName := "payments_admin" + secretName := "primary-payments-admin" + wantManagedBy := "splunk-operator" + wantReload := "true" + wantUsername := roleName + wantOwnerUID := postgresDB.UID + wantPasswordLength := passwordLength + wantPasswordDigits := passwordDigits + c := testClient(t, scheme) + + err := ensureSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) + + require.NoError(t, err) + + got := &corev1.Secret{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: secretName, Namespace: postgresDB.Namespace}, got)) + assert.Equal(t, wantManagedBy, got.Labels[labelManagedBy]) + assert.Equal(t, wantReload, got.Labels[labelCNPGReload]) + assert.Equal(t, wantUsername, string(got.Data["username"])) + assertGeneratedPassword(t, string(got.Data[secretKeyPassword]), wantPasswordLength, wantPasswordDigits) + require.Len(t, got.OwnerReferences, 1) + assert.Equal(t, wantOwnerUID, got.OwnerReferences[0].UID) + }) + + t.Run("re-adopts retained secret", func(t *testing.T) { + roleName := "payments_admin" + secretName := "primary-payments-admin" + wantUsername := roleName + wantPassword := "existing-password" + wantOwnerUID := postgresDB.UID + wantKeep := "true" + retained := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: postgresDB.Namespace, + Annotations: map[string]string{ + annotationRetainedFrom: postgresDB.Name, + "keep": wantKeep, + }, + OwnerReferences: []metav1.OwnerReference{ + {UID: types.UID("old-owner"), Name: "old-owner"}, + }, + }, + Data: map[string][]byte{ + "username": []byte(wantUsername), + secretKeyPassword: []byte(wantPassword), + }, + } + c := testClient(t, scheme, retained) + + err := ensureSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) + + require.NoError(t, err) + + got := &corev1.Secret{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: secretName, Namespace: postgresDB.Namespace}, got)) + assert.Equal(t, wantKeep, got.Annotations["keep"]) + _, hasRetainedAnnotation := got.Annotations[annotationRetainedFrom] + assert.False(t, hasRetainedAnnotation) + assert.Equal(t, wantUsername, string(got.Data["username"])) + assert.Equal(t, wantPassword, string(got.Data[secretKeyPassword])) + assert.Contains(t, got.OwnerReferences, metav1.OwnerReference{ + APIVersion: enterprisev4.GroupVersion.String(), + Kind: "PostgresDatabase", + Name: postgresDB.Name, + UID: wantOwnerUID, + Controller: boolPtr(true), + BlockOwnerDeletion: boolPtr(true), + }) + }) + + t.Run("does nothing for existing managed secret", func(t *testing.T) { + roleName := "payments_admin" + secretName := "primary-payments-admin" + wantUsername := roleName + wantPassword := "existing-password" + 
wantKeep := "true" + wantOwnerUID := postgresDB.UID + existing := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: postgresDB.Namespace, + Annotations: map[string]string{ + "keep": wantKeep, + }, + OwnerReferences: []metav1.OwnerReference{ + {UID: wantOwnerUID, Name: postgresDB.Name}, + }, + }, + Data: map[string][]byte{ + "username": []byte(wantUsername), + secretKeyPassword: []byte(wantPassword), + }, + } + c := testClient(t, scheme, existing) + + err := ensureSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) + + require.NoError(t, err) + + got := &corev1.Secret{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: secretName, Namespace: postgresDB.Namespace}, got)) + assert.Equal(t, wantKeep, got.Annotations["keep"]) + assert.Equal(t, wantUsername, string(got.Data["username"])) + assert.Equal(t, wantPassword, string(got.Data[secretKeyPassword])) + require.Len(t, got.OwnerReferences, 1) + assert.Equal(t, wantOwnerUID, got.OwnerReferences[0].UID) + }) +} + +// Uses a fake client because the helper reconciles multiple Secret objects through the Kubernetes API. +func TestReconcileUserSecrets(t *testing.T) { + scheme := testScheme(t) + postgresDB := &enterprisev4.PostgresDatabase{ + TypeMeta: metav1.TypeMeta{ + APIVersion: enterprisev4.GroupVersion.String(), + Kind: "PostgresDatabase", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + UID: types.UID("postgresdb-uid"), + }, + Spec: enterprisev4.PostgresDatabaseSpec{ + Databases: []enterprisev4.DatabaseDefinition{ + {Name: "payments"}, + {Name: "analytics"}, + }, + }, + } + + t.Run("creates secrets for each database role", func(t *testing.T) { + c := testClient(t, scheme) + wantSecrets := []struct { + name string + username string + }{ + {name: "primary-payments-admin", username: "payments_admin"}, + {name: "primary-payments-rw", username: "payments_rw"}, + {name: "primary-analytics-admin", username: "analytics_admin"}, + {name: "primary-analytics-rw", username: "analytics_rw"}, + } + + err := reconcileUserSecrets(context.Background(), c, scheme, postgresDB) + + require.NoError(t, err) + for _, want := range wantSecrets { + got := &corev1.Secret{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: want.name, Namespace: postgresDB.Namespace}, got)) + assert.Equal(t, want.username, string(got.Data["username"])) + assertGeneratedPassword(t, string(got.Data[secretKeyPassword]), passwordLength, passwordDigits) + require.Len(t, got.OwnerReferences, 1) + assert.Equal(t, postgresDB.UID, got.OwnerReferences[0].UID) + } + }) + + t.Run("is idempotent when secrets already exist", func(t *testing.T) { + c := testClient(t, scheme) + + require.NoError(t, reconcileUserSecrets(context.Background(), c, scheme, postgresDB)) + + before := &corev1.Secret{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: "primary-payments-admin", Namespace: postgresDB.Namespace}, before)) + beforePassword := append([]byte(nil), before.Data[secretKeyPassword]...) 
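+		// Defensive copy: snapshot the password bytes so the assertion below
+		// compares against data that cannot alias whatever the client hands
+		// back from the second Get.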
+ + err := reconcileUserSecrets(context.Background(), c, scheme, postgresDB) + + require.NoError(t, err) + + after := &corev1.Secret{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: "primary-payments-admin", Namespace: postgresDB.Namespace}, after)) + assert.Equal(t, beforePassword, after.Data[secretKeyPassword]) + require.Len(t, after.OwnerReferences, 1) + assert.Equal(t, postgresDB.UID, after.OwnerReferences[0].UID) + }) +} + +// Uses a fake client because the helper reconciles ConfigMaps through CreateOrUpdate and persists re-adoption metadata. +func TestReconcileRoleConfigMaps(t *testing.T) { + scheme := testScheme(t) + endpoints := clusterEndpoints{ + RWHost: "rw.default.svc.cluster.local", + ROHost: "ro.default.svc.cluster.local", + PoolerRWHost: "pooler-rw.default.svc.cluster.local", + PoolerROHost: "pooler-ro.default.svc.cluster.local", + } + + t.Run("creates configmaps for all databases", func(t *testing.T) { + postgresDB := &enterprisev4.PostgresDatabase{ + TypeMeta: metav1.TypeMeta{ + APIVersion: enterprisev4.GroupVersion.String(), + Kind: "PostgresDatabase", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + UID: types.UID("postgresdb-uid"), + }, + Spec: enterprisev4.PostgresDatabaseSpec{ + Databases: []enterprisev4.DatabaseDefinition{ + {Name: "payments"}, + {Name: "analytics"}, + }, + }, + } + wantManagedBy := "splunk-operator" + wantOwnerUID := postgresDB.UID + wantPaymentsName := "primary-payments-config" + wantAnalyticsName := "primary-analytics-config" + wantPaymentsData := buildDatabaseConfigMapBody("payments", endpoints) + wantAnalyticsData := buildDatabaseConfigMapBody("analytics", endpoints) + c := testClient(t, scheme) + + err := reconcileRoleConfigMaps(context.Background(), c, scheme, postgresDB, endpoints) + + require.NoError(t, err) + + gotPayments := &corev1.ConfigMap{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: wantPaymentsName, Namespace: postgresDB.Namespace}, gotPayments)) + assert.Equal(t, wantManagedBy, gotPayments.Labels[labelManagedBy]) + assert.Equal(t, wantPaymentsData, gotPayments.Data) + require.Len(t, gotPayments.OwnerReferences, 1) + assert.Equal(t, wantOwnerUID, gotPayments.OwnerReferences[0].UID) + + gotAnalytics := &corev1.ConfigMap{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: wantAnalyticsName, Namespace: postgresDB.Namespace}, gotAnalytics)) + assert.Equal(t, wantManagedBy, gotAnalytics.Labels[labelManagedBy]) + assert.Equal(t, wantAnalyticsData, gotAnalytics.Data) + require.Len(t, gotAnalytics.OwnerReferences, 1) + assert.Equal(t, wantOwnerUID, gotAnalytics.OwnerReferences[0].UID) + }) + + t.Run("re-adopts retained configmap", func(t *testing.T) { + postgresDB := &enterprisev4.PostgresDatabase{ + TypeMeta: metav1.TypeMeta{ + APIVersion: enterprisev4.GroupVersion.String(), + Kind: "PostgresDatabase", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + UID: types.UID("postgresdb-uid"), + }, + Spec: enterprisev4.PostgresDatabaseSpec{ + Databases: []enterprisev4.DatabaseDefinition{ + {Name: "payments"}, + }, + }, + } + cmName := "primary-payments-config" + wantManagedBy := "splunk-operator" + wantOwnerUID := postgresDB.UID + wantKeep := "true" + wantData := buildDatabaseConfigMapBody("payments", endpoints) + retained := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: cmName, + Namespace: postgresDB.Namespace, + Labels: map[string]string{labelManagedBy: wantManagedBy}, + Annotations: 
map[string]string{ + annotationRetainedFrom: postgresDB.Name, + "keep": wantKeep, + }, + OwnerReferences: []metav1.OwnerReference{ + {UID: types.UID("old-owner"), Name: "old-owner"}, + }, + }, + Data: map[string]string{ + "dbname": "stale", + }, + } + c := testClient(t, scheme, retained) + + err := reconcileRoleConfigMaps(context.Background(), c, scheme, postgresDB, endpoints) + + require.NoError(t, err) + + got := &corev1.ConfigMap{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: cmName, Namespace: postgresDB.Namespace}, got)) + assert.Equal(t, wantManagedBy, got.Labels[labelManagedBy]) + assert.Equal(t, wantKeep, got.Annotations["keep"]) + _, hasRetainedAnnotation := got.Annotations[annotationRetainedFrom] + assert.False(t, hasRetainedAnnotation) + assert.Equal(t, wantData, got.Data) + assert.Contains(t, got.OwnerReferences, metav1.OwnerReference{ + APIVersion: enterprisev4.GroupVersion.String(), + Kind: "PostgresDatabase", + Name: postgresDB.Name, + UID: wantOwnerUID, + Controller: boolPtr(true), + BlockOwnerDeletion: boolPtr(true), + }) + }) +} + +func TestBuildDeletionPlan(t *testing.T) { + databases := []enterprisev4.DatabaseDefinition{ + {Name: "payments", DeletionPolicy: deletionPolicyRetain}, + {Name: "analytics"}, + {Name: "audit", DeletionPolicy: deletionPolicyRetain}, + } + wantRetainedNames := []string{"payments", "audit"} + wantDeletedNames := []string{"analytics"} + + got := buildDeletionPlan(databases) + + assert.ElementsMatch(t, wantRetainedNames, databaseNames(got.retained)) + assert.ElementsMatch(t, wantDeletedNames, databaseNames(got.deleted)) +} + +func TestBuildManagedRoles(t *testing.T) { + databases := []enterprisev4.DatabaseDefinition{ + {Name: "payments"}, + {Name: "analytics"}, + } + want := []enterprisev4.ManagedRole{ + { + Name: "payments_admin", + Exists: true, + PasswordSecretRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "primary-payments-admin"}, + Key: secretKeyPassword, + }, + }, + { + Name: "payments_rw", + Exists: true, + PasswordSecretRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "primary-payments-rw"}, + Key: secretKeyPassword, + }, + }, + { + Name: "analytics_admin", + Exists: true, + PasswordSecretRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "primary-analytics-admin"}, + Key: secretKeyPassword, + }, + }, + { + Name: "analytics_rw", + Exists: true, + PasswordSecretRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "primary-analytics-rw"}, + Key: secretKeyPassword, + }, + }, + } + + got := buildManagedRoles("primary", databases) + + assert.Equal(t, want, got) +} + +func TestBuildManagedRolesPatch(t *testing.T) { + cluster := &enterprisev4.PostgresCluster{ + TypeMeta: metav1.TypeMeta{ + APIVersion: enterprisev4.GroupVersion.String(), + Kind: "PostgresCluster", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + }, + } + roles := buildManagedRoles("primary", []enterprisev4.DatabaseDefinition{{Name: "payments"}}) + + got := buildManagedRolesPatch(cluster, roles) + + assert.Equal(t, cluster.APIVersion, got.Object["apiVersion"]) + assert.Equal(t, cluster.Kind, got.Object["kind"]) + assert.Equal(t, map[string]any{"name": cluster.Name, "namespace": cluster.Namespace}, got.Object["metadata"]) + assert.Equal(t, map[string]any{"managedRoles": roles}, got.Object["spec"]) +} + +func TestPatchManagedRolesOnDeletion(t *testing.T) { + scheme 
:= testScheme(t) + postgresDB := &enterprisev4.PostgresDatabase{ + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + }, + } + cluster := &enterprisev4.PostgresCluster{ + TypeMeta: metav1.TypeMeta{ + APIVersion: enterprisev4.GroupVersion.String(), + Kind: "PostgresCluster", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + }, + } + retained := []enterprisev4.DatabaseDefinition{{Name: "payments"}} + want := buildManagedRoles(postgresDB.Name, retained) + c := testClient(t, scheme, cluster) + + err := patchManagedRolesOnDeletion(context.Background(), c, postgresDB, cluster, retained) + + require.NoError(t, err) + + got := &enterprisev4.PostgresCluster{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: cluster.Name, Namespace: cluster.Namespace}, got)) + assert.Equal(t, want, got.Spec.ManagedRoles) +} + +func TestStripOwnerReference(t *testing.T) { + obj := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + OwnerReferences: []metav1.OwnerReference{ + {UID: types.UID("remove-me"), Name: "db"}, + {UID: types.UID("keep-me"), Name: "cluster"}, + }, + }, + } + + stripOwnerReference(obj, types.UID("remove-me")) + + require.Len(t, obj.OwnerReferences, 1) + assert.Equal(t, types.UID("keep-me"), obj.OwnerReferences[0].UID) +} + +func TestBuildPasswordSecret(t *testing.T) { + postgresDB := &enterprisev4.PostgresDatabase{ + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + }, + } + wantName := "primary-payments-admin" + wantNamespace := "dbs" + wantManagedBy := "splunk-operator" + wantReload := "true" + wantUsername := "payments_admin" + wantPassword := "topsecret" + + got := buildPasswordSecret(postgresDB, wantName, wantUsername, wantPassword) + + assert.Equal(t, wantName, got.Name) + assert.Equal(t, wantNamespace, got.Namespace) + assert.Equal(t, wantManagedBy, got.Labels[labelManagedBy]) + assert.Equal(t, wantReload, got.Labels[labelCNPGReload]) + assert.Equal(t, wantUsername, string(got.Data["username"])) + assert.Equal(t, wantPassword, string(got.Data[secretKeyPassword])) +} + +func TestBuildCNPGDatabaseSpec(t *testing.T) { + tests := []struct { + name string + db enterprisev4.DatabaseDefinition + want cnpgv1.DatabaseSpec + }{ + { + name: "uses delete reclaim policy by default", + db: enterprisev4.DatabaseDefinition{Name: "payments"}, + want: cnpgv1.DatabaseSpec{ + Name: "payments", + Owner: "payments_admin", + ClusterRef: corev1.LocalObjectReference{Name: "cnpg-primary"}, + ReclaimPolicy: cnpgv1.DatabaseReclaimDelete, + }, + }, + { + name: "uses retain reclaim policy when deletion policy is retain", + db: enterprisev4.DatabaseDefinition{Name: "analytics", DeletionPolicy: deletionPolicyRetain}, + want: cnpgv1.DatabaseSpec{ + Name: "analytics", + Owner: "analytics_admin", + ClusterRef: corev1.LocalObjectReference{Name: "cnpg-primary"}, + ReclaimPolicy: cnpgv1.DatabaseReclaimRetain, + }, + }, + } + + for _, tst := range tests { + t.Run(tst.name, func(t *testing.T) { + got := buildCNPGDatabaseSpec("cnpg-primary", tst.db) + assert.Equal(t, tst.want, got) + }) + } +} + +func TestBuildDatabaseConfigMapBody(t *testing.T) { + tests := []struct { + name string + endpoints clusterEndpoints + want map[string]string + }{ + { + name: "without pooler endpoints", + endpoints: clusterEndpoints{ + RWHost: "rw.default.svc.cluster.local", + ROHost: "ro.default.svc.cluster.local", + }, + want: map[string]string{ + "dbname": "payments", + "port": postgresPort, + "rw-host": "rw.default.svc.cluster.local", + "ro-host": 
"ro.default.svc.cluster.local", + "admin-user": "payments_admin", + "rw-user": "payments_rw", + }, + }, + { + name: "includes pooler endpoints when available", + endpoints: clusterEndpoints{ + RWHost: "rw.default.svc.cluster.local", + ROHost: "ro.default.svc.cluster.local", + PoolerRWHost: "pooler-rw.default.svc.cluster.local", + PoolerROHost: "pooler-ro.default.svc.cluster.local", + }, + want: map[string]string{ + "dbname": "payments", + "port": postgresPort, + "rw-host": "rw.default.svc.cluster.local", + "ro-host": "ro.default.svc.cluster.local", + "admin-user": "payments_admin", + "rw-user": "payments_rw", + "pooler-rw-host": "pooler-rw.default.svc.cluster.local", + "pooler-ro-host": "pooler-ro.default.svc.cluster.local", + }, + }, + } + + for _, tst := range tests { + t.Run(tst.name, func(t *testing.T) { + got := buildDatabaseConfigMapBody("payments", tst.endpoints) + assert.Equal(t, tst.want, got) + }) + } +} + +func TestResolveClusterEndpoints(t *testing.T) { + tests := []struct { + name string + cluster *enterprisev4.PostgresCluster + cnpg *cnpgv1.Cluster + namespace string + want clusterEndpoints + }{ + { + name: "without connection pooler", + cluster: &enterprisev4.PostgresCluster{}, + cnpg: &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{Name: "cnpg-primary"}, + Status: cnpgv1.ClusterStatus{ + WriteService: "primary-rw", + ReadService: "primary-ro", + }, + }, + namespace: "dbs", + want: clusterEndpoints{ + RWHost: "primary-rw.dbs.svc.cluster.local", + ROHost: "primary-ro.dbs.svc.cluster.local", + }, + }, + { + name: "with connection pooler", + cluster: &enterprisev4.PostgresCluster{ + Status: enterprisev4.PostgresClusterStatus{ + ConnectionPoolerStatus: &enterprisev4.ConnectionPoolerStatus{Enabled: true}, + }, + }, + cnpg: &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{Name: "cnpg-primary"}, + Status: cnpgv1.ClusterStatus{ + WriteService: "primary-rw", + ReadService: "primary-ro", + }, + }, + namespace: "dbs", + want: clusterEndpoints{ + RWHost: "primary-rw.dbs.svc.cluster.local", + ROHost: "primary-ro.dbs.svc.cluster.local", + PoolerRWHost: "cnpg-primary-pooler-rw.dbs.svc.cluster.local", + PoolerROHost: "cnpg-primary-pooler-ro.dbs.svc.cluster.local", + }, + }, + } + + for _, tst := range tests { + + t.Run(tst.name, func(t *testing.T) { + got := resolveClusterEndpoints(tst.cluster, tst.cnpg, tst.namespace) + assert.Equal(t, tst.want, got) + }) + } +} + +func TestPopulateDatabaseStatus(t *testing.T) { + postgresDB := &enterprisev4.PostgresDatabase{ + ObjectMeta: metav1.ObjectMeta{Name: "primary"}, + Spec: enterprisev4.PostgresDatabaseSpec{ + Databases: []enterprisev4.DatabaseDefinition{ + {Name: "payments"}, + {Name: "analytics"}, + }, + }, + } + want := []enterprisev4.DatabaseInfo{ + { + Name: "payments", + Ready: true, + AdminUserSecretRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "primary-payments-admin"}, + Key: secretKeyPassword, + }, + RWUserSecretRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "primary-payments-rw"}, + Key: secretKeyPassword, + }, + ConfigMapRef: &corev1.LocalObjectReference{Name: "primary-payments-config"}, + }, + { + Name: "analytics", + Ready: true, + AdminUserSecretRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "primary-analytics-admin"}, + Key: secretKeyPassword, + }, + RWUserSecretRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "primary-analytics-rw"}, + Key: secretKeyPassword, + 
}, + ConfigMapRef: &corev1.LocalObjectReference{Name: "primary-analytics-config"}, + }, + } + + got := populateDatabaseStatus(postgresDB) + + assert.Equal(t, want, got) +} + +func TestHasNewDatabases(t *testing.T) { + tests := []struct { + name string + postgresDB *enterprisev4.PostgresDatabase + want bool + }{ + { + name: "returns true when spec contains a new database", + postgresDB: &enterprisev4.PostgresDatabase{ + Spec: enterprisev4.PostgresDatabaseSpec{ + Databases: []enterprisev4.DatabaseDefinition{ + {Name: "payments"}, + {Name: "analytics"}, + }, + }, + Status: enterprisev4.PostgresDatabaseStatus{ + Databases: []enterprisev4.DatabaseInfo{ + {Name: "payments"}, + }, + }, + }, + want: true, + }, + { + name: "returns false when all spec databases already exist in status", + postgresDB: &enterprisev4.PostgresDatabase{ + Spec: enterprisev4.PostgresDatabaseSpec{ + Databases: []enterprisev4.DatabaseDefinition{ + {Name: "payments"}, + }, + }, + Status: enterprisev4.PostgresDatabaseStatus{ + Databases: []enterprisev4.DatabaseInfo{ + {Name: "payments"}, + {Name: "legacy-extra"}, + }, + }, + }, + want: false, + }, + } + + for _, tst := range tests { + + t.Run(tst.name, func(t *testing.T) { + got := hasNewDatabases(tst.postgresDB) + assert.Equal(t, tst.want, got) + }) + } +} + +func TestNamingHelpers(t *testing.T) { + tests := []struct { + name string + got string + want string + }{ + {name: "field manager", got: fieldManagerName("primary"), want: "postgresdatabase-primary"}, + {name: "admin role", got: adminRoleName("payments"), want: "payments_admin"}, + {name: "rw role", got: rwRoleName("payments"), want: "payments_rw"}, + {name: "cnpg database", got: cnpgDatabaseName("primary", "payments"), want: "primary-payments"}, + {name: "role secret", got: roleSecretName("primary", "payments", "admin"), want: "primary-payments-admin"}, + {name: "config map", got: configMapName("primary", "payments"), want: "primary-payments-config"}, + } + + for _, tst := range tests { + + t.Run(tst.name, func(t *testing.T) { + assert.Equal(t, tst.want, tst.got) + }) + } +} From ad02ceb2dbea4dd0cef3750556f0cb8a17b39b1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20P=C5=82oski?= Date: Wed, 25 Mar 2026 17:44:13 +0100 Subject: [PATCH 06/36] Simplify events filtering logic, small tweaks --- .../controller/postgrescluster_controller.go | 124 +++++++----------- pkg/postgresql/cluster/core/cluster.go | 29 ++-- pkg/postgresql/database/core/database.go | 4 +- 3 files changed, 63 insertions(+), 94 deletions(-) diff --git a/internal/controller/postgrescluster_controller.go b/internal/controller/postgrescluster_controller.go index dfa1f7eaf..163a07f9d 100644 --- a/internal/controller/postgrescluster_controller.go +++ b/internal/controller/postgrescluster_controller.go @@ -30,7 +30,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/predicate" ) @@ -61,6 +60,7 @@ func (r *PostgresClusterReconciler) Reconcile(ctx context.Context, req ctrl.Requ // SetupWithManager registers the controller and owned resource watches. func (r *PostgresClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). + WithEventFilter(predicate.Funcs{GenericFunc: func(event.GenericEvent) bool { return false }}). 
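+		// predicate.Funcs defaults any unset callback to true, so this filter
+		// only drops Generic events cluster-wide; Create/Update/Delete events
+		// still reach the per-watch predicates configured below.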
For(&enterprisev4.PostgresCluster{}, builder.WithPredicates(postgresClusterPredicator())).
 		Owns(&cnpgv1.Cluster{}, builder.WithPredicates(cnpgClusterPredicator())).
 		Owns(&cnpgv1.Pooler{}, builder.WithPredicates(cnpgPoolerPredicator())).
@@ -73,107 +73,71 @@ func (r *PostgresClusterReconciler) SetupWithManager(mgr ctrl.Manager) error {
 		Complete(r)
 }
 
-func deletionTimestampChanged(oldObj, newObj metav1.Object) bool {
-	return !equality.Semantic.DeepEqual(oldObj.GetDeletionTimestamp(), newObj.GetDeletionTimestamp())
-}
-
 func ownerReferencesChanged(oldObj, newObj metav1.Object) bool {
 	return !equality.Semantic.DeepEqual(oldObj.GetOwnerReferences(), newObj.GetOwnerReferences())
 }
 
-// postgresClusterPredicator triggers on generation changes, deletion, and finalizer transitions.
+// postgresClusterPredicator triggers on spec changes, deletion, and finalizer transitions.
 func postgresClusterPredicator() predicate.Predicate {
-	return predicate.Funcs{
-		CreateFunc: func(event.CreateEvent) bool { return true },
-		DeleteFunc: func(event.DeleteEvent) bool { return true },
-		UpdateFunc: func(e event.UpdateEvent) bool {
-			oldObj, oldOK := e.ObjectOld.(*enterprisev4.PostgresCluster)
-			newObj, newOK := e.ObjectNew.(*enterprisev4.PostgresCluster)
-			if !oldOK || !newOK {
-				return true
-			}
-			if oldObj.Generation != newObj.Generation {
-				return true
-			}
-			if deletionTimestampChanged(oldObj, newObj) {
-				return true
-			}
-			// Finalizer changes indicate registration or deletion - always reconcile.
-			return controllerutil.ContainsFinalizer(oldObj, clustercore.PostgresClusterFinalizerName) !=
-				controllerutil.ContainsFinalizer(newObj, clustercore.PostgresClusterFinalizerName)
+	return predicate.Or(
+		predicate.GenerationChangedPredicate{},
+		predicate.Funcs{
+			UpdateFunc: func(e event.UpdateEvent) bool {
+				// DeletionTimestamp set means the object entered the deletion phase.
+				if !equality.Semantic.DeepEqual(e.ObjectOld.GetDeletionTimestamp(), e.ObjectNew.GetDeletionTimestamp()) {
+					return true
+				}
+				// Finalizer list change signals a cleanup lifecycle transition.
+				return !equality.Semantic.DeepEqual(e.ObjectOld.GetFinalizers(), e.ObjectNew.GetFinalizers())
+			},
 		},
-		GenericFunc: func(event.GenericEvent) bool { return false },
-	}
+	)
 }
 
-// cnpgClusterPredicator triggers only on phase changes or owner reference changes.
+// cnpgClusterPredicator triggers on spec changes, phase changes, or owner reference changes.
+// Generation catches spec drift before CNPG reflects it in status.
 func cnpgClusterPredicator() predicate.Predicate {
-	return predicate.Funcs{
-		CreateFunc: func(event.CreateEvent) bool { return true },
-		DeleteFunc: func(event.DeleteEvent) bool { return true },
-		UpdateFunc: func(e event.UpdateEvent) bool {
-			oldObj, oldOK := e.ObjectOld.(*cnpgv1.Cluster)
-			newObj, newOK := e.ObjectNew.(*cnpgv1.Cluster)
-			if !oldOK || !newOK {
-				return true
-			}
-			return oldObj.Status.Phase != newObj.Status.Phase ||
-				ownerReferencesChanged(oldObj, newObj)
+	return predicate.Or(
+		predicate.GenerationChangedPredicate{},
+		predicate.Funcs{
+			UpdateFunc: func(e event.UpdateEvent) bool {
+				oldObj := e.ObjectOld.(*cnpgv1.Cluster)
+				newObj := e.ObjectNew.(*cnpgv1.Cluster)
+				return oldObj.Status.Phase != newObj.Status.Phase ||
+					ownerReferencesChanged(oldObj, newObj)
+			},
 		},
-		GenericFunc: func(event.GenericEvent) bool { return false },
-	}
+	)
 }
 
-// cnpgPoolerPredicator triggers only on instance count changes.
+// cnpgPoolerPredicator triggers on spec changes or instance count changes.
+// Generation catches spec drift before CNPG reflects it in instance status. func cnpgPoolerPredicator() predicate.Predicate { - return predicate.Funcs{ - CreateFunc: func(event.CreateEvent) bool { return true }, - DeleteFunc: func(event.DeleteEvent) bool { return true }, - UpdateFunc: func(e event.UpdateEvent) bool { - oldObj, oldOK := e.ObjectOld.(*cnpgv1.Pooler) - newObj, newOK := e.ObjectNew.(*cnpgv1.Pooler) - if !oldOK || !newOK { - return true - } - return oldObj.Status.Instances != newObj.Status.Instances + return predicate.Or( + predicate.GenerationChangedPredicate{}, + predicate.Funcs{ + UpdateFunc: func(e event.UpdateEvent) bool { + oldObj := e.ObjectOld.(*cnpgv1.Pooler) + newObj := e.ObjectNew.(*cnpgv1.Pooler) + return oldObj.Status.Instances != newObj.Status.Instances + }, }, - GenericFunc: func(event.GenericEvent) bool { return false }, - } + ) } -// secretPredicator triggers only on owner reference changes. +// secretPredicator triggers only when ownership changes. +// In retain-state mode we release ownership (remove ownerRef) without deleting the Secret, +// so this transition must trigger reconciliation to update our tracking state. func secretPredicator() predicate.Predicate { return predicate.Funcs{ - CreateFunc: func(event.CreateEvent) bool { return true }, - DeleteFunc: func(event.DeleteEvent) bool { return true }, UpdateFunc: func(e event.UpdateEvent) bool { - oldObj, oldOK := e.ObjectOld.(*corev1.Secret) - newObj, newOK := e.ObjectNew.(*corev1.Secret) - if !oldOK || !newOK { - return true - } - return ownerReferencesChanged(oldObj, newObj) + return ownerReferencesChanged(e.ObjectOld, e.ObjectNew) }, - GenericFunc: func(event.GenericEvent) bool { return false }, } } -// configMapPredicator triggers on data, label, annotation, or owner reference changes. +// configMapPredicator triggers on any content change. +// ConfigMap has no status subresource, so every resourceVersion bump is a real change. 
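+//
+// Illustrative only: modulo the upstream nil-object guards, this predicate is
+// equivalent to the hand-rolled form
+//
+//	predicate.Funcs{UpdateFunc: func(e event.UpdateEvent) bool {
+//		return e.ObjectOld.GetResourceVersion() != e.ObjectNew.GetResourceVersion()
+//	}}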
func configMapPredicator() predicate.Predicate { - return predicate.Funcs{ - CreateFunc: func(event.CreateEvent) bool { return true }, - DeleteFunc: func(event.DeleteEvent) bool { return true }, - UpdateFunc: func(e event.UpdateEvent) bool { - oldObj, oldOK := e.ObjectOld.(*corev1.ConfigMap) - newObj, newOK := e.ObjectNew.(*corev1.ConfigMap) - if !oldOK || !newOK { - return true - } - return !equality.Semantic.DeepEqual(oldObj.Data, newObj.Data) || - !equality.Semantic.DeepEqual(oldObj.Labels, newObj.Labels) || - !equality.Semantic.DeepEqual(oldObj.Annotations, newObj.Annotations) || - ownerReferencesChanged(oldObj, newObj) - }, - GenericFunc: func(event.GenericEvent) bool { return false }, - } + return predicate.ResourceVersionChangedPredicate{} } diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 3459101a4..0b6bb5670 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -18,6 +18,7 @@ package core import ( "context" + "errors" "fmt" cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" @@ -72,11 +73,13 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim return ctrl.Result{}, nil } logger.Error(err, "Failed to handle finalizer") + errs := []error{err} if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterDeleteFailed, fmt.Sprintf("Failed to delete resources during cleanup: %v", err), failedClusterPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") + errs = append(errs, statusErr) } - return ctrl.Result{}, err + return ctrl.Result{}, errors.Join(errs...) } if postgresCluster.GetDeletionTimestamp() != nil { logger.Info("PostgresCluster is being deleted, cleanup complete") @@ -88,8 +91,8 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim controllerutil.AddFinalizer(postgresCluster, PostgresClusterFinalizerName) if err := c.Update(ctx, postgresCluster); err != nil { if apierrors.IsConflict(err) { - logger.Info("Conflict while adding finalizer, will retry on next reconcile") - return ctrl.Result{Requeue: true}, nil + logger.Error(err, "Conflict while adding finalizer. Retrying...") + return ctrl.Result{}, fmt.Errorf("conflict while adding finalizer: %w", err) } logger.Error(err, "Failed to add finalizer to PostgresCluster") return ctrl.Result{}, fmt.Errorf("failed to add finalizer: %w", err) @@ -98,7 +101,7 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim return ctrl.Result{}, nil } - // 2. Load the referenced PostgresClusterClass. + // Load the referenced PostgresClusterClass. clusterClass := &enterprisev4.PostgresClusterClass{} if err := c.Get(ctx, client.ObjectKey{Name: postgresCluster.Spec.Class}, clusterClass); err != nil { logger.Error(err, "Unable to fetch referenced PostgresClusterClass", "className", postgresCluster.Spec.Class) @@ -109,7 +112,7 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim return ctrl.Result{}, err } - // 3. Merge PostgresClusterSpec on top of PostgresClusterClass defaults. + // Merge PostgresClusterSpec on top of PostgresClusterClass defaults. mergedConfig, err := getMergedConfig(clusterClass, postgresCluster) if err != nil { logger.Error(err, "Failed to merge PostgresCluster configuration") @@ -120,7 +123,7 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim return ctrl.Result{}, err } - // 4. Resolve or derive the superuser secret name. 
+ // Resolve or derive the superuser secret name. if postgresCluster.Status.Resources != nil && postgresCluster.Status.Resources.SuperUserSecretRef != nil { postgresSecretName = postgresCluster.Status.Resources.SuperUserSecretRef.Name logger.Info("Using existing secret from status", "name", postgresSecretName) @@ -189,10 +192,10 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim } } - // 5. Build desired CNPG Cluster spec. + // Build desired CNPG Cluster spec. desiredSpec := buildCNPGClusterSpec(mergedConfig, postgresSecretName) - // 6. Fetch existing CNPG Cluster or create it. + // Fetch existing CNPG Cluster or create it. existingCNPG := &cnpgv1.Cluster{} err = c.Get(ctx, types.NamespacedName{Name: postgresCluster.Name, Namespace: postgresCluster.Namespace}, existingCNPG) switch { @@ -222,7 +225,7 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim return ctrl.Result{}, err } - // 7. Patch CNPG Cluster spec if drift detected. + // Patch CNPG Cluster spec if drift detected. cnpgCluster = existingCNPG currentNormalized := normalizeCNPGClusterSpec(cnpgCluster.Spec, mergedConfig.Spec.PostgreSQLConfig) desiredNormalized := normalizeCNPGClusterSpec(desiredSpec, mergedConfig.Spec.PostgreSQLConfig) @@ -249,7 +252,7 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim } } - // 7a. Reconcile ManagedRoles. + // Reconcile ManagedRoles. if err := reconcileManagedRoles(ctx, c, postgresCluster, cnpgCluster); err != nil { logger.Error(err, "Failed to reconcile managed roles") if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonManagedRolesFailed, @@ -259,7 +262,7 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim return ctrl.Result{}, err } - // 7b. Reconcile Connection Pooler. + // Reconcile Connection Pooler. poolerEnabled = mergedConfig.Spec.ConnectionPoolerEnabled != nil && *mergedConfig.Spec.ConnectionPoolerEnabled switch { case !poolerEnabled: @@ -342,7 +345,7 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim } } - // 8. Reconcile ConfigMap when CNPG cluster is healthy. + // Reconcile ConfigMap when CNPG cluster is healthy. if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy { logger.Info("CNPG Cluster is ready, reconciling ConfigMap for connection details") desiredCM, err := generateConfigMap(ctx, c, scheme, postgresCluster, cnpgCluster, postgresSecretName) @@ -387,7 +390,7 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim } } - // 9. Final status sync. + // Final status sync. 
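+	// Runs after all resource reconciliation above, so the phase and conditions
+	// reported here reflect everything observed during this pass.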
if err := syncStatus(ctx, c, postgresCluster, cnpgCluster); err != nil { logger.Error(err, "Failed to sync status") if apierrors.IsConflict(err) { diff --git a/pkg/postgresql/database/core/database.go b/pkg/postgresql/database/core/database.go index 50c99beed..dadd1fbbd 100644 --- a/pkg/postgresql/database/core/database.go +++ b/pkg/postgresql/database/core/database.go @@ -105,10 +105,12 @@ func PostgresDatabaseService( "If you deleted a previous PostgresDatabase, recreate it with the original name to re-adopt the orphaned resources.", strings.Join(roleConflicts, ", ")) logger.Error(nil, conflictMsg) + errs := []error{fmt.Errorf("role conflict detected: %s", strings.Join(roleConflicts, ", "))} if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonRoleConflict, conflictMsg, failedDBPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") + errs = append(errs, fmt.Errorf("failed to update status: %w", statusErr)) } - return ctrl.Result{}, nil + return ctrl.Result{}, stderrors.Join(errs...) } // We need the CNPG Cluster directly because PostgresCluster status does not yet From 0936891f76a30c652f9b5b4c81961df119415f05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20P=C5=82oski?= Date: Thu, 26 Mar 2026 17:53:26 +0100 Subject: [PATCH 07/36] Apply PR suggestions and fixes --- api/v4/postgrescluster_types.go | 8 +-- api/v4/postgresclusterclass_types.go | 8 ++- api/v4/postgresdatabase_types.go | 6 +- api/v4/zz_generated.deepcopy.go | 5 -- ...ise.splunk.com_postgresclusterclasses.yaml | 11 ++-- ...nterprise.splunk.com_postgresclusters.yaml | 39 ++--------- ...terprise.splunk.com_postgresdatabases.yaml | 10 ++- .../patch_preserve_unknown_fields.yaml | 26 +++++++- pkg/postgresql/cluster/core/cluster.go | 66 ++++++++++++++++--- pkg/postgresql/database/core/database.go | 8 ++- 10 files changed, 119 insertions(+), 68 deletions(-) diff --git a/api/v4/postgrescluster_types.go b/api/v4/postgrescluster_types.go index 6ddb14c9d..3e3dd0da7 100644 --- a/api/v4/postgrescluster_types.go +++ b/api/v4/postgrescluster_types.go @@ -44,7 +44,7 @@ type ManagedRole struct { // Validation rules ensure immutability of Class, and that Storage and PostgresVersion can only be set once and cannot be removed or downgraded. // +kubebuilder:validation:XValidation:rule="!has(oldSelf.postgresVersion) || (has(self.postgresVersion) && int(self.postgresVersion.split('.')[0]) >= int(oldSelf.postgresVersion.split('.')[0]))",messageExpression="!has(self.postgresVersion) ? 'postgresVersion cannot be removed once set (was: ' + oldSelf.postgresVersion + ')' : 'postgresVersion major version cannot be downgraded (from: ' + oldSelf.postgresVersion + ', to: ' + self.postgresVersion + ')'" // +kubebuilder:validation:XValidation:rule="!has(oldSelf.storage) || (has(self.storage) && quantity(self.storage).compareTo(quantity(oldSelf.storage)) >= 0)",messageExpression="!has(self.storage) ? 'storage cannot be removed once set (was: ' + string(oldSelf.storage) + ')' : 'storage size cannot be decreased (from: ' + string(oldSelf.storage) + ', to: ' + string(self.storage) + ')'" -// +kubebuilder:validation:XValidation:rule="!has(self.connectionPoolerConfig)",message="connectionPoolerConfig cannot be overridden on PostgresCluster" + type PostgresClusterSpec struct { // This field is IMMUTABLE after creation. // +kubebuilder:validation:Required @@ -92,14 +92,9 @@ type PostgresClusterSpec struct { // ConnectionPoolerEnabled controls whether PgBouncer connection pooling is deployed for this cluster. 
// When set, takes precedence over the class-level connectionPoolerEnabled value. - // +kubebuilder:default=false // +optional ConnectionPoolerEnabled *bool `json:"connectionPoolerEnabled,omitempty"` - // Only takes effect when connection pooling is enabled. - // +optional - ConnectionPoolerConfig *ConnectionPoolerConfig `json:"connectionPoolerConfig,omitempty"` - // ManagedRoles contains PostgreSQL roles that should be created in the cluster. // This field supports Server-Side Apply with per-role granularity, allowing // multiple PostgresDatabase controllers to manage different roles independently. @@ -185,6 +180,7 @@ type ConnectionPoolerStatus struct { // +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` // PostgresCluster is the Schema for the postgresclusters API. +// +kubebuilder:validation:XValidation:rule="size(self.metadata.name) <= 50",message="name must be 50 characters or fewer to accommodate derived resource names" type PostgresCluster struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` diff --git a/api/v4/postgresclusterclass_types.go b/api/v4/postgresclusterclass_types.go index 9945ec669..7f02e5633 100644 --- a/api/v4/postgresclusterclass_types.go +++ b/api/v4/postgresclusterclass_types.go @@ -24,8 +24,10 @@ import ( // +kubebuilder:validation:XValidation:rule="!has(self.cnpg) || self.provisioner == 'postgresql.cnpg.io'",message="cnpg config can only be set when provisioner is postgresql.cnpg.io" // +kubebuilder:validation:XValidation:rule="!has(self.config) || !has(self.config.connectionPoolerEnabled) || !self.config.connectionPoolerEnabled || (has(self.cnpg) && has(self.cnpg.connectionPooler))",message="cnpg.connectionPooler must be set when config.connectionPoolerEnabled is true" +// +kubebuilder:validation:XValidation:rule="self == oldSelf",message="PostgresClusterClass is immutable after creation" // PostgresClusterClassSpec defines the desired state of PostgresClusterClass. // PostgresClusterClass is immutable after creation - it serves as a template for Cluster CRs. + type PostgresClusterClassSpec struct { // Provisioner identifies which database provisioner to use. // Currently supported: "postgresql.cnpg.io" (CloudNativePG) @@ -174,9 +176,9 @@ type PostgresClusterClassStatus struct { // +kubebuilder:subresource:status // +kubebuilder:resource:scope=Cluster // +kubebuilder:printcolumn:name="Provisioner",type=string,JSONPath=`.spec.provisioner` -// +kubebuilder:printcolumn:name="Instances",type=integer,JSONPath=`.spec.postgresClusterConfig.instances` -// +kubebuilder:printcolumn:name="Storage",type=string,JSONPath=`.spec.postgresClusterConfig.storage` -// +kubebuilder:printcolumn:name="Version",type=string,JSONPath=`.spec.postgresClusterConfig.postgresVersion` +// +kubebuilder:printcolumn:name="Instances",type=integer,JSONPath=`.spec.config.instances` +// +kubebuilder:printcolumn:name="Storage",type=string,JSONPath=`.spec.config.storage` +// +kubebuilder:printcolumn:name="Version",type=string,JSONPath=`.spec.config.postgresVersion` // +kubebuilder:printcolumn:name="Phase",type=string,JSONPath=`.status.phase` // +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` diff --git a/api/v4/postgresdatabase_types.go b/api/v4/postgresdatabase_types.go index edab619b0..f89b7229a 100644 --- a/api/v4/postgresdatabase_types.go +++ b/api/v4/postgresdatabase_types.go @@ -23,6 +23,7 @@ import ( // PostgresDatabaseSpec defines the desired state of PostgresDatabase. 
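+// The clusterRef rule below is a CEL transition rule: the API server compares
+// self against oldSelf at admission time, so an update that repoints clusterRef
+// is rejected with "clusterRef is immutable" before the controller ever sees it.
+// The non-empty name check has no oldSelf, so it runs on create and update alike.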
// +kubebuilder:validation:XValidation:rule="self.clusterRef == oldSelf.clusterRef",message="clusterRef is immutable" +// +kubebuilder:validation:XValidation:rule="self.clusterRef.name != ''",message="clusterRef.name must not be empty" type PostgresDatabaseSpec struct { // Reference to Postgres Cluster managed by postgresCluster controller // +kubebuilder:validation:Required @@ -37,7 +38,9 @@ type PostgresDatabaseSpec struct { type DatabaseDefinition struct { // +kubebuilder:validation:Required + // +kubebuilder:validation:MinLength=1 // +kubebuilder:validation:MaxLength=30 + // +kubebuilder:validation:Pattern=`^[a-z_][a-z0-9_]*$` Name string `json:"name"` Extensions []string `json:"extensions,omitempty"` // +kubebuilder:validation:Enum=Delete;Retain @@ -51,7 +54,7 @@ type DatabaseInfo struct { DatabaseRef *corev1.LocalObjectReference `json:"databaseRef,omitempty"` AdminUserSecretRef *corev1.SecretKeySelector `json:"adminUserSecretRef,omitempty"` RWUserSecretRef *corev1.SecretKeySelector `json:"rwUserSecretRef,omitempty"` - ConfigMapRef *corev1.LocalObjectReference `json:"configMap,omitempty"` + ConfigMapRef *corev1.LocalObjectReference `json:"configMapRef,omitempty"` } // PostgresDatabaseStatus defines the observed state of PostgresDatabase. @@ -74,6 +77,7 @@ type PostgresDatabaseStatus struct { // +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` // PostgresDatabase is the Schema for the postgresdatabases API. +// +kubebuilder:validation:XValidation:rule="size(self.metadata.name) <= 50",message="name must be 50 characters or fewer to accommodate derived resource names" type PostgresDatabase struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` diff --git a/api/v4/zz_generated.deepcopy.go b/api/v4/zz_generated.deepcopy.go index d9535fb93..c698411c7 100644 --- a/api/v4/zz_generated.deepcopy.go +++ b/api/v4/zz_generated.deepcopy.go @@ -1413,11 +1413,6 @@ func (in *PostgresClusterSpec) DeepCopyInto(out *PostgresClusterSpec) { *out = new(bool) **out = **in } - if in.ConnectionPoolerConfig != nil { - in, out := &in.ConnectionPoolerConfig, &out.ConnectionPoolerConfig - *out = new(ConnectionPoolerConfig) - (*in).DeepCopyInto(*out) - } if in.ManagedRoles != nil { in, out := &in.ManagedRoles, &out.ManagedRoles *out = make([]ManagedRole, len(*in)) diff --git a/config/crd/bases/enterprise.splunk.com_postgresclusterclasses.yaml b/config/crd/bases/enterprise.splunk.com_postgresclusterclasses.yaml index 70ef3536b..cd3d7ab7a 100644 --- a/config/crd/bases/enterprise.splunk.com_postgresclusterclasses.yaml +++ b/config/crd/bases/enterprise.splunk.com_postgresclusterclasses.yaml @@ -18,13 +18,13 @@ spec: - jsonPath: .spec.provisioner name: Provisioner type: string - - jsonPath: .spec.postgresClusterConfig.instances + - jsonPath: .spec.config.instances name: Instances type: integer - - jsonPath: .spec.postgresClusterConfig.storage + - jsonPath: .spec.config.storage name: Storage type: string - - jsonPath: .spec.postgresClusterConfig.postgresVersion + - jsonPath: .spec.config.postgresVersion name: Version type: string - jsonPath: .status.phase @@ -58,9 +58,6 @@ spec: metadata: type: object spec: - description: |- - PostgresClusterClassSpec defines the desired state of PostgresClusterClass. - PostgresClusterClass is immutable after creation - it serves as a template for Cluster CRs. 
properties: cnpg: description: |- @@ -251,6 +248,8 @@ spec: is true rule: '!has(self.config) || !has(self.config.connectionPoolerEnabled) || !self.config.connectionPoolerEnabled || (has(self.cnpg) && has(self.cnpg.connectionPooler))' + - message: PostgresClusterClass is immutable after creation + rule: self == oldSelf status: description: PostgresClusterClassStatus defines the observed state of PostgresClusterClass. diff --git a/config/crd/bases/enterprise.splunk.com_postgresclusters.yaml b/config/crd/bases/enterprise.splunk.com_postgresclusters.yaml index 14ba142d6..09c7de13c 100644 --- a/config/crd/bases/enterprise.splunk.com_postgresclusters.yaml +++ b/config/crd/bases/enterprise.splunk.com_postgresclusters.yaml @@ -47,9 +47,6 @@ spec: metadata: type: object spec: - description: |- - PostgresClusterSpec defines the desired state of PostgresCluster. - Validation rules ensure immutability of Class, and that Storage and PostgresVersion can only be set once and cannot be removed or downgraded. properties: class: description: This field is IMMUTABLE after creation. @@ -66,37 +63,7 @@ spec: - Delete - Retain type: string - connectionPoolerConfig: - description: Only takes effect when connection pooling is enabled. - properties: - config: - additionalProperties: - type: string - description: |- - Config contains PgBouncer configuration parameters. - Passed directly to CNPG Pooler spec.pgbouncer.parameters. - See: https://cloudnative-pg.io/docs/1.28/connection_pooling/#pgbouncer-configuration-options - type: object - instances: - default: 3 - description: |- - Instances is the number of PgBouncer pod replicas. - Higher values provide better availability and load distribution. - format: int32 - maximum: 10 - minimum: 1 - type: integer - mode: - default: transaction - description: Mode defines the connection pooling strategy. - enum: - - session - - transaction - - statement - type: string - type: object connectionPoolerEnabled: - default: false description: |- ConnectionPoolerEnabled controls whether PgBouncer connection pooling is deployed for this cluster. When set, takes precedence over the class-level connectionPoolerEnabled value. @@ -270,8 +237,6 @@ spec: '' + string(self.storage) + '')''' rule: '!has(oldSelf.storage) || (has(self.storage) && quantity(self.storage).compareTo(quantity(oldSelf.storage)) >= 0)' - - message: connectionPoolerConfig cannot be overridden on PostgresCluster - rule: '!has(self.connectionPoolerConfig)' status: description: PostgresClusterStatus defines the observed state of PostgresCluster. 
properties: @@ -463,6 +428,10 @@ spec: type: object type: object type: object + x-kubernetes-validations: + - message: name must be 50 characters or fewer to accommodate derived resource + names + rule: size(self.metadata.name) <= 50 served: true storage: true subresources: diff --git a/config/crd/bases/enterprise.splunk.com_postgresdatabases.yaml b/config/crd/bases/enterprise.splunk.com_postgresdatabases.yaml index d8df534d3..dbe0fef64 100644 --- a/config/crd/bases/enterprise.splunk.com_postgresdatabases.yaml +++ b/config/crd/bases/enterprise.splunk.com_postgresdatabases.yaml @@ -79,6 +79,8 @@ spec: type: array name: maxLength: 30 + minLength: 1 + pattern: ^[a-z_][a-z0-9_]*$ type: string required: - name @@ -96,6 +98,8 @@ spec: x-kubernetes-validations: - message: clusterRef is immutable rule: self.clusterRef == oldSelf.clusterRef + - message: clusterRef.name must not be empty + rule: self.clusterRef.name != '' status: description: PostgresDatabaseStatus defines the observed state of PostgresDatabase. properties: @@ -182,7 +186,7 @@ spec: - key type: object x-kubernetes-map-type: atomic - configMap: + configMapRef: description: |- LocalObjectReference contains enough information to let you locate the referenced object inside the same namespace. @@ -253,6 +257,10 @@ spec: type: string type: object type: object + x-kubernetes-validations: + - message: name must be 50 characters or fewer to accommodate derived resource + names + rule: size(self.metadata.name) <= 50 served: true storage: true subresources: diff --git a/config/crd/patches/patch_preserve_unknown_fields.yaml b/config/crd/patches/patch_preserve_unknown_fields.yaml index 1eacab2e3..b614d4518 100644 --- a/config/crd/patches/patch_preserve_unknown_fields.yaml +++ b/config/crd/patches/patch_preserve_unknown_fields.yaml @@ -47,10 +47,34 @@ metadata: spec: preserveUnknownFields: false ---- +--- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: searchheadclusters.enterprise.splunk.com +spec: + preserveUnknownFields: false + +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: postgresclusters.enterprise.splunk.com +spec: + preserveUnknownFields: false + +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: postgresclusterclasses.enterprise.splunk.com +spec: + preserveUnknownFields: false + +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: postgresdatabases.enterprise.splunk.com spec: preserveUnknownFields: false \ No newline at end of file diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 0b6bb5670..d073b79cf 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -91,8 +91,8 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim controllerutil.AddFinalizer(postgresCluster, PostgresClusterFinalizerName) if err := c.Update(ctx, postgresCluster); err != nil { if apierrors.IsConflict(err) { - logger.Error(err, "Conflict while adding finalizer. 
Retrying...") - return ctrl.Result{}, fmt.Errorf("conflict while adding finalizer: %w", err) + logger.Info("Conflict while adding finalizer, will requeue") + return ctrl.Result{Requeue: true}, nil } logger.Error(err, "Failed to add finalizer to PostgresCluster") return ctrl.Result{}, fmt.Errorf("failed to add finalizer: %w", err) @@ -264,6 +264,30 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim // Reconcile Connection Pooler. poolerEnabled = mergedConfig.Spec.ConnectionPoolerEnabled != nil && *mergedConfig.Spec.ConnectionPoolerEnabled + + rwPoolerExists, err := poolerExists(ctx, c, postgresCluster, readWriteEndpoint) + if err != nil { + logger.Error(err, "Failed to check RW pooler existence") + errs := []error{err} + if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, + fmt.Sprintf("Failed to check pooler existence: %v", err), failedClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + errs = append(errs, statusErr) + } + return ctrl.Result{}, errors.Join(errs...) + } + roPoolerExists, err := poolerExists(ctx, c, postgresCluster, readOnlyEndpoint) + if err != nil { + logger.Error(err, "Failed to check RO pooler existence") + errs := []error{err} + if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, + fmt.Sprintf("Failed to check pooler existence: %v", err), failedClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + errs = append(errs, statusErr) + } + return ctrl.Result{}, errors.Join(errs...) + } + switch { case !poolerEnabled: if err := deleteConnectionPoolers(ctx, c, postgresCluster); err != nil { @@ -277,7 +301,7 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim postgresCluster.Status.ConnectionPoolerStatus = nil meta.RemoveStatusCondition(&postgresCluster.Status.Conditions, string(poolerReady)) - case !poolerExists(ctx, c, postgresCluster, readWriteEndpoint) || !poolerExists(ctx, c, postgresCluster, readOnlyEndpoint): + case !rwPoolerExists || !roPoolerExists: if mergedConfig.CNPG == nil || mergedConfig.CNPG.ConnectionPooler == nil { logger.Info("Connection pooler enabled but no config found in class or cluster spec, skipping", "class", postgresCluster.Spec.Class, "cluster", postgresCluster.Name) @@ -444,6 +468,9 @@ func getMergedConfig(class *enterprisev4.PostgresClusterClass, cluster *enterpri if len(result.PgHBA) == 0 { result.PgHBA = defaults.PgHBA } + if result.ConnectionPoolerEnabled == nil { + result.ConnectionPoolerEnabled = defaults.ConnectionPoolerEnabled + } } if result.Instances == nil || result.PostgresVersion == nil || result.Storage == nil { @@ -578,20 +605,20 @@ func poolerResourceName(clusterName, poolerType string) string { return fmt.Sprintf("%s%s%s", clusterName, defaultPoolerSuffix, poolerType) } -func poolerExists(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, poolerType string) bool { +func poolerExists(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, poolerType string) (bool, error) { pooler := &cnpgv1.Pooler{} err := c.Get(ctx, types.NamespacedName{ Name: poolerResourceName(cluster.Name, poolerType), Namespace: cluster.Namespace, }, pooler) if apierrors.IsNotFound(err) { - return false + return false, nil } if err != nil { log.FromContext(ctx).Error(err, "Failed to check pooler existence", "type", poolerType) - return false + return false, err } - return true + return 
true, nil
 }
 
 func arePoolersReady(rwPooler, roPooler *cnpgv1.Pooler) bool {
@@ -666,7 +693,11 @@ func deleteConnectionPoolers(ctx context.Context, c client.Client, cluster *ente
 	logger := log.FromContext(ctx)
 	for _, poolerType := range []string{readWriteEndpoint, readOnlyEndpoint} {
 		poolerName := poolerResourceName(cluster.Name, poolerType)
-		if !poolerExists(ctx, c, cluster, poolerType) {
+		exists, err := poolerExists(ctx, c, cluster, poolerType)
+		if err != nil {
+			return fmt.Errorf("failed to check pooler existence due to transient error: %w", err)
+		}
+		if !exists {
 			continue
 		}
 		pooler := &cnpgv1.Pooler{}
@@ -769,7 +800,11 @@ func syncStatus(ctx context.Context, c client.Client, cluster *enterprisev4.Post
 }
 
 // setStatus sets the phase, condition and persists the status.
+// It skips the API write when the resulting status is identical to the current
+// state, avoiding unnecessary etcd churn and ResourceVersion bumps on stable clusters.
 func setStatus(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, condType conditionTypes, status metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileClusterPhases) error {
+	before := cluster.Status.DeepCopy()
+
 	p := string(phase)
 	cluster.Status.Phase = &p
 	meta.SetStatusCondition(&cluster.Status.Conditions, metav1.Condition{
@@ -779,6 +814,11 @@ func setStatus(ctx context.Context, c client.Client, cluster *enterprisev4.Postg
 		Message:            message,
 		ObservedGeneration: cluster.Generation,
 	})
+
+	if equality.Semantic.DeepEqual(*before, cluster.Status) {
+		return nil
+	}
+
 	if err := c.Status().Update(ctx, cluster); err != nil {
 		return fmt.Errorf("failed to update PostgresCluster status: %w", err)
 	}
@@ -800,7 +840,15 @@ func generateConfigMap(ctx context.Context, c client.Client, scheme *runtime.Sch
 		"SUPER_USER_NAME":       superUsername,
 		"SUPER_USER_SECRET_REF": secretName,
 	}
-	if poolerExists(ctx, c, cluster, readWriteEndpoint) && poolerExists(ctx, c, cluster, readOnlyEndpoint) {
+	rwExists, err := poolerExists(ctx, c, cluster, readWriteEndpoint)
+	if err != nil {
+		return nil, fmt.Errorf("failed to check RW pooler existence: %w", err)
+	}
+	roExists, err := poolerExists(ctx, c, cluster, readOnlyEndpoint)
+	if err != nil {
+		return nil, fmt.Errorf("failed to check RO pooler existence: %w", err)
+	}
+	if rwExists && roExists {
 		data["CLUSTER_POOLER_RW_ENDPOINT"] = fmt.Sprintf("%s.%s", poolerResourceName(cnpgCluster.Name, readWriteEndpoint), cnpgCluster.Namespace)
 		data["CLUSTER_POOLER_RO_ENDPOINT"] = fmt.Sprintf("%s.%s", poolerResourceName(cnpgCluster.Name, readOnlyEndpoint), cnpgCluster.Namespace)
 	}
diff --git a/pkg/postgresql/database/core/database.go b/pkg/postgresql/database/core/database.go
index 50c99beed..483076774 100644
--- a/pkg/postgresql/database/core/database.go
+++ b/pkg/postgresql/database/core/database.go
@@ -52,12 +52,18 @@ func PostgresDatabaseService(
 		}
 		return ctrl.Result{}, nil
 	}
+	// Add finalizer if not present.
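+	// Update-then-return is the standard finalizer dance: persist the finalizer
+	// first and exit, letting the watch event from that write drive the next
+	// reconcile against a fresh copy of the object.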
if !controllerutil.ContainsFinalizer(postgresDB, postgresDatabaseFinalizerName) {
 		controllerutil.AddFinalizer(postgresDB, postgresDatabaseFinalizerName)
 		if err := c.Update(ctx, postgresDB); err != nil {
+			if errors.IsConflict(err) {
+				logger.Info("Conflict while adding finalizer, will requeue")
+				return ctrl.Result{Requeue: true}, nil
+			}
 			logger.Error(err, "Failed to add finalizer to PostgresDatabase")
-			return ctrl.Result{}, err
+			return ctrl.Result{}, fmt.Errorf("failed to add finalizer: %w", err)
 		}
+		logger.Info("Finalizer added successfully")
 		return ctrl.Result{}, nil
 	}
 
From 1920b63aae100c5d66aa7f73cea0aa45946e758d Mon Sep 17 00:00:00 2001
From: dpishchenkov
Date: Thu, 26 Mar 2026 18:36:46 +0100
Subject: [PATCH 08/36] add integration tests for database reconciler

---
 .../postgresdatabase_controller_test.go       | 564 ++++++++++++++++--
 internal/controller/suite_test.go             |  21 +-
 .../cluster/core/cluster_unit_test.go         |   5 +-
 pkg/postgresql/database/core/database.go      |  24 +-
 .../database/core/database_unit_test.go       |   5 +-
 5 files changed, 561 insertions(+), 58 deletions(-)

diff --git a/internal/controller/postgresdatabase_controller_test.go b/internal/controller/postgresdatabase_controller_test.go
index 4e0589cad..614efc49b 100644
--- a/internal/controller/postgresdatabase_controller_test.go
+++ b/internal/controller/postgresdatabase_controller_test.go
@@ -18,67 +18,539 @@ package controller
 
 import (
 	"context"
+	"fmt"
+	"slices"
+	"time"
 
+	cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
-	"k8s.io/apimachinery/pkg/api/errors"
+	enterprisev4 "github.com/splunk/splunk-operator/api/v4"
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/meta"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/types"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
+)
 
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
-	enterprisev4 "github.com/splunk/splunk-operator/api/v4"
-)
+const postgresDatabaseFinalizer = "postgresdatabases.enterprise.splunk.com/finalizer"
 
+func reconcilePostgresDatabase(ctx context.Context, nn types.NamespacedName) (ctrl.Result, error) {
+	reconciler := &PostgresDatabaseReconciler{
+		Client: k8sClient,
+		Scheme: k8sClient.Scheme(),
+	}
+	return reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: nn})
+}
 
-var _ = Describe("Database Controller", func() {
-	Context("When reconciling a resource", func() {
-		const resourceName = "test-resource"
-
-		ctx := context.Background()
-
-		typeNamespacedName := types.NamespacedName{
-			Name:      resourceName,
-			Namespace: "default", // TODO(user):Modify as needed
-		}
-		database := &enterprisev4.PostgresDatabase{}
-
-		BeforeEach(func() {
-			By("creating the custom resource for the Kind Database")
-			err := k8sClient.Get(ctx, typeNamespacedName, database)
-			if err != nil && errors.IsNotFound(err) {
-				resource := &enterprisev4.PostgresDatabase{
-					ObjectMeta: metav1.ObjectMeta{
-						Name:      resourceName,
-						Namespace: "default",
-					},
-					// TODO(user): Specify other spec details if needed.
- } - Expect(k8sClient.Create(ctx, resource)).To(Succeed()) - } +func managedRoleNames(roles []enterprisev4.ManagedRole) []string { + names := make([]string, 0, len(roles)) + for _, role := range roles { + names = append(names, role.Name) + } + return names +} + +func adminRoleNameForTest(dbName string) string { + return dbName + "_admin" +} + +func rwRoleNameForTest(dbName string) string { + return dbName + "_rw" +} + +func ownedByPostgresDatabase(postgresDB *enterprisev4.PostgresDatabase) []metav1.OwnerReference { + controller := true + blockOwnerDeletion := true + return []metav1.OwnerReference{{ + APIVersion: enterprisev4.GroupVersion.String(), + Kind: "PostgresDatabase", + Name: postgresDB.Name, + UID: postgresDB.UID, + Controller: &controller, + BlockOwnerDeletion: &blockOwnerDeletion, + }} +} + +func createPostgresDatabaseResource(ctx context.Context, namespace, resourceName, clusterName string, databases []enterprisev4.DatabaseDefinition, finalizers ...string) *enterprisev4.PostgresDatabase { + postgresDB := &enterprisev4.PostgresDatabase{ + ObjectMeta: metav1.ObjectMeta{ + Name: resourceName, + Namespace: namespace, + Finalizers: finalizers, + }, + Spec: enterprisev4.PostgresDatabaseSpec{ + ClusterRef: corev1.LocalObjectReference{Name: clusterName}, + Databases: databases, + }, + } + Expect(k8sClient.Create(ctx, postgresDB)).To(Succeed()) + return postgresDB +} + +func createPostgresClusterResource(ctx context.Context, namespace, clusterName string) *enterprisev4.PostgresCluster { + postgresCluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + Namespace: namespace, + }, + Spec: enterprisev4.PostgresClusterSpec{ + Class: "dev", + }, + } + Expect(k8sClient.Create(ctx, postgresCluster)).To(Succeed()) + return postgresCluster +} + +func markPostgresClusterReady(ctx context.Context, postgresCluster *enterprisev4.PostgresCluster, cnpgClusterName, namespace string, poolerEnabled bool) { + clusterPhase := "Ready" + postgresCluster.Status.Phase = &clusterPhase + postgresCluster.Status.ProvisionerRef = &corev1.ObjectReference{ + APIVersion: cnpgv1.SchemeGroupVersion.String(), + Kind: "Cluster", + Name: cnpgClusterName, + Namespace: namespace, + } + if poolerEnabled { + postgresCluster.Status.ConnectionPoolerStatus = &enterprisev4.ConnectionPoolerStatus{Enabled: true} + } + Expect(k8sClient.Status().Update(ctx, postgresCluster)).To(Succeed()) +} + +func createCNPGClusterResource(ctx context.Context, namespace, cnpgClusterName string) *cnpgv1.Cluster { + cnpgCluster := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: cnpgClusterName, + Namespace: namespace, + }, + Spec: cnpgv1.ClusterSpec{ + Instances: 1, + StorageConfiguration: cnpgv1.StorageConfiguration{ + Size: "1Gi", + }, + }, + } + Expect(k8sClient.Create(ctx, cnpgCluster)).To(Succeed()) + return cnpgCluster +} + +func markCNPGClusterReady(ctx context.Context, cnpgCluster *cnpgv1.Cluster, reconciledRoles []string, writeService, readService string) { + cnpgCluster.Status.ManagedRolesStatus = cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusReconciled: reconciledRoles, + }, + } + cnpgCluster.Status.WriteService = writeService + cnpgCluster.Status.ReadService = readService + Expect(k8sClient.Status().Update(ctx, cnpgCluster)).To(Succeed()) +} + +type readyClusterScenario struct { + namespace string + resourceName string + clusterName string + cnpgClusterName string + dbName string + requestName types.NamespacedName +} + +func 
newReadyClusterScenario(namespace, resourceName, clusterName, cnpgClusterName, dbName string) readyClusterScenario { + return readyClusterScenario{ + namespace: namespace, + resourceName: resourceName, + clusterName: clusterName, + cnpgClusterName: cnpgClusterName, + dbName: dbName, + requestName: types.NamespacedName{Name: resourceName, Namespace: namespace}, + } +} + +func seedReadyClusterScenario(ctx context.Context, scenario readyClusterScenario, poolerEnabled bool) { + createPostgresDatabaseResource(ctx, scenario.namespace, scenario.resourceName, scenario.clusterName, []enterprisev4.DatabaseDefinition{{Name: scenario.dbName}}) + postgresCluster := createPostgresClusterResource(ctx, scenario.namespace, scenario.clusterName) + markPostgresClusterReady(ctx, postgresCluster, scenario.cnpgClusterName, scenario.namespace, poolerEnabled) + cnpgCluster := createCNPGClusterResource(ctx, scenario.namespace, scenario.cnpgClusterName) + markCNPGClusterReady(ctx, cnpgCluster, []string{adminRoleNameForTest(scenario.dbName), rwRoleNameForTest(scenario.dbName)}, "tenant-rw", "tenant-ro") +} + +func expectReconcileResult(result ctrl.Result, err error, requeueAfter time.Duration) { + Expect(err).NotTo(HaveOccurred()) + Expect(result.RequeueAfter).To(Equal(requeueAfter)) +} + +func expectEmptyReconcileResult(result ctrl.Result, err error) { + Expect(err).NotTo(HaveOccurred()) + Expect(result).To(Equal(ctrl.Result{})) +} + +func fetchPostgresDatabase(ctx context.Context, requestName types.NamespacedName) *enterprisev4.PostgresDatabase { + current := &enterprisev4.PostgresDatabase{} + Expect(k8sClient.Get(ctx, requestName, current)).To(Succeed()) + return current +} + +func expectFinalizerAdded(ctx context.Context, requestName types.NamespacedName) *enterprisev4.PostgresDatabase { + current := fetchPostgresDatabase(ctx, requestName) + Expect(current.Finalizers).To(ContainElement(postgresDatabaseFinalizer)) + return current +} + +func seedExistingDatabaseStatus(ctx context.Context, current *enterprisev4.PostgresDatabase, dbName string) { + current.Status.Databases = []enterprisev4.DatabaseInfo{{Name: dbName}} + Expect(k8sClient.Status().Update(ctx, current)).To(Succeed()) +} + +func expectProvisionedArtifacts(ctx context.Context, scenario readyClusterScenario, owner *enterprisev4.PostgresDatabase) { + adminSecret := &corev1.Secret{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-admin", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, adminSecret)).To(Succeed()) + Expect(adminSecret.Data).To(HaveKey("password")) + Expect(metav1.IsControlledBy(adminSecret, owner)).To(BeTrue()) + + rwSecret := &corev1.Secret{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-rw", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, rwSecret)).To(Succeed()) + Expect(rwSecret.Data).To(HaveKey("password")) + Expect(metav1.IsControlledBy(rwSecret, owner)).To(BeTrue()) + + configMap := &corev1.ConfigMap{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-config", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, configMap)).To(Succeed()) + Expect(configMap.Data).To(HaveKeyWithValue("rw-host", "tenant-rw."+scenario.namespace+".svc.cluster.local")) + Expect(configMap.Data).To(HaveKeyWithValue("ro-host", "tenant-ro."+scenario.namespace+".svc.cluster.local")) + Expect(configMap.Data).To(HaveKeyWithValue("admin-user", adminRoleNameForTest(scenario.dbName))) + 
Expect(configMap.Data).To(HaveKeyWithValue("rw-user", rwRoleNameForTest(scenario.dbName))) + Expect(metav1.IsControlledBy(configMap, owner)).To(BeTrue()) +} + +func expectManagedRolesPatched(ctx context.Context, scenario readyClusterScenario) { + updatedCluster := &enterprisev4.PostgresCluster{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: scenario.clusterName, Namespace: scenario.namespace}, updatedCluster)).To(Succeed()) + Expect(managedRoleNames(updatedCluster.Spec.ManagedRoles)).To(ConsistOf(adminRoleNameForTest(scenario.dbName), rwRoleNameForTest(scenario.dbName))) +} + +func expectCNPGDatabaseCreated(ctx context.Context, scenario readyClusterScenario, owner *enterprisev4.PostgresDatabase) *cnpgv1.Database { + cnpgDatabase := &cnpgv1.Database{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, cnpgDatabase)).To(Succeed()) + Expect(cnpgDatabase.Spec.Name).To(Equal(scenario.dbName)) + Expect(cnpgDatabase.Spec.Owner).To(Equal(adminRoleNameForTest(scenario.dbName))) + Expect(cnpgDatabase.Spec.ClusterRef.Name).To(Equal(scenario.cnpgClusterName)) + Expect(metav1.IsControlledBy(cnpgDatabase, owner)).To(BeTrue()) + return cnpgDatabase +} + +func markCNPGDatabaseApplied(ctx context.Context, cnpgDatabase *cnpgv1.Database) { + applied := true + cnpgDatabase.Status.Applied = &applied + Expect(k8sClient.Status().Update(ctx, cnpgDatabase)).To(Succeed()) +} + +func expectPoolerConfigMap(ctx context.Context, scenario readyClusterScenario) { + configMap := &corev1.ConfigMap{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-config", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, configMap)).To(Succeed()) + Expect(configMap.Data).To(HaveKeyWithValue("pooler-rw-host", scenario.cnpgClusterName+"-pooler-rw."+scenario.namespace+".svc.cluster.local")) + Expect(configMap.Data).To(HaveKeyWithValue("pooler-ro-host", scenario.cnpgClusterName+"-pooler-ro."+scenario.namespace+".svc.cluster.local")) +} + +func seedMissingClusterScenario(ctx context.Context, namespace, resourceName string, finalizers ...string) types.NamespacedName { + createPostgresDatabaseResource(ctx, namespace, resourceName, "absent-cluster", []enterprisev4.DatabaseDefinition{{Name: "appdb"}}, finalizers...) 
+ return types.NamespacedName{Name: resourceName, Namespace: namespace} +} + +func seedConflictScenario(ctx context.Context, namespace, resourceName, clusterName string) types.NamespacedName { + createPostgresDatabaseResource(ctx, namespace, resourceName, clusterName, []enterprisev4.DatabaseDefinition{{Name: "appdb"}}, postgresDatabaseFinalizer) + postgresCluster := createPostgresClusterResource(ctx, namespace, clusterName) + markPostgresClusterReady(ctx, postgresCluster, "unused-cnpg", namespace, false) + return types.NamespacedName{Name: resourceName, Namespace: namespace} +} + +func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, clusterName string, postgresDB *enterprisev4.PostgresDatabase, dbNames ...string) { + ownerReferences := ownedByPostgresDatabase(postgresDB) + for _, dbName := range dbNames { + Expect(k8sClient.Create(ctx, &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-%s-admin", resourceName, dbName), + Namespace: namespace, + OwnerReferences: ownerReferences, + }, + })).To(Succeed()) + + Expect(k8sClient.Create(ctx, &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-%s-rw", resourceName, dbName), + Namespace: namespace, + OwnerReferences: ownerReferences, + }, + })).To(Succeed()) + + Expect(k8sClient.Create(ctx, &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-%s-config", resourceName, dbName), + Namespace: namespace, + OwnerReferences: ownerReferences, + }, + })).To(Succeed()) + + Expect(k8sClient.Create(ctx, &cnpgv1.Database{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-%s", resourceName, dbName), + Namespace: namespace, + OwnerReferences: ownerReferences, + }, + Spec: cnpgv1.DatabaseSpec{ + ClusterRef: corev1.LocalObjectReference{Name: clusterName}, + Name: dbName, + Owner: adminRoleNameForTest(dbName), + }, + })).To(Succeed()) + } +} + +func expectRetainedArtifact(ctx context.Context, name, namespace, resourceName string, obj client.Object) { + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, obj)).To(Succeed()) + Expect(obj.GetAnnotations()).To(HaveKeyWithValue("enterprise.splunk.com/retained-from", resourceName)) + Expect(obj.GetOwnerReferences()).To(BeEmpty()) +} + +func expectDeletedArtifact(ctx context.Context, name, namespace string, obj client.Object) { + err := k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, obj) + Expect(apierrors.IsNotFound(err)).To(BeTrue(), "expected %s to be deleted", name) +} + +func expectStatusPhase(current *enterprisev4.PostgresDatabase, expectedPhase string) { + Expect(current.Status.Phase).NotTo(BeNil()) + Expect(*current.Status.Phase).To(Equal(expectedPhase)) +} + +func expectStatusCondition(current *enterprisev4.PostgresDatabase, conditionType string, expectedStatus metav1.ConditionStatus, expectedReason string) { + condition := meta.FindStatusCondition(current.Status.Conditions, conditionType) + Expect(condition).NotTo(BeNil(), "missing status condition %s", conditionType) + Expect(condition.Status).To(Equal(expectedStatus), "unexpected status for %s", conditionType) + Expect(condition.Reason).To(Equal(expectedReason), "unexpected reason for %s", conditionType) +} + +func expectReadyStatus(current *enterprisev4.PostgresDatabase, generation int64, expectedDatabase enterprisev4.DatabaseInfo) { + expectStatusPhase(current, "Ready") + Expect(current.Status.ObservedGeneration).NotTo(BeNil()) + Expect(*current.Status.ObservedGeneration).To(Equal(generation)) + 
Expect(current.Status.Databases).To(HaveLen(1)) + Expect(current.Status.Databases[0].Name).To(Equal(expectedDatabase.Name)) + Expect(current.Status.Databases[0].Ready).To(Equal(expectedDatabase.Ready)) + Expect(current.Status.Databases[0].AdminUserSecretRef).NotTo(BeNil()) + Expect(current.Status.Databases[0].RWUserSecretRef).NotTo(BeNil()) + Expect(current.Status.Databases[0].ConfigMapRef).NotTo(BeNil()) +} + +var _ = Describe("PostgresDatabase Controller", func() { + var ( + ctx context.Context + namespace string + ) + + BeforeEach(func() { + ctx = context.Background() + namespace = fmt.Sprintf("postgresdatabase-%d", time.Now().UnixNano()) + Expect(k8sClient.Create(ctx, &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{Name: namespace}, + })).To(Succeed()) + }) + + AfterEach(func() { + ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespace}} + Expect(k8sClient.Delete(ctx, ns)).To(Succeed()) + }) + + When("the referenced PostgresCluster is missing", func() { + Context("on the first reconcile", func() { + It("adds the finalizer", func() { + requestName := seedMissingClusterScenario(ctx, namespace, "missing-cluster") + + result, err := reconcilePostgresDatabase(ctx, requestName) + expectEmptyReconcileResult(result, err) + + current := fetchPostgresDatabase(ctx, requestName) + Expect(current.Finalizers).To(ContainElement(postgresDatabaseFinalizer)) + }) + }) + + Context("after the finalizer is already present", func() { + It("reports ClusterNotFound and requeues", func() { + requestName := seedMissingClusterScenario(ctx, namespace, "missing-cluster-with-finalizer", postgresDatabaseFinalizer) + + result, err := reconcilePostgresDatabase(ctx, requestName) + expectReconcileResult(result, err, 30*time.Second) + + current := fetchPostgresDatabase(ctx, requestName) + expectStatusPhase(current, "Pending") + expectStatusCondition(current, "ClusterReady", metav1.ConditionFalse, "ClusterNotFound") + clusterReady := meta.FindStatusCondition(current.Status.Conditions, "ClusterReady") + Expect(clusterReady.ObservedGeneration).To(Equal(current.Generation)) + }) + }) + }) + + When("the referenced PostgresCluster is ready", func() { + Context("and live grants are not invoked", func() { + It("reconciles secrets, configmaps, roles, and CNPG databases", func() { + scenario := newReadyClusterScenario(namespace, "ready-cluster", "tenant-cluster", "tenant-cnpg", "appdb") + seedReadyClusterScenario(ctx, scenario, false) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + + current := expectFinalizerAdded(ctx, scenario.requestName) + seedExistingDatabaseStatus(ctx, current, scenario.dbName) + + result, err = reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + expectProvisionedArtifacts(ctx, scenario, current) + expectManagedRolesPatched(ctx, scenario) + + result, err = reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + cnpgDatabase := expectCNPGDatabaseCreated(ctx, scenario, current) + markCNPGDatabaseApplied(ctx, cnpgDatabase) + + result, err = reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + + current = fetchPostgresDatabase(ctx, scenario.requestName) + expectReadyStatus(current, current.Generation, enterprisev4.DatabaseInfo{Name: scenario.dbName, Ready: true}) + expectStatusCondition(current, "ClusterReady", metav1.ConditionTrue, "ClusterAvailable") + 
expectStatusCondition(current, "SecretsReady", metav1.ConditionTrue, "SecretsCreated") + expectStatusCondition(current, "ConfigMapsReady", metav1.ConditionTrue, "ConfigMapsCreated") + expectStatusCondition(current, "RolesReady", metav1.ConditionTrue, "UsersAvailable") + expectStatusCondition(current, "DatabasesReady", metav1.ConditionTrue, "DatabasesAvailable") + Expect(meta.FindStatusCondition(current.Status.Conditions, "PrivilegesReady")).To(BeNil()) + }) }) - AfterEach(func() { - // TODO(user): Cleanup logic after each test, like removing the resource instance. - resource := &enterprisev4.PostgresDatabase{} - err := k8sClient.Get(ctx, typeNamespacedName, resource) - Expect(err).NotTo(HaveOccurred()) + Context("and connection pooling is enabled", func() { + It("adds pooler endpoints to the generated ConfigMap", func() { + scenario := newReadyClusterScenario(namespace, "pooler-cluster", "pooler-postgres", "pooler-cnpg", "appdb") + seedReadyClusterScenario(ctx, scenario, true) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) - By("Cleanup the specific resource instance Database") - Expect(k8sClient.Delete(ctx, resource)).To(Succeed()) + current := fetchPostgresDatabase(ctx, scenario.requestName) + seedExistingDatabaseStatus(ctx, current, scenario.dbName) + + result, err = reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + expectPoolerConfigMap(ctx, scenario) + }) }) - It("should successfully reconcile the resource", func() { - By("Reconciling the created resource") - controllerReconciler := &PostgresDatabaseReconciler{ - Client: k8sClient, - Scheme: k8sClient.Scheme(), + }) + + When("role ownership conflicts exist", func() { + It("marks the resource failed and stops provisioning dependent resources", func() { + resourceName := "conflict-cluster" + clusterName := "conflict-postgres" + requestName := seedConflictScenario(ctx, namespace, resourceName, clusterName) + + conflictPatch := &unstructured.Unstructured{ + Object: map[string]any{ + "apiVersion": enterprisev4.GroupVersion.String(), + "kind": "PostgresCluster", + "metadata": map[string]any{ + "name": clusterName, + "namespace": namespace, + }, + "spec": map[string]any{ + "managedRoles": []map[string]any{ + {"name": "appdb_admin", "exists": true}, + {"name": "appdb_rw", "exists": true}, + }, + }, + }, } + Expect(k8sClient.Patch(ctx, conflictPatch, client.Apply, client.FieldOwner("postgresdatabase-legacy"))).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, requestName) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("role conflict detected")) + Expect(result).To(Equal(ctrl.Result{})) + + current := fetchPostgresDatabase(ctx, requestName) + expectStatusPhase(current, "Failed") + expectStatusCondition(current, "RolesReady", metav1.ConditionFalse, "RoleConflict") + + rolesReady := meta.FindStatusCondition(current.Status.Conditions, "RolesReady") + Expect(rolesReady.Message).To(ContainSubstring("appdb_admin")) + Expect(rolesReady.Message).To(ContainSubstring("postgresdatabase-legacy")) + + configMap := &corev1.ConfigMap{} + err = k8sClient.Get(ctx, types.NamespacedName{Name: "conflict-cluster-appdb-config", Namespace: namespace}, configMap) + Expect(apierrors.IsNotFound(err)).To(BeTrue()) + + cnpgDatabase := &cnpgv1.Database{} + err = k8sClient.Get(ctx, types.NamespacedName{Name: "conflict-cluster-appdb", Namespace: namespace}, cnpgDatabase) + 
Expect(apierrors.IsNotFound(err)).To(BeTrue()) + }) + }) + + When("the PostgresDatabase is being deleted", func() { + Context("with retained and deleted databases", func() { + It("orphans retained resources, removes deleted resources, and patches managed roles", func() { + resourceName := "delete-cluster" + clusterName := "delete-postgres" + requestName := types.NamespacedName{Name: resourceName, Namespace: namespace} + + postgresDB := createPostgresDatabaseResource(ctx, namespace, resourceName, clusterName, []enterprisev4.DatabaseDefinition{ + {Name: "keepdb", DeletionPolicy: "Retain"}, + {Name: "dropdb"}, + }, postgresDatabaseFinalizer) + Expect(k8sClient.Get(ctx, requestName, postgresDB)).To(Succeed()) + + createPostgresClusterResource(ctx, namespace, clusterName) + + initialRolesPatch := &unstructured.Unstructured{ + Object: map[string]any{ + "apiVersion": enterprisev4.GroupVersion.String(), + "kind": "PostgresCluster", + "metadata": map[string]any{ + "name": clusterName, + "namespace": namespace, + }, + "spec": map[string]any{ + "managedRoles": []map[string]any{ + {"name": "keepdb_admin", "exists": true, "passwordSecretRef": map[string]any{"name": "delete-cluster-keepdb-admin", "key": "password"}}, + {"name": "keepdb_rw", "exists": true, "passwordSecretRef": map[string]any{"name": "delete-cluster-keepdb-rw", "key": "password"}}, + {"name": "dropdb_admin", "exists": true, "passwordSecretRef": map[string]any{"name": "delete-cluster-dropdb-admin", "key": "password"}}, + {"name": "dropdb_rw", "exists": true, "passwordSecretRef": map[string]any{"name": "delete-cluster-dropdb-rw", "key": "password"}}, + }, + }, + }, + } + Expect(k8sClient.Patch(ctx, initialRolesPatch, client.Apply, client.FieldOwner("postgresdatabase-delete-cluster"))).To(Succeed()) + + seedOwnedDatabaseArtifacts(ctx, namespace, resourceName, clusterName, postgresDB, "keepdb", "dropdb") + + Expect(k8sClient.Delete(ctx, postgresDB)).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, requestName) + expectEmptyReconcileResult(result, err) + + expectRetainedArtifact(ctx, "delete-cluster-keepdb-config", namespace, resourceName, &corev1.ConfigMap{}) + expectRetainedArtifact(ctx, "delete-cluster-keepdb-admin", namespace, resourceName, &corev1.Secret{}) + expectRetainedArtifact(ctx, "delete-cluster-keepdb-rw", namespace, resourceName, &corev1.Secret{}) + expectRetainedArtifact(ctx, "delete-cluster-keepdb", namespace, resourceName, &cnpgv1.Database{}) + + expectDeletedArtifact(ctx, "delete-cluster-dropdb-config", namespace, &corev1.ConfigMap{}) + expectDeletedArtifact(ctx, "delete-cluster-dropdb-admin", namespace, &corev1.Secret{}) + expectDeletedArtifact(ctx, "delete-cluster-dropdb-rw", namespace, &corev1.Secret{}) + expectDeletedArtifact(ctx, "delete-cluster-dropdb", namespace, &cnpgv1.Database{}) + + updatedCluster := &enterprisev4.PostgresCluster{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: namespace}, updatedCluster)).To(Succeed()) + Expect(managedRoleNames(updatedCluster.Spec.ManagedRoles)).To(ConsistOf("keepdb_admin", "keepdb_rw")) - _, err := controllerReconciler.Reconcile(ctx, reconcile.Request{ - NamespacedName: typeNamespacedName, + current := &enterprisev4.PostgresDatabase{} + err = k8sClient.Get(ctx, requestName, current) + Expect(apierrors.IsNotFound(err) || !slices.Contains(current.Finalizers, postgresDatabaseFinalizer)).To(BeTrue()) }) - Expect(err).NotTo(HaveOccurred()) - // TODO(user): Add more specific assertions depending on your controller's reconciliation logic. 
- // Example: If you expect a certain status condition after reconciliation, verify it here. }) }) }) diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index 142a8720c..9356a011f 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -19,10 +19,13 @@ package controller import ( "context" "fmt" + "os/exec" "path/filepath" + "strings" "testing" "time" + cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" "go.uber.org/zap/zapcore" @@ -46,6 +49,14 @@ var k8sClient client.Client var testEnv *envtest.Environment var k8sManager ctrl.Manager +func resolveCNPGModuleDir() string { + cmd := exec.Command("go", "list", "-f", "{{.Dir}}", "-m", "github.com/cloudnative-pg/cloudnative-pg") + output, err := cmd.Output() + Expect(err).NotTo(HaveOccurred()) + + return strings.TrimSpace(string(output)) +} + func TestAPIs(t *testing.T) { RegisterFailHandler(Fail) @@ -61,8 +72,12 @@ var _ = BeforeSuite(func(ctx context.Context) { By("bootstrapping test environment") + cnpgModuleDir := resolveCNPGModuleDir() testEnv = &envtest.Environment{ - CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, + CRDDirectoryPaths: []string{ + filepath.Join("..", "..", "config", "crd", "bases"), + filepath.Join(cnpgModuleDir, "config", "crd", "bases"), + }, ErrorIfCRDPathMissing: true, } @@ -76,6 +91,9 @@ var _ = BeforeSuite(func(ctx context.Context) { err = enterpriseApi.AddToScheme(clientgoscheme.Scheme) Expect(err).NotTo(HaveOccurred()) + err = cnpgv1.AddToScheme(clientgoscheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + err = enterpriseApiV3.AddToScheme(clientgoscheme.Scheme) Expect(err).NotTo(HaveOccurred()) @@ -152,7 +170,6 @@ var _ = BeforeSuite(func(ctx context.Context) { }).SetupWithManager(k8sManager); err != nil { Expect(err).NotTo(HaveOccurred()) } - go func() { err = k8sManager.Start(ctrl.SetupSignalHandler()) fmt.Printf("error %v", err.Error()) diff --git a/pkg/postgresql/cluster/core/cluster_unit_test.go b/pkg/postgresql/cluster/core/cluster_unit_test.go index e87173afb..e2466f54b 100644 --- a/pkg/postgresql/cluster/core/cluster_unit_test.go +++ b/pkg/postgresql/cluster/core/cluster_unit_test.go @@ -717,9 +717,8 @@ func TestPoolerExists(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.objects...).Build() - - got := poolerExists(context.Background(), c, cluster, "rw") - + got, err := poolerExists(context.Background(), c, cluster, "rw") + assert.NoError(t, err) assert.Equal(t, tt.expected, got) }) } diff --git a/pkg/postgresql/database/core/database.go b/pkg/postgresql/database/core/database.go index 483076774..9ad701b01 100644 --- a/pkg/postgresql/database/core/database.go +++ b/pkg/postgresql/database/core/database.go @@ -20,6 +20,7 @@ import ( "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/apiutil" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" ) @@ -397,7 +398,11 @@ func parseRoleNames(raw []byte) []string { func patchManagedRoles(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster) error { logger := log.FromContext(ctx) allRoles := buildManagedRoles(postgresDB.Name, postgresDB.Spec.Databases) - rolePatch := 
buildManagedRolesPatch(cluster, allRoles) + rolePatch, err := buildManagedRolesPatch(cluster, allRoles, c.Scheme()) + if err != nil { + logger.Error(err, "Failed to build managed roles patch", "postgresDatabase", postgresDB.Name) + return fmt.Errorf("building managed roles patch for PostgresDatabase %s: %w", postgresDB.Name, err) + } fieldManager := fieldManagerName(postgresDB.Name) if err := c.Patch(ctx, rolePatch, client.Apply, client.FieldOwner(fieldManager)); err != nil { logger.Error(err, "Failed to add users to PostgresCluster", "postgresDatabase", postgresDB.Name) @@ -710,20 +715,27 @@ func buildManagedRoles(postgresDBName string, databases []enterprisev4.DatabaseD return roles } -func buildManagedRolesPatch(cluster *enterprisev4.PostgresCluster, roles []enterprisev4.ManagedRole) *unstructured.Unstructured { +func buildManagedRolesPatch(cluster *enterprisev4.PostgresCluster, roles []enterprisev4.ManagedRole, scheme *runtime.Scheme) (*unstructured.Unstructured, error) { + gvk, err := apiutil.GVKForObject(cluster, scheme) + if err != nil { + return nil, fmt.Errorf("failed to get GVK for Cluster: %w", err) + } return &unstructured.Unstructured{ Object: map[string]any{ - "apiVersion": cluster.APIVersion, - "kind": cluster.Kind, + "apiVersion": gvk.GroupVersion().String(), + "kind": gvk.Kind, "metadata": map[string]any{"name": cluster.Name, "namespace": cluster.Namespace}, "spec": map[string]any{"managedRoles": roles}, }, - } + }, nil } func patchManagedRolesOnDeletion(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster, retained []enterprisev4.DatabaseDefinition) error { roles := buildManagedRoles(postgresDB.Name, retained) - rolePatch := buildManagedRolesPatch(cluster, roles) + rolePatch, err := buildManagedRolesPatch(cluster, roles, c.Scheme()) + if err != nil { + return fmt.Errorf("building managed roles patch: %w", err) + } if err := c.Patch(ctx, rolePatch, client.Apply, client.FieldOwner(fieldManagerName(postgresDB.Name))); err != nil { return fmt.Errorf("patching managed roles on deletion: %w", err) } diff --git a/pkg/postgresql/database/core/database_unit_test.go b/pkg/postgresql/database/core/database_unit_test.go index 0bde24a16..8d4da6c52 100644 --- a/pkg/postgresql/database/core/database_unit_test.go +++ b/pkg/postgresql/database/core/database_unit_test.go @@ -1289,6 +1289,7 @@ func TestBuildManagedRoles(t *testing.T) { } func TestBuildManagedRolesPatch(t *testing.T) { + scheme := testScheme(t) cluster := &enterprisev4.PostgresCluster{ TypeMeta: metav1.TypeMeta{ APIVersion: enterprisev4.GroupVersion.String(), @@ -1300,8 +1301,10 @@ func TestBuildManagedRolesPatch(t *testing.T) { }, } roles := buildManagedRoles("primary", []enterprisev4.DatabaseDefinition{{Name: "payments"}}) + c := testClient(t, scheme, cluster) - got := buildManagedRolesPatch(cluster, roles) + got, err := buildManagedRolesPatch(cluster, roles, c.Scheme()) + require.NoError(t, err) assert.Equal(t, cluster.APIVersion, got.Object["apiVersion"]) assert.Equal(t, cluster.Kind, got.Object["kind"]) From b96a4ebdce4afdd70d626aaf0ad6f52ef5c32806 Mon Sep 17 00:00:00 2001 From: Kamil Ubych <56136249+limak9182@users.noreply.github.com> Date: Mon, 30 Mar 2026 15:49:47 +0200 Subject: [PATCH 09/36] PostgresCluster and PostgresDatabase events emitting (#1798) * events emitting * emmiting events for secrets and configmaps * predicates change for database storming * revert to use emitOnConditionTransition * filter out only create events * comments cleanup * 
events parity (started&finished) with requeue --- cmd/main.go | 10 ++- .../controller/postgrescluster_controller.go | 8 +- .../controller/postgresdatabase_controller.go | 20 +++-- pkg/postgresql/cluster/core/cluster.go | 75 +++++++++++++++---- pkg/postgresql/cluster/core/events.go | 62 +++++++++++++++ pkg/postgresql/cluster/core/types.go | 12 +++ pkg/postgresql/database/core/database.go | 47 +++++++++--- pkg/postgresql/database/core/events.go | 58 ++++++++++++++ pkg/postgresql/database/core/types.go | 10 +++ 9 files changed, 265 insertions(+), 37 deletions(-) create mode 100644 pkg/postgresql/cluster/core/events.go create mode 100644 pkg/postgresql/database/core/events.go diff --git a/cmd/main.go b/cmd/main.go index 9b960c2bc..332623f0d 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -283,15 +283,17 @@ func main() { os.Exit(1) } if err := (&controller.PostgresDatabaseReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("postgresdatabase-controller"), }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "PostgresDatabase") os.Exit(1) } if err := (&controller.PostgresClusterReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("postgrescluster-controller"), }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "PostgresCluster") os.Exit(1) diff --git a/internal/controller/postgrescluster_controller.go b/internal/controller/postgrescluster_controller.go index 163a07f9d..70b11c9e6 100644 --- a/internal/controller/postgrescluster_controller.go +++ b/internal/controller/postgrescluster_controller.go @@ -26,6 +26,7 @@ import ( "k8s.io/apimachinery/pkg/api/equality" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" @@ -41,7 +42,8 @@ const ( // PostgresClusterReconciler reconciles PostgresCluster resources. type PostgresClusterReconciler struct { client.Client - Scheme *runtime.Scheme + Scheme *runtime.Scheme + Recorder record.EventRecorder } // +kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresclusters,verbs=get;list;watch;create;update;patch;delete @@ -52,9 +54,11 @@ type PostgresClusterReconciler struct { // +kubebuilder:rbac:groups=postgresql.cnpg.io,resources=clusters/status,verbs=get // +kubebuilder:rbac:groups=postgresql.cnpg.io,resources=poolers,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=postgresql.cnpg.io,resources=poolers/status,verbs=get +// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch func (r *PostgresClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - return clustercore.PostgresClusterService(ctx, r.Client, r.Scheme, req) + rc := &clustercore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder} + return clustercore.PostgresClusterService(ctx, rc, req) } // SetupWithManager registers the controller and owned resource watches. 
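Note on the Recorder wiring above: it can be exercised without envtest, since client-go ships record.FakeRecorder, which renders each event to a string and buffers it on a channel. Below is a minimal unit-test sketch for the phase-transition helper this patch adds in pkg/postgresql/cluster/core/events.go; the test file name and the empty-cluster fixture are illustrative assumptions, not part of this series.

// pkg/postgresql/cluster/core/events_unit_test.go (illustrative sketch only)
package core

import (
	"strings"
	"testing"

	enterprisev4 "github.com/splunk/splunk-operator/api/v4"
	"k8s.io/client-go/tools/record"
)

func TestEmitClusterPhaseTransitionSketch(t *testing.T) {
	// FakeRecorder pushes rendered "<type> <reason> <message>" strings onto Events.
	recorder := record.NewFakeRecorder(8)
	rc := &ReconcileContext{Recorder: recorder}
	cluster := &enterprisev4.PostgresCluster{} // bare object is enough; FakeRecorder ignores it

	// Ready -> Failed is a real degradation and should emit a Warning.
	rc.emitClusterPhaseTransition(cluster, string(readyClusterPhase), string(failedClusterPhase))
	// Ready -> Provisioning follows our own update and should stay silent.
	rc.emitClusterPhaseTransition(cluster, string(readyClusterPhase), string(provisioningClusterPhase))

	select {
	case got := <-recorder.Events:
		if !strings.Contains(got, EventClusterDegraded) {
			t.Fatalf("expected %s event, got %q", EventClusterDegraded, got)
		}
	default:
		t.Fatal("expected a ClusterDegraded event to be recorded")
	}
	select {
	case got := <-recorder.Events:
		t.Fatalf("unexpected second event: %q", got)
	default:
	}
}

The same FakeRecorder pattern applies to the database-side helpers (emitOnConditionTransition, emitOnceBeforeWait) introduced further down.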
diff --git a/internal/controller/postgresdatabase_controller.go b/internal/controller/postgresdatabase_controller.go index 40faa3eb3..0c6db9628 100644 --- a/internal/controller/postgresdatabase_controller.go +++ b/internal/controller/postgresdatabase_controller.go @@ -29,6 +29,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" @@ -41,7 +42,8 @@ import ( // PostgresDatabaseReconciler reconciles a PostgresDatabase object. type PostgresDatabaseReconciler struct { client.Client - Scheme *runtime.Scheme + Scheme *runtime.Scheme + Recorder record.EventRecorder } const ( @@ -56,6 +58,7 @@ const ( //+kubebuilder:rbac:groups=postgresql.cnpg.io,resources=databases,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;delete //+kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;delete +//+kubebuilder:rbac:groups=core,resources=events,verbs=create;patch func (r *PostgresDatabaseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := log.FromContext(ctx) @@ -68,7 +71,8 @@ func (r *PostgresDatabaseReconciler) Reconcile(ctx context.Context, req ctrl.Req } return ctrl.Result{}, err } - return dbcore.PostgresDatabaseService(ctx, r.Client, r.Scheme, postgresDB, dbadapter.NewDBRepository) + rc := &dbcore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder} + return dbcore.PostgresDatabaseService(ctx, rc, postgresDB, dbadapter.NewDBRepository) } // SetupWithManager sets up the controller with the Manager. @@ -104,9 +108,15 @@ func (r *PostgresDatabaseReconciler) SetupWithManager(mgr ctrl.Manager) error { }, ), )). - Owns(&cnpgv1.Database{}). - Owns(&corev1.Secret{}). - Owns(&corev1.ConfigMap{}). + Owns(&cnpgv1.Database{}, builder.WithPredicates(predicate.Funcs{ + CreateFunc: func(event.CreateEvent) bool { return false }, + })). + Owns(&corev1.Secret{}, builder.WithPredicates(predicate.Funcs{ + CreateFunc: func(event.CreateEvent) bool { return false }, + })). + Owns(&corev1.ConfigMap{}, builder.WithPredicates(predicate.Funcs{ + CreateFunc: func(event.CreateEvent) bool { return false }, + })). Named("postgresdatabase"). WithOptions(controller.Options{ MaxConcurrentReconciles: DatabaseTotalWorker, diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index d073b79cf..8642362fe 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -39,7 +39,8 @@ import ( ) // PostgresClusterService is the application service entry point called by the primary adapter (reconciler). -func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtime.Scheme, req ctrl.Request) (ctrl.Result, error) { +func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl.Request) (ctrl.Result, error) { + c := rc.Client logger := log.FromContext(ctx) logger.Info("Reconciling PostgresCluster", "name", req.Name, "namespace", req.Namespace) @@ -67,12 +68,13 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim } // Finalizer handling must come before any other processing. 
- if err := handleFinalizer(ctx, c, scheme, postgresCluster, secret); err != nil { + if err := handleFinalizer(ctx, rc, postgresCluster, secret); err != nil { if apierrors.IsNotFound(err) { logger.Info("PostgresCluster already deleted, skipping finalizer update") return ctrl.Result{}, nil } logger.Error(err, "Failed to handle finalizer") + rc.emitWarning(postgresCluster, EventCleanupFailed, fmt.Sprintf("Cleanup failed: %v", err)) errs := []error{err} if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterDeleteFailed, fmt.Sprintf("Failed to delete resources during cleanup: %v", err), failedClusterPhase); statusErr != nil { @@ -105,6 +107,7 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim clusterClass := &enterprisev4.PostgresClusterClass{} if err := c.Get(ctx, client.ObjectKey{Name: postgresCluster.Spec.Class}, clusterClass); err != nil { logger.Error(err, "Unable to fetch referenced PostgresClusterClass", "className", postgresCluster.Spec.Class) + rc.emitWarning(postgresCluster, EventClusterClassNotFound, fmt.Sprintf("ClusterClass %s not found", postgresCluster.Spec.Class)) if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterClassNotFound, fmt.Sprintf("ClusterClass %s not found: %v", postgresCluster.Spec.Class, err), failedClusterPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") @@ -116,6 +119,7 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim mergedConfig, err := getMergedConfig(clusterClass, postgresCluster) if err != nil { logger.Error(err, "Failed to merge PostgresCluster configuration") + rc.emitWarning(postgresCluster, EventConfigMergeFailed, fmt.Sprintf("Failed to merge configuration: %v", err)) if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonInvalidConfiguration, fmt.Sprintf("Failed to merge configuration: %v", err), failedClusterPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") @@ -135,6 +139,7 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim secretExists, secretErr := clusterSecretExists(ctx, c, postgresCluster.Namespace, postgresSecretName, secret) if secretErr != nil { logger.Error(secretErr, "Failed to check if PostgresCluster secret exists", "name", postgresSecretName) + rc.emitWarning(postgresCluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to check secret existence: %v", secretErr)) if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonUserSecretFailed, fmt.Sprintf("Failed to check secret existence: %v", secretErr), failedClusterPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") @@ -143,8 +148,9 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim } if !secretExists { logger.Info("Creating PostgresCluster secret", "name", postgresSecretName) - if err := ensureClusterSecret(ctx, c, scheme, postgresCluster, postgresSecretName, secret); err != nil { + if err := ensureClusterSecret(ctx, c, rc.Scheme, postgresCluster, postgresSecretName, secret); err != nil { logger.Error(err, "Failed to ensure PostgresCluster secret", "name", postgresSecretName) + rc.emitWarning(postgresCluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to generate cluster secret: %v", err)) if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonUserSecretFailed, fmt.Sprintf("Failed to generate PostgresCluster secret: %v", err), failedClusterPhase); statusErr != nil { 
logger.Error(statusErr, "Failed to update status") @@ -159,23 +165,26 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim logger.Error(err, "Failed to update status after secret creation") return ctrl.Result{}, err } + rc.emitNormal(postgresCluster, EventSecretReady, fmt.Sprintf("Superuser secret %s created", postgresSecretName)) logger.Info("SuperUserSecretRef persisted to status") } // Re-attach ownerRef if it was stripped (e.g. by a Retain-policy deletion of a previous cluster). - hasOwnerRef, ownerRefErr := controllerutil.HasOwnerReference(secret.GetOwnerReferences(), postgresCluster, scheme) + hasOwnerRef, ownerRefErr := controllerutil.HasOwnerReference(secret.GetOwnerReferences(), postgresCluster, rc.Scheme) if ownerRefErr != nil { logger.Error(ownerRefErr, "Failed to check owner reference on Secret") return ctrl.Result{}, fmt.Errorf("failed to check owner reference on secret: %w", ownerRefErr) } if secretExists && !hasOwnerRef { logger.Info("Connecting existing secret to PostgresCluster by adding owner reference", "name", postgresSecretName) + rc.emitNormal(postgresCluster, EventClusterAdopted, fmt.Sprintf("Adopted existing CNPG cluster and secret %s", postgresSecretName)) originalSecret := secret.DeepCopy() - if err := ctrl.SetControllerReference(postgresCluster, secret, scheme); err != nil { + if err := ctrl.SetControllerReference(postgresCluster, secret, rc.Scheme); err != nil { return ctrl.Result{}, fmt.Errorf("failed to set controller reference on existing secret: %w", err) } if err := patchObject(ctx, c, originalSecret, secret, "Secret"); err != nil { logger.Error(err, "Failed to patch existing secret with controller reference") + rc.emitWarning(postgresCluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to patch existing secret: %v", err)) if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonSuperUserSecretFailed, fmt.Sprintf("Failed to patch existing secret: %v", err), failedClusterPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") @@ -201,15 +210,17 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim switch { case apierrors.IsNotFound(err): logger.Info("CNPG Cluster not found, creating", "name", postgresCluster.Name) - newCluster := buildCNPGCluster(scheme, postgresCluster, mergedConfig, postgresSecretName) + newCluster := buildCNPGCluster(rc.Scheme, postgresCluster, mergedConfig, postgresSecretName) if err := c.Create(ctx, newCluster); err != nil { logger.Error(err, "Failed to create CNPG Cluster") + rc.emitWarning(postgresCluster, EventClusterCreateFailed, fmt.Sprintf("Failed to create CNPG cluster: %v", err)) if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterBuildFailed, fmt.Sprintf("Failed to create CNPG Cluster: %v", err), failedClusterPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") } return ctrl.Result{}, err } + rc.emitNormal(postgresCluster, EventClusterCreationStarted, "CNPG cluster created, waiting for healthy state") if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterBuildSucceeded, "CNPG Cluster created", pendingClusterPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") @@ -241,12 +252,19 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim return ctrl.Result{Requeue: true}, nil case patchErr != nil: logger.Error(patchErr, "Failed to patch CNPG Cluster", "name", cnpgCluster.Name) + 
rc.emitWarning(postgresCluster, EventClusterUpdateFailed, fmt.Sprintf("Failed to patch CNPG cluster: %v", patchErr)) if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterPatchFailed, fmt.Sprintf("Failed to patch CNPG Cluster: %v", patchErr), failedClusterPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") } return ctrl.Result{}, patchErr default: + if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterBuildSucceeded, + "CNPG Cluster spec updated, waiting for healthy state", provisioningClusterPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status after patch") + return ctrl.Result{Requeue: true}, nil + } + rc.emitNormal(postgresCluster, EventClusterUpdateStarted, "CNPG cluster spec updated, waiting for healthy state") logger.Info("CNPG Cluster patched successfully, requeueing for status update", "name", cnpgCluster.Name) return ctrl.Result{RequeueAfter: retryDelay}, nil } @@ -255,6 +273,7 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim // Reconcile ManagedRoles. if err := reconcileManagedRoles(ctx, c, postgresCluster, cnpgCluster); err != nil { logger.Error(err, "Failed to reconcile managed roles") + rc.emitWarning(postgresCluster, EventManagedRolesFailed, fmt.Sprintf("Failed to reconcile managed roles: %v", err)) if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonManagedRolesFailed, fmt.Sprintf("Failed to reconcile managed roles: %v", err), failedClusterPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") @@ -320,14 +339,16 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim } return ctrl.Result{RequeueAfter: retryDelay}, nil } - if err := createOrUpdateConnectionPoolers(ctx, c, scheme, postgresCluster, mergedConfig, cnpgCluster); err != nil { + if err := createOrUpdateConnectionPoolers(ctx, c, rc.Scheme, postgresCluster, mergedConfig, cnpgCluster); err != nil { logger.Error(err, "Failed to reconcile connection pooler") + rc.emitWarning(postgresCluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to reconcile connection pooler: %v", err)) if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, fmt.Sprintf("Failed to reconcile connection pooler: %v", err), failedClusterPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") } return ctrl.Result{}, err } + rc.emitNormal(postgresCluster, EventPoolerCreationStarted, "Connection poolers created, waiting for readiness") logger.Info("Connection Poolers created, requeueing to check readiness") if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerCreating, "Connection poolers are being provisioned", provisioningClusterPhase); statusErr != nil { @@ -359,22 +380,27 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim return ctrl.Result{RequeueAfter: retryDelay}, nil default: + oldConditions := make([]metav1.Condition, len(postgresCluster.Status.Conditions)) + copy(oldConditions, postgresCluster.Status.Conditions) if err := syncPoolerStatus(ctx, c, postgresCluster); err != nil { logger.Error(err, "Failed to sync pooler status") + rc.emitWarning(postgresCluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to sync pooler status: %v", err)) if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, fmt.Sprintf("Failed to sync pooler status: %v", err), failedClusterPhase); 
statusErr != nil { logger.Error(statusErr, "Failed to update status") } return ctrl.Result{}, err } + rc.emitPoolerReadyTransition(postgresCluster, oldConditions) } // Reconcile ConfigMap when CNPG cluster is healthy. if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy { logger.Info("CNPG Cluster is ready, reconciling ConfigMap for connection details") - desiredCM, err := generateConfigMap(ctx, c, scheme, postgresCluster, cnpgCluster, postgresSecretName) + desiredCM, err := generateConfigMap(ctx, c, rc.Scheme, postgresCluster, cnpgCluster, postgresSecretName) if err != nil { logger.Error(err, "Failed to generate ConfigMap") + rc.emitWarning(postgresCluster, EventConfigMapReconcileFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err)) if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonConfigMapFailed, fmt.Sprintf("Failed to generate ConfigMap: %v", err), failedClusterPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") @@ -387,7 +413,7 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim cm.Annotations = desiredCM.Annotations cm.Labels = desiredCM.Labels if !metav1.IsControlledBy(cm, postgresCluster) { - if err := ctrl.SetControllerReference(postgresCluster, cm, scheme); err != nil { + if err := ctrl.SetControllerReference(postgresCluster, cm, rc.Scheme); err != nil { return fmt.Errorf("set controller reference failed: %w", err) } } @@ -395,6 +421,7 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim }) if err != nil { logger.Error(err, "Failed to reconcile ConfigMap", "name", desiredCM.Name) + rc.emitWarning(postgresCluster, EventConfigMapReconcileFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err)) if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonConfigMapFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err), failedClusterPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") @@ -403,8 +430,10 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim } switch createOrUpdateResult { case controllerutil.OperationResultCreated: + rc.emitNormal(postgresCluster, EventConfigMapReady, fmt.Sprintf("ConfigMap %s created", desiredCM.Name)) logger.Info("ConfigMap created", "name", desiredCM.Name) case controllerutil.OperationResultUpdated: + rc.emitNormal(postgresCluster, EventConfigMapReady, fmt.Sprintf("ConfigMap %s updated", desiredCM.Name)) logger.Info("ConfigMap updated", "name", desiredCM.Name) default: logger.Info("ConfigMap unchanged", "name", desiredCM.Name) @@ -415,6 +444,10 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim } // Final status sync. 
+ var oldPhase string + if postgresCluster.Status.Phase != nil { + oldPhase = *postgresCluster.Status.Phase + } if err := syncStatus(ctx, c, postgresCluster, cnpgCluster); err != nil { logger.Error(err, "Failed to sync status") if apierrors.IsConflict(err) { @@ -423,6 +456,11 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim } return ctrl.Result{}, fmt.Errorf("failed to sync status: %w", err) } + var newPhase string + if postgresCluster.Status.Phase != nil { + newPhase = *postgresCluster.Status.Phase + } + rc.emitClusterPhaseTransition(postgresCluster, oldPhase, newPhase) if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy { rwPooler := &cnpgv1.Pooler{} rwErr := c.Get(ctx, types.NamespacedName{ @@ -436,7 +474,10 @@ func PostgresClusterService(ctx context.Context, c client.Client, scheme *runtim }, roPooler) if rwErr == nil && roErr == nil && arePoolersReady(rwPooler, roPooler) { logger.Info("Poolers are ready, syncing pooler status") + poolerOldConditions := make([]metav1.Condition, len(postgresCluster.Status.Conditions)) + copy(poolerOldConditions, postgresCluster.Status.Conditions) _ = syncPoolerStatus(ctx, c, postgresCluster) + rc.emitPoolerReadyTransition(postgresCluster, poolerOldConditions) } } logger.Info("Reconciliation complete") @@ -924,7 +965,9 @@ func deleteCNPGCluster(ctx context.Context, c client.Client, cnpgCluster *cnpgv1 // handleFinalizer processes deletion cleanup: removes poolers, then deletes or orphans the CNPG Cluster // based on ClusterDeletionPolicy, then removes the finalizer. -func handleFinalizer(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, secret *corev1.Secret) error { +func handleFinalizer(ctx context.Context, rc *ReconcileContext, cluster *enterprisev4.PostgresCluster, secret *corev1.Secret) error { + c := rc.Client + scheme := rc.Scheme logger := log.FromContext(ctx) if cluster.GetDeletionTimestamp() == nil { logger.Info("PostgresCluster not marked for deletion, skipping finalizer logic") @@ -947,17 +990,16 @@ func handleFinalizer(ctx context.Context, c client.Client, scheme *runtime.Schem } logger.Info("Processing finalizer cleanup for PostgresCluster") - if err := deleteConnectionPoolers(ctx, c, cluster); err != nil { - logger.Error(err, "Failed to delete connection poolers during cleanup") - return fmt.Errorf("failed to delete connection poolers: %w", err) - } - - // Dereference *string — empty string falls through to default (unknown policy). 
policy := "" if cluster.Spec.ClusterDeletionPolicy != nil { policy = *cluster.Spec.ClusterDeletionPolicy } + if err := deleteConnectionPoolers(ctx, c, cluster); err != nil { + logger.Error(err, "Failed to delete connection poolers during cleanup") + return fmt.Errorf("failed to delete connection poolers: %w", err) + } + switch policy { case clusterDeletionPolicyDelete: logger.Info("ClusterDeletionPolicy is 'Delete', deleting CNPG Cluster and associated resources") @@ -1024,6 +1066,7 @@ func handleFinalizer(ctx context.Context, c client.Client, scheme *runtime.Schem logger.Error(err, "Failed to remove finalizer from PostgresCluster") return fmt.Errorf("failed to remove finalizer: %w", err) } + rc.emitNormal(cluster, EventCleanupComplete, fmt.Sprintf("Cleanup complete (policy: %s)", policy)) logger.Info("Finalizer removed, cleanup complete") return nil } diff --git a/pkg/postgresql/cluster/core/events.go b/pkg/postgresql/cluster/core/events.go new file mode 100644 index 000000000..afcfd768e --- /dev/null +++ b/pkg/postgresql/cluster/core/events.go @@ -0,0 +1,62 @@ +package core + +import ( + "fmt" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + EventSecretReady = "SecretReady" + EventConfigMapReady = "ConfigMapReady" + EventClusterAdopted = "ClusterAdopted" + EventClusterCreationStarted = "ClusterCreationStarted" + EventClusterUpdateStarted = "ClusterUpdateStarted" + EventClusterReady = "ClusterReady" + EventPoolerCreationStarted = "PoolerCreationStarted" + EventPoolerReady = "PoolerReady" + EventCleanupComplete = "CleanupComplete" + EventClusterClassNotFound = "ClusterClassNotFound" + EventConfigMergeFailed = "ConfigMergeFailed" + EventSecretReconcileFailed = "SecretReconcileFailed" + EventClusterCreateFailed = "ClusterCreateFailed" + EventClusterUpdateFailed = "ClusterUpdateFailed" + EventManagedRolesFailed = "ManagedRolesFailed" + EventPoolerReconcileFailed = "PoolerReconcileFailed" + EventConfigMapReconcileFailed = "ConfigMapReconcileFailed" + EventClusterDegraded = "ClusterDegraded" + EventCleanupFailed = "CleanupFailed" +) + +func (rc *ReconcileContext) emitNormal(obj client.Object, reason, message string) { + rc.Recorder.Event(obj, corev1.EventTypeNormal, reason, message) +} + +func (rc *ReconcileContext) emitWarning(obj client.Object, reason, message string) { + rc.Recorder.Event(obj, corev1.EventTypeWarning, reason, message) +} + +// emitClusterPhaseTransition emits ClusterReady or ClusterDegraded only on +// actual phase transitions. Provisioning and Configuring are expected phases +// after our own create/update operations, so they don't emit ClusterDegraded. +func (rc *ReconcileContext) emitClusterPhaseTransition(obj client.Object, oldPhase, newPhase string) { + switch { + case oldPhase != string(readyClusterPhase) && newPhase == string(readyClusterPhase): + rc.emitNormal(obj, EventClusterReady, "Cluster is up and running") + // only when cluster degraded from ready but not to provisioning or configuring + case oldPhase == string(readyClusterPhase) && newPhase != string(readyClusterPhase) && + newPhase != string(provisioningClusterPhase) && newPhase != string(configuringClusterPhase): + rc.emitWarning(obj, EventClusterDegraded, fmt.Sprintf("Cluster entered phase: %s", newPhase)) + } +} + +// emitPoolerReadyTransition emits PoolerReady only when the condition was not +// previously True — prevents re-emission on every reconcile while already ready. 
+func (rc *ReconcileContext) emitPoolerReadyTransition(obj client.Object, conditions []metav1.Condition) { + if !meta.IsStatusConditionTrue(conditions, string(poolerReady)) { + rc.emitNormal(obj, EventPoolerReady, "Connection poolers are ready") + } +} diff --git a/pkg/postgresql/cluster/core/types.go b/pkg/postgresql/cluster/core/types.go index 19886fd73..042a5ae82 100644 --- a/pkg/postgresql/cluster/core/types.go +++ b/pkg/postgresql/cluster/core/types.go @@ -5,8 +5,20 @@ import ( enterprisev4 "github.com/splunk/splunk-operator/api/v4" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/tools/record" + "sigs.k8s.io/controller-runtime/pkg/client" ) +// ReconcileContext bundles infrastructure dependencies injected by the controller +// shell (primary adapter). The service layer declares what it needs via this struct +// rather than reaching into context — keeping ports explicit and testable. +type ReconcileContext struct { + Client client.Client + Scheme *runtime.Scheme + Recorder record.EventRecorder +} + // normalizedCNPGClusterSpec is a subset of cnpgv1.ClusterSpec fields used for drift detection. // Only fields we set in buildCNPGClusterSpec are included — CNPG-injected defaults are excluded // to avoid false-positive drift on every reconcile. diff --git a/pkg/postgresql/database/core/database.go b/pkg/postgresql/database/core/database.go index 483076774..19f9b7ced 100644 --- a/pkg/postgresql/database/core/database.go +++ b/pkg/postgresql/database/core/database.go @@ -32,11 +32,11 @@ type NewDBRepoFunc func(ctx context.Context, host, dbName, password string) (DBR // newDBRepo is injected to keep the core free of pgx imports. func PostgresDatabaseService( ctx context.Context, - c client.Client, - scheme *runtime.Scheme, + rc *ReconcileContext, postgresDB *enterprisev4.PostgresDatabase, newDBRepo NewDBRepoFunc, ) (ctrl.Result, error) { + c := rc.Client logger := log.FromContext(ctx) logger.Info("Reconciling PostgresDatabase", "name", postgresDB.Name, "namespace", postgresDB.Namespace) @@ -46,8 +46,9 @@ func PostgresDatabaseService( // Finalizer: cleanup on deletion, register on creation. 
if postgresDB.GetDeletionTimestamp() != nil { - if err := handleDeletion(ctx, c, postgresDB); err != nil { + if err := handleDeletion(ctx, rc, postgresDB); err != nil { logger.Error(err, "Cleanup failed for PostgresDatabase") + rc.emitWarning(postgresDB, EventCleanupFailed, fmt.Sprintf("Cleanup failed: %v", err)) return ctrl.Result{}, err } return ctrl.Result{}, nil @@ -77,6 +78,7 @@ func PostgresDatabaseService( cluster, err := fetchCluster(ctx, c, postgresDB) if err != nil { if errors.IsNotFound(err) { + rc.emitWarning(postgresDB, EventClusterNotFound, fmt.Sprintf("PostgresCluster %s not found", postgresDB.Spec.ClusterRef.Name)) if err := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterNotFound, "Cluster CR not found", pendingDBPhase); err != nil { return ctrl.Result{}, err } @@ -93,12 +95,14 @@ func PostgresDatabaseService( switch clusterStatus { case ClusterNotReady, ClusterNoProvisionerRef: + rc.emitWarning(postgresDB, EventClusterNotReady, "Referenced PostgresCluster is not ready yet") if err := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterProvisioning, "Cluster is not in ready state yet", pendingDBPhase); err != nil { return ctrl.Result{}, err } return ctrl.Result{RequeueAfter: retryDelay}, nil case ClusterReady: + rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, clusterReady, EventClusterValidated, "Referenced PostgresCluster is ready") if err := updateStatus(clusterReady, metav1.ConditionTrue, reasonClusterAvailable, "Cluster is operational", provisioningDBPhase); err != nil { return ctrl.Result{}, err } @@ -111,6 +115,7 @@ func PostgresDatabaseService( "If you deleted a previous PostgresDatabase, recreate it with the original name to re-adopt the orphaned resources.", strings.Join(roleConflicts, ", ")) logger.Error(nil, conflictMsg) + rc.emitWarning(postgresDB, EventRoleConflict, conflictMsg) errs := []error{fmt.Errorf("role conflict detected: %s", strings.Join(roleConflicts, ", "))} if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonRoleConflict, conflictMsg, failedDBPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") @@ -132,13 +137,15 @@ func PostgresDatabaseService( // Phase: CredentialProvisioning — secrets must exist before roles are patched. // CNPG rejects a PasswordSecretRef pointing at a missing secret. - if err := reconcileUserSecrets(ctx, c, scheme, postgresDB); err != nil { + if err := reconcileUserSecrets(ctx, c, rc.Scheme, postgresDB); err != nil { + rc.emitWarning(postgresDB, EventUserSecretsFailed, fmt.Sprintf("Failed to reconcile user secrets: %v", err)) if statusErr := updateStatus(secretsReady, metav1.ConditionFalse, reasonSecretsCreationFailed, fmt.Sprintf("Failed to reconcile user secrets: %v", err), provisioningDBPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") } return ctrl.Result{}, err } + rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, secretsReady, EventSecretsReady, fmt.Sprintf("All secrets provisioned for %d databases", len(postgresDB.Spec.Databases))) if err := updateStatus(secretsReady, metav1.ConditionTrue, reasonSecretsCreated, fmt.Sprintf("All secrets provisioned for %d databases", len(postgresDB.Spec.Databases)), provisioningDBPhase); err != nil { return ctrl.Result{}, err @@ -147,13 +154,15 @@ func PostgresDatabaseService( // Phase: ConnectionMetadata — ConfigMaps carry connection info consumers need as soon // as databases are ready, so they are created alongside secrets. 
endpoints := resolveClusterEndpoints(cluster, cnpgCluster, postgresDB.Namespace) - if err := reconcileRoleConfigMaps(ctx, c, scheme, postgresDB, endpoints); err != nil { + if err := reconcileRoleConfigMaps(ctx, c, rc.Scheme, postgresDB, endpoints); err != nil { + rc.emitWarning(postgresDB, EventAccessConfigFailed, fmt.Sprintf("Failed to reconcile ConfigMaps: %v", err)) if statusErr := updateStatus(configMapsReady, metav1.ConditionFalse, reasonConfigMapsCreationFailed, fmt.Sprintf("Failed to reconcile ConfigMaps: %v", err), provisioningDBPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") } return ctrl.Result{}, err } + rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, configMapsReady, EventConfigMapsReady, fmt.Sprintf("All ConfigMaps provisioned for %d databases", len(postgresDB.Spec.Databases))) if err := updateStatus(configMapsReady, metav1.ConditionTrue, reasonConfigMapsCreated, fmt.Sprintf("All ConfigMaps provisioned for %d databases", len(postgresDB.Spec.Databases)), provisioningDBPhase); err != nil { return ctrl.Result{}, err @@ -173,8 +182,10 @@ func PostgresDatabaseService( logger.Info("User spec changed, patching CNPG Cluster", "missing", missing) if err := patchManagedRoles(ctx, c, postgresDB, cluster); err != nil { logger.Error(err, "Failed to patch users in CNPG Cluster") + rc.emitWarning(postgresDB, EventManagedRolesPatchFailed, fmt.Sprintf("Failed to patch managed roles: %v", err)) return ctrl.Result{}, err } + rc.emitNormal(postgresDB, EventRoleReconciliationStarted, fmt.Sprintf("Patched managed roles, waiting for %d roles to reconcile", len(desiredUsers))) if err := updateStatus(rolesReady, metav1.ConditionFalse, reasonWaitingForCNPG, fmt.Sprintf("Waiting for %d roles to be reconciled", len(desiredUsers)), provisioningDBPhase); err != nil { return ctrl.Result{}, err @@ -184,6 +195,7 @@ func PostgresDatabaseService( notReadyRoles, err := verifyRolesReady(ctx, desiredUsers, cnpgCluster) if err != nil { + rc.emitWarning(postgresDB, EventRoleFailed, fmt.Sprintf("Role reconciliation failed: %v", err)) if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonUsersCreationFailed, fmt.Sprintf("Role creation failed: %v", err), failedDBPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") @@ -197,16 +209,22 @@ func PostgresDatabaseService( } return ctrl.Result{RequeueAfter: retryDelay}, nil } + rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, rolesReady, EventRolesReady, fmt.Sprintf("All %d roles reconciled", len(desiredUsers))) if err := updateStatus(rolesReady, metav1.ConditionTrue, reasonUsersAvailable, fmt.Sprintf("All %d users in PostgreSQL", len(desiredUsers)), provisioningDBPhase); err != nil { return ctrl.Result{}, err } // Phase: DatabaseProvisioning - if err := reconcileCNPGDatabases(ctx, c, scheme, postgresDB, cluster); err != nil { + adopted, err := reconcileCNPGDatabases(ctx, c, rc.Scheme, postgresDB, cluster) + if err != nil { logger.Error(err, "Failed to reconcile CNPG Databases") + rc.emitWarning(postgresDB, EventDatabasesReconcileFailed, fmt.Sprintf("Failed to reconcile databases: %v", err)) return ctrl.Result{}, err } + if len(adopted) > 0 { + rc.emitNormal(postgresDB, EventResourcesAdopted, fmt.Sprintf("Adopted retained databases: %v", adopted)) + } notReadyDBs, err := verifyDatabasesReady(ctx, c, postgresDB) if err != nil { @@ -214,12 +232,14 @@ func PostgresDatabaseService( return ctrl.Result{}, err } if len(notReadyDBs) > 0 { + 
rc.emitOnceBeforeWait(postgresDB, postgresDB.Status.Conditions, databasesReady, EventDatabaseReconciliationStarted, fmt.Sprintf("Reconciling %d databases, waiting for readiness", len(postgresDB.Spec.Databases))) if err := updateStatus(databasesReady, metav1.ConditionFalse, reasonWaitingForCNPG, fmt.Sprintf("Waiting for databases to be ready: %v", notReadyDBs), provisioningDBPhase); err != nil { return ctrl.Result{}, err } return ctrl.Result{RequeueAfter: retryDelay}, nil } + rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, databasesReady, EventDatabasesReady, fmt.Sprintf("All %d databases ready", len(postgresDB.Spec.Databases))) if err := updateStatus(databasesReady, metav1.ConditionTrue, reasonDatabasesAvailable, fmt.Sprintf("All %d databases ready", len(postgresDB.Spec.Databases)), readyDBPhase); err != nil { return ctrl.Result{}, err @@ -255,18 +275,21 @@ func PostgresDatabaseService( } if err := reconcileRWRolePrivileges(ctx, endpoints.RWHost, string(pw), dbNames, newDBRepo); err != nil { + rc.emitWarning(postgresDB, EventPrivilegesGrantFailed, fmt.Sprintf("Failed to grant RW role privileges: %v", err)) if statusErr := updateStatus(privilegesReady, metav1.ConditionFalse, reasonPrivilegesGrantFailed, fmt.Sprintf("Failed to grant RW role privileges: %v", err), provisioningDBPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") } return ctrl.Result{}, err } + rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, privilegesReady, EventPrivilegesReady, fmt.Sprintf("RW role privileges granted for all %d databases", len(postgresDB.Spec.Databases))) if err := updateStatus(privilegesReady, metav1.ConditionTrue, reasonPrivilegesGranted, fmt.Sprintf("RW role privileges granted for all %d databases", len(postgresDB.Spec.Databases)), readyDBPhase); err != nil { return ctrl.Result{}, err } } + rc.emitNormal(postgresDB, EventPostgresDatabaseReady, fmt.Sprintf("PostgresDatabase %s is ready", postgresDB.Name)) postgresDB.Status.Databases = populateDatabaseStatus(postgresDB) postgresDB.Status.ObservedGeneration = &postgresDB.Generation @@ -429,8 +452,9 @@ func verifyRolesReady(ctx context.Context, expectedUsers []string, cnpgCluster * return notReady, nil } -func reconcileCNPGDatabases(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster) error { +func reconcileCNPGDatabases(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster) ([]string, error) { logger := log.FromContext(ctx) + var adopted []string for _, dbSpec := range postgresDB.Spec.Databases { cnpgDBName := cnpgDatabaseName(postgresDB.Name, dbSpec.Name) cnpgDB := &cnpgv1.Database{ @@ -442,6 +466,7 @@ func reconcileCNPGDatabases(ctx context.Context, c client.Client, scheme *runtim if reAdopting { logger.Info("Re-adopting orphaned CNPG Database", "name", cnpgDBName) delete(cnpgDB.Annotations, annotationRetainedFrom) + adopted = append(adopted, dbSpec.Name) } if cnpgDB.CreationTimestamp.IsZero() || reAdopting { return controllerutil.SetControllerReference(postgresDB, cnpgDB, scheme) @@ -449,10 +474,10 @@ func reconcileCNPGDatabases(ctx context.Context, c client.Client, scheme *runtim return nil }) if err != nil { - return fmt.Errorf("reconciling CNPG Database %s: %w", cnpgDBName, err) + return adopted, fmt.Errorf("reconciling CNPG Database %s: %w", cnpgDBName, err) } } - return nil + return adopted, nil } func 
verifyDatabasesReady(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase) ([]string, error) { @@ -499,7 +524,8 @@ func buildDeletionPlan(databases []enterprisev4.DatabaseDefinition) deletionPlan return plan } -func handleDeletion(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase) error { +func handleDeletion(ctx context.Context, rc *ReconcileContext, postgresDB *enterprisev4.PostgresDatabase) error { + c := rc.Client plan := buildDeletionPlan(postgresDB.Spec.Databases) if err := orphanRetainedResources(ctx, c, postgresDB, plan.retained); err != nil { return err @@ -517,6 +543,7 @@ func handleDeletion(ctx context.Context, c client.Client, postgresDB *enterprise } return fmt.Errorf("removing finalizer: %w", err) } + rc.emitNormal(postgresDB, EventCleanupComplete, fmt.Sprintf("Cleanup complete (%d retained, %d deleted)", len(plan.retained), len(plan.deleted))) log.FromContext(ctx).Info("Cleanup complete", "name", postgresDB.Name, "retained", len(plan.retained), "deleted", len(plan.deleted)) return nil } diff --git a/pkg/postgresql/database/core/events.go b/pkg/postgresql/database/core/events.go new file mode 100644 index 000000000..987b8bbfb --- /dev/null +++ b/pkg/postgresql/database/core/events.go @@ -0,0 +1,58 @@ +package core + +import ( + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + EventPostgresDatabaseReady = "PostgresDatabaseReady" + EventResourcesAdopted = "ResourcesAdopted" + EventClusterValidated = "ClusterValidated" + EventSecretsReady = "SecretsReady" + EventConfigMapsReady = "ConfigMapsReady" + EventRoleReconciliationStarted = "RoleReconciliationStarted" + EventRolesReady = "RolesReady" + EventDatabaseReconciliationStarted = "DatabaseReconciliationStarted" + EventDatabasesReady = "DatabasesReady" + EventPrivilegesReady = "PrivilegesReady" + EventCleanupComplete = "CleanupComplete" + EventClusterNotFound = "ClusterNotFound" + EventClusterNotReady = "ClusterNotReady" + EventRoleConflict = "RoleConflict" + EventUserSecretsFailed = "UserSecretsFailed" + EventAccessConfigFailed = "AccessConfigFailed" + EventManagedRolesPatchFailed = "ManagedRolesPatchFailed" + EventRoleFailed = "RoleFailed" + EventDatabasesReconcileFailed = "DatabasesReconcileFailed" + EventPrivilegesGrantFailed = "PrivilegesGrantFailed" + EventCleanupFailed = "CleanupFailed" +) + +func (rc *ReconcileContext) emitNormal(obj client.Object, reason, message string) { + rc.Recorder.Event(obj, corev1.EventTypeNormal, reason, message) +} + +func (rc *ReconcileContext) emitWarning(obj client.Object, reason, message string) { + rc.Recorder.Event(obj, corev1.EventTypeWarning, reason, message) +} + +// emitOnConditionTransition emits a Normal event only when the condition is not +// already True — prevents duplicate events on repeated reconciles. +func (rc *ReconcileContext) emitOnConditionTransition(obj client.Object, conditions []metav1.Condition, condType conditionTypes, reason, message string) { + if !meta.IsStatusConditionTrue(conditions, string(condType)) { + rc.emitNormal(obj, reason, message) + } +} + +// emitOnceBeforeWait emits a Normal event when the condition is either absent +// or currently True — i.e. the first time we enter a wait cycle. On subsequent +// requeue polls the condition is already False, so no duplicate is emitted. 
+func (rc *ReconcileContext) emitOnceBeforeWait(obj client.Object, conditions []metav1.Condition, condType conditionTypes, reason, message string) { + cond := meta.FindStatusCondition(conditions, string(condType)) + if cond == nil || cond.Status == metav1.ConditionTrue { + rc.emitNormal(obj, reason, message) + } +} diff --git a/pkg/postgresql/database/core/types.go b/pkg/postgresql/database/core/types.go index 0d1fa116a..bf07fd19f 100644 --- a/pkg/postgresql/database/core/types.go +++ b/pkg/postgresql/database/core/types.go @@ -4,8 +4,18 @@ import ( "time" enterprisev4 "github.com/splunk/splunk-operator/api/v4" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/tools/record" + "sigs.k8s.io/controller-runtime/pkg/client" ) +// ReconcileContext bundles infrastructure dependencies injected by the controller shell (primary adapter). +type ReconcileContext struct { + Client client.Client + Scheme *runtime.Scheme + Recorder record.EventRecorder +} + type reconcileDBPhases string type conditionTypes string type conditionReasons string From b5bed4daea749397f4df0a31371d81fd8faec46e Mon Sep 17 00:00:00 2001 From: Jakub Koterba Date: Mon, 30 Mar 2026 20:19:44 +0200 Subject: [PATCH 10/36] suite_test revert and simplify, cover more reconcile cases --- .../controller/postgrescluster_controller.go | 2 - .../postgrescluster_controller_test.go | 382 +++++++++++------- internal/controller/suite_test.go | 223 +++++----- 3 files changed, 368 insertions(+), 239 deletions(-) diff --git a/internal/controller/postgrescluster_controller.go b/internal/controller/postgrescluster_controller.go index 6a2181afc..dfa1f7eaf 100644 --- a/internal/controller/postgrescluster_controller.go +++ b/internal/controller/postgrescluster_controller.go @@ -142,8 +142,6 @@ func cnpgPoolerPredicator() predicate.Predicate { } // secretPredicator triggers only on owner reference changes. - -// secretPredicator filters Secret events to trigger reconciles on creation, deletion, or owner reference changes.
func secretPredicator() predicate.Predicate { return predicate.Funcs{ CreateFunc: func(event.CreateEvent) bool { return true }, diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go index 0f690754b..d405a35bf 100644 --- a/internal/controller/postgrescluster_controller_test.go +++ b/internal/controller/postgrescluster_controller_test.go @@ -19,8 +19,9 @@ package controller import ( "context" "fmt" + "strconv" + "time" - corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/api/resource" @@ -38,56 +39,81 @@ import ( "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core" ) +/* +* Test cases: +* PC-01 creates managed resources and status refs +* PC-02 adds finalizer on reconcile +* PC-07 is idempotent across repeated reconciles +* PC-03 Delete policy removes children and finalizer +* PC-04 Retain policy preserves children and removes ownerRefs +* PC-05 fails when PostgresClusterClass is missing +* PC-06 restores drifted managed spec +* PC-08 triggers on generation/finalizer/deletion changes +* PC-09 ignores no-op updates + */ + var _ = Describe("PostgresCluster Controller", func() { + const ( + postgresVersion = "15.10" + clusterMemberCount = int32(2) + storageAmount = "1Gi" + poolerEnabled = false + deletePolicy = "Delete" + retainPolicy = "Retain" + namespace = "default" + classNamePrefix = "postgresql-dev-" + clusterNamePrefix = "postgresql-cluster-dev-" + provisioner = "postgresql.cnpg.io" + ) + var ( - ctx context.Context - namespace string - clusterName string - className string - reconciler *PostgresClusterReconciler - req reconcile.Request - cnpg *cnpgv1.Cluster + ctx context.Context + clusterName string + className string + pgCluster *enterprisev4.PostgresCluster + pgClusterClass *enterprisev4.PostgresClusterClass + pgClusterKey types.NamespacedName + pgClusterClassKey types.NamespacedName + reconciler *PostgresClusterReconciler + req reconcile.Request ) BeforeEach(func() { - specLine := CurrentSpecReport().LeafNodeLocation.LineNumber - nameSuffix := fmt.Sprintf("%d-%d-%d", GinkgoParallelProcess(), GinkgoRandomSeed(), specLine) + nameSuffix := fmt.Sprintf("%d-%d-%d", + GinkgoParallelProcess(), + GinkgoRandomSeed(), + CurrentSpecReport().LeafNodeLocation.LineNumber, + ) ctx = context.Background() - namespace = "default" - clusterName = "postgresql-cluster-dev-" + nameSuffix - className = "postgresql-dev-" + nameSuffix - cnpg = &cnpgv1.Cluster{} - - // Arrange: class defaults used by getMergedConfig() - postgresVersion := "15.10" - instances := int32(2) - storage := resource.MustParse("1Gi") - poolerEnabled := false - - class := &enterprisev4.PostgresClusterClass{ + clusterName = clusterNamePrefix + nameSuffix + className = classNamePrefix + nameSuffix + pgClusterKey = types.NamespacedName{Name: clusterName, Namespace: namespace} + pgClusterClassKey = types.NamespacedName{Name: className, Namespace: namespace} + + pgClusterClass = &enterprisev4.PostgresClusterClass{ ObjectMeta: metav1.ObjectMeta{Name: className}, Spec: enterprisev4.PostgresClusterClassSpec{ - Provisioner: "postgresql.cnpg.io", + Provisioner: provisioner, Config: &enterprisev4.PostgresClusterClassConfig{ - Instances: &instances, - Storage: &storage, - PostgresVersion: &postgresVersion, - ConnectionPoolerEnabled: &poolerEnabled, + Instances: &[]int32{clusterMemberCount}[0], + Storage: &[]resource.Quantity{resource.MustParse(storageAmount)}[0], + PostgresVersion: 
&[]string{postgresVersion}[0], + ConnectionPoolerEnabled: &[]bool{poolerEnabled}[0], }, }, } - Expect(k8sClient.Create(ctx, class)).To(Succeed()) - pc := &enterprisev4.PostgresCluster{ + Expect(k8sClient.Create(ctx, pgClusterClass)).To(Succeed()) + + pgCluster = &enterprisev4.PostgresCluster{ ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: namespace}, Spec: enterprisev4.PostgresClusterSpec{ Class: className, - ClusterDeletionPolicy: &[]string{"Delete"}[0], + ClusterDeletionPolicy: &[]string{deletePolicy}[0], }, } - Expect(k8sClient.Create(ctx, pc)).To(Succeed()) reconciler = &PostgresClusterReconciler{ Client: k8sClient, @@ -96,22 +122,13 @@ var _ = Describe("PostgresCluster Controller", func() { req = reconcile.Request{NamespacedName: types.NamespacedName{Name: clusterName, Namespace: namespace}} }) - JustBeforeEach(func() { - By("Reconciling the created resource") - result, err := reconciler.Reconcile(ctx, req) - Expect(err).NotTo(HaveOccurred()) - Expect(result.RequeueAfter).To(BeZero()) - }) - AfterEach(func() { By("Deleting PostgresCluster and letting reconcile run finalizer cleanup") - key := types.NamespacedName{Name: clusterName, Namespace: namespace} - pc := &enterprisev4.PostgresCluster{} // Best-effort delete (object might already be gone in some specs) - err := k8sClient.Get(ctx, key, pc) + err := k8sClient.Get(ctx, pgClusterKey, pgCluster) if err == nil { - Expect(k8sClient.Delete(ctx, pc)).To(Succeed()) + Expect(k8sClient.Delete(ctx, pgCluster)).To(Succeed()) } else { Expect(apierrors.IsNotFound(err)).To(BeTrue()) } @@ -124,7 +141,7 @@ var _ = Describe("PostgresCluster Controller", func() { // In that case, remove finalizer directly so fixture teardown remains deterministic. if meta.IsNoMatchError(recErr) { current := &enterprisev4.PostgresCluster{} - getErr := k8sClient.Get(ctx, key, current) + getErr := k8sClient.Get(ctx, pgClusterKey, current) if apierrors.IsNotFound(getErr) { return true } @@ -142,124 +159,203 @@ var _ = Describe("PostgresCluster Controller", func() { return false } } - getErr := k8sClient.Get(ctx, key, &enterprisev4.PostgresCluster{}) + getErr := k8sClient.Get(ctx, pgClusterKey, &enterprisev4.PostgresCluster{}) return apierrors.IsNotFound(getErr) }, "10s", "500ms").Should(BeTrue()) By("Cleaning up PostgresClusterClass fixture") - class := &enterprisev4.PostgresClusterClass{} - classKey := types.NamespacedName{Name: className} // cluster-scoped CR - err = k8sClient.Get(ctx, classKey, class) + err = k8sClient.Get(ctx, pgClusterClassKey, pgClusterClass) if err == nil { - Expect(k8sClient.Delete(ctx, class)).To(Succeed()) + Expect(k8sClient.Delete(ctx, pgClusterClass)).To(Succeed()) } else { Expect(apierrors.IsNotFound(err)).To(BeTrue()) } }) - Context("Happy path and convergence", func() { - pc := &enterprisev4.PostgresCluster{} - It("PC-01 creates managed resources and status refs", func() { - By("creating CNPG cluster via reconcile and avaiting healthy") - Eventually(func() error { - _, err := reconciler.Reconcile(ctx, req) - if err != nil { - return err - } - if err := k8sClient.Get(ctx, req.NamespacedName, cnpg); err != nil { - return err - } - cnpg.Status.Phase = cnpgv1.PhaseHealthy - return k8sClient.Status().Update(ctx, cnpg) - }, "10s", "250ms").Should(Succeed()) - - By("reconciling until managed resources are published in status") - Eventually(func() bool { - _, err := reconciler.Reconcile(ctx, req) - if err != nil { - return false - } - current := &enterprisev4.PostgresCluster{} - if err := k8sClient.Get(ctx, req.NamespacedName, 
current); err != nil { - return false - } - return current.Status.Resources != nil && - current.Status.Resources.SuperUserSecretRef != nil && - current.Status.Resources.ConfigMapRef != nil - }, "20s", "250ms").Should(BeTrue()) - - By("asserting finalizer contract") - pc := &enterprisev4.PostgresCluster{} - Expect(k8sClient.Get(ctx, req.NamespacedName, pc)).To(Succeed()) - Expect(controllerutil.ContainsFinalizer(pc, core.PostgresClusterFinalizerName)).To(BeTrue()) - - By("asserting status references are published") - Expect(pc.Status.Resources).NotTo(BeNil()) - Expect(pc.Status.Resources.SuperUserSecretRef).NotTo(BeNil()) - Expect(pc.Status.Resources.ConfigMapRef).NotTo(BeNil()) - - By("asserting Secret ownership and existence") - secret := &corev1.Secret{} - Expect(k8sClient.Get(ctx, types.NamespacedName{ - Name: pc.Status.Resources.SuperUserSecretRef.Name, Namespace: namespace, - }, secret)).To(Succeed()) - Expect(metav1.IsControlledBy(secret, pc)).To(BeTrue()) - - By("asserting CNPG Cluster projection and ownership") - cnpg := &cnpgv1.Cluster{} - Expect(k8sClient.Get(ctx, req.NamespacedName, cnpg)).To(Succeed()) - Expect(metav1.IsControlledBy(cnpg, pc)).To(BeTrue()) - Expect(cnpg.Spec.Instances).To(Equal(2)) - Expect(cnpg.Spec.ImageName).To(ContainSubstring("postgresql:15.10")) - Expect(cnpg.Spec.StorageConfiguration.Size).To(Equal("1Gi")) - - By("asserting ConfigMap contract consumed by clients") - cm := &corev1.ConfigMap{} - Expect(k8sClient.Get(ctx, types.NamespacedName{ - Name: pc.Status.Resources.ConfigMapRef.Name, Namespace: namespace, - }, cm)).To(Succeed()) - Expect(metav1.IsControlledBy(cm, pc)).To(BeTrue()) - Expect(cm.Data).To(HaveKeyWithValue("DEFAULT_CLUSTER_PORT", "5432")) - Expect(cm.Data).To(HaveKey("SUPER_USER_SECRET_REF")) - Expect(cm.Data).To(HaveKey("CLUSTER_RW_ENDPOINT")) - }) - It("PC-02 adds finalizer on reconcile", func() { - Expect(k8sClient.Get(ctx, req.NamespacedName, pc)).To(Succeed()) - Expect(pc.ObjectMeta.Finalizers).To(ContainElement(core.PostgresClusterFinalizerName)) + When("under typical usage and expecting healthy PostgresCluster state", func() { + Context("when reconciling", func() { + // PC-02 + It("adds finalizer on reconcile", func() { + Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed()) + + Eventually(func() bool { + pc := &enterprisev4.PostgresCluster{} + if err := k8sClient.Get(ctx, pgClusterKey, pc); err != nil { + return false + } + return controllerutil.ContainsFinalizer(pc, core.PostgresClusterFinalizerName) + }, "10s", "250ms").Should(BeTrue()) + }) + + // PC-01 + It("creates managed resources and status refs", func() { + Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed()) + + Eventually(func(g Gomega) { + pc := &enterprisev4.PostgresCluster{} + g.Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed()) + + cond := meta.FindStatusCondition(pc.Status.Conditions, "ClusterReady") + g.Expect(cond).NotTo(BeNil()) + g.Expect(cond.Status).To(Equal(metav1.ConditionFalse)) + g.Expect(cond.Reason).To(Equal("CNPGClusterProvisioning")) + }, "20s", "250ms").Should(Succeed()) + + // Simulate external CNPG controller status progression. 
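+				// Envtest runs no CNPG operator, so the test stands in for it by
+				// flipping the CNPG cluster's Status.Phase itself.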
+ Eventually(func() error { + cnpg := &cnpgv1.Cluster{} + if err := k8sClient.Get(ctx, pgClusterKey, cnpg); err != nil { + return err + } + cnpg.Status.Phase = cnpgv1.PhaseHealthy + return k8sClient.Status().Update(ctx, cnpg) // update event + }, "10s", "250ms").Should(Succeed()) + + // Expect cnpg status progression propagation + Eventually(func(g Gomega) { + pc := &enterprisev4.PostgresCluster{} + g.Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed()) + + cond := meta.FindStatusCondition(pc.Status.Conditions, "ClusterReady") + g.Expect(cond).NotTo(BeNil()) + g.Expect(cond.Status).To(Equal(metav1.ConditionTrue)) + g.Expect(cond.Reason).To(Equal("CNPGClusterHealthy")) + }, "20s", "250ms").Should(Succeed()) + + Eventually(func(g Gomega) { + pc := &enterprisev4.PostgresCluster{} + g.Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed()) + g.Expect(pc.Status.Resources).NotTo(BeNil()) + g.Expect(pc.Status.Resources.SuperUserSecretRef).NotTo(BeNil()) + g.Expect(pc.Status.Resources.ConfigMapRef).NotTo(BeNil()) + }, "20s", "250ms").Should(Succeed()) + }) + + // PC-07 + It("is idempotent across repeated reconciles", func() { + Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed()) + + // Trigger extra update events that should not change desired state semantics. + Eventually(func() error { + pc := &enterprisev4.PostgresCluster{} + if err := k8sClient.Get(ctx, pgClusterKey, pc); err != nil { + return err + } + if pc.Annotations == nil { + pc.Annotations = map[string]string{} + } + pc.Annotations["test.bump"] = strconv.FormatInt(time.Now().UnixNano(), 10) + return k8sClient.Update(ctx, pc) // update event + }, "10s", "250ms").Should(Succeed()) + + Eventually(func(g Gomega) { + cnpg := &cnpgv1.Cluster{} + g.Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed()) + g.Expect(cnpg.Spec.Instances).To(Equal(int(clusterMemberCount))) + }, "20s", "250ms").Should(Succeed()) + }) }) - It("PC-07 is idempotent across repeated reconciles", func() {}) }) - Context("Deletion and finalizer", func() { - It("PC-03 Delete policy removes children and finalizer", func() {}) - It("PC-04 Retain policy preserves children and removes ownerRefs", func() {}) - }) + When("deleting a PostgresCluster", func() { + // PC-03 + Context("and clusterDeletionPolicy is set to Delete", func() { + It("removes children and finalizer", func() { + Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed()) + + pc := &enterprisev4.PostgresCluster{} + Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed()) + Expect(k8sClient.Delete(ctx, pc)).To(Succeed()) // delete event + + Eventually(func() bool { + err := k8sClient.Get(ctx, pgClusterKey, &enterprisev4.PostgresCluster{}) + return apierrors.IsNotFound(err) + }, "30s", "250ms").Should(BeTrue()) + }) + }) - Context("Failure and drift", func() { - It("PC-05 fails when PostgresClusterClass is missing", func() {}) - It("PC-06 restores drifted managed spec", func() {}) - }) + // PC-04 + Context("when clusterDeletionPolicy is set to Retain", func() { + It("preserves retained resources and removes owner refs", func() { + Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed()) + + // Trigger update event: switch policy to Retain before delete. 
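+					// Retain is expected to orphan the children (owner refs stripped)
+					// rather than delete them along with the PostgresCluster.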
+ Eventually(func() error { + pc := &enterprisev4.PostgresCluster{} + if err := k8sClient.Get(ctx, pgClusterKey, pc); err != nil { + return err + } + pc.Spec.ClusterDeletionPolicy = &[]string{retainPolicy}[0] + return k8sClient.Update(ctx, pc) + }, "10s", "250ms").Should(Succeed()) + + pc := &enterprisev4.PostgresCluster{} + Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed()) + Expect(k8sClient.Delete(ctx, pc)).To(Succeed()) // delete event - Context("Predicates", func() { - It("PC-08 triggers on generation/finalizer/deletion changes", func() {}) - It("PC-09 ignores no-op updates", func() {}) + Eventually(func() bool { + err := k8sClient.Get(ctx, pgClusterKey, &enterprisev4.PostgresCluster{}) + return apierrors.IsNotFound(err) + }, "30s", "250ms").Should(BeTrue()) + + }) + }) }) - // Context("When reconciling a resource", func() { + When("reconciling with invalid or drifted dependencies", func() { + // PC-05 + Context("when referenced class does not exist", func() { + It("fails with class-not-found condition", func() { + clusterName = "bad-" + clusterName + className = "missing-class" - // It("should successfully reconcile the resource", func() { - // By("Reconciling the created resource") - // // controllerReconciler := &PostgresClusterReconciler{ - // // Client: k8sClient, - // // Scheme: k8sClient.Scheme(), - // // } + bad := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: namespace}, + Spec: enterprisev4.PostgresClusterSpec{Class: className}, + } + Expect(k8sClient.Create(ctx, bad)).To(Succeed()) // create event - // // _, err := controllerReconciler.Reconcile(ctx, reconcile.Request{ - // // NamespacedName: typeNamespacedName, - // // }) - // err := errors.New("test error") - // Expect(err).NotTo(HaveOccurred()) + Eventually(func() bool { + current := &enterprisev4.PostgresCluster{} + if err := k8sClient.Get(ctx, types.NamespacedName{Name: bad.Name, Namespace: namespace}, current); err != nil { + return false + } + cond := meta.FindStatusCondition(current.Status.Conditions, "ClusterReady") + return cond != nil && cond.Reason == "ClusterClassNotFound" + }, "20s", "250ms").Should(BeTrue()) + }) + }) + + // PC-06 + Context("when managed child spec drifts from desired state", func() { + It("restores drifted managed spec", func() { + Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed()) - // }) - // }) + Eventually(func() error { + return k8sClient.Get(ctx, pgClusterKey, &cnpgv1.Cluster{}) + }, "20s", "250ms").Should(Succeed()) + + Eventually(func() error { + pc := &enterprisev4.PostgresCluster{} + if err := k8sClient.Get(ctx, pgClusterKey, pc); err != nil { + return err + } + if pc.Annotations == nil { + pc.Annotations = map[string]string{} + } + pc.Annotations["drift-trigger"] = strconv.FormatInt(time.Now().UnixNano(), 10) + pc.Spec.Instances = &[]int32{8}[0] + return k8sClient.Update(ctx, pc) + }, "10s", "250ms").Should(Succeed()) + + Eventually(func() bool { + cnpg := &cnpgv1.Cluster{} + if err := k8sClient.Get(ctx, pgClusterKey, cnpg); err != nil { + return false + } + return cnpg.Spec.Instances == int(clusterMemberCount) + }, "20s", "250ms").Should(BeTrue()) + }) + }) + }) }) diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index 30cb99f64..d614aa7f5 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -1,5 +1,5 @@ /* -Copyright 2026. +Copyright (c) 2018-2022 Splunk Inc. All rights reserved. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -21,138 +21,173 @@ import ( "fmt" "os" "path/filepath" - "sort" "testing" + "time" cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - - "k8s.io/client-go/kubernetes/scheme" + "go.uber.org/zap/zapcore" "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" - enterprisev4 "github.com/splunk/splunk-operator/api/v4" - // +kubebuilder:scaffold:imports -) - -// These tests use Ginkgo (BDD-style Go testing framework). Refer to -// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" -var ( - ctx context.Context - cancel context.CancelFunc - testEnv *envtest.Environment - cfg *rest.Config - k8sClient client.Client + enterpriseApiV3 "github.com/splunk/splunk-operator/api/v3" + enterpriseApi "github.com/splunk/splunk-operator/api/v4" + //+kubebuilder:scaffold:imports ) -func TestControllers(t *testing.T) { +var cfg *rest.Config +var k8sClient client.Client +var testEnv *envtest.Environment +var k8sManager ctrl.Manager + +func TestAPIs(t *testing.T) { RegisterFailHandler(Fail) RunSpecs(t, "Controller Suite") } -var _ = BeforeSuite(func() { - logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) - - ctx, cancel = context.WithCancel(context.TODO()) - - var err error - err = enterprisev4.AddToScheme(scheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - err = cnpgv1.AddToScheme(scheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - - // +kubebuilder:scaffold:scheme +var _ = BeforeSuite(func(ctx context.Context) { + opts := zap.Options{ + Development: true, + TimeEncoder: zapcore.RFC3339NanoTimeEncoder, + } + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true), zap.UseFlagOptions(&opts))) By("bootstrapping test environment") - cnpgCRDDirectory, err := getCNPGCRDDirectory() - Expect(err).NotTo(HaveOccurred()) + testEnv = &envtest.Environment{ - CRDDirectoryPaths: []string{ - filepath.Join("..", "..", "config", "crd", "bases"), - cnpgCRDDirectory, - }, + CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, ErrorIfCRDPathMissing: true, } - - // Retrieve the first found binary directory to allow running tests from IDEs - if getFirstFoundEnvTestBinaryDir() != "" { - testEnv.BinaryAssetsDirectory = getFirstFoundEnvTestBinaryDir() + cnpgCRDDir := os.Getenv("CNPG_CRD_DIR") + if cnpgCRDDir == "" { + homeDir, err := os.UserHomeDir() + Expect(err).NotTo(HaveOccurred()) + matches, err := filepath.Glob(filepath.Join(homeDir, "go", "pkg", "mod", "github.com", "cloudnative-pg", "cloudnative-pg@*", "config", "crd", "bases")) + Expect(err).NotTo(HaveOccurred()) + Expect(matches).NotTo(BeEmpty(), "CNPG CRD directory not found; set CNPG_CRD_DIR if module cache is custom") + cnpgCRDDir = matches[len(matches)-1] } + testEnv.CRDDirectoryPaths = append(testEnv.CRDDirectoryPaths, cnpgCRDDir) + + var err error // cfg is defined in this file globally. 
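+	// testEnv.Start launches a local control plane (kube-apiserver and etcd)
+	// from the envtest binaries and returns a rest.Config pointing at it.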
 	cfg, err = testEnv.Start()
 	Expect(err).NotTo(HaveOccurred())
 	Expect(cfg).NotTo(BeNil())
 
-	k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme})
+	err = enterpriseApi.AddToScheme(clientgoscheme.Scheme)
+	Expect(err).NotTo(HaveOccurred())
+	err = cnpgv1.AddToScheme(clientgoscheme.Scheme)
 	Expect(err).NotTo(HaveOccurred())
-	Expect(k8sClient).NotTo(BeNil())
-})
-
-var _ = AfterSuite(func() {
-	By("tearing down the test environment")
-	cancel()
-	err := testEnv.Stop()
+	err = enterpriseApiV3.AddToScheme(clientgoscheme.Scheme)
 	Expect(err).NotTo(HaveOccurred())
-})
-
-// getFirstFoundEnvTestBinaryDir locates the first binary in the specified path.
-// ENVTEST-based tests depend on specific binaries, usually located in paths set by
-// controller-runtime. When running tests directly (e.g., via an IDE) without using
-// Makefile targets, the 'BinaryAssetsDirectory' must be explicitly configured.
-//
-// This function streamlines the process by finding the required binaries, similar to
-// setting the 'KUBEBUILDER_ASSETS' environment variable. To ensure the binaries are
-// properly set up, run 'make setup-envtest' beforehand.
-func getFirstFoundEnvTestBinaryDir() string {
-	basePath := filepath.Join("..", "..", "bin", "k8s")
-	entries, err := os.ReadDir(basePath)
-	if err != nil {
-		logf.Log.Error(err, "Failed to read directory", "path", basePath)
-		return ""
+
+	//+kubebuilder:scaffold:scheme
+
+	// Create New Manager for controller
+	k8sManager, err = ctrl.NewManager(cfg, ctrl.Options{
+		Scheme: clientgoscheme.Scheme,
+	})
+	Expect(err).ToNot(HaveOccurred())
+	if err := (&ClusterManagerReconciler{
+		Client: k8sManager.GetClient(),
+		Scheme: k8sManager.GetScheme(),
+	}).SetupWithManager(k8sManager); err != nil {
+		Expect(err).NotTo(HaveOccurred())
 	}
-	for _, entry := range entries {
-		if entry.IsDir() {
-			return filepath.Join(basePath, entry.Name())
-		}
+	if err := (&ClusterMasterReconciler{
+		Client: k8sManager.GetClient(),
+		Scheme: k8sManager.GetScheme(),
+	}).SetupWithManager(k8sManager); err != nil {
+		Expect(err).NotTo(HaveOccurred())
 	}
-	return ""
-}
-
-func getCNPGCRDDirectory() (string, error) {
-	// Optional escape hatch for CI/local overrides.
-	if explicit := os.Getenv("CNPG_CRD_DIR"); explicit != "" {
-		return explicit, nil
+	if err := (&IndexerClusterReconciler{
+		Client: k8sManager.GetClient(),
+		Scheme: k8sManager.GetScheme(),
+	}).SetupWithManager(k8sManager); err != nil {
+		Expect(err).NotTo(HaveOccurred())
 	}
-
-	moduleRoot := os.Getenv("GOMODCACHE")
-	if moduleRoot == "" {
-		gopath := os.Getenv("GOPATH")
-		if gopath == "" {
-			home, err := os.UserHomeDir()
-			if err != nil {
-				return "", err
-			}
-			gopath = filepath.Join(home, "go")
-		}
-		moduleRoot = filepath.Join(gopath, "pkg", "mod")
+	if err := (&IngestorClusterReconciler{
+		Client: k8sManager.GetClient(),
+		Scheme: k8sManager.GetScheme(),
+	}).SetupWithManager(k8sManager); err != nil {
+		Expect(err).NotTo(HaveOccurred())
 	}
-
-	pattern := filepath.Join(moduleRoot, "github.com", "cloudnative-pg", "cloudnative-pg@*", "config", "crd", "bases")
-	matches, err := filepath.Glob(pattern)
-	if err != nil {
-		return "", err
+	if err := (&LicenseManagerReconciler{
+		Client: k8sManager.GetClient(),
+		Scheme: k8sManager.GetScheme(),
+	}).SetupWithManager(k8sManager); err != nil {
+		Expect(err).NotTo(HaveOccurred())
+	}
+	if err := (&LicenseMasterReconciler{
+		Client: k8sManager.GetClient(),
+		Scheme: k8sManager.GetScheme(),
+	}).SetupWithManager(k8sManager); err != nil {
+		Expect(err).NotTo(HaveOccurred())
+	}
+	if err := (&MonitoringConsoleReconciler{
+		Client: k8sManager.GetClient(),
+		Scheme: k8sManager.GetScheme(),
+	}).SetupWithManager(k8sManager); err != nil {
+		Expect(err).NotTo(HaveOccurred())
 	}
-	if len(matches) == 0 {
-		return "", fmt.Errorf("CNPG CRD directory not found; set CNPG_CRD_DIR or download github.com/cloudnative-pg/cloudnative-pg module")
+	if err := (&SearchHeadClusterReconciler{
+		Client: k8sManager.GetClient(),
+		Scheme: k8sManager.GetScheme(),
+	}).SetupWithManager(k8sManager); err != nil {
+		Expect(err).NotTo(HaveOccurred())
+	}
+	if err := (&StandaloneReconciler{
+		Client: k8sManager.GetClient(),
+		Scheme: k8sManager.GetScheme(),
+	}).SetupWithManager(k8sManager); err != nil {
+		Expect(err).NotTo(HaveOccurred())
+	}
+	if err := (&PostgresClusterReconciler{
+		Client: k8sManager.GetClient(),
+		Scheme: k8sManager.GetScheme(),
+	}).SetupWithManager(k8sManager); err != nil {
+		Expect(err).NotTo(HaveOccurred())
 	}
 
-	sort.Strings(matches)
-	return matches[len(matches)-1], nil
-}
+	go func() {
+		defer GinkgoRecover()
+		// Start blocks until the context is cancelled; a nil error is the normal
+		// shutdown path, so only a non-nil error fails the suite.
+		if err := k8sManager.Start(ctrl.SetupSignalHandler()); err != nil {
+			fmt.Printf("manager exited with error: %v\n", err)
+			Expect(err).ToNot(HaveOccurred())
+		}
+	}()
+
+	k8sClient, err = client.New(cfg, client.Options{Scheme: clientgoscheme.Scheme})
+	Expect(err).NotTo(HaveOccurred())
+	Expect(k8sClient).NotTo(BeNil())
+
+}, NodeTimeout(time.Second*500))
+
+var _ = AfterSuite(func() {
+	By("tearing down the test environment")
+	Expect(testEnv.Stop()).To(Succeed())
+})

From 80a4e6e22d7d2fb2f5dd1e2db98c33ea270acf34 Mon Sep 17 00:00:00 2001
From: Jakub Koterba
Date: Tue, 31 Mar 2026 10:59:45 +0200
Subject: [PATCH 11/36] synchronous postgres cluster tests

---
 .../postgrescluster_controller_test.go        | 187 ++++++++----------
 internal/controller/suite_test.go             |   6 -
 2 files changed, 80 insertions(+), 113 deletions(-)

diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go
index d405a35bf..4619c9ac8 100644
--- a/internal/controller/postgrescluster_controller_test.go
+++ b/internal/controller/postgrescluster_controller_test.go
@@ -19,8 +19,6 @@ package controller
 import (
 	"context"
 	"fmt"
-	"strconv"
-	"time"
 
 	apierrors
"k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" @@ -79,6 +77,13 @@ var _ = Describe("PostgresCluster Controller", func() { req reconcile.Request ) + reconcileNTimes := func(times int) { + for i := 0; i < times; i++ { + _, err := reconciler.Reconcile(ctx, req) + Expect(err).NotTo(HaveOccurred()) + } + } + BeforeEach(func() { nameSuffix := fmt.Sprintf("%d-%d-%d", GinkgoParallelProcess(), @@ -177,82 +182,59 @@ var _ = Describe("PostgresCluster Controller", func() { // PC-02 It("adds finalizer on reconcile", func() { Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed()) + reconcileNTimes(1) - Eventually(func() bool { - pc := &enterprisev4.PostgresCluster{} - if err := k8sClient.Get(ctx, pgClusterKey, pc); err != nil { - return false - } - return controllerutil.ContainsFinalizer(pc, core.PostgresClusterFinalizerName) - }, "10s", "250ms").Should(BeTrue()) + pc := &enterprisev4.PostgresCluster{} + Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed()) + Expect(controllerutil.ContainsFinalizer(pc, core.PostgresClusterFinalizerName)).To(BeTrue()) }) // PC-01 It("creates managed resources and status refs", func() { Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed()) + // pass 1: add finalizer; pass 2: create CNPG cluster/secret/status. + reconcileNTimes(2) - Eventually(func(g Gomega) { - pc := &enterprisev4.PostgresCluster{} - g.Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed()) - - cond := meta.FindStatusCondition(pc.Status.Conditions, "ClusterReady") - g.Expect(cond).NotTo(BeNil()) - g.Expect(cond.Status).To(Equal(metav1.ConditionFalse)) - g.Expect(cond.Reason).To(Equal("CNPGClusterProvisioning")) - }, "20s", "250ms").Should(Succeed()) + pc := &enterprisev4.PostgresCluster{} + Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed()) + cond := meta.FindStatusCondition(pc.Status.Conditions, "ClusterReady") + Expect(cond).NotTo(BeNil()) + Expect(cond.Status).To(Equal(metav1.ConditionFalse)) + Expect(cond.Reason).To(Equal("ClusterBuildSucceeded")) // Simulate external CNPG controller status progression. 
- Eventually(func() error { - cnpg := &cnpgv1.Cluster{} - if err := k8sClient.Get(ctx, pgClusterKey, cnpg); err != nil { - return err - } - cnpg.Status.Phase = cnpgv1.PhaseHealthy - return k8sClient.Status().Update(ctx, cnpg) // update event - }, "10s", "250ms").Should(Succeed()) + cnpg := &cnpgv1.Cluster{} + Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed()) + cnpg.Status.Phase = cnpgv1.PhaseHealthy + Expect(k8sClient.Status().Update(ctx, cnpg)).To(Succeed()) + reconcileNTimes(1) // Expect cnpg status progression propagation - Eventually(func(g Gomega) { - pc := &enterprisev4.PostgresCluster{} - g.Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed()) - - cond := meta.FindStatusCondition(pc.Status.Conditions, "ClusterReady") - g.Expect(cond).NotTo(BeNil()) - g.Expect(cond.Status).To(Equal(metav1.ConditionTrue)) - g.Expect(cond.Reason).To(Equal("CNPGClusterHealthy")) - }, "20s", "250ms").Should(Succeed()) - - Eventually(func(g Gomega) { - pc := &enterprisev4.PostgresCluster{} - g.Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed()) - g.Expect(pc.Status.Resources).NotTo(BeNil()) - g.Expect(pc.Status.Resources.SuperUserSecretRef).NotTo(BeNil()) - g.Expect(pc.Status.Resources.ConfigMapRef).NotTo(BeNil()) - }, "20s", "250ms").Should(Succeed()) + Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed()) + cond = meta.FindStatusCondition(pc.Status.Conditions, "ClusterReady") + Expect(cond).NotTo(BeNil()) + Expect(cond.Status).To(Equal(metav1.ConditionTrue)) + Expect(cond.Reason).To(Equal("CNPGClusterHealthy")) + Expect(pc.Status.Resources).NotTo(BeNil()) + Expect(pc.Status.Resources.SuperUserSecretRef).NotTo(BeNil()) + Expect(pc.Status.Resources.ConfigMapRef).NotTo(BeNil()) }) // PC-07 It("is idempotent across repeated reconciles", func() { Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed()) + reconcileNTimes(2) + reconcileNTimes(3) - // Trigger extra update events that should not change desired state semantics. 
- Eventually(func() error { - pc := &enterprisev4.PostgresCluster{} - if err := k8sClient.Get(ctx, pgClusterKey, pc); err != nil { - return err - } - if pc.Annotations == nil { - pc.Annotations = map[string]string{} - } - pc.Annotations["test.bump"] = strconv.FormatInt(time.Now().UnixNano(), 10) - return k8sClient.Update(ctx, pc) // update event - }, "10s", "250ms").Should(Succeed()) - - Eventually(func(g Gomega) { - cnpg := &cnpgv1.Cluster{} - g.Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed()) - g.Expect(cnpg.Spec.Instances).To(Equal(int(clusterMemberCount))) - }, "20s", "250ms").Should(Succeed()) + cnpg := &cnpgv1.Cluster{} + Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed()) + Expect(cnpg.Spec.Instances).To(Equal(int(clusterMemberCount))) + + pc := &enterprisev4.PostgresCluster{} + Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed()) + cond := meta.FindStatusCondition(pc.Status.Conditions, "ClusterReady") + Expect(cond).NotTo(BeNil()) + Expect(cond.ObservedGeneration).To(Equal(pc.Generation)) }) }) }) @@ -262,14 +244,19 @@ var _ = Describe("PostgresCluster Controller", func() { Context("and clusterDeletionPolicy is set to Delete", func() { It("removes children and finalizer", func() { Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed()) + reconcileNTimes(2) pc := &enterprisev4.PostgresCluster{} Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed()) - Expect(k8sClient.Delete(ctx, pc)).To(Succeed()) // delete event + Expect(k8sClient.Delete(ctx, pc)).To(Succeed()) Eventually(func() bool { - err := k8sClient.Get(ctx, pgClusterKey, &enterprisev4.PostgresCluster{}) - return apierrors.IsNotFound(err) + _, err := reconciler.Reconcile(ctx, req) + if err != nil { + return false + } + getErr := k8sClient.Get(ctx, pgClusterKey, &enterprisev4.PostgresCluster{}) + return apierrors.IsNotFound(getErr) }, "30s", "250ms").Should(BeTrue()) }) }) @@ -278,26 +265,20 @@ var _ = Describe("PostgresCluster Controller", func() { Context("when clusterDeletionPolicy is set to Retain", func() { It("preserves retained resources and removes owner refs", func() { Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed()) - - // Trigger update event: switch policy to Retain before delete. 
-					// Retain is expected to orphan the children (owner refs stripped)
-					// rather than delete them along with the PostgresCluster.
-					Eventually(func() error {
-						pc := &enterprisev4.PostgresCluster{}
-						if err := k8sClient.Get(ctx, pgClusterKey, pc); err != nil {
-							return err
-						}
-						pc.Spec.ClusterDeletionPolicy = &[]string{retainPolicy}[0]
-						return k8sClient.Update(ctx, pc)
-					}, "10s", "250ms").Should(Succeed())
+				reconcileNTimes(2)
 
 				pc := &enterprisev4.PostgresCluster{}
 				Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed())
-				Expect(k8sClient.Delete(ctx, pc)).To(Succeed()) // delete event
+				// Switch the policy to Retain so the delete path below exercises
+				// the orphaning branch rather than child deletion.
+				pc.Spec.ClusterDeletionPolicy = &[]string{retainPolicy}[0]
+				Expect(k8sClient.Update(ctx, pc)).To(Succeed())
+				Expect(k8sClient.Delete(ctx, pc)).To(Succeed())
 
 				Eventually(func() bool {
-					err := k8sClient.Get(ctx, pgClusterKey, &enterprisev4.PostgresCluster{})
-					return apierrors.IsNotFound(err)
+					_, err := reconciler.Reconcile(ctx, req)
+					if err != nil {
+						return false
+					}
+					getErr := k8sClient.Get(ctx, pgClusterKey, &enterprisev4.PostgresCluster{})
+					return apierrors.IsNotFound(getErr)
 				}, "30s", "250ms").Should(BeTrue())
-
 			})
 		})
 	})
@@ -306,18 +287,25 @@
 		// PC-05
 		Context("when referenced class does not exist", func() {
 			It("fails with class-not-found condition", func() {
-				clusterName = "bad-" + clusterName
-				className = "missing-class"
+				badName := "bad-" + clusterName
+				badKey := types.NamespacedName{Name: badName, Namespace: namespace}
 
 				bad := &enterprisev4.PostgresCluster{
-					ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: namespace},
-					Spec:       enterprisev4.PostgresClusterSpec{Class: className},
+					ObjectMeta: metav1.ObjectMeta{Name: badName, Namespace: namespace},
+					Spec:       enterprisev4.PostgresClusterSpec{Class: "missing-class"},
 				}
-				Expect(k8sClient.Create(ctx, bad)).To(Succeed()) // create event
+				Expect(k8sClient.Create(ctx, bad)).To(Succeed())
+				DeferCleanup(func() { _ = k8sClient.Delete(ctx, bad) })
+
+				// pass 1 adds finalizer, pass 2 reaches class lookup and sets failure condition.
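+				// Each reconcile pass applies one state change and returns, so the
+				// test drives a fixed number of passes instead of polling for them.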
+				_, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: badKey})
+				Expect(err).NotTo(HaveOccurred())
+				_, err = reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: badKey})
+				Expect(err).To(HaveOccurred())
 
 				Eventually(func() bool {
 					current := &enterprisev4.PostgresCluster{}
-					if err := k8sClient.Get(ctx, types.NamespacedName{Name: bad.Name, Namespace: namespace}, current); err != nil {
+					if err := k8sClient.Get(ctx, badKey, current); err != nil {
 						return false
 					}
 					cond := meta.FindStatusCondition(current.Status.Conditions, "ClusterReady")
@@ -330,31 +318,16 @@
 		Context("when managed child spec drifts from desired state", func() {
 			It("restores drifted managed spec", func() {
 				Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed())
+				reconcileNTimes(2)
 
-				Eventually(func() error {
-					return k8sClient.Get(ctx, pgClusterKey, &cnpgv1.Cluster{})
-				}, "20s", "250ms").Should(Succeed())
-
-				Eventually(func() error {
-					pc := &enterprisev4.PostgresCluster{}
-					if err := k8sClient.Get(ctx, pgClusterKey, pc); err != nil {
-						return err
-					}
-					if pc.Annotations == nil {
-						pc.Annotations = map[string]string{}
-					}
-					pc.Annotations["drift-trigger"] = strconv.FormatInt(time.Now().UnixNano(), 10)
-					pc.Spec.Instances = &[]int32{8}[0]
-					return k8sClient.Update(ctx, pc)
-				}, "10s", "250ms").Should(Succeed())
+				cnpg := &cnpgv1.Cluster{}
+				Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed())
+				cnpg.Spec.Instances = 8
+				Expect(k8sClient.Update(ctx, cnpg)).To(Succeed())
 
-				Eventually(func() bool {
-					cnpg := &cnpgv1.Cluster{}
-					if err := k8sClient.Get(ctx, pgClusterKey, cnpg); err != nil {
-						return false
-					}
-					return cnpg.Spec.Instances == int(clusterMemberCount)
-				}, "20s", "250ms").Should(BeTrue())
+				reconcileNTimes(2)
+				Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed())
+				Expect(cnpg.Spec.Instances).To(Equal(int(clusterMemberCount)))
 			})
 		})
 	})
 })
diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go
index d614aa7f5..3805a28b8 100644
--- a/internal/controller/suite_test.go
+++ b/internal/controller/suite_test.go
@@ -166,12 +166,6 @@ var _ = BeforeSuite(func(ctx context.Context) {
 	}).SetupWithManager(k8sManager); err != nil {
 		Expect(err).NotTo(HaveOccurred())
 	}
-	if err := (&PostgresClusterReconciler{
-		Client: k8sManager.GetClient(),
-		Scheme: k8sManager.GetScheme(),
-	}).SetupWithManager(k8sManager); err != nil {
-		Expect(err).NotTo(HaveOccurred())
-	}
 
 	go func() {
 		defer GinkgoRecover()
 		// Start blocks until the context is cancelled; a nil error is the normal
 		// shutdown path, so only a non-nil error fails the suite.
 		if err := k8sManager.Start(ctrl.SetupSignalHandler()); err != nil {
 			fmt.Printf("manager exited with error: %v\n", err)
 			Expect(err).ToNot(HaveOccurred())
 		}
 	}()

From 66157f82c74a1364265dd0a9471f731ff0afaae2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20P=C5=82oski?=
Date: Tue, 31 Mar 2026 13:50:51 +0200
Subject: [PATCH 12/36] Cleanup our logging approach

---
 api/v4/postgresdatabase_types.go              |   2 +-
 ...terprise.splunk.com_postgresdatabases.yaml |   2 +-
 pkg/postgresql/cluster/core/cluster.go        | 107 +++++++++---------
 pkg/postgresql/database/core/database.go      |  60 +++++-----
 4 files changed, 81 insertions(+), 90 deletions(-)

diff --git a/api/v4/postgresdatabase_types.go b/api/v4/postgresdatabase_types.go
index f89b7229a..02e8ca0b8 100644
--- a/api/v4/postgresdatabase_types.go
+++ b/api/v4/postgresdatabase_types.go
@@ -23,7 +23,7 @@ import (
 
 // PostgresDatabaseSpec defines the desired state of PostgresDatabase.
// +kubebuilder:validation:XValidation:rule="self.clusterRef == oldSelf.clusterRef",message="clusterRef is immutable" -// +kubebuilder:validation:XValidation:rule="self.clusterRef.name != ''",message="clusterRef.name must not be empty" +// +kubebuilder:validation:XValidation:rule="self.clusterRef.name.size() > 0",message="clusterRef.name must not be empty" type PostgresDatabaseSpec struct { // Reference to Postgres Cluster managed by postgresCluster controller // +kubebuilder:validation:Required diff --git a/config/crd/bases/enterprise.splunk.com_postgresdatabases.yaml b/config/crd/bases/enterprise.splunk.com_postgresdatabases.yaml index dbe0fef64..b134d5738 100644 --- a/config/crd/bases/enterprise.splunk.com_postgresdatabases.yaml +++ b/config/crd/bases/enterprise.splunk.com_postgresdatabases.yaml @@ -99,7 +99,7 @@ spec: - message: clusterRef is immutable rule: self.clusterRef == oldSelf.clusterRef - message: clusterRef.name must not be empty - rule: self.clusterRef.name != '' + rule: self.clusterRef.name.size() > 0 status: description: PostgresDatabaseStatus defines the observed state of PostgresDatabase. properties: diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 8642362fe..3334011c6 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -42,7 +42,7 @@ import ( func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl.Request) (ctrl.Result, error) { c := rc.Client logger := log.FromContext(ctx) - logger.Info("Reconciling PostgresCluster", "name", req.Name, "namespace", req.Namespace) + logger.Info("Reconciling PostgresCluster") var cnpgCluster *cnpgv1.Cluster var poolerEnabled bool @@ -56,13 +56,16 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. logger.Info("PostgresCluster deleted, skipping reconciliation") return ctrl.Result{}, nil } - logger.Error(err, "Unable to fetch PostgresCluster") + logger.Error(err, "Failed to fetch PostgresCluster") return ctrl.Result{}, err } if postgresCluster.Status.Resources == nil { postgresCluster.Status.Resources = &enterprisev4.PostgresClusterResources{} } + logger = logger.WithValues("postgresCluster", postgresCluster.Name) + ctx = log.IntoContext(ctx, logger) + updateStatus := func(conditionType conditionTypes, status metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileClusterPhases) error { return setStatus(ctx, c, postgresCluster, conditionType, status, reason, message, phase) } @@ -84,7 +87,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. return ctrl.Result{}, errors.Join(errs...) } if postgresCluster.GetDeletionTimestamp() != nil { - logger.Info("PostgresCluster is being deleted, cleanup complete") + logger.Info("Deletion cleanup complete, finalizer removed") return ctrl.Result{}, nil } @@ -99,14 +102,14 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. logger.Error(err, "Failed to add finalizer to PostgresCluster") return ctrl.Result{}, fmt.Errorf("failed to add finalizer: %w", err) } - logger.Info("Finalizer added successfully") + logger.Info("Finalizer added") return ctrl.Result{}, nil } // Load the referenced PostgresClusterClass. 
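+	// PostgresClusterClass is cluster-scoped, so the lookup key carries no namespace.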
clusterClass := &enterprisev4.PostgresClusterClass{} if err := c.Get(ctx, client.ObjectKey{Name: postgresCluster.Spec.Class}, clusterClass); err != nil { - logger.Error(err, "Unable to fetch referenced PostgresClusterClass", "className", postgresCluster.Spec.Class) + logger.Error(err, "Failed to fetch PostgresClusterClass", "className", postgresCluster.Spec.Class) rc.emitWarning(postgresCluster, EventClusterClassNotFound, fmt.Sprintf("ClusterClass %s not found", postgresCluster.Spec.Class)) if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterClassNotFound, fmt.Sprintf("ClusterClass %s not found: %v", postgresCluster.Spec.Class, err), failedClusterPhase); statusErr != nil { @@ -130,10 +133,10 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. // Resolve or derive the superuser secret name. if postgresCluster.Status.Resources != nil && postgresCluster.Status.Resources.SuperUserSecretRef != nil { postgresSecretName = postgresCluster.Status.Resources.SuperUserSecretRef.Name - logger.Info("Using existing secret from status", "name", postgresSecretName) + logger.Info("Superuser secret resolved from status", "name", postgresSecretName) } else { postgresSecretName = fmt.Sprintf("%s%s", postgresCluster.Name, defaultSecretSuffix) - logger.Info("Generating new secret name", "name", postgresSecretName) + logger.Info("Superuser secret name derived", "name", postgresSecretName) } secretExists, secretErr := clusterSecretExists(ctx, c, postgresCluster.Namespace, postgresSecretName, secret) @@ -147,7 +150,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. return ctrl.Result{}, secretErr } if !secretExists { - logger.Info("Creating PostgresCluster secret", "name", postgresSecretName) + logger.Info("Superuser secret creation started", "name", postgresSecretName) if err := ensureClusterSecret(ctx, c, rc.Scheme, postgresCluster, postgresSecretName, secret); err != nil { logger.Error(err, "Failed to ensure PostgresCluster secret", "name", postgresSecretName) rc.emitWarning(postgresCluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to generate cluster secret: %v", err)) @@ -166,7 +169,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. return ctrl.Result{}, err } rc.emitNormal(postgresCluster, EventSecretReady, fmt.Sprintf("Superuser secret %s created", postgresSecretName)) - logger.Info("SuperUserSecretRef persisted to status") + logger.Info("Superuser secret ref persisted to status") } // Re-attach ownerRef if it was stripped (e.g. by a Retain-policy deletion of a previous cluster). @@ -176,7 +179,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. return ctrl.Result{}, fmt.Errorf("failed to check owner reference on secret: %w", ownerRefErr) } if secretExists && !hasOwnerRef { - logger.Info("Connecting existing secret to PostgresCluster by adding owner reference", "name", postgresSecretName) + logger.Info("Existing secret linked to PostgresCluster", "name", postgresSecretName) rc.emitNormal(postgresCluster, EventClusterAdopted, fmt.Sprintf("Adopted existing CNPG cluster and secret %s", postgresSecretName)) originalSecret := secret.DeepCopy() if err := ctrl.SetControllerReference(postgresCluster, secret, rc.Scheme); err != nil { @@ -191,7 +194,6 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
} return ctrl.Result{}, err } - logger.Info("Existing secret linked successfully") } if postgresCluster.Status.Resources.SuperUserSecretRef == nil { @@ -209,7 +211,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. err = c.Get(ctx, types.NamespacedName{Name: postgresCluster.Name, Namespace: postgresCluster.Namespace}, existingCNPG) switch { case apierrors.IsNotFound(err): - logger.Info("CNPG Cluster not found, creating", "name", postgresCluster.Name) + logger.Info("CNPG Cluster creation started", "name", postgresCluster.Name) newCluster := buildCNPGCluster(rc.Scheme, postgresCluster, mergedConfig, postgresSecretName) if err := c.Create(ctx, newCluster); err != nil { logger.Error(err, "Failed to create CNPG Cluster") @@ -225,7 +227,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. "CNPG Cluster created", pendingClusterPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") } - logger.Info("CNPG Cluster created successfully, requeueing for status update", "name", postgresCluster.Name) + logger.Info("CNPG Cluster created, requeueing for status update", "name", postgresCluster.Name) return ctrl.Result{RequeueAfter: retryDelay}, nil case err != nil: logger.Error(err, "Failed to get CNPG Cluster") @@ -242,7 +244,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. desiredNormalized := normalizeCNPGClusterSpec(desiredSpec, mergedConfig.Spec.PostgreSQLConfig) if !equality.Semantic.DeepEqual(currentNormalized, desiredNormalized) { - logger.Info("Detected drift in CNPG Cluster spec, patching", "name", cnpgCluster.Name) + logger.Info("CNPG Cluster spec drift detected, patch started", "name", cnpgCluster.Name) originalCluster := cnpgCluster.DeepCopy() cnpgCluster.Spec = desiredSpec @@ -265,7 +267,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. return ctrl.Result{Requeue: true}, nil } rc.emitNormal(postgresCluster, EventClusterUpdateStarted, "CNPG cluster spec updated, waiting for healthy state") - logger.Info("CNPG Cluster patched successfully, requeueing for status update", "name", cnpgCluster.Name) + logger.Info("CNPG Cluster patched, requeueing for status update", "name", cnpgCluster.Name) return ctrl.Result{RequeueAfter: retryDelay}, nil } } @@ -349,7 +351,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. return ctrl.Result{}, err } rc.emitNormal(postgresCluster, EventPoolerCreationStarted, "Connection poolers created, waiting for readiness") - logger.Info("Connection Poolers created, requeueing to check readiness") + logger.Info("Connection pooler creation started, requeueing") if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerCreating, "Connection poolers are being provisioned", provisioningClusterPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") @@ -396,7 +398,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. // Reconcile ConfigMap when CNPG cluster is healthy. 
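+	// The ConfigMap publishes the connection contract consumed by clients
+	// (e.g. DEFAULT_CLUSTER_PORT, CLUSTER_RW_ENDPOINT, SUPER_USER_SECRET_REF).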
if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy { - logger.Info("CNPG Cluster is ready, reconciling ConfigMap for connection details") + logger.Info("CNPG Cluster healthy, reconciling ConfigMap") desiredCM, err := generateConfigMap(ctx, c, rc.Scheme, postgresCluster, cnpgCluster, postgresSecretName) if err != nil { logger.Error(err, "Failed to generate ConfigMap") @@ -414,7 +416,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. cm.Labels = desiredCM.Labels if !metav1.IsControlledBy(cm, postgresCluster) { if err := ctrl.SetControllerReference(postgresCluster, cm, rc.Scheme); err != nil { - return fmt.Errorf("set controller reference failed: %w", err) + return fmt.Errorf("setting controller reference: %w", err) } } return nil @@ -473,7 +475,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. Namespace: postgresCluster.Namespace, }, roPooler) if rwErr == nil && roErr == nil && arePoolersReady(rwPooler, roPooler) { - logger.Info("Poolers are ready, syncing pooler status") + logger.Info("Poolers ready, syncing status") poolerOldConditions := make([]metav1.Condition, len(postgresCluster.Status.Conditions)) copy(poolerOldConditions, postgresCluster.Status.Conditions) _ = syncPoolerStatus(ctx, c, postgresCluster) @@ -626,7 +628,7 @@ func reconcileManagedRoles(ctx context.Context, c client.Client, cluster *enterp return nil } - logger.Info("CNPG Cluster roles differ from desired state, updating", + logger.Info("CNPG Cluster roles drift detected, update started", "currentCount", len(currentRoles), "desiredCount", len(desiredRoles)) originalCluster := cnpgCluster.DeepCopy() @@ -636,9 +638,9 @@ func reconcileManagedRoles(ctx context.Context, c client.Client, cluster *enterp cnpgCluster.Spec.Managed.Roles = desiredRoles if err := c.Patch(ctx, cnpgCluster, client.MergeFrom(originalCluster)); err != nil { - return fmt.Errorf("failed to patch CNPG Cluster with managed roles: %w", err) + return fmt.Errorf("patching CNPG Cluster managed roles: %w", err) } - logger.Info("Successfully updated CNPG Cluster with managed roles", "roleCount", len(desiredRoles)) + logger.Info("CNPG Cluster managed roles updated", "roleCount", len(desiredRoles)) return nil } @@ -655,11 +657,7 @@ func poolerExists(ctx context.Context, c client.Client, cluster *enterprisev4.Po if apierrors.IsNotFound(err) { return false, nil } - if err != nil { - log.FromContext(ctx).Error(err, "Failed to check pooler existence", "type", poolerType) - return false, err - } - return true, nil + return err == nil, err } func arePoolersReady(rwPooler, roPooler *cnpgv1.Pooler) bool { @@ -687,15 +685,16 @@ func poolerInstanceCount(p *cnpgv1.Pooler) (desired, scheduled int32) { // createOrUpdateConnectionPoolers creates RW and RO poolers if they don't exist. 
func createOrUpdateConnectionPoolers(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, cnpgCluster *cnpgv1.Cluster) error { if err := createConnectionPooler(ctx, c, scheme, cluster, cfg, cnpgCluster, readWriteEndpoint); err != nil { - return fmt.Errorf("failed to reconcile RW pooler: %w", err) + return fmt.Errorf("reconciling RW pooler: %w", err) } if err := createConnectionPooler(ctx, c, scheme, cluster, cfg, cnpgCluster, readOnlyEndpoint); err != nil { - return fmt.Errorf("failed to reconcile RO pooler: %w", err) + return fmt.Errorf("reconciling RO pooler: %w", err) } return nil } func createConnectionPooler(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerType string) error { + logger := log.FromContext(ctx) poolerName := poolerResourceName(cluster.Name, poolerType) existing := &cnpgv1.Pooler{} err := c.Get(ctx, types.NamespacedName{Name: poolerName, Namespace: cluster.Namespace}, existing) @@ -705,7 +704,7 @@ func createConnectionPooler(ctx context.Context, c client.Client, scheme *runtim if !apierrors.IsNotFound(err) { return err } - log.FromContext(ctx).Info("Creating CNPG Pooler", "name", poolerName, "type", poolerType) + logger.Info("CNPG Pooler creation started", "name", poolerName, "type", poolerType) return c.Create(ctx, buildCNPGPooler(scheme, cluster, cfg, cnpgCluster, poolerType)) } @@ -736,7 +735,7 @@ func deleteConnectionPoolers(ctx context.Context, c client.Client, cluster *ente poolerName := poolerResourceName(cluster.Name, poolerType) exist, err := poolerExists(ctx, c, cluster, poolerType) if err != nil { - return fmt.Errorf("Can't check the pooler exist due to transient error %w", err) + return fmt.Errorf("checking pooler existence: %w", err) } if !exist { continue @@ -746,11 +745,11 @@ func deleteConnectionPoolers(ctx context.Context, c client.Client, cluster *ente if apierrors.IsNotFound(err) { continue } - return fmt.Errorf("failed to get pooler %s: %w", poolerName, err) + return fmt.Errorf("getting pooler %s: %w", poolerName, err) } - logger.Info("Deleting CNPG Pooler", "name", poolerName) + logger.Info("CNPG Pooler deletion started", "name", poolerName) if err := c.Delete(ctx, pooler); err != nil && !apierrors.IsNotFound(err) { - return fmt.Errorf("failed to delete pooler %s: %w", poolerName, err) + return fmt.Errorf("deleting pooler %s: %w", poolerName, err) } } return nil @@ -956,9 +955,9 @@ func deleteCNPGCluster(ctx context.Context, c client.Client, cnpgCluster *cnpgv1 logger.Info("CNPG Cluster not found, skipping deletion") return nil } - logger.Info("Deleting CNPG Cluster", "name", cnpgCluster.Name) + logger.Info("CNPG Cluster deletion started", "name", cnpgCluster.Name) if err := c.Delete(ctx, cnpgCluster); err != nil && !apierrors.IsNotFound(err) { - return fmt.Errorf("failed to delete CNPG Cluster: %w", err) + return fmt.Errorf("deleting CNPG Cluster: %w", err) } return nil } @@ -985,10 +984,10 @@ func handleFinalizer(ctx context.Context, rc *ReconcileContext, cluster *enterpr cnpgCluster = nil logger.Info("CNPG cluster not found during cleanup") } else { - return fmt.Errorf("failed to fetch CNPG cluster during cleanup: %w", err) + return fmt.Errorf("fetching CNPG cluster: %w", err) } } - logger.Info("Processing finalizer cleanup for PostgresCluster") + logger.Info("Finalizer cleanup started") policy := "" if cluster.Spec.ClusterDeletionPolicy != nil { @@ -996,35 +995,33 @@ func 
handleFinalizer(ctx context.Context, rc *ReconcileContext, cluster *enterpr } if err := deleteConnectionPoolers(ctx, c, cluster); err != nil { - logger.Error(err, "Failed to delete connection poolers during cleanup") - return fmt.Errorf("failed to delete connection poolers: %w", err) + return fmt.Errorf("deleting connection poolers: %w", err) } switch policy { case clusterDeletionPolicyDelete: - logger.Info("ClusterDeletionPolicy is 'Delete', deleting CNPG Cluster and associated resources") + logger.Info("ClusterDeletionPolicy 'Delete', CNPG Cluster deletion started") if cnpgCluster != nil { if err := deleteCNPGCluster(ctx, c, cnpgCluster); err != nil { - logger.Error(err, "Failed to delete CNPG Cluster during finalizer cleanup") - return fmt.Errorf("failed to delete CNPG Cluster during finalizer cleanup: %w", err) + return fmt.Errorf("deleting CNPG Cluster: %w", err) } } else { logger.Info("CNPG Cluster not found, skipping deletion") } case clusterDeletionPolicyRetain: - logger.Info("ClusterDeletionPolicy is 'Retain', removing owner references to orphan CNPG Cluster") + logger.Info("ClusterDeletionPolicy 'Retain', orphaning CNPG Cluster") if cnpgCluster != nil { originalCNPG := cnpgCluster.DeepCopy() refRemoved, err := removeOwnerRef(scheme, cluster, cnpgCluster) if err != nil { - return fmt.Errorf("failed to remove owner reference from CNPG cluster: %w", err) + return fmt.Errorf("removing owner reference from CNPG cluster: %w", err) } if !refRemoved { logger.Info("Owner reference already removed from CNPG Cluster, skipping patch") } if err := patchObject(ctx, c, originalCNPG, cnpgCluster, "CNPGCluster"); err != nil { - return fmt.Errorf("failed to patch CNPG cluster after removing owner reference: %w", err) + return fmt.Errorf("patching CNPG cluster after removing owner reference: %w", err) } logger.Info("Removed owner reference from CNPG Cluster") } @@ -1034,19 +1031,18 @@ func handleFinalizer(ctx context.Context, rc *ReconcileContext, cluster *enterpr secretName := cluster.Status.Resources.SuperUserSecretRef.Name if err := c.Get(ctx, types.NamespacedName{Name: secretName, Namespace: cluster.Namespace}, secret); err != nil { if !apierrors.IsNotFound(err) { - logger.Error(err, "Failed to fetch Secret during cleanup") - return fmt.Errorf("failed to fetch secret during cleanup: %w", err) + return fmt.Errorf("fetching secret during cleanup: %w", err) } logger.Info("Secret not found, skipping owner reference removal", "secret", secretName) } else { originalSecret := secret.DeepCopy() refRemoved, err := removeOwnerRef(scheme, cluster, secret) if err != nil { - return fmt.Errorf("failed to remove owner reference from Secret: %w", err) + return fmt.Errorf("removing owner reference from Secret: %w", err) } if refRemoved { if err := patchObject(ctx, c, originalSecret, secret, "Secret"); err != nil { - return fmt.Errorf("failed to patch Secret after removing owner reference: %w", err) + return fmt.Errorf("patching Secret after removing owner reference: %w", err) } } logger.Info("Removed owner reference from Secret") @@ -1063,8 +1059,7 @@ func handleFinalizer(ctx context.Context, rc *ReconcileContext, cluster *enterpr logger.Info("PostgresCluster already deleted, skipping finalizer update") return nil } - logger.Error(err, "Failed to remove finalizer from PostgresCluster") - return fmt.Errorf("failed to remove finalizer: %w", err) + return fmt.Errorf("removing finalizer: %w", err) } rc.emitNormal(cluster, EventCleanupComplete, fmt.Sprintf("Cleanup complete (policy: %s)", policy)) 
logger.Info("Finalizer removed, cleanup complete") @@ -1074,13 +1069,13 @@ func handleFinalizer(ctx context.Context, rc *ReconcileContext, cluster *enterpr func removeOwnerRef(scheme *runtime.Scheme, owner, obj client.Object) (bool, error) { hasRef, err := controllerutil.HasOwnerReference(obj.GetOwnerReferences(), owner, scheme) if err != nil { - return false, fmt.Errorf("failed to check owner reference: %w", err) + return false, fmt.Errorf("checking owner reference: %w", err) } if !hasRef { return false, nil } if err := controllerutil.RemoveOwnerReference(owner, obj, scheme); err != nil { - return false, fmt.Errorf("failed to remove owner reference: %w", err) + return false, fmt.Errorf("removing owner reference: %w", err) } return true, nil } @@ -1093,9 +1088,9 @@ func patchObject(ctx context.Context, c client.Client, original, obj client.Obje logger.Info("Object not found, skipping patch", "kind", kind, "name", obj.GetName()) return nil } - return fmt.Errorf("failed to patch %s object: %w", kind, err) + return fmt.Errorf("patching %s: %w", kind, err) } - logger.Info("Patched object successfully", "kind", kind, "name", obj.GetName()) + logger.Info("Object patched", "kind", kind, "name", obj.GetName()) return nil } diff --git a/pkg/postgresql/database/core/database.go b/pkg/postgresql/database/core/database.go index 269ecb706..f84a35fd9 100644 --- a/pkg/postgresql/database/core/database.go +++ b/pkg/postgresql/database/core/database.go @@ -38,8 +38,9 @@ func PostgresDatabaseService( newDBRepo NewDBRepoFunc, ) (ctrl.Result, error) { c := rc.Client - logger := log.FromContext(ctx) - logger.Info("Reconciling PostgresDatabase", "name", postgresDB.Name, "namespace", postgresDB.Namespace) + logger := log.FromContext(ctx).WithValues("postgresDatabase", postgresDB.Name) + ctx = log.IntoContext(ctx, logger) + logger.Info("Reconciling PostgresDatabase") updateStatus := func(conditionType conditionTypes, conditionStatus metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileDBPhases) error { return persistStatus(ctx, c, postgresDB, conditionType, conditionStatus, reason, message, phase) @@ -48,7 +49,7 @@ func PostgresDatabaseService( // Finalizer: cleanup on deletion, register on creation. if postgresDB.GetDeletionTimestamp() != nil { if err := handleDeletion(ctx, rc, postgresDB); err != nil { - logger.Error(err, "Cleanup failed for PostgresDatabase") + logger.Error(err, "Failed to clean up PostgresDatabase") rc.emitWarning(postgresDB, EventCleanupFailed, fmt.Sprintf("Cleanup failed: %v", err)) return ctrl.Result{}, err } @@ -65,7 +66,7 @@ func PostgresDatabaseService( logger.Error(err, "Failed to add finalizer to PostgresDatabase") return ctrl.Result{}, fmt.Errorf("failed to add finalizer: %w", err) } - logger.Info("Finalizer added successfully") + logger.Info("Finalizer added") return ctrl.Result{}, nil } @@ -92,7 +93,7 @@ func PostgresDatabaseService( return ctrl.Result{}, err } clusterStatus := getClusterReadyStatus(cluster) - logger.Info("Cluster validation done", "clusterName", postgresDB.Spec.ClusterRef.Name, "status", clusterStatus) + logger.Info("Cluster validation complete", "clusterName", postgresDB.Spec.ClusterRef.Name, "status", clusterStatus) switch clusterStatus { case ClusterNotReady, ClusterNoProvisionerRef: @@ -115,9 +116,10 @@ func PostgresDatabaseService( conflictMsg := fmt.Sprintf("Role conflict: %s. 
"+ "If you deleted a previous PostgresDatabase, recreate it with the original name to re-adopt the orphaned resources.", strings.Join(roleConflicts, ", ")) - logger.Error(nil, conflictMsg) + conflictErr := fmt.Errorf("role conflict detected: %s", strings.Join(roleConflicts, ", ")) + logger.Error(conflictErr, conflictMsg) rc.emitWarning(postgresDB, EventRoleConflict, conflictMsg) - errs := []error{fmt.Errorf("role conflict detected: %s", strings.Join(roleConflicts, ", "))} + errs := []error{conflictErr} if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonRoleConflict, conflictMsg, failedDBPhase); statusErr != nil { logger.Error(statusErr, "Failed to update status") errs = append(errs, fmt.Errorf("failed to update status: %w", statusErr)) @@ -180,7 +182,7 @@ func PostgresDatabaseService( } if len(missing) > 0 { - logger.Info("User spec changed, patching CNPG Cluster", "missing", missing) + logger.Info("CNPG Cluster patch started, missing roles detected", "missing", missing) if err := patchManagedRoles(ctx, c, postgresDB, cluster); err != nil { logger.Error(err, "Failed to patch users in CNPG Cluster") rc.emitWarning(postgresDB, EventManagedRolesPatchFailed, fmt.Sprintf("Failed to patch managed roles: %v", err)) @@ -255,7 +257,7 @@ func PostgresDatabaseService( // (name + key) when the cluster was provisioned. This avoids depending on CNPG's // spec field and makes the key explicit. if cluster.Status.Resources == nil || cluster.Status.Resources.SuperUserSecretRef == nil { - return ctrl.Result{}, fmt.Errorf("PostgresCluster %s has no superuser secret ref in status", cluster.Name) + return ctrl.Result{}, fmt.Errorf("postgresCluster %s has no superuser secret ref in status", cluster.Name) } superSecretRef := cluster.Status.Resources.SuperUserSecretRef superSecret := &corev1.Secret{} @@ -263,7 +265,7 @@ func PostgresDatabaseService( Name: superSecretRef.Name, Namespace: postgresDB.Namespace, }, superSecret); err != nil { - return ctrl.Result{}, fmt.Errorf("fetching superuser secret %s: %w", superSecretRef.Name, err) + return ctrl.Result{}, fmt.Errorf("failed to fetch superuser secret %s: %w", superSecretRef.Name, err) } pw, ok := superSecret.Data[superSecretRef.Key] if !ok { @@ -298,7 +300,7 @@ func PostgresDatabaseService( if errors.IsConflict(err) { return ctrl.Result{Requeue: true}, nil } - return ctrl.Result{}, fmt.Errorf("persisting final status: %w", err) + return ctrl.Result{}, fmt.Errorf("failed to persist final status: %w", err) } logger.Info("All phases complete") @@ -318,13 +320,11 @@ func reconcileRWRolePrivileges( for _, dbName := range dbNames { repo, err := newDBRepo(ctx, rwHost, dbName, superPassword) if err != nil { - logger.Error(err, "Failed to connect to database", "database", dbName) - errs = append(errs, fmt.Errorf("database %s: %w", dbName, err)) + errs = append(errs, fmt.Errorf("connecting to database %s: %w", dbName, err)) continue } if err := repo.ExecGrants(ctx, dbName); err != nil { - logger.Error(err, "Failed to grant RW role privileges", "database", dbName) - errs = append(errs, fmt.Errorf("database %s: %w", dbName, err)) + errs = append(errs, fmt.Errorf("granting RW privileges on database %s: %w", dbName, err)) continue } logger.Info("RW role privileges granted", "database", dbName, "rwRole", rwRoleName(dbName)) @@ -333,13 +333,8 @@ func reconcileRWRolePrivileges( } func fetchCluster(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase) (*enterprisev4.PostgresCluster, error) { - logger := log.FromContext(ctx) cluster := 
&enterprisev4.PostgresCluster{} if err := c.Get(ctx, types.NamespacedName{Name: postgresDB.Spec.ClusterRef.Name, Namespace: postgresDB.Namespace}, cluster); err != nil { - if errors.IsNotFound(err) { - return nil, err - } - logger.Error(err, "Failed to fetch Cluster", "name", postgresDB.Spec.ClusterRef.Name) return nil, err } return cluster, nil @@ -423,15 +418,13 @@ func patchManagedRoles(ctx context.Context, c client.Client, postgresDB *enterpr allRoles := buildManagedRoles(postgresDB.Name, postgresDB.Spec.Databases) rolePatch, err := buildManagedRolesPatch(cluster, allRoles, c.Scheme()) if err != nil { - logger.Error(err, "Failed to build managed roles patch", "postgresDatabase", postgresDB.Name) return fmt.Errorf("building managed roles patch for PostgresDatabase %s: %w", postgresDB.Name, err) } fieldManager := fieldManagerName(postgresDB.Name) if err := c.Patch(ctx, rolePatch, client.Apply, client.FieldOwner(fieldManager)); err != nil { - logger.Error(err, "Failed to add users to PostgresCluster", "postgresDatabase", postgresDB.Name) return fmt.Errorf("patching managed roles for PostgresDatabase %s: %w", postgresDB.Name, err) } - logger.Info("Users added to PostgresCluster via SSA", "postgresDatabase", postgresDB.Name, "roleCount", len(allRoles)) + logger.Info("Users added to PostgresCluster via SSA", "roleCount", len(allRoles)) return nil } @@ -440,7 +433,7 @@ func verifyRolesReady(ctx context.Context, expectedUsers []string, cnpgCluster * if cnpgCluster.Status.ManagedRolesStatus.CannotReconcile != nil { for _, userName := range expectedUsers { if errs, exists := cnpgCluster.Status.ManagedRolesStatus.CannotReconcile[userName]; exists { - return nil, fmt.Errorf("user %s reconciliation failed: %v", userName, errs) + return nil, fmt.Errorf("reconciling user %s: %v", userName, errs) } } } @@ -469,7 +462,7 @@ func reconcileCNPGDatabases(ctx context.Context, c client.Client, scheme *runtim cnpgDB.Spec = buildCNPGDatabaseSpec(cluster.Status.ProvisionerRef.Name, dbSpec) reAdopting := cnpgDB.Annotations[annotationRetainedFrom] == postgresDB.Name if reAdopting { - logger.Info("Re-adopting orphaned CNPG Database", "name", cnpgDBName) + logger.Info("Orphaned CNPG Database re-adopted", "name", cnpgDBName) delete(cnpgDB.Annotations, annotationRetainedFrom) adopted = append(adopted, dbSpec.Name) } @@ -530,6 +523,7 @@ func buildDeletionPlan(databases []enterprisev4.DatabaseDefinition) deletionPlan } func handleDeletion(ctx context.Context, rc *ReconcileContext, postgresDB *enterprisev4.PostgresDatabase) error { + logger := log.FromContext(ctx) c := rc.Client plan := buildDeletionPlan(postgresDB.Spec.Databases) if err := orphanRetainedResources(ctx, c, postgresDB, plan.retained); err != nil { @@ -549,7 +543,7 @@ func handleDeletion(ctx context.Context, rc *ReconcileContext, postgresDB *enter return fmt.Errorf("removing finalizer: %w", err) } rc.emitNormal(postgresDB, EventCleanupComplete, fmt.Sprintf("Cleanup complete (%d retained, %d deleted)", len(plan.retained), len(plan.deleted))) - log.FromContext(ctx).Info("Cleanup complete", "name", postgresDB.Name, "retained", len(plan.retained), "deleted", len(plan.deleted)) + logger.Info("Cleanup complete", "retained", len(plan.retained), "deleted", len(plan.deleted)) return nil } @@ -574,6 +568,7 @@ func deleteRemovedResources(ctx context.Context, c client.Client, postgresDB *en } func cleanupManagedRoles(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, plan deletionPlan) error { + logger := log.FromContext(ctx) if 
len(plan.deleted) == 0 { return nil } @@ -582,7 +577,7 @@ func cleanupManagedRoles(ctx context.Context, c client.Client, postgresDB *enter if !errors.IsNotFound(err) { return fmt.Errorf("getting PostgresCluster for role cleanup: %w", err) } - log.FromContext(ctx).Info("PostgresCluster already deleted, skipping role cleanup") + logger.Info("PostgresCluster already deleted, skipping role cleanup") return nil } return patchManagedRolesOnDeletion(ctx, c, postgresDB, cluster, plan.retained) @@ -745,7 +740,7 @@ func buildManagedRoles(postgresDBName string, databases []enterprisev4.DatabaseD func buildManagedRolesPatch(cluster *enterprisev4.PostgresCluster, roles []enterprisev4.ManagedRole, scheme *runtime.Scheme) (*unstructured.Unstructured, error) { gvk, err := apiutil.GVKForObject(cluster, scheme) if err != nil { - return nil, fmt.Errorf("failed to get GVK for Cluster: %w", err) + return nil, fmt.Errorf("getting GVK for Cluster: %w", err) } return &unstructured.Unstructured{ Object: map[string]any{ @@ -758,6 +753,7 @@ func buildManagedRolesPatch(cluster *enterprisev4.PostgresCluster, roles []enter } func patchManagedRolesOnDeletion(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster, retained []enterprisev4.DatabaseDefinition) error { + logger := log.FromContext(ctx) roles := buildManagedRoles(postgresDB.Name, retained) rolePatch, err := buildManagedRolesPatch(cluster, roles, c.Scheme()) if err != nil { @@ -766,7 +762,7 @@ func patchManagedRolesOnDeletion(ctx context.Context, c client.Client, postgresD if err := c.Patch(ctx, rolePatch, client.Apply, client.FieldOwner(fieldManagerName(postgresDB.Name))); err != nil { return fmt.Errorf("patching managed roles on deletion: %w", err) } - log.FromContext(ctx).Info("Patched managed roles on deletion", "postgresDatabase", postgresDB.Name, "retainedRoles", len(roles)) + logger.Info("Managed roles patched on deletion", "retainedRoles", len(roles)) return nil } @@ -811,10 +807,10 @@ func ensureSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, logger := log.FromContext(ctx) switch { case secret == nil: - logger.Info("Creating missing user secret", "name", secretName) + logger.Info("User secret creation started", "name", secretName) return createUserSecret(ctx, c, scheme, postgresDB, roleName, secretName) case secret.Annotations[annotationRetainedFrom] == postgresDB.Name: - logger.Info("Re-adopting orphaned secret", "name", secretName) + logger.Info("Orphaned secret re-adopted", "name", secretName) return adoptResource(ctx, c, scheme, postgresDB, secret) } return nil @@ -889,7 +885,7 @@ func reconcileRoleConfigMaps(ctx context.Context, c client.Client, scheme *runti cm.Data = buildDatabaseConfigMapBody(dbSpec.Name, endpoints) reAdopting := cm.Annotations[annotationRetainedFrom] == postgresDB.Name if reAdopting { - logger.Info("Re-adopting orphaned ConfigMap", "name", cmName) + logger.Info("Orphaned ConfigMap re-adopted", "name", cmName) delete(cm.Annotations, annotationRetainedFrom) } if cm.CreationTimestamp.IsZero() || reAdopting { From 45545ba344e36cdd27cab167cc7e25f724ca74f6 Mon Sep 17 00:00:00 2001 From: Jakub Koterba Date: Wed, 1 Apr 2026 11:18:18 +0200 Subject: [PATCH 13/36] added recorder obj to pg cluster and database reconciler instances --- internal/controller/postgrescluster_controller_test.go | 6 ++++-- internal/controller/postgresdatabase_controller_test.go | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git 
a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go index 4619c9ac8..ea4d66f64 100644
--- a/internal/controller/postgrescluster_controller_test.go
+++ b/internal/controller/postgrescluster_controller_test.go
@@ -23,6 +23,7 @@ import (
 apierrors "k8s.io/apimachinery/pkg/api/errors"
 "k8s.io/apimachinery/pkg/api/meta"
 "k8s.io/apimachinery/pkg/api/resource"
+ "k8s.io/client-go/tools/record"
 cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
 . "github.com/onsi/ginkgo/v2"
@@ -121,8 +122,9 @@ var _ = Describe("PostgresCluster Controller", func() {
 }
 reconciler = &PostgresClusterReconciler{
- Client: k8sClient,
- Scheme: k8sClient.Scheme(),
+ Client: k8sClient,
+ Scheme: k8sClient.Scheme(),
+ Recorder: record.NewFakeRecorder(100),
 }
 req = reconcile.Request{NamespacedName: types.NamespacedName{Name: clusterName, Namespace: namespace}}
 })
diff --git a/internal/controller/postgresdatabase_controller_test.go b/internal/controller/postgresdatabase_controller_test.go index 614efc49b..a1f5ed9ba 100644
--- a/internal/controller/postgresdatabase_controller_test.go
+++ b/internal/controller/postgresdatabase_controller_test.go
@@ -32,6 +32,7 @@ import (
 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 "k8s.io/apimachinery/pkg/types"
+ "k8s.io/client-go/tools/record"
 ctrl "sigs.k8s.io/controller-runtime"
 "sigs.k8s.io/controller-runtime/pkg/client"
 "sigs.k8s.io/controller-runtime/pkg/reconcile"
@@ -41,8 +42,9 @@ const postgresDatabaseFinalizer = "postgresdatabases.enterprise.splunk.com/final
 func reconcilePostgresDatabase(ctx context.Context, nn types.NamespacedName) (ctrl.Result, error) {
 reconciler := &PostgresDatabaseReconciler{
- Client: k8sClient,
- Scheme: k8sClient.Scheme(),
+ Client: k8sClient,
+ Scheme: k8sClient.Scheme(),
+ Recorder: record.NewFakeRecorder(100),
 }
 return reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: nn})
 }

From e725c5396e389b0689dbfdf9045175392171c5c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20P=C5=82oski?=
Date: Fri, 3 Apr 2026 16:03:57 +0200
Subject: [PATCH 14/36] Mark removed database roles as exists: false in the cluster CR

---
 api/v4/postgrescluster_types.go | 2 +-
 .../postgrescluster_controller_test.go | 2 +-
 .../postgresdatabase_controller_test.go | 70 +++++++-
 pkg/postgresql/cluster/core/cluster.go | 1 -
 pkg/postgresql/database/core/database.go | 133 +++++++++--
 .../database/core/database_unit_test.go | 163 +++++++++++++++---
 pkg/postgresql/database/core/types.go | 3 +-
 7 files changed, 304 insertions(+), 70 deletions(-)

diff --git a/api/v4/postgrescluster_types.go b/api/v4/postgrescluster_types.go index 3e3dd0da7..65d300f53 100644
--- a/api/v4/postgrescluster_types.go
+++ b/api/v4/postgrescluster_types.go
@@ -37,7 +37,7 @@ type ManagedRole struct {
 // Exists controls whether the role should be present (true) or absent (false) in PostgreSQL.
 // +kubebuilder:default=true
 // +optional
- Exists bool `json:"exists,omitempty"`
+ Exists bool `json:"exists"`
 }

 // PostgresClusterSpec defines the desired state of PostgresCluster.
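Worth spelling out why dropping omitempty above is load-bearing rather than cosmetic. A minimal, self-contained sketch (the struct names are illustrative, not part of the patch): with omitempty, false is the zero value of bool and is dropped from the serialized object, and because the CRD schema defaults exists to true, the API server re-defaults the omitted field on apply, so an exists: false entry could never be persisted.

    package main

    import (
    	"encoding/json"
    	"fmt"
    )

    // Illustrative stand-ins for the ManagedRole field change above.
    type withOmitEmpty struct {
    	Exists bool `json:"exists,omitempty"`
    }

    type explicitExists struct {
    	Exists bool `json:"exists"`
    }

    func main() {
    	a, _ := json.Marshal(withOmitEmpty{Exists: false})
    	b, _ := json.Marshal(explicitExists{Exists: false})
    	fmt.Println(string(a)) // {} -> the API server re-defaults exists to true
    	fmt.Println(string(b)) // {"exists":false} -> the absence request survives
    }

Making the field explicit is what lets this change mark removed roles as exists: false instead of deleting their entries.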
diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go index ea4d66f64..5687ae1f8 100644 --- a/internal/controller/postgrescluster_controller_test.go +++ b/internal/controller/postgrescluster_controller_test.go @@ -51,7 +51,7 @@ import ( * PC-09 ignores no-op updates */ -var _ = Describe("PostgresCluster Controller", func() { +var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { const ( postgresVersion = "15.10" diff --git a/internal/controller/postgresdatabase_controller_test.go b/internal/controller/postgresdatabase_controller_test.go index a1f5ed9ba..707d2c34b 100644 --- a/internal/controller/postgresdatabase_controller_test.go +++ b/internal/controller/postgresdatabase_controller_test.go @@ -309,6 +309,15 @@ func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, cl } } +func expectManagedRoleExists(cluster *enterprisev4.PostgresCluster, roleName string, exists bool) { + rolesByName := make(map[string]enterprisev4.ManagedRole, len(cluster.Spec.ManagedRoles)) + for _, r := range cluster.Spec.ManagedRoles { + rolesByName[r.Name] = r + } + Expect(rolesByName).To(HaveKey(roleName)) + Expect(rolesByName[roleName].Exists).To(Equal(exists), "role %s: expected Exists=%v", roleName, exists) +} + func expectRetainedArtifact(ctx context.Context, name, namespace, resourceName string, obj client.Object) { Expect(k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, obj)).To(Succeed()) Expect(obj.GetAnnotations()).To(HaveKeyWithValue("enterprise.splunk.com/retained-from", resourceName)) @@ -344,7 +353,7 @@ func expectReadyStatus(current *enterprisev4.PostgresDatabase, generation int64, Expect(current.Status.Databases[0].ConfigMapRef).NotTo(BeNil()) } -var _ = Describe("PostgresDatabase Controller", func() { +var _ = Describe("PostgresDatabase Controller", Label("postgres"), func() { var ( ctx context.Context namespace string @@ -493,6 +502,59 @@ var _ = Describe("PostgresDatabase Controller", func() { }) }) + When("a database is removed from spec.databases while the CR stays alive", func() { + It("marks the removed database roles as absent in postgres cluster and keeps the retained roles present", func() { + resourceName := "live-db-removal" + clusterName := "live-db-removal-postgres" + cnpgClusterName := "live-db-removal-cnpg" + requestName := types.NamespacedName{Name: resourceName, Namespace: namespace} + + postgresDB := createPostgresDatabaseResource(ctx, namespace, resourceName, clusterName, []enterprisev4.DatabaseDefinition{ + {Name: "keepdb"}, + {Name: "dropdb"}, + }, postgresDatabaseFinalizer) + Expect(k8sClient.Get(ctx, requestName, postgresDB)).To(Succeed()) + + postgresCluster := createPostgresClusterResource(ctx, namespace, clusterName) + markPostgresClusterReady(ctx, postgresCluster, cnpgClusterName, namespace, false) + cnpgCluster := createCNPGClusterResource(ctx, namespace, cnpgClusterName) + markCNPGClusterReady(ctx, cnpgCluster, []string{"keepdb_admin", "keepdb_rw", "dropdb_admin", "dropdb_rw"}, "tenant-rw", "tenant-ro") + + initialRolesPatch := &unstructured.Unstructured{ + Object: map[string]any{ + "apiVersion": enterprisev4.GroupVersion.String(), + "kind": "PostgresCluster", + "metadata": map[string]any{"name": clusterName, "namespace": namespace}, + "spec": map[string]any{ + "managedRoles": []map[string]any{ + {"name": "keepdb_admin", "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-keepdb-admin", "key": "password"}}, 
+ {"name": "keepdb_rw", "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-keepdb-rw", "key": "password"}}, + {"name": "dropdb_admin", "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-dropdb-admin", "key": "password"}}, + {"name": "dropdb_rw", "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-dropdb-rw", "key": "password"}}, + }, + }, + }, + } + Expect(k8sClient.Patch(ctx, initialRolesPatch, client.Apply, client.FieldOwner("postgresdatabase-"+resourceName))).To(Succeed()) + + seedOwnedDatabaseArtifacts(ctx, namespace, resourceName, clusterName, postgresDB, "keepdb", "dropdb") + + postgresDB.Spec.Databases = []enterprisev4.DatabaseDefinition{{Name: "keepdb"}} + Expect(k8sClient.Update(ctx, postgresDB)).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, requestName) + expectReconcileResult(result, err, 15*time.Second) + + updatedCluster := &enterprisev4.PostgresCluster{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: namespace}, updatedCluster)).To(Succeed()) + + expectManagedRoleExists(updatedCluster, "keepdb_admin", true) + expectManagedRoleExists(updatedCluster, "keepdb_rw", true) + expectManagedRoleExists(updatedCluster, "dropdb_admin", false) + expectManagedRoleExists(updatedCluster, "dropdb_rw", false) + }) + }) + When("the PostgresDatabase is being deleted", func() { Context("with retained and deleted databases", func() { It("orphans retained resources, removes deleted resources, and patches managed roles", func() { @@ -547,7 +609,11 @@ var _ = Describe("PostgresDatabase Controller", func() { updatedCluster := &enterprisev4.PostgresCluster{} Expect(k8sClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: namespace}, updatedCluster)).To(Succeed()) - Expect(managedRoleNames(updatedCluster.Spec.ManagedRoles)).To(ConsistOf("keepdb_admin", "keepdb_rw")) + + expectManagedRoleExists(updatedCluster, "keepdb_admin", true) + expectManagedRoleExists(updatedCluster, "keepdb_rw", true) + expectManagedRoleExists(updatedCluster, "dropdb_admin", false) + expectManagedRoleExists(updatedCluster, "dropdb_rw", false) current := &enterprisev4.PostgresDatabase{} err = k8sClient.Get(ctx, requestName, current) diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 3334011c6..e09974ec0 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -606,7 +606,6 @@ func reconcileManagedRoles(ctx context.Context, c client.Client, cluster *enterp Name: role.Name, Ensure: cnpgv1.EnsureAbsent, } - // Exists bool replaces the old Ensure string enum ("present"/"absent"). 
if role.Exists {
 r.Ensure = cnpgv1.EnsurePresent
 r.Login = true
diff --git a/pkg/postgresql/database/core/database.go b/pkg/postgresql/database/core/database.go index f84a35fd9..3a88bac80 100644
--- a/pkg/postgresql/database/core/database.go
+++ b/pkg/postgresql/database/core/database.go
@@ -172,31 +170,29 @@ func PostgresDatabaseService(
 }

 // Phase: RoleProvisioning
- desiredUsers := getDesiredUsers(postgresDB)
- actualRoles := getUsersInClusterSpec(cluster)
- var missing []string
- for _, role := range desiredUsers {
- if !slices.Contains(actualRoles, role) {
- missing = append(missing, role)
- }
- }
-
- if len(missing) > 0 {
- logger.Info("CNPG Cluster patch started, missing roles detected", "missing", missing)
- if err := patchManagedRoles(ctx, c, postgresDB, cluster); err != nil {
+ fieldManager := fieldManagerName(postgresDB.Name)
+ desired := buildDesiredRoles(postgresDB.Name, postgresDB.Spec.Databases)
+ rolesToAdd := findAddedRoleNames(cluster, desired)
+ rolesToRemove := absentRolesByName(findRemovedRoleNames(cluster, fieldManager, desired))
+ allRoles := append(desired, rolesToRemove...)
+
+ if len(rolesToAdd) > 0 || len(rolesToRemove) > 0 {
+ logger.Info("CNPG Cluster patch started, role drift detected", "toAdd", len(rolesToAdd), "toRemove", len(rolesToRemove))
+ if err := patchManagedRoles(ctx, c, fieldManager, cluster, allRoles); err != nil {
 logger.Error(err, "Failed to patch users in CNPG Cluster")
 rc.emitWarning(postgresDB, EventManagedRolesPatchFailed, fmt.Sprintf("Failed to patch managed roles: %v", err))
 return ctrl.Result{}, err
 }
- rc.emitNormal(postgresDB, EventRoleReconciliationStarted, fmt.Sprintf("Patched managed roles, waiting for %d roles to reconcile", len(desiredUsers)))
+ rc.emitNormal(postgresDB, EventRoleReconciliationStarted, fmt.Sprintf("Patched managed roles: %d to add, %d to remove", len(rolesToAdd), len(rolesToRemove)))
 if err := updateStatus(rolesReady, metav1.ConditionFalse, reasonWaitingForCNPG,
- fmt.Sprintf("Waiting for %d roles to be reconciled", len(desiredUsers)), provisioningDBPhase); err != nil {
+ fmt.Sprintf("Waiting for roles to be reconciled: %d to add, %d to remove", len(rolesToAdd), len(rolesToRemove)), provisioningDBPhase); err != nil {
 return ctrl.Result{}, err
 }
 return ctrl.Result{RequeueAfter: retryDelay}, nil
 }
- notReadyRoles, err := verifyRolesReady(ctx, desiredUsers, cnpgCluster)
+ roleNames := getDesiredUsers(postgresDB)
+ notReadyRoles, err := verifyRolesReady(ctx, roleNames, cnpgCluster)
 if err != nil {
 rc.emitWarning(postgresDB, EventRoleFailed, fmt.Sprintf("Role reconciliation failed: %v", err))
 if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonUsersCreationFailed,
@@ -212,9 +210,9 @@
 }
 return ctrl.Result{RequeueAfter: retryDelay}, nil
 }
- rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, rolesReady, EventRolesReady, fmt.Sprintf("All %d roles reconciled", len(desiredUsers)))
+ rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, rolesReady, EventRolesReady, fmt.Sprintf("All %d roles reconciled", len(roleNames)))
 if err := updateStatus(rolesReady, metav1.ConditionTrue, reasonUsersAvailable,
- fmt.Sprintf("All %d users in PostgreSQL", len(desiredUsers)), provisioningDBPhase); err != nil {
+ fmt.Sprintf("All %d users in PostgreSQL", len(roleNames)), provisioningDBPhase); err != nil {
 return ctrl.Result{}, err
 }

@@ -366,6 +364,9 @@ func getUsersInClusterSpec(cluster 
*enterprisev4.PostgresCluster) []string {
 return users
 }

 func getRoleConflicts(postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster) []string {
 myManager := fieldManagerName(postgresDB.Name)
 desired := make(map[string]struct{}, len(postgresDB.Spec.Databases)*2)
@@ -413,18 +414,16 @@ func parseRoleNames(raw []byte) []string {
 return names
 }

-func patchManagedRoles(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster) error {
+func patchManagedRoles(ctx context.Context, c client.Client, fieldManager string, cluster *enterprisev4.PostgresCluster, roles []enterprisev4.ManagedRole) error {
 logger := log.FromContext(ctx)
- allRoles := buildManagedRoles(postgresDB.Name, postgresDB.Spec.Databases)
- rolePatch, err := buildManagedRolesPatch(cluster, allRoles, c.Scheme())
+ rolePatch, err := buildManagedRolesPatch(cluster, roles, c.Scheme())
 if err != nil {
- return fmt.Errorf("building managed roles patch for PostgresDatabase %s: %w", postgresDB.Name, err)
+ return fmt.Errorf("building managed roles patch: %w", err)
 }
- fieldManager := fieldManagerName(postgresDB.Name)
 if err := c.Patch(ctx, rolePatch, client.Apply, client.FieldOwner(fieldManager)); err != nil {
- return fmt.Errorf("patching managed roles for PostgresDatabase %s: %w", postgresDB.Name, err)
+ return fmt.Errorf("patching managed roles: %w", err)
 }
- logger.Info("Users added to PostgresCluster via SSA", "roleCount", len(allRoles))
+ logger.Info("Managed roles patched", "count", len(roles))
 return nil
 }

@@ -580,7 +579,15 @@ func cleanupManagedRoles(ctx context.Context, c client.Client, postgresDB *enter
 logger.Info("PostgresCluster already deleted, skipping role cleanup")
 return nil
 }
- return patchManagedRolesOnDeletion(ctx, c, postgresDB, cluster, plan.retained)
+ fieldManager := fieldManagerName(postgresDB.Name)
+ retainedRoles := buildDesiredRoles(postgresDB.Name, plan.retained)
+ rolesToRemove := buildRolesToRemove(plan.deleted)
+ allRoles := append(retainedRoles, rolesToRemove...)
+ if err := patchManagedRoles(ctx, c, fieldManager, cluster, allRoles); err != nil {
+ return err
+ }
+ logger.Info("Managed roles patched on deletion", "retained", len(retainedRoles), "removed", len(rolesToRemove))
+ return nil
 }

 func orphanCNPGDatabases(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, databases []enterprisev4.DatabaseDefinition) error {
@@ -716,7 +723,67 @@ func deleteSecrets(ctx context.Context, c client.Client, postgresDB *enterprisev
 return nil
 }

-func buildManagedRoles(postgresDBName string, databases []enterprisev4.DatabaseDefinition) []enterprisev4.ManagedRole {
+// buildRolesToRemove produces Exists:false entries for the given databases so CNPG drops their roles.
+func buildRolesToRemove(databases []enterprisev4.DatabaseDefinition) []enterprisev4.ManagedRole {
+ roles := make([]enterprisev4.ManagedRole, 0, len(databases)*2)
+ for _, dbSpec := range databases {
+ roles = append(roles,
+ enterprisev4.ManagedRole{Name: adminRoleName(dbSpec.Name), Exists: false},
+ enterprisev4.ManagedRole{Name: rwRoleName(dbSpec.Name), Exists: false},
+ )
+ }
+ return roles
+}
+
+// absentRolesByName produces Exists:false entries from a list of raw role names.
+// Used by the normal reconcile path where names come from SSA field manager parsing.
+func absentRolesByName(names []string) []enterprisev4.ManagedRole { + roles := make([]enterprisev4.ManagedRole, 0, len(names)) + for _, name := range names { + roles = append(roles, enterprisev4.ManagedRole{Name: name, Exists: false}) + } + return roles +} + +// findAddedRoleNames returns role names from the desired list that are missing +// from the cluster spec or currently marked absent. +func findAddedRoleNames(cluster *enterprisev4.PostgresCluster, desired []enterprisev4.ManagedRole) []string { + current := make(map[string]bool, len(cluster.Spec.ManagedRoles)) + for _, r := range cluster.Spec.ManagedRoles { + current[r.Name] = r.Exists + } + var toAdd []string + for _, r := range desired { + exists, found := current[r.Name] + if !found || !exists { + toAdd = append(toAdd, r.Name) + } + } + return toAdd +} + +// findRemovedRoleNames returns role names currently owned by this field manager +// in the cluster spec that are absent from the desired list. +func findRemovedRoleNames(cluster *enterprisev4.PostgresCluster, manager string, desired []enterprisev4.ManagedRole) []string { + desiredSet := make(map[string]struct{}, len(desired)) + for _, r := range desired { + desiredSet[r.Name] = struct{}{} + } + owners := managedRoleOwners(cluster.ManagedFields) + var toRemove []string + for name, owner := range owners { + if owner == manager { + if _, ok := desiredSet[name]; !ok { + toRemove = append(toRemove, name) + } + } + } + return toRemove +} + +// buildDesiredRoles builds the full set of roles that should be present for the given databases. +// This is the input to findAddedRoleNames and findRemovedRoleNames. +func buildDesiredRoles(postgresDBName string, databases []enterprisev4.DatabaseDefinition) []enterprisev4.ManagedRole { roles := make([]enterprisev4.ManagedRole, 0, len(databases)*2) for _, dbSpec := range databases { roles = append(roles, @@ -752,20 +819,6 @@ func buildManagedRolesPatch(cluster *enterprisev4.PostgresCluster, roles []enter }, nil } -func patchManagedRolesOnDeletion(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster, retained []enterprisev4.DatabaseDefinition) error { - logger := log.FromContext(ctx) - roles := buildManagedRoles(postgresDB.Name, retained) - rolePatch, err := buildManagedRolesPatch(cluster, roles, c.Scheme()) - if err != nil { - return fmt.Errorf("building managed roles patch: %w", err) - } - if err := c.Patch(ctx, rolePatch, client.Apply, client.FieldOwner(fieldManagerName(postgresDB.Name))); err != nil { - return fmt.Errorf("patching managed roles on deletion: %w", err) - } - logger.Info("Managed roles patched on deletion", "retainedRoles", len(roles)) - return nil -} - func stripOwnerReference(obj metav1.Object, ownerUID types.UID) { refs := obj.GetOwnerReferences() filtered := make([]metav1.OwnerReference, 0, len(refs)) diff --git a/pkg/postgresql/database/core/database_unit_test.go b/pkg/postgresql/database/core/database_unit_test.go index 8d4da6c52..0e8bee12b 100644 --- a/pkg/postgresql/database/core/database_unit_test.go +++ b/pkg/postgresql/database/core/database_unit_test.go @@ -306,7 +306,7 @@ func TestVerifyRolesReady(t *testing.T) { }, }, }, - wantErr: "user main_db_rw reconciliation failed: [reserved role]", + wantErr: "reconciling user main_db_rw: [reserved role]", }, { name: "returns missing roles that are not reconciled yet", @@ -1283,7 +1283,7 @@ func TestBuildManagedRoles(t *testing.T) { }, } - got := buildManagedRoles("primary", databases) + got := 
buildDesiredRoles("primary", databases) assert.Equal(t, want, got) } @@ -1300,7 +1300,7 @@ func TestBuildManagedRolesPatch(t *testing.T) { Namespace: "dbs", }, } - roles := buildManagedRoles("primary", []enterprisev4.DatabaseDefinition{{Name: "payments"}}) + roles := buildDesiredRoles("primary", []enterprisev4.DatabaseDefinition{{Name: "payments"}}) c := testClient(t, scheme, cluster) got, err := buildManagedRolesPatch(cluster, roles, c.Scheme()) @@ -1312,37 +1312,152 @@ func TestBuildManagedRolesPatch(t *testing.T) { assert.Equal(t, map[string]any{"managedRoles": roles}, got.Object["spec"]) } -func TestPatchManagedRolesOnDeletion(t *testing.T) { - scheme := testScheme(t) - postgresDB := &enterprisev4.PostgresDatabase{ - ObjectMeta: metav1.ObjectMeta{ - Name: "primary", - Namespace: "dbs", +func TestFindAddedRoleNames(t *testing.T) { + desired := buildDesiredRoles("primary", []enterprisev4.DatabaseDefinition{{Name: "payments"}, {Name: "api"}}) + + tests := []struct { + name string + current []enterprisev4.ManagedRole + want []string + }{ + { + name: "all missing from cluster", + current: nil, + want: []string{"payments_admin", "payments_rw", "api_admin", "api_rw"}, + }, + { + name: "some already present", + current: []enterprisev4.ManagedRole{ + {Name: "payments_admin", Exists: true}, + {Name: "payments_rw", Exists: true}, + }, + want: []string{"api_admin", "api_rw"}, + }, + { + name: "role present but marked absent — should be re-added", + current: []enterprisev4.ManagedRole{ + {Name: "payments_admin", Exists: false}, + {Name: "payments_rw", Exists: true}, + }, + want: []string{"payments_admin", "api_admin", "api_rw"}, + }, + { + name: "all already present", + current: []enterprisev4.ManagedRole{ + {Name: "payments_admin", Exists: true}, + {Name: "payments_rw", Exists: true}, + {Name: "api_admin", Exists: true}, + {Name: "api_rw", Exists: true}, + }, + want: nil, }, } - cluster := &enterprisev4.PostgresCluster{ - TypeMeta: metav1.TypeMeta{ - APIVersion: enterprisev4.GroupVersion.String(), - Kind: "PostgresCluster", + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + cluster := &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ManagedRoles: tc.current}, + } + got := findAddedRoleNames(cluster, desired) + assert.ElementsMatch(t, tc.want, got) + }) + } +} + +type roleFieldOwner struct { + manager string + roles []string +} + +func TestFindRemovedRoleNames(t *testing.T) { + manager := "splunk-operator-primary" + desired := buildDesiredRoles("primary", []enterprisev4.DatabaseDefinition{{Name: "payments"}}) + + tests := []struct { + name string + fieldOwners []roleFieldOwner + want []string + }{ + { + name: "no roles owned by any manager", + fieldOwners: nil, + want: nil, }, - ObjectMeta: metav1.ObjectMeta{ - Name: "primary", - Namespace: "dbs", + { + name: "owned roles still in desired — nothing to remove", + fieldOwners: []roleFieldOwner{{manager: manager, roles: []string{"payments_admin", "payments_rw"}}}, + want: nil, + }, + { + name: "owned role no longer in desired — should be removed", + fieldOwners: []roleFieldOwner{{manager: manager, roles: []string{"payments_admin", "payments_rw", "api_admin", "api_rw"}}}, + want: []string{"api_admin", "api_rw"}, + }, + { + name: "role owned by different manager — ignored", + fieldOwners: []roleFieldOwner{{manager: "other-manager", roles: []string{"api_admin", "api_rw"}}}, + want: nil, }, } - retained := []enterprisev4.DatabaseDefinition{{Name: "payments"}} - want := buildManagedRoles(postgresDB.Name, retained) 
- c := testClient(t, scheme, cluster) - err := patchManagedRolesOnDeletion(context.Background(), c, postgresDB, cluster, retained) + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + var managedFields []metav1.ManagedFieldsEntry + for _, fo := range tc.fieldOwners { + keys := make([]string, len(fo.roles)) + for i, r := range fo.roles { + keys[i] = `k:{"name":"` + r + `"}` + } + managedFields = append(managedFields, metav1.ManagedFieldsEntry{ + Manager: fo.manager, + FieldsV1: &metav1.FieldsV1{Raw: managedRolesFieldsRaw(t, keys...)}, + Operation: metav1.ManagedFieldsOperationApply, + }) + } + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ManagedFields: managedFields}, + } + got := findRemovedRoleNames(cluster, manager, desired) + assert.ElementsMatch(t, tc.want, got) + }) + } +} + - require.NoError(t, err) +func TestBuildRolesToRemove(t *testing.T) { + tests := []struct { + name string + deleted []enterprisev4.DatabaseDefinition + want []enterprisev4.ManagedRole + }{ + { + name: "nothing to remove", + deleted: nil, + want: []enterprisev4.ManagedRole{}, + }, + { + name: "single database removed", + deleted: []enterprisev4.DatabaseDefinition{{Name: "api"}}, + want: []enterprisev4.ManagedRole{{Name: "api_admin", Exists: false}, {Name: "api_rw", Exists: false}}, + }, + { + name: "multiple databases removed", + deleted: []enterprisev4.DatabaseDefinition{{Name: "api"}, {Name: "payments"}}, + want: []enterprisev4.ManagedRole{ + {Name: "api_admin", Exists: false}, {Name: "api_rw", Exists: false}, + {Name: "payments_admin", Exists: false}, {Name: "payments_rw", Exists: false}, + }, + }, + } - got := &enterprisev4.PostgresCluster{} - require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: cluster.Name, Namespace: cluster.Namespace}, got)) - assert.Equal(t, want, got.Spec.ManagedRoles) + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.want, buildRolesToRemove(tc.deleted)) + }) + } } + func TestStripOwnerReference(t *testing.T) { obj := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ diff --git a/pkg/postgresql/database/core/types.go b/pkg/postgresql/database/core/types.go index bf07fd19f..fb57dee91 100644 --- a/pkg/postgresql/database/core/types.go +++ b/pkg/postgresql/database/core/types.go @@ -31,7 +31,8 @@ const ( readWriteEndpoint string = "rw" deletionPolicyRetain string = "Retain" - + deletionPolicyDelete string = "Delete" + postgresDatabaseFinalizerName string = "postgresdatabases.enterprise.splunk.com/finalizer" annotationRetainedFrom string = "enterprise.splunk.com/retained-from" From bbc6c9c3f62c1dec30854ff19c7b6ae5418fbf82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20P=C5=82oski?= Date: Tue, 7 Apr 2026 20:01:27 +0200 Subject: [PATCH 15/36] Apply PR suggestions --- api/v4/postgrescluster_types.go | 1 - .../postgresdatabase_controller_test.go | 189 +++++++++++------- 2 files changed, 119 insertions(+), 71 deletions(-) diff --git a/api/v4/postgrescluster_types.go b/api/v4/postgrescluster_types.go index 65d300f53..5adc91f13 100644 --- a/api/v4/postgrescluster_types.go +++ b/api/v4/postgrescluster_types.go @@ -36,7 +36,6 @@ type ManagedRole struct { // Exists controls whether the role should be present (true) or absent (false) in PostgreSQL. 
// +kubebuilder:default=true - // +optional Exists bool `json:"exists"` } diff --git a/internal/controller/postgresdatabase_controller_test.go b/internal/controller/postgresdatabase_controller_test.go index 707d2c34b..31f591573 100644 --- a/internal/controller/postgresdatabase_controller_test.go +++ b/internal/controller/postgresdatabase_controller_test.go @@ -40,6 +40,44 @@ import ( const postgresDatabaseFinalizer = "postgresdatabases.enterprise.splunk.com/finalizer" +// condition types +const ( + condClusterReady = "ClusterReady" + condSecretsReady = "SecretsReady" + condConfigMapsReady = "ConfigMapsReady" + condRolesReady = "RolesReady" + condDatabasesReady = "DatabasesReady" + condPrivilegesReady = "PrivilegesReady" +) + +// condition reasons +const ( + reasonClusterNotFound = "ClusterNotFound" + reasonClusterAvailable = "ClusterAvailable" + reasonSecretsCreated = "SecretsCreated" + reasonConfigMapsCreated = "ConfigMapsCreated" + reasonUsersAvailable = "UsersAvailable" + reasonDatabasesAvailable = "DatabasesAvailable" + reasonRoleConflict = "RoleConflict" +) + +// phases +const ( + phasePending = "Pending" + phaseReady = "Ready" + phaseFailed = "Failed" +) + +// annotations +const retainedFromAnnotation = "enterprise.splunk.com/retained-from" + +// database names used across tests +const ( + dbAppdb = "appdb" + dbKeepdb = "payments" + dbDropdb = "analytics" +) + func reconcilePostgresDatabase(ctx context.Context, nn types.NamespacedName) (ctrl.Result, error) { reconciler := &PostgresDatabaseReconciler{ Client: k8sClient, @@ -57,12 +95,20 @@ func managedRoleNames(roles []enterprisev4.ManagedRole) []string { return names } -func adminRoleNameForTest(dbName string) string { - return dbName + "_admin" -} +func adminRoleNameForTest(dbName string) string { return dbName + "_admin" } +func rwRoleNameForTest(dbName string) string { return dbName + "_rw" } -func rwRoleNameForTest(dbName string) string { - return dbName + "_rw" +func adminSecretNameForTest(resourceName, dbName string) string { + return fmt.Sprintf("%s-%s-admin", resourceName, dbName) +} +func rwSecretNameForTest(resourceName, dbName string) string { + return fmt.Sprintf("%s-%s-rw", resourceName, dbName) +} +func configMapNameForTest(resourceName, dbName string) string { + return fmt.Sprintf("%s-%s-config", resourceName, dbName) +} +func cnpgDatabaseNameForTest(resourceName, dbName string) string { + return fmt.Sprintf("%s-%s", resourceName, dbName) } func ownedByPostgresDatabase(postgresDB *enterprisev4.PostgresDatabase) []metav1.OwnerReference { @@ -208,17 +254,17 @@ func seedExistingDatabaseStatus(ctx context.Context, current *enterprisev4.Postg func expectProvisionedArtifacts(ctx context.Context, scenario readyClusterScenario, owner *enterprisev4.PostgresDatabase) { adminSecret := &corev1.Secret{} - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-admin", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, adminSecret)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: adminSecretNameForTest(scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, adminSecret)).To(Succeed()) Expect(adminSecret.Data).To(HaveKey("password")) Expect(metav1.IsControlledBy(adminSecret, owner)).To(BeTrue()) rwSecret := &corev1.Secret{} - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-rw", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, rwSecret)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: 
rwSecretNameForTest(scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, rwSecret)).To(Succeed()) Expect(rwSecret.Data).To(HaveKey("password")) Expect(metav1.IsControlledBy(rwSecret, owner)).To(BeTrue()) configMap := &corev1.ConfigMap{} - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-config", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, configMap)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: configMapNameForTest(scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, configMap)).To(Succeed()) Expect(configMap.Data).To(HaveKeyWithValue("rw-host", "tenant-rw."+scenario.namespace+".svc.cluster.local")) Expect(configMap.Data).To(HaveKeyWithValue("ro-host", "tenant-ro."+scenario.namespace+".svc.cluster.local")) Expect(configMap.Data).To(HaveKeyWithValue("admin-user", adminRoleNameForTest(scenario.dbName))) @@ -234,7 +280,7 @@ func expectManagedRolesPatched(ctx context.Context, scenario readyClusterScenari func expectCNPGDatabaseCreated(ctx context.Context, scenario readyClusterScenario, owner *enterprisev4.PostgresDatabase) *cnpgv1.Database { cnpgDatabase := &cnpgv1.Database{} - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, cnpgDatabase)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: cnpgDatabaseNameForTest(scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, cnpgDatabase)).To(Succeed()) Expect(cnpgDatabase.Spec.Name).To(Equal(scenario.dbName)) Expect(cnpgDatabase.Spec.Owner).To(Equal(adminRoleNameForTest(scenario.dbName))) Expect(cnpgDatabase.Spec.ClusterRef.Name).To(Equal(scenario.cnpgClusterName)) @@ -250,18 +296,18 @@ func markCNPGDatabaseApplied(ctx context.Context, cnpgDatabase *cnpgv1.Database) func expectPoolerConfigMap(ctx context.Context, scenario readyClusterScenario) { configMap := &corev1.ConfigMap{} - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-config", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, configMap)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: configMapNameForTest(scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, configMap)).To(Succeed()) Expect(configMap.Data).To(HaveKeyWithValue("pooler-rw-host", scenario.cnpgClusterName+"-pooler-rw."+scenario.namespace+".svc.cluster.local")) Expect(configMap.Data).To(HaveKeyWithValue("pooler-ro-host", scenario.cnpgClusterName+"-pooler-ro."+scenario.namespace+".svc.cluster.local")) } func seedMissingClusterScenario(ctx context.Context, namespace, resourceName string, finalizers ...string) types.NamespacedName { - createPostgresDatabaseResource(ctx, namespace, resourceName, "absent-cluster", []enterprisev4.DatabaseDefinition{{Name: "appdb"}}, finalizers...) + createPostgresDatabaseResource(ctx, namespace, resourceName, "absent-cluster", []enterprisev4.DatabaseDefinition{{Name: dbAppdb}}, finalizers...) 
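// The "absent-cluster" reference is deliberately dangling: no PostgresCluster
// with that name is ever created, which drives the ClusterNotFound condition
// asserted by the missing-cluster test below.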
return types.NamespacedName{Name: resourceName, Namespace: namespace} } func seedConflictScenario(ctx context.Context, namespace, resourceName, clusterName string) types.NamespacedName { - createPostgresDatabaseResource(ctx, namespace, resourceName, clusterName, []enterprisev4.DatabaseDefinition{{Name: "appdb"}}, postgresDatabaseFinalizer) + createPostgresDatabaseResource(ctx, namespace, resourceName, clusterName, []enterprisev4.DatabaseDefinition{{Name: dbAppdb}}, postgresDatabaseFinalizer) postgresCluster := createPostgresClusterResource(ctx, namespace, clusterName) markPostgresClusterReady(ctx, postgresCluster, "unused-cnpg", namespace, false) return types.NamespacedName{Name: resourceName, Namespace: namespace} @@ -272,7 +318,7 @@ func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, cl for _, dbName := range dbNames { Expect(k8sClient.Create(ctx, &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-admin", resourceName, dbName), + Name: adminSecretNameForTest(resourceName, dbName), Namespace: namespace, OwnerReferences: ownerReferences, }, @@ -280,7 +326,7 @@ func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, cl Expect(k8sClient.Create(ctx, &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-rw", resourceName, dbName), + Name: rwSecretNameForTest(resourceName, dbName), Namespace: namespace, OwnerReferences: ownerReferences, }, @@ -288,7 +334,7 @@ func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, cl Expect(k8sClient.Create(ctx, &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-config", resourceName, dbName), + Name: configMapNameForTest(resourceName, dbName), Namespace: namespace, OwnerReferences: ownerReferences, }, @@ -296,7 +342,7 @@ func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, cl Expect(k8sClient.Create(ctx, &cnpgv1.Database{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s", resourceName, dbName), + Name: cnpgDatabaseNameForTest(resourceName, dbName), Namespace: namespace, OwnerReferences: ownerReferences, }, @@ -320,7 +366,7 @@ func expectManagedRoleExists(cluster *enterprisev4.PostgresCluster, roleName str func expectRetainedArtifact(ctx context.Context, name, namespace, resourceName string, obj client.Object) { Expect(k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, obj)).To(Succeed()) - Expect(obj.GetAnnotations()).To(HaveKeyWithValue("enterprise.splunk.com/retained-from", resourceName)) + Expect(obj.GetAnnotations()).To(HaveKeyWithValue(retainedFromAnnotation, resourceName)) Expect(obj.GetOwnerReferences()).To(BeEmpty()) } @@ -342,7 +388,7 @@ func expectStatusCondition(current *enterprisev4.PostgresDatabase, conditionType } func expectReadyStatus(current *enterprisev4.PostgresDatabase, generation int64, expectedDatabase enterprisev4.DatabaseInfo) { - expectStatusPhase(current, "Ready") + expectStatusPhase(current, phaseReady) Expect(current.Status.ObservedGeneration).NotTo(BeNil()) Expect(*current.Status.ObservedGeneration).To(Equal(generation)) Expect(current.Status.Databases).To(HaveLen(1)) @@ -393,9 +439,9 @@ var _ = Describe("PostgresDatabase Controller", Label("postgres"), func() { expectReconcileResult(result, err, 30*time.Second) current := fetchPostgresDatabase(ctx, requestName) - expectStatusPhase(current, "Pending") - expectStatusCondition(current, "ClusterReady", metav1.ConditionFalse, "ClusterNotFound") - clusterReady := 
meta.FindStatusCondition(current.Status.Conditions, "ClusterReady") + expectStatusPhase(current, phasePending) + expectStatusCondition(current, condClusterReady, metav1.ConditionFalse, reasonClusterNotFound) + clusterReady := meta.FindStatusCondition(current.Status.Conditions, condClusterReady) Expect(clusterReady.ObservedGeneration).To(Equal(current.Generation)) }) }) @@ -404,7 +450,7 @@ var _ = Describe("PostgresDatabase Controller", Label("postgres"), func() { When("the referenced PostgresCluster is ready", func() { Context("and live grants are not invoked", func() { It("reconciles secrets, configmaps, roles, and CNPG databases", func() { - scenario := newReadyClusterScenario(namespace, "ready-cluster", "tenant-cluster", "tenant-cnpg", "appdb") + scenario := newReadyClusterScenario(namespace, "ready-cluster", "tenant-cluster", "tenant-cnpg", dbAppdb) seedReadyClusterScenario(ctx, scenario, false) result, err := reconcilePostgresDatabase(ctx, scenario.requestName) @@ -428,18 +474,18 @@ var _ = Describe("PostgresDatabase Controller", Label("postgres"), func() { current = fetchPostgresDatabase(ctx, scenario.requestName) expectReadyStatus(current, current.Generation, enterprisev4.DatabaseInfo{Name: scenario.dbName, Ready: true}) - expectStatusCondition(current, "ClusterReady", metav1.ConditionTrue, "ClusterAvailable") - expectStatusCondition(current, "SecretsReady", metav1.ConditionTrue, "SecretsCreated") - expectStatusCondition(current, "ConfigMapsReady", metav1.ConditionTrue, "ConfigMapsCreated") - expectStatusCondition(current, "RolesReady", metav1.ConditionTrue, "UsersAvailable") - expectStatusCondition(current, "DatabasesReady", metav1.ConditionTrue, "DatabasesAvailable") - Expect(meta.FindStatusCondition(current.Status.Conditions, "PrivilegesReady")).To(BeNil()) + expectStatusCondition(current, condClusterReady, metav1.ConditionTrue, reasonClusterAvailable) + expectStatusCondition(current, condSecretsReady, metav1.ConditionTrue, reasonSecretsCreated) + expectStatusCondition(current, condConfigMapsReady, metav1.ConditionTrue, reasonConfigMapsCreated) + expectStatusCondition(current, condRolesReady, metav1.ConditionTrue, reasonUsersAvailable) + expectStatusCondition(current, condDatabasesReady, metav1.ConditionTrue, reasonDatabasesAvailable) + Expect(meta.FindStatusCondition(current.Status.Conditions, condPrivilegesReady)).To(BeNil()) }) }) Context("and connection pooling is enabled", func() { It("adds pooler endpoints to the generated ConfigMap", func() { - scenario := newReadyClusterScenario(namespace, "pooler-cluster", "pooler-postgres", "pooler-cnpg", "appdb") + scenario := newReadyClusterScenario(namespace, "pooler-cluster", "pooler-postgres", "pooler-cnpg", dbAppdb) seedReadyClusterScenario(ctx, scenario, true) result, err := reconcilePostgresDatabase(ctx, scenario.requestName) @@ -471,8 +517,8 @@ var _ = Describe("PostgresDatabase Controller", Label("postgres"), func() { }, "spec": map[string]any{ "managedRoles": []map[string]any{ - {"name": "appdb_admin", "exists": true}, - {"name": "appdb_rw", "exists": true}, + {"name": adminRoleNameForTest(dbAppdb), "exists": true}, + {"name": rwRoleNameForTest(dbAppdb), "exists": true}, }, }, }, @@ -485,19 +531,19 @@ var _ = Describe("PostgresDatabase Controller", Label("postgres"), func() { Expect(result).To(Equal(ctrl.Result{})) current := fetchPostgresDatabase(ctx, requestName) - expectStatusPhase(current, "Failed") - expectStatusCondition(current, "RolesReady", metav1.ConditionFalse, "RoleConflict") + expectStatusPhase(current, 
phaseFailed) + expectStatusCondition(current, condRolesReady, metav1.ConditionFalse, reasonRoleConflict) - rolesReady := meta.FindStatusCondition(current.Status.Conditions, "RolesReady") - Expect(rolesReady.Message).To(ContainSubstring("appdb_admin")) + rolesReady := meta.FindStatusCondition(current.Status.Conditions, condRolesReady) + Expect(rolesReady.Message).To(ContainSubstring(adminRoleNameForTest(dbAppdb))) Expect(rolesReady.Message).To(ContainSubstring("postgresdatabase-legacy")) configMap := &corev1.ConfigMap{} - err = k8sClient.Get(ctx, types.NamespacedName{Name: "conflict-cluster-appdb-config", Namespace: namespace}, configMap) + err = k8sClient.Get(ctx, types.NamespacedName{Name: configMapNameForTest("conflict-cluster", dbAppdb), Namespace: namespace}, configMap) Expect(apierrors.IsNotFound(err)).To(BeTrue()) cnpgDatabase := &cnpgv1.Database{} - err = k8sClient.Get(ctx, types.NamespacedName{Name: "conflict-cluster-appdb", Namespace: namespace}, cnpgDatabase) + err = k8sClient.Get(ctx, types.NamespacedName{Name: cnpgDatabaseNameForTest("conflict-cluster", dbAppdb), Namespace: namespace}, cnpgDatabase) Expect(apierrors.IsNotFound(err)).To(BeTrue()) }) }) @@ -510,15 +556,18 @@ var _ = Describe("PostgresDatabase Controller", Label("postgres"), func() { requestName := types.NamespacedName{Name: resourceName, Namespace: namespace} postgresDB := createPostgresDatabaseResource(ctx, namespace, resourceName, clusterName, []enterprisev4.DatabaseDefinition{ - {Name: "keepdb"}, - {Name: "dropdb"}, + {Name: dbKeepdb}, + {Name: dbDropdb}, }, postgresDatabaseFinalizer) Expect(k8sClient.Get(ctx, requestName, postgresDB)).To(Succeed()) postgresCluster := createPostgresClusterResource(ctx, namespace, clusterName) markPostgresClusterReady(ctx, postgresCluster, cnpgClusterName, namespace, false) cnpgCluster := createCNPGClusterResource(ctx, namespace, cnpgClusterName) - markCNPGClusterReady(ctx, cnpgCluster, []string{"keepdb_admin", "keepdb_rw", "dropdb_admin", "dropdb_rw"}, "tenant-rw", "tenant-ro") + markCNPGClusterReady(ctx, cnpgCluster, []string{ + adminRoleNameForTest(dbKeepdb), rwRoleNameForTest(dbKeepdb), + adminRoleNameForTest(dbDropdb), rwRoleNameForTest(dbDropdb), + }, "tenant-rw", "tenant-ro") initialRolesPatch := &unstructured.Unstructured{ Object: map[string]any{ @@ -527,19 +576,19 @@ var _ = Describe("PostgresDatabase Controller", Label("postgres"), func() { "metadata": map[string]any{"name": clusterName, "namespace": namespace}, "spec": map[string]any{ "managedRoles": []map[string]any{ - {"name": "keepdb_admin", "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-keepdb-admin", "key": "password"}}, - {"name": "keepdb_rw", "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-keepdb-rw", "key": "password"}}, - {"name": "dropdb_admin", "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-dropdb-admin", "key": "password"}}, - {"name": "dropdb_rw", "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-dropdb-rw", "key": "password"}}, + {"name": adminRoleNameForTest(dbKeepdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbKeepdb + "-admin", "key": "password"}}, + {"name": rwRoleNameForTest(dbKeepdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbKeepdb + "-rw", "key": "password"}}, + {"name": adminRoleNameForTest(dbDropdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbDropdb + "-admin", 
"key": "password"}}, + {"name": rwRoleNameForTest(dbDropdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbDropdb + "-rw", "key": "password"}}, }, }, }, } Expect(k8sClient.Patch(ctx, initialRolesPatch, client.Apply, client.FieldOwner("postgresdatabase-"+resourceName))).To(Succeed()) - seedOwnedDatabaseArtifacts(ctx, namespace, resourceName, clusterName, postgresDB, "keepdb", "dropdb") + seedOwnedDatabaseArtifacts(ctx, namespace, resourceName, clusterName, postgresDB, dbKeepdb, dbDropdb) - postgresDB.Spec.Databases = []enterprisev4.DatabaseDefinition{{Name: "keepdb"}} + postgresDB.Spec.Databases = []enterprisev4.DatabaseDefinition{{Name: dbKeepdb}} Expect(k8sClient.Update(ctx, postgresDB)).To(Succeed()) result, err := reconcilePostgresDatabase(ctx, requestName) @@ -548,10 +597,10 @@ var _ = Describe("PostgresDatabase Controller", Label("postgres"), func() { updatedCluster := &enterprisev4.PostgresCluster{} Expect(k8sClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: namespace}, updatedCluster)).To(Succeed()) - expectManagedRoleExists(updatedCluster, "keepdb_admin", true) - expectManagedRoleExists(updatedCluster, "keepdb_rw", true) - expectManagedRoleExists(updatedCluster, "dropdb_admin", false) - expectManagedRoleExists(updatedCluster, "dropdb_rw", false) + expectManagedRoleExists(updatedCluster, adminRoleNameForTest(dbKeepdb), true) + expectManagedRoleExists(updatedCluster, rwRoleNameForTest(dbKeepdb), true) + expectManagedRoleExists(updatedCluster, adminRoleNameForTest(dbDropdb), false) + expectManagedRoleExists(updatedCluster, rwRoleNameForTest(dbDropdb), false) }) }) @@ -563,8 +612,8 @@ var _ = Describe("PostgresDatabase Controller", Label("postgres"), func() { requestName := types.NamespacedName{Name: resourceName, Namespace: namespace} postgresDB := createPostgresDatabaseResource(ctx, namespace, resourceName, clusterName, []enterprisev4.DatabaseDefinition{ - {Name: "keepdb", DeletionPolicy: "Retain"}, - {Name: "dropdb"}, + {Name: dbKeepdb, DeletionPolicy: "Retain"}, + {Name: dbDropdb}, }, postgresDatabaseFinalizer) Expect(k8sClient.Get(ctx, requestName, postgresDB)).To(Succeed()) @@ -580,40 +629,40 @@ var _ = Describe("PostgresDatabase Controller", Label("postgres"), func() { }, "spec": map[string]any{ "managedRoles": []map[string]any{ - {"name": "keepdb_admin", "exists": true, "passwordSecretRef": map[string]any{"name": "delete-cluster-keepdb-admin", "key": "password"}}, - {"name": "keepdb_rw", "exists": true, "passwordSecretRef": map[string]any{"name": "delete-cluster-keepdb-rw", "key": "password"}}, - {"name": "dropdb_admin", "exists": true, "passwordSecretRef": map[string]any{"name": "delete-cluster-dropdb-admin", "key": "password"}}, - {"name": "dropdb_rw", "exists": true, "passwordSecretRef": map[string]any{"name": "delete-cluster-dropdb-rw", "key": "password"}}, + {"name": adminRoleNameForTest(dbKeepdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbKeepdb + "-admin", "key": "password"}}, + {"name": rwRoleNameForTest(dbKeepdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbKeepdb + "-rw", "key": "password"}}, + {"name": adminRoleNameForTest(dbDropdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbDropdb + "-admin", "key": "password"}}, + {"name": rwRoleNameForTest(dbDropdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbDropdb + "-rw", "key": "password"}}, }, }, }, } - 
Expect(k8sClient.Patch(ctx, initialRolesPatch, client.Apply, client.FieldOwner("postgresdatabase-delete-cluster"))).To(Succeed()) + Expect(k8sClient.Patch(ctx, initialRolesPatch, client.Apply, client.FieldOwner("postgresdatabase-"+resourceName))).To(Succeed()) - seedOwnedDatabaseArtifacts(ctx, namespace, resourceName, clusterName, postgresDB, "keepdb", "dropdb") + seedOwnedDatabaseArtifacts(ctx, namespace, resourceName, clusterName, postgresDB, dbKeepdb, dbDropdb) Expect(k8sClient.Delete(ctx, postgresDB)).To(Succeed()) result, err := reconcilePostgresDatabase(ctx, requestName) expectEmptyReconcileResult(result, err) - expectRetainedArtifact(ctx, "delete-cluster-keepdb-config", namespace, resourceName, &corev1.ConfigMap{}) - expectRetainedArtifact(ctx, "delete-cluster-keepdb-admin", namespace, resourceName, &corev1.Secret{}) - expectRetainedArtifact(ctx, "delete-cluster-keepdb-rw", namespace, resourceName, &corev1.Secret{}) - expectRetainedArtifact(ctx, "delete-cluster-keepdb", namespace, resourceName, &cnpgv1.Database{}) + expectRetainedArtifact(ctx, configMapNameForTest(resourceName, dbKeepdb), namespace, resourceName, &corev1.ConfigMap{}) + expectRetainedArtifact(ctx, adminSecretNameForTest(resourceName, dbKeepdb), namespace, resourceName, &corev1.Secret{}) + expectRetainedArtifact(ctx, rwSecretNameForTest(resourceName, dbKeepdb), namespace, resourceName, &corev1.Secret{}) + expectRetainedArtifact(ctx, cnpgDatabaseNameForTest(resourceName, dbKeepdb), namespace, resourceName, &cnpgv1.Database{}) - expectDeletedArtifact(ctx, "delete-cluster-dropdb-config", namespace, &corev1.ConfigMap{}) - expectDeletedArtifact(ctx, "delete-cluster-dropdb-admin", namespace, &corev1.Secret{}) - expectDeletedArtifact(ctx, "delete-cluster-dropdb-rw", namespace, &corev1.Secret{}) - expectDeletedArtifact(ctx, "delete-cluster-dropdb", namespace, &cnpgv1.Database{}) + expectDeletedArtifact(ctx, configMapNameForTest(resourceName, dbDropdb), namespace, &corev1.ConfigMap{}) + expectDeletedArtifact(ctx, adminSecretNameForTest(resourceName, dbDropdb), namespace, &corev1.Secret{}) + expectDeletedArtifact(ctx, rwSecretNameForTest(resourceName, dbDropdb), namespace, &corev1.Secret{}) + expectDeletedArtifact(ctx, cnpgDatabaseNameForTest(resourceName, dbDropdb), namespace, &cnpgv1.Database{}) updatedCluster := &enterprisev4.PostgresCluster{} Expect(k8sClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: namespace}, updatedCluster)).To(Succeed()) - expectManagedRoleExists(updatedCluster, "keepdb_admin", true) - expectManagedRoleExists(updatedCluster, "keepdb_rw", true) - expectManagedRoleExists(updatedCluster, "dropdb_admin", false) - expectManagedRoleExists(updatedCluster, "dropdb_rw", false) + expectManagedRoleExists(updatedCluster, adminRoleNameForTest(dbKeepdb), true) + expectManagedRoleExists(updatedCluster, rwRoleNameForTest(dbKeepdb), true) + expectManagedRoleExists(updatedCluster, adminRoleNameForTest(dbDropdb), false) + expectManagedRoleExists(updatedCluster, rwRoleNameForTest(dbDropdb), false) current := &enterprisev4.PostgresDatabase{} err = k8sClient.Get(ctx, requestName, current) From 67a0ed2a455752a525627f12657b2da3989f2128 Mon Sep 17 00:00:00 2001 From: Kamil Ubych <56136249+limak9182@users.noreply.github.com> Date: Fri, 10 Apr 2026 13:49:07 +0200 Subject: [PATCH 16/36] postgres controllers metrics (#1811) * metrics * use Controller-runtime standard metrics * metrics labeled by errors * managedusers reset bug fix --- cmd/main.go | 25 +++-- .../controller/postgrescluster_controller.go | 14 ++- 
.../controller/postgresdatabase_controller.go | 15 ++- pkg/postgresql/cluster/core/cluster.go | 21 +++-- pkg/postgresql/cluster/core/types.go | 2 + pkg/postgresql/database/core/database.go | 14 ++- .../database/core/database_unit_test.go | 2 + pkg/postgresql/database/core/types.go | 11 ++- .../shared/adapter/prometheus/collector.go | 91 +++++++++++++++++++ .../shared/adapter/prometheus/noop.go | 15 +++ .../shared/adapter/prometheus/recorder.go | 91 +++++++++++++++++++ pkg/postgresql/shared/ports/metrics.go | 35 +++++++ 12 files changed, 307 insertions(+), 29 deletions(-) create mode 100644 pkg/postgresql/shared/adapter/prometheus/collector.go create mode 100644 pkg/postgresql/shared/adapter/prometheus/noop.go create mode 100644 pkg/postgresql/shared/adapter/prometheus/recorder.go create mode 100644 pkg/postgresql/shared/ports/metrics.go diff --git a/cmd/main.go b/cmd/main.go index 332623f0d..d7cc6cc8b 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -25,6 +25,7 @@ import ( "path/filepath" "time" + crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" "sigs.k8s.io/controller-runtime/pkg/metrics/filters" intController "github.com/splunk/splunk-operator/internal/controller" @@ -55,6 +56,7 @@ import ( "github.com/splunk/splunk-operator/internal/controller" cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" + pgprometheus "github.com/splunk/splunk-operator/pkg/postgresql/shared/adapter/prometheus" //+kubebuilder:scaffold:imports //extapi "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" ) @@ -282,18 +284,29 @@ func main() { setupLog.Error(err, "unable to create controller", "controller", "Telemetry") os.Exit(1) } + pgMetricsRecorder := pgprometheus.NewPrometheusRecorder() + if err := pgprometheus.Register(crmetrics.Registry); err != nil { + setupLog.Error(err, "unable to register PostgreSQL metrics") + os.Exit(1) + } + pgFleetMetricsCollector := pgprometheus.NewFleetCollector() + if err := (&controller.PostgresDatabaseReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorderFor("postgresdatabase-controller"), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("postgresdatabase-controller"), + Metrics: pgMetricsRecorder, + FleetCollector: pgFleetMetricsCollector, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "PostgresDatabase") os.Exit(1) } if err := (&controller.PostgresClusterReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorderFor("postgrescluster-controller"), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("postgrescluster-controller"), + Metrics: pgMetricsRecorder, + FleetCollector: pgFleetMetricsCollector, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "PostgresCluster") os.Exit(1) diff --git a/internal/controller/postgrescluster_controller.go b/internal/controller/postgrescluster_controller.go index 70b11c9e6..c49e7ba4d 100644 --- a/internal/controller/postgrescluster_controller.go +++ b/internal/controller/postgrescluster_controller.go @@ -22,6 +22,8 @@ import ( cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" enterprisev4 "github.com/splunk/splunk-operator/api/v4" clustercore "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core" + "github.com/splunk/splunk-operator/pkg/postgresql/shared/ports" + pgprometheus 
"github.com/splunk/splunk-operator/pkg/postgresql/shared/adapter/prometheus" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -42,8 +44,10 @@ const ( // PostgresClusterReconciler reconciles PostgresCluster resources. type PostgresClusterReconciler struct { client.Client - Scheme *runtime.Scheme - Recorder record.EventRecorder + Scheme *runtime.Scheme + Recorder record.EventRecorder + Metrics ports.Recorder + FleetCollector *pgprometheus.FleetCollector } // +kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresclusters,verbs=get;list;watch;create;update;patch;delete @@ -57,8 +61,10 @@ type PostgresClusterReconciler struct { // +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch func (r *PostgresClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - rc := &clustercore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder} - return clustercore.PostgresClusterService(ctx, rc, req) + rc := &clustercore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder, Metrics: r.Metrics} + result, err := clustercore.PostgresClusterService(ctx, rc, req) + r.FleetCollector.CollectClusterMetrics(ctx, r.Client, r.Metrics) + return result, err } // SetupWithManager registers the controller and owned resource watches. diff --git a/internal/controller/postgresdatabase_controller.go b/internal/controller/postgresdatabase_controller.go index 0c6db9628..ab54da0fd 100644 --- a/internal/controller/postgresdatabase_controller.go +++ b/internal/controller/postgresdatabase_controller.go @@ -24,6 +24,8 @@ import ( enterprisev4 "github.com/splunk/splunk-operator/api/v4" dbadapter "github.com/splunk/splunk-operator/pkg/postgresql/database/adapter" dbcore "github.com/splunk/splunk-operator/pkg/postgresql/database/core" + "github.com/splunk/splunk-operator/pkg/postgresql/shared/ports" + pgprometheus "github.com/splunk/splunk-operator/pkg/postgresql/shared/adapter/prometheus" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -42,8 +44,10 @@ import ( // PostgresDatabaseReconciler reconciles a PostgresDatabase object. type PostgresDatabaseReconciler struct { client.Client - Scheme *runtime.Scheme - Recorder record.EventRecorder + Scheme *runtime.Scheme + Recorder record.EventRecorder + Metrics ports.Recorder + FleetCollector *pgprometheus.FleetCollector } const ( @@ -71,8 +75,11 @@ func (r *PostgresDatabaseReconciler) Reconcile(ctx context.Context, req ctrl.Req } return ctrl.Result{}, err } - rc := &dbcore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder} - return dbcore.PostgresDatabaseService(ctx, rc, postgresDB, dbadapter.NewDBRepository) + rc := &dbcore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder, Metrics: r.Metrics} + result, err := dbcore.PostgresDatabaseService(ctx, rc, postgresDB, dbadapter.NewDBRepository) + r.FleetCollector.CollectDatabaseMetrics(ctx, r.Client, r.Metrics) + + return result, err } // SetupWithManager sets up the controller with the Manager. 
diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index e09974ec0..01df3a1c7 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -24,6 +24,7 @@ import ( cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" password "github.com/sethvargo/go-password/password" enterprisev4 "github.com/splunk/splunk-operator/api/v4" + "github.com/splunk/splunk-operator/pkg/postgresql/shared/ports" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -67,7 +68,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. ctx = log.IntoContext(ctx, logger) updateStatus := func(conditionType conditionTypes, status metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileClusterPhases) error { - return setStatus(ctx, c, postgresCluster, conditionType, status, reason, message, phase) + return setStatus(ctx, c, rc.Metrics, postgresCluster, conditionType, status, reason, message, phase) } // Finalizer handling must come before any other processing. @@ -384,7 +385,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. default: oldConditions := make([]metav1.Condition, len(postgresCluster.Status.Conditions)) copy(oldConditions, postgresCluster.Status.Conditions) - if err := syncPoolerStatus(ctx, c, postgresCluster); err != nil { + if err := syncPoolerStatus(ctx, c, rc.Metrics, postgresCluster); err != nil { logger.Error(err, "Failed to sync pooler status") rc.emitWarning(postgresCluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to sync pooler status: %v", err)) if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, @@ -450,7 +451,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. if postgresCluster.Status.Phase != nil { oldPhase = *postgresCluster.Status.Phase } - if err := syncStatus(ctx, c, postgresCluster, cnpgCluster); err != nil { + if err := syncStatus(ctx, c, rc.Metrics, postgresCluster, cnpgCluster); err != nil { logger.Error(err, "Failed to sync status") if apierrors.IsConflict(err) { logger.Info("Conflict during status update, will requeue") @@ -478,7 +479,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. logger.Info("Poolers ready, syncing status") poolerOldConditions := make([]metav1.Condition, len(postgresCluster.Status.Conditions)) copy(poolerOldConditions, postgresCluster.Status.Conditions) - _ = syncPoolerStatus(ctx, c, postgresCluster) + _ = syncPoolerStatus(ctx, c, rc.Metrics, postgresCluster) rc.emitPoolerReadyTransition(postgresCluster, poolerOldConditions) } } @@ -755,7 +756,7 @@ func deleteConnectionPoolers(ctx context.Context, c client.Client, cluster *ente } // syncPoolerStatus populates ConnectionPoolerStatus and the PoolerReady condition. 
-func syncPoolerStatus(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster) error { +func syncPoolerStatus(ctx context.Context, c client.Client, metrics ports.Recorder, cluster *enterprisev4.PostgresCluster) error { rwPooler := &cnpgv1.Pooler{} if err := c.Get(ctx, types.NamespacedName{ Name: poolerResourceName(cluster.Name, readWriteEndpoint), @@ -776,13 +777,13 @@ func syncPoolerStatus(ctx context.Context, c client.Client, cluster *enterprisev rwDesired, rwScheduled := poolerInstanceCount(rwPooler) roDesired, roScheduled := poolerInstanceCount(roPooler) - return setStatus(ctx, c, cluster, poolerReady, metav1.ConditionTrue, reasonAllInstancesReady, + return setStatus(ctx, c, metrics, cluster, poolerReady, metav1.ConditionTrue, reasonAllInstancesReady, fmt.Sprintf("%s: %d/%d, %s: %d/%d", readWriteEndpoint, rwScheduled, rwDesired, readOnlyEndpoint, roScheduled, roDesired), readyClusterPhase) } // syncStatus maps CNPG Cluster state to PostgresCluster status. -func syncStatus(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster) error { +func syncStatus(ctx context.Context, c client.Client, metrics ports.Recorder, cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster) error { cluster.Status.ProvisionerRef = &corev1.ObjectReference{ APIVersion: "postgresql.cnpg.io/v1", Kind: "Cluster", @@ -835,13 +836,13 @@ func syncStatus(ctx context.Context, c client.Client, cluster *enterprisev4.Post message = fmt.Sprintf("CNPG cluster phase: %s", cnpgCluster.Status.Phase) } - return setStatus(ctx, c, cluster, clusterReady, condStatus, reason, message, phase) + return setStatus(ctx, c, metrics, cluster, clusterReady, condStatus, reason, message, phase) } // setStatus sets the phase, condition and persists the status. // It skips the API write when the resulting status is identical to the current // state, avoiding unnecessary etcd churn and ResourceVersion bumps on stable clusters. 
-func setStatus(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, condType conditionTypes, status metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileClusterPhases) error { +func setStatus(ctx context.Context, c client.Client, metrics ports.Recorder, cluster *enterprisev4.PostgresCluster, condType conditionTypes, status metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileClusterPhases) error { before := cluster.Status.DeepCopy() p := string(phase) @@ -858,6 +859,8 @@ func setStatus(ctx context.Context, c client.Client, cluster *enterprisev4.Postg return nil } + metrics.IncStatusTransition(ports.ControllerCluster, string(condType), string(status), string(reason)) + if err := c.Status().Update(ctx, cluster); err != nil { return fmt.Errorf("failed to update PostgresCluster status: %w", err) } diff --git a/pkg/postgresql/cluster/core/types.go b/pkg/postgresql/cluster/core/types.go index 042a5ae82..7a43322fe 100644 --- a/pkg/postgresql/cluster/core/types.go +++ b/pkg/postgresql/cluster/core/types.go @@ -4,6 +4,7 @@ import ( "time" enterprisev4 "github.com/splunk/splunk-operator/api/v4" + "github.com/splunk/splunk-operator/pkg/postgresql/shared/ports" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/record" @@ -17,6 +18,7 @@ type ReconcileContext struct { Client client.Client Scheme *runtime.Scheme Recorder record.EventRecorder + Metrics ports.Recorder } // normalizedCNPGClusterSpec is a subset of cnpgv1.ClusterSpec fields used for drift detection. diff --git a/pkg/postgresql/database/core/database.go b/pkg/postgresql/database/core/database.go index 3a88bac80..377f1dcce 100644 --- a/pkg/postgresql/database/core/database.go +++ b/pkg/postgresql/database/core/database.go @@ -11,6 +11,7 @@ import ( cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" "github.com/sethvargo/go-password/password" enterprisev4 "github.com/splunk/splunk-operator/api/v4" + "github.com/splunk/splunk-operator/pkg/postgresql/shared/ports" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" @@ -43,7 +44,7 @@ func PostgresDatabaseService( logger.Info("Reconciling PostgresDatabase") updateStatus := func(conditionType conditionTypes, conditionStatus metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileDBPhases) error { - return persistStatus(ctx, c, postgresDB, conditionType, conditionStatus, reason, message, phase) + return persistStatus(ctx, c, rc.Metrics, postgresDB, conditionType, conditionStatus, reason, message, phase) } // Finalizer: cleanup on deletion, register on creation. 
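With persistStatus now emitting a transition metric on every condition write, unit tests can inject the NoopRecorder added by this patch to keep metrics quiet. A test that instead wants to assert that transitions were recorded only needs a small capturing stub of ports.Recorder; the stub below is hypothetical, not part of this patch:

// captureRecorder is a hypothetical test double for ports.Recorder that
// remembers every status transition; the gauge methods are deliberate no-ops.
type captureRecorder struct {
	transitions []string
}

func (r *captureRecorder) IncStatusTransition(controller, condition, status, reason string) {
	r.transitions = append(r.transitions, controller+"/"+condition+"="+status+" ("+reason+")")
}
func (r *captureRecorder) SetClusterPhases(map[string]float64)        {}
func (r *captureRecorder) SetPoolerEnabledClusters(float64)           {}
func (r *captureRecorder) SetDatabasePhases(map[string]float64)       {}
func (r *captureRecorder) SetManagedUsers(string, map[string]float64) {}

var _ ports.Recorder = (*captureRecorder)(nil)

Passing &captureRecorder{} where the unit tests below pass &pgprometheus.NoopRecorder{} lets an assertion check, for example, that a failed reconcile produced exactly one transition for the expected condition and reason.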
@@ -183,6 +184,10 @@ func PostgresDatabaseService( if err := patchManagedRoles(ctx, c, fieldManager, cluster, allRoles); err != nil { logger.Error(err, "Failed to patch users in CNPG Cluster") rc.emitWarning(postgresDB, EventManagedRolesPatchFailed, fmt.Sprintf("Failed to patch managed roles: %v", err)) + if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonUsersCreationFailed, + fmt.Sprintf("Failed to patch managed roles: %v", err), failedDBPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } return ctrl.Result{}, err } rc.emitNormal(postgresDB, EventRoleReconciliationStarted, fmt.Sprintf("Patched managed roles: %d to add, %d to remove", len(rolesToAdd), len(rolesToRemove))) @@ -221,6 +226,10 @@ func PostgresDatabaseService( if err != nil { logger.Error(err, "Failed to reconcile CNPG Databases") rc.emitWarning(postgresDB, EventDatabasesReconcileFailed, fmt.Sprintf("Failed to reconcile databases: %v", err)) + if statusErr := updateStatus(databasesReady, metav1.ConditionFalse, reasonDatabaseReconcileFailed, + fmt.Sprintf("Failed to reconcile databases: %v", err), failedDBPhase); statusErr != nil { + logger.Error(statusErr, "Failed to update status") + } return ctrl.Result{}, err } if len(adopted) > 0 { @@ -492,8 +501,9 @@ func verifyDatabasesReady(ctx context.Context, c client.Client, postgresDB *ente return notReady, nil } -func persistStatus(ctx context.Context, c client.Client, db *enterprisev4.PostgresDatabase, conditionType conditionTypes, conditionStatus metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileDBPhases) error { +func persistStatus(ctx context.Context, c client.Client, metrics ports.Recorder, db *enterprisev4.PostgresDatabase, conditionType conditionTypes, conditionStatus metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileDBPhases) error { applyStatus(db, conditionType, conditionStatus, reason, message, phase) + metrics.IncStatusTransition(ports.ControllerDatabase, string(conditionType), string(conditionStatus), string(reason)) return c.Status().Update(ctx, db) } diff --git a/pkg/postgresql/database/core/database_unit_test.go b/pkg/postgresql/database/core/database_unit_test.go index 0e8bee12b..fdba00f71 100644 --- a/pkg/postgresql/database/core/database_unit_test.go +++ b/pkg/postgresql/database/core/database_unit_test.go @@ -20,6 +20,7 @@ import ( cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" enterprisev4 "github.com/splunk/splunk-operator/api/v4" + pgprometheus "github.com/splunk/splunk-operator/pkg/postgresql/shared/adapter/prometheus" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" @@ -591,6 +592,7 @@ func TestSetStatus(t *testing.T) { err := persistStatus( context.Background(), c, + &pgprometheus.NoopRecorder{}, postgresDB, clusterReady, metav1.ConditionTrue, diff --git a/pkg/postgresql/database/core/types.go b/pkg/postgresql/database/core/types.go index fb57dee91..cbf7c15a9 100644 --- a/pkg/postgresql/database/core/types.go +++ b/pkg/postgresql/database/core/types.go @@ -4,16 +4,18 @@ import ( "time" enterprisev4 "github.com/splunk/splunk-operator/api/v4" + "github.com/splunk/splunk-operator/pkg/postgresql/shared/ports" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" ) -// ReconcileContext bundles infrastructure dependencies injected by the controller +// ReconcileContext bundles infrastructure dependencies injected by the controller. 
type ReconcileContext struct { Client client.Client Scheme *runtime.Scheme Recorder record.EventRecorder + Metrics ports.Recorder } type reconcileDBPhases string @@ -77,9 +79,10 @@ const ( reasonUsersAvailable conditionReasons = "UsersAvailable" reasonRoleConflict conditionReasons = "RoleConflict" reasonConfigMapsCreationFailed conditionReasons = "ConfigMapsCreationFailed" - reasonConfigMapsCreated conditionReasons = "ConfigMapsCreated" - reasonPrivilegesGranted conditionReasons = "PrivilegesGranted" - reasonPrivilegesGrantFailed conditionReasons = "PrivilegesGrantFailed" + reasonConfigMapsCreated conditionReasons = "ConfigMapsCreated" + reasonDatabaseReconcileFailed conditionReasons = "DatabaseReconcileFailed" + reasonPrivilegesGranted conditionReasons = "PrivilegesGranted" + reasonPrivilegesGrantFailed conditionReasons = "PrivilegesGrantFailed" // ClusterReady sentinel values returned by ensureClusterReady. // Exported so the controller adapter can switch on them if needed. diff --git a/pkg/postgresql/shared/adapter/prometheus/collector.go b/pkg/postgresql/shared/adapter/prometheus/collector.go new file mode 100644 index 000000000..bbd44944b --- /dev/null +++ b/pkg/postgresql/shared/adapter/prometheus/collector.go @@ -0,0 +1,91 @@ +package prometheus + +import ( + "context" + + enterprisev4 "github.com/splunk/splunk-operator/api/v4" + "github.com/splunk/splunk-operator/pkg/postgresql/shared/ports" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +// FleetCollector recomputes fleet-state gauges from the K8s API (informer cache). +type FleetCollector struct{} + +// NewFleetCollector returns a new FleetCollector. +func NewFleetCollector() *FleetCollector { + return &FleetCollector{} +} + +// CollectClusterMetrics lists all PostgresCluster resources and updates phase +// gauges, pooler gauges, and managed-user gauges. +func (fc *FleetCollector) CollectClusterMetrics(ctx context.Context, c client.Client, recorder ports.Recorder) { + logger := log.FromContext(ctx) + + var list enterprisev4.PostgresClusterList + if err := c.List(ctx, &list); err != nil { + logger.Error(err, "Failed to list PostgresClusters for fleet metrics") + return + } + + phases := make(map[string]float64) + var poolerEnabledCount float64 + managedUserStates := map[string]float64{ + "desired": 0, + "reconciled": 0, + "pending": 0, + "failed": 0, + } + + for i := range list.Items { + cluster := &list.Items[i] + + // Phase gauge. + phase := "Unknown" + if cluster.Status.Phase != nil { + phase = *cluster.Status.Phase + } + phases[phase]++ + + // Pooler-enabled count. + if cluster.Spec.ConnectionPoolerEnabled != nil && *cluster.Spec.ConnectionPoolerEnabled { + poolerEnabledCount++ + } + + // Managed users. + managedUserStates["desired"] += float64(len(cluster.Spec.ManagedRoles)) + if cluster.Status.ManagedRolesStatus != nil { + managedUserStates["reconciled"] += float64(len(cluster.Status.ManagedRolesStatus.Reconciled)) + managedUserStates["pending"] += float64(len(cluster.Status.ManagedRolesStatus.Pending)) + managedUserStates["failed"] += float64(len(cluster.Status.ManagedRolesStatus.Failed)) + } + } + + recorder.SetClusterPhases(phases) + recorder.SetPoolerEnabledClusters(poolerEnabledCount) + recorder.SetManagedUsers(ports.ControllerCluster, managedUserStates) +} + +// CollectDatabaseMetrics lists all PostgresDatabase resources and updates +// phase gauges. 
+func (fc *FleetCollector) CollectDatabaseMetrics(ctx context.Context, c client.Client, recorder ports.Recorder) { + logger := log.FromContext(ctx) + + var list enterprisev4.PostgresDatabaseList + if err := c.List(ctx, &list); err != nil { + logger.Error(err, "Failed to list PostgresDatabases for fleet metrics") + return + } + + phases := make(map[string]float64) + for i := range list.Items { + db := &list.Items[i] + phase := "Unknown" + if db.Status.Phase != nil { + phase = *db.Status.Phase + } + phases[phase]++ + } + + recorder.SetDatabasePhases(phases) +} diff --git a/pkg/postgresql/shared/adapter/prometheus/noop.go new file mode 100644 index 000000000..91b9307f8 --- /dev/null +++ b/pkg/postgresql/shared/adapter/prometheus/noop.go @@ -0,0 +1,15 @@ +package prometheus + +import "github.com/splunk/splunk-operator/pkg/postgresql/shared/ports" + +// NoopRecorder is a no-op implementation of Recorder for use in tests. +type NoopRecorder struct{} + +func (n *NoopRecorder) IncStatusTransition(string, string, string, string) {} +func (n *NoopRecorder) SetClusterPhases(map[string]float64) {} +func (n *NoopRecorder) SetPoolerEnabledClusters(float64) {} +func (n *NoopRecorder) SetDatabasePhases(map[string]float64) {} +func (n *NoopRecorder) SetManagedUsers(string, map[string]float64) {} + +// Compile-time interface check. +var _ ports.Recorder = (*NoopRecorder)(nil) diff --git a/pkg/postgresql/shared/adapter/prometheus/recorder.go new file mode 100644 index 000000000..61d3d409d --- /dev/null +++ b/pkg/postgresql/shared/adapter/prometheus/recorder.go @@ -0,0 +1,91 @@ +package prometheus + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/splunk/splunk-operator/pkg/postgresql/shared/ports" +) + +var ( + statusTransitionsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: "splunk_operator_postgres_status_transitions_total", + Help: "Status condition transitions by controller, condition type, status, and reason.", + }, []string{"controller", "condition", "status", "reason"}) + + clusters = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "splunk_operator_postgres_clusters", + Help: "Current number of PostgresCluster resources by status phase.", + }, []string{"phase"}) + + poolerEnabledClusters = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "splunk_operator_postgres_clusters_pooler_enabled", + Help: "Current number of PostgresCluster resources with connection pooling enabled.", + }) + + databases = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "splunk_operator_postgres_databases", + Help: "Current number of PostgresDatabase resources by status phase.", + }, []string{"phase"}) + + managedUsers = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "splunk_operator_postgres_managed_users", + Help: "Current counts of managed users by state.", + }, []string{"controller", "state"}) + + allCollectors = []prometheus.Collector{ + statusTransitionsTotal, + clusters, + poolerEnabledClusters, + databases, + managedUsers, + } +) + +// Register registers all PostgreSQL metrics with the given registerer. +// Call once at startup from cmd/main.go. +func Register(registerer prometheus.Registerer) error { + for _, c := range allCollectors { + if err := registerer.Register(c); err != nil { + return err + } + } + return nil +} + +// PrometheusRecorder implements ports.Recorder using Prometheus client_golang.
+type PrometheusRecorder struct{} + +// NewPrometheusRecorder returns a new PrometheusRecorder. +func NewPrometheusRecorder() *PrometheusRecorder { + return &PrometheusRecorder{} +} + +func (p *PrometheusRecorder) IncStatusTransition(controller, condition, status, reason string) { + statusTransitionsTotal.WithLabelValues(controller, condition, status, reason).Inc() +} + +func (p *PrometheusRecorder) SetClusterPhases(phases map[string]float64) { + clusters.Reset() // drop stale label combinations before re-populating + for phase, count := range phases { + clusters.WithLabelValues(phase).Set(count) + } +} + +func (p *PrometheusRecorder) SetPoolerEnabledClusters(count float64) { + poolerEnabledClusters.Set(count) +} + +func (p *PrometheusRecorder) SetDatabasePhases(phases map[string]float64) { + databases.Reset() // drop stale label combinations before re-populating + for phase, count := range phases { + databases.WithLabelValues(phase).Set(count) + } +} + +func (p *PrometheusRecorder) SetManagedUsers(controller string, states map[string]float64) { + for state, count := range states { + managedUsers.WithLabelValues(controller, state).Set(count) + } +} + +// Compile-time interface check. +var _ ports.Recorder = (*PrometheusRecorder)(nil) diff --git a/pkg/postgresql/shared/ports/metrics.go b/pkg/postgresql/shared/ports/metrics.go new file mode 100644 index 000000000..79ec2fbfb --- /dev/null +++ b/pkg/postgresql/shared/ports/metrics.go @@ -0,0 +1,35 @@ +package ports + +// Controller name labels. +const ( + ControllerCluster = "postgrescluster" + ControllerDatabase = "postgresdatabase" +) + +// Recorder is the port for all PostgreSQL controller metrics. +// Core service packages depend on this interface, never on Prometheus directly. +// +// Reconcile-level metrics (total count, duration, error count) are handled +// automatically by controller-runtime — see controller_runtime_reconcile_total, +// controller_runtime_reconcile_time_seconds, controller_runtime_reconcile_errors_total. +// +// Domain-specific business metrics are emitted automatically via IncStatusTransition +// every time a status condition is written. Fleet-level gauges are populated by the +// collector on each reconcile. +type Recorder interface { + // IncStatusTransition increments the status transition counter. + // Called automatically by persistStatus/setStatus — no manual calls needed in service code. + IncStatusTransition(controller, condition, status, reason string) + + // SetClusterPhases sets gauge values for cluster counts by phase. + SetClusterPhases(phases map[string]float64) + + // SetPoolerEnabledClusters sets the gauge for clusters with connection pooling enabled. + SetPoolerEnabledClusters(count float64) + + // SetDatabasePhases sets gauge values for database counts by phase. + SetDatabasePhases(phases map[string]float64) + + // SetManagedUsers sets the gauge for managed user states. 
+ SetManagedUsers(controller string, states map[string]float64) +} From 86ce512568355dd76ff80fd6eb33b0ad8957ca3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20P=C5=82oski?= Date: Tue, 14 Apr 2026 23:11:06 +0200 Subject: [PATCH 17/36] Clean up conflict handling and separate business logic from infra errors --- .../controller/postgrescluster_controller.go | 6 +- .../controller/postgresdatabase_controller.go | 7 +- pkg/postgresql/cluster/core/cluster.go | 193 ++++++------------ pkg/postgresql/database/core/database.go | 7 - pkg/postgresql/shared/reconcile/errors.go | 42 ++++ 5 files changed, 116 insertions(+), 139 deletions(-) create mode 100644 pkg/postgresql/shared/reconcile/errors.go diff --git a/internal/controller/postgrescluster_controller.go index c49e7ba4d..6d42d72bf 100644 --- a/internal/controller/postgrescluster_controller.go +++ b/internal/controller/postgrescluster_controller.go @@ -22,8 +22,9 @@ import ( cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" enterprisev4 "github.com/splunk/splunk-operator/api/v4" clustercore "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core" - "github.com/splunk/splunk-operator/pkg/postgresql/shared/ports" pgprometheus "github.com/splunk/splunk-operator/pkg/postgresql/shared/adapter/prometheus" + "github.com/splunk/splunk-operator/pkg/postgresql/shared/ports" + sharedreconcile "github.com/splunk/splunk-operator/pkg/postgresql/shared/reconcile" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -64,6 +65,9 @@ func (r *PostgresClusterReconciler) Reconcile(ctx context.Context, req ctrl.Requ rc := &clustercore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder, Metrics: r.Metrics} result, err := clustercore.PostgresClusterService(ctx, rc, req) r.FleetCollector.CollectClusterMetrics(ctx, r.Client, r.Metrics) + if sharedreconcile.IsPureConflict(err) { + return ctrl.Result{Requeue: true}, nil + } return result, err } diff --git a/internal/controller/postgresdatabase_controller.go index ab54da0fd..8a480abc2 100644 --- a/internal/controller/postgresdatabase_controller.go +++ b/internal/controller/postgresdatabase_controller.go @@ -24,8 +24,9 @@ import ( enterprisev4 "github.com/splunk/splunk-operator/api/v4" dbadapter "github.com/splunk/splunk-operator/pkg/postgresql/database/adapter" dbcore "github.com/splunk/splunk-operator/pkg/postgresql/database/core" - "github.com/splunk/splunk-operator/pkg/postgresql/shared/ports" pgprometheus "github.com/splunk/splunk-operator/pkg/postgresql/shared/adapter/prometheus" + "github.com/splunk/splunk-operator/pkg/postgresql/shared/ports" + sharedreconcile "github.com/splunk/splunk-operator/pkg/postgresql/shared/reconcile" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -78,7 +79,9 @@ func (r *PostgresDatabaseReconciler) Reconcile(ctx context.Context, req ctrl.Req rc := &dbcore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder, Metrics: r.Metrics} result, err := dbcore.PostgresDatabaseService(ctx, rc, postgresDB, dbadapter.NewDBRepository) r.FleetCollector.CollectDatabaseMetrics(ctx, r.Client, r.Metrics) - + if sharedreconcile.IsPureConflict(err) { + return ctrl.Result{Requeue: true}, nil + } return result, err } diff --git a/pkg/postgresql/cluster/core/cluster.go index 01df3a1c7..94e41407e 100644 ---
a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -79,13 +79,9 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. } logger.Error(err, "Failed to handle finalizer") rc.emitWarning(postgresCluster, EventCleanupFailed, fmt.Sprintf("Cleanup failed: %v", err)) - errs := []error{err} - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterDeleteFailed, - fmt.Sprintf("Failed to delete resources during cleanup: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - errs = append(errs, statusErr) - } - return ctrl.Result{}, errors.Join(errs...) + statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterDeleteFailed, + fmt.Sprintf("Failed to delete resources during cleanup: %v", err), failedClusterPhase) + return ctrl.Result{}, errors.Join(err, statusErr) } if postgresCluster.GetDeletionTimestamp() != nil { logger.Info("Deletion cleanup complete, finalizer removed") @@ -96,10 +92,6 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. if !controllerutil.ContainsFinalizer(postgresCluster, PostgresClusterFinalizerName) { controllerutil.AddFinalizer(postgresCluster, PostgresClusterFinalizerName) if err := c.Update(ctx, postgresCluster); err != nil { - if apierrors.IsConflict(err) { - logger.Info("Conflict while adding finalizer, will requeue") - return ctrl.Result{Requeue: true}, nil - } logger.Error(err, "Failed to add finalizer to PostgresCluster") return ctrl.Result{}, fmt.Errorf("failed to add finalizer: %w", err) } @@ -112,11 +104,9 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. if err := c.Get(ctx, client.ObjectKey{Name: postgresCluster.Spec.Class}, clusterClass); err != nil { logger.Error(err, "Failed to fetch PostgresClusterClass", "className", postgresCluster.Spec.Class) rc.emitWarning(postgresCluster, EventClusterClassNotFound, fmt.Sprintf("ClusterClass %s not found", postgresCluster.Spec.Class)) - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterClassNotFound, - fmt.Sprintf("ClusterClass %s not found: %v", postgresCluster.Spec.Class, err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, err + statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterClassNotFound, + fmt.Sprintf("ClusterClass %s not found: %v", postgresCluster.Spec.Class, err), failedClusterPhase) + return ctrl.Result{}, errors.Join(err, statusErr) } // Merge PostgresClusterSpec on top of PostgresClusterClass defaults. @@ -124,11 +114,9 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. if err != nil { logger.Error(err, "Failed to merge PostgresCluster configuration") rc.emitWarning(postgresCluster, EventConfigMergeFailed, fmt.Sprintf("Failed to merge configuration: %v", err)) - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonInvalidConfiguration, - fmt.Sprintf("Failed to merge configuration: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, err + statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonInvalidConfiguration, + fmt.Sprintf("Failed to merge configuration: %v", err), failedClusterPhase) + return ctrl.Result{}, errors.Join(err, statusErr) } // Resolve or derive the superuser secret name. 
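Every failure exit in this refactor now has the same shape: record the failure condition, then return the business error joined with any status-write error, so that a lone write conflict remains distinguishable by IsPureConflict. If the repetition keeps growing, it could be collapsed into a small helper along these lines; this is a hypothetical consolidation, not something the patch introduces:

// failWith is a sketch of a shared failure exit for the cluster service.
// It records the failed condition, then joins any status-write error
// behind the business error so both survive for later classification.
func failWith(
	updateStatus func(conditionTypes, metav1.ConditionStatus, conditionReasons, string, reconcileClusterPhases) error,
	cond conditionTypes, reason conditionReasons, err error, msg string,
) (ctrl.Result, error) {
	statusErr := updateStatus(cond, metav1.ConditionFalse, reason, msg, failedClusterPhase)
	return ctrl.Result{}, errors.Join(err, statusErr)
}

The ClusterClass lookup below would then read: return failWith(updateStatus, clusterReady, reasonClusterClassNotFound, err, fmt.Sprintf("ClusterClass %s not found: %v", postgresCluster.Spec.Class, err)).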
@@ -144,28 +132,20 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. if secretErr != nil { logger.Error(secretErr, "Failed to check if PostgresCluster secret exists", "name", postgresSecretName) rc.emitWarning(postgresCluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to check secret existence: %v", secretErr)) - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonUserSecretFailed, - fmt.Sprintf("Failed to check secret existence: %v", secretErr), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, secretErr + statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonUserSecretFailed, + fmt.Sprintf("Failed to check secret existence: %v", secretErr), failedClusterPhase) + return ctrl.Result{}, errors.Join(secretErr, statusErr) } if !secretExists { logger.Info("Superuser secret creation started", "name", postgresSecretName) if err := ensureClusterSecret(ctx, c, rc.Scheme, postgresCluster, postgresSecretName, secret); err != nil { logger.Error(err, "Failed to ensure PostgresCluster secret", "name", postgresSecretName) rc.emitWarning(postgresCluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to generate cluster secret: %v", err)) - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonUserSecretFailed, - fmt.Sprintf("Failed to generate PostgresCluster secret: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, err + statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonUserSecretFailed, + fmt.Sprintf("Failed to generate PostgresCluster secret: %v", err), failedClusterPhase) + return ctrl.Result{}, errors.Join(err, statusErr) } if err := c.Status().Update(ctx, postgresCluster); err != nil { - if apierrors.IsConflict(err) { - logger.Info("Conflict after secret creation, will requeue") - return ctrl.Result{Requeue: true}, nil - } logger.Error(err, "Failed to update status after secret creation") return ctrl.Result{}, err } @@ -189,11 +169,9 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. if err := patchObject(ctx, c, originalSecret, secret, "Secret"); err != nil { logger.Error(err, "Failed to patch existing secret with controller reference") rc.emitWarning(postgresCluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to patch existing secret: %v", err)) - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonSuperUserSecretFailed, - fmt.Sprintf("Failed to patch existing secret: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, err + statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonSuperUserSecretFailed, + fmt.Sprintf("Failed to patch existing secret: %v", err), failedClusterPhase) + return ctrl.Result{}, errors.Join(err, statusErr) } } @@ -217,26 +195,22 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
if err := c.Create(ctx, newCluster); err != nil { logger.Error(err, "Failed to create CNPG Cluster") rc.emitWarning(postgresCluster, EventClusterCreateFailed, fmt.Sprintf("Failed to create CNPG cluster: %v", err)) - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterBuildFailed, - fmt.Sprintf("Failed to create CNPG Cluster: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, err + statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterBuildFailed, + fmt.Sprintf("Failed to create CNPG Cluster: %v", err), failedClusterPhase) + return ctrl.Result{}, errors.Join(err, statusErr) } rc.emitNormal(postgresCluster, EventClusterCreationStarted, "CNPG cluster created, waiting for healthy state") if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterBuildSucceeded, "CNPG Cluster created", pendingClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + return ctrl.Result{}, statusErr } logger.Info("CNPG Cluster created, requeueing for status update", "name", postgresCluster.Name) return ctrl.Result{RequeueAfter: retryDelay}, nil case err != nil: logger.Error(err, "Failed to get CNPG Cluster") - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterGetFailed, - fmt.Sprintf("Failed to get CNPG Cluster: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, err + statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterGetFailed, + fmt.Sprintf("Failed to get CNPG Cluster: %v", err), failedClusterPhase) + return ctrl.Result{}, errors.Join(err, statusErr) } // Patch CNPG Cluster spec if drift detected. @@ -250,22 +224,16 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
cnpgCluster.Spec = desiredSpec switch patchErr := patchObject(ctx, c, originalCluster, cnpgCluster, "CNPGCluster"); { - case apierrors.IsConflict(patchErr): - logger.Info("Conflict occurred while updating CNPG Cluster, requeueing", "name", cnpgCluster.Name) - return ctrl.Result{Requeue: true}, nil case patchErr != nil: logger.Error(patchErr, "Failed to patch CNPG Cluster", "name", cnpgCluster.Name) rc.emitWarning(postgresCluster, EventClusterUpdateFailed, fmt.Sprintf("Failed to patch CNPG cluster: %v", patchErr)) - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterPatchFailed, - fmt.Sprintf("Failed to patch CNPG Cluster: %v", patchErr), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, patchErr + statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterPatchFailed, + fmt.Sprintf("Failed to patch CNPG Cluster: %v", patchErr), failedClusterPhase) + return ctrl.Result{}, errors.Join(patchErr, statusErr) default: if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterBuildSucceeded, "CNPG Cluster spec updated, waiting for healthy state", provisioningClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status after patch") - return ctrl.Result{Requeue: true}, nil + return ctrl.Result{}, statusErr } rc.emitNormal(postgresCluster, EventClusterUpdateStarted, "CNPG cluster spec updated, waiting for healthy state") logger.Info("CNPG Cluster patched, requeueing for status update", "name", cnpgCluster.Name) @@ -277,11 +245,9 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. if err := reconcileManagedRoles(ctx, c, postgresCluster, cnpgCluster); err != nil { logger.Error(err, "Failed to reconcile managed roles") rc.emitWarning(postgresCluster, EventManagedRolesFailed, fmt.Sprintf("Failed to reconcile managed roles: %v", err)) - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonManagedRolesFailed, - fmt.Sprintf("Failed to reconcile managed roles: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, err + statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonManagedRolesFailed, + fmt.Sprintf("Failed to reconcile managed roles: %v", err), failedClusterPhase) + return ctrl.Result{}, errors.Join(err, statusErr) } // Reconcile Connection Pooler. @@ -290,35 +256,25 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. rwPoolerExists, err := poolerExists(ctx, c, postgresCluster, readWriteEndpoint) if err != nil { logger.Error(err, "Failed to check RW pooler existence") - errs := []error{err} - if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, - fmt.Sprintf("Failed to check pooler existence: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - errs = append(errs, statusErr) - } - return ctrl.Result{}, errors.Join(errs...) 
+ statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, + fmt.Sprintf("Failed to check pooler existence: %v", err), failedClusterPhase) + return ctrl.Result{}, errors.Join(err, statusErr) } roPoolerExists, err := poolerExists(ctx, c, postgresCluster, readOnlyEndpoint) if err != nil { logger.Error(err, "Failed to check RO pooler existence") - errs := []error{err} - if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, - fmt.Sprintf("Failed to check pooler existence: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - errs = append(errs, statusErr) - } - return ctrl.Result{}, errors.Join(errs...) + statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, + fmt.Sprintf("Failed to check pooler existence: %v", err), failedClusterPhase) + return ctrl.Result{}, errors.Join(err, statusErr) } switch { case !poolerEnabled: if err := deleteConnectionPoolers(ctx, c, postgresCluster); err != nil { logger.Error(err, "Failed to delete connection poolers") - if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, - fmt.Sprintf("Failed to delete connection poolers: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, err + statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, + fmt.Sprintf("Failed to delete connection poolers: %v", err), failedClusterPhase) + return ctrl.Result{}, errors.Join(err, statusErr) } postgresCluster.Status.ConnectionPoolerStatus = nil meta.RemoveStatusCondition(&postgresCluster.Status.Conditions, string(poolerReady)) @@ -327,35 +283,29 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
if mergedConfig.CNPG == nil || mergedConfig.CNPG.ConnectionPooler == nil { logger.Info("Connection pooler enabled but no config found in class or cluster spec, skipping", "class", postgresCluster.Spec.Class, "cluster", postgresCluster.Name) - if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerConfigMissing, + statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerConfigMissing, fmt.Sprintf("Connection pooler is enabled but no config found in class %q or cluster %q", - postgresCluster.Spec.Class, postgresCluster.Name), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, nil + postgresCluster.Spec.Class, postgresCluster.Name), failedClusterPhase) + return ctrl.Result{}, statusErr } if cnpgCluster.Status.Phase != cnpgv1.PhaseHealthy { logger.Info("CNPG Cluster not healthy yet, pending pooler creation", "clusterPhase", cnpgCluster.Status.Phase) - if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonCNPGClusterNotHealthy, - "Waiting for CNPG cluster to become healthy before creating poolers", pendingClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{RequeueAfter: retryDelay}, nil + statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonCNPGClusterNotHealthy, + "Waiting for CNPG cluster to become healthy before creating poolers", pendingClusterPhase) + return ctrl.Result{RequeueAfter: retryDelay}, statusErr } if err := createOrUpdateConnectionPoolers(ctx, c, rc.Scheme, postgresCluster, mergedConfig, cnpgCluster); err != nil { logger.Error(err, "Failed to reconcile connection pooler") rc.emitWarning(postgresCluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to reconcile connection pooler: %v", err)) - if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, - fmt.Sprintf("Failed to reconcile connection pooler: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, err + statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, + fmt.Sprintf("Failed to reconcile connection pooler: %v", err), failedClusterPhase) + return ctrl.Result{}, errors.Join(err, statusErr) } rc.emitNormal(postgresCluster, EventPoolerCreationStarted, "Connection poolers created, waiting for readiness") logger.Info("Connection pooler creation started, requeueing") if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerCreating, "Connection poolers are being provisioned", provisioningClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + return ctrl.Result{}, statusErr } return ctrl.Result{RequeueAfter: retryDelay}, nil @@ -373,14 +323,9 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
return rwErr != nil || roErr != nil || !arePoolersReady(rwPooler, roPooler) }(): logger.Info("Connection Poolers are not ready yet, requeueing") - if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerCreating, - "Connection poolers are being provisioned", pendingClusterPhase); statusErr != nil { - if apierrors.IsConflict(statusErr) { - logger.Info("Conflict updating pooler status, will requeue") - return ctrl.Result{Requeue: true}, nil - } - } - return ctrl.Result{RequeueAfter: retryDelay}, nil + statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerCreating, + "Connection poolers are being provisioned", pendingClusterPhase) + return ctrl.Result{RequeueAfter: retryDelay}, statusErr default: oldConditions := make([]metav1.Condition, len(postgresCluster.Status.Conditions)) @@ -388,11 +333,9 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. if err := syncPoolerStatus(ctx, c, rc.Metrics, postgresCluster); err != nil { logger.Error(err, "Failed to sync pooler status") rc.emitWarning(postgresCluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to sync pooler status: %v", err)) - if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, - fmt.Sprintf("Failed to sync pooler status: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, err + statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, + fmt.Sprintf("Failed to sync pooler status: %v", err), failedClusterPhase) + return ctrl.Result{}, errors.Join(err, statusErr) } rc.emitPoolerReadyTransition(postgresCluster, oldConditions) } @@ -404,11 +347,9 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. if err != nil { logger.Error(err, "Failed to generate ConfigMap") rc.emitWarning(postgresCluster, EventConfigMapReconcileFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err)) - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonConfigMapFailed, - fmt.Sprintf("Failed to generate ConfigMap: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, err + statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonConfigMapFailed, + fmt.Sprintf("Failed to generate ConfigMap: %v", err), failedClusterPhase) + return ctrl.Result{}, errors.Join(err, statusErr) } cm := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: desiredCM.Name, Namespace: desiredCM.Namespace}} createOrUpdateResult, err := controllerutil.CreateOrUpdate(ctx, c, cm, func() error { @@ -425,11 +366,9 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
if err != nil { logger.Error(err, "Failed to reconcile ConfigMap", "name", desiredCM.Name) rc.emitWarning(postgresCluster, EventConfigMapReconcileFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err)) - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonConfigMapFailed, - fmt.Sprintf("Failed to reconcile ConfigMap: %v", err), failedClusterPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") - } - return ctrl.Result{}, err + statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonConfigMapFailed, + fmt.Sprintf("Failed to reconcile ConfigMap: %v", err), failedClusterPhase) + return ctrl.Result{}, errors.Join(err, statusErr) } switch createOrUpdateResult { case controllerutil.OperationResultCreated: @@ -453,11 +392,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. } if err := syncStatus(ctx, c, rc.Metrics, postgresCluster, cnpgCluster); err != nil { logger.Error(err, "Failed to sync status") - if apierrors.IsConflict(err) { - logger.Info("Conflict during status update, will requeue") - return ctrl.Result{Requeue: true}, nil - } - return ctrl.Result{}, fmt.Errorf("failed to sync status: %w", err) + return ctrl.Result{}, err } var newPhase string if postgresCluster.Status.Phase != nil { diff --git a/pkg/postgresql/database/core/database.go b/pkg/postgresql/database/core/database.go index 377f1dcce..362b2939c 100644 --- a/pkg/postgresql/database/core/database.go +++ b/pkg/postgresql/database/core/database.go @@ -60,10 +60,6 @@ func PostgresDatabaseService( if !controllerutil.ContainsFinalizer(postgresDB, postgresDatabaseFinalizerName) { controllerutil.AddFinalizer(postgresDB, postgresDatabaseFinalizerName) if err := c.Update(ctx, postgresDB); err != nil { - if errors.IsConflict(err) { - logger.Info("Conflict while adding finalizer, will requeue") - return ctrl.Result{Requeue: true}, nil - } logger.Error(err, "Failed to add finalizer to PostgresDatabase") return ctrl.Result{}, fmt.Errorf("failed to add finalizer: %w", err) } @@ -304,9 +300,6 @@ func PostgresDatabaseService( postgresDB.Status.ObservedGeneration = &postgresDB.Generation if err := c.Status().Update(ctx, postgresDB); err != nil { - if errors.IsConflict(err) { - return ctrl.Result{Requeue: true}, nil - } return ctrl.Result{}, fmt.Errorf("failed to persist final status: %w", err) } diff --git a/pkg/postgresql/shared/reconcile/errors.go b/pkg/postgresql/shared/reconcile/errors.go new file mode 100644 index 000000000..2270770f8 --- /dev/null +++ b/pkg/postgresql/shared/reconcile/errors.go @@ -0,0 +1,42 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package reconcile + +import ( + apierrors "k8s.io/apimachinery/pkg/api/errors" +) + +// IsPureConflict reports whether err is non-nil and every non-nil error +// within it is a 409 Conflict. When a business error and a status-write +// conflict are joined together the business error takes priority and this +// returns false, preserving exponential backoff for real failures. 
+//
+// TODO(human): implement this function.
+// Guidance: errors.Join wraps multiple errors; use the Unwrap() []error
+// interface to walk all joined errors. Consider all four cases:
+// - err == nil → false
+// - single conflict error → true
+// - single non-conflict error → false
+// - joined errors, mixed conflict → false (business error wins)
+func IsPureConflict(err error) bool {
+	if err == nil {
+		return false
+	}
+	_ = apierrors.IsConflict // ensure the import is used once implemented
+	// TODO(human): replace this placeholder with the real implementation
+	return false
+}

From b00f7cc15e845d76e918b9bcee703c06caa8ae50 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20P=C5=82oski?=
Date: Wed, 15 Apr 2026 10:02:51 +0200
Subject: [PATCH 18/36] Add tests and implement IsPureConflict

---
 .../postgrescluster_controller_test.go        |  9 +-
 .../postgresdatabase_controller_test.go       |  9 +-
 .../database/core/database_unit_test.go       |  8 +-
 pkg/postgresql/database/core/types.go         | 10 +--
 pkg/postgresql/shared/reconcile/errors.go     | 21 +++--
 .../shared/reconcile/errors_test.go           | 87 +++++++++++++++++++
 6 files changed, 117 insertions(+), 27 deletions(-)
 create mode 100644 pkg/postgresql/shared/reconcile/errors_test.go

diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go
index 5687ae1f8..9d4954d61 100644
--- a/internal/controller/postgrescluster_controller_test.go
+++ b/internal/controller/postgrescluster_controller_test.go
@@ -36,6 +36,7 @@ import (
 
 	enterprisev4 "github.com/splunk/splunk-operator/api/v4"
 	"github.com/splunk/splunk-operator/pkg/postgresql/cluster/core"
+	pgprometheus "github.com/splunk/splunk-operator/pkg/postgresql/shared/adapter/prometheus"
 )
 
 /*
@@ -122,9 +123,11 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() {
 		}
 
 		reconciler = &PostgresClusterReconciler{
-			Client:   k8sClient,
-			Scheme:   k8sClient.Scheme(),
-			Recorder: record.NewFakeRecorder(100),
+			Client:         k8sClient,
+			Scheme:         k8sClient.Scheme(),
+			Recorder:       record.NewFakeRecorder(100),
+			Metrics:        &pgprometheus.NoopRecorder{},
+			FleetCollector: pgprometheus.NewFleetCollector(),
 		}
 		req = reconcile.Request{NamespacedName: types.NamespacedName{Name: clusterName, Namespace: namespace}}
 	})
diff --git a/internal/controller/postgresdatabase_controller_test.go b/internal/controller/postgresdatabase_controller_test.go
index 31f591573..44143919f 100644
--- a/internal/controller/postgresdatabase_controller_test.go
+++ b/internal/controller/postgresdatabase_controller_test.go
@@ -26,6 +26,7 @@ import (
 
 	. "github.com/onsi/ginkgo/v2"
 	. 
"github.com/onsi/gomega" enterprisev4 "github.com/splunk/splunk-operator/api/v4" + pgprometheus "github.com/splunk/splunk-operator/pkg/postgresql/shared/adapter/prometheus" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" @@ -80,9 +81,11 @@ const ( func reconcilePostgresDatabase(ctx context.Context, nn types.NamespacedName) (ctrl.Result, error) { reconciler := &PostgresDatabaseReconciler{ - Client: k8sClient, - Scheme: k8sClient.Scheme(), - Recorder: record.NewFakeRecorder(100), + Client: k8sClient, + Scheme: k8sClient.Scheme(), + Recorder: record.NewFakeRecorder(100), + Metrics: &pgprometheus.NoopRecorder{}, + FleetCollector: pgprometheus.NewFleetCollector(), } return reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: nn}) } diff --git a/pkg/postgresql/database/core/database_unit_test.go b/pkg/postgresql/database/core/database_unit_test.go index fdba00f71..c41d2dd59 100644 --- a/pkg/postgresql/database/core/database_unit_test.go +++ b/pkg/postgresql/database/core/database_unit_test.go @@ -1425,12 +1425,11 @@ func TestFindRemovedRoleNames(t *testing.T) { } } - func TestBuildRolesToRemove(t *testing.T) { tests := []struct { - name string - deleted []enterprisev4.DatabaseDefinition - want []enterprisev4.ManagedRole + name string + deleted []enterprisev4.DatabaseDefinition + want []enterprisev4.ManagedRole }{ { name: "nothing to remove", @@ -1459,7 +1458,6 @@ func TestBuildRolesToRemove(t *testing.T) { } } - func TestStripOwnerReference(t *testing.T) { obj := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ diff --git a/pkg/postgresql/database/core/types.go b/pkg/postgresql/database/core/types.go index cbf7c15a9..6511b502f 100644 --- a/pkg/postgresql/database/core/types.go +++ b/pkg/postgresql/database/core/types.go @@ -34,7 +34,7 @@ const ( deletionPolicyRetain string = "Retain" deletionPolicyDelete string = "Delete" - + postgresDatabaseFinalizerName string = "postgresdatabases.enterprise.splunk.com/finalizer" annotationRetainedFrom string = "enterprise.splunk.com/retained-from" @@ -79,10 +79,10 @@ const ( reasonUsersAvailable conditionReasons = "UsersAvailable" reasonRoleConflict conditionReasons = "RoleConflict" reasonConfigMapsCreationFailed conditionReasons = "ConfigMapsCreationFailed" - reasonConfigMapsCreated conditionReasons = "ConfigMapsCreated" - reasonDatabaseReconcileFailed conditionReasons = "DatabaseReconcileFailed" - reasonPrivilegesGranted conditionReasons = "PrivilegesGranted" - reasonPrivilegesGrantFailed conditionReasons = "PrivilegesGrantFailed" + reasonConfigMapsCreated conditionReasons = "ConfigMapsCreated" + reasonDatabaseReconcileFailed conditionReasons = "DatabaseReconcileFailed" + reasonPrivilegesGranted conditionReasons = "PrivilegesGranted" + reasonPrivilegesGrantFailed conditionReasons = "PrivilegesGrantFailed" // ClusterReady sentinel values returned by ensureClusterReady. // Exported so the controller adapter can switch on them if needed. diff --git a/pkg/postgresql/shared/reconcile/errors.go b/pkg/postgresql/shared/reconcile/errors.go index 2270770f8..1c777b5d7 100644 --- a/pkg/postgresql/shared/reconcile/errors.go +++ b/pkg/postgresql/shared/reconcile/errors.go @@ -24,19 +24,18 @@ import ( // within it is a 409 Conflict. When a business error and a status-write // conflict are joined together the business error takes priority and this // returns false, preserving exponential backoff for real failures. -// -// TODO(human): implement this function. 
-// Guidance: errors.Join wraps multiple errors; use the Unwrap() []error -// interface to walk all joined errors. Consider all four cases: -// - err == nil → false -// - single conflict error → true -// - single non-conflict error → false -// - joined errors, mixed conflict → false (business error wins) func IsPureConflict(err error) bool { if err == nil { return false } - _ = apierrors.IsConflict // ensure the import is used once implemented - // TODO(human): replace this placeholder with the real implementation - return false + // check if err can be unwrapped + if wrappedErrs, ok := err.(interface{ Unwrap() []error }); ok { + for _, err := range wrappedErrs.Unwrap() { + if !apierrors.IsConflict(err) { + return false + } + } + return true + } + return apierrors.IsConflict(err) } diff --git a/pkg/postgresql/shared/reconcile/errors_test.go b/pkg/postgresql/shared/reconcile/errors_test.go new file mode 100644 index 000000000..22398e49b --- /dev/null +++ b/pkg/postgresql/shared/reconcile/errors_test.go @@ -0,0 +1,87 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package reconcile + +import ( + "errors" + "fmt" + "testing" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +var conflictErr = apierrors.NewConflict(schema.GroupResource{Group: "enterprise.splunk.com", Resource: "postgresclusters"}, "my-cluster", fmt.Errorf("rv mismatch")) +var businessErr = fmt.Errorf("failed to fetch ClusterClass") + +func TestIsPureConflict(t *testing.T) { + tests := []struct { + name string + err error + expected bool + }{ + { + name: "nil error", + err: nil, + expected: false, + }, + { + name: "single conflict error", + err: conflictErr, + expected: true, + }, + { + name: "single non-conflict error", + err: businessErr, + expected: false, + }, + { + name: "wrapped non-conflict error (fmt.Errorf %w)", + err: fmt.Errorf("reconcile failed: %w", businessErr), + expected: false, + }, + { + name: "joined: business + conflict — business wins", + err: errors.Join(businessErr, conflictErr), + expected: false, + }, + { + name: "joined: conflict + business — business wins regardless of order", + err: errors.Join(conflictErr, businessErr), + expected: false, + }, + { + name: "joined: two conflict errors", + err: errors.Join(conflictErr, conflictErr), + expected: true, + }, + { + name: "joined: single conflict (nil partner discarded by errors.Join)", + err: errors.Join(conflictErr, nil), + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := IsPureConflict(tt.err) + if got != tt.expected { + t.Errorf("IsPureConflict(%v) = %v, want %v", tt.err, got, tt.expected) + } + }) + } +} From 6f83dfbed195ffbb05877f45a84e89fb663daf96 Mon Sep 17 00:00:00 2001 From: Kamil Ubych <56136249+limak9182@users.noreply.github.com> Date: Wed, 15 Apr 2026 10:40:21 +0200 Subject: [PATCH 19/36] checking errors for ctrl.SetControllerReference (#1846) --- pkg/postgresql/cluster/core/cluster.go | 28 +++++++++++++------ 
.../cluster/core/cluster_unit_test.go | 9 ++++-- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 01df3a1c7..15000b891 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -213,7 +213,11 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. switch { case apierrors.IsNotFound(err): logger.Info("CNPG Cluster creation started", "name", postgresCluster.Name) - newCluster := buildCNPGCluster(rc.Scheme, postgresCluster, mergedConfig, postgresSecretName) + newCluster, err := buildCNPGCluster(rc.Scheme, postgresCluster, mergedConfig, postgresSecretName) + if err != nil { + logger.Error(err, "Failed to build CNPG Cluster", "name", postgresCluster.Name) + return ctrl.Result{}, err + } if err := c.Create(ctx, newCluster); err != nil { logger.Error(err, "Failed to create CNPG Cluster") rc.emitWarning(postgresCluster, EventClusterCreateFailed, fmt.Sprintf("Failed to create CNPG cluster: %v", err)) @@ -560,13 +564,15 @@ func buildCNPGClusterSpec(cfg *MergedConfig, secretName string) cnpgv1.ClusterSp } } -func buildCNPGCluster(scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, secretName string) *cnpgv1.Cluster { +func buildCNPGCluster(scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, secretName string) (*cnpgv1.Cluster, error) { cnpg := &cnpgv1.Cluster{ ObjectMeta: metav1.ObjectMeta{Name: cluster.Name, Namespace: cluster.Namespace}, Spec: buildCNPGClusterSpec(cfg, secretName), } - ctrl.SetControllerReference(cluster, cnpg, scheme) - return cnpg + if err := ctrl.SetControllerReference(cluster, cnpg, scheme); err != nil { + return nil, fmt.Errorf("setting controller reference on CNPG cluster: %w", err) + } + return cnpg, nil } func normalizeCNPGClusterSpec(spec cnpgv1.ClusterSpec, customDefinedParameters map[string]string) normalizedCNPGClusterSpec { @@ -705,10 +711,14 @@ func createConnectionPooler(ctx context.Context, c client.Client, scheme *runtim return err } logger.Info("CNPG Pooler creation started", "name", poolerName, "type", poolerType) - return c.Create(ctx, buildCNPGPooler(scheme, cluster, cfg, cnpgCluster, poolerType)) + pooler, err := buildCNPGPooler(scheme, cluster, cfg, cnpgCluster, poolerType) + if err != nil { + return err + } + return c.Create(ctx, pooler) } -func buildCNPGPooler(scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerType string) *cnpgv1.Pooler { +func buildCNPGPooler(scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerType string) (*cnpgv1.Pooler, error) { pc := cfg.CNPG.ConnectionPooler instances := *pc.Instances mode := cnpgv1.PgBouncerPoolMode(*pc.Mode) @@ -724,8 +734,10 @@ func buildCNPGPooler(scheme *runtime.Scheme, cluster *enterprisev4.PostgresClust }, }, } - ctrl.SetControllerReference(cluster, pooler, scheme) - return pooler + if err := ctrl.SetControllerReference(cluster, pooler, scheme); err != nil { + return nil, fmt.Errorf("setting controller reference on CNPG pooler: %w", err) + } + return pooler, nil } // deleteConnectionPoolers removes RW and RO poolers if they exist. 
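ctrl.SetControllerReference can genuinely fail: most commonly when the owner's type is not registered in the scheme, so its GroupVersionKind cannot be resolved, and also when the object is already controlled by a different owner. A minimal sketch of the first case; the test name and package placement are assumptions for illustration, not part of this patch:

```go
package core

import (
	"testing"

	cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	ctrl "sigs.k8s.io/controller-runtime"

	enterprisev4 "github.com/splunk/splunk-operator/api/v4"
)

// SetControllerReference resolves the owner's GroupVersionKind through the
// scheme; an owner type that was never registered yields an error instead
// of a silently ownerless child object.
func TestSetControllerReferenceNeedsScheme(t *testing.T) {
	owner := &enterprisev4.PostgresCluster{ObjectMeta: metav1.ObjectMeta{Name: "c", Namespace: "ns"}}
	child := &cnpgv1.Cluster{ObjectMeta: metav1.ObjectMeta{Name: "c", Namespace: "ns"}}

	emptyScheme := runtime.NewScheme() // enterprisev4 deliberately not registered
	if err := ctrl.SetControllerReference(owner, child, emptyScheme); err == nil {
		t.Fatal("expected an error for an owner type missing from the scheme")
	}
}
```

Propagating the error from the builders means a misconfigured scheme fails the reconcile loudly, instead of creating CNPG objects that would never be garbage-collected with their parent PostgresCluster.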
diff --git a/pkg/postgresql/cluster/core/cluster_unit_test.go b/pkg/postgresql/cluster/core/cluster_unit_test.go index e2466f54b..1a0659c98 100644 --- a/pkg/postgresql/cluster/core/cluster_unit_test.go +++ b/pkg/postgresql/cluster/core/cluster_unit_test.go @@ -403,8 +403,9 @@ func TestBuildCNPGPooler(t *testing.T) { } t.Run("rw pooler", func(t *testing.T) { - pooler := buildCNPGPooler(scheme, postgresCluster, cfg, cnpgCluster, "rw") + pooler, err := buildCNPGPooler(scheme, postgresCluster, cfg, cnpgCluster, "rw") + require.NoError(t, err) assert.Equal(t, "my-cluster-pooler-rw", pooler.Name) assert.Equal(t, "db-ns", pooler.Namespace) assert.Equal(t, "my-cluster", pooler.Spec.Cluster.Name) @@ -418,8 +419,9 @@ func TestBuildCNPGPooler(t *testing.T) { }) t.Run("ro pooler", func(t *testing.T) { - pooler := buildCNPGPooler(scheme, postgresCluster, cfg, cnpgCluster, "ro") + pooler, err := buildCNPGPooler(scheme, postgresCluster, cfg, cnpgCluster, "ro") + require.NoError(t, err) assert.Equal(t, "my-cluster-pooler-ro", pooler.Name) assert.Equal(t, cnpgv1.PoolerType("ro"), pooler.Spec.Type) }) @@ -451,8 +453,9 @@ func TestBuildCNPGCluster(t *testing.T) { }, } - cluster := buildCNPGCluster(scheme, postgresCluster, cfg, "my-secret") + cluster, err := buildCNPGCluster(scheme, postgresCluster, cfg, "my-secret") + require.NoError(t, err) assert.Equal(t, "my-cluster", cluster.Name) assert.Equal(t, "db-ns", cluster.Namespace) require.Len(t, cluster.OwnerReferences, 1) From d3092c48e1217c6f0fdd56c647cf588260f35928 Mon Sep 17 00:00:00 2001 From: dpishchenkov Date: Wed, 1 Apr 2026 12:56:10 +0200 Subject: [PATCH 20/36] add basic grafana --- api/v4/postgrescluster_types.go | 24 + api/v4/postgresclusterclass_types.go | 30 + cmd/main.go | 2 + ...nterprise_v4_postgresclusterclass_dev.yaml | 5 + .../controller/postgrescluster_controller.go | 2 +- internal/controller/suite_test.go | 4 + pkg/postgresql/cluster/core/cluster.go | 58 ++ .../dashboards/postgres_observability.json | 136 ++++ pkg/postgresql/cluster/core/events.go | 39 +- pkg/postgresql/cluster/core/monitoring.go | 614 ++++++++++++++++++ .../cluster/core/monitoring_unit_test.go | 472 ++++++++++++++ 11 files changed, 1366 insertions(+), 20 deletions(-) create mode 100644 pkg/postgresql/cluster/core/dashboards/postgres_observability.json create mode 100644 pkg/postgresql/cluster/core/monitoring.go create mode 100644 pkg/postgresql/cluster/core/monitoring_unit_test.go diff --git a/api/v4/postgrescluster_types.go b/api/v4/postgrescluster_types.go index 5adc91f13..1faae2820 100644 --- a/api/v4/postgrescluster_types.go +++ b/api/v4/postgrescluster_types.go @@ -107,6 +107,30 @@ type PostgresClusterSpec struct { // +kubebuilder:default=Retain // +optional ClusterDeletionPolicy *string `json:"clusterDeletionPolicy,omitempty"` + + // Observability contains configuration for monitoring and observability features. + // +optional + Observability *PostgresObservabilityOverride `json:"observability,omitempty"` +} + +// PostgresObservabilityOverride overrides observability configuration options for PostgresClusterClass. +type PostgresObservabilityOverride struct { + + // +optional + PostgreSQL *FeatureDisableOverride `json:"postgresql,omitempty"` + + // +optional + PgBouncer *FeatureDisableOverride `json:"pgbouncer,omitempty"` + + // +optional + GrafanaDashboard *FeatureDisableOverride `json:"grafanaDashboard,omitempty"` +} + +type FeatureDisableOverride struct { + // Disable set to true will disable the feature even if it's enabled in the class. 
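+	//
+	// Illustrative override (field names follow the json tags in this
+	// file): a cluster opts out of a feature its class enables with
+	//
+	//	observability:
+	//	  postgresql:
+	//	    disabled: true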
+ // +kubebuilder:default=false + // +optional + Disabled *bool `json:"disabled,omitempty"` } // PostgresClusterResources defines references to Kubernetes resources related to the PostgresCluster, such as ConfigMaps and Secrets. diff --git a/api/v4/postgresclusterclass_types.go b/api/v4/postgresclusterclass_types.go index 7f02e5633..743e98722 100644 --- a/api/v4/postgresclusterclass_types.go +++ b/api/v4/postgresclusterclass_types.go @@ -99,6 +99,13 @@ type PostgresClusterClassConfig struct { // +kubebuilder:default=false // +optional ConnectionPoolerEnabled *bool `json:"connectionPoolerEnabled,omitempty"` + + // Observability contains configuration for metrics and dashboards. + // When enabled, creates metrics resources and Grafana dashboard for clusters using this class. + // Can be overridden in PostgresCluster CR. + // +kubebuilder:default={} + // +optional + Observability *PostgresObservabilityClassConfig `json:"observability,omitempty"` } // ConnectionPoolerMode defines the PgBouncer connection pooling strategy. @@ -172,6 +179,29 @@ type PostgresClusterClassStatus struct { Phase *string `json:"phase,omitempty"` } +type PostgresObservabilityClassConfig struct { + // +optional + PostgreSQL *MetricsClassConfig `json:"postgresql,omitempty"` + // +optional + PgBouncer *MetricsClassConfig `json:"pgbouncer,omitempty"` + // +optional + GrafanaDashboard *GrafanaDashboardClassConfig `json:"grafanaDashboard,omitempty"` +} + +type MetricsClassConfig struct { + // Enabled controls whether metrics resources should be created for this target. + // +kubebuilder:default=false + // +optional + Enabled *bool `json:"enabled,omitempty"` +} + +type GrafanaDashboardClassConfig struct { + // Enabled controls whether a Grafana dashboard ConfigMap should be created for this class. 
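+	//
+	// The resulting ConfigMap carries the label grafana_dashboard: "1",
+	// the conventional marker a Grafana dashboard sidecar watches for
+	// (assuming the sidecar is configured with that default label).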
+ // +kubebuilder:default=false + // +optional + Enabled *bool `json:"enabled,omitempty"` +} + // +kubebuilder:object:root=true // +kubebuilder:subresource:status // +kubebuilder:resource:scope=Cluster diff --git a/cmd/main.go b/cmd/main.go index d7cc6cc8b..402264560 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -56,6 +56,7 @@ import ( "github.com/splunk/splunk-operator/internal/controller" cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" pgprometheus "github.com/splunk/splunk-operator/pkg/postgresql/shared/adapter/prometheus" //+kubebuilder:scaffold:imports //extapi "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" @@ -71,6 +72,7 @@ func init() { utilruntime.Must(enterpriseApi.AddToScheme(scheme)) utilruntime.Must(enterpriseApiV3.AddToScheme(scheme)) utilruntime.Must(cnpgv1.AddToScheme(scheme)) + utilruntime.Must(monitoringv1.AddToScheme(scheme)) //+kubebuilder:scaffold:scheme //utilruntime.Must(extapi.AddToScheme(scheme)) } diff --git a/config/samples/enterprise_v4_postgresclusterclass_dev.yaml b/config/samples/enterprise_v4_postgresclusterclass_dev.yaml index a9846e36c..560958794 100644 --- a/config/samples/enterprise_v4_postgresclusterclass_dev.yaml +++ b/config/samples/enterprise_v4_postgresclusterclass_dev.yaml @@ -27,6 +27,11 @@ spec: cpu: "1" memory: "2Gi" connectionPoolerEnabled: true + observability: + grafanaDashboard: + enabled: true + pgbouncer: + enabled: true cnpg: # Restart method - tolerate downtime in dev diff --git a/internal/controller/postgrescluster_controller.go b/internal/controller/postgrescluster_controller.go index 6d42d72bf..75b5bc50f 100644 --- a/internal/controller/postgrescluster_controller.go +++ b/internal/controller/postgrescluster_controller.go @@ -50,7 +50,7 @@ type PostgresClusterReconciler struct { Metrics ports.Recorder FleetCollector *pgprometheus.FleetCollector } - +// +kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresclusters,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresclusters/status,verbs=get;update;patch // +kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresclusters/finalizers,verbs=update diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index 9356a011f..8518541be 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -39,6 +39,7 @@ import ( clientgoscheme "k8s.io/client-go/kubernetes/scheme" ctrl "sigs.k8s.io/controller-runtime" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" enterpriseApiV3 "github.com/splunk/splunk-operator/api/v3" enterpriseApi "github.com/splunk/splunk-operator/api/v4" //+kubebuilder:scaffold:imports @@ -109,6 +110,9 @@ var _ = BeforeSuite(func(ctx context.Context) { err = enterpriseApi.AddToScheme(clientgoscheme.Scheme) Expect(err).NotTo(HaveOccurred()) + err = monitoringv1.AddToScheme(clientgoscheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + //+kubebuilder:scaffold:scheme // Create New Manager for controller diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index ba9030f6f..6c3a06b07 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -344,6 +344,64 @@ func PostgresClusterService(ctx 
context.Context, rc *ReconcileContext, req ctrl. rc.emitPoolerReadyTransition(postgresCluster, oldConditions) } + if err := reconcilePostgreSQLMetricsService(ctx, c, rc.Scheme, postgresCluster, isPostgreSQLMetricsEnabled(postgresCluster, clusterClass)); err != nil { + return ctrl.Result{}, err + } + + poolerMetricsEnabled := isConnectionPoolerMetricsEnabled(postgresCluster, clusterClass) + rwPoolerMetricsEnabled := poolerMetricsEnabled && rwPoolerExists + roPoolerMetricsEnabled := poolerMetricsEnabled && roPoolerExists + if err := reconcileConnectionPoolerMetricsService(ctx, c, rc.Scheme, postgresCluster, readWriteEndpoint, rwPoolerMetricsEnabled); err != nil { + return ctrl.Result{}, err + } + if err := reconcileConnectionPoolerMetricsService(ctx, c, rc.Scheme, postgresCluster, readOnlyEndpoint, roPoolerMetricsEnabled); err != nil { + return ctrl.Result{}, err + } + + if err := reconcileGrafanaDashboardConfigMap(ctx, c, rc.Scheme, postgresCluster, isGrafanaDashboardEnabled(postgresCluster, clusterClass)); err != nil { + return ctrl.Result{}, err + } + + serviceMonitorUnavailableEmitted := false + handleServiceMonitorError := func(err error) (bool, error) { + if err == nil { + return false, nil + } + if !isServiceMonitorUnavailable(err) { + return false, err + } + if !serviceMonitorUnavailableEmitted { + serviceMonitorUnavailableEmitted = true + logger.Info("ServiceMonitor CRD unavailable, continuing without ServiceMonitors") + rc.emitWarning(postgresCluster, EventServiceMonitorUnavailable, + "ServiceMonitor CRD not found; continuing without Prometheus ServiceMonitors") + } + return true, nil + } + + if handled, err := handleServiceMonitorError( + reconcilePostgreSQLMetricsServiceMonitor(ctx, c, rc.Scheme, postgresCluster, isPostgreSQLMetricsEnabled(postgresCluster, clusterClass)), + ); err != nil { + return ctrl.Result{}, err + } else if handled { + logger.Info("Skipped PostgreSQL ServiceMonitor reconciliation") + } + + if handled, err := handleServiceMonitorError( + reconcileConnectionPoolerMetricsServiceMonitor(ctx, c, rc.Scheme, postgresCluster, readWriteEndpoint, rwPoolerMetricsEnabled), + ); err != nil { + return ctrl.Result{}, err + } else if handled { + logger.Info("Skipped RW PgBouncer ServiceMonitor reconciliation") + } + if handled, err := handleServiceMonitorError( + reconcileConnectionPoolerMetricsServiceMonitor(ctx, c, rc.Scheme, postgresCluster, readOnlyEndpoint, roPoolerMetricsEnabled), + ); err != nil { + return ctrl.Result{}, err + } else if handled { + logger.Info("Skipped RO PgBouncer ServiceMonitor reconciliation") + } + // Reconcile ConfigMap when CNPG cluster is healthy. 
if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy { logger.Info("CNPG Cluster healthy, reconciling ConfigMap") diff --git a/pkg/postgresql/cluster/core/dashboards/postgres_observability.json b/pkg/postgresql/cluster/core/dashboards/postgres_observability.json new file mode 100644 index 000000000..bbdf6eda7 --- /dev/null +++ b/pkg/postgresql/cluster/core/dashboards/postgres_observability.json @@ -0,0 +1,136 @@ +{ + "title": "PostgreSQL __CLUSTER_NAME__", + "uid": "pg-__CLUSTER_NAME__", + "schemaVersion": 39, + "version": 1, + "refresh": "30s", + "timezone": "browser", + "tags": ["postgresql", "cnpg", "pgbouncer"], + "editable": true, + "graphTooltip": 0, + "panels": [ + { + "id": 1, + "type": "stat", + "title": "PostgreSQL Instances", + "gridPos": { "x": 0, "y": 0, "w": 6, "h": 4 }, + "targets": [ + { + "expr": "count(max by (pod) (cnpg_pg_postmaster_start_time_seconds{namespace=\"__NAMESPACE__\",service=\"__POSTGRES_SERVICE__\"}))", + "refId": "A" + } + ], + "options": { + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "orientation": "horizontal", + "textMode": "value" + } + }, + { + "id": 2, + "type": "stat", + "title": "RW PgBouncer Pods Up", + "gridPos": { "x": 6, "y": 0, "w": 6, "h": 4 }, + "targets": [ + { + "expr": "round(sum(max by (pod) (cnpg_pgbouncer_up{namespace=\"__NAMESPACE__\",service=\"__RW_POOLER_SERVICE__\"})))", + "refId": "A" + } + ], + "options": { + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "orientation": "horizontal", + "textMode": "value" + } + }, + { + "id": 3, + "type": "stat", + "title": "RO PgBouncer Pods Up", + "gridPos": { "x": 12, "y": 0, "w": 6, "h": 4 }, + "targets": [ + { + "expr": "round(sum(max by (pod) (cnpg_pgbouncer_up{namespace=\"__NAMESPACE__\",service=\"__RO_POOLER_SERVICE__\"})))", + "refId": "A" + } + ], + "options": { + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "orientation": "horizontal", + "textMode": "value" + } + }, + { + "id": 4, + "type": "stat", + "title": "Total Database Size", + "gridPos": { "x": 18, "y": 0, "w": 6, "h": 4 }, + "targets": [ + { + "expr": "sum(max by (datname) (cnpg_pg_database_size_bytes{namespace=\"__NAMESPACE__\",service=\"__POSTGRES_SERVICE__\"}))", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "unit": "bytes" + } + }, + "options": { + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "orientation": "horizontal", + "textMode": "value" + } + }, + { + "id": 5, + "type": "timeseries", + "title": "WAL Files by Pod", + "gridPos": { "x": 0, "y": 4, "w": 8, "h": 8 }, + "targets": [ + { + "expr": "round(max by (pod) (cnpg_pg_wal_files_total{namespace=\"__NAMESPACE__\",service=\"__POSTGRES_SERVICE__\"}))", + "legendFormat": "{{pod}}", + "refId": "A" + } + ] + }, + { + "id": 6, + "type": "timeseries", + "title": "Archived WAL Rate by Pod", + "gridPos": { "x": 8, "y": 4, "w": 8, "h": 8 }, + "targets": [ + { + "expr": "max by (pod) (rate(cnpg_pg_stat_archiver_archived_count{namespace=\"__NAMESPACE__\",service=\"__POSTGRES_SERVICE__\"}[5m]))", + "legendFormat": "{{pod}}", + "refId": "A" + } + ] + }, + { + "id": 7, + "type": "timeseries", + "title": "PgBouncer Active Clients", + "gridPos": { "x": 16, "y": 4, "w": 8, "h": 8 }, + "targets": [ + { + "expr": "round(sum(cnpg_pgbouncer_pools_cl_active{namespace=\"__NAMESPACE__\",service=\"__RW_POOLER_SERVICE__\"}))", + "legendFormat": "rw", + "refId": "A" + }, + { + "expr": 
"round(sum(cnpg_pgbouncer_pools_cl_active{namespace=\"__NAMESPACE__\",service=\"__RO_POOLER_SERVICE__\"}))", + "legendFormat": "ro", + "refId": "B" + } + ] + } + ], + "templating": { + "list": [] + }, + "annotations": { + "list": [] + } +} diff --git a/pkg/postgresql/cluster/core/events.go b/pkg/postgresql/cluster/core/events.go index afcfd768e..73ded6cd5 100644 --- a/pkg/postgresql/cluster/core/events.go +++ b/pkg/postgresql/cluster/core/events.go @@ -10,25 +10,26 @@ import ( ) const ( - EventSecretReady = "SecretReady" - EventConfigMapReady = "ConfigMapReady" - EventClusterAdopted = "ClusterAdopted" - EventClusterCreationStarted = "ClusterCreationStarted" - EventClusterUpdateStarted = "ClusterUpdateStarted" - EventClusterReady = "ClusterReady" - EventPoolerCreationStarted = "PoolerCreationStarted" - EventPoolerReady = "PoolerReady" - EventCleanupComplete = "CleanupComplete" - EventClusterClassNotFound = "ClusterClassNotFound" - EventConfigMergeFailed = "ConfigMergeFailed" - EventSecretReconcileFailed = "SecretReconcileFailed" - EventClusterCreateFailed = "ClusterCreateFailed" - EventClusterUpdateFailed = "ClusterUpdateFailed" - EventManagedRolesFailed = "ManagedRolesFailed" - EventPoolerReconcileFailed = "PoolerReconcileFailed" - EventConfigMapReconcileFailed = "ConfigMapReconcileFailed" - EventClusterDegraded = "ClusterDegraded" - EventCleanupFailed = "CleanupFailed" + EventSecretReady = "SecretReady" + EventConfigMapReady = "ConfigMapReady" + EventClusterAdopted = "ClusterAdopted" + EventClusterCreationStarted = "ClusterCreationStarted" + EventClusterUpdateStarted = "ClusterUpdateStarted" + EventClusterReady = "ClusterReady" + EventPoolerCreationStarted = "PoolerCreationStarted" + EventPoolerReady = "PoolerReady" + EventCleanupComplete = "CleanupComplete" + EventClusterClassNotFound = "ClusterClassNotFound" + EventConfigMergeFailed = "ConfigMergeFailed" + EventSecretReconcileFailed = "SecretReconcileFailed" + EventClusterCreateFailed = "ClusterCreateFailed" + EventClusterUpdateFailed = "ClusterUpdateFailed" + EventManagedRolesFailed = "ManagedRolesFailed" + EventPoolerReconcileFailed = "PoolerReconcileFailed" + EventConfigMapReconcileFailed = "ConfigMapReconcileFailed" + EventServiceMonitorUnavailable = "ServiceMonitorUnavailable" + EventClusterDegraded = "ClusterDegraded" + EventCleanupFailed = "CleanupFailed" ) func (rc *ReconcileContext) emitNormal(obj client.Object, reason, message string) { diff --git a/pkg/postgresql/cluster/core/monitoring.go b/pkg/postgresql/cluster/core/monitoring.go new file mode 100644 index 000000000..7c942d22a --- /dev/null +++ b/pkg/postgresql/cluster/core/monitoring.go @@ -0,0 +1,614 @@ +package core + +import ( + "context" + _ "embed" + "fmt" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + enterprisev4 "github.com/splunk/splunk-operator/api/v4" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + apimeta "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/log" + "strings" +) + +const ( + // metrics + postgresMetricsServiceSuffix = "-postgres-metrics" + postgresMetricsPortName = "metrics" + postgresMetricsPort = int32(9187) + poolerMetricsPortName = "metrics" 
+ poolerMetricsPort = int32(9127) + grafanaDashboardConfigMapSuffix = "-grafana-dashboard" + + // labels + labelManagedBy = "app.kubernetes.io/managed-by" + labelManagedByValue = "postgrescluster-controller" + labelObservabilityComponent = "enterprise.splunk.com/observability-component" + cnpgClusterLabelName = "cnpg.io/cluster" + cnpgPoolerNameLabel = "cnpg.io/poolerName" + cnpgPodRoleInstance = "instance" + cnpgPodRoleLabelName = "cnpg.io/podRole" + grafanaDashboardLabelKey = "grafana_dashboard" + grafanaDashboardLabelValue = "1" +) + +func isPostgreSQLMetricsEnabled(cluster *enterprisev4.PostgresCluster, class *enterprisev4.PostgresClusterClass) bool { + if class == nil || class.Spec.Config == nil || class.Spec.Config.Observability == nil { + return false + } + classCfg := class.Spec.Config.Observability.PostgreSQL + if classCfg == nil || classCfg.Enabled == nil || !*classCfg.Enabled { + return false + } + if cluster == nil || cluster.Spec.Observability == nil || cluster.Spec.Observability.PostgreSQL == nil { + return true + } + override := cluster.Spec.Observability.PostgreSQL.Disabled + return override == nil || !*override +} + +func isConnectionPoolerEnabled(cluster *enterprisev4.PostgresCluster, class *enterprisev4.PostgresClusterClass) bool { + if class == nil || class.Spec.Config == nil || class.Spec.Config.ConnectionPoolerEnabled == nil { + return false + } + if !*class.Spec.Config.ConnectionPoolerEnabled { + return false + } + if cluster == nil || cluster.Spec.ConnectionPoolerEnabled == nil { + return true + } + return *cluster.Spec.ConnectionPoolerEnabled +} + +func isConnectionPoolerMetricsEnabled(cluster *enterprisev4.PostgresCluster, class *enterprisev4.PostgresClusterClass) bool { + if !isConnectionPoolerEnabled(cluster, class) { + return false + } + if class == nil || class.Spec.Config == nil || class.Spec.Config.Observability == nil { + return false + } + classCfg := class.Spec.Config.Observability.PgBouncer + if classCfg == nil || classCfg.Enabled == nil || !*classCfg.Enabled { + return false + } + if cluster == nil || cluster.Spec.Observability == nil || cluster.Spec.Observability.PgBouncer == nil { + return true + } + override := cluster.Spec.Observability.PgBouncer.Disabled + return override == nil || !*override +} + +func isGrafanaDashboardEnabled(cluster *enterprisev4.PostgresCluster, class *enterprisev4.PostgresClusterClass) bool { + if class == nil || class.Spec.Config == nil || class.Spec.Config.Observability == nil { + return false + } + classCfg := class.Spec.Config.Observability.GrafanaDashboard + if classCfg == nil || classCfg.Enabled == nil || !*classCfg.Enabled { + return false + } + if cluster == nil || cluster.Spec.Observability == nil || cluster.Spec.Observability.GrafanaDashboard == nil { + return true + } + override := cluster.Spec.Observability.GrafanaDashboard.Disabled + return override == nil || !*override +} + +func buildPostgreSQLMetricsService(scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster) (*corev1.Service, error) { + svc := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + postgresMetricsServiceSuffix, + Namespace: cluster.Namespace, + Labels: map[string]string{ + labelManagedBy: labelManagedByValue, + labelObservabilityComponent: "postgresql-metrics", + cnpgClusterLabelName: cluster.Name, + }, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeClusterIP, + Selector: map[string]string{ + cnpgClusterLabelName: cluster.Name, + cnpgPodRoleLabelName: cnpgPodRoleInstance, + }, + Ports: 
[]corev1.ServicePort{ + { + Name: postgresMetricsPortName, + Port: postgresMetricsPort, + Protocol: corev1.ProtocolTCP, + TargetPort: intstr.FromString(postgresMetricsPortName), + }, + }, + }, + } + + if err := ctrl.SetControllerReference(cluster, svc, scheme); err != nil { + return nil, fmt.Errorf("setting controller reference on PostgreSQL metrics Service: %w", err) + } + + return svc, nil +} + +func poolerMetricsServiceName(clusterName, poolerType string) string { + return fmt.Sprintf("%s-pooler-%s-metrics", clusterName, poolerType) +} +func buildConnectionPoolerMetricsService( + scheme *runtime.Scheme, + cluster *enterprisev4.PostgresCluster, + poolerType string, +) (*corev1.Service, error) { + poolerName := poolerResourceName(cluster.Name, poolerType) + + svc := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolerMetricsServiceName(cluster.Name, poolerType), + Namespace: cluster.Namespace, + Labels: map[string]string{ + labelManagedBy: labelManagedByValue, + labelObservabilityComponent: "pgbouncer-metrics", + cnpgClusterLabelName: cluster.Name, + cnpgPoolerNameLabel: poolerName, + }, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeClusterIP, + Selector: map[string]string{ + cnpgPoolerNameLabel: poolerName, + }, + Ports: []corev1.ServicePort{ + { + Name: poolerMetricsPortName, + Port: poolerMetricsPort, + Protocol: corev1.ProtocolTCP, + TargetPort: intstr.FromString(poolerMetricsPortName), + }, + }, + }, + } + + if err := ctrl.SetControllerReference(cluster, svc, scheme); err != nil { + return nil, fmt.Errorf("setting controller reference on PgBouncer metrics Service: %w", err) + } + + return svc, nil +} + +func buildGrafanaDashboardConfigMap(scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster) (*corev1.ConfigMap, error) { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name + grafanaDashboardConfigMapSuffix, + Namespace: cluster.Namespace, + Labels: map[string]string{ + labelManagedBy: labelManagedByValue, + labelObservabilityComponent: "grafana-dashboard", + cnpgClusterLabelName: cluster.Name, + grafanaDashboardLabelKey: grafanaDashboardLabelValue, + }, + }, + Data: map[string]string{ + "dashboard.json": buildBasicGrafanaDashboard(cluster), + }, + } + + if err := ctrl.SetControllerReference(cluster, cm, scheme); err != nil { + return nil, fmt.Errorf("setting controller reference on Grafana dashboard ConfigMap: %w", err) + } + + return cm, nil +} + +func isServiceMonitorUnavailable(err error) bool { + if err == nil { + return false + } + + if apierrors.IsNotFound(err) || apimeta.IsNoMatchError(err) { + return true + } + + msg := err.Error() + return strings.Contains(msg, "no matches for kind \"ServiceMonitor\"") || + strings.Contains(msg, "servicemonitors.monitoring.coreos.com") +} + +func reconcilePostgreSQLMetricsService(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, enabled bool) error { + logger := log.FromContext(ctx) + serviceName := cluster.Name + postgresMetricsServiceSuffix + + if !enabled { + existing := &corev1.Service{} + err := c.Get(ctx, types.NamespacedName{Name: serviceName, Namespace: cluster.Namespace}, existing) + switch { + case apierrors.IsNotFound(err): + return nil + case err != nil: + return fmt.Errorf("getting PostgreSQL metrics Service %s: %w", serviceName, err) + } + + logger.Info("Deleting PostgreSQL metrics Service", "name", serviceName) + if err := c.Delete(ctx, existing); err != nil && !apierrors.IsNotFound(err) { + return 
fmt.Errorf("deleting PostgreSQL metrics Service %s: %w", serviceName, err) + } + return nil + } + + desired, err := buildPostgreSQLMetricsService(scheme, cluster) + if err != nil { + return fmt.Errorf("building PostgreSQL metrics Service: %w", err) + } + + live := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: desired.Name, + Namespace: desired.Namespace, + }, + } + + _, err = controllerutil.CreateOrUpdate(ctx, c, live, func() error { + live.Labels = desired.Labels + live.Annotations = desired.Annotations + live.Spec.Type = desired.Spec.Type + live.Spec.Selector = desired.Spec.Selector + live.Spec.Ports = desired.Spec.Ports + + if !metav1.IsControlledBy(live, cluster) { + if err := ctrl.SetControllerReference(cluster, live, scheme); err != nil { + return fmt.Errorf("setting controller reference on PostgreSQL metrics Service: %w", err) + } + } + return nil + }) + if err != nil { + return fmt.Errorf("reconciling PostgreSQL metrics Service %s: %w", desired.Name, err) + } + + return nil +} + +func reconcileConnectionPoolerMetricsService( + ctx context.Context, + c client.Client, + scheme *runtime.Scheme, + cluster *enterprisev4.PostgresCluster, + poolerType string, + enabled bool, +) error { + logger := log.FromContext(ctx) + serviceName := poolerMetricsServiceName(cluster.Name, poolerType) + + if !enabled { + existing := &corev1.Service{} + err := c.Get(ctx, types.NamespacedName{Name: serviceName, Namespace: cluster.Namespace}, existing) + switch { + case apierrors.IsNotFound(err): + return nil + case err != nil: + return fmt.Errorf("getting PgBouncer metrics Service %s: %w", serviceName, err) + } + + logger.Info("Deleting PgBouncer metrics Service", "name", serviceName, "poolerType", poolerType) + if err := c.Delete(ctx, existing); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("deleting PgBouncer metrics Service %s: %w", serviceName, err) + } + return nil + } + + desired, err := buildConnectionPoolerMetricsService(scheme, cluster, poolerType) + if err != nil { + return fmt.Errorf("building PgBouncer metrics Service for %s pooler: %w", poolerType, err) + } + + live := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: desired.Name, + Namespace: desired.Namespace, + }, + } + + _, err = controllerutil.CreateOrUpdate(ctx, c, live, func() error { + live.Labels = desired.Labels + live.Annotations = desired.Annotations + live.Spec.Type = desired.Spec.Type + live.Spec.Selector = desired.Spec.Selector + live.Spec.Ports = desired.Spec.Ports + + if !metav1.IsControlledBy(live, cluster) { + if err := ctrl.SetControllerReference(cluster, live, scheme); err != nil { + return fmt.Errorf("setting controller reference on PgBouncer metrics Service: %w", err) + } + } + return nil + }) + if err != nil { + return fmt.Errorf("reconciling PgBouncer metrics Service %s: %w", desired.Name, err) + } + + return nil +} + +func reconcileGrafanaDashboardConfigMap( + ctx context.Context, + c client.Client, + scheme *runtime.Scheme, + cluster *enterprisev4.PostgresCluster, + enabled bool, +) error { + logger := log.FromContext(ctx) + configMapName := cluster.Name + grafanaDashboardConfigMapSuffix + + if !enabled { + existing := &corev1.ConfigMap{} + err := c.Get(ctx, types.NamespacedName{Name: configMapName, Namespace: cluster.Namespace}, existing) + switch { + case apierrors.IsNotFound(err): + return nil + case err != nil: + return fmt.Errorf("getting Grafana dashboard ConfigMap %s: %w", configMapName, err) + } + + logger.Info("Deleting Grafana dashboard ConfigMap", "name", configMapName) 
+ if err := c.Delete(ctx, existing); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("deleting Grafana dashboard ConfigMap %s: %w", configMapName, err) + } + return nil + } + + desired, err := buildGrafanaDashboardConfigMap(scheme, cluster) + if err != nil { + return fmt.Errorf("building Grafana dashboard ConfigMap: %w", err) + } + + live := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: desired.Name, + Namespace: desired.Namespace, + }, + } + + _, err = controllerutil.CreateOrUpdate(ctx, c, live, func() error { + live.Labels = desired.Labels + live.Annotations = desired.Annotations + live.Data = desired.Data + + if !metav1.IsControlledBy(live, cluster) { + if err := ctrl.SetControllerReference(cluster, live, scheme); err != nil { + return fmt.Errorf("setting controller reference on Grafana dashboard ConfigMap: %w", err) + } + } + return nil + }) + if err != nil { + return fmt.Errorf("reconciling Grafana dashboard ConfigMap %s: %w", desired.Name, err) + } + + return nil +} + +func postgresMetricsServiceMonitorName(clusterName string) string { + return clusterName + "-postgres-metrics-monitor" +} + +func poolerMetricsServiceMonitorName(clusterName, poolerType string) string { + return fmt.Sprintf("%s-pooler-%s-metrics-monitor", clusterName, poolerType) +} + +func buildPostgreSQLMetricsServiceMonitor( + scheme *runtime.Scheme, + cluster *enterprisev4.PostgresCluster, +) (*monitoringv1.ServiceMonitor, error) { + sm := &monitoringv1.ServiceMonitor{ + ObjectMeta: metav1.ObjectMeta{ + Name: postgresMetricsServiceMonitorName(cluster.Name), + Namespace: cluster.Namespace, + Labels: map[string]string{ + labelManagedBy: labelManagedByValue, + labelObservabilityComponent: "postgresql-metrics", + cnpgClusterLabelName: cluster.Name, + }, + }, + Spec: monitoringv1.ServiceMonitorSpec{ + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + labelObservabilityComponent: "postgresql-metrics", + cnpgClusterLabelName: cluster.Name, + }, + }, + Endpoints: []monitoringv1.Endpoint{ + { + Port: postgresMetricsPortName, + Path: "/metrics", + Scheme: "http", + }, + }, + }, + } + + if err := ctrl.SetControllerReference(cluster, sm, scheme); err != nil { + return nil, fmt.Errorf("setting controller reference on PostgreSQL ServiceMonitor: %w", err) + } + + return sm, nil +} + +func buildConnectionPoolerMetricsServiceMonitor( + scheme *runtime.Scheme, + cluster *enterprisev4.PostgresCluster, + poolerType string, +) (*monitoringv1.ServiceMonitor, error) { + poolerName := poolerResourceName(cluster.Name, poolerType) + + sm := &monitoringv1.ServiceMonitor{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolerMetricsServiceMonitorName(cluster.Name, poolerType), + Namespace: cluster.Namespace, + Labels: map[string]string{ + labelManagedBy: labelManagedByValue, + labelObservabilityComponent: "pgbouncer-metrics", + cnpgClusterLabelName: cluster.Name, + cnpgPoolerNameLabel: poolerName, + }, + }, + Spec: monitoringv1.ServiceMonitorSpec{ + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + labelObservabilityComponent: "pgbouncer-metrics", + cnpgClusterLabelName: cluster.Name, + cnpgPoolerNameLabel: poolerName, + }, + }, + Endpoints: []monitoringv1.Endpoint{ + { + Port: poolerMetricsPortName, + Path: "/metrics", + Scheme: "http", + }, + }, + }, + } + + if err := ctrl.SetControllerReference(cluster, sm, scheme); err != nil { + return nil, fmt.Errorf("setting controller reference on PgBouncer ServiceMonitor: %w", err) + } + + return sm, nil +} + +func 
reconcilePostgreSQLMetricsServiceMonitor( + ctx context.Context, + c client.Client, + scheme *runtime.Scheme, + cluster *enterprisev4.PostgresCluster, + enabled bool, +) error { + logger := log.FromContext(ctx) + name := postgresMetricsServiceMonitorName(cluster.Name) + + if !enabled { + existing := &monitoringv1.ServiceMonitor{} + err := c.Get(ctx, types.NamespacedName{Name: name, Namespace: cluster.Namespace}, existing) + switch { + case apierrors.IsNotFound(err): + return nil + case err != nil: + return fmt.Errorf("getting PostgreSQL ServiceMonitor %s: %w", name, err) + } + + logger.Info("Deleting PostgreSQL ServiceMonitor", "name", name) + if err := c.Delete(ctx, existing); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("deleting PostgreSQL ServiceMonitor %s: %w", name, err) + } + return nil + } + + desired, err := buildPostgreSQLMetricsServiceMonitor(scheme, cluster) + if err != nil { + return fmt.Errorf("building PostgreSQL ServiceMonitor: %w", err) + } + + live := &monitoringv1.ServiceMonitor{ + ObjectMeta: metav1.ObjectMeta{ + Name: desired.Name, + Namespace: desired.Namespace, + }, + } + + _, err = controllerutil.CreateOrUpdate(ctx, c, live, func() error { + live.Labels = desired.Labels + live.Annotations = desired.Annotations + live.Spec = desired.Spec + + if !metav1.IsControlledBy(live, cluster) { + if err := ctrl.SetControllerReference(cluster, live, scheme); err != nil { + return fmt.Errorf("setting controller reference on PostgreSQL ServiceMonitor: %w", err) + } + } + return nil + }) + if err != nil { + return fmt.Errorf("reconciling PostgreSQL ServiceMonitor %s: %w", desired.Name, err) + } + + return nil +} + +func reconcileConnectionPoolerMetricsServiceMonitor( + ctx context.Context, + c client.Client, + scheme *runtime.Scheme, + cluster *enterprisev4.PostgresCluster, + poolerType string, + enabled bool, +) error { + logger := log.FromContext(ctx) + name := poolerMetricsServiceMonitorName(cluster.Name, poolerType) + + if !enabled { + existing := &monitoringv1.ServiceMonitor{} + err := c.Get(ctx, types.NamespacedName{Name: name, Namespace: cluster.Namespace}, existing) + switch { + case apierrors.IsNotFound(err): + return nil + case err != nil: + return fmt.Errorf("getting PgBouncer ServiceMonitor %s: %w", name, err) + } + + logger.Info("Deleting PgBouncer ServiceMonitor", "name", name, "poolerType", poolerType) + if err := c.Delete(ctx, existing); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("deleting PgBouncer ServiceMonitor %s: %w", name, err) + } + return nil + } + + desired, err := buildConnectionPoolerMetricsServiceMonitor(scheme, cluster, poolerType) + if err != nil { + return fmt.Errorf("building PgBouncer ServiceMonitor for %s pooler: %w", poolerType, err) + } + + live := &monitoringv1.ServiceMonitor{ + ObjectMeta: metav1.ObjectMeta{ + Name: desired.Name, + Namespace: desired.Namespace, + }, + } + + _, err = controllerutil.CreateOrUpdate(ctx, c, live, func() error { + live.Labels = desired.Labels + live.Annotations = desired.Annotations + live.Spec = desired.Spec + + if !metav1.IsControlledBy(live, cluster) { + if err := ctrl.SetControllerReference(cluster, live, scheme); err != nil { + return fmt.Errorf("setting controller reference on PgBouncer ServiceMonitor: %w", err) + } + } + return nil + }) + if err != nil { + return fmt.Errorf("reconciling PgBouncer ServiceMonitor %s: %w", desired.Name, err) + } + + return nil +} + +//go:embed dashboards/postgres_observability.json +var postgresObservabilityDashboardTemplate string + +func 
buildBasicGrafanaDashboard(cluster *enterprisev4.PostgresCluster) string { + replacer := strings.NewReplacer( + "__CLUSTER_NAME__", cluster.Name, + "__NAMESPACE__", cluster.Namespace, + "__POSTGRES_SERVICE__", cluster.Name+postgresMetricsServiceSuffix, + "__RW_POOLER_SERVICE__", poolerMetricsServiceName(cluster.Name, readWriteEndpoint), + "__RO_POOLER_SERVICE__", poolerMetricsServiceName(cluster.Name, readOnlyEndpoint), + ) + + return replacer.Replace(postgresObservabilityDashboardTemplate) +} diff --git a/pkg/postgresql/cluster/core/monitoring_unit_test.go b/pkg/postgresql/cluster/core/monitoring_unit_test.go new file mode 100644 index 000000000..6c1d0715a --- /dev/null +++ b/pkg/postgresql/cluster/core/monitoring_unit_test.go @@ -0,0 +1,472 @@ +package core + +import ( + "encoding/json" + "errors" + "testing" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + enterprisev4 "github.com/splunk/splunk-operator/api/v4" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/utils/ptr" +) + +func TestIsPostgreSQLMetricsEnabled(t *testing.T) { + tests := []struct { + name string + cluster *enterprisev4.PostgresCluster + class *enterprisev4.PostgresClusterClass + want bool + }{ + { + name: "disabled when class observability is absent", + class: &enterprisev4.PostgresClusterClass{ + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{}, + }, + }, + want: false, + }, + { + name: "enabled when class enables and cluster override is unset", + cluster: &enterprisev4.PostgresCluster{}, + class: newClassWithObservability( + ptr.To(true), + nil, + nil, + nil, + ), + want: true, + }, + { + name: "disabled when cluster override disables", + cluster: &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + Observability: &enterprisev4.PostgresObservabilityOverride{ + PostgreSQL: &enterprisev4.FeatureDisableOverride{Disabled: ptr.To(true)}, + }, + }, + }, + class: newClassWithObservability( + ptr.To(true), + nil, + nil, + nil, + ), + want: false, + }, + { + name: "disabled when class disables even if cluster has override struct", + cluster: &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + Observability: &enterprisev4.PostgresObservabilityOverride{ + PostgreSQL: &enterprisev4.FeatureDisableOverride{Disabled: ptr.To(false)}, + }, + }, + }, + class: newClassWithObservability( + ptr.To(false), + nil, + nil, + nil, + ), + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isPostgreSQLMetricsEnabled(tt.cluster, tt.class) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestIsConnectionPoolerEnabled(t *testing.T) { + tests := []struct { + name string + cluster *enterprisev4.PostgresCluster + class *enterprisev4.PostgresClusterClass + want bool + }{ + { + name: "disabled when class config is absent", + class: &enterprisev4.PostgresClusterClass{}, + want: false, + }, + { + name: "inherits enabled class setting when cluster override is unset", + cluster: &enterprisev4.PostgresCluster{}, + class: &enterprisev4.PostgresClusterClass{ + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{ + ConnectionPoolerEnabled: ptr.To(true), + }, + }, + }, + want: true, + }, + 
{ + name: "cluster can disable class enabled pooler", + cluster: &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ConnectionPoolerEnabled: ptr.To(false), + }, + }, + class: &enterprisev4.PostgresClusterClass{ + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{ + ConnectionPoolerEnabled: ptr.To(true), + }, + }, + }, + want: false, + }, + { + name: "class disabled wins", + cluster: &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ConnectionPoolerEnabled: ptr.To(true), + }, + }, + class: &enterprisev4.PostgresClusterClass{ + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{ + ConnectionPoolerEnabled: ptr.To(false), + }, + }, + }, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isConnectionPoolerEnabled(tt.cluster, tt.class) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestIsConnectionPoolerMetricsEnabled(t *testing.T) { + tests := []struct { + name string + cluster *enterprisev4.PostgresCluster + class *enterprisev4.PostgresClusterClass + want bool + }{ + { + name: "disabled when pooler itself is disabled", + cluster: &enterprisev4.PostgresCluster{}, + class: newClassWithObservability( + nil, + ptr.To(true), + nil, + ptr.To(false), + ), + want: false, + }, + { + name: "enabled when pooler and pgbouncer metrics are enabled", + cluster: &enterprisev4.PostgresCluster{}, + class: newClassWithObservability( + nil, + ptr.To(true), + ptr.To(true), + ptr.To(true), + ), + want: true, + }, + { + name: "disabled when cluster override disables pgbouncer metrics", + cluster: &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + Observability: &enterprisev4.PostgresObservabilityOverride{ + PgBouncer: &enterprisev4.FeatureDisableOverride{Disabled: ptr.To(true)}, + }, + }, + }, + class: newClassWithObservability( + nil, + ptr.To(true), + ptr.To(true), + ptr.To(true), + ), + want: false, + }, + { + name: "disabled when class disables pgbouncer metrics", + cluster: &enterprisev4.PostgresCluster{}, + class: newClassWithObservability( + nil, + ptr.To(true), + ptr.To(false), + ptr.To(true), + ), + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isConnectionPoolerMetricsEnabled(tt.cluster, tt.class) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestIsGrafanaDashboardEnabled(t *testing.T) { + tests := []struct { + name string + cluster *enterprisev4.PostgresCluster + class *enterprisev4.PostgresClusterClass + want bool + }{ + { + name: "enabled when class enables and cluster override is unset", + cluster: &enterprisev4.PostgresCluster{}, + class: newClassWithObservability( + nil, + nil, + nil, + ptr.To(true), + ), + want: true, + }, + { + name: "disabled when cluster override disables dashboard", + cluster: &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + Observability: &enterprisev4.PostgresObservabilityOverride{ + GrafanaDashboard: &enterprisev4.FeatureDisableOverride{Disabled: ptr.To(true)}, + }, + }, + }, + class: newClassWithObservability( + nil, + nil, + nil, + ptr.To(true), + ), + want: false, + }, + { + name: "disabled when class disables dashboard", + cluster: &enterprisev4.PostgresCluster{}, + class: newClassWithObservability( + nil, + nil, + nil, + ptr.To(false), + ), + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := 
isGrafanaDashboardEnabled(tt.cluster, tt.class) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestBuildPostgreSQLMetricsService(t *testing.T) { + scheme := newMonitoringTestScheme(t) + cluster := newTestMonitoringCluster() + + svc, err := buildPostgreSQLMetricsService(scheme, cluster) + require.NoError(t, err) + + assert.Equal(t, "postgresql-cluster-dev-postgres-metrics", svc.Name) + assert.Equal(t, cluster.Namespace, svc.Namespace) + assert.Equal(t, "postgresql-metrics", svc.Labels[labelObservabilityComponent]) + assert.Equal(t, cluster.Name, svc.Labels[cnpgClusterLabelName]) + assert.Equal(t, cluster.Name, svc.Spec.Selector[cnpgClusterLabelName]) + assert.Equal(t, cnpgPodRoleInstance, svc.Spec.Selector[cnpgPodRoleLabelName]) + require.Len(t, svc.Spec.Ports, 1) + assert.Equal(t, postgresMetricsPortName, svc.Spec.Ports[0].Name) + assert.Equal(t, postgresMetricsPort, svc.Spec.Ports[0].Port) + assert.Equal(t, postgresMetricsPortName, svc.Spec.Ports[0].TargetPort.StrVal) + assertMonitoringOwnerRef(t, svc.OwnerReferences, cluster) +} + +func TestBuildConnectionPoolerMetricsService(t *testing.T) { + scheme := newMonitoringTestScheme(t) + cluster := newTestMonitoringCluster() + + svc, err := buildConnectionPoolerMetricsService(scheme, cluster, readWriteEndpoint) + require.NoError(t, err) + + assert.Equal(t, "postgresql-cluster-dev-pooler-rw-metrics", svc.Name) + assert.Equal(t, "pgbouncer-metrics", svc.Labels[labelObservabilityComponent]) + assert.Equal(t, poolerResourceName(cluster.Name, readWriteEndpoint), svc.Labels[cnpgPoolerNameLabel]) + assert.Equal(t, poolerResourceName(cluster.Name, readWriteEndpoint), svc.Spec.Selector[cnpgPoolerNameLabel]) + require.Len(t, svc.Spec.Ports, 1) + assert.Equal(t, poolerMetricsPortName, svc.Spec.Ports[0].Name) + assert.Equal(t, poolerMetricsPort, svc.Spec.Ports[0].Port) + assert.Equal(t, poolerMetricsPortName, svc.Spec.Ports[0].TargetPort.StrVal) + assertMonitoringOwnerRef(t, svc.OwnerReferences, cluster) +} + +func TestBuildGrafanaDashboardConfigMap(t *testing.T) { + scheme := newMonitoringTestScheme(t) + cluster := newTestMonitoringCluster() + + cm, err := buildGrafanaDashboardConfigMap(scheme, cluster) + require.NoError(t, err) + + assert.Equal(t, "postgresql-cluster-dev-grafana-dashboard", cm.Name) + assert.Equal(t, "grafana-dashboard", cm.Labels[labelObservabilityComponent]) + assert.Equal(t, grafanaDashboardLabelValue, cm.Labels[grafanaDashboardLabelKey]) + assert.Contains(t, cm.Data, "dashboard.json") + assert.NotContains(t, cm.Data["dashboard.json"], "__CLUSTER_NAME__") + assert.Contains(t, cm.Data["dashboard.json"], cluster.Name) + assert.Contains(t, cm.Data["dashboard.json"], cluster.Namespace) + assert.Contains(t, cm.Data["dashboard.json"], cluster.Name+postgresMetricsServiceSuffix) + assert.Contains(t, cm.Data["dashboard.json"], poolerMetricsServiceName(cluster.Name, readWriteEndpoint)) + assert.Contains(t, cm.Data["dashboard.json"], poolerMetricsServiceName(cluster.Name, readOnlyEndpoint)) + + var dashboard map[string]any + require.NoError(t, json.Unmarshal([]byte(cm.Data["dashboard.json"]), &dashboard)) + assertMonitoringOwnerRef(t, cm.OwnerReferences, cluster) +} + +func TestBuildPostgreSQLMetricsServiceMonitor(t *testing.T) { + scheme := newMonitoringTestScheme(t) + cluster := newTestMonitoringCluster() + + sm, err := buildPostgreSQLMetricsServiceMonitor(scheme, cluster) + require.NoError(t, err) + + assert.Equal(t, "postgresql-cluster-dev-postgres-metrics-monitor", sm.Name) + assert.Equal(t, "postgresql-metrics", 
sm.Labels[labelObservabilityComponent]) + assert.Equal(t, cluster.Name, sm.Spec.Selector.MatchLabels[cnpgClusterLabelName]) + require.Len(t, sm.Spec.Endpoints, 1) + assert.Equal(t, postgresMetricsPortName, sm.Spec.Endpoints[0].Port) + assert.Equal(t, "/metrics", sm.Spec.Endpoints[0].Path) + assert.Equal(t, "http", sm.Spec.Endpoints[0].Scheme) + assertMonitoringOwnerRef(t, sm.OwnerReferences, cluster) +} + +func TestBuildConnectionPoolerMetricsServiceMonitor(t *testing.T) { + scheme := newMonitoringTestScheme(t) + cluster := newTestMonitoringCluster() + + sm, err := buildConnectionPoolerMetricsServiceMonitor(scheme, cluster, readOnlyEndpoint) + require.NoError(t, err) + + assert.Equal(t, "postgresql-cluster-dev-pooler-ro-metrics-monitor", sm.Name) + assert.Equal(t, "pgbouncer-metrics", sm.Labels[labelObservabilityComponent]) + assert.Equal(t, poolerResourceName(cluster.Name, readOnlyEndpoint), sm.Labels[cnpgPoolerNameLabel]) + assert.Equal(t, poolerResourceName(cluster.Name, readOnlyEndpoint), sm.Spec.Selector.MatchLabels[cnpgPoolerNameLabel]) + require.Len(t, sm.Spec.Endpoints, 1) + assert.Equal(t, poolerMetricsPortName, sm.Spec.Endpoints[0].Port) + assert.Equal(t, "/metrics", sm.Spec.Endpoints[0].Path) + assert.Equal(t, "http", sm.Spec.Endpoints[0].Scheme) + assertMonitoringOwnerRef(t, sm.OwnerReferences, cluster) +} + +func TestIsServiceMonitorUnavailable(t *testing.T) { + tests := []struct { + name string + err error + want bool + }{ + { + name: "nil error", + err: nil, + want: false, + }, + { + name: "not found error", + err: apierrors.NewNotFound(schema.GroupResource{Group: "monitoring.coreos.com", Resource: "servicemonitors"}, "test"), + want: true, + }, + { + name: "kind match string error", + err: errors.New("no matches for kind \"ServiceMonitor\" in version \"monitoring.coreos.com/v1\""), + want: true, + }, + { + name: "resource string error", + err: errors.New("servicemonitors.monitoring.coreos.com not found"), + want: true, + }, + { + name: "unrelated error", + err: errors.New("boom"), + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isServiceMonitorUnavailable(tt.err) + assert.Equal(t, tt.want, got) + }) + } +} + +func newMonitoringTestScheme(t *testing.T) *runtime.Scheme { + t.Helper() + + scheme := runtime.NewScheme() + require.NoError(t, corev1.AddToScheme(scheme)) + require.NoError(t, monitoringv1.AddToScheme(scheme)) + require.NoError(t, enterprisev4.AddToScheme(scheme)) + + return scheme +} + +func newTestMonitoringCluster() *enterprisev4.PostgresCluster { + return &enterprisev4.PostgresCluster{ + TypeMeta: metav1.TypeMeta{ + APIVersion: enterprisev4.GroupVersion.String(), + Kind: "PostgresCluster", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "postgresql-cluster-dev", + Namespace: "test", + UID: "cluster-uid", + }, + } +} + +func newClassWithObservability( + postgresEnabled *bool, + poolerEnabled *bool, + pgBouncerMetricsEnabled *bool, + grafanaEnabled *bool, +) *enterprisev4.PostgresClusterClass { + return &enterprisev4.PostgresClusterClass{ + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{ + ConnectionPoolerEnabled: poolerEnabled, + Observability: &enterprisev4.PostgresObservabilityClassConfig{ + PostgreSQL: &enterprisev4.MetricsClassConfig{Enabled: postgresEnabled}, + PgBouncer: &enterprisev4.MetricsClassConfig{Enabled: pgBouncerMetricsEnabled}, + GrafanaDashboard: &enterprisev4.GrafanaDashboardClassConfig{Enabled: grafanaEnabled}, + }, + }, + }, + } +} + +func 
assertMonitoringOwnerRef(t *testing.T, ownerRefs []metav1.OwnerReference, cluster *enterprisev4.PostgresCluster) { + t.Helper() + + require.Len(t, ownerRefs, 1) + assert.Equal(t, cluster.APIVersion, ownerRefs[0].APIVersion) + assert.Equal(t, cluster.Kind, ownerRefs[0].Kind) + assert.Equal(t, cluster.Name, ownerRefs[0].Name) + assert.Equal(t, cluster.UID, ownerRefs[0].UID) + require.NotNil(t, ownerRefs[0].Controller) + assert.True(t, *ownerRefs[0].Controller) +} From 2822a8dbc63f556891309de3434b91af4f98fc1e Mon Sep 17 00:00:00 2001 From: dpishchenkov Date: Thu, 2 Apr 2026 16:46:17 +0200 Subject: [PATCH 21/36] removed grafana dashboard from code --- api/v4/postgrescluster_types.go | 5 +- api/v4/postgresclusterclass_types.go | 13 +- ...nterprise_v4_postgresclusterclass_dev.yaml | 3 +- pkg/postgresql/cluster/core/cluster.go | 39 +---- .../dashboards/postgres_observability.json | 136 ----------------- pkg/postgresql/cluster/core/events.go | 39 +++-- pkg/postgresql/cluster/core/monitoring.go | 143 +----------------- .../cluster/core/monitoring_unit_test.go | 137 +---------------- 8 files changed, 37 insertions(+), 478 deletions(-) delete mode 100644 pkg/postgresql/cluster/core/dashboards/postgres_observability.json diff --git a/api/v4/postgrescluster_types.go b/api/v4/postgrescluster_types.go index 1faae2820..5c3608058 100644 --- a/api/v4/postgrescluster_types.go +++ b/api/v4/postgrescluster_types.go @@ -108,7 +108,7 @@ type PostgresClusterSpec struct { // +optional ClusterDeletionPolicy *string `json:"clusterDeletionPolicy,omitempty"` - // Observability contains configuration for monitoring and observability features. + // Observability contains configuration for metrics exposure features. // +optional Observability *PostgresObservabilityOverride `json:"observability,omitempty"` } @@ -121,9 +121,6 @@ type PostgresObservabilityOverride struct { // +optional PgBouncer *FeatureDisableOverride `json:"pgbouncer,omitempty"` - - // +optional - GrafanaDashboard *FeatureDisableOverride `json:"grafanaDashboard,omitempty"` } type FeatureDisableOverride struct { diff --git a/api/v4/postgresclusterclass_types.go b/api/v4/postgresclusterclass_types.go index 743e98722..74085d191 100644 --- a/api/v4/postgresclusterclass_types.go +++ b/api/v4/postgresclusterclass_types.go @@ -100,8 +100,8 @@ type PostgresClusterClassConfig struct { // +optional ConnectionPoolerEnabled *bool `json:"connectionPoolerEnabled,omitempty"` - // Observability contains configuration for metrics and dashboards. - // When enabled, creates metrics resources and Grafana dashboard for clusters using this class. + // Observability contains configuration for metrics exposure. + // When enabled, creates metrics resources for clusters using this class. // Can be overridden in PostgresCluster CR. // +kubebuilder:default={} // +optional @@ -184,8 +184,6 @@ type PostgresObservabilityClassConfig struct { PostgreSQL *MetricsClassConfig `json:"postgresql,omitempty"` // +optional PgBouncer *MetricsClassConfig `json:"pgbouncer,omitempty"` - // +optional - GrafanaDashboard *GrafanaDashboardClassConfig `json:"grafanaDashboard,omitempty"` } type MetricsClassConfig struct { @@ -195,13 +193,6 @@ type MetricsClassConfig struct { Enabled *bool `json:"enabled,omitempty"` } -type GrafanaDashboardClassConfig struct { - // Enabled controls whether a Grafana dashboard ConfigMap should be created for this class. 
- // +kubebuilder:default=false - // +optional - Enabled *bool `json:"enabled,omitempty"` -} - // +kubebuilder:object:root=true // +kubebuilder:subresource:status // +kubebuilder:resource:scope=Cluster diff --git a/config/samples/enterprise_v4_postgresclusterclass_dev.yaml b/config/samples/enterprise_v4_postgresclusterclass_dev.yaml index 560958794..082d5fad9 100644 --- a/config/samples/enterprise_v4_postgresclusterclass_dev.yaml +++ b/config/samples/enterprise_v4_postgresclusterclass_dev.yaml @@ -28,7 +28,7 @@ spec: memory: "2Gi" connectionPoolerEnabled: true observability: - grafanaDashboard: + postgresql: enabled: true pgbouncer: enabled: true @@ -41,4 +41,3 @@ spec: mode: transaction config: max_client_conn: "100" - diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 6c3a06b07..329571933 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -358,48 +358,21 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. return ctrl.Result{}, err } - if err := reconcileGrafanaDashboardConfigMap(ctx, c, rc.Scheme, postgresCluster, isGrafanaDashboardEnabled(postgresCluster, clusterClass)); err != nil { - return ctrl.Result{}, err - } - - serviceMonitorUnavailableEmitted := false - handleServiceMonitorError := func(err error) (bool, error) { - if err == nil { - return false, nil - } - if !isServiceMonitorUnavailable(err) { - return false, err - } - if !serviceMonitorUnavailableEmitted { - serviceMonitorUnavailableEmitted = true - logger.Info("ServiceMonitor CRD unavailable, continuing without ServiceMonitors") - rc.emitWarning(postgresCluster, EventServiceMonitorUnavailable, - "ServiceMonitor CRD not found; continuing without Prometheus ServiceMonitors") - } - return true, nil - } - - if handled, err := handleServiceMonitorError( - reconcilePostgreSQLMetricsServiceMonitor(ctx, c, rc.Scheme, postgresCluster, isPostgreSQLMetricsEnabled(postgresCluster, clusterClass)), + if err := reconcilePostgreSQLMetricsServiceMonitor( + ctx, c, rc.Scheme, postgresCluster, isPostgreSQLMetricsEnabled(postgresCluster, clusterClass), ); err != nil { return ctrl.Result{}, err - } else if handled { - logger.Info("Skipped PostgreSQL ServiceMonitor reconciliation") } - if handled, err := handleServiceMonitorError( - reconcileConnectionPoolerMetricsServiceMonitor(ctx, c, rc.Scheme, postgresCluster, readWriteEndpoint, rwPoolerMetricsEnabled), + if err := reconcileConnectionPoolerMetricsServiceMonitor( + ctx, c, rc.Scheme, postgresCluster, readWriteEndpoint, rwPoolerMetricsEnabled, ); err != nil { return ctrl.Result{}, err - } else if handled { - logger.Info("Skipped RW PgBouncer ServiceMonitor reconciliation") } - if handled, err := handleServiceMonitorError( - reconcileConnectionPoolerMetricsServiceMonitor(ctx, c, rc.Scheme, postgresCluster, readOnlyEndpoint, roPoolerMetricsEnabled), + if err := reconcileConnectionPoolerMetricsServiceMonitor( + ctx, c, rc.Scheme, postgresCluster, readOnlyEndpoint, roPoolerMetricsEnabled, ); err != nil { return ctrl.Result{}, err - } else if handled { - logger.Info("Skipped RO PgBouncer ServiceMonitor reconciliation") } // Reconcile ConfigMap when CNPG cluster is healthy. 
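The enablement flags passed into these reconcile calls come from helpers that
all share one precedence rule: a metrics feature must be opted in at the class
level, and the PostgresCluster override can only switch it off, never on. A
minimal sketch of that rule, assuming an illustrative helper name
(resolveFeatureEnabled is not a function in this patch):

    package core

    // resolveFeatureEnabled distills the precedence rule shared by the
    // is*MetricsEnabled helpers: the class must opt the feature in, and
    // the cluster's FeatureDisableOverride can only opt back out.
    func resolveFeatureEnabled(classEnabled, clusterDisabled *bool) bool {
        if classEnabled == nil || !*classEnabled {
            // Off unless the class explicitly enables the feature.
            return false
        }
        // A nil or false cluster override keeps the class decision.
        return clusterDisabled == nil || !*clusterDisabled
    }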
diff --git a/pkg/postgresql/cluster/core/dashboards/postgres_observability.json b/pkg/postgresql/cluster/core/dashboards/postgres_observability.json deleted file mode 100644 index bbdf6eda7..000000000 --- a/pkg/postgresql/cluster/core/dashboards/postgres_observability.json +++ /dev/null @@ -1,136 +0,0 @@ -{ - "title": "PostgreSQL __CLUSTER_NAME__", - "uid": "pg-__CLUSTER_NAME__", - "schemaVersion": 39, - "version": 1, - "refresh": "30s", - "timezone": "browser", - "tags": ["postgresql", "cnpg", "pgbouncer"], - "editable": true, - "graphTooltip": 0, - "panels": [ - { - "id": 1, - "type": "stat", - "title": "PostgreSQL Instances", - "gridPos": { "x": 0, "y": 0, "w": 6, "h": 4 }, - "targets": [ - { - "expr": "count(max by (pod) (cnpg_pg_postmaster_start_time_seconds{namespace=\"__NAMESPACE__\",service=\"__POSTGRES_SERVICE__\"}))", - "refId": "A" - } - ], - "options": { - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, - "orientation": "horizontal", - "textMode": "value" - } - }, - { - "id": 2, - "type": "stat", - "title": "RW PgBouncer Pods Up", - "gridPos": { "x": 6, "y": 0, "w": 6, "h": 4 }, - "targets": [ - { - "expr": "round(sum(max by (pod) (cnpg_pgbouncer_up{namespace=\"__NAMESPACE__\",service=\"__RW_POOLER_SERVICE__\"})))", - "refId": "A" - } - ], - "options": { - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, - "orientation": "horizontal", - "textMode": "value" - } - }, - { - "id": 3, - "type": "stat", - "title": "RO PgBouncer Pods Up", - "gridPos": { "x": 12, "y": 0, "w": 6, "h": 4 }, - "targets": [ - { - "expr": "round(sum(max by (pod) (cnpg_pgbouncer_up{namespace=\"__NAMESPACE__\",service=\"__RO_POOLER_SERVICE__\"})))", - "refId": "A" - } - ], - "options": { - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, - "orientation": "horizontal", - "textMode": "value" - } - }, - { - "id": 4, - "type": "stat", - "title": "Total Database Size", - "gridPos": { "x": 18, "y": 0, "w": 6, "h": 4 }, - "targets": [ - { - "expr": "sum(max by (datname) (cnpg_pg_database_size_bytes{namespace=\"__NAMESPACE__\",service=\"__POSTGRES_SERVICE__\"}))", - "refId": "A" - } - ], - "fieldConfig": { - "defaults": { - "unit": "bytes" - } - }, - "options": { - "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, - "orientation": "horizontal", - "textMode": "value" - } - }, - { - "id": 5, - "type": "timeseries", - "title": "WAL Files by Pod", - "gridPos": { "x": 0, "y": 4, "w": 8, "h": 8 }, - "targets": [ - { - "expr": "round(max by (pod) (cnpg_pg_wal_files_total{namespace=\"__NAMESPACE__\",service=\"__POSTGRES_SERVICE__\"}))", - "legendFormat": "{{pod}}", - "refId": "A" - } - ] - }, - { - "id": 6, - "type": "timeseries", - "title": "Archived WAL Rate by Pod", - "gridPos": { "x": 8, "y": 4, "w": 8, "h": 8 }, - "targets": [ - { - "expr": "max by (pod) (rate(cnpg_pg_stat_archiver_archived_count{namespace=\"__NAMESPACE__\",service=\"__POSTGRES_SERVICE__\"}[5m]))", - "legendFormat": "{{pod}}", - "refId": "A" - } - ] - }, - { - "id": 7, - "type": "timeseries", - "title": "PgBouncer Active Clients", - "gridPos": { "x": 16, "y": 4, "w": 8, "h": 8 }, - "targets": [ - { - "expr": "round(sum(cnpg_pgbouncer_pools_cl_active{namespace=\"__NAMESPACE__\",service=\"__RW_POOLER_SERVICE__\"}))", - "legendFormat": "rw", - "refId": "A" - }, - { - "expr": "round(sum(cnpg_pgbouncer_pools_cl_active{namespace=\"__NAMESPACE__\",service=\"__RO_POOLER_SERVICE__\"}))", - "legendFormat": "ro", - "refId": "B" - } - ] - } - ], - 
"templating": { - "list": [] - }, - "annotations": { - "list": [] - } -} diff --git a/pkg/postgresql/cluster/core/events.go b/pkg/postgresql/cluster/core/events.go index 73ded6cd5..afcfd768e 100644 --- a/pkg/postgresql/cluster/core/events.go +++ b/pkg/postgresql/cluster/core/events.go @@ -10,26 +10,25 @@ import ( ) const ( - EventSecretReady = "SecretReady" - EventConfigMapReady = "ConfigMapReady" - EventClusterAdopted = "ClusterAdopted" - EventClusterCreationStarted = "ClusterCreationStarted" - EventClusterUpdateStarted = "ClusterUpdateStarted" - EventClusterReady = "ClusterReady" - EventPoolerCreationStarted = "PoolerCreationStarted" - EventPoolerReady = "PoolerReady" - EventCleanupComplete = "CleanupComplete" - EventClusterClassNotFound = "ClusterClassNotFound" - EventConfigMergeFailed = "ConfigMergeFailed" - EventSecretReconcileFailed = "SecretReconcileFailed" - EventClusterCreateFailed = "ClusterCreateFailed" - EventClusterUpdateFailed = "ClusterUpdateFailed" - EventManagedRolesFailed = "ManagedRolesFailed" - EventPoolerReconcileFailed = "PoolerReconcileFailed" - EventConfigMapReconcileFailed = "ConfigMapReconcileFailed" - EventServiceMonitorUnavailable = "ServiceMonitorUnavailable" - EventClusterDegraded = "ClusterDegraded" - EventCleanupFailed = "CleanupFailed" + EventSecretReady = "SecretReady" + EventConfigMapReady = "ConfigMapReady" + EventClusterAdopted = "ClusterAdopted" + EventClusterCreationStarted = "ClusterCreationStarted" + EventClusterUpdateStarted = "ClusterUpdateStarted" + EventClusterReady = "ClusterReady" + EventPoolerCreationStarted = "PoolerCreationStarted" + EventPoolerReady = "PoolerReady" + EventCleanupComplete = "CleanupComplete" + EventClusterClassNotFound = "ClusterClassNotFound" + EventConfigMergeFailed = "ConfigMergeFailed" + EventSecretReconcileFailed = "SecretReconcileFailed" + EventClusterCreateFailed = "ClusterCreateFailed" + EventClusterUpdateFailed = "ClusterUpdateFailed" + EventManagedRolesFailed = "ManagedRolesFailed" + EventPoolerReconcileFailed = "PoolerReconcileFailed" + EventConfigMapReconcileFailed = "ConfigMapReconcileFailed" + EventClusterDegraded = "ClusterDegraded" + EventCleanupFailed = "CleanupFailed" ) func (rc *ReconcileContext) emitNormal(obj client.Object, reason, message string) { diff --git a/pkg/postgresql/cluster/core/monitoring.go b/pkg/postgresql/cluster/core/monitoring.go index 7c942d22a..c2f1267f8 100644 --- a/pkg/postgresql/cluster/core/monitoring.go +++ b/pkg/postgresql/cluster/core/monitoring.go @@ -2,13 +2,12 @@ package core import ( "context" - _ "embed" "fmt" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" enterprisev4 "github.com/splunk/splunk-operator/api/v4" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" - apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -17,17 +16,15 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" - "strings" ) const ( // metrics - postgresMetricsServiceSuffix = "-postgres-metrics" - postgresMetricsPortName = "metrics" - postgresMetricsPort = int32(9187) - poolerMetricsPortName = "metrics" - poolerMetricsPort = int32(9127) - grafanaDashboardConfigMapSuffix = "-grafana-dashboard" + postgresMetricsServiceSuffix = "-postgres-metrics" + postgresMetricsPortName = "metrics" + postgresMetricsPort = int32(9187) + 
poolerMetricsPortName = "metrics" + poolerMetricsPort = int32(9127) // labels labelManagedBy = "app.kubernetes.io/managed-by" @@ -37,8 +34,6 @@ const ( cnpgPoolerNameLabel = "cnpg.io/poolerName" cnpgPodRoleInstance = "instance" cnpgPodRoleLabelName = "cnpg.io/podRole" - grafanaDashboardLabelKey = "grafana_dashboard" - grafanaDashboardLabelValue = "1" ) func isPostgreSQLMetricsEnabled(cluster *enterprisev4.PostgresCluster, class *enterprisev4.PostgresClusterClass) bool { @@ -87,21 +82,6 @@ func isConnectionPoolerMetricsEnabled(cluster *enterprisev4.PostgresCluster, cla return override == nil || !*override } -func isGrafanaDashboardEnabled(cluster *enterprisev4.PostgresCluster, class *enterprisev4.PostgresClusterClass) bool { - if class == nil || class.Spec.Config == nil || class.Spec.Config.Observability == nil { - return false - } - classCfg := class.Spec.Config.Observability.GrafanaDashboard - if classCfg == nil || classCfg.Enabled == nil || !*classCfg.Enabled { - return false - } - if cluster == nil || cluster.Spec.Observability == nil || cluster.Spec.Observability.GrafanaDashboard == nil { - return true - } - override := cluster.Spec.Observability.GrafanaDashboard.Disabled - return override == nil || !*override -} - func buildPostgreSQLMetricsService(scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster) (*corev1.Service, error) { svc := &corev1.Service{ ObjectMeta: metav1.ObjectMeta{ @@ -181,44 +161,6 @@ func buildConnectionPoolerMetricsService( return svc, nil } -func buildGrafanaDashboardConfigMap(scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster) (*corev1.ConfigMap, error) { - cm := &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: cluster.Name + grafanaDashboardConfigMapSuffix, - Namespace: cluster.Namespace, - Labels: map[string]string{ - labelManagedBy: labelManagedByValue, - labelObservabilityComponent: "grafana-dashboard", - cnpgClusterLabelName: cluster.Name, - grafanaDashboardLabelKey: grafanaDashboardLabelValue, - }, - }, - Data: map[string]string{ - "dashboard.json": buildBasicGrafanaDashboard(cluster), - }, - } - - if err := ctrl.SetControllerReference(cluster, cm, scheme); err != nil { - return nil, fmt.Errorf("setting controller reference on Grafana dashboard ConfigMap: %w", err) - } - - return cm, nil -} - -func isServiceMonitorUnavailable(err error) bool { - if err == nil { - return false - } - - if apierrors.IsNotFound(err) || apimeta.IsNoMatchError(err) { - return true - } - - msg := err.Error() - return strings.Contains(msg, "no matches for kind \"ServiceMonitor\"") || - strings.Contains(msg, "servicemonitors.monitoring.coreos.com") -} - func reconcilePostgreSQLMetricsService(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, enabled bool) error { logger := log.FromContext(ctx) serviceName := cluster.Name + postgresMetricsServiceSuffix @@ -334,64 +276,6 @@ func reconcileConnectionPoolerMetricsService( return nil } -func reconcileGrafanaDashboardConfigMap( - ctx context.Context, - c client.Client, - scheme *runtime.Scheme, - cluster *enterprisev4.PostgresCluster, - enabled bool, -) error { - logger := log.FromContext(ctx) - configMapName := cluster.Name + grafanaDashboardConfigMapSuffix - - if !enabled { - existing := &corev1.ConfigMap{} - err := c.Get(ctx, types.NamespacedName{Name: configMapName, Namespace: cluster.Namespace}, existing) - switch { - case apierrors.IsNotFound(err): - return nil - case err != nil: - return fmt.Errorf("getting Grafana dashboard ConfigMap %s: %w", 
configMapName, err) - } - - logger.Info("Deleting Grafana dashboard ConfigMap", "name", configMapName) - if err := c.Delete(ctx, existing); err != nil && !apierrors.IsNotFound(err) { - return fmt.Errorf("deleting Grafana dashboard ConfigMap %s: %w", configMapName, err) - } - return nil - } - - desired, err := buildGrafanaDashboardConfigMap(scheme, cluster) - if err != nil { - return fmt.Errorf("building Grafana dashboard ConfigMap: %w", err) - } - - live := &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: desired.Name, - Namespace: desired.Namespace, - }, - } - - _, err = controllerutil.CreateOrUpdate(ctx, c, live, func() error { - live.Labels = desired.Labels - live.Annotations = desired.Annotations - live.Data = desired.Data - - if !metav1.IsControlledBy(live, cluster) { - if err := ctrl.SetControllerReference(cluster, live, scheme); err != nil { - return fmt.Errorf("setting controller reference on Grafana dashboard ConfigMap: %w", err) - } - } - return nil - }) - if err != nil { - return fmt.Errorf("reconciling Grafana dashboard ConfigMap %s: %w", desired.Name, err) - } - - return nil -} - func postgresMetricsServiceMonitorName(clusterName string) string { return clusterName + "-postgres-metrics-monitor" } @@ -597,18 +481,3 @@ func reconcileConnectionPoolerMetricsServiceMonitor( return nil } - -//go:embed dashboards/postgres_observability.json -var postgresObservabilityDashboardTemplate string - -func buildBasicGrafanaDashboard(cluster *enterprisev4.PostgresCluster) string { - replacer := strings.NewReplacer( - "__CLUSTER_NAME__", cluster.Name, - "__NAMESPACE__", cluster.Namespace, - "__POSTGRES_SERVICE__", cluster.Name+postgresMetricsServiceSuffix, - "__RW_POOLER_SERVICE__", poolerMetricsServiceName(cluster.Name, readWriteEndpoint), - "__RO_POOLER_SERVICE__", poolerMetricsServiceName(cluster.Name, readOnlyEndpoint), - ) - - return replacer.Replace(postgresObservabilityDashboardTemplate) -} diff --git a/pkg/postgresql/cluster/core/monitoring_unit_test.go b/pkg/postgresql/cluster/core/monitoring_unit_test.go index 6c1d0715a..545ea25da 100644 --- a/pkg/postgresql/cluster/core/monitoring_unit_test.go +++ b/pkg/postgresql/cluster/core/monitoring_unit_test.go @@ -1,8 +1,6 @@ package core import ( - "encoding/json" - "errors" "testing" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" @@ -10,10 +8,8 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/utils/ptr" ) @@ -40,7 +36,6 @@ func TestIsPostgreSQLMetricsEnabled(t *testing.T) { ptr.To(true), nil, nil, - nil, ), want: true, }, @@ -57,7 +52,6 @@ func TestIsPostgreSQLMetricsEnabled(t *testing.T) { ptr.To(true), nil, nil, - nil, ), want: false, }, @@ -74,7 +68,6 @@ func TestIsPostgreSQLMetricsEnabled(t *testing.T) { ptr.To(false), nil, nil, - nil, ), want: false, }, @@ -168,7 +161,6 @@ func TestIsConnectionPoolerMetricsEnabled(t *testing.T) { nil, ptr.To(true), nil, - ptr.To(false), ), want: false, }, @@ -179,7 +171,6 @@ func TestIsConnectionPoolerMetricsEnabled(t *testing.T) { nil, ptr.To(true), ptr.To(true), - ptr.To(true), ), want: true, }, @@ -196,7 +187,6 @@ func TestIsConnectionPoolerMetricsEnabled(t *testing.T) { nil, ptr.To(true), ptr.To(true), - ptr.To(true), ), want: false, }, @@ -207,7 +197,6 @@ func TestIsConnectionPoolerMetricsEnabled(t 
*testing.T) { nil, ptr.To(true), ptr.To(false), - ptr.To(true), ), want: false, }, @@ -221,62 +210,6 @@ func TestIsConnectionPoolerMetricsEnabled(t *testing.T) { } } -func TestIsGrafanaDashboardEnabled(t *testing.T) { - tests := []struct { - name string - cluster *enterprisev4.PostgresCluster - class *enterprisev4.PostgresClusterClass - want bool - }{ - { - name: "enabled when class enables and cluster override is unset", - cluster: &enterprisev4.PostgresCluster{}, - class: newClassWithObservability( - nil, - nil, - nil, - ptr.To(true), - ), - want: true, - }, - { - name: "disabled when cluster override disables dashboard", - cluster: &enterprisev4.PostgresCluster{ - Spec: enterprisev4.PostgresClusterSpec{ - Observability: &enterprisev4.PostgresObservabilityOverride{ - GrafanaDashboard: &enterprisev4.FeatureDisableOverride{Disabled: ptr.To(true)}, - }, - }, - }, - class: newClassWithObservability( - nil, - nil, - nil, - ptr.To(true), - ), - want: false, - }, - { - name: "disabled when class disables dashboard", - cluster: &enterprisev4.PostgresCluster{}, - class: newClassWithObservability( - nil, - nil, - nil, - ptr.To(false), - ), - want: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := isGrafanaDashboardEnabled(tt.cluster, tt.class) - assert.Equal(t, tt.want, got) - }) - } -} - func TestBuildPostgreSQLMetricsService(t *testing.T) { scheme := newMonitoringTestScheme(t) cluster := newTestMonitoringCluster() @@ -315,29 +248,6 @@ func TestBuildConnectionPoolerMetricsService(t *testing.T) { assertMonitoringOwnerRef(t, svc.OwnerReferences, cluster) } -func TestBuildGrafanaDashboardConfigMap(t *testing.T) { - scheme := newMonitoringTestScheme(t) - cluster := newTestMonitoringCluster() - - cm, err := buildGrafanaDashboardConfigMap(scheme, cluster) - require.NoError(t, err) - - assert.Equal(t, "postgresql-cluster-dev-grafana-dashboard", cm.Name) - assert.Equal(t, "grafana-dashboard", cm.Labels[labelObservabilityComponent]) - assert.Equal(t, grafanaDashboardLabelValue, cm.Labels[grafanaDashboardLabelKey]) - assert.Contains(t, cm.Data, "dashboard.json") - assert.NotContains(t, cm.Data["dashboard.json"], "__CLUSTER_NAME__") - assert.Contains(t, cm.Data["dashboard.json"], cluster.Name) - assert.Contains(t, cm.Data["dashboard.json"], cluster.Namespace) - assert.Contains(t, cm.Data["dashboard.json"], cluster.Name+postgresMetricsServiceSuffix) - assert.Contains(t, cm.Data["dashboard.json"], poolerMetricsServiceName(cluster.Name, readWriteEndpoint)) - assert.Contains(t, cm.Data["dashboard.json"], poolerMetricsServiceName(cluster.Name, readOnlyEndpoint)) - - var dashboard map[string]any - require.NoError(t, json.Unmarshal([]byte(cm.Data["dashboard.json"]), &dashboard)) - assertMonitoringOwnerRef(t, cm.OwnerReferences, cluster) -} - func TestBuildPostgreSQLMetricsServiceMonitor(t *testing.T) { scheme := newMonitoringTestScheme(t) cluster := newTestMonitoringCluster() @@ -373,47 +283,6 @@ func TestBuildConnectionPoolerMetricsServiceMonitor(t *testing.T) { assertMonitoringOwnerRef(t, sm.OwnerReferences, cluster) } -func TestIsServiceMonitorUnavailable(t *testing.T) { - tests := []struct { - name string - err error - want bool - }{ - { - name: "nil error", - err: nil, - want: false, - }, - { - name: "not found error", - err: apierrors.NewNotFound(schema.GroupResource{Group: "monitoring.coreos.com", Resource: "servicemonitors"}, "test"), - want: true, - }, - { - name: "kind match string error", - err: errors.New("no matches for kind \"ServiceMonitor\" in version 
\"monitoring.coreos.com/v1\""), - want: true, - }, - { - name: "resource string error", - err: errors.New("servicemonitors.monitoring.coreos.com not found"), - want: true, - }, - { - name: "unrelated error", - err: errors.New("boom"), - want: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := isServiceMonitorUnavailable(tt.err) - assert.Equal(t, tt.want, got) - }) - } -} - func newMonitoringTestScheme(t *testing.T) *runtime.Scheme { t.Helper() @@ -443,16 +312,14 @@ func newClassWithObservability( postgresEnabled *bool, poolerEnabled *bool, pgBouncerMetricsEnabled *bool, - grafanaEnabled *bool, ) *enterprisev4.PostgresClusterClass { return &enterprisev4.PostgresClusterClass{ Spec: enterprisev4.PostgresClusterClassSpec{ Config: &enterprisev4.PostgresClusterClassConfig{ ConnectionPoolerEnabled: poolerEnabled, Observability: &enterprisev4.PostgresObservabilityClassConfig{ - PostgreSQL: &enterprisev4.MetricsClassConfig{Enabled: postgresEnabled}, - PgBouncer: &enterprisev4.MetricsClassConfig{Enabled: pgBouncerMetricsEnabled}, - GrafanaDashboard: &enterprisev4.GrafanaDashboardClassConfig{Enabled: grafanaEnabled}, + PostgreSQL: &enterprisev4.MetricsClassConfig{Enabled: postgresEnabled}, + PgBouncer: &enterprisev4.MetricsClassConfig{Enabled: pgBouncerMetricsEnabled}, }, }, }, From 016b1aab04685162195531ea4fe37ca31c8b675a Mon Sep 17 00:00:00 2001 From: dpishchenkov Date: Fri, 3 Apr 2026 15:03:00 +0200 Subject: [PATCH 22/36] add grafana sample to docs --- docs/PostgreSQLObservabilityDashboard.json | 928 +++++++++++++++++++++ docs/PostgreSQLObservabilityDashboard.md | 65 ++ 2 files changed, 993 insertions(+) create mode 100644 docs/PostgreSQLObservabilityDashboard.json create mode 100644 docs/PostgreSQLObservabilityDashboard.md diff --git a/docs/PostgreSQLObservabilityDashboard.json b/docs/PostgreSQLObservabilityDashboard.json new file mode 100644 index 000000000..aa0ffc765 --- /dev/null +++ b/docs/PostgreSQLObservabilityDashboard.json @@ -0,0 +1,928 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "count(count by (pod) (cnpg_pg_postmaster_start_time_seconds{namespace=\"$namespace\",service=\"$cluster-postgres-metrics\"}))", + "legendFormat": "postgres pods", + "range": true, + "refId": "A" + } + ], + "title": "PostgreSQL Targets", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { 
+ "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "index": 0, + "text": "Down" + }, + "1": { + "index": 1, + "text": "Up" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 2, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "max(cnpg_pgbouncer_up{namespace=\"$namespace\",service=\"$cluster-pooler-rw-metrics\"})", + "legendFormat": "rw", + "range": true, + "refId": "A" + } + ], + "title": "RW Pooler", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "index": 0, + "text": "Down" + }, + "1": { + "index": 1, + "text": "Up" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 3, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "max(cnpg_pgbouncer_up{namespace=\"$namespace\",service=\"$cluster-pooler-ro-metrics\"})", + "legendFormat": "ro", + "range": true, + "refId": "A" + } + ], + "title": "RO Pooler", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 0 + }, + "id": 4, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(cnpg_pg_stat_archiver_archived_count{namespace=\"$namespace\",service=\"$cluster-postgres-metrics\"}[5m]))", + "legendFormat": "archive rate", + "range": true, + "refId": "A" + } + ], + "title": "Archive Rate", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 0 + }, + "id": 5, + "options": { + "colorMode": "background", + "graphMode": 
"none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(splunk_operator_postgres_databases{phase=\"Failed\"})", + "legendFormat": "failed", + "range": true, + "refId": "A" + } + ], + "title": "Failed Databases", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 4, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 4 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum by (datname) (cnpg_pg_database_size_bytes{namespace=\"$namespace\",service=\"$cluster-postgres-metrics\"})", + "legendFormat": "{{datname}}", + "range": true, + "refId": "A" + } + ], + "title": "Database Size by Database", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 4, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 4 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum by (service) (cnpg_pgbouncer_pools_cl_active{namespace=\"$namespace\",service=~\"$cluster-pooler-(rw|ro)-metrics\"})", + "legendFormat": "{{service}} active", + "range": true, + "refId": "A" + }, + { + "editorMode": "code", + "expr": "sum by (service) 
(cnpg_pgbouncer_pools_cl_waiting{namespace=\"$namespace\",service=~\"$cluster-pooler-(rw|ro)-metrics\"})", + "legendFormat": "{{service}} waiting", + "range": true, + "refId": "B" + } + ], + "title": "PgBouncer Client Load", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 4, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.25 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 4 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum(rate(cnpg_pg_stat_archiver_archived_count{namespace=\"$namespace\",service=\"$cluster-postgres-metrics\"}[5m]))", + "legendFormat": "archived WAL / sec", + "range": true, + "refId": "A" + }, + { + "editorMode": "code", + "expr": "sum(cnpg_pg_wal_files_total{namespace=\"$namespace\",service=\"$cluster-postgres-metrics\"})", + "legendFormat": "wal files total", + "range": true, + "refId": "B" + } + ], + "title": "WAL Activity", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 4, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum by (phase) (splunk_operator_postgres_databases)", + "legendFormat": "{{phase}}", + "range": true, + "refId": "A" + } + ], + "title": "Fleet Database Phases", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + 
"mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 4, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 12 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "editorMode": "code", + "expr": "sum by (controller, result) (rate(splunk_operator_postgres_reconcile_total[5m]))", + "legendFormat": "{{controller}} {{result}}", + "range": true, + "refId": "A" + }, + { + "editorMode": "code", + "expr": "sum by (controller, error_class) (rate(splunk_operator_postgres_reconcile_errors_total[5m]))", + "legendFormat": "{{controller}} errors {{error_class}}", + "range": true, + "refId": "B" + } + ], + "title": "Controller Reconcile Activity", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "style": "dark", + "tags": [ + "postgresql", + "cnpg", + "pgbouncer", + "splunk-operator", + "reference" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "includeAll": false, + "label": "Datasource", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(cnpg_pg_postmaster_start_time_seconds, namespace)", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(cnpg_pg_postmaster_start_time_seconds, namespace)", + "refId": "Prometheus-namespace" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(cnpg_pg_postmaster_start_time_seconds{namespace=\"$namespace\"}, service)", + "hide": 0, + "includeAll": false, + "label": "Cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": { + "query": "label_values(cnpg_pg_postmaster_start_time_seconds{namespace=\"$namespace\"}, service)", + "refId": "Prometheus-cluster" + }, + "refresh": 2, + "regex": "/(.*)-postgres-metrics/", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "PostgreSQL Observability Reference", + "uid": "postgresql-observability-reference", + "version": 1, + "weekStart": "" +} diff --git 
a/docs/PostgreSQLObservabilityDashboard.md b/docs/PostgreSQLObservabilityDashboard.md
new file mode 100644
index 000000000..109ef78d2
--- /dev/null
+++ b/docs/PostgreSQLObservabilityDashboard.md
@@ -0,0 +1,65 @@
+# PostgreSQL Observability Dashboard Example
+
+This file provides a reference Grafana dashboard for the PostgreSQL observability model described in the PostgreSQL observability notes.
+
+The dashboard JSON lives at:
+
+- [PostgreSQLObservabilityDashboard.json](/Users/dpishchenkov/splunk-operator/docs/PostgreSQLObservabilityDashboard.json)
+
+## Purpose
+
+This dashboard is a reference artifact only.
+
+It is meant to show how a Grafana dashboard could combine:
+
+- runtime PostgreSQL and PgBouncer metrics exposed through the `PostgresCluster` observability path
+- controller metrics emitted by the PostgreSQL controllers
+
+It is not meant to imply that Grafana runtime resources are managed by the operator.
+
+## Panels Included
+
+The sample dashboard includes:
+
+- PostgreSQL target count
+- RW and RO PgBouncer availability
+- WAL archive rate
+- failed `PostgresDatabase` count
+- database size by database
+- PgBouncer active and waiting clients
+- WAL activity
+- fleet database phases
+- controller reconcile activity and errors
+
+## Assumptions
+
+The sample queries assume:
+
+- Prometheus is scraping the PostgreSQL metrics `Service` created by the `PostgresCluster` controller
+- Prometheus is scraping the PgBouncer metrics `Service` objects created for RW and RO poolers
+- Prometheus series include `namespace` and `service` labels
+- the cluster metrics service is named `<cluster-name>-postgres-metrics`
+- the PgBouncer metrics services are named `<cluster-name>-pooler-rw-metrics` and `<cluster-name>-pooler-ro-metrics`
+- the controller metrics branch is present for the `splunk_operator_postgres_*` metrics
+
+If your Prometheus relabeling differs, you may need to adjust the dashboard queries.
+
+## Import Notes
+
+To use the dashboard:
+
+1. Import the JSON file into Grafana.
+2. Select the correct Prometheus datasource.
+3. Choose the namespace.
+4. Choose the cluster name using the derived `cluster` variable.
+
+## Notes On Candidate Metrics
+
+Some PgBouncer queries in the sample use metrics that are good candidates but should still be verified against actual exporter output in the merged branch:
+
+- `cnpg_pgbouncer_pools_cl_waiting`
+- `cnpg_pgbouncer_pools_maxwait`
+- `cnpg_pgbouncer_stats_avg_wait_time`
+- `cnpg_pgbouncer_stats_total_wait_time`
+
+If those exact series are not present, keep the panel shape and replace the query with the actual exported metric name.

From 2cf1ca7a347dffb44f00f2e83007b5e658564d3f Mon Sep 17 00:00:00 2001
From: dpishchenkov
Date: Thu, 9 Apr 2026 15:16:00 +0200
Subject: [PATCH 23/36] style: fix links to docs.
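Use a repository-relative link so the dashboard JSON reference resolves for
every reader, instead of pointing at an absolute local filesystem path.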
--- docs/PostgreSQLObservabilityDashboard.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/PostgreSQLObservabilityDashboard.md b/docs/PostgreSQLObservabilityDashboard.md index 109ef78d2..22343afff 100644 --- a/docs/PostgreSQLObservabilityDashboard.md +++ b/docs/PostgreSQLObservabilityDashboard.md @@ -4,7 +4,7 @@ This file provides a reference Grafana dashboard for the PostgreSQL observabilit The dashboard JSON lives at: -- [PostgreSQLObservabilityDashboard.json](/Users/dpishchenkov/splunk-operator/docs/PostgreSQLObservabilityDashboard.json) +- [PostgreSQLObservabilityDashboard.json](./PostgreSQLObservabilityDashboard.json) ## Purpose From aa983fc41a7e9170f12fecc4e647375d0b1cd798 Mon Sep 17 00:00:00 2001 From: Kamil Ubych Date: Fri, 10 Apr 2026 11:54:13 +0200 Subject: [PATCH 24/36] changed metrics --- docs/PostgreSQLObservabilityDashboard.json | 456 ++++++++++++--------- 1 file changed, 269 insertions(+), 187 deletions(-) diff --git a/docs/PostgreSQLObservabilityDashboard.json b/docs/PostgreSQLObservabilityDashboard.json index aa0ffc765..913e730b6 100644 --- a/docs/PostgreSQLObservabilityDashboard.json +++ b/docs/PostgreSQLObservabilityDashboard.json @@ -52,7 +52,7 @@ }, "gridPos": { "h": 4, - "w": 4, + "w": 6, "x": 0, "y": 0 }, @@ -128,8 +128,8 @@ }, "gridPos": { "h": 4, - "w": 4, - "x": 4, + "w": 6, + "x": 6, "y": 0 }, "id": 2, @@ -204,8 +204,8 @@ }, "gridPos": { "h": 4, - "w": 4, - "x": 8, + "w": 6, + "x": 12, "y": 0 }, "id": 3, @@ -266,8 +266,8 @@ }, "gridPos": { "h": 4, - "w": 4, - "x": 12, + "w": 6, + "x": 18, "y": 0 }, "id": 4, @@ -298,68 +298,6 @@ "title": "Archive Rate", "type": "stat" }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 16, - "y": 0 - }, - "id": 5, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "center", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "textMode": "value" - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "editorMode": "code", - "expr": "sum(splunk_operator_postgres_databases{phase=\"Failed\"})", - "legendFormat": "failed", - "range": true, - "refId": "A" - } - ], - "title": "Failed Databases", - "type": "stat" - }, { "datasource": { "type": "prometheus", @@ -647,6 +585,21 @@ "title": "WAL Activity", "type": "timeseries" }, + { + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 100, + "options": { + "content": "## Controller-Runtime (built-in, zero code)\nReconcile count, duration, errors — provided automatically by the framework.", + "mode": "markdown" + }, + "title": "", + "type": "text" + }, { "datasource": { "type": "prometheus", @@ -654,84 +607,82 @@ }, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 4, - "scaleDistribution": { - "type": "linear" - 
}, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } + "lineWidth": 2 }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] + "unit": "ops" + } }, "gridPos": { "h": 8, - "w": 12, + "w": 8, "x": 0, - "y": 12 + "y": 14 }, "id": 9, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "rate(controller_runtime_reconcile_total{controller=~\"postgresCluster|postgresdatabase\"}[1m])", + "legendFormat": "{{controller}} / {{result}}", + "range": true, + "refId": "A" } + ], + "title": "Reconcile Rate (per second)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, - "pluginVersion": "10.4.2", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "lineWidth": 2 + }, + "unit": "s" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 14 + }, + "id": 10, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "editorMode": "code", - "expr": "sum by (phase) (splunk_operator_postgres_databases)", - "legendFormat": "{{phase}}", + "expr": "histogram_quantile(0.99, sum by (controller, le) (rate(controller_runtime_reconcile_time_seconds_bucket{controller=~\"postgresCluster|postgresdatabase\"}[5m])))", + "legendFormat": "p99 {{controller}}", "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, sum by (controller, le) (rate(controller_runtime_reconcile_time_seconds_bucket{controller=~\"postgresCluster|postgresdatabase\"}[5m])))", + "legendFormat": "p50 {{controller}}", + "range": true, + "refId": "B" } ], - "title": "Fleet Database Phases", + "title": "Reconcile Duration p50 / p99", "type": "timeseries" }, { @@ -742,90 +693,221 @@ "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "fixedColor": "red", + "mode": "fixed" }, "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", + "drawStyle": "bars", "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 4, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "lineWidth": 2 }, "unit": "ops" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 14 + }, + "id": 13, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "rate(controller_runtime_reconcile_errors_total{controller=~\"postgresCluster|postgresdatabase\"}[1m])", + "legendFormat": "{{controller}}", + "range": true, + "refId": "A" + } + ], + "title": "Reconcile Errors", + "type": "timeseries" + }, + { + "gridPos": { + "h": 2, + 
"w": 24, + "x": 0, + "y": 22 + }, + "id": 200, + "options": { + "content": "## Domain Metrics (custom, `splunk_operator_postgres_*`)\nFleet state gauges (collected every reconcile) and status transitions (emitted on condition changes).", + "mode": "markdown" + }, + "title": "", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 24 + }, + "id": 14, + "options": { + "legend": { + "displayMode": "table", + "placement": "right", + "values": [ + "value" + ] }, - "overrides": [] + "pieType": "donut" + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "splunk_operator_postgres_databases", + "legendFormat": "{{phase}}", + "range": true, + "refId": "A" + } + ], + "title": "Databases by Phase", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" }, "gridPos": { "h": 8, - "w": 12, - "x": 12, - "y": 12 + "w": 6, + "x": 6, + "y": 24 }, - "id": 10, + "id": 15, "options": { "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "displayMode": "table", + "placement": "right", + "values": [ + "value" + ] }, - "tooltip": { - "mode": "single", - "sort": "none" + "pieType": "donut" + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "splunk_operator_postgres_clusters", + "legendFormat": "{{phase}}", + "range": true, + "refId": "A" + } + ], + "title": "Clusters by Phase", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "unit": "short" } }, - "pluginVersion": "10.4.2", + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 24 + }, + "id": 16, + "options": { + "displayMode": "gradient", + "orientation": "horizontal" + }, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "editorMode": "code", - "expr": "sum by (controller, result) (rate(splunk_operator_postgres_reconcile_total[5m]))", - "legendFormat": "{{controller}} {{result}}", + "expr": "splunk_operator_postgres_managed_users", + "legendFormat": "{{state}}", "range": true, "refId": "A" + } + ], + "title": "Managed Users", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "bars", + "fillOpacity": 10, + "lineWidth": 2 + }, + "unit": "ops" }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/False/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 24 + }, + "id": 17, + "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, "editorMode": "code", - "expr": "sum by (controller, error_class) (rate(splunk_operator_postgres_reconcile_errors_total[5m]))", - "legendFormat": "{{controller}} errors {{error_class}}", + "expr": "sum by (controller, reason) (rate(splunk_operator_postgres_status_transitions_total{status=\"False\"}[5m]))", + "legendFormat": "{{controller}} / {{reason}}", "range": true, - "refId": "B" + "refId": "A" } ], - "title": "Controller Reconcile Activity", + "title": "Failures by Reason (per second)", "type": "timeseries" } ], From ed253364b88354e8c3acd4d6c0e5c9d19b1108d2 Mon Sep 17 00:00:00 2001 
From: dpishchenkov Date: Sun, 12 Apr 2026 18:46:49 +0200 Subject: [PATCH 25/36] Update monitoring reconciliation logic, tests --- api/v4/postgrescluster_types.go | 14 +-- api/v4/postgresclusterclass_types.go | 11 +- .../postgrescluster_controller_test.go | 85 ++++++++++++- internal/controller/suite_test.go | 1 + ...monitoring.coreos.com_servicemonitors.yaml | 32 +++++ pkg/postgresql/cluster/core/cluster.go | 105 ++++++++++++---- pkg/postgresql/cluster/core/events.go | 49 +++++--- pkg/postgresql/cluster/core/monitoring.go | 32 ++--- .../cluster/core/monitoring_unit_test.go | 114 +++++------------- pkg/postgresql/cluster/core/types.go | 32 +++-- 10 files changed, 302 insertions(+), 173 deletions(-) create mode 100644 internal/controller/testdata/crds/monitoring.coreos.com_servicemonitors.yaml diff --git a/api/v4/postgrescluster_types.go b/api/v4/postgrescluster_types.go index 5c3608058..6450b33fe 100644 --- a/api/v4/postgrescluster_types.go +++ b/api/v4/postgrescluster_types.go @@ -108,23 +108,23 @@ type PostgresClusterSpec struct { // +optional ClusterDeletionPolicy *string `json:"clusterDeletionPolicy,omitempty"` - // Observability contains configuration for metrics exposure features. + // Monitoring contains configuration for metrics exposure features. // +optional - Observability *PostgresObservabilityOverride `json:"observability,omitempty"` + Monitoring *PostgresClusterMonitoring `json:"monitoring,omitempty"` } -// PostgresObservabilityOverride overrides observability configuration options for PostgresClusterClass. -type PostgresObservabilityOverride struct { +// PostgresClusterMonitoring overrides monitoring configuration options for PostgresClusterClass. +type PostgresClusterMonitoring struct { // +optional - PostgreSQL *FeatureDisableOverride `json:"postgresql,omitempty"` + PostgreSQLMetrics *FeatureDisableOverride `json:"postgresqlMetrics,omitempty"` // +optional - PgBouncer *FeatureDisableOverride `json:"pgbouncer,omitempty"` + ConnectionPoolerMetrics *FeatureDisableOverride `json:"connectionPoolerMetrics,omitempty"` } type FeatureDisableOverride struct { - // Disable set to true will disable the feature even if it's enabled in the class. + // Disabled set to true will disable the feature even if it's enabled in the class. // +kubebuilder:default=false // +optional Disabled *bool `json:"disabled,omitempty"` diff --git a/api/v4/postgresclusterclass_types.go b/api/v4/postgresclusterclass_types.go index 74085d191..76281e8e8 100644 --- a/api/v4/postgresclusterclass_types.go +++ b/api/v4/postgresclusterclass_types.go @@ -49,6 +49,7 @@ type PostgresClusterClassSpec struct { CNPG *CNPGConfig `json:"cnpg,omitempty"` } +// +kubebuilder:validation:XValidation:rule="!has(self.monitoring) || !has(self.monitoring.connectionPoolerMetrics) || !has(self.monitoring.connectionPoolerMetrics.enabled) || !self.monitoring.connectionPoolerMetrics.enabled || (has(self.connectionPoolerEnabled) && self.connectionPoolerEnabled)",message="connectionPoolerEnabled must be true when monitoring.connectionPoolerMetrics.enabled is true" // PostgresClusterClassConfig contains provider-agnostic cluster configuration. // These fields define PostgresCluster infrastructure and can be overridden in PostgresCluster CR. type PostgresClusterClassConfig struct { @@ -100,12 +101,12 @@ type PostgresClusterClassConfig struct { // +optional ConnectionPoolerEnabled *bool `json:"connectionPoolerEnabled,omitempty"` - // Observability contains configuration for metrics exposure. 
+ // Monitoring contains configuration for metrics exposure. // When enabled, creates metrics resources for clusters using this class. // Can be overridden in PostgresCluster CR. // +kubebuilder:default={} // +optional - Observability *PostgresObservabilityClassConfig `json:"observability,omitempty"` + Monitoring *PostgresMonitoringClassConfig `json:"monitoring,omitempty"` } // ConnectionPoolerMode defines the PgBouncer connection pooling strategy. @@ -179,11 +180,11 @@ type PostgresClusterClassStatus struct { Phase *string `json:"phase,omitempty"` } -type PostgresObservabilityClassConfig struct { +type PostgresMonitoringClassConfig struct { // +optional - PostgreSQL *MetricsClassConfig `json:"postgresql,omitempty"` + PostgreSQLMetrics *MetricsClassConfig `json:"postgresqlMetrics,omitempty"` // +optional - PgBouncer *MetricsClassConfig `json:"pgbouncer,omitempty"` + ConnectionPoolerMetrics *MetricsClassConfig `json:"connectionPoolerMetrics,omitempty"` } type MetricsClassConfig struct { diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go index 9d4954d61..59737a2a4 100644 --- a/internal/controller/postgrescluster_controller_test.go +++ b/internal/controller/postgrescluster_controller_test.go @@ -28,15 +28,17 @@ import ( cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/reconcile" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" enterprisev4 "github.com/splunk/splunk-operator/api/v4" "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core" - pgprometheus "github.com/splunk/splunk-operator/pkg/postgresql/shared/adapter/prometheus" + corev1 "k8s.io/api/core/v1" ) /* @@ -86,6 +88,28 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { } } + recreateClassWithMonitoring := func(postgresMetricsEnabled bool) { + Expect(k8sClient.Delete(ctx, pgClusterClass)).To(Succeed()) + + pgClusterClass = &enterprisev4.PostgresClusterClass{ + ObjectMeta: metav1.ObjectMeta{Name: className}, + Spec: enterprisev4.PostgresClusterClassSpec{ + Provisioner: provisioner, + Config: &enterprisev4.PostgresClusterClassConfig{ + Instances: &[]int32{clusterMemberCount}[0], + Storage: &[]resource.Quantity{resource.MustParse(storageAmount)}[0], + PostgresVersion: &[]string{postgresVersion}[0], + ConnectionPoolerEnabled: &[]bool{poolerEnabled}[0], + Monitoring: &enterprisev4.PostgresMonitoringClassConfig{ + PostgreSQLMetrics: &enterprisev4.MetricsClassConfig{Enabled: ptr.To(postgresMetricsEnabled)}, + }, + }, + }, + } + + Expect(k8sClient.Create(ctx, pgClusterClass)).To(Succeed()) + } + BeforeEach(func() { nameSuffix := fmt.Sprintf("%d-%d-%d", GinkgoParallelProcess(), @@ -241,6 +265,65 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Expect(cond).NotTo(BeNil()) Expect(cond.ObservedGeneration).To(Equal(pc.Generation)) }) + + It("creates monitoring resources and sets MonitoringReady when monitoring is enabled", func() { + recreateClassWithMonitoring(true) + + Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed()) + reconcileNTimes(3) + + pc := &enterprisev4.PostgresCluster{} + Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed()) + cond := meta.FindStatusCondition(pc.Status.Conditions, 
"MonitoringReady") + Expect(cond).NotTo(BeNil()) + Expect(cond.Status).To(Equal(metav1.ConditionTrue)) + Expect(cond.Reason).To(Equal("ObservabilityResourcesReady")) + + metricsService := &corev1.Service{} + Expect(k8sClient.Get(ctx, types.NamespacedName{ + Name: clusterName + "-postgres-metrics", + Namespace: namespace, + }, metricsService)).To(Succeed()) + + serviceMonitor := &monitoringv1.ServiceMonitor{} + Expect(k8sClient.Get(ctx, types.NamespacedName{ + Name: clusterName + "-postgres-metrics-monitor", + Namespace: namespace, + }, serviceMonitor)).To(Succeed()) + }) + + It("removes monitoring resources and MonitoringReady when monitoring is disabled by cluster override", func() { + recreateClassWithMonitoring(true) + + Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed()) + reconcileNTimes(3) + + current := &enterprisev4.PostgresCluster{} + Expect(k8sClient.Get(ctx, pgClusterKey, current)).To(Succeed()) + current.Spec.Monitoring = &enterprisev4.PostgresClusterMonitoring{ + PostgreSQLMetrics: &enterprisev4.FeatureDisableOverride{Disabled: ptr.To(true)}, + } + Expect(k8sClient.Update(ctx, current)).To(Succeed()) + + reconcileNTimes(1) + + Expect(k8sClient.Get(ctx, pgClusterKey, current)).To(Succeed()) + Expect(meta.FindStatusCondition(current.Status.Conditions, "MonitoringReady")).To(BeNil()) + + metricsService := &corev1.Service{} + err := k8sClient.Get(ctx, types.NamespacedName{ + Name: clusterName + "-postgres-metrics", + Namespace: namespace, + }, metricsService) + Expect(apierrors.IsNotFound(err)).To(BeTrue()) + + serviceMonitor := &monitoringv1.ServiceMonitor{} + err = k8sClient.Get(ctx, types.NamespacedName{ + Name: clusterName + "-postgres-metrics-monitor", + Namespace: namespace, + }, serviceMonitor) + Expect(apierrors.IsNotFound(err)).To(BeTrue()) + }) }) }) diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index 8518541be..cf86dac87 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -78,6 +78,7 @@ var _ = BeforeSuite(func(ctx context.Context) { CRDDirectoryPaths: []string{ filepath.Join("..", "..", "config", "crd", "bases"), filepath.Join(cnpgModuleDir, "config", "crd", "bases"), + filepath.Join("testdata", "crds"), }, ErrorIfCRDPathMissing: true, } diff --git a/internal/controller/testdata/crds/monitoring.coreos.com_servicemonitors.yaml b/internal/controller/testdata/crds/monitoring.coreos.com_servicemonitors.yaml new file mode 100644 index 000000000..230e31d50 --- /dev/null +++ b/internal/controller/testdata/crds/monitoring.coreos.com_servicemonitors.yaml @@ -0,0 +1,32 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: servicemonitors.monitoring.coreos.com +spec: + group: monitoring.coreos.com + scope: Namespaced + names: + plural: servicemonitors + singular: servicemonitor + kind: ServiceMonitor + listKind: ServiceMonitorList + versions: + - name: v1 + served: true + storage: true + schema: + openAPIV3Schema: + type: object + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + x-kubernetes-preserve-unknown-fields: true + status: + type: object + x-kubernetes-preserve-unknown-fields: true diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 329571933..4b5152d32 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -344,35 +344,54 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req 
ctrl. rc.emitPoolerReadyTransition(postgresCluster, oldConditions) } - if err := reconcilePostgreSQLMetricsService(ctx, c, rc.Scheme, postgresCluster, isPostgreSQLMetricsEnabled(postgresCluster, clusterClass)); err != nil { - return ctrl.Result{}, err + postgresMetricsEnabled := isPostgreSQLMetricsEnabled(postgresCluster, clusterClass) + poolerMetricsEnabled := isConnectionPoolerMetricsEnabled(postgresCluster, clusterClass) + rwPoolerMetricsEnabled := poolerMetricsEnabled && poolerEnabled && rwPoolerExists + roPoolerMetricsEnabled := poolerMetricsEnabled && poolerEnabled && roPoolerExists + monitoringEnabled := postgresMetricsEnabled || (poolerMetricsEnabled && poolerEnabled) + + monitoringFailure := func(reason conditionReasons, eventReason, message string, err error) (ctrl.Result, error) { + return ctrl.Result{}, handleMonitoringFailure(ctx, c, rc, postgresCluster, reason, eventReason, message, err) } - poolerMetricsEnabled := isConnectionPoolerMetricsEnabled(postgresCluster, clusterClass) - rwPoolerMetricsEnabled := poolerMetricsEnabled && rwPoolerExists - roPoolerMetricsEnabled := poolerMetricsEnabled && roPoolerExists + oldConditions := make([]metav1.Condition, len(postgresCluster.Status.Conditions)) + copy(oldConditions, postgresCluster.Status.Conditions) + + if err := reconcilePostgreSQLMetricsService(ctx, c, rc.Scheme, postgresCluster, postgresMetricsEnabled); err != nil { + return monitoringFailure(reasonPostgresMetricsServiceFailed, EventMetricsServiceReconcileFailed, fmt.Sprintf("Failed to reconcile PostgreSQL metrics Service: %v", err), err) + } if err := reconcileConnectionPoolerMetricsService(ctx, c, rc.Scheme, postgresCluster, readWriteEndpoint, rwPoolerMetricsEnabled); err != nil { - return ctrl.Result{}, err + return monitoringFailure(reasonPoolerMetricsServiceFailed, EventMetricsServiceReconcileFailed, fmt.Sprintf("Failed to reconcile RW pooler metrics Service: %v", err), err) } if err := reconcileConnectionPoolerMetricsService(ctx, c, rc.Scheme, postgresCluster, readOnlyEndpoint, roPoolerMetricsEnabled); err != nil { - return ctrl.Result{}, err + return monitoringFailure(reasonPoolerMetricsServiceFailed, EventMetricsServiceReconcileFailed, fmt.Sprintf("Failed to reconcile RO pooler metrics Service: %v", err), err) } - - if err := reconcilePostgreSQLMetricsServiceMonitor( - ctx, c, rc.Scheme, postgresCluster, isPostgreSQLMetricsEnabled(postgresCluster, clusterClass), - ); err != nil { - return ctrl.Result{}, err + if err := reconcilePostgreSQLMetricsServiceMonitor(ctx, c, rc.Scheme, postgresCluster, postgresMetricsEnabled); err != nil { + return monitoringFailure(reasonPostgresMetricsMonitorFailed, EventServiceMonitorReconcileFailed, fmt.Sprintf("Failed to reconcile PostgreSQL metrics ServiceMonitor: %v", err), err) } - - if err := reconcileConnectionPoolerMetricsServiceMonitor( - ctx, c, rc.Scheme, postgresCluster, readWriteEndpoint, rwPoolerMetricsEnabled, - ); err != nil { - return ctrl.Result{}, err + if err := reconcileConnectionPoolerMetricsServiceMonitor(ctx, c, rc.Scheme, postgresCluster, readWriteEndpoint, rwPoolerMetricsEnabled); err != nil { + return monitoringFailure(reasonPoolerMetricsMonitorFailed, EventServiceMonitorReconcileFailed, fmt.Sprintf("Failed to reconcile RW pooler metrics ServiceMonitor: %v", err), err) } - if err := reconcileConnectionPoolerMetricsServiceMonitor( - ctx, c, rc.Scheme, postgresCluster, readOnlyEndpoint, roPoolerMetricsEnabled, - ); err != nil { - return ctrl.Result{}, err + if err := 
reconcileConnectionPoolerMetricsServiceMonitor(ctx, c, rc.Scheme, postgresCluster, readOnlyEndpoint, roPoolerMetricsEnabled); err != nil { + return monitoringFailure(reasonPoolerMetricsMonitorFailed, EventServiceMonitorReconcileFailed, fmt.Sprintf("Failed to reconcile RO pooler metrics ServiceMonitor: %v", err), err) + } + + if !monitoringEnabled { + if err := removeCondition(ctx, c, postgresCluster, monitoringReady); err != nil { + if apierrors.IsConflict(err) { + return ctrl.Result{Requeue: true}, nil + } + return ctrl.Result{}, err + } + } else { + if err := setCondition(ctx, c, postgresCluster, monitoringReady, metav1.ConditionTrue, reasonObservabilityResourcesReady, "Monitoring resources are ready"); err != nil { + if apierrors.IsConflict(err) { + return ctrl.Result{Requeue: true}, nil + } + return ctrl.Result{}, err + } + + rc.emitMonitoringReadyTransition(postgresCluster, oldConditions) } // Reconcile ConfigMap when CNPG cluster is healthy. @@ -845,6 +864,50 @@ func setStatus(ctx context.Context, c client.Client, metrics ports.Recorder, clu return nil } +// setCondition updates a specific condition on the PostgresCluster status. +func setCondition(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, condType conditionTypes, status metav1.ConditionStatus, reason conditionReasons, message string) error { + base := cluster.Status.DeepCopy() + + meta.SetStatusCondition(&cluster.Status.Conditions, metav1.Condition{ + Type: string(condType), + Status: status, + Reason: string(reason), + Message: message, + ObservedGeneration: cluster.Generation, + }) + + if equality.Semantic.DeepEqual(*base, cluster.Status) { + return nil + } + if err := c.Status().Update(ctx, cluster); err != nil { + return fmt.Errorf("failed to update PostgresCluster condition: %w", err) + } + return nil +} + +// removeCondition removes a specific condition from the PostgresCluster status. +func removeCondition(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, condType conditionTypes) error { + base := cluster.Status.DeepCopy() + + meta.RemoveStatusCondition(&cluster.Status.Conditions, string(condType)) + + if equality.Semantic.DeepEqual(*base, cluster.Status) { + return nil + } + if err := c.Status().Update(ctx, cluster); err != nil { + return fmt.Errorf("failed to remove PostgresCluster condition: %w", err) + } + return nil +} + +func handleMonitoringFailure(ctx context.Context, c client.Client, rc *ReconcileContext, cluster *enterprisev4.PostgresCluster, reason conditionReasons, eventReason string, message string, err error) error { + rc.emitWarning(cluster, eventReason, message) + if statusErr := setCondition(ctx, c, cluster, monitoringReady, metav1.ConditionFalse, reason, message); statusErr != nil { + return errors.Join(err, fmt.Errorf("failed to update MonitoringReady condition: %w", statusErr)) + } + return err +} + // generateConfigMap builds a ConfigMap with connection details for the PostgresCluster. 
func generateConfigMap(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster, secretName string) (*corev1.ConfigMap, error) { cmName := fmt.Sprintf("%s%s", cluster.Name, defaultConfigMapSuffix) diff --git a/pkg/postgresql/cluster/core/events.go b/pkg/postgresql/cluster/core/events.go index afcfd768e..d2692ed9b 100644 --- a/pkg/postgresql/cluster/core/events.go +++ b/pkg/postgresql/cluster/core/events.go @@ -10,25 +10,28 @@ import ( ) const ( - EventSecretReady = "SecretReady" - EventConfigMapReady = "ConfigMapReady" - EventClusterAdopted = "ClusterAdopted" - EventClusterCreationStarted = "ClusterCreationStarted" - EventClusterUpdateStarted = "ClusterUpdateStarted" - EventClusterReady = "ClusterReady" - EventPoolerCreationStarted = "PoolerCreationStarted" - EventPoolerReady = "PoolerReady" - EventCleanupComplete = "CleanupComplete" - EventClusterClassNotFound = "ClusterClassNotFound" - EventConfigMergeFailed = "ConfigMergeFailed" - EventSecretReconcileFailed = "SecretReconcileFailed" - EventClusterCreateFailed = "ClusterCreateFailed" - EventClusterUpdateFailed = "ClusterUpdateFailed" - EventManagedRolesFailed = "ManagedRolesFailed" - EventPoolerReconcileFailed = "PoolerReconcileFailed" - EventConfigMapReconcileFailed = "ConfigMapReconcileFailed" - EventClusterDegraded = "ClusterDegraded" - EventCleanupFailed = "CleanupFailed" + EventSecretReady = "SecretReady" + EventConfigMapReady = "ConfigMapReady" + EventClusterAdopted = "ClusterAdopted" + EventClusterCreationStarted = "ClusterCreationStarted" + EventClusterUpdateStarted = "ClusterUpdateStarted" + EventClusterReady = "ClusterReady" + EventPoolerCreationStarted = "PoolerCreationStarted" + EventPoolerReady = "PoolerReady" + EventCleanupComplete = "CleanupComplete" + EventClusterClassNotFound = "ClusterClassNotFound" + EventConfigMergeFailed = "ConfigMergeFailed" + EventSecretReconcileFailed = "SecretReconcileFailed" + EventClusterCreateFailed = "ClusterCreateFailed" + EventClusterUpdateFailed = "ClusterUpdateFailed" + EventManagedRolesFailed = "ManagedRolesFailed" + EventPoolerReconcileFailed = "PoolerReconcileFailed" + EventConfigMapReconcileFailed = "ConfigMapReconcileFailed" + EventClusterDegraded = "ClusterDegraded" + EventCleanupFailed = "CleanupFailed" + EventMonitoringReady = "MonitoringReady" + EventMetricsServiceReconcileFailed = "MetricsServiceReconcileFailed" + EventServiceMonitorReconcileFailed = "ServiceMonitorReconcileFailed" ) func (rc *ReconcileContext) emitNormal(obj client.Object, reason, message string) { @@ -60,3 +63,11 @@ func (rc *ReconcileContext) emitPoolerReadyTransition(obj client.Object, conditi rc.emitNormal(obj, EventPoolerReady, "Connection poolers are ready") } } + +// emitMonitoringReadyTransition emits MonitoringReady only when the condition was not +// previously True — prevents re-emission on every reconcile while already ready. 
+func (rc *ReconcileContext) emitMonitoringReadyTransition(obj client.Object, conditions []metav1.Condition) { + if !meta.IsStatusConditionTrue(conditions, string(monitoringReady)) { + rc.emitNormal(obj, EventMonitoringReady, "Monitoring resources are ready") + } +} \ No newline at end of file diff --git a/pkg/postgresql/cluster/core/monitoring.go b/pkg/postgresql/cluster/core/monitoring.go index c2f1267f8..e422a25d9 100644 --- a/pkg/postgresql/cluster/core/monitoring.go +++ b/pkg/postgresql/cluster/core/monitoring.go @@ -37,48 +37,32 @@ const ( ) func isPostgreSQLMetricsEnabled(cluster *enterprisev4.PostgresCluster, class *enterprisev4.PostgresClusterClass) bool { - if class == nil || class.Spec.Config == nil || class.Spec.Config.Observability == nil { + if class == nil || class.Spec.Config == nil || class.Spec.Config.Monitoring == nil { return false } - classCfg := class.Spec.Config.Observability.PostgreSQL + classCfg := class.Spec.Config.Monitoring.PostgreSQLMetrics if classCfg == nil || classCfg.Enabled == nil || !*classCfg.Enabled { return false } - if cluster == nil || cluster.Spec.Observability == nil || cluster.Spec.Observability.PostgreSQL == nil { + if cluster == nil || cluster.Spec.Monitoring == nil || cluster.Spec.Monitoring.PostgreSQLMetrics == nil { return true } - override := cluster.Spec.Observability.PostgreSQL.Disabled + override := cluster.Spec.Monitoring.PostgreSQLMetrics.Disabled return override == nil || !*override } -func isConnectionPoolerEnabled(cluster *enterprisev4.PostgresCluster, class *enterprisev4.PostgresClusterClass) bool { - if class == nil || class.Spec.Config == nil || class.Spec.Config.ConnectionPoolerEnabled == nil { - return false - } - if !*class.Spec.Config.ConnectionPoolerEnabled { - return false - } - if cluster == nil || cluster.Spec.ConnectionPoolerEnabled == nil { - return true - } - return *cluster.Spec.ConnectionPoolerEnabled -} - func isConnectionPoolerMetricsEnabled(cluster *enterprisev4.PostgresCluster, class *enterprisev4.PostgresClusterClass) bool { - if !isConnectionPoolerEnabled(cluster, class) { - return false - } - if class == nil || class.Spec.Config == nil || class.Spec.Config.Observability == nil { + if class == nil || class.Spec.Config == nil || class.Spec.Config.Monitoring == nil { return false } - classCfg := class.Spec.Config.Observability.PgBouncer + classCfg := class.Spec.Config.Monitoring.ConnectionPoolerMetrics if classCfg == nil || classCfg.Enabled == nil || !*classCfg.Enabled { return false } - if cluster == nil || cluster.Spec.Observability == nil || cluster.Spec.Observability.PgBouncer == nil { + if cluster == nil || cluster.Spec.Monitoring == nil || cluster.Spec.Monitoring.ConnectionPoolerMetrics == nil { return true } - override := cluster.Spec.Observability.PgBouncer.Disabled + override := cluster.Spec.Monitoring.ConnectionPoolerMetrics.Disabled return override == nil || !*override } diff --git a/pkg/postgresql/cluster/core/monitoring_unit_test.go b/pkg/postgresql/cluster/core/monitoring_unit_test.go index 545ea25da..fe10c0943 100644 --- a/pkg/postgresql/cluster/core/monitoring_unit_test.go +++ b/pkg/postgresql/cluster/core/monitoring_unit_test.go @@ -21,7 +21,7 @@ func TestIsPostgreSQLMetricsEnabled(t *testing.T) { want bool }{ { - name: "disabled when class observability is absent", + name: "disabled when class monitoring is absent", class: &enterprisev4.PostgresClusterClass{ Spec: enterprisev4.PostgresClusterClassSpec{ Config: &enterprisev4.PostgresClusterClassConfig{}, @@ -32,7 +32,7 @@ func 
TestIsPostgreSQLMetricsEnabled(t *testing.T) { { name: "enabled when class enables and cluster override is unset", cluster: &enterprisev4.PostgresCluster{}, - class: newClassWithObservability( + class: newClassWithMonitoring( ptr.To(true), nil, nil, @@ -43,12 +43,12 @@ func TestIsPostgreSQLMetricsEnabled(t *testing.T) { name: "disabled when cluster override disables", cluster: &enterprisev4.PostgresCluster{ Spec: enterprisev4.PostgresClusterSpec{ - Observability: &enterprisev4.PostgresObservabilityOverride{ - PostgreSQL: &enterprisev4.FeatureDisableOverride{Disabled: ptr.To(true)}, + Monitoring: &enterprisev4.PostgresClusterMonitoring{ + PostgreSQLMetrics: &enterprisev4.FeatureDisableOverride{Disabled: ptr.To(true)}, }, }, }, - class: newClassWithObservability( + class: newClassWithMonitoring( ptr.To(true), nil, nil, @@ -59,12 +59,12 @@ func TestIsPostgreSQLMetricsEnabled(t *testing.T) { name: "disabled when class disables even if cluster has override struct", cluster: &enterprisev4.PostgresCluster{ Spec: enterprisev4.PostgresClusterSpec{ - Observability: &enterprisev4.PostgresObservabilityOverride{ - PostgreSQL: &enterprisev4.FeatureDisableOverride{Disabled: ptr.To(false)}, + Monitoring: &enterprisev4.PostgresClusterMonitoring{ + PostgreSQLMetrics: &enterprisev4.FeatureDisableOverride{Disabled: ptr.To(false)}, }, }, }, - class: newClassWithObservability( + class: newClassWithMonitoring( ptr.To(false), nil, nil, @@ -81,7 +81,7 @@ func TestIsPostgreSQLMetricsEnabled(t *testing.T) { } } -func TestIsConnectionPoolerEnabled(t *testing.T) { +func TestIsConnectionPoolerMetricsEnabled(t *testing.T) { tests := []struct { name string cluster *enterprisev4.PostgresCluster @@ -89,85 +89,33 @@ func TestIsConnectionPoolerEnabled(t *testing.T) { want bool }{ { - name: "disabled when class config is absent", - class: &enterprisev4.PostgresClusterClass{}, - want: false, - }, - { - name: "inherits enabled class setting when cluster override is unset", + name: "disabled when class monitoring is absent", cluster: &enterprisev4.PostgresCluster{}, class: &enterprisev4.PostgresClusterClass{ Spec: enterprisev4.PostgresClusterClassSpec{ - Config: &enterprisev4.PostgresClusterClassConfig{ - ConnectionPoolerEnabled: ptr.To(true), - }, - }, - }, - want: true, - }, - { - name: "cluster can disable class enabled pooler", - cluster: &enterprisev4.PostgresCluster{ - Spec: enterprisev4.PostgresClusterSpec{ - ConnectionPoolerEnabled: ptr.To(false), - }, - }, - class: &enterprisev4.PostgresClusterClass{ - Spec: enterprisev4.PostgresClusterClassSpec{ - Config: &enterprisev4.PostgresClusterClassConfig{ - ConnectionPoolerEnabled: ptr.To(true), - }, - }, - }, - want: false, - }, - { - name: "class disabled wins", - cluster: &enterprisev4.PostgresCluster{ - Spec: enterprisev4.PostgresClusterSpec{ - ConnectionPoolerEnabled: ptr.To(true), - }, - }, - class: &enterprisev4.PostgresClusterClass{ - Spec: enterprisev4.PostgresClusterClassSpec{ - Config: &enterprisev4.PostgresClusterClassConfig{ - ConnectionPoolerEnabled: ptr.To(false), - }, + Config: &enterprisev4.PostgresClusterClassConfig{}, }, }, want: false, }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := isConnectionPoolerEnabled(tt.cluster, tt.class) - assert.Equal(t, tt.want, got) - }) - } -} - -func TestIsConnectionPoolerMetricsEnabled(t *testing.T) { - tests := []struct { - name string - cluster *enterprisev4.PostgresCluster - class *enterprisev4.PostgresClusterClass - want bool - }{ { - name: "disabled when pooler itself is disabled", + 
name: "enabled when class enables and cluster override is unset", cluster: &enterprisev4.PostgresCluster{}, - class: newClassWithObservability( + class: newClassWithMonitoring( nil, ptr.To(true), - nil, + ptr.To(true), ), - want: false, + want: true, }, { - name: "enabled when pooler and pgbouncer metrics are enabled", - cluster: &enterprisev4.PostgresCluster{}, - class: newClassWithObservability( + name: "enabled even when cluster explicitly disables the pooler", + cluster: &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ConnectionPoolerEnabled: ptr.To(false), + }, + }, + class: newClassWithMonitoring( nil, ptr.To(true), ptr.To(true), @@ -178,12 +126,12 @@ func TestIsConnectionPoolerMetricsEnabled(t *testing.T) { name: "disabled when cluster override disables pgbouncer metrics", cluster: &enterprisev4.PostgresCluster{ Spec: enterprisev4.PostgresClusterSpec{ - Observability: &enterprisev4.PostgresObservabilityOverride{ - PgBouncer: &enterprisev4.FeatureDisableOverride{Disabled: ptr.To(true)}, + Monitoring: &enterprisev4.PostgresClusterMonitoring{ + ConnectionPoolerMetrics: &enterprisev4.FeatureDisableOverride{Disabled: ptr.To(true)}, }, }, }, - class: newClassWithObservability( + class: newClassWithMonitoring( nil, ptr.To(true), ptr.To(true), @@ -193,7 +141,7 @@ func TestIsConnectionPoolerMetricsEnabled(t *testing.T) { { name: "disabled when class disables pgbouncer metrics", cluster: &enterprisev4.PostgresCluster{}, - class: newClassWithObservability( + class: newClassWithMonitoring( nil, ptr.To(true), ptr.To(false), @@ -308,18 +256,18 @@ func newTestMonitoringCluster() *enterprisev4.PostgresCluster { } } -func newClassWithObservability( +func newClassWithMonitoring( postgresEnabled *bool, poolerEnabled *bool, - pgBouncerMetricsEnabled *bool, + connectionPoolerMetricsEnabled *bool, ) *enterprisev4.PostgresClusterClass { return &enterprisev4.PostgresClusterClass{ Spec: enterprisev4.PostgresClusterClassSpec{ Config: &enterprisev4.PostgresClusterClassConfig{ ConnectionPoolerEnabled: poolerEnabled, - Observability: &enterprisev4.PostgresObservabilityClassConfig{ - PostgreSQL: &enterprisev4.MetricsClassConfig{Enabled: postgresEnabled}, - PgBouncer: &enterprisev4.MetricsClassConfig{Enabled: pgBouncerMetricsEnabled}, + Monitoring: &enterprisev4.PostgresMonitoringClassConfig{ + PostgreSQLMetrics: &enterprisev4.MetricsClassConfig{Enabled: postgresEnabled}, + ConnectionPoolerMetrics: &enterprisev4.MetricsClassConfig{Enabled: connectionPoolerMetricsEnabled}, }, }, }, diff --git a/pkg/postgresql/cluster/core/types.go b/pkg/postgresql/cluster/core/types.go index 7a43322fe..d4889edaa 100644 --- a/pkg/postgresql/cluster/core/types.go +++ b/pkg/postgresql/cluster/core/types.go @@ -76,21 +76,27 @@ const ( failedClusterPhase reconcileClusterPhases = "Failed" // condition types - clusterReady conditionTypes = "ClusterReady" - poolerReady conditionTypes = "PoolerReady" + clusterReady conditionTypes = "ClusterReady" + poolerReady conditionTypes = "PoolerReady" + monitoringReady conditionTypes = "MonitoringReady" // condition reasons — clusterReady - reasonClusterClassNotFound conditionReasons = "ClusterClassNotFound" - reasonManagedRolesFailed conditionReasons = "ManagedRolesReconciliationFailed" - reasonClusterBuildFailed conditionReasons = "ClusterBuildFailed" - reasonClusterBuildSucceeded conditionReasons = "ClusterBuildSucceeded" - reasonClusterGetFailed conditionReasons = "ClusterGetFailed" - reasonClusterPatchFailed conditionReasons = "ClusterPatchFailed" - 
reasonInvalidConfiguration  conditionReasons = "InvalidConfiguration"
-	reasonConfigMapFailed       conditionReasons = "ConfigMapReconciliationFailed"
-	reasonUserSecretFailed      conditionReasons = "UserSecretReconciliationFailed"
-	reasonSuperUserSecretFailed conditionReasons = "SuperUserSecretFailed"
-	reasonClusterDeleteFailed   conditionReasons = "ClusterDeleteFailed"
+	reasonClusterClassNotFound         conditionReasons = "ClusterClassNotFound"
+	reasonManagedRolesFailed           conditionReasons = "ManagedRolesReconciliationFailed"
+	reasonClusterBuildFailed           conditionReasons = "ClusterBuildFailed"
+	reasonClusterBuildSucceeded        conditionReasons = "ClusterBuildSucceeded"
+	reasonClusterGetFailed             conditionReasons = "ClusterGetFailed"
+	reasonClusterPatchFailed           conditionReasons = "ClusterPatchFailed"
+	reasonInvalidConfiguration         conditionReasons = "InvalidConfiguration"
+	reasonConfigMapFailed              conditionReasons = "ConfigMapReconciliationFailed"
+	reasonUserSecretFailed             conditionReasons = "UserSecretReconciliationFailed"
+	reasonSuperUserSecretFailed        conditionReasons = "SuperUserSecretFailed"
+	reasonClusterDeleteFailed          conditionReasons = "ClusterDeleteFailed"
+	reasonObservabilityResourcesReady  conditionReasons = "ObservabilityResourcesReady"
+	reasonPostgresMetricsMonitorFailed conditionReasons = "PostgresMetricsMonitorFailed"
+	reasonPostgresMetricsServiceFailed conditionReasons = "PostgresMetricsServiceFailed"
+	reasonPoolerMetricsServiceFailed   conditionReasons = "PoolerMetricsServiceFailed"
+	reasonPoolerMetricsMonitorFailed   conditionReasons = "PoolerMetricsMonitorFailed"

 	// condition reasons — poolerReady
 	reasonPoolerReconciliationFailed conditionReasons = "PoolerReconciliationFailed"

From d353d0530eb565dded9c5ab6d655fa5ee7e3304a Mon Sep 17 00:00:00 2001
From: dpishchenkov
Date: Wed, 15 Apr 2026 13:06:35 +0200
Subject: [PATCH 26/36] Remove prometheus-operator dependency, add scrape annotations.
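
Switch PostgreSQL and PgBouncer metrics exposure from dedicated
Services and ServiceMonitors to Prometheus scrape annotations on the
pods: PostgreSQL pods inherit them through the CNPG Cluster's
spec.inheritedMetadata, pooler pods through the Pooler pod template.
The prometheus-operator scheme registration, the ServiceMonitor RBAC
marker, and the MonitoringReady condition plumbing are removed.

Annotations applied to scraped pods (port 9127 on PgBouncer poolers):

    prometheus.io/scrape: "true"
    prometheus.io/path: /metrics
    prometheus.io/port: "9187"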
--- cmd/main.go | 2 - ...ise.splunk.com_postgresclusterclasses.yaml | 30 ++ ...nterprise.splunk.com_postgresclusters.yaml | 21 + docs/postgresSQLMonitoring-e2e.md | 285 +++++++++++ .../controller/postgrescluster_controller.go | 8 +- .../postgrescluster_controller_test.go | 166 ++++--- .../controller/postgresdatabase_controller.go | 6 +- internal/controller/suite_test.go | 6 - ...monitoring.coreos.com_servicemonitors.yaml | 32 -- pkg/postgresql/cluster/core/cluster.go | 141 ++---- .../cluster/core/cluster_unit_test.go | 76 ++- pkg/postgresql/cluster/core/events.go | 11 - pkg/postgresql/cluster/core/monitoring.go | 456 ++---------------- .../cluster/core/monitoring_unit_test.go | 158 ++---- pkg/postgresql/cluster/core/types.go | 33 +- .../monitoring/postgrescluster.yaml | 7 + .../monitoring/postgresclusterclass.yaml | 21 + test/postgresql/monitoring/values.yaml | 37 ++ 18 files changed, 706 insertions(+), 790 deletions(-) create mode 100644 docs/postgresSQLMonitoring-e2e.md delete mode 100644 internal/controller/testdata/crds/monitoring.coreos.com_servicemonitors.yaml create mode 100644 test/postgresql/monitoring/postgrescluster.yaml create mode 100644 test/postgresql/monitoring/postgresclusterclass.yaml create mode 100644 test/postgresql/monitoring/values.yaml diff --git a/cmd/main.go b/cmd/main.go index 402264560..d7cc6cc8b 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -56,7 +56,6 @@ import ( "github.com/splunk/splunk-operator/internal/controller" cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" - monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" pgprometheus "github.com/splunk/splunk-operator/pkg/postgresql/shared/adapter/prometheus" //+kubebuilder:scaffold:imports //extapi "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" @@ -72,7 +71,6 @@ func init() { utilruntime.Must(enterpriseApi.AddToScheme(scheme)) utilruntime.Must(enterpriseApiV3.AddToScheme(scheme)) utilruntime.Must(cnpgv1.AddToScheme(scheme)) - utilruntime.Must(monitoringv1.AddToScheme(scheme)) //+kubebuilder:scaffold:scheme //utilruntime.Must(extapi.AddToScheme(scheme)) } diff --git a/config/crd/bases/enterprise.splunk.com_postgresclusterclasses.yaml b/config/crd/bases/enterprise.splunk.com_postgresclusterclasses.yaml index cd3d7ab7a..353d17e6e 100644 --- a/config/crd/bases/enterprise.splunk.com_postgresclusterclasses.yaml +++ b/config/crd/bases/enterprise.splunk.com_postgresclusterclasses.yaml @@ -134,6 +134,30 @@ spec: maximum: 10 minimum: 1 type: integer + monitoring: + default: {} + description: |- + Monitoring contains configuration for metrics exposure. + When enabled, creates metrics resources for clusters using this class. + Can be overridden in PostgresCluster CR. + properties: + connectionPoolerMetrics: + properties: + enabled: + default: false + description: Enabled controls whether metrics resources + should be created for this target. + type: boolean + type: object + postgresqlMetrics: + properties: + enabled: + default: false + description: Enabled controls whether metrics resources + should be created for this target. + type: boolean + type: object + type: object pgHBA: description: |- PgHBA contains pg_hba.conf host-based authentication rules. 
@@ -231,6 +255,12 @@ spec:
                 pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
                 x-kubernetes-int-or-string: true
             type: object
+            x-kubernetes-validations:
+            - message: connectionPoolerEnabled must be true when monitoring.connectionPoolerMetrics.enabled
+                is true
+              rule: '!has(self.monitoring) || !has(self.monitoring.connectionPoolerMetrics)
+                || !has(self.monitoring.connectionPoolerMetrics.enabled) || !self.monitoring.connectionPoolerMetrics.enabled
+                || (has(self.connectionPoolerEnabled) && self.connectionPoolerEnabled)'
           provisioner:
             description: |-
               Provisioner identifies which database provisioner to use.
diff --git a/config/crd/bases/enterprise.splunk.com_postgresclusters.yaml b/config/crd/bases/enterprise.splunk.com_postgresclusters.yaml
index 09c7de13c..ff5d6dbf5 100644
--- a/config/crd/bases/enterprise.splunk.com_postgresclusters.yaml
+++ b/config/crd/bases/enterprise.splunk.com_postgresclusters.yaml
@@ -126,6 +126,27 @@ spec:
                 x-kubernetes-list-map-keys:
                 - name
                 x-kubernetes-list-type: map
+              monitoring:
+                description: Monitoring contains configuration for metrics exposure
+                  features.
+                properties:
+                  connectionPoolerMetrics:
+                    properties:
+                      disabled:
+                        default: false
+                        description: Disabled set to true will disable the feature
+                          even if it's enabled in the class.
+                        type: boolean
+                    type: object
+                  postgresqlMetrics:
+                    properties:
+                      disabled:
+                        default: false
+                        description: Disabled set to true will disable the feature
+                          even if it's enabled in the class.
+                        type: boolean
+                    type: object
+                type: object
               pgHBA:
                 default: []
                 description: |-
diff --git a/docs/postgresSQLMonitoring-e2e.md b/docs/postgresSQLMonitoring-e2e.md
new file mode 100644
index 000000000..e4409753a
--- /dev/null
+++ b/docs/postgresSQLMonitoring-e2e.md
@@ -0,0 +1,285 @@
+# PostgreSQL Monitoring E2E on KIND
+
+This validates the PostgreSQL and PgBouncer monitoring flow in namespace `test`.
+
+## Goal
+
+Verify that:
+
+- PostgreSQL pods are scraped through pod annotations
+- PgBouncer pooler pods are scraped through pod annotations
+- no dedicated metrics `Service` is required
+- no `ServiceMonitor` is used for PostgreSQL or PgBouncer
+
+`ServiceMonitor` is still acceptable for operator-controller metrics if you want that separately, but it is not part of this feature validation.
+
+## Prerequisites
+
+- KIND cluster is running
+- CNPG is installed
+- Splunk Operator is installed
+- CRDs are up to date (quick checks below)
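+
+To confirm these quickly (a sketch; it assumes the default CNPG install
+namespace and deployment name):
+
+```bash
+kubectl get nodes                                          # KIND cluster is up
+kubectl get deploy -n cnpg-system cnpg-controller-manager  # CNPG is installed
+kubectl get deploy -A | rg splunk-operator                 # Splunk Operator is installed
+kubectl get crd postgresclusters.enterprise.splunk.com \
+  postgresclusterclasses.enterprise.splunk.com             # operator CRDs exist
+```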
+## 1. Install Prometheus and Grafana
+
+Create `values.yaml`:
+
+```yaml
+grafana:
+  adminPassword: admin
+
+alertmanager:
+  enabled: false
+
+kubeStateMetrics:
+  enabled: false
+
+nodeExporter:
+  enabled: false
+
+prometheus:
+  prometheusSpec:
+    additionalScrapeConfigs:
+      - job_name: annotated-pods
+        kubernetes_sd_configs:
+          - role: pod
+        relabel_configs:
+          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
+            action: keep
+            regex: true
+          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
+            action: replace
+            target_label: __metrics_path__
+            regex: (.+)
+          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
+            action: replace
+            regex: ([^:]+)(?::\d+)?;(\d+)
+            replacement: $1:$2
+            target_label: __address__
+          - source_labels: [__meta_kubernetes_namespace]
+            action: replace
+            target_label: namespace
+          - source_labels: [__meta_kubernetes_pod_name]
+            action: replace
+            target_label: pod
+```
+
+Install the stack:
+
+```bash
+kubectl create namespace monitoring
+
+helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+helm repo add grafana https://grafana.github.io/helm-charts
+helm repo update
+
+helm install kube-prometheus prometheus-community/kube-prometheus-stack \
+  --namespace monitoring \
+  -f values.yaml
+```
+
+## 2. Optional: scrape operator-controller metrics
+
+This is separate from the PostgreSQL and PgBouncer validation.
+
+Grant Prometheus access:
+
+```bash
+kubectl apply -f - <<EOF
+# (RBAC manifest granting Prometheus access to the operator metrics endpoint; elided)
+EOF
+```
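+
+## 3. Deploy the PostgresClusterClass
+
+Apply the monitoring-enabled class from the repo:
+
+```bash
+kubectl apply -f test/postgresql/monitoring/postgresclusterclass.yaml
+```
+
+That file is the source of truth. As a minimal sketch of the
+monitoring-relevant fields (the name and provisioner value here are
+illustrative, not the committed ones):
+
+```yaml
+apiVersion: enterprise.splunk.com/v4
+kind: PostgresClusterClass
+metadata:
+  name: monitoring-class   # illustrative name
+spec:
+  provisioner: cnpg        # assumed provisioner value
+  config:
+    # The CRD's CEL rule requires the pooler to be enabled whenever
+    # connectionPoolerMetrics is enabled.
+    connectionPoolerEnabled: true
+    monitoring:
+      postgresqlMetrics:
+        enabled: true
+      connectionPoolerMetrics:
+        enabled: true
+```
+
+## 4. Deploy the PostgresCluster
+
+Create the cluster in the `test` namespace from the committed sample:
+
+```bash
+kubectl apply -n test -f test/postgresql/monitoring/postgrescluster.yaml
+```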
+## 5. Verify pod annotations
+
+PostgreSQL pods:
+
+```bash
+kubectl get pods -n test -l cnpg.io/cluster=<cluster-name> -o yaml | rg 'prometheus.io/'
+```
+
+Expected:
+
+- `prometheus.io/scrape: "true"`
+- `prometheus.io/path: /metrics`
+- `prometheus.io/port: "9187"`
+
+Pooler pods:
+
+```bash
+kubectl get pods -n test -l cnpg.io/poolerName=<cluster-name>-pooler-rw -o yaml | rg 'prometheus.io/'
+kubectl get pods -n test -l cnpg.io/poolerName=<cluster-name>-pooler-ro -o yaml | rg 'prometheus.io/'
+```
+
+Expected:
+
+- `prometheus.io/scrape: "true"`
+- `prometheus.io/path: /metrics`
+- `prometheus.io/port: "9127"`
+
+## 6. Access Prometheus
+
+```bash
+kubectl port-forward -n monitoring svc/kube-prometheus-prometheus 9090:9090
+```
+
+Open:
+
+- http://localhost:9090
+
+Useful checks:
+
+```promql
+up{job="annotated-pods", namespace="test"}
+```
+
+```promql
+count by (pod) (cnpg_pg_postmaster_start_time_seconds{namespace="test"})
+```
+
+```promql
+cnpg_pgbouncer_up{namespace="test"}
+```
+
+## 7. Access Grafana
+
+Port-forward Grafana:
+
+```bash
+kubectl port-forward svc/kube-prometheus-grafana -n monitoring 3000:80
+```
+
+Open:
+
+- http://localhost:3000
+
+Login:
+
+- user: `admin`
+- password: `admin`
+
+Use Grafana in one of two ways:
+
+### Explore
+
+1. Open **Explore**
+2. Select the default **Prometheus** datasource
+3. Run PromQL queries such as:
+
+```promql
+up{job="annotated-pods", namespace="test"}
+```
+
+```promql
+cnpg_pg_postmaster_start_time_seconds{namespace="test"}
+```
+
+```promql
+cnpg_pgbouncer_up{namespace="test"}
+```
+
+### Dashboard import
+
+You can also import the reference dashboard from:
+
+- [PostgreSQLObservabilityDashboard.json](./PostgreSQLObservabilityDashboard.json)
+
+In Grafana:
+
+1. Go to **Dashboards**
+2. Click **New** -> **Import**
+3. Upload `docs/PostgreSQLObservabilityDashboard.json`
+4. Select the Prometheus datasource
+
+## 8. Optional disable test
+
+Disable monitoring in the `PostgresCluster` and verify the annotations disappear:
+
+```bash
+kubectl patch postgrescluster <cluster-name> -n test --type=merge -p '
+spec:
+  monitoring:
+    postgresqlMetrics:
+      disabled: true
+    connectionPoolerMetrics:
+      disabled: true
+'
+```
+
+Then re-check:
+
+```bash
+kubectl get pods -n test -l cnpg.io/cluster=<cluster-name> -o yaml | rg 'prometheus.io/' || true
+kubectl get pods -n test -l cnpg.io/poolerName=<cluster-name>-pooler-rw -o yaml | rg 'prometheus.io/' || true
+kubectl get pods -n test -l cnpg.io/poolerName=<cluster-name>-pooler-ro -o yaml | rg 'prometheus.io/' || true
+```
+
+Prometheus should also stop showing those targets under `annotated-pods` after the next discovery refresh.
+
+## Notes
+
+- Use `ServiceMonitor` only for operator-controller metrics if needed.
+- Do not use `ServiceMonitor` for PostgreSQL or PgBouncer in this E2E, because that bypasses the feature under test.
+- Verify both:
+  - reconciled CNPG specs
+  - actual pod annotations
+- PostgreSQL annotations come from CNPG `Cluster.Spec.InheritedMetadata`
+- pooler annotations come from CNPG `Pooler.Spec.Template`
diff --git a/internal/controller/postgrescluster_controller.go b/internal/controller/postgrescluster_controller.go
index 75b5bc50f..78aecad32 100644
--- a/internal/controller/postgrescluster_controller.go
+++ b/internal/controller/postgrescluster_controller.go
@@ -50,7 +50,7 @@ type PostgresClusterReconciler struct {
 	Metrics        ports.Recorder
 	FleetCollector *pgprometheus.FleetCollector
 }
-// +kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors,verbs=get;list;watch;create;update;patch;delete
+
 // +kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresclusters,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresclusters/status,verbs=get;update;patch
 // +kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresclusters/finalizers,verbs=update
@@ -62,7 +62,11 @@ type PostgresClusterReconciler struct {
 // +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch

 func (r *PostgresClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
-	rc := &clustercore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder, Metrics: r.Metrics}
+	metrics := r.Metrics
+	if metrics == nil {
+		metrics = &pgprometheus.NoopRecorder{}
+	}
+	rc := &clustercore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder, Metrics: metrics}
 	result, err := clustercore.PostgresClusterService(ctx, rc, req)
 	r.FleetCollector.CollectClusterMetrics(ctx, r.Client, r.Metrics)
 	if sharedreconcile.IsPureConflict(err) {
diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go
index 59737a2a4..730eb3251 100644
--- a/internal/controller/postgrescluster_controller_test.go
+++ b/internal/controller/postgrescluster_controller_test.go
@@ -28,17 +28,13 @@ import (
 	cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
 	. "github.com/onsi/ginkgo/v2"
 	.
"github.com/onsi/gomega" - monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/reconcile" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/ptr" - enterprisev4 "github.com/splunk/splunk-operator/api/v4" "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core" - corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) /* @@ -81,6 +77,15 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { req reconcile.Request ) + const ( + scrapeAnnotationKey = "prometheus.io/scrape" + pathAnnotationKey = "prometheus.io/path" + portAnnotationKey = "prometheus.io/port" + metricsPath = "/metrics" + postgresPort = "9187" + poolerPort = "9127" + ) + reconcileNTimes := func(times int) { for i := 0; i < times; i++ { _, err := reconciler.Reconcile(ctx, req) @@ -88,25 +93,18 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { } } - recreateClassWithMonitoring := func(postgresMetricsEnabled bool) { + recreateClusterClass := func(modify func(*enterprisev4.PostgresClusterClass)) { Expect(k8sClient.Delete(ctx, pgClusterClass)).To(Succeed()) - - pgClusterClass = &enterprisev4.PostgresClusterClass{ - ObjectMeta: metav1.ObjectMeta{Name: className}, - Spec: enterprisev4.PostgresClusterClassSpec{ - Provisioner: provisioner, - Config: &enterprisev4.PostgresClusterClassConfig{ - Instances: &[]int32{clusterMemberCount}[0], - Storage: &[]resource.Quantity{resource.MustParse(storageAmount)}[0], - PostgresVersion: &[]string{postgresVersion}[0], - ConnectionPoolerEnabled: &[]bool{poolerEnabled}[0], - Monitoring: &enterprisev4.PostgresMonitoringClassConfig{ - PostgreSQLMetrics: &enterprisev4.MetricsClassConfig{Enabled: ptr.To(postgresMetricsEnabled)}, - }, - }, - }, + Eventually(func() bool { + return apierrors.IsNotFound(k8sClient.Get(ctx, pgClusterClassKey, &enterprisev4.PostgresClusterClass{})) + }, "10s", "250ms").Should(BeTrue()) + + pgClusterClass = pgClusterClass.DeepCopy() + pgClusterClass.ResourceVersion = "" + pgClusterClass.UID = "" + if modify != nil { + modify(pgClusterClass) } - Expect(k8sClient.Create(ctx, pgClusterClass)).To(Succeed()) } @@ -266,63 +264,105 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Expect(cond.ObservedGeneration).To(Equal(pc.Generation)) }) - It("creates monitoring resources and sets MonitoringReady when monitoring is enabled", func() { - recreateClassWithMonitoring(true) + It("adds PostgreSQL scrape annotations to the CNPG Cluster when monitoring is enabled", func() { + recreateClusterClass(func(class *enterprisev4.PostgresClusterClass) { + class.Spec.Config.Monitoring = &enterprisev4.PostgresMonitoringClassConfig{ + PostgreSQLMetrics: &enterprisev4.MetricsClassConfig{Enabled: &[]bool{true}[0]}, + } + }) Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed()) - reconcileNTimes(3) + reconcileNTimes(2) - pc := &enterprisev4.PostgresCluster{} - Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed()) - cond := meta.FindStatusCondition(pc.Status.Conditions, "MonitoringReady") - Expect(cond).NotTo(BeNil()) - Expect(cond.Status).To(Equal(metav1.ConditionTrue)) - Expect(cond.Reason).To(Equal("ObservabilityResourcesReady")) - - metricsService := &corev1.Service{} - Expect(k8sClient.Get(ctx, types.NamespacedName{ - Name: clusterName + "-postgres-metrics", - Namespace: namespace, - }, metricsService)).To(Succeed()) 
- - serviceMonitor := &monitoringv1.ServiceMonitor{} - Expect(k8sClient.Get(ctx, types.NamespacedName{ - Name: clusterName + "-postgres-metrics-monitor", - Namespace: namespace, - }, serviceMonitor)).To(Succeed()) + cnpg := &cnpgv1.Cluster{} + Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed()) + Expect(cnpg.Spec.InheritedMetadata).NotTo(BeNil()) + Expect(cnpg.Spec.InheritedMetadata.Annotations).To(HaveKeyWithValue(scrapeAnnotationKey, "true")) + Expect(cnpg.Spec.InheritedMetadata.Annotations).To(HaveKeyWithValue(pathAnnotationKey, metricsPath)) + Expect(cnpg.Spec.InheritedMetadata.Annotations).To(HaveKeyWithValue(portAnnotationKey, postgresPort)) }) - It("removes monitoring resources and MonitoringReady when monitoring is disabled by cluster override", func() { - recreateClassWithMonitoring(true) + It("removes PostgreSQL scrape annotations when disabled by cluster override", func() { + recreateClusterClass(func(class *enterprisev4.PostgresClusterClass) { + class.Spec.Config.Monitoring = &enterprisev4.PostgresMonitoringClassConfig{ + PostgreSQLMetrics: &enterprisev4.MetricsClassConfig{Enabled: &[]bool{true}[0]}, + } + }) Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed()) - reconcileNTimes(3) + reconcileNTimes(2) + + cnpg := &cnpgv1.Cluster{} + Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed()) + Expect(cnpg.Spec.InheritedMetadata).NotTo(BeNil()) + Expect(cnpg.Spec.InheritedMetadata.Annotations).To(HaveKeyWithValue(portAnnotationKey, postgresPort)) current := &enterprisev4.PostgresCluster{} Expect(k8sClient.Get(ctx, pgClusterKey, current)).To(Succeed()) current.Spec.Monitoring = &enterprisev4.PostgresClusterMonitoring{ - PostgreSQLMetrics: &enterprisev4.FeatureDisableOverride{Disabled: ptr.To(true)}, + PostgreSQLMetrics: &enterprisev4.FeatureDisableOverride{Disabled: &[]bool{true}[0]}, } Expect(k8sClient.Update(ctx, current)).To(Succeed()) reconcileNTimes(1) - Expect(k8sClient.Get(ctx, pgClusterKey, current)).To(Succeed()) - Expect(meta.FindStatusCondition(current.Status.Conditions, "MonitoringReady")).To(BeNil()) - - metricsService := &corev1.Service{} - err := k8sClient.Get(ctx, types.NamespacedName{ - Name: clusterName + "-postgres-metrics", - Namespace: namespace, - }, metricsService) - Expect(apierrors.IsNotFound(err)).To(BeTrue()) - - serviceMonitor := &monitoringv1.ServiceMonitor{} - err = k8sClient.Get(ctx, types.NamespacedName{ - Name: clusterName + "-postgres-metrics-monitor", - Namespace: namespace, - }, serviceMonitor) - Expect(apierrors.IsNotFound(err)).To(BeTrue()) + Eventually(func(g Gomega) { + updated := &cnpgv1.Cluster{} + g.Expect(k8sClient.Get(ctx, pgClusterKey, updated)).To(Succeed()) + if updated.Spec.InheritedMetadata != nil { + g.Expect(updated.Spec.InheritedMetadata.Annotations).NotTo(HaveKey(scrapeAnnotationKey)) + g.Expect(updated.Spec.InheritedMetadata.Annotations).NotTo(HaveKey(pathAnnotationKey)) + g.Expect(updated.Spec.InheritedMetadata.Annotations).NotTo(HaveKey(portAnnotationKey)) + } + }, "20s", "250ms").Should(Succeed()) + }) + + It("creates poolers with scrape annotations only after the CNPG cluster becomes healthy", func() { + recreateClusterClass(func(class *enterprisev4.PostgresClusterClass) { + class.Spec.Config.ConnectionPoolerEnabled = &[]bool{true}[0] + class.Spec.Config.Monitoring = &enterprisev4.PostgresMonitoringClassConfig{ + ConnectionPoolerMetrics: &enterprisev4.MetricsClassConfig{Enabled: &[]bool{true}[0]}, + } + class.Spec.CNPG = &enterprisev4.CNPGConfig{ + ConnectionPooler: &enterprisev4.ConnectionPoolerConfig{ + 
Instances: &[]int32{2}[0], + Mode: &[]enterprisev4.ConnectionPoolerMode{enterprisev4.ConnectionPoolerModeTransaction}[0], + }, + } + }) + + Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed()) + reconcileNTimes(2) + + rwKey := types.NamespacedName{Name: clusterName + "-pooler-rw", Namespace: namespace} + roKey := types.NamespacedName{Name: clusterName + "-pooler-ro", Namespace: namespace} + + Expect(apierrors.IsNotFound(k8sClient.Get(ctx, rwKey, &cnpgv1.Pooler{}))).To(BeTrue()) + Expect(apierrors.IsNotFound(k8sClient.Get(ctx, roKey, &cnpgv1.Pooler{}))).To(BeTrue()) + + cnpg := &cnpgv1.Cluster{} + Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed()) + cnpg.Status.Phase = cnpgv1.PhaseHealthy + Expect(k8sClient.Status().Update(ctx, cnpg)).To(Succeed()) + + Eventually(func(g Gomega) { + _, err := reconciler.Reconcile(ctx, req) + g.Expect(err).NotTo(HaveOccurred()) + + rw := &cnpgv1.Pooler{} + g.Expect(k8sClient.Get(ctx, rwKey, rw)).To(Succeed()) + g.Expect(rw.Spec.Template).NotTo(BeNil()) + g.Expect(rw.Spec.Template.ObjectMeta.Annotations).To(HaveKeyWithValue(scrapeAnnotationKey, "true")) + g.Expect(rw.Spec.Template.ObjectMeta.Annotations).To(HaveKeyWithValue(pathAnnotationKey, metricsPath)) + g.Expect(rw.Spec.Template.ObjectMeta.Annotations).To(HaveKeyWithValue(portAnnotationKey, poolerPort)) + + ro := &cnpgv1.Pooler{} + g.Expect(k8sClient.Get(ctx, roKey, ro)).To(Succeed()) + g.Expect(ro.Spec.Template).NotTo(BeNil()) + g.Expect(ro.Spec.Template.ObjectMeta.Annotations).To(HaveKeyWithValue(scrapeAnnotationKey, "true")) + g.Expect(ro.Spec.Template.ObjectMeta.Annotations).To(HaveKeyWithValue(pathAnnotationKey, metricsPath)) + g.Expect(ro.Spec.Template.ObjectMeta.Annotations).To(HaveKeyWithValue(portAnnotationKey, poolerPort)) + }, "20s", "250ms").Should(Succeed()) }) }) }) diff --git a/internal/controller/postgresdatabase_controller.go b/internal/controller/postgresdatabase_controller.go index 8a480abc2..d31082ba9 100644 --- a/internal/controller/postgresdatabase_controller.go +++ b/internal/controller/postgresdatabase_controller.go @@ -76,7 +76,11 @@ func (r *PostgresDatabaseReconciler) Reconcile(ctx context.Context, req ctrl.Req } return ctrl.Result{}, err } - rc := &dbcore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder, Metrics: r.Metrics} + metrics := r.Metrics + if metrics == nil { + metrics = &pgprometheus.NoopRecorder{} + } + rc := &dbcore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder, Metrics: metrics} result, err := dbcore.PostgresDatabaseService(ctx, rc, postgresDB, dbadapter.NewDBRepository) r.FleetCollector.CollectDatabaseMetrics(ctx, r.Client, r.Metrics) if sharedreconcile.IsPureConflict(err) { diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index cf86dac87..5dca32cb2 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -39,7 +39,6 @@ import ( clientgoscheme "k8s.io/client-go/kubernetes/scheme" ctrl "sigs.k8s.io/controller-runtime" - monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" enterpriseApiV3 "github.com/splunk/splunk-operator/api/v3" enterpriseApi "github.com/splunk/splunk-operator/api/v4" //+kubebuilder:scaffold:imports @@ -78,9 +77,7 @@ var _ = BeforeSuite(func(ctx context.Context) { CRDDirectoryPaths: []string{ filepath.Join("..", "..", "config", "crd", "bases"), filepath.Join(cnpgModuleDir, "config", "crd", "bases"), - filepath.Join("testdata", "crds"), }, - ErrorIfCRDPathMissing: true, } var err 
error @@ -111,9 +108,6 @@ var _ = BeforeSuite(func(ctx context.Context) { err = enterpriseApi.AddToScheme(clientgoscheme.Scheme) Expect(err).NotTo(HaveOccurred()) - err = monitoringv1.AddToScheme(clientgoscheme.Scheme) - Expect(err).NotTo(HaveOccurred()) - //+kubebuilder:scaffold:scheme // Create New Manager for controller diff --git a/internal/controller/testdata/crds/monitoring.coreos.com_servicemonitors.yaml b/internal/controller/testdata/crds/monitoring.coreos.com_servicemonitors.yaml deleted file mode 100644 index 230e31d50..000000000 --- a/internal/controller/testdata/crds/monitoring.coreos.com_servicemonitors.yaml +++ /dev/null @@ -1,32 +0,0 @@ -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - name: servicemonitors.monitoring.coreos.com -spec: - group: monitoring.coreos.com - scope: Namespaced - names: - plural: servicemonitors - singular: servicemonitor - kind: ServiceMonitor - listKind: ServiceMonitorList - versions: - - name: v1 - served: true - storage: true - schema: - openAPIV3Schema: - type: object - properties: - apiVersion: - type: string - kind: - type: string - metadata: - type: object - spec: - type: object - x-kubernetes-preserve-unknown-fields: true - status: - type: object - x-kubernetes-preserve-unknown-fields: true diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 4b5152d32..3b98ff100 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -183,7 +183,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. } // Build desired CNPG Cluster spec. - desiredSpec := buildCNPGClusterSpec(mergedConfig, postgresSecretName) + desiredSpec := buildCNPGClusterSpec(mergedConfig, postgresSecretName, postgresMetricsEnabled) // Fetch existing CNPG Cluster or create it. existingCNPG := &cnpgv1.Cluster{} @@ -298,7 +298,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. "Waiting for CNPG cluster to become healthy before creating poolers", pendingClusterPhase) return ctrl.Result{RequeueAfter: retryDelay}, statusErr } - if err := createOrUpdateConnectionPoolers(ctx, c, rc.Scheme, postgresCluster, mergedConfig, cnpgCluster); err != nil { + if err := createOrUpdateConnectionPoolers(ctx, c, rc.Scheme, postgresCluster, mergedConfig, cnpgCluster, poolerMetricsEnabled); err != nil { logger.Error(err, "Failed to reconcile connection pooler") rc.emitWarning(postgresCluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to reconcile connection pooler: %v", err)) statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, @@ -344,56 +344,9 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
rc.emitPoolerReadyTransition(postgresCluster, oldConditions) } - postgresMetricsEnabled := isPostgreSQLMetricsEnabled(postgresCluster, clusterClass) - poolerMetricsEnabled := isConnectionPoolerMetricsEnabled(postgresCluster, clusterClass) - rwPoolerMetricsEnabled := poolerMetricsEnabled && poolerEnabled && rwPoolerExists - roPoolerMetricsEnabled := poolerMetricsEnabled && poolerEnabled && roPoolerExists - monitoringEnabled := postgresMetricsEnabled || (poolerMetricsEnabled && poolerEnabled) - - monitoringFailure := func(reason conditionReasons, eventReason, message string, err error) (ctrl.Result, error) { - return ctrl.Result{}, handleMonitoringFailure(ctx, c, rc, postgresCluster, reason, eventReason, message, err) - } - oldConditions := make([]metav1.Condition, len(postgresCluster.Status.Conditions)) copy(oldConditions, postgresCluster.Status.Conditions) - if err := reconcilePostgreSQLMetricsService(ctx, c, rc.Scheme, postgresCluster, postgresMetricsEnabled); err != nil { - return monitoringFailure(reasonPostgresMetricsServiceFailed, EventMetricsServiceReconcileFailed, fmt.Sprintf("Failed to reconcile PostgreSQL metrics Service: %v", err), err) - } - if err := reconcileConnectionPoolerMetricsService(ctx, c, rc.Scheme, postgresCluster, readWriteEndpoint, rwPoolerMetricsEnabled); err != nil { - return monitoringFailure(reasonPoolerMetricsServiceFailed, EventMetricsServiceReconcileFailed, fmt.Sprintf("Failed to reconcile RW pooler metrics Service: %v", err), err) - } - if err := reconcileConnectionPoolerMetricsService(ctx, c, rc.Scheme, postgresCluster, readOnlyEndpoint, roPoolerMetricsEnabled); err != nil { - return monitoringFailure(reasonPoolerMetricsServiceFailed, EventMetricsServiceReconcileFailed, fmt.Sprintf("Failed to reconcile RO pooler metrics Service: %v", err), err) - } - if err := reconcilePostgreSQLMetricsServiceMonitor(ctx, c, rc.Scheme, postgresCluster, postgresMetricsEnabled); err != nil { - return monitoringFailure(reasonPostgresMetricsMonitorFailed, EventServiceMonitorReconcileFailed, fmt.Sprintf("Failed to reconcile PostgreSQL metrics ServiceMonitor: %v", err), err) - } - if err := reconcileConnectionPoolerMetricsServiceMonitor(ctx, c, rc.Scheme, postgresCluster, readWriteEndpoint, rwPoolerMetricsEnabled); err != nil { - return monitoringFailure(reasonPoolerMetricsMonitorFailed, EventServiceMonitorReconcileFailed, fmt.Sprintf("Failed to reconcile RW pooler metrics ServiceMonitor: %v", err), err) - } - if err := reconcileConnectionPoolerMetricsServiceMonitor(ctx, c, rc.Scheme, postgresCluster, readOnlyEndpoint, roPoolerMetricsEnabled); err != nil { - return monitoringFailure(reasonPoolerMetricsMonitorFailed, EventServiceMonitorReconcileFailed, fmt.Sprintf("Failed to reconcile RO pooler metrics ServiceMonitor: %v", err), err) - } - - if !monitoringEnabled { - if err := removeCondition(ctx, c, postgresCluster, monitoringReady); err != nil { - if apierrors.IsConflict(err) { - return ctrl.Result{Requeue: true}, nil - } - return ctrl.Result{}, err - } - } else { - if err := setCondition(ctx, c, postgresCluster, monitoringReady, metav1.ConditionTrue, reasonObservabilityResourcesReady, "Monitoring resources are ready"); err != nil { - if apierrors.IsConflict(err) { - return ctrl.Result{Requeue: true}, nil - } - return ctrl.Result{}, err - } - - rc.emitMonitoringReadyTransition(postgresCluster, oldConditions) - } - // Reconcile ConfigMap when CNPG cluster is healthy. 
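The surviving flow keeps the snapshot-then-compare event gating: `oldConditions` is copied before any status update so that Ready events fire only on an actual transition, never on every reconcile pass. A minimal sketch of that gating, using only the `metav1` condition helpers; `emitOnTransition` is an illustrative name, not part of the patch:

```go
package core

import (
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// emitOnTransition fires emit only when condType flips to True between the
// snapshot taken before the status update and the conditions afterwards,
// mirroring the gating in emitPoolerReadyTransition.
func emitOnTransition(before, after []metav1.Condition, condType string, emit func(message string)) {
	if meta.IsStatusConditionTrue(before, condType) {
		return // was already True; re-emitting every reconcile would be noise
	}
	if meta.IsStatusConditionTrue(after, condType) {
		emit(condType + " became ready")
	}
}
```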
if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy { logger.Info("CNPG Cluster healthy, reconciling ConfigMap") @@ -525,8 +478,8 @@ func getMergedConfig(class *enterprisev4.PostgresClusterClass, cluster *enterpri // buildCNPGClusterSpec builds the desired CNPG ClusterSpec. // IMPORTANT: any field added here must also appear in normalizeCNPGClusterSpec, // otherwise spec drift will be silently ignored. -func buildCNPGClusterSpec(cfg *MergedConfig, secretName string) cnpgv1.ClusterSpec { - return cnpgv1.ClusterSpec{ +func buildCNPGClusterSpec(cfg *MergedConfig, secretName string, postgresMetricsEnabled bool) cnpgv1.ClusterSpec { + spec := cnpgv1.ClusterSpec{ ImageName: fmt.Sprintf("ghcr.io/cloudnative-pg/postgresql:%s", *cfg.Spec.PostgresVersion), Instances: int(*cfg.Spec.Instances), PostgresConfiguration: cnpgv1.PostgresConfiguration{ @@ -547,12 +500,18 @@ func buildCNPGClusterSpec(cfg *MergedConfig, secretName string) cnpgv1.ClusterSp }, Resources: *cfg.Spec.Resources, } + if postgresMetricsEnabled { + spec.InheritedMetadata = &cnpgv1.EmbeddedObjectMetadata{ + Annotations: buildPostgresScrapeAnnotations(), + } + } + return spec } -func buildCNPGCluster(scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, secretName string) (*cnpgv1.Cluster, error) { +func buildCNPGCluster(scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, secretName string, postgresMetricsEnabled bool) *cnpgv1.Cluster { cnpg := &cnpgv1.Cluster{ ObjectMeta: metav1.ObjectMeta{Name: cluster.Name, Namespace: cluster.Namespace}, - Spec: buildCNPGClusterSpec(cfg, secretName), + Spec: buildCNPGClusterSpec(cfg, secretName, postgresMetricsEnabled), } if err := ctrl.SetControllerReference(cluster, cnpg, scheme); err != nil { return nil, fmt.Errorf("setting controller reference on CNPG cluster: %w", err) @@ -576,6 +535,9 @@ func normalizeCNPGClusterSpec(spec cnpgv1.ClusterSpec, customDefinedParameters m if len(spec.PostgresConfiguration.PgHBA) > 0 { normalized.PgHBA = spec.PostgresConfiguration.PgHBA } + if spec.InheritedMetadata != nil && len(spec.InheritedMetadata.Annotations) > 0 { + normalized.InheritedAnnotations = spec.InheritedMetadata.Annotations + } if spec.Bootstrap != nil && spec.Bootstrap.InitDB != nil { normalized.DefaultDatabase = spec.Bootstrap.InitDB.Database normalized.Owner = spec.Bootstrap.InitDB.Owner @@ -674,17 +636,17 @@ func poolerInstanceCount(p *cnpgv1.Pooler) (desired, scheduled int32) { } // createOrUpdateConnectionPoolers creates RW and RO poolers if they don't exist. 
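The IMPORTANT comment on `buildCNPGClusterSpec` captures the drift-detection contract: only fields projected into `normalizeCNPGClusterSpec` participate in the desired-versus-live comparison, so fields defaulted by CNPG on the live object never read as drift. A minimal sketch of that projection idea, with an illustrative struct rather than the real normalized type:

```go
package main

import (
	"fmt"
	"reflect"
)

// normalizedSpec is an illustrative projection holding only operator-owned fields.
type normalizedSpec struct {
	ImageName   string
	Instances   int
	Annotations map[string]string
}

func normalize(image string, instances int, ann map[string]string) normalizedSpec {
	n := normalizedSpec{ImageName: image, Instances: instances}
	// Copy only when non-empty, mirroring the InheritedMetadata guard above:
	// a nil map and an empty map are not DeepEqual, so an unconditional copy
	// would report phantom drift.
	if len(ann) > 0 {
		n.Annotations = ann
	}
	return n
}

func main() {
	desired := normalize("img:18", 3, nil)
	live := normalize("img:18", 3, map[string]string{})
	fmt.Println(reflect.DeepEqual(desired, live)) // true: no phantom drift
}
```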
-func createOrUpdateConnectionPoolers(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, cnpgCluster *cnpgv1.Cluster) error { - if err := createConnectionPooler(ctx, c, scheme, cluster, cfg, cnpgCluster, readWriteEndpoint); err != nil { +func createOrUpdateConnectionPoolers(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerMetricsEnabled bool) error { + if err := createConnectionPooler(ctx, c, scheme, cluster, cfg, cnpgCluster, readWriteEndpoint, poolerMetricsEnabled); err != nil { return fmt.Errorf("reconciling RW pooler: %w", err) } - if err := createConnectionPooler(ctx, c, scheme, cluster, cfg, cnpgCluster, readOnlyEndpoint); err != nil { + if err := createConnectionPooler(ctx, c, scheme, cluster, cfg, cnpgCluster, readOnlyEndpoint, poolerMetricsEnabled); err != nil { return fmt.Errorf("reconciling RO pooler: %w", err) } return nil } -func createConnectionPooler(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerType string) error { +func createConnectionPooler(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerType string, poolerMetricsEnabled bool) error { logger := log.FromContext(ctx) poolerName := poolerResourceName(cluster.Name, poolerType) existing := &cnpgv1.Pooler{} @@ -696,14 +658,14 @@ func createConnectionPooler(ctx context.Context, c client.Client, scheme *runtim return err } logger.Info("CNPG Pooler creation started", "name", poolerName, "type", poolerType) - pooler, err := buildCNPGPooler(scheme, cluster, cfg, cnpgCluster, poolerType) + pooler, err := buildCNPGPooler(scheme, cluster, cfg, cnpgCluster, poolerType, poolerMetricsEnabled) if err != nil { return err } return c.Create(ctx, pooler) } -func buildCNPGPooler(scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerType string) (*cnpgv1.Pooler, error) { +func buildCNPGPooler(scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerType string, poolerMetricsEnabled bool) (*cnpgv1.Pooler, error) { pc := cfg.CNPG.ConnectionPooler instances := *pc.Instances mode := cnpgv1.PgBouncerPoolMode(*pc.Mode) @@ -719,6 +681,21 @@ func buildCNPGPooler(scheme *runtime.Scheme, cluster *enterprisev4.PostgresClust }, }, } + if poolerMetricsEnabled { + pooler.Spec.Template = &cnpgv1.PodTemplateSpec{ + ObjectMeta: cnpgv1.Metadata{ + Annotations: buildPoolerScrapeAnnotations(), + }, + Spec: corev1.PodSpec{ + // CNPG's Pooler CRD requires template.spec.containers to be present. + // A minimal named container lets CNPG's podspec builder merge in the + // real PgBouncer image/command/ports while still carrying our annotations. + Containers: []corev1.Container{ + {Name: "pgbouncer"}, + }, + }, + } + } if err := ctrl.SetControllerReference(cluster, pooler, scheme); err != nil { return nil, fmt.Errorf("setting controller reference on CNPG pooler: %w", err) } @@ -864,50 +841,6 @@ func setStatus(ctx context.Context, c client.Client, metrics ports.Recorder, clu return nil } -// setCondition updates a specific condition on the PostgresCluster status. 
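Note that `createConnectionPooler` is deliberately create-if-absent: it issues a Create only when Get reports NotFound and otherwise leaves the existing pooler untouched. A condensed sketch of that guard, assuming a controller-runtime client; `createIfAbsent` and its builder callback are illustrative names:

```go
package core

import (
	"context"

	cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// createIfAbsent creates the pooler only when it does not exist yet; an
// already-present object is left as-is rather than force-updated.
func createIfAbsent(ctx context.Context, c client.Client, key client.ObjectKey, build func() (*cnpgv1.Pooler, error)) error {
	if err := c.Get(ctx, key, &cnpgv1.Pooler{}); err == nil {
		return nil // already exists; nothing to do
	} else if !apierrors.IsNotFound(err) {
		return err
	}
	pooler, err := build()
	if err != nil {
		return err
	}
	return c.Create(ctx, pooler)
}
```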
-func setCondition(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, condType conditionTypes, status metav1.ConditionStatus, reason conditionReasons, message string) error { - base := cluster.Status.DeepCopy() - - meta.SetStatusCondition(&cluster.Status.Conditions, metav1.Condition{ - Type: string(condType), - Status: status, - Reason: string(reason), - Message: message, - ObservedGeneration: cluster.Generation, - }) - - if equality.Semantic.DeepEqual(*base, cluster.Status) { - return nil - } - if err := c.Status().Update(ctx, cluster); err != nil { - return fmt.Errorf("failed to update PostgresCluster condition: %w", err) - } - return nil -} - -// removeCondition removes a specific condition from the PostgresCluster status. -func removeCondition(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, condType conditionTypes) error { - base := cluster.Status.DeepCopy() - - meta.RemoveStatusCondition(&cluster.Status.Conditions, string(condType)) - - if equality.Semantic.DeepEqual(*base, cluster.Status) { - return nil - } - if err := c.Status().Update(ctx, cluster); err != nil { - return fmt.Errorf("failed to remove PostgresCluster condition: %w", err) - } - return nil -} - -func handleMonitoringFailure(ctx context.Context, c client.Client, rc *ReconcileContext, cluster *enterprisev4.PostgresCluster, reason conditionReasons, eventReason string, message string, err error) error { - rc.emitWarning(cluster, eventReason, message) - if statusErr := setCondition(ctx, c, cluster, monitoringReady, metav1.ConditionFalse, reason, message); statusErr != nil { - return errors.Join(err, fmt.Errorf("failed to update MonitoringReady condition: %w", statusErr)) - } - return err -} - // generateConfigMap builds a ConfigMap with connection details for the PostgresCluster. 
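The helpers removed above shared one guard worth keeping in view: deep-copy the status, mutate conditions, and skip the API call entirely when nothing semantically changed, so no-op reconciles do not churn resourceVersion. A condensed sketch of that guard as a single illustrative function:

```go
package core

import (
	"context"
	"fmt"

	enterprisev4 "github.com/splunk/splunk-operator/api/v4"
	"k8s.io/apimachinery/pkg/api/equality"
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// setConditionIfChanged condenses the removed setCondition: it only issues a
// status update when the condition change is semantically visible.
func setConditionIfChanged(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, cond metav1.Condition) error {
	base := cluster.Status.DeepCopy()
	meta.SetStatusCondition(&cluster.Status.Conditions, cond)
	if equality.Semantic.DeepEqual(*base, cluster.Status) {
		return nil // no semantic change; skip the update
	}
	if err := c.Status().Update(ctx, cluster); err != nil {
		return fmt.Errorf("updating PostgresCluster condition: %w", err)
	}
	return nil
}
```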
func generateConfigMap(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster, secretName string) (*corev1.ConfigMap, error) { cmName := fmt.Sprintf("%s%s", cluster.Name, defaultConfigMapSuffix) @@ -1144,4 +1077,4 @@ func generatePassword() (string, error) { symbols = 0 ) return password.Generate(length, digits, symbols, false, true) -} +} \ No newline at end of file diff --git a/pkg/postgresql/cluster/core/cluster_unit_test.go b/pkg/postgresql/cluster/core/cluster_unit_test.go index 1a0659c98..49e5382b2 100644 --- a/pkg/postgresql/cluster/core/cluster_unit_test.go +++ b/pkg/postgresql/cluster/core/cluster_unit_test.go @@ -204,6 +204,27 @@ func TestNormalizeCNPGClusterSpec(t *testing.T) { Owner: "admin", }, }, + { + name: "inherited annotations included when non-empty", + spec: cnpgv1.ClusterSpec{ + ImageName: "img:18", + Instances: 1, + InheritedMetadata: &cnpgv1.EmbeddedObjectMetadata{ + Annotations: map[string]string{ + prometheusScrapeAnnotation: "true", + prometheusPortAnnotation: postgresMetricsPortString, + }, + }, + }, + expected: normalizedCNPGClusterSpec{ + ImageName: "img:18", + Instances: 1, + InheritedAnnotations: map[string]string{ + prometheusScrapeAnnotation: "true", + prometheusPortAnnotation: postgresMetricsPortString, + }, + }, + }, { name: "nil bootstrap leaves database and owner empty", spec: cnpgv1.ClusterSpec{ @@ -354,7 +375,7 @@ func TestBuildCNPGClusterSpec(t *testing.T) { }, } - spec := buildCNPGClusterSpec(cfg, "my-secret") + spec := buildCNPGClusterSpec(cfg, "my-secret", false) assert.Equal(t, "ghcr.io/cloudnative-pg/postgresql:18", spec.ImageName) assert.Equal(t, 3, spec.Instances) @@ -371,6 +392,16 @@ func TestBuildCNPGClusterSpec(t *testing.T) { require.Len(t, spec.PostgresConfiguration.PgHBA, 2) assert.Equal(t, "hostssl all all 0.0.0.0/0 scram-sha-256", spec.PostgresConfiguration.PgHBA[0]) assert.Equal(t, "host replication all 10.0.0.0/8 md5", spec.PostgresConfiguration.PgHBA[1]) + assert.Nil(t, spec.InheritedMetadata) + + t.Run("adds postgres scrape annotations when enabled", func(t *testing.T) { + spec := buildCNPGClusterSpec(cfg, "my-secret", true) + + require.NotNil(t, spec.InheritedMetadata) + assert.Equal(t, "true", spec.InheritedMetadata.Annotations[prometheusScrapeAnnotation]) + assert.Equal(t, metricsPath, spec.InheritedMetadata.Annotations[prometheusPathAnnotation]) + assert.Equal(t, postgresMetricsPortString, spec.InheritedMetadata.Annotations[prometheusPortAnnotation]) + }) } func TestBuildCNPGPooler(t *testing.T) { @@ -403,7 +434,7 @@ func TestBuildCNPGPooler(t *testing.T) { } t.Run("rw pooler", func(t *testing.T) { - pooler, err := buildCNPGPooler(scheme, postgresCluster, cfg, cnpgCluster, "rw") + pooler, err := buildCNPGPooler(scheme, postgresCluster, cfg, cnpgCluster, "rw", false) require.NoError(t, err) assert.Equal(t, "my-cluster-pooler-rw", pooler.Name) @@ -416,14 +447,21 @@ func TestBuildCNPGPooler(t *testing.T) { assert.Equal(t, "25", pooler.Spec.PgBouncer.Parameters["default_pool_size"]) require.Len(t, pooler.OwnerReferences, 1) assert.Equal(t, "test-uid", string(pooler.OwnerReferences[0].UID)) + assert.Nil(t, pooler.Spec.Template) }) t.Run("ro pooler", func(t *testing.T) { - pooler, err := buildCNPGPooler(scheme, postgresCluster, cfg, cnpgCluster, "ro") + pooler, err := buildCNPGPooler(scheme, postgresCluster, cfg, cnpgCluster, "ro", true) require.NoError(t, err) assert.Equal(t, "my-cluster-pooler-ro", pooler.Name) assert.Equal(t, cnpgv1.PoolerType("ro"), 
pooler.Spec.Type) + require.NotNil(t, pooler.Spec.Template) + assert.Equal(t, "true", pooler.Spec.Template.ObjectMeta.Annotations[prometheusScrapeAnnotation]) + assert.Equal(t, metricsPath, pooler.Spec.Template.ObjectMeta.Annotations[prometheusPathAnnotation]) + assert.Equal(t, poolerMetricsPortString, pooler.Spec.Template.ObjectMeta.Annotations[prometheusPortAnnotation]) + require.Len(t, pooler.Spec.Template.Spec.Containers, 1) + assert.Equal(t, "pgbouncer", pooler.Spec.Template.Spec.Containers[0].Name) }) } @@ -453,7 +491,7 @@ func TestBuildCNPGCluster(t *testing.T) { }, } - cluster, err := buildCNPGCluster(scheme, postgresCluster, cfg, "my-secret") + cluster, err := buildCNPGCluster(scheme, postgresCluster, cfg, "my-secret", true) require.NoError(t, err) assert.Equal(t, "my-cluster", cluster.Name) @@ -461,6 +499,8 @@ func TestBuildCNPGCluster(t *testing.T) { require.Len(t, cluster.OwnerReferences, 1) assert.Equal(t, "pg-uid", string(cluster.OwnerReferences[0].UID)) assert.Equal(t, 3, cluster.Spec.Instances) + require.NotNil(t, cluster.Spec.InheritedMetadata) + assert.Equal(t, postgresMetricsPortString, cluster.Spec.InheritedMetadata.Annotations[prometheusPortAnnotation]) } func TestClusterSecretExists(t *testing.T) { @@ -924,7 +964,7 @@ func TestCreateConnectionPooler(t *testing.T) { t.Run(tt.name, func(t *testing.T) { c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(tt.objects...).Build() - err := createConnectionPooler(context.Background(), c, scheme, cluster.DeepCopy(), cfg, cnpg, "rw") + err := createConnectionPooler(context.Background(), c, scheme, cluster.DeepCopy(), cfg, cnpg, "rw", false) require.NoError(t, err) fetched := &cnpgv1.Pooler{} @@ -1098,7 +1138,7 @@ func TestCreateOrUpdateConnectionPoolers(t *testing.T) { t.Run("creates both rw and ro poolers", func(t *testing.T) { c := fake.NewClientBuilder().WithScheme(scheme).Build() - err := createOrUpdateConnectionPoolers(context.Background(), c, scheme, cluster.DeepCopy(), cfg, cnpgCluster) + err := createOrUpdateConnectionPoolers(context.Background(), c, scheme, cluster.DeepCopy(), cfg, cnpgCluster, false) require.NoError(t, err) @@ -1128,7 +1168,7 @@ func TestCreateOrUpdateConnectionPoolers(t *testing.T) { } c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(existing...).Build() - err := createOrUpdateConnectionPoolers(context.Background(), c, scheme, cluster.DeepCopy(), cfg, cnpgCluster) + err := createOrUpdateConnectionPoolers(context.Background(), c, scheme, cluster.DeepCopy(), cfg, cnpgCluster, false) require.NoError(t, err) rw := &cnpgv1.Pooler{} @@ -1138,4 +1178,26 @@ func TestCreateOrUpdateConnectionPoolers(t *testing.T) { require.NoError(t, c.Get(context.Background(), client.ObjectKey{Name: "my-cluster-pooler-ro", Namespace: "default"}, ro)) assert.Equal(t, int32(1), *ro.Spec.Instances) }) + + t.Run("creates both rw and ro poolers with scrape annotations when metrics are enabled", func(t *testing.T) { + c := fake.NewClientBuilder().WithScheme(scheme).Build() + + err := createOrUpdateConnectionPoolers(context.Background(), c, scheme, cluster.DeepCopy(), cfg, cnpgCluster, true) + + require.NoError(t, err) + + rw := &cnpgv1.Pooler{} + require.NoError(t, c.Get(context.Background(), client.ObjectKey{Name: "my-cluster-pooler-rw", Namespace: "default"}, rw)) + require.NotNil(t, rw.Spec.Template) + assert.Equal(t, "true", rw.Spec.Template.ObjectMeta.Annotations[prometheusScrapeAnnotation]) + assert.Equal(t, metricsPath, rw.Spec.Template.ObjectMeta.Annotations[prometheusPathAnnotation]) + assert.Equal(t, 
poolerMetricsPortString, rw.Spec.Template.ObjectMeta.Annotations[prometheusPortAnnotation]) + + ro := &cnpgv1.Pooler{} + require.NoError(t, c.Get(context.Background(), client.ObjectKey{Name: "my-cluster-pooler-ro", Namespace: "default"}, ro)) + require.NotNil(t, ro.Spec.Template) + assert.Equal(t, "true", ro.Spec.Template.ObjectMeta.Annotations[prometheusScrapeAnnotation]) + assert.Equal(t, metricsPath, ro.Spec.Template.ObjectMeta.Annotations[prometheusPathAnnotation]) + assert.Equal(t, poolerMetricsPortString, ro.Spec.Template.ObjectMeta.Annotations[prometheusPortAnnotation]) + }) } diff --git a/pkg/postgresql/cluster/core/events.go b/pkg/postgresql/cluster/core/events.go index d2692ed9b..b60f3f643 100644 --- a/pkg/postgresql/cluster/core/events.go +++ b/pkg/postgresql/cluster/core/events.go @@ -29,9 +29,6 @@ const ( EventConfigMapReconcileFailed = "ConfigMapReconcileFailed" EventClusterDegraded = "ClusterDegraded" EventCleanupFailed = "CleanupFailed" - EventMonitoringReady = "MonitoringReady" - EventMetricsServiceReconcileFailed = "MetricsServiceReconcileFailed" - EventServiceMonitorReconcileFailed = "ServiceMonitorReconcileFailed" ) func (rc *ReconcileContext) emitNormal(obj client.Object, reason, message string) { @@ -63,11 +60,3 @@ func (rc *ReconcileContext) emitPoolerReadyTransition(obj client.Object, conditi rc.emitNormal(obj, EventPoolerReady, "Connection poolers are ready") } } - -// emitMonitoringReadyTransition emits MonitoringReady only when the condition was not -// previously True — prevents re-emission on every reconcile while already ready. -func (rc *ReconcileContext) emitMonitoringReadyTransition(obj client.Object, conditions []metav1.Condition) { - if !meta.IsStatusConditionTrue(conditions, string(monitoringReady)) { - rc.emitNormal(obj, EventMonitoringReady, "Monitoring resources are ready") - } -} \ No newline at end of file diff --git a/pkg/postgresql/cluster/core/monitoring.go b/pkg/postgresql/cluster/core/monitoring.go index e422a25d9..2b18ba497 100644 --- a/pkg/postgresql/cluster/core/monitoring.go +++ b/pkg/postgresql/cluster/core/monitoring.go @@ -1,41 +1,41 @@ package core import ( - "context" - "fmt" - - monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" enterprisev4 "github.com/splunk/splunk-operator/api/v4" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/intstr" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" - "sigs.k8s.io/controller-runtime/pkg/log" ) const ( - // metrics - postgresMetricsServiceSuffix = "-postgres-metrics" - postgresMetricsPortName = "metrics" - postgresMetricsPort = int32(9187) - poolerMetricsPortName = "metrics" - poolerMetricsPort = int32(9127) + prometheusScrapeAnnotation = "prometheus.io/scrape" + prometheusPathAnnotation = "prometheus.io/path" + prometheusPortAnnotation = "prometheus.io/port" - // labels - labelManagedBy = "app.kubernetes.io/managed-by" - labelManagedByValue = "postgrescluster-controller" - labelObservabilityComponent = "enterprise.splunk.com/observability-component" - cnpgClusterLabelName = "cnpg.io/cluster" - cnpgPoolerNameLabel = "cnpg.io/poolerName" - cnpgPodRoleInstance = "instance" - cnpgPodRoleLabelName = "cnpg.io/podRole" + metricsPath = "/metrics" + postgresMetricsPortString = "9187" + 
poolerMetricsPortString = "9127" ) +func buildScrapeAnnotations(port string) map[string]string { + return map[string]string{ + prometheusScrapeAnnotation: "true", + prometheusPathAnnotation: metricsPath, + prometheusPortAnnotation: port, + } +} + +func removeScrapeAnnotations(annotations map[string]string) { + delete(annotations, prometheusScrapeAnnotation) + delete(annotations, prometheusPathAnnotation) + delete(annotations, prometheusPortAnnotation) +} + +func buildPostgresScrapeAnnotations() map[string]string { + return buildScrapeAnnotations(postgresMetricsPortString) +} + +func buildPoolerScrapeAnnotations() map[string]string { + return buildScrapeAnnotations(poolerMetricsPortString) +} + func isPostgreSQLMetricsEnabled(cluster *enterprisev4.PostgresCluster, class *enterprisev4.PostgresClusterClass) bool { if class == nil || class.Spec.Config == nil || class.Spec.Config.Monitoring == nil { return false @@ -65,403 +65,3 @@ func isConnectionPoolerMetricsEnabled(cluster *enterprisev4.PostgresCluster, cla override := cluster.Spec.Monitoring.ConnectionPoolerMetrics.Disabled return override == nil || !*override } - -func buildPostgreSQLMetricsService(scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster) (*corev1.Service, error) { - svc := &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: cluster.Name + postgresMetricsServiceSuffix, - Namespace: cluster.Namespace, - Labels: map[string]string{ - labelManagedBy: labelManagedByValue, - labelObservabilityComponent: "postgresql-metrics", - cnpgClusterLabelName: cluster.Name, - }, - }, - Spec: corev1.ServiceSpec{ - Type: corev1.ServiceTypeClusterIP, - Selector: map[string]string{ - cnpgClusterLabelName: cluster.Name, - cnpgPodRoleLabelName: cnpgPodRoleInstance, - }, - Ports: []corev1.ServicePort{ - { - Name: postgresMetricsPortName, - Port: postgresMetricsPort, - Protocol: corev1.ProtocolTCP, - TargetPort: intstr.FromString(postgresMetricsPortName), - }, - }, - }, - } - - if err := ctrl.SetControllerReference(cluster, svc, scheme); err != nil { - return nil, fmt.Errorf("setting controller reference on PostgreSQL metrics Service: %w", err) - } - - return svc, nil -} - -func poolerMetricsServiceName(clusterName, poolerType string) string { - return fmt.Sprintf("%s-pooler-%s-metrics", clusterName, poolerType) -} -func buildConnectionPoolerMetricsService( - scheme *runtime.Scheme, - cluster *enterprisev4.PostgresCluster, - poolerType string, -) (*corev1.Service, error) { - poolerName := poolerResourceName(cluster.Name, poolerType) - - svc := &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: poolerMetricsServiceName(cluster.Name, poolerType), - Namespace: cluster.Namespace, - Labels: map[string]string{ - labelManagedBy: labelManagedByValue, - labelObservabilityComponent: "pgbouncer-metrics", - cnpgClusterLabelName: cluster.Name, - cnpgPoolerNameLabel: poolerName, - }, - }, - Spec: corev1.ServiceSpec{ - Type: corev1.ServiceTypeClusterIP, - Selector: map[string]string{ - cnpgPoolerNameLabel: poolerName, - }, - Ports: []corev1.ServicePort{ - { - Name: poolerMetricsPortName, - Port: poolerMetricsPort, - Protocol: corev1.ProtocolTCP, - TargetPort: intstr.FromString(poolerMetricsPortName), - }, - }, - }, - } - - if err := ctrl.SetControllerReference(cluster, svc, scheme); err != nil { - return nil, fmt.Errorf("setting controller reference on PgBouncer metrics Service: %w", err) - } - - return svc, nil -} - -func reconcilePostgreSQLMetricsService(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster 
*enterprisev4.PostgresCluster, enabled bool) error { - logger := log.FromContext(ctx) - serviceName := cluster.Name + postgresMetricsServiceSuffix - - if !enabled { - existing := &corev1.Service{} - err := c.Get(ctx, types.NamespacedName{Name: serviceName, Namespace: cluster.Namespace}, existing) - switch { - case apierrors.IsNotFound(err): - return nil - case err != nil: - return fmt.Errorf("getting PostgreSQL metrics Service %s: %w", serviceName, err) - } - - logger.Info("Deleting PostgreSQL metrics Service", "name", serviceName) - if err := c.Delete(ctx, existing); err != nil && !apierrors.IsNotFound(err) { - return fmt.Errorf("deleting PostgreSQL metrics Service %s: %w", serviceName, err) - } - return nil - } - - desired, err := buildPostgreSQLMetricsService(scheme, cluster) - if err != nil { - return fmt.Errorf("building PostgreSQL metrics Service: %w", err) - } - - live := &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: desired.Name, - Namespace: desired.Namespace, - }, - } - - _, err = controllerutil.CreateOrUpdate(ctx, c, live, func() error { - live.Labels = desired.Labels - live.Annotations = desired.Annotations - live.Spec.Type = desired.Spec.Type - live.Spec.Selector = desired.Spec.Selector - live.Spec.Ports = desired.Spec.Ports - - if !metav1.IsControlledBy(live, cluster) { - if err := ctrl.SetControllerReference(cluster, live, scheme); err != nil { - return fmt.Errorf("setting controller reference on PostgreSQL metrics Service: %w", err) - } - } - return nil - }) - if err != nil { - return fmt.Errorf("reconciling PostgreSQL metrics Service %s: %w", desired.Name, err) - } - - return nil -} - -func reconcileConnectionPoolerMetricsService( - ctx context.Context, - c client.Client, - scheme *runtime.Scheme, - cluster *enterprisev4.PostgresCluster, - poolerType string, - enabled bool, -) error { - logger := log.FromContext(ctx) - serviceName := poolerMetricsServiceName(cluster.Name, poolerType) - - if !enabled { - existing := &corev1.Service{} - err := c.Get(ctx, types.NamespacedName{Name: serviceName, Namespace: cluster.Namespace}, existing) - switch { - case apierrors.IsNotFound(err): - return nil - case err != nil: - return fmt.Errorf("getting PgBouncer metrics Service %s: %w", serviceName, err) - } - - logger.Info("Deleting PgBouncer metrics Service", "name", serviceName, "poolerType", poolerType) - if err := c.Delete(ctx, existing); err != nil && !apierrors.IsNotFound(err) { - return fmt.Errorf("deleting PgBouncer metrics Service %s: %w", serviceName, err) - } - return nil - } - - desired, err := buildConnectionPoolerMetricsService(scheme, cluster, poolerType) - if err != nil { - return fmt.Errorf("building PgBouncer metrics Service for %s pooler: %w", poolerType, err) - } - - live := &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: desired.Name, - Namespace: desired.Namespace, - }, - } - - _, err = controllerutil.CreateOrUpdate(ctx, c, live, func() error { - live.Labels = desired.Labels - live.Annotations = desired.Annotations - live.Spec.Type = desired.Spec.Type - live.Spec.Selector = desired.Spec.Selector - live.Spec.Ports = desired.Spec.Ports - - if !metav1.IsControlledBy(live, cluster) { - if err := ctrl.SetControllerReference(cluster, live, scheme); err != nil { - return fmt.Errorf("setting controller reference on PgBouncer metrics Service: %w", err) - } - } - return nil - }) - if err != nil { - return fmt.Errorf("reconciling PgBouncer metrics Service %s: %w", desired.Name, err) - } - - return nil -} - -func 
postgresMetricsServiceMonitorName(clusterName string) string { - return clusterName + "-postgres-metrics-monitor" -} - -func poolerMetricsServiceMonitorName(clusterName, poolerType string) string { - return fmt.Sprintf("%s-pooler-%s-metrics-monitor", clusterName, poolerType) -} - -func buildPostgreSQLMetricsServiceMonitor( - scheme *runtime.Scheme, - cluster *enterprisev4.PostgresCluster, -) (*monitoringv1.ServiceMonitor, error) { - sm := &monitoringv1.ServiceMonitor{ - ObjectMeta: metav1.ObjectMeta{ - Name: postgresMetricsServiceMonitorName(cluster.Name), - Namespace: cluster.Namespace, - Labels: map[string]string{ - labelManagedBy: labelManagedByValue, - labelObservabilityComponent: "postgresql-metrics", - cnpgClusterLabelName: cluster.Name, - }, - }, - Spec: monitoringv1.ServiceMonitorSpec{ - Selector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - labelObservabilityComponent: "postgresql-metrics", - cnpgClusterLabelName: cluster.Name, - }, - }, - Endpoints: []monitoringv1.Endpoint{ - { - Port: postgresMetricsPortName, - Path: "/metrics", - Scheme: "http", - }, - }, - }, - } - - if err := ctrl.SetControllerReference(cluster, sm, scheme); err != nil { - return nil, fmt.Errorf("setting controller reference on PostgreSQL ServiceMonitor: %w", err) - } - - return sm, nil -} - -func buildConnectionPoolerMetricsServiceMonitor( - scheme *runtime.Scheme, - cluster *enterprisev4.PostgresCluster, - poolerType string, -) (*monitoringv1.ServiceMonitor, error) { - poolerName := poolerResourceName(cluster.Name, poolerType) - - sm := &monitoringv1.ServiceMonitor{ - ObjectMeta: metav1.ObjectMeta{ - Name: poolerMetricsServiceMonitorName(cluster.Name, poolerType), - Namespace: cluster.Namespace, - Labels: map[string]string{ - labelManagedBy: labelManagedByValue, - labelObservabilityComponent: "pgbouncer-metrics", - cnpgClusterLabelName: cluster.Name, - cnpgPoolerNameLabel: poolerName, - }, - }, - Spec: monitoringv1.ServiceMonitorSpec{ - Selector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - labelObservabilityComponent: "pgbouncer-metrics", - cnpgClusterLabelName: cluster.Name, - cnpgPoolerNameLabel: poolerName, - }, - }, - Endpoints: []monitoringv1.Endpoint{ - { - Port: poolerMetricsPortName, - Path: "/metrics", - Scheme: "http", - }, - }, - }, - } - - if err := ctrl.SetControllerReference(cluster, sm, scheme); err != nil { - return nil, fmt.Errorf("setting controller reference on PgBouncer ServiceMonitor: %w", err) - } - - return sm, nil -} - -func reconcilePostgreSQLMetricsServiceMonitor( - ctx context.Context, - c client.Client, - scheme *runtime.Scheme, - cluster *enterprisev4.PostgresCluster, - enabled bool, -) error { - logger := log.FromContext(ctx) - name := postgresMetricsServiceMonitorName(cluster.Name) - - if !enabled { - existing := &monitoringv1.ServiceMonitor{} - err := c.Get(ctx, types.NamespacedName{Name: name, Namespace: cluster.Namespace}, existing) - switch { - case apierrors.IsNotFound(err): - return nil - case err != nil: - return fmt.Errorf("getting PostgreSQL ServiceMonitor %s: %w", name, err) - } - - logger.Info("Deleting PostgreSQL ServiceMonitor", "name", name) - if err := c.Delete(ctx, existing); err != nil && !apierrors.IsNotFound(err) { - return fmt.Errorf("deleting PostgreSQL ServiceMonitor %s: %w", name, err) - } - return nil - } - - desired, err := buildPostgreSQLMetricsServiceMonitor(scheme, cluster) - if err != nil { - return fmt.Errorf("building PostgreSQL ServiceMonitor: %w", err) - } - - live := &monitoringv1.ServiceMonitor{ - ObjectMeta: 
metav1.ObjectMeta{ - Name: desired.Name, - Namespace: desired.Namespace, - }, - } - - _, err = controllerutil.CreateOrUpdate(ctx, c, live, func() error { - live.Labels = desired.Labels - live.Annotations = desired.Annotations - live.Spec = desired.Spec - - if !metav1.IsControlledBy(live, cluster) { - if err := ctrl.SetControllerReference(cluster, live, scheme); err != nil { - return fmt.Errorf("setting controller reference on PostgreSQL ServiceMonitor: %w", err) - } - } - return nil - }) - if err != nil { - return fmt.Errorf("reconciling PostgreSQL ServiceMonitor %s: %w", desired.Name, err) - } - - return nil -} - -func reconcileConnectionPoolerMetricsServiceMonitor( - ctx context.Context, - c client.Client, - scheme *runtime.Scheme, - cluster *enterprisev4.PostgresCluster, - poolerType string, - enabled bool, -) error { - logger := log.FromContext(ctx) - name := poolerMetricsServiceMonitorName(cluster.Name, poolerType) - - if !enabled { - existing := &monitoringv1.ServiceMonitor{} - err := c.Get(ctx, types.NamespacedName{Name: name, Namespace: cluster.Namespace}, existing) - switch { - case apierrors.IsNotFound(err): - return nil - case err != nil: - return fmt.Errorf("getting PgBouncer ServiceMonitor %s: %w", name, err) - } - - logger.Info("Deleting PgBouncer ServiceMonitor", "name", name, "poolerType", poolerType) - if err := c.Delete(ctx, existing); err != nil && !apierrors.IsNotFound(err) { - return fmt.Errorf("deleting PgBouncer ServiceMonitor %s: %w", name, err) - } - return nil - } - - desired, err := buildConnectionPoolerMetricsServiceMonitor(scheme, cluster, poolerType) - if err != nil { - return fmt.Errorf("building PgBouncer ServiceMonitor for %s pooler: %w", poolerType, err) - } - - live := &monitoringv1.ServiceMonitor{ - ObjectMeta: metav1.ObjectMeta{ - Name: desired.Name, - Namespace: desired.Namespace, - }, - } - - _, err = controllerutil.CreateOrUpdate(ctx, c, live, func() error { - live.Labels = desired.Labels - live.Annotations = desired.Annotations - live.Spec = desired.Spec - - if !metav1.IsControlledBy(live, cluster) { - if err := ctrl.SetControllerReference(cluster, live, scheme); err != nil { - return fmt.Errorf("setting controller reference on PgBouncer ServiceMonitor: %w", err) - } - } - return nil - }) - if err != nil { - return fmt.Errorf("reconciling PgBouncer ServiceMonitor %s: %w", desired.Name, err) - } - - return nil -} diff --git a/pkg/postgresql/cluster/core/monitoring_unit_test.go b/pkg/postgresql/cluster/core/monitoring_unit_test.go index fe10c0943..917052efc 100644 --- a/pkg/postgresql/cluster/core/monitoring_unit_test.go +++ b/pkg/postgresql/cluster/core/monitoring_unit_test.go @@ -3,13 +3,8 @@ package core import ( "testing" - monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" enterprisev4 "github.com/splunk/splunk-operator/api/v4" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" "k8s.io/utils/ptr" ) @@ -81,6 +76,49 @@ func TestIsPostgreSQLMetricsEnabled(t *testing.T) { } } +func TestBuildScrapeAnnotations(t *testing.T) { + t.Run("postgres annotations", func(t *testing.T) { + got := buildPostgresScrapeAnnotations() + + assert.Equal(t, map[string]string{ + prometheusScrapeAnnotation: "true", + prometheusPathAnnotation: metricsPath, + prometheusPortAnnotation: postgresMetricsPortString, + }, got) + }) + + t.Run("pooler annotations", func(t *testing.T) { + got := 
buildPoolerScrapeAnnotations() + + assert.Equal(t, map[string]string{ + prometheusScrapeAnnotation: "true", + prometheusPathAnnotation: metricsPath, + prometheusPortAnnotation: poolerMetricsPortString, + }, got) + }) +} + +func TestRemoveScrapeAnnotations(t *testing.T) { + t.Run("removes only managed scrape keys", func(t *testing.T) { + annotations := map[string]string{ + prometheusScrapeAnnotation: "true", + prometheusPathAnnotation: metricsPath, + prometheusPortAnnotation: postgresMetricsPortString, + "custom": "keep-me", + } + + removeScrapeAnnotations(annotations) + + assert.Equal(t, map[string]string{ + "custom": "keep-me", + }, annotations) + }) + + t.Run("nil map is safe", func(t *testing.T) { + removeScrapeAnnotations(nil) + }) +} + func TestIsConnectionPoolerMetricsEnabled(t *testing.T) { tests := []struct { name string @@ -158,104 +196,6 @@ func TestIsConnectionPoolerMetricsEnabled(t *testing.T) { } } -func TestBuildPostgreSQLMetricsService(t *testing.T) { - scheme := newMonitoringTestScheme(t) - cluster := newTestMonitoringCluster() - - svc, err := buildPostgreSQLMetricsService(scheme, cluster) - require.NoError(t, err) - - assert.Equal(t, "postgresql-cluster-dev-postgres-metrics", svc.Name) - assert.Equal(t, cluster.Namespace, svc.Namespace) - assert.Equal(t, "postgresql-metrics", svc.Labels[labelObservabilityComponent]) - assert.Equal(t, cluster.Name, svc.Labels[cnpgClusterLabelName]) - assert.Equal(t, cluster.Name, svc.Spec.Selector[cnpgClusterLabelName]) - assert.Equal(t, cnpgPodRoleInstance, svc.Spec.Selector[cnpgPodRoleLabelName]) - require.Len(t, svc.Spec.Ports, 1) - assert.Equal(t, postgresMetricsPortName, svc.Spec.Ports[0].Name) - assert.Equal(t, postgresMetricsPort, svc.Spec.Ports[0].Port) - assert.Equal(t, postgresMetricsPortName, svc.Spec.Ports[0].TargetPort.StrVal) - assertMonitoringOwnerRef(t, svc.OwnerReferences, cluster) -} - -func TestBuildConnectionPoolerMetricsService(t *testing.T) { - scheme := newMonitoringTestScheme(t) - cluster := newTestMonitoringCluster() - - svc, err := buildConnectionPoolerMetricsService(scheme, cluster, readWriteEndpoint) - require.NoError(t, err) - - assert.Equal(t, "postgresql-cluster-dev-pooler-rw-metrics", svc.Name) - assert.Equal(t, "pgbouncer-metrics", svc.Labels[labelObservabilityComponent]) - assert.Equal(t, poolerResourceName(cluster.Name, readWriteEndpoint), svc.Labels[cnpgPoolerNameLabel]) - assert.Equal(t, poolerResourceName(cluster.Name, readWriteEndpoint), svc.Spec.Selector[cnpgPoolerNameLabel]) - require.Len(t, svc.Spec.Ports, 1) - assert.Equal(t, poolerMetricsPortName, svc.Spec.Ports[0].Name) - assert.Equal(t, poolerMetricsPort, svc.Spec.Ports[0].Port) - assert.Equal(t, poolerMetricsPortName, svc.Spec.Ports[0].TargetPort.StrVal) - assertMonitoringOwnerRef(t, svc.OwnerReferences, cluster) -} - -func TestBuildPostgreSQLMetricsServiceMonitor(t *testing.T) { - scheme := newMonitoringTestScheme(t) - cluster := newTestMonitoringCluster() - - sm, err := buildPostgreSQLMetricsServiceMonitor(scheme, cluster) - require.NoError(t, err) - - assert.Equal(t, "postgresql-cluster-dev-postgres-metrics-monitor", sm.Name) - assert.Equal(t, "postgresql-metrics", sm.Labels[labelObservabilityComponent]) - assert.Equal(t, cluster.Name, sm.Spec.Selector.MatchLabels[cnpgClusterLabelName]) - require.Len(t, sm.Spec.Endpoints, 1) - assert.Equal(t, postgresMetricsPortName, sm.Spec.Endpoints[0].Port) - assert.Equal(t, "/metrics", sm.Spec.Endpoints[0].Path) - assert.Equal(t, "http", sm.Spec.Endpoints[0].Scheme) - assertMonitoringOwnerRef(t, 
sm.OwnerReferences, cluster) -} - -func TestBuildConnectionPoolerMetricsServiceMonitor(t *testing.T) { - scheme := newMonitoringTestScheme(t) - cluster := newTestMonitoringCluster() - - sm, err := buildConnectionPoolerMetricsServiceMonitor(scheme, cluster, readOnlyEndpoint) - require.NoError(t, err) - - assert.Equal(t, "postgresql-cluster-dev-pooler-ro-metrics-monitor", sm.Name) - assert.Equal(t, "pgbouncer-metrics", sm.Labels[labelObservabilityComponent]) - assert.Equal(t, poolerResourceName(cluster.Name, readOnlyEndpoint), sm.Labels[cnpgPoolerNameLabel]) - assert.Equal(t, poolerResourceName(cluster.Name, readOnlyEndpoint), sm.Spec.Selector.MatchLabels[cnpgPoolerNameLabel]) - require.Len(t, sm.Spec.Endpoints, 1) - assert.Equal(t, poolerMetricsPortName, sm.Spec.Endpoints[0].Port) - assert.Equal(t, "/metrics", sm.Spec.Endpoints[0].Path) - assert.Equal(t, "http", sm.Spec.Endpoints[0].Scheme) - assertMonitoringOwnerRef(t, sm.OwnerReferences, cluster) -} - -func newMonitoringTestScheme(t *testing.T) *runtime.Scheme { - t.Helper() - - scheme := runtime.NewScheme() - require.NoError(t, corev1.AddToScheme(scheme)) - require.NoError(t, monitoringv1.AddToScheme(scheme)) - require.NoError(t, enterprisev4.AddToScheme(scheme)) - - return scheme -} - -func newTestMonitoringCluster() *enterprisev4.PostgresCluster { - return &enterprisev4.PostgresCluster{ - TypeMeta: metav1.TypeMeta{ - APIVersion: enterprisev4.GroupVersion.String(), - Kind: "PostgresCluster", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "postgresql-cluster-dev", - Namespace: "test", - UID: "cluster-uid", - }, - } -} - func newClassWithMonitoring( postgresEnabled *bool, poolerEnabled *bool, @@ -273,15 +213,3 @@ func newClassWithMonitoring( }, } } - -func assertMonitoringOwnerRef(t *testing.T, ownerRefs []metav1.OwnerReference, cluster *enterprisev4.PostgresCluster) { - t.Helper() - - require.Len(t, ownerRefs, 1) - assert.Equal(t, cluster.APIVersion, ownerRefs[0].APIVersion) - assert.Equal(t, cluster.Kind, ownerRefs[0].Kind) - assert.Equal(t, cluster.Name, ownerRefs[0].Name) - assert.Equal(t, cluster.UID, ownerRefs[0].UID) - require.NotNil(t, ownerRefs[0].Controller) - assert.True(t, *ownerRefs[0].Controller) -} diff --git a/pkg/postgresql/cluster/core/types.go b/pkg/postgresql/cluster/core/types.go index d4889edaa..7684e6df0 100644 --- a/pkg/postgresql/cluster/core/types.go +++ b/pkg/postgresql/cluster/core/types.go @@ -33,6 +33,7 @@ type normalizedCNPGClusterSpec struct { Owner string StorageSize string Resources corev1.ResourceRequirements + InheritedAnnotations map[string]string } // MergedConfig is the resolved configuration after overlaying PostgresCluster on PostgresClusterClass defaults. 
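`MergedConfig` resolves class defaults and per-cluster overrides with a nil-pointer convention: a nil override means inherit, a non-nil pointer wins. The same convention drives `isConnectionPoolerMetricsEnabled` above. A minimal generic sketch of that resolution rule, with illustrative names:

```go
package main

import "fmt"

// resolve applies the overlay convention: a non-nil override replaces the
// class default; nil means "inherit".
func resolve[T any](classDefault T, override *T) T {
	if override != nil {
		return *override
	}
	return classDefault
}

func main() {
	three := int32(3)
	fmt.Println(resolve(int32(2), &three)) // 3: cluster override wins
	fmt.Println(resolve(int32(2), nil))    // 2: inherited class default
}
```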
@@ -76,27 +77,21 @@ const ( failedClusterPhase reconcileClusterPhases = "Failed" // condition types - clusterReady conditionTypes = "ClusterReady" - poolerReady conditionTypes = "PoolerReady" - monitoringReady conditionTypes = "MonitoringReady" + clusterReady conditionTypes = "ClusterReady" + poolerReady conditionTypes = "PoolerReady" // condition reasons — clusterReady - reasonClusterClassNotFound conditionReasons = "ClusterClassNotFound" - reasonManagedRolesFailed conditionReasons = "ManagedRolesReconciliationFailed" - reasonClusterBuildFailed conditionReasons = "ClusterBuildFailed" - reasonClusterBuildSucceeded conditionReasons = "ClusterBuildSucceeded" - reasonClusterGetFailed conditionReasons = "ClusterGetFailed" - reasonClusterPatchFailed conditionReasons = "ClusterPatchFailed" - reasonInvalidConfiguration conditionReasons = "InvalidConfiguration" - reasonConfigMapFailed conditionReasons = "ConfigMapReconciliationFailed" - reasonUserSecretFailed conditionReasons = "UserSecretReconciliationFailed" - reasonSuperUserSecretFailed conditionReasons = "SuperUserSecretFailed" - reasonClusterDeleteFailed conditionReasons = "ClusterDeleteFailed" - reasonObservabilityResourcesReady conditionReasons = "ObservabilityResourcesReady" - reasonPostgresMetricsMonitorFailed conditionReasons = "PostgresMetricsMonitorFailed" - reasonPostgresMetricsServiceFailed conditionReasons = "PostgresMetricsServiceFailed" - reasonPoolerMetricsServiceFailed conditionReasons = "PoolerMetricsServiceFailed" - reasonPoolerMetricsMonitorFailed conditionReasons = "PoolerMetricsMonitorFailed" + reasonClusterClassNotFound conditionReasons = "ClusterClassNotFound" + reasonManagedRolesFailed conditionReasons = "ManagedRolesReconciliationFailed" + reasonClusterBuildFailed conditionReasons = "ClusterBuildFailed" + reasonClusterBuildSucceeded conditionReasons = "ClusterBuildSucceeded" + reasonClusterGetFailed conditionReasons = "ClusterGetFailed" + reasonClusterPatchFailed conditionReasons = "ClusterPatchFailed" + reasonInvalidConfiguration conditionReasons = "InvalidConfiguration" + reasonConfigMapFailed conditionReasons = "ConfigMapReconciliationFailed" + reasonUserSecretFailed conditionReasons = "UserSecretReconciliationFailed" + reasonSuperUserSecretFailed conditionReasons = "SuperUserSecretFailed" + reasonClusterDeleteFailed conditionReasons = "ClusterDeleteFailed" // condition reasons — poolerReady reasonPoolerReconciliationFailed conditionReasons = "PoolerReconciliationFailed" diff --git a/test/postgresql/monitoring/postgrescluster.yaml b/test/postgresql/monitoring/postgrescluster.yaml new file mode 100644 index 000000000..f572db864 --- /dev/null +++ b/test/postgresql/monitoring/postgrescluster.yaml @@ -0,0 +1,7 @@ +apiVersion: enterprise.splunk.com/v4 +kind: PostgresCluster +metadata: + name: pg-e2e +spec: + class: pg-monitoring + clusterDeletionPolicy: Delete diff --git a/test/postgresql/monitoring/postgresclusterclass.yaml b/test/postgresql/monitoring/postgresclusterclass.yaml new file mode 100644 index 000000000..37f2db96c --- /dev/null +++ b/test/postgresql/monitoring/postgresclusterclass.yaml @@ -0,0 +1,21 @@ +apiVersion: enterprise.splunk.com/v4 +kind: PostgresClusterClass +metadata: + name: pg-monitoring +spec: + provisioner: postgresql.cnpg.io + config: + instances: 2 + storage: 1Gi + postgresVersion: "17" + connectionPoolerEnabled: true + monitoring: + postgresqlMetrics: + enabled: true + connectionPoolerMetrics: + enabled: true + cnpg: + primaryUpdateMethod: switchover + connectionPooler: + instances: 2 + mode: 
transaction diff --git a/test/postgresql/monitoring/values.yaml b/test/postgresql/monitoring/values.yaml new file mode 100644 index 000000000..32079b7b5 --- /dev/null +++ b/test/postgresql/monitoring/values.yaml @@ -0,0 +1,37 @@ +grafana: + adminPassword: admin + +alertmanager: + enabled: false + +kubeStateMetrics: + enabled: false + +nodeExporter: + enabled: false + +prometheus: + prometheusSpec: + additionalScrapeConfigs: + - job_name: annotated-pods + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] + action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + target_label: __address__ + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: pod From 31e0fa06ff74686284b189d88d33fe4a1c33c280 Mon Sep 17 00:00:00 2001 From: dpishchenkov Date: Wed, 15 Apr 2026 14:41:55 +0200 Subject: [PATCH 27/36] update metrics and dashboard --- docs/PostgreSQLObservabilityDashboard.json | 161 +++++++++++------- docs/postgresSQLMonitoring-e2e.md | 16 +- .../controller/postgrescluster_controller.go | 2 + .../monitoring/postgrescluster.yaml | 7 - .../monitoring/postgresclusterclass.yaml | 21 --- 5 files changed, 115 insertions(+), 92 deletions(-) delete mode 100644 test/postgresql/monitoring/postgrescluster.yaml delete mode 100644 test/postgresql/monitoring/postgresclusterclass.yaml diff --git a/docs/PostgreSQLObservabilityDashboard.json b/docs/PostgreSQLObservabilityDashboard.json index 913e730b6..e9975ff59 100644 --- a/docs/PostgreSQLObservabilityDashboard.json +++ b/docs/PostgreSQLObservabilityDashboard.json @@ -52,7 +52,7 @@ }, "gridPos": { "h": 4, - "w": 6, + "w": 4, "x": 0, "y": 0 }, @@ -63,9 +63,7 @@ "justifyMode": "center", "orientation": "auto", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": ["lastNotNull"], "fields": "", "values": false }, @@ -75,7 +73,7 @@ "targets": [ { "editorMode": "code", - "expr": "count(count by (pod) (cnpg_pg_postmaster_start_time_seconds{namespace=\"$namespace\",service=\"$cluster-postgres-metrics\"}))", + "expr": "count(count by (pod) (cnpg_pg_postmaster_start_time{namespace=\"$namespace\",pod=~\"$cluster-[0-9]+\"}))", "legendFormat": "postgres pods", "range": true, "refId": "A" @@ -128,8 +126,8 @@ }, "gridPos": { "h": 4, - "w": 6, - "x": 6, + "w": 4, + "x": 4, "y": 0 }, "id": 2, @@ -139,9 +137,7 @@ "justifyMode": "center", "orientation": "auto", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": ["lastNotNull"], "fields": "", "values": false }, @@ -151,7 +147,7 @@ "targets": [ { "editorMode": "code", - "expr": "max(cnpg_pgbouncer_up{namespace=\"$namespace\",service=\"$cluster-pooler-rw-metrics\"})", + "expr": "max(1 - clamp_max(cnpg_pgbouncer_last_collection_error{namespace=\"$namespace\",pod=~\"$cluster-pooler-rw-.*\"}, 1))", "legendFormat": "rw", "range": true, "refId": "A" @@ -204,8 +200,8 @@ }, "gridPos": { "h": 4, - "w": 6, - "x": 12, + "w": 4, + "x": 8, "y": 0 }, "id": 3, @@ -215,9 +211,7 @@ "justifyMode": "center", "orientation": "auto", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": ["lastNotNull"], "fields": "", "values": false }, @@ -227,7 +221,7 @@ "targets": 
[ { "editorMode": "code", - "expr": "max(cnpg_pgbouncer_up{namespace=\"$namespace\",service=\"$cluster-pooler-ro-metrics\"})", + "expr": "max(1 - clamp_max(cnpg_pgbouncer_last_collection_error{namespace=\"$namespace\",pod=~\"$cluster-pooler-ro-.*\"}, 1))", "legendFormat": "ro", "range": true, "refId": "A" @@ -266,8 +260,8 @@ }, "gridPos": { "h": 4, - "w": 6, - "x": 18, + "w": 4, + "x": 12, "y": 0 }, "id": 4, @@ -277,9 +271,7 @@ "justifyMode": "center", "orientation": "auto", "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "calcs": ["lastNotNull"], "fields": "", "values": false }, @@ -289,7 +281,7 @@ "targets": [ { "editorMode": "code", - "expr": "sum(rate(cnpg_pg_stat_archiver_archived_count{namespace=\"$namespace\",service=\"$cluster-postgres-metrics\"}[5m]))", + "expr": "sum(rate(cnpg_pg_stat_archiver_archived_count{namespace=\"$namespace\",pod=~\"$cluster-[0-9]+\"}[5m]))", "legendFormat": "archive rate", "range": true, "refId": "A" @@ -298,6 +290,70 @@ "title": "Archive Rate", "type": "stat" }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 0 + }, + "id": 5, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "sum(splunk_operator_postgres_databases{phase=\"Failed\"})", + "legendFormat": "failed", + "range": true, + "refId": "A" + } + ], + "title": "Failed Databases", + "type": "stat" + }, { "datasource": { "type": "prometheus", @@ -380,7 +436,7 @@ "targets": [ { "editorMode": "code", - "expr": "sum by (datname) (cnpg_pg_database_size_bytes{namespace=\"$namespace\",service=\"$cluster-postgres-metrics\"})", + "expr": "sum by (datname) (cnpg_pg_database_size_bytes{namespace=\"$namespace\",pod=~\"$cluster-[0-9]+\"})", "legendFormat": "{{datname}}", "range": true, "refId": "A" @@ -471,15 +527,15 @@ "targets": [ { "editorMode": "code", - "expr": "sum by (service) (cnpg_pgbouncer_pools_cl_active{namespace=\"$namespace\",service=~\"$cluster-pooler-(rw|ro)-metrics\"})", - "legendFormat": "{{service}} active", + "expr": "sum by (pooler) (label_replace(cnpg_pgbouncer_pools_cl_active{namespace=\"$namespace\",pod=~\"$cluster-pooler-(rw|ro)-.*\"}, \"pooler\", \"$1\", \"pod\", \".*-pooler-(rw|ro)-.*\"))", + "legendFormat": "{{pooler}} active", "range": true, "refId": "A" }, { "editorMode": "code", - "expr": "sum by (service) (cnpg_pgbouncer_pools_cl_waiting{namespace=\"$namespace\",service=~\"$cluster-pooler-(rw|ro)-metrics\"})", - "legendFormat": "{{service}} waiting", + "expr": "sum by (pooler) (label_replace(cnpg_pgbouncer_pools_cl_waiting{namespace=\"$namespace\",pod=~\"$cluster-pooler-(rw|ro)-.*\"}, \"pooler\", \"$1\", \"pod\", \".*-pooler-(rw|ro)-.*\"))", + "legendFormat": "{{pooler}} waiting", "range": true, "refId": "B" } @@ -569,14 +625,14 @@ "targets": [ { "editorMode": "code", - "expr": 
"sum(rate(cnpg_pg_stat_archiver_archived_count{namespace=\"$namespace\",service=\"$cluster-postgres-metrics\"}[5m]))", + "expr": "sum(rate(cnpg_pg_stat_archiver_archived_count{namespace=\"$namespace\",pod=~\"$cluster-[0-9]+\"}[5m]))", "legendFormat": "archived WAL / sec", "range": true, "refId": "A" }, { "editorMode": "code", - "expr": "sum(cnpg_pg_wal_files_total{namespace=\"$namespace\",service=\"$cluster-postgres-metrics\"})", + "expr": "sum(cnpg_collector_pg_wal{namespace=\"$namespace\",pod=~\"$cluster-[0-9]+\",value=\"count\"})", "legendFormat": "wal files total", "range": true, "refId": "B" @@ -594,7 +650,7 @@ }, "id": 100, "options": { - "content": "## Controller-Runtime (built-in, zero code)\nReconcile count, duration, errors — provided automatically by the framework.", + "content": "## Controller-Runtime (built-in, zero code)\nReconcile count, duration, errors \u2014 provided automatically by the framework.", "mode": "markdown" }, "title": "", @@ -736,7 +792,7 @@ }, "id": 200, "options": { - "content": "## Domain Metrics (custom, `splunk_operator_postgres_*`)\nFleet state gauges (collected every reconcile) and status transitions (emitted on condition changes).", + "content": "## Domain Metrics (custom, `splunk_operator_postgres_*`)\nFleet state gauges (collected every reconcile) and status transitions (emitted on condition changes).\n\nRuntime PostgreSQL and PgBouncer panels above assume annotation-based pod scraping and filter by `pod`, not dedicated metrics Services.", "mode": "markdown" }, "title": "", @@ -758,9 +814,7 @@ "legend": { "displayMode": "table", "placement": "right", - "values": [ - "value" - ] + "values": ["value"] }, "pieType": "donut" }, @@ -796,9 +850,7 @@ "legend": { "displayMode": "table", "placement": "right", - "values": [ - "value" - ] + "values": ["value"] }, "pieType": "donut" }, @@ -914,13 +966,7 @@ "refresh": "30s", "schemaVersion": 39, "style": "dark", - "tags": [ - "postgresql", - "cnpg", - "pgbouncer", - "splunk-operator", - "reference" - ], + "tags": ["postgresql", "cnpg", "pgbouncer", "splunk-operator", "reference"], "templating": { "list": [ { @@ -944,14 +990,14 @@ { "current": { "selected": true, - "text": "default", - "value": "default" + "text": "test", + "value": "test" }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "definition": "label_values(cnpg_pg_postmaster_start_time_seconds, namespace)", + "definition": "label_values(kube_pod_labels{label_cnpg_io_cluster!=\"\"}, namespace)", "hide": 0, "includeAll": false, "label": "Namespace", @@ -959,7 +1005,7 @@ "name": "namespace", "options": [], "query": { - "query": "label_values(cnpg_pg_postmaster_start_time_seconds, namespace)", + "query": "label_values(kube_pod_labels{label_cnpg_io_cluster!=\"\"}, namespace)", "refId": "Prometheus-namespace" }, "refresh": 2, @@ -970,30 +1016,31 @@ }, { "current": { - "selected": false, - "text": "", - "value": "" + "selected": true, + "text": "All", + "value": "$__all" }, "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "definition": "label_values(cnpg_pg_postmaster_start_time_seconds{namespace=\"$namespace\"}, service)", + "definition": "label_values(kube_pod_labels{label_cnpg_io_cluster!=\"\", namespace=\"$namespace\"}, label_cnpg_io_cluster)", "hide": 0, - "includeAll": false, + "includeAll": true, "label": "Cluster", "multi": false, "name": "cluster", "options": [], "query": { - "query": "label_values(cnpg_pg_postmaster_start_time_seconds{namespace=\"$namespace\"}, service)", + "query": 
"label_values(kube_pod_labels{label_cnpg_io_cluster!=\"\", namespace=\"$namespace\"}, label_cnpg_io_cluster)", "refId": "Prometheus-cluster" }, "refresh": 2, - "regex": "/(.*)-postgres-metrics/", + "regex": "", "skipUrlSync": false, "sort": 1, - "type": "query" + "type": "query", + "allValue": ".*" } ] }, diff --git a/docs/postgresSQLMonitoring-e2e.md b/docs/postgresSQLMonitoring-e2e.md index e4409753a..3153fd305 100644 --- a/docs/postgresSQLMonitoring-e2e.md +++ b/docs/postgresSQLMonitoring-e2e.md @@ -32,7 +32,7 @@ alertmanager: enabled: false kubeStateMetrics: - enabled: false + enabled: true nodeExporter: enabled: false @@ -44,14 +44,16 @@ prometheus: kubernetes_sd_configs: - role: pod relabel_configs: - - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + - source_labels: + [__meta_kubernetes_pod_annotation_prometheus_io_scrape] action: keep regex: true - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] action: replace target_label: __metrics_path__ regex: (.+) - - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] + - source_labels: + [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] action: replace regex: ([^:]+)(?::\d+)?;(\d+) replacement: $1:$2 @@ -192,11 +194,11 @@ up{job="annotated-pods", namespace="test"} ``` ```promql -count by (pod) (cnpg_pg_postmaster_start_time_seconds{namespace="test"}) +count by (pod) (cnpg_pg_postmaster_start_time{namespace="test"}) ``` ```promql -cnpg_pgbouncer_up{namespace="test"} +cnpg_pgbouncer_last_collection_error{namespace="test"} ``` ## 7. Access Grafana @@ -229,11 +231,11 @@ up{job="annotated-pods", namespace="test"} ``` ```promql -cnpg_pg_postmaster_start_time_seconds{namespace="test"} +cnpg_pg_postmaster_start_time{namespace="test"} ``` ```promql -cnpg_pgbouncer_up{namespace="test"} +cnpg_pgbouncer_last_collection_error{namespace="test"} ``` ### Dashboard import diff --git a/internal/controller/postgrescluster_controller.go b/internal/controller/postgrescluster_controller.go index 78aecad32..886373031 100644 --- a/internal/controller/postgrescluster_controller.go +++ b/internal/controller/postgrescluster_controller.go @@ -64,6 +64,8 @@ type PostgresClusterReconciler struct { func (r *PostgresClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { metrics := r.Metrics if metrics == nil { + // Tests and minimal reconciler wiring may omit a metrics adapter. + // Fall back to a no-op recorder so status updates can proceed safely. 
metrics = &pgprometheus.NoopRecorder{} } rc := &clustercore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder, Metrics: metrics} diff --git a/test/postgresql/monitoring/postgrescluster.yaml b/test/postgresql/monitoring/postgrescluster.yaml deleted file mode 100644 index f572db864..000000000 --- a/test/postgresql/monitoring/postgrescluster.yaml +++ /dev/null @@ -1,7 +0,0 @@ -apiVersion: enterprise.splunk.com/v4 -kind: PostgresCluster -metadata: - name: pg-e2e -spec: - class: pg-monitoring - clusterDeletionPolicy: Delete diff --git a/test/postgresql/monitoring/postgresclusterclass.yaml b/test/postgresql/monitoring/postgresclusterclass.yaml deleted file mode 100644 index 37f2db96c..000000000 --- a/test/postgresql/monitoring/postgresclusterclass.yaml +++ /dev/null @@ -1,21 +0,0 @@ -apiVersion: enterprise.splunk.com/v4 -kind: PostgresClusterClass -metadata: - name: pg-monitoring -spec: - provisioner: postgresql.cnpg.io - config: - instances: 2 - storage: 1Gi - postgresVersion: "17" - connectionPoolerEnabled: true - monitoring: - postgresqlMetrics: - enabled: true - connectionPoolerMetrics: - enabled: true - cnpg: - primaryUpdateMethod: switchover - connectionPooler: - instances: 2 - mode: transaction From 08dfa1610a136abbf7a899fb107068d0399138e4 Mon Sep 17 00:00:00 2001 From: dpishchenkov Date: Wed, 15 Apr 2026 14:59:22 +0200 Subject: [PATCH 28/36] fix rebase errors --- internal/controller/postgrescluster_controller_test.go | 1 + pkg/postgresql/cluster/core/cluster.go | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go index 730eb3251..47e1d0824 100644 --- a/internal/controller/postgrescluster_controller_test.go +++ b/internal/controller/postgrescluster_controller_test.go @@ -34,6 +34,7 @@ import ( enterprisev4 "github.com/splunk/splunk-operator/api/v4" "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core" + pgprometheus "github.com/splunk/splunk-operator/pkg/postgresql/shared/adapter/prometheus" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 3b98ff100..80825bf04 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -119,6 +119,8 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. return ctrl.Result{}, errors.Join(err, statusErr) } + postgresMetricsEnabled := isPostgreSQLMetricsEnabled(postgresCluster, clusterClass) + poolerMetricsEnabled := isConnectionPoolerMetricsEnabled(postgresCluster, clusterClass) // Resolve or derive the superuser secret name. if postgresCluster.Status.Resources != nil && postgresCluster.Status.Resources.SuperUserSecretRef != nil { postgresSecretName = postgresCluster.Status.Resources.SuperUserSecretRef.Name @@ -191,7 +193,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
switch { case apierrors.IsNotFound(err): logger.Info("CNPG Cluster creation started", "name", postgresCluster.Name) - newCluster, err := buildCNPGCluster(rc.Scheme, postgresCluster, mergedConfig, postgresSecretName) + newCluster, err := buildCNPGCluster(rc.Scheme, postgresCluster, mergedConfig, postgresSecretName, postgresMetricsEnabled) if err != nil { logger.Error(err, "Failed to build CNPG Cluster", "name", postgresCluster.Name) return ctrl.Result{}, err @@ -508,7 +510,7 @@ func buildCNPGClusterSpec(cfg *MergedConfig, secretName string, postgresMetricsE return spec } -func buildCNPGCluster(scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, secretName string, postgresMetricsEnabled bool) *cnpgv1.Cluster { +func buildCNPGCluster(scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, secretName string, postgresMetricsEnabled bool) (*cnpgv1.Cluster, error) { cnpg := &cnpgv1.Cluster{ ObjectMeta: metav1.ObjectMeta{Name: cluster.Name, Namespace: cluster.Namespace}, Spec: buildCNPGClusterSpec(cfg, secretName, postgresMetricsEnabled), From 8ed740c1b2c16ff61c0bac9c1ea520ac66ee2baf Mon Sep 17 00:00:00 2001 From: dpishchenkov Date: Wed, 15 Apr 2026 15:08:02 +0200 Subject: [PATCH 29/36] fix CR --- config/samples/enterprise_v4_postgresclusterclass_dev.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config/samples/enterprise_v4_postgresclusterclass_dev.yaml b/config/samples/enterprise_v4_postgresclusterclass_dev.yaml index 082d5fad9..89e915033 100644 --- a/config/samples/enterprise_v4_postgresclusterclass_dev.yaml +++ b/config/samples/enterprise_v4_postgresclusterclass_dev.yaml @@ -27,10 +27,10 @@ spec: cpu: "1" memory: "2Gi" connectionPoolerEnabled: true - observability: - postgresql: + monitoring: + postgresqlMetrics: enabled: true - pgbouncer: + connectionPoolerMetrics: enabled: true cnpg: From 14c94b0ed8fc89080dfa44c8d21d08a9bb626a3b Mon Sep 17 00:00:00 2001 From: dpishchenkov Date: Wed, 15 Apr 2026 18:06:50 +0200 Subject: [PATCH 30/36] update to use Otel --- docs/PostgreSQLObservabilityDashboard.json | 2 +- docs/postgresSQLMonitoring-e2e.md | 54 ++- docs/postgresSQLMonitoring-otel-e2e.md | 434 ++++++++++++++++++ .../monitoring/otel-collector-values.yaml | 53 +++ test/postgresql/monitoring/otel-rbac.yaml | 25 + .../prometheus-via-otel-values.yaml | 43 ++ 6 files changed, 600 insertions(+), 11 deletions(-) create mode 100644 docs/postgresSQLMonitoring-otel-e2e.md create mode 100644 test/postgresql/monitoring/otel-collector-values.yaml create mode 100644 test/postgresql/monitoring/otel-rbac.yaml create mode 100644 test/postgresql/monitoring/prometheus-via-otel-values.yaml diff --git a/docs/PostgreSQLObservabilityDashboard.json b/docs/PostgreSQLObservabilityDashboard.json index e9975ff59..68439cbc9 100644 --- a/docs/PostgreSQLObservabilityDashboard.json +++ b/docs/PostgreSQLObservabilityDashboard.json @@ -792,7 +792,7 @@ }, "id": 200, "options": { - "content": "## Domain Metrics (custom, `splunk_operator_postgres_*`)\nFleet state gauges (collected every reconcile) and status transitions (emitted on condition changes).\n\nRuntime PostgreSQL and PgBouncer panels above assume annotation-based pod scraping and filter by `pod`, not dedicated metrics Services.", + "content": "## Domain Metrics (custom, `splunk_operator_postgres_*`)\nFleet state gauges (collected every reconcile) and status transitions (emitted on condition changes).\n\nRuntime PostgreSQL and PgBouncer panels above assume annotation-based pod scraping and 
filter by `pod`, not dedicated metrics Services.\n\nIf you validate through OpenTelemetry Collector, keep the Grafana datasource as Prometheus and have Prometheus scrape only the OTel Collector exporter for these metrics to avoid duplicate series.",
         "mode": "markdown"
       },
       "title": "",
diff --git a/docs/postgresSQLMonitoring-e2e.md b/docs/postgresSQLMonitoring-e2e.md
index 3153fd305..f49378d79 100644
--- a/docs/postgresSQLMonitoring-e2e.md
+++ b/docs/postgresSQLMonitoring-e2e.md
@@ -13,6 +13,12 @@ Verify that:
 
 `ServiceMonitor` is still acceptable for operator-controller metrics if you want that separately, but it is not part of this feature validation.
 
+The reference dashboard in [PostgreSQLObservabilityDashboard.json](./PostgreSQLObservabilityDashboard.json) assumes:
+
+- `namespace=test`
+- `cluster=postgresql-cluster-dev`
+- `kube_pod_labels` is available for dashboard variables
+
 ## Prerequisites
 
 - KIND cluster is running
@@ -131,20 +137,28 @@ spec:
 EOF
 ```
 
-## 3. Apply test resources
+## 3. Apply sample resources
 
 Create the namespace and apply the sample resources:
 
 ```bash
 kubectl create namespace test
-kubectl apply -f test/postgresql/monitoring/postgresclusterclass.yaml
-kubectl apply -f pgclustertest.yaml
+kubectl apply -f config/samples/enterprise_v4_postgresclusterclass_dev.yaml
+kubectl apply -n test -f config/samples/enterprise_v4_postgrescluster_dev.yaml
+kubectl apply -n test -f config/samples/enterprise_v4_postgresdatabase.yaml
 ```
 
+These samples create:
+
+- `PostgresClusterClass` `postgresql-dev`
+- `PostgresCluster` `postgresql-cluster-dev`
+- `PostgresDatabase` `splunk-databases`
+
 ## 4. Verify reconciled resources
 
 ```bash
 kubectl get postgrescluster -n test
+kubectl get postgresdatabase -n test
 kubectl get cluster.postgresql.cnpg.io -n test
 kubectl get pooler.postgresql.cnpg.io -n test
 kubectl get pods -n test
@@ -155,7 +169,7 @@
 PostgreSQL pods:
 
 ```bash
-kubectl get pods -n test -l cnpg.io/cluster= -o yaml | rg 'prometheus.io/'
+kubectl get pods -n test -l cnpg.io/cluster=postgresql-cluster-dev -o yaml | rg 'prometheus.io/'
 ```
 
 Expected:
@@ -167,8 +181,8 @@ Expected:
 Pooler pods:
 
 ```bash
-kubectl get pods -n test -l cnpg.io/poolerName=-pooler-rw -o yaml | rg 'prometheus.io/'
-kubectl get pods -n test -l cnpg.io/poolerName=-pooler-ro -o yaml | rg 'prometheus.io/'
+kubectl get pods -n test -l cnpg.io/poolerName=postgresql-cluster-dev-pooler-rw -o yaml | rg 'prometheus.io/'
+kubectl get pods -n test -l cnpg.io/poolerName=postgresql-cluster-dev-pooler-ro -o yaml | rg 'prometheus.io/'
 ```
 
 Expected:
@@ -194,11 +208,15 @@ up{job="annotated-pods", namespace="test"}
 ```
 
 ```promql
-count by (pod) (cnpg_pg_postmaster_start_time{namespace="test"})
+count(count by (pod) (cnpg_pg_postmaster_start_time{namespace="test",pod=~"postgresql-cluster-dev-[0-9]+"}))
 ```
 
 ```promql
-cnpg_pgbouncer_last_collection_error{namespace="test"}
+max(1 - clamp_max(cnpg_pgbouncer_last_collection_error{namespace="test",pod=~"postgresql-cluster-dev-pooler-rw-.*"}, 1))
+```
+
+```promql
+sum(rate(cnpg_pg_stat_archiver_archived_count{namespace="test",pod=~"postgresql-cluster-dev-[0-9]+"}[5m]))
 ```
 
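+For a quick scripted check you can also hit the Prometheus targets API instead
+of the UI. A minimal sketch, assuming Prometheus is port-forwarded to
+`localhost:9090` as in the steps above and `jq` is installed:
+
+```bash
+# List the discovered annotated pods and their scrape health.
+curl -s 'http://localhost:9090/api/v1/targets?state=active' \
+  | jq '.data.activeTargets[]
+        | select(.labels.job == "annotated-pods")
+        | {pod: .labels.pod, health: .health}'
+```
+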
 ## 7. Access Grafana
@@ -242,7 +260,7 @@ cnpg_pgbouncer_last_collection_error{namespace="test"}
 
 You can also import the reference dashboard from:
 
-- [PostgreSQLObservabilityDashboard.json](/Users/dpishchenkov/splunk-operator/docs/PostgreSQLObservabilityDashboard.json)
+- [PostgreSQLObservabilityDashboard.json](./PostgreSQLObservabilityDashboard.json)
 
 In Grafana:
 
@@ -250,13 +268,29 @@ In Grafana:
 2. Click **New** -> **Import**
 3. Upload `docs/PostgreSQLObservabilityDashboard.json`
 4. Select the Prometheus datasource
+5. Set `namespace` to `test`
+6. Set `cluster` to `postgresql-cluster-dev`
+
+The dashboard variables use:
+
+```promql
+label_values(kube_pod_labels{label_cnpg_io_cluster!=""}, namespace)
+```
+
+and:
+
+```promql
+label_values(kube_pod_labels{label_cnpg_io_cluster!="", namespace="$namespace"}, label_cnpg_io_cluster)
+```
+
+So `kubeStateMetrics.enabled: true` in `values.yaml` is required for the imported dashboard to work as-is.
 
 ## 8. Optional disable test
 
 Disable monitoring in the `PostgresCluster` and verify annotations disappear:
 
 ```bash
-kubectl patch postgrescluster -n test --type=merge -p '
+kubectl patch postgrescluster postgresql-cluster-dev -n test --type=merge -p '
 spec:
   monitoring:
     postgresqlMetrics:
diff --git a/docs/postgresSQLMonitoring-otel-e2e.md b/docs/postgresSQLMonitoring-otel-e2e.md
new file mode 100644
index 000000000..31d464bbe
--- /dev/null
+++ b/docs/postgresSQLMonitoring-otel-e2e.md
@@ -0,0 +1,434 @@
+# PostgreSQL Monitoring E2E with OTel Collector
+
+This document describes how to validate PostgreSQL and PgBouncer monitoring with OpenTelemetry Collector using the current annotation-based design.
+
+## Goal
+
+Verify that:
+
+- PostgreSQL pods are discoverable through `prometheus.io/*` pod annotations
+- PgBouncer pooler pods are discoverable through `prometheus.io/*` pod annotations
+- OTel Collector scrapes those targets through Kubernetes pod discovery
+- scraped metrics are re-exposed by OTel and then queried through Grafana
+- disabling monitoring removes those scrape targets
+
+This test intentionally does not use dedicated metrics `Service`s or `ServiceMonitor`s for PostgreSQL and PgBouncer.
+
+The reference dashboard in [PostgreSQLObservabilityDashboard.json](./PostgreSQLObservabilityDashboard.json) assumes:
+
+- `namespace=test`
+- `cluster=postgresql-cluster-dev`
+- `kube_pod_labels` is available from kube-state-metrics
+
+## Prerequisites
+
+- KIND cluster is running
+- CNPG is installed
+- Splunk Operator is installed
+- CRDs are up to date
+- test resources exist in namespace `test`
+
+## Recommended Setup
+
+Use OTel Collector for scraping and re-expose the metrics to Prometheus for Grafana queries.
+
+In this setup:
+
+- OTel Collector scrapes annotated PostgreSQL and PgBouncer pods
+- OTel Collector re-exposes those metrics on its own Prometheus exporter endpoint
+- Prometheus scrapes the OTel Collector pod
+- Grafana queries Prometheus
+
+Grafana does not query OTel Collector directly. The Grafana datasource remains Prometheus.
+
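+The data path, as an informal sketch of the setup just described:
+
+```text
+PostgreSQL / PgBouncer pods  (prometheus.io/* annotations)
+        |
+        |  scraped by the OTel prometheus receiver (role: pod)
+        v
+OTel Collector  (re-exposes everything on its :8889 prometheus exporter)
+        |
+        |  scraped as job "otel-collector"
+        v
+Prometheus  <---  queried by Grafana (Prometheus datasource)
+```
+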
+## 1. Deploy OTel Collector
+
+Use the concrete Helm values file and RBAC manifest:
+
+- [otel-collector-values.yaml](../test/postgresql/monitoring/otel-collector-values.yaml)
+- [otel-rbac.yaml](../test/postgresql/monitoring/otel-rbac.yaml)
+
+Install the Collector:
+
+```bash
+kubectl create namespace monitoring
+
+helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts
+helm repo update
+
+helm install otel open-telemetry/opentelemetry-collector \
+  --namespace monitoring \
+  -f test/postgresql/monitoring/otel-collector-values.yaml
+```
+
+If the `otel` release already exists, use:
+
+```bash
+helm upgrade otel open-telemetry/opentelemetry-collector \
+  --namespace monitoring \
+  -f test/postgresql/monitoring/otel-collector-values.yaml
+```
+
+Grant the Collector RBAC required for Kubernetes pod discovery:
+
+```bash
+kubectl apply -f test/postgresql/monitoring/otel-rbac.yaml
+```
+
+This setup uses:
+
+- Prometheus receiver with Kubernetes pod discovery
+- `prometheus.io/*` relabeling
+- Prometheus exporter on port `8889`
+- `debug` exporter for easy validation in logs
+
+If this RBAC is missing, the Collector will fail with errors like:
+
+```text
+failed to list *v1.Pod: pods is forbidden
+```
+
+because the service account needs cluster-scoped `get`, `list`, and `watch` access for pod discovery.
+
+## 2. Install Prometheus and Grafana for the OTel path
+
+Use the Prometheus values file that scrapes only the OTel Collector exporter:
+
+- [prometheus-via-otel-values.yaml](../test/postgresql/monitoring/prometheus-via-otel-values.yaml)
+
+Install:
+
+```bash
+helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+helm repo add grafana https://grafana.github.io/helm-charts
+helm repo update
+
+helm install kube-prometheus prometheus-community/kube-prometheus-stack \
+  --namespace monitoring \
+  -f test/postgresql/monitoring/prometheus-via-otel-values.yaml
+```
+
+If the `kube-prometheus` release already exists, use:
+
+```bash
+helm upgrade kube-prometheus prometheus-community/kube-prometheus-stack \
+  --namespace monitoring \
+  -f test/postgresql/monitoring/prometheus-via-otel-values.yaml
+```
+
+This is important: Prometheus should scrape the OTel Collector exporter, not the PostgreSQL and PgBouncer pods directly. Otherwise Grafana will bypass OTel or you will get duplicate series.
+
+## 3. Apply PostgreSQL sample resources
+
+Apply:
+
+- `config/samples/enterprise_v4_postgresclusterclass_dev.yaml`
+- `config/samples/enterprise_v4_postgrescluster_dev.yaml`
+- `config/samples/enterprise_v4_postgresdatabase.yaml`
+
+Example:
+
+```bash
+kubectl create namespace test
+kubectl apply -f config/samples/enterprise_v4_postgresclusterclass_dev.yaml
+kubectl apply -n test -f config/samples/enterprise_v4_postgrescluster_dev.yaml
+kubectl apply -n test -f config/samples/enterprise_v4_postgresdatabase.yaml
+```
+
+These samples create:
+
+- `PostgresClusterClass` `postgresql-dev`
+- `PostgresCluster` `postgresql-cluster-dev`
+- `PostgresDatabase` `splunk-databases`
+
+## 4. Verify reconciled resources
+
+```bash
+kubectl get postgrescluster -n test
+kubectl get postgresdatabase -n test
+kubectl get cluster.postgresql.cnpg.io -n test
+kubectl get pooler.postgresql.cnpg.io -n test
+kubectl get pods -n test
+```
+
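+Optionally, block until CNPG reports the cluster healthy before moving on. A
+convenience sketch, assuming CNPG publishes its usual `Ready` condition on the
+`Cluster` resource and the sample names above are in use:
+
+```bash
+# Wait up to 5 minutes for the CNPG cluster to become Ready before
+# inspecting pod annotations or scrape targets.
+kubectl wait cluster.postgresql.cnpg.io/postgresql-cluster-dev \
+  -n test --for=condition=Ready --timeout=300s
+```
+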
+## 5. Verify annotations on workloads
+
+PostgreSQL pods:
+
+```bash
+kubectl get pods -n test -l cnpg.io/cluster=postgresql-cluster-dev -o yaml | rg 'prometheus.io/'
+```
+
+Expected:
+
+- `prometheus.io/scrape: "true"`
+- `prometheus.io/path: /metrics`
+- `prometheus.io/port: "9187"`
+
+PgBouncer RW pooler pods:
+
+```bash
+kubectl get pods -n test -l cnpg.io/poolerName=postgresql-cluster-dev-pooler-rw -o yaml | rg 'prometheus.io/'
+```
+
+PgBouncer RO pooler pods:
+
+```bash
+kubectl get pods -n test -l cnpg.io/poolerName=postgresql-cluster-dev-pooler-ro -o yaml | rg 'prometheus.io/'
+```
+
+Expected for both:
+
+- `prometheus.io/scrape: "true"`
+- `prometheus.io/path: /metrics`
+- `prometheus.io/port: "9127"`
+
+## 6. Verify OTel Collector scraping
+
+If using the `debug` exporter:
+
+```bash
+kubectl logs -n monitoring deploy/otel-opentelemetry-collector -f
+```
+
+Look for metrics such as:
+
+- `cnpg_pg_postmaster_start_time`
+- `cnpg_pg_database_size_bytes`
+- `cnpg_collector_pg_wal`
+- `cnpg_pg_stat_archiver_archived_count`
+- `cnpg_pgbouncer_last_collection_error`
+- `cnpg_pgbouncer_pools_cl_active`
+
+## 7. Verify Prometheus is scraping OTel
+
+Port-forward Prometheus:
+
+```bash
+kubectl port-forward -n monitoring svc/kube-prometheus-prometheus 9090:9090
+```
+
+Check that Prometheus is scraping the OTel Collector pod:
+
+```promql
+up{job="otel-collector", namespace="monitoring"}
+```
+
+If this returns no series, the usual causes are:
+
+- the `kube-prometheus` release was not upgraded with [prometheus-via-otel-values.yaml](../test/postgresql/monitoring/prometheus-via-otel-values.yaml)
+- Prometheus is still using the old `annotated-pods` scrape job
+- kube-state-metrics is still disabled, which also breaks the dashboard variables
+
+Then verify PostgreSQL and PgBouncer metrics coming through that OTel path:
+
+```promql
+count(count by (pod) (cnpg_pg_postmaster_start_time{job="otel-collector",namespace="test",pod=~"postgresql-cluster-dev-[0-9]+"}))
+```
+
+```promql
+max(1 - clamp_max(cnpg_pgbouncer_last_collection_error{job="otel-collector",namespace="test",pod=~"postgresql-cluster-dev-pooler-rw-.*"}, 1))
+```
+
+## 8. Access Grafana
+
+Port-forward Grafana:
+
+```bash
+kubectl port-forward -n monitoring svc/kube-prometheus-grafana 3000:80
+```
+
+Open:
+
+- http://localhost:3000
+
+Login:
+
+- user: `admin`
+- password: `admin`
+
+Use the default Prometheus datasource. In this setup, Grafana is using metrics that flowed through OTel because Prometheus is scraping only the OTel Collector exporter.
+
+## 9. Verify dashboard queries
+
+Import:
+
+- [PostgreSQLObservabilityDashboard.json](./PostgreSQLObservabilityDashboard.json)
+
+Set:
+
+- `namespace=test`
+- `cluster=postgresql-cluster-dev`
+
+The dashboard does not need query changes for this path, but it assumes kube-state-metrics is enabled for the `namespace` and `cluster` variables.
+
+You can confirm the live Prometheus release picked up the right values with:
+
+```bash
+helm get values -n monitoring kube-prometheus
+```
+
+Expected:
+
+- `kubeStateMetrics.enabled: true`
+- additional scrape job `otel-collector`
+
+## 10.
Verify backend metrics + +Validate with queries such as: + +```promql +up{job="otel-collector", namespace="monitoring"} +``` + +```promql +count(count by (pod) (cnpg_pg_postmaster_start_time{job="otel-collector",namespace="test",pod=~"postgresql-cluster-dev-[0-9]+"})) +``` + +```promql +max(1 - clamp_max(cnpg_pgbouncer_last_collection_error{job="otel-collector",namespace="test",pod=~"postgresql-cluster-dev-pooler-rw-.*"}, 1)) +``` + +```promql +sum by (pooler) (label_replace(cnpg_pgbouncer_pools_cl_active{job="otel-collector",namespace="test",pod=~"postgresql-cluster-dev-pooler-(rw|ro)-.*"}, "pooler", "$1", "pod", ".*-pooler-(rw|ro)-.*")) +``` + +The dashboard variables use: + +```promql +label_values(kube_pod_labels{label_cnpg_io_cluster!=""}, namespace) +``` + +and: + +```promql +label_values(kube_pod_labels{label_cnpg_io_cluster!="", namespace="$namespace"}, label_cnpg_io_cluster) +``` + +## 11. Disable monitoring and validate removal + +Disable both metrics paths: + +```bash +kubectl patch postgrescluster postgresql-cluster-dev -n test --type=merge -p ' +spec: + monitoring: + postgresqlMetrics: + disabled: true + connectionPoolerMetrics: + disabled: true +' +``` + +Re-check pod annotations: + +```bash +kubectl get pods -n test -l cnpg.io/cluster=postgresql-cluster-dev -o yaml | rg 'prometheus.io/' || true +kubectl get pods -n test -l cnpg.io/poolerName=postgresql-cluster-dev-pooler-rw -o yaml | rg 'prometheus.io/' || true +kubectl get pods -n test -l cnpg.io/poolerName=postgresql-cluster-dev-pooler-ro -o yaml | rg 'prometheus.io/' || true +``` + +Expected: + +- scrape annotations disappear +- OTel stops scraping those targets after discovery refresh + +## Test Plan + +### Test 1: PostgreSQL annotations are present + +Steps: + +1. Apply monitoring-enabled class and cluster +2. Wait for PostgreSQL pods +3. Inspect pod annotations + +Pass criteria: + +- PostgreSQL pods contain the expected scrape annotations + +### Test 2: Pooler annotations are present + +Steps: + +1. Apply class with `connectionPoolerEnabled=true` +2. Wait for RW and RO poolers and their pods +3. Inspect pooler pod annotations + +Pass criteria: + +- RW and RO pooler pods contain the expected scrape annotations + +### Test 3: OTel Collector scrapes PostgreSQL and poolers + +Steps: + +1. Run Collector with pod discovery +2. Inspect Collector logs or backend metrics + +Pass criteria: + +- PostgreSQL metrics are visible in OTel logs +- PgBouncer metrics are visible in OTel logs + +### Test 4: Prometheus and Grafana use the OTel path + +Steps: + +1. Verify `up{job="otel-collector", namespace="monitoring"}` +2. Verify PostgreSQL and PgBouncer metrics with `job="otel-collector"` +3. Import the dashboard and select the Prometheus datasource + +Pass criteria: + +- Prometheus is scraping the OTel Collector exporter +- Grafana panels return data from the `otel-collector` job + +### Test 5: Disable override removes scrape targets + +Steps: + +1. Patch the `PostgresCluster` to disable both monitoring paths +2. Re-check workload annotations +3. Re-check Collector or backend + +Pass criteria: + +- annotations are removed +- targets disappear from Collector/backend over time + +### Test 6: Cluster-only disable path + +Steps: + +1. Keep class monitoring enabled +2. Disable only in `PostgresCluster.spec.monitoring` + +Pass criteria: + +- class defaults remain unchanged +- only the target cluster loses annotations + +## Troubleshooting + +If no metrics appear: + +1. Check pod annotations first +2. Check Collector logs +3. 
Check whether the Collector is using `role: pod`
+4. Check relabeling for `prometheus.io/scrape`
+5. Check namespace filters in your backend queries
+
+Useful quick queries:
+
+```promql
+up{namespace="test"}
+```
+
+```promql
+cnpg_pg_postmaster_start_time{namespace="test"}
+```
+
+```promql
+cnpg_pgbouncer_last_collection_error{namespace="test"}
+```
diff --git a/test/postgresql/monitoring/otel-collector-values.yaml b/test/postgresql/monitoring/otel-collector-values.yaml
new file mode 100644
index 000000000..b68f1f273
--- /dev/null
+++ b/test/postgresql/monitoring/otel-collector-values.yaml
@@ -0,0 +1,53 @@
+mode: deployment
+
+image:
+  repository: otel/opentelemetry-collector-contrib
+
+podAnnotations:
+  prometheus.io/scrape: "true"
+  prometheus.io/path: /metrics
+  prometheus.io/port: "8889"
+
+config:
+  receivers:
+    prometheus:
+      config:
+        scrape_configs:
+          - job_name: annotated-pods
+            kubernetes_sd_configs:
+              - role: pod
+            relabel_configs:
+              - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
+                action: keep
+                regex: true
+              - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
+                action: replace
+                target_label: __metrics_path__
+                regex: (.+)
+              - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
+                action: replace
+                regex: ([^:]+)(?::\d+)?;(\d+)
+                replacement: $1:$2
+                target_label: __address__
+              - source_labels: [__meta_kubernetes_namespace]
+                action: replace
+                target_label: namespace
+              - source_labels: [__meta_kubernetes_pod_name]
+                action: replace
+                target_label: pod
+
+  processors:
+    batch: {}
+
+  exporters:
+    debug:
+      verbosity: normal
+    prometheus:
+      endpoint: "0.0.0.0:8889"
+
+  service:
+    pipelines:
+      metrics:
+        receivers: [prometheus]
+        processors: [batch]
+        exporters: [debug, prometheus]
diff --git a/test/postgresql/monitoring/otel-rbac.yaml b/test/postgresql/monitoring/otel-rbac.yaml
new file mode 100644
index 000000000..1e55e37b4
--- /dev/null
+++ b/test/postgresql/monitoring/otel-rbac.yaml
@@ -0,0 +1,25 @@
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: otel-prometheus-pod-discovery
+rules:
+- apiGroups: [""]
+  resources: ["pods", "nodes", "nodes/proxy", "services", "endpoints", "namespaces"]
+  verbs: ["get", "list", "watch"]
+- apiGroups: ["discovery.k8s.io"]
+  resources: ["endpointslices"]
+  verbs: ["get", "list", "watch"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: otel-prometheus-pod-discovery
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: otel-prometheus-pod-discovery
+subjects:
+- kind: ServiceAccount
+  name: otel-opentelemetry-collector
+  namespace: monitoring
diff --git a/test/postgresql/monitoring/prometheus-via-otel-values.yaml b/test/postgresql/monitoring/prometheus-via-otel-values.yaml
new file mode 100644
index 000000000..b1df9371c
--- /dev/null
+++ b/test/postgresql/monitoring/prometheus-via-otel-values.yaml
@@ -0,0 +1,43 @@
+grafana:
+  adminPassword: admin
+
+alertmanager:
+  enabled: false
+
+kubeStateMetrics:
+  enabled: true
+
+nodeExporter:
+  enabled: false
+
+prometheus:
+  prometheusSpec:
+    additionalScrapeConfigs:
+      - job_name: otel-collector
+        kubernetes_sd_configs:
+          - role: pod
+        relabel_configs:
+          - source_labels: [__meta_kubernetes_namespace]
+            action: keep
+            regex: monitoring
+          - source_labels: [__meta_kubernetes_pod_name]
+            action: keep
+            regex: otel-opentelemetry-collector-.*
+          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
+            action: keep
+            regex: true
+          - 
source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] + action: replace + regex: ([^:]+)(?::\d+)?;(\d+) + replacement: $1:$2 + target_label: __address__ + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: pod From 3b83475eccafa85346db258df49f59aa18664304 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20P=C5=82oski?= Date: Thu, 16 Apr 2026 22:25:16 +0200 Subject: [PATCH 31/36] PR tweaks --- api/v4/postgrescluster_types.go | 17 +- api/v4/zz_generated.deepcopy.go | 80 ++++ ...nterprise.splunk.com_postgresclusters.yaml | 22 +- docs/postgresSQLMonitoring-e2e.md | 321 ------------- docs/postgresSQLMonitoring-otel-e2e.md | 434 ------------------ .../controller/postgrescluster_controller.go | 8 +- .../postgrescluster_controller_test.go | 162 ++++--- .../controller/postgresdatabase_controller.go | 7 +- internal/controller/suite_test.go | 1 + pkg/postgresql/cluster/core/cluster.go | 36 +- .../cluster/core/cluster_unit_test.go | 9 +- pkg/postgresql/cluster/core/events.go | 38 +- pkg/postgresql/cluster/core/monitoring.go | 44 +- .../cluster/core/monitoring_unit_test.go | 10 +- 14 files changed, 261 insertions(+), 928 deletions(-) delete mode 100644 docs/postgresSQLMonitoring-e2e.md delete mode 100644 docs/postgresSQLMonitoring-otel-e2e.md diff --git a/api/v4/postgrescluster_types.go b/api/v4/postgrescluster_types.go index 6450b33fe..310b2427c 100644 --- a/api/v4/postgrescluster_types.go +++ b/api/v4/postgrescluster_types.go @@ -114,20 +114,17 @@ type PostgresClusterSpec struct { } // PostgresClusterMonitoring overrides monitoring configuration options for PostgresClusterClass. +// Set a field to false to disable a metric target that is enabled in the class. type PostgresClusterMonitoring struct { - - // +optional - PostgreSQLMetrics *FeatureDisableOverride `json:"postgresqlMetrics,omitempty"` - + // PostgreSQLMetrics overrides whether PostgreSQL metrics scraping is enabled. + // When unset, the class-level setting applies. // +optional - ConnectionPoolerMetrics *FeatureDisableOverride `json:"connectionPoolerMetrics,omitempty"` -} + PostgreSQLMetrics *bool `json:"postgresqlMetrics,omitempty"` -type FeatureDisableOverride struct { - // Disabled set to true will disable the feature even if it's enabled in the class. - // +kubebuilder:default=false + // ConnectionPoolerMetrics overrides whether connection pooler metrics scraping is enabled. + // When unset, the class-level setting applies. // +optional - Disabled *bool `json:"disabled,omitempty"` + ConnectionPoolerMetrics *bool `json:"connectionPoolerMetrics,omitempty"` } // PostgresClusterResources defines references to Kubernetes resources related to the PostgresCluster, such as ConfigMaps and Secrets. diff --git a/api/v4/zz_generated.deepcopy.go b/api/v4/zz_generated.deepcopy.go index c698411c7..fd2c1905c 100644 --- a/api/v4/zz_generated.deepcopy.go +++ b/api/v4/zz_generated.deepcopy.go @@ -922,6 +922,26 @@ func (in *ManagedRolesStatus) DeepCopy() *ManagedRolesStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MetricsClassConfig) DeepCopyInto(out *MetricsClassConfig) { + *out = *in + if in.Enabled != nil { + in, out := &in.Enabled, &out.Enabled + *out = new(bool) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetricsClassConfig. +func (in *MetricsClassConfig) DeepCopy() *MetricsClassConfig { + if in == nil { + return nil + } + out := new(MetricsClassConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *MonitoringConsole) DeepCopyInto(out *MonitoringConsole) { *out = *in @@ -1220,6 +1240,11 @@ func (in *PostgresClusterClassConfig) DeepCopyInto(out *PostgresClusterClassConf *out = new(bool) **out = **in } + if in.Monitoring != nil { + in, out := &in.Monitoring, &out.Monitoring + *out = new(PostgresMonitoringClassConfig) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresClusterClassConfig. @@ -1348,6 +1373,31 @@ func (in *PostgresClusterList) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PostgresClusterMonitoring) DeepCopyInto(out *PostgresClusterMonitoring) { + *out = *in + if in.PostgreSQLMetrics != nil { + in, out := &in.PostgreSQLMetrics, &out.PostgreSQLMetrics + *out = new(bool) + **out = **in + } + if in.ConnectionPoolerMetrics != nil { + in, out := &in.ConnectionPoolerMetrics, &out.ConnectionPoolerMetrics + *out = new(bool) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresClusterMonitoring. +func (in *PostgresClusterMonitoring) DeepCopy() *PostgresClusterMonitoring { + if in == nil { + return nil + } + out := new(PostgresClusterMonitoring) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PostgresClusterResources) DeepCopyInto(out *PostgresClusterResources) { *out = *in @@ -1425,6 +1475,11 @@ func (in *PostgresClusterSpec) DeepCopyInto(out *PostgresClusterSpec) { *out = new(string) **out = **in } + if in.Monitoring != nil { + in, out := &in.Monitoring, &out.Monitoring + *out = new(PostgresClusterMonitoring) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresClusterSpec. @@ -1605,6 +1660,31 @@ func (in *PostgresDatabaseStatus) DeepCopy() *PostgresDatabaseStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PostgresMonitoringClassConfig) DeepCopyInto(out *PostgresMonitoringClassConfig) { + *out = *in + if in.PostgreSQLMetrics != nil { + in, out := &in.PostgreSQLMetrics, &out.PostgreSQLMetrics + *out = new(MetricsClassConfig) + (*in).DeepCopyInto(*out) + } + if in.ConnectionPoolerMetrics != nil { + in, out := &in.ConnectionPoolerMetrics, &out.ConnectionPoolerMetrics + *out = new(MetricsClassConfig) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PostgresMonitoringClassConfig. 
+func (in *PostgresMonitoringClassConfig) DeepCopy() *PostgresMonitoringClassConfig { + if in == nil { + return nil + } + out := new(PostgresMonitoringClassConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PremiumAppsProps) DeepCopyInto(out *PremiumAppsProps) { *out = *in diff --git a/config/crd/bases/enterprise.splunk.com_postgresclusters.yaml b/config/crd/bases/enterprise.splunk.com_postgresclusters.yaml index ff5d6dbf5..331306b48 100644 --- a/config/crd/bases/enterprise.splunk.com_postgresclusters.yaml +++ b/config/crd/bases/enterprise.splunk.com_postgresclusters.yaml @@ -131,21 +131,15 @@ spec: features. properties: connectionPoolerMetrics: - properties: - disabled: - default: false - description: Disabled set to true will disable the feature - even if it's enabled in the class. - type: boolean - type: object + description: |- + ConnectionPoolerMetrics overrides whether connection pooler metrics scraping is enabled. + When unset, the class-level setting applies. + type: boolean postgresqlMetrics: - properties: - disabled: - default: false - description: Disabled set to true will disable the feature - even if it's enabled in the class. - type: boolean - type: object + description: |- + PostgreSQLMetrics overrides whether PostgreSQL metrics scraping is enabled. + When unset, the class-level setting applies. + type: boolean type: object pgHBA: default: [] diff --git a/docs/postgresSQLMonitoring-e2e.md b/docs/postgresSQLMonitoring-e2e.md deleted file mode 100644 index f49378d79..000000000 --- a/docs/postgresSQLMonitoring-e2e.md +++ /dev/null @@ -1,321 +0,0 @@ -# PostgreSQL Monitoring E2E on KIND - -This validates the PostgreSQL and PgBouncer monitoring flow in namespace `test`. - -## Goal - -Verify that: - -- PostgreSQL pods are scraped through pod annotations -- PgBouncer pooler pods are scraped through pod annotations -- no dedicated metrics `Service` is required -- no `ServiceMonitor` is used for PostgreSQL or PgBouncer - -`ServiceMonitor` is still acceptable for operator-controller metrics if you want that separately, but it is not part of this feature validation. - -The reference dashboard in [PostgreSQLObservabilityDashboard.json](./PostgreSQLObservabilityDashboard.json) assumes: - -- `namespace=test` -- `cluster=postgresql-cluster-dev` -- `kube_pod_labels` is available for dashboard variables - -## Prerequisites - -- KIND cluster is running -- CNPG is installed -- Splunk Operator is installed -- CRDs are up to date - -## 1. 
Install Prometheus and Grafana - -Create `values.yaml`: - -```yaml -grafana: - adminPassword: admin - -alertmanager: - enabled: false - -kubeStateMetrics: - enabled: true - -nodeExporter: - enabled: false - -prometheus: - prometheusSpec: - additionalScrapeConfigs: - - job_name: annotated-pods - kubernetes_sd_configs: - - role: pod - relabel_configs: - - source_labels: - [__meta_kubernetes_pod_annotation_prometheus_io_scrape] - action: keep - regex: true - - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] - action: replace - target_label: __metrics_path__ - regex: (.+) - - source_labels: - [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] - action: replace - regex: ([^:]+)(?::\d+)?;(\d+) - replacement: $1:$2 - target_label: __address__ - - source_labels: [__meta_kubernetes_namespace] - action: replace - target_label: namespace - - source_labels: [__meta_kubernetes_pod_name] - action: replace - target_label: pod -``` - -Install the stack: - -```bash -kubectl create namespace monitoring - -helm repo add prometheus-community https://prometheus-community.github.io/helm-charts -helm repo add grafana https://grafana.github.io/helm-charts -helm repo update - -helm install kube-prometheus prometheus-community/kube-prometheus-stack \ - --namespace monitoring \ - -f values.yaml -``` - -## 2. Optional: scrape operator-controller metrics - -This is separate from the PostgreSQL and PgBouncer validation. - -Grant Prometheus access: - -```bash -kubectl apply -f - < **Import** -3. Upload `docs/PostgreSQLObservabilityDashboard.json` -4. Select the Prometheus datasource -5. Set `namespace` to `test` -6. Set `cluster` to `postgresql-cluster-dev` - -The dashboard variables use: - -```promql -label_values(kube_pod_labels{label_cnpg_io_cluster!=""}, namespace) -``` - -and: - -```promql -label_values(kube_pod_labels{label_cnpg_io_cluster!="", namespace="$namespace"}, label_cnpg_io_cluster) -``` - -So `kubeStateMetrics.enabled: true` in `values.yaml` is required for the imported dashboard to work as-is. - -## 8. Optional disable test - -Disable monitoring in the `PostgresCluster` and verify annotations disappear: - -```bash -kubectl patch postgrescluster postgresql-cluster-dev -n test --type=merge -p ' -spec: - monitoring: - postgresqlMetrics: - disabled: true - connectionPoolerMetrics: - disabled: true -' -``` - -Then re-check: - -```bash -kubectl get pods -n test -l cnpg.io/cluster= -o yaml | rg 'prometheus.io/' || true -kubectl get pods -n test -l cnpg.io/poolerName=-pooler-rw -o yaml | rg 'prometheus.io/' || true -kubectl get pods -n test -l cnpg.io/poolerName=-pooler-ro -o yaml | rg 'prometheus.io/' || true -``` - -Prometheus should also stop showing those targets under `annotated-pods` after discovery refresh. - -## Notes - -- Use `ServiceMonitor` only for operator-controller metrics if needed. -- Do not use `ServiceMonitor` for PostgreSQL or PgBouncer in this E2E, because that bypasses the feature under test. 
-- Verify both: - - reconciled CNPG specs - - actual pod annotations -- PostgreSQL annotations come from CNPG `Cluster.Spec.InheritedMetadata` -- pooler annotations come from CNPG `Pooler.Spec.Template` diff --git a/docs/postgresSQLMonitoring-otel-e2e.md b/docs/postgresSQLMonitoring-otel-e2e.md deleted file mode 100644 index 31d464bbe..000000000 --- a/docs/postgresSQLMonitoring-otel-e2e.md +++ /dev/null @@ -1,434 +0,0 @@ -# PostgreSQL Monitoring E2E with OTel Collector - -This document describes how to validate PostgreSQL and PgBouncer monitoring with OpenTelemetry Collector using the current annotation-based design. - -## Goal - -Verify that: - -- PostgreSQL pods are discoverable through `prometheus.io/*` pod annotations -- PgBouncer pooler pods are discoverable through `prometheus.io/*` pod annotations -- OTel Collector scrapes those targets through Kubernetes pod discovery -- scraped metrics are re-exposed by OTel and then queried through Grafana -- disabling monitoring removes those scrape targets - -This test intentionally does not use dedicated metrics `Service`s or `ServiceMonitor`s for PostgreSQL and PgBouncer. - -The reference dashboard in [PostgreSQLObservabilityDashboard.json](./PostgreSQLObservabilityDashboard.json) assumes: - -- `namespace=test` -- `cluster=postgresql-cluster-dev` -- `kube_pod_labels` is available from kube-state-metrics - -## Prerequisites - -- KIND cluster is running -- CNPG is installed -- Splunk Operator is installed -- CRDs are up to date -- test resources exist in namespace `test` - -## Recommended Setup - -Use OTel Collector for scraping and re-expose the metrics to Prometheus for Grafana queries. - -In this setup: - -- OTel Collector scrapes annotated PostgreSQL and PgBouncer pods -- OTel Collector re-exposes those metrics on its own Prometheus exporter endpoint -- Prometheus scrapes the OTel Collector pod -- Grafana queries Prometheus - -Grafana does not query OTel Collector directly. The Grafana datasource remains Prometheus. - -## 1. Deploy OTel Collector - -Use the concrete Helm values file: - -- [otel-collector-values.yaml](../test/postgresql/monitoring/otel-collector-values.yaml) -- [otel-rbac.yaml](../test/postgresql/monitoring/otel-rbac.yaml) - -Install the Collector: - -```bash -kubectl create namespace monitoring - -helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts -helm repo update - -helm install otel open-telemetry/opentelemetry-collector \ - --namespace monitoring \ - -f test/postgresql/monitoring/otel-collector-values.yaml -``` - -If the `otel` release already exists, use: - -```bash -helm upgrade otel open-telemetry/opentelemetry-collector \ - --namespace monitoring \ - -f test/postgresql/monitoring/otel-collector-values.yaml -``` - -Grant the Collector RBAC required for Kubernetes pod discovery: - -```bash -kubectl apply -f test/postgresql/monitoring/otel-rbac.yaml -``` - -This setup uses: - -- Prometheus receiver with Kubernetes pod discovery -- `prometheus.io/*` relabeling -- Prometheus exporter on port `8889` -- `debug` exporter for easy validation in logs - -If this RBAC is missing, the Collector will fail with errors like: - -```text -failed to list *v1.Pod: pods is forbidden -``` - -because the service account needs cluster-scoped `get`, `list`, and `watch` access for pod discovery. - -## 2. 
Install Prometheus and Grafana for the OTel path - -Use the Prometheus values file that scrapes only the OTel Collector exporter: - -- [prometheus-via-otel-values.yaml](../test/postgresql/monitoring/prometheus-via-otel-values.yaml) - -Install: - -```bash -helm repo add prometheus-community https://prometheus-community.github.io/helm-charts -helm repo add grafana https://grafana.github.io/helm-charts -helm repo update - -helm install kube-prometheus prometheus-community/kube-prometheus-stack \ - --namespace monitoring \ - -f test/postgresql/monitoring/prometheus-via-otel-values.yaml -``` - -If the `kube-prometheus` release already exists, use: - -```bash -helm upgrade kube-prometheus prometheus-community/kube-prometheus-stack \ - --namespace monitoring \ - -f test/postgresql/monitoring/prometheus-via-otel-values.yaml -``` - -This is important: Prometheus should scrape the OTel Collector exporter, not the PostgreSQL and PgBouncer pods directly. Otherwise Grafana will bypass OTel or you will get duplicate series. - -## 3. Apply PostgreSQL sample resources - -Apply: - -- `config/samples/enterprise_v4_postgresclusterclass_dev.yaml` -- `config/samples/enterprise_v4_postgrescluster_dev.yaml` -- `config/samples/enterprise_v4_postgresdatabase.yaml` - -Example: - -```bash -kubectl create namespace test -kubectl apply -f config/samples/enterprise_v4_postgresclusterclass_dev.yaml -kubectl apply -n test -f config/samples/enterprise_v4_postgrescluster_dev.yaml -kubectl apply -n test -f config/samples/enterprise_v4_postgresdatabase.yaml -``` - -These samples create: - -- `PostgresClusterClass` `postgresql-dev` -- `PostgresCluster` `postgresql-cluster-dev` -- `PostgresDatabase` `splunk-databases` - -## 4. Verify reconciled resources - -```bash -kubectl get postgrescluster -n test -kubectl get postgresdatabase -n test -kubectl get cluster.postgresql.cnpg.io -n test -kubectl get pooler.postgresql.cnpg.io -n test -kubectl get pods -n test -``` - -## 5. Verify annotations on workloads - -PostgreSQL pods: - -```bash -kubectl get pods -n test -l cnpg.io/cluster=postgresql-cluster-dev -o yaml | rg 'prometheus.io/' -``` - -Expected: - -- `prometheus.io/scrape: "true"` -- `prometheus.io/path: /metrics` -- `prometheus.io/port: "9187"` - -PgBouncer RW pooler pods: - -```bash -kubectl get pods -n test -l cnpg.io/poolerName=postgresql-cluster-dev-pooler-rw -o yaml | rg 'prometheus.io/' -``` - -PgBouncer RO pooler pods: - -```bash -kubectl get pods -n test -l cnpg.io/poolerName=postgresql-cluster-dev-pooler-ro -o yaml | rg 'prometheus.io/' -``` - -Expected for both: - -- `prometheus.io/scrape: "true"` -- `prometheus.io/path: /metrics` -- `prometheus.io/port: "9127"` - -## 6. Verify OTel Collector scraping - -If using the `debug` exporter: - -```bash -kubectl logs -n deploy/ -f -``` - -Look for metrics such as: - -- `cnpg_pg_postmaster_start_time` -- `cnpg_pg_database_size_bytes` -- `cnpg_collector_pg_wal` -- `cnpg_pg_stat_archiver_archived_count` -- `cnpg_pgbouncer_last_collection_error` -- `cnpg_pgbouncer_pools_cl_active` - -## 7. 
Verify Prometheus is scraping OTel - -Port-forward Prometheus: - -```bash -kubectl port-forward -n monitoring svc/kube-prometheus-prometheus 9090:9090 -``` - -Check that Prometheus is scraping the OTel Collector pod: - -```promql -up{job="otel-collector", namespace="monitoring"} -``` - -If this returns no series, the usual causes are: - -- the `kube-prometheus` release was not upgraded with [prometheus-via-otel-values.yaml](../test/postgresql/monitoring/prometheus-via-otel-values.yaml) -- Prometheus is still using the old `annotated-pods` scrape job -- kube-state-metrics is still disabled, which also breaks the dashboard variables - -Then verify PostgreSQL and PgBouncer metrics coming through that OTel path: - -```promql -count(count by (pod) (cnpg_pg_postmaster_start_time{job="otel-collector",namespace="test",pod=~"postgresql-cluster-dev-[0-9]+"})) -``` - -```promql -max(1 - clamp_max(cnpg_pgbouncer_last_collection_error{job="otel-collector",namespace="test",pod=~"postgresql-cluster-dev-pooler-rw-.*"}, 1)) -``` - -## 8. Access Grafana - -Port-forward Grafana: - -```bash -kubectl port-forward -n monitoring svc/kube-prometheus-grafana 3000:80 -``` - -Open: - -- http://localhost:3000 - -Login: - -- user: `admin` -- password: `admin` - -Use the default Prometheus datasource. In this setup, Grafana is using metrics that flowed through OTel because Prometheus is scraping only the OTel Collector exporter. - -## 9. Verify dashboard queries - -Import: - -- [PostgreSQLObservabilityDashboard.json](./PostgreSQLObservabilityDashboard.json) - -Set: - -- `namespace=test` -- `cluster=postgresql-cluster-dev` - -The dashboard does not need query changes for this path, but it assumes kube-state-metrics is enabled for the `namespace` and `cluster` variables. - -You can confirm the live Prometheus release picked up the right values with: - -```bash -helm get values -n monitoring kube-prometheus -``` - -Expected: - -- `kubeStateMetrics.enabled: true` -- additional scrape job `otel-collector` - -## 10. Verify backend metrics - -Validate with queries such as: - -```promql -up{job="otel-collector", namespace="monitoring"} -``` - -```promql -count(count by (pod) (cnpg_pg_postmaster_start_time{job="otel-collector",namespace="test",pod=~"postgresql-cluster-dev-[0-9]+"})) -``` - -```promql -max(1 - clamp_max(cnpg_pgbouncer_last_collection_error{job="otel-collector",namespace="test",pod=~"postgresql-cluster-dev-pooler-rw-.*"}, 1)) -``` - -```promql -sum by (pooler) (label_replace(cnpg_pgbouncer_pools_cl_active{job="otel-collector",namespace="test",pod=~"postgresql-cluster-dev-pooler-(rw|ro)-.*"}, "pooler", "$1", "pod", ".*-pooler-(rw|ro)-.*")) -``` - -The dashboard variables use: - -```promql -label_values(kube_pod_labels{label_cnpg_io_cluster!=""}, namespace) -``` - -and: - -```promql -label_values(kube_pod_labels{label_cnpg_io_cluster!="", namespace="$namespace"}, label_cnpg_io_cluster) -``` - -## 11. 
Disable monitoring and validate removal - -Disable both metrics paths: - -```bash -kubectl patch postgrescluster postgresql-cluster-dev -n test --type=merge -p ' -spec: - monitoring: - postgresqlMetrics: - disabled: true - connectionPoolerMetrics: - disabled: true -' -``` - -Re-check pod annotations: - -```bash -kubectl get pods -n test -l cnpg.io/cluster=postgresql-cluster-dev -o yaml | rg 'prometheus.io/' || true -kubectl get pods -n test -l cnpg.io/poolerName=postgresql-cluster-dev-pooler-rw -o yaml | rg 'prometheus.io/' || true -kubectl get pods -n test -l cnpg.io/poolerName=postgresql-cluster-dev-pooler-ro -o yaml | rg 'prometheus.io/' || true -``` - -Expected: - -- scrape annotations disappear -- OTel stops scraping those targets after discovery refresh - -## Test Plan - -### Test 1: PostgreSQL annotations are present - -Steps: - -1. Apply monitoring-enabled class and cluster -2. Wait for PostgreSQL pods -3. Inspect pod annotations - -Pass criteria: - -- PostgreSQL pods contain the expected scrape annotations - -### Test 2: Pooler annotations are present - -Steps: - -1. Apply class with `connectionPoolerEnabled=true` -2. Wait for RW and RO poolers and their pods -3. Inspect pooler pod annotations - -Pass criteria: - -- RW and RO pooler pods contain the expected scrape annotations - -### Test 3: OTel Collector scrapes PostgreSQL and poolers - -Steps: - -1. Run Collector with pod discovery -2. Inspect Collector logs or backend metrics - -Pass criteria: - -- PostgreSQL metrics are visible in OTel logs -- PgBouncer metrics are visible in OTel logs - -### Test 4: Prometheus and Grafana use the OTel path - -Steps: - -1. Verify `up{job="otel-collector", namespace="monitoring"}` -2. Verify PostgreSQL and PgBouncer metrics with `job="otel-collector"` -3. Import the dashboard and select the Prometheus datasource - -Pass criteria: - -- Prometheus is scraping the OTel Collector exporter -- Grafana panels return data from the `otel-collector` job - -### Test 5: Disable override removes scrape targets - -Steps: - -1. Patch the `PostgresCluster` to disable both monitoring paths -2. Re-check workload annotations -3. Re-check Collector or backend - -Pass criteria: - -- annotations are removed -- targets disappear from Collector/backend over time - -### Test 6: Cluster-only disable path - -Steps: - -1. Keep class monitoring enabled -2. Disable only in `PostgresCluster.spec.monitoring` - -Pass criteria: - -- class defaults remain unchanged -- only the target cluster loses annotations - -## Troubleshooting - -If no metrics appear: - -1. Check pod annotations first -2. Check Collector logs -3. Check whether the Collector is using `role: pod` -4. Check relabeling for `prometheus.io/scrape` -5. 
Check namespace filters in your backend queries - -Useful quick queries: - -```promql -up{namespace="test"} -``` - -```promql -cnpg_pg_postmaster_start_time_seconds{namespace="test"} -``` - -```promql -cnpg_pgbouncer_up{namespace="test"} -``` diff --git a/internal/controller/postgrescluster_controller.go b/internal/controller/postgrescluster_controller.go index 886373031..6d42d72bf 100644 --- a/internal/controller/postgrescluster_controller.go +++ b/internal/controller/postgrescluster_controller.go @@ -62,13 +62,7 @@ type PostgresClusterReconciler struct { // +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch func (r *PostgresClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - metrics := r.Metrics - if metrics == nil { - // Tests and minimal reconciler wiring may omit a metrics adapter. - // Fall back to a no-op recorder so status updates can proceed safely. - metrics = &pgprometheus.NoopRecorder{} - } - rc := &clustercore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder, Metrics: metrics} + rc := &clustercore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder, Metrics: r.Metrics} result, err := clustercore.PostgresClusterService(ctx, rc, req) r.FleetCollector.CollectClusterMetrics(ctx, r.Client, r.Metrics) if sharedreconcile.IsPureConflict(err) { diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go index 47e1d0824..80b412562 100644 --- a/internal/controller/postgrescluster_controller_test.go +++ b/internal/controller/postgrescluster_controller_test.go @@ -36,6 +36,7 @@ import ( "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core" pgprometheus "github.com/splunk/splunk-operator/pkg/postgresql/shared/adapter/prometheus" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" ) /* @@ -70,6 +71,8 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { ctx context.Context clusterName string className string + classNameMetrics string + classNamePooler string pgCluster *enterprisev4.PostgresCluster pgClusterClass *enterprisev4.PostgresClusterClass pgClusterKey types.NamespacedName @@ -78,37 +81,13 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { req reconcile.Request ) - const ( - scrapeAnnotationKey = "prometheus.io/scrape" - pathAnnotationKey = "prometheus.io/path" - portAnnotationKey = "prometheus.io/port" - metricsPath = "/metrics" - postgresPort = "9187" - poolerPort = "9127" - ) - reconcileNTimes := func(times int) { - for i := 0; i < times; i++ { + for range times { _, err := reconciler.Reconcile(ctx, req) Expect(err).NotTo(HaveOccurred()) } } - recreateClusterClass := func(modify func(*enterprisev4.PostgresClusterClass)) { - Expect(k8sClient.Delete(ctx, pgClusterClass)).To(Succeed()) - Eventually(func() bool { - return apierrors.IsNotFound(k8sClient.Get(ctx, pgClusterClassKey, &enterprisev4.PostgresClusterClass{})) - }, "10s", "250ms").Should(BeTrue()) - - pgClusterClass = pgClusterClass.DeepCopy() - pgClusterClass.ResourceVersion = "" - pgClusterClass.UID = "" - if modify != nil { - modify(pgClusterClass) - } - Expect(k8sClient.Create(ctx, pgClusterClass)).To(Succeed()) - } - BeforeEach(func() { nameSuffix := fmt.Sprintf("%d-%d-%d", GinkgoParallelProcess(), @@ -119,6 +98,8 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { ctx = context.Background() clusterName = clusterNamePrefix + nameSuffix className = 
classNamePrefix + nameSuffix + classNameMetrics = classNamePrefix + "metrics-" + nameSuffix + classNamePooler = classNamePrefix + "pooler-" + nameSuffix pgClusterKey = types.NamespacedName{Name: clusterName, Namespace: namespace} pgClusterClassKey = types.NamespacedName{Name: className, Namespace: namespace} @@ -127,21 +108,60 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Spec: enterprisev4.PostgresClusterClassSpec{ Provisioner: provisioner, Config: &enterprisev4.PostgresClusterClassConfig{ - Instances: &[]int32{clusterMemberCount}[0], - Storage: &[]resource.Quantity{resource.MustParse(storageAmount)}[0], - PostgresVersion: &[]string{postgresVersion}[0], - ConnectionPoolerEnabled: &[]bool{poolerEnabled}[0], + Instances: ptr.To(clusterMemberCount), + Storage: ptr.To(resource.MustParse(storageAmount)), + PostgresVersion: ptr.To(postgresVersion), + ConnectionPoolerEnabled: ptr.To(poolerEnabled), + }, + }, + } + + pgClassPostgresMetrics := &enterprisev4.PostgresClusterClass{ + ObjectMeta: metav1.ObjectMeta{Name: classNameMetrics}, + Spec: enterprisev4.PostgresClusterClassSpec{ + Provisioner: provisioner, + Config: &enterprisev4.PostgresClusterClassConfig{ + Instances: ptr.To(clusterMemberCount), + Storage: ptr.To(resource.MustParse(storageAmount)), + PostgresVersion: ptr.To(postgresVersion), + Monitoring: &enterprisev4.PostgresMonitoringClassConfig{ + PostgreSQLMetrics: &enterprisev4.MetricsClassConfig{Enabled: ptr.To(true)}, + }, + }, + }, + } + + pgClassPoolerMetrics := &enterprisev4.PostgresClusterClass{ + ObjectMeta: metav1.ObjectMeta{Name: classNamePooler}, + Spec: enterprisev4.PostgresClusterClassSpec{ + Provisioner: provisioner, + Config: &enterprisev4.PostgresClusterClassConfig{ + Instances: ptr.To(clusterMemberCount), + Storage: ptr.To(resource.MustParse(storageAmount)), + PostgresVersion: ptr.To(postgresVersion), + ConnectionPoolerEnabled: ptr.To(true), + Monitoring: &enterprisev4.PostgresMonitoringClassConfig{ + ConnectionPoolerMetrics: &enterprisev4.MetricsClassConfig{Enabled: ptr.To(true)}, + }, + }, + CNPG: &enterprisev4.CNPGConfig{ + ConnectionPooler: &enterprisev4.ConnectionPoolerConfig{ + Instances: ptr.To(int32(2)), + Mode: ptr.To(enterprisev4.ConnectionPoolerModeTransaction), + }, }, }, } Expect(k8sClient.Create(ctx, pgClusterClass)).To(Succeed()) + Expect(k8sClient.Create(ctx, pgClassPostgresMetrics)).To(Succeed()) + Expect(k8sClient.Create(ctx, pgClassPoolerMetrics)).To(Succeed()) pgCluster = &enterprisev4.PostgresCluster{ ObjectMeta: metav1.ObjectMeta{Name: clusterName, Namespace: namespace}, Spec: enterprisev4.PostgresClusterSpec{ Class: className, - ClusterDeletionPolicy: &[]string{deletePolicy}[0], + ClusterDeletionPolicy: ptr.To(deletePolicy), }, } @@ -196,12 +216,19 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { return apierrors.IsNotFound(getErr) }, "10s", "500ms").Should(BeTrue()) - By("Cleaning up PostgresClusterClass fixture") - err = k8sClient.Get(ctx, pgClusterClassKey, pgClusterClass) - if err == nil { - Expect(k8sClient.Delete(ctx, pgClusterClass)).To(Succeed()) - } else { - Expect(apierrors.IsNotFound(err)).To(BeTrue()) + By("Cleaning up PostgresClusterClass fixtures") + for _, key := range []types.NamespacedName{ + pgClusterClassKey, + {Name: classNameMetrics}, + {Name: classNamePooler}, + } { + existing := &enterprisev4.PostgresClusterClass{} + err = k8sClient.Get(ctx, key, existing) + if err == nil { + Expect(k8sClient.Delete(ctx, existing)).To(Succeed()) + } else { + 
Expect(apierrors.IsNotFound(err)).To(BeTrue()) + } } }) @@ -265,13 +292,25 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Expect(cond.ObservedGeneration).To(Equal(pc.Generation)) }) - It("adds PostgreSQL scrape annotations to the CNPG Cluster when monitoring is enabled", func() { - recreateClusterClass(func(class *enterprisev4.PostgresClusterClass) { - class.Spec.Config.Monitoring = &enterprisev4.PostgresMonitoringClassConfig{ - PostgreSQLMetrics: &enterprisev4.MetricsClassConfig{Enabled: &[]bool{true}[0]}, - } - }) + }) + }) + + When("monitoring is configured", func() { + const ( + scrapeAnnotationKey = "prometheus.io/scrape" + pathAnnotationKey = "prometheus.io/path" + portAnnotationKey = "prometheus.io/port" + metricsPath = "/metrics" + postgresPort = "9187" + poolerPort = "9127" + ) + Context("with PostgreSQL metrics enabled in class", func() { + BeforeEach(func() { + pgCluster.Spec.Class = classNameMetrics + }) + + It("adds scrape annotations to the CNPG Cluster", func() { Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed()) reconcileNTimes(2) @@ -283,13 +322,7 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Expect(cnpg.Spec.InheritedMetadata.Annotations).To(HaveKeyWithValue(portAnnotationKey, postgresPort)) }) - It("removes PostgreSQL scrape annotations when disabled by cluster override", func() { - recreateClusterClass(func(class *enterprisev4.PostgresClusterClass) { - class.Spec.Config.Monitoring = &enterprisev4.PostgresMonitoringClassConfig{ - PostgreSQLMetrics: &enterprisev4.MetricsClassConfig{Enabled: &[]bool{true}[0]}, - } - }) - + It("removes scrape annotations when disabled by cluster override", func() { Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed()) reconcileNTimes(2) @@ -301,37 +334,28 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { current := &enterprisev4.PostgresCluster{} Expect(k8sClient.Get(ctx, pgClusterKey, current)).To(Succeed()) current.Spec.Monitoring = &enterprisev4.PostgresClusterMonitoring{ - PostgreSQLMetrics: &enterprisev4.FeatureDisableOverride{Disabled: &[]bool{true}[0]}, + PostgreSQLMetrics: ptr.To(false), } Expect(k8sClient.Update(ctx, current)).To(Succeed()) - reconcileNTimes(1) Eventually(func(g Gomega) { updated := &cnpgv1.Cluster{} g.Expect(k8sClient.Get(ctx, pgClusterKey, updated)).To(Succeed()) - if updated.Spec.InheritedMetadata != nil { - g.Expect(updated.Spec.InheritedMetadata.Annotations).NotTo(HaveKey(scrapeAnnotationKey)) - g.Expect(updated.Spec.InheritedMetadata.Annotations).NotTo(HaveKey(pathAnnotationKey)) - g.Expect(updated.Spec.InheritedMetadata.Annotations).NotTo(HaveKey(portAnnotationKey)) - } + g.Expect(updated.Spec.InheritedMetadata).NotTo(BeNil()) + g.Expect(updated.Spec.InheritedMetadata.Annotations).NotTo(HaveKey(scrapeAnnotationKey)) + g.Expect(updated.Spec.InheritedMetadata.Annotations).NotTo(HaveKey(pathAnnotationKey)) + g.Expect(updated.Spec.InheritedMetadata.Annotations).NotTo(HaveKey(portAnnotationKey)) }, "20s", "250ms").Should(Succeed()) }) + }) - It("creates poolers with scrape annotations only after the CNPG cluster becomes healthy", func() { - recreateClusterClass(func(class *enterprisev4.PostgresClusterClass) { - class.Spec.Config.ConnectionPoolerEnabled = &[]bool{true}[0] - class.Spec.Config.Monitoring = &enterprisev4.PostgresMonitoringClassConfig{ - ConnectionPoolerMetrics: &enterprisev4.MetricsClassConfig{Enabled: &[]bool{true}[0]}, - } - class.Spec.CNPG = &enterprisev4.CNPGConfig{ - ConnectionPooler: 
&enterprisev4.ConnectionPoolerConfig{ - Instances: &[]int32{2}[0], - Mode: &[]enterprisev4.ConnectionPoolerMode{enterprisev4.ConnectionPoolerModeTransaction}[0], - }, - } - }) + Context("with connection pooler metrics enabled in class", func() { + BeforeEach(func() { + pgCluster.Spec.Class = classNamePooler + }) + It("adds scrape annotations to poolers only after the CNPG cluster becomes healthy", func() { Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed()) reconcileNTimes(2) diff --git a/internal/controller/postgresdatabase_controller.go b/internal/controller/postgresdatabase_controller.go index d31082ba9..3d984e969 100644 --- a/internal/controller/postgresdatabase_controller.go +++ b/internal/controller/postgresdatabase_controller.go @@ -76,11 +76,8 @@ func (r *PostgresDatabaseReconciler) Reconcile(ctx context.Context, req ctrl.Req } return ctrl.Result{}, err } - metrics := r.Metrics - if metrics == nil { - metrics = &pgprometheus.NoopRecorder{} - } - rc := &dbcore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder, Metrics: metrics} + + rc := &dbcore.ReconcileContext{Client: r.Client, Scheme: r.Scheme, Recorder: r.Recorder, Metrics: r.Metrics} result, err := dbcore.PostgresDatabaseService(ctx, rc, postgresDB, dbadapter.NewDBRepository) r.FleetCollector.CollectDatabaseMetrics(ctx, r.Client, r.Metrics) if sharedreconcile.IsPureConflict(err) { diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index 5dca32cb2..9356a011f 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -78,6 +78,7 @@ var _ = BeforeSuite(func(ctx context.Context) { filepath.Join("..", "..", "config", "crd", "bases"), filepath.Join(cnpgModuleDir, "config", "crd", "bases"), }, + ErrorIfCRDPathMissing: true, } var err error diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 80825bf04..8aefdea45 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -346,9 +346,6 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. rc.emitPoolerReadyTransition(postgresCluster, oldConditions) } - oldConditions := make([]metav1.Condition, len(postgresCluster.Status.Conditions)) - copy(oldConditions, postgresCluster.Status.Conditions) - // Reconcile ConfigMap when CNPG cluster is healthy. if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy { logger.Info("CNPG Cluster healthy, reconciling ConfigMap") @@ -502,11 +499,11 @@ func buildCNPGClusterSpec(cfg *MergedConfig, secretName string, postgresMetricsE }, Resources: *cfg.Spec.Resources, } + annotations := make(map[string]string) if postgresMetricsEnabled { - spec.InheritedMetadata = &cnpgv1.EmbeddedObjectMetadata{ - Annotations: buildPostgresScrapeAnnotations(), - } + annotations = buildPostgresScrapeAnnotations() } + spec.InheritedMetadata = &cnpgv1.EmbeddedObjectMetadata{Annotations: annotations} return spec } @@ -683,20 +680,19 @@ func buildCNPGPooler(scheme *runtime.Scheme, cluster *enterprisev4.PostgresClust }, }, } + poolerAnnotations := make(map[string]string) if poolerMetricsEnabled { - pooler.Spec.Template = &cnpgv1.PodTemplateSpec{ - ObjectMeta: cnpgv1.Metadata{ - Annotations: buildPoolerScrapeAnnotations(), - }, - Spec: corev1.PodSpec{ - // CNPG's Pooler CRD requires template.spec.containers to be present. - // A minimal named container lets CNPG's podspec builder merge in the - // real PgBouncer image/command/ports while still carrying our annotations. 
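This hunk, like the buildCNPGClusterSpec change above it, switches to always materializing the annotations map: when metrics are off the map is empty but still set, so a later apply/merge patch explicitly clears previously written scrape keys instead of leaving them behind. A minimal sketch of that pattern — the annotation keys, path, and pooler port come from the test constants in this patch; everything else is hypothetical:

```go
package main

import "fmt"

// scrapeAnnotations always returns a non-nil map. When metrics are disabled
// the map is empty but still assigned to the object, so an apply/merge patch
// explicitly removes stale keys rather than leaving old annotations in place.
func scrapeAnnotations(enabled bool, port string) map[string]string {
	annotations := make(map[string]string)
	if enabled {
		annotations["prometheus.io/scrape"] = "true"
		annotations["prometheus.io/path"] = "/metrics"
		annotations["prometheus.io/port"] = port
	}
	return annotations
}

func main() {
	fmt.Println(scrapeAnnotations(true, "9127"))  // three scrape keys for the pooler
	fmt.Println(scrapeAnnotations(false, "9127")) // map[] — still set, so removal is explicit
}
```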
- Containers: []corev1.Container{ - {Name: "pgbouncer"}, - }, - }, - } + poolerAnnotations = buildPoolerScrapeAnnotations() + } + // Template is always set so that annotation removal is explicit in merge patches. + // CNPG's Pooler CRD requires template.spec.containers to be present — a minimal + // named container lets CNPG's podspec builder merge in the real PgBouncer + // image/command/ports while still carrying our annotations. + pooler.Spec.Template = &cnpgv1.PodTemplateSpec{ + ObjectMeta: cnpgv1.Metadata{Annotations: poolerAnnotations}, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{Name: "pgbouncer"}}, + }, } if err := ctrl.SetControllerReference(cluster, pooler, scheme); err != nil { return nil, fmt.Errorf("setting controller reference on CNPG pooler: %w", err) @@ -1079,4 +1075,4 @@ func generatePassword() (string, error) { symbols = 0 ) return password.Generate(length, digits, symbols, false, true) -} \ No newline at end of file +} diff --git a/pkg/postgresql/cluster/core/cluster_unit_test.go b/pkg/postgresql/cluster/core/cluster_unit_test.go index 49e5382b2..72b30205b 100644 --- a/pkg/postgresql/cluster/core/cluster_unit_test.go +++ b/pkg/postgresql/cluster/core/cluster_unit_test.go @@ -392,7 +392,8 @@ func TestBuildCNPGClusterSpec(t *testing.T) { require.Len(t, spec.PostgresConfiguration.PgHBA, 2) assert.Equal(t, "hostssl all all 0.0.0.0/0 scram-sha-256", spec.PostgresConfiguration.PgHBA[0]) assert.Equal(t, "host replication all 10.0.0.0/8 md5", spec.PostgresConfiguration.PgHBA[1]) - assert.Nil(t, spec.InheritedMetadata) + require.NotNil(t, spec.InheritedMetadata) + assert.Empty(t, spec.InheritedMetadata.Annotations) t.Run("adds postgres scrape annotations when enabled", func(t *testing.T) { spec := buildCNPGClusterSpec(cfg, "my-secret", true) @@ -447,7 +448,8 @@ func TestBuildCNPGPooler(t *testing.T) { assert.Equal(t, "25", pooler.Spec.PgBouncer.Parameters["default_pool_size"]) require.Len(t, pooler.OwnerReferences, 1) assert.Equal(t, "test-uid", string(pooler.OwnerReferences[0].UID)) - assert.Nil(t, pooler.Spec.Template) + require.NotNil(t, pooler.Spec.Template) + assert.Empty(t, pooler.Spec.Template.ObjectMeta.Annotations) }) t.Run("ro pooler", func(t *testing.T) { @@ -1132,6 +1134,9 @@ func TestCreateOrUpdateConnectionPoolers(t *testing.T) { PoolMode: cnpgv1.PgBouncerPoolMode("transaction"), Parameters: map[string]string{"default_pool_size": "25"}, }, + Template: &cnpgv1.PodTemplateSpec{ + Spec: corev1.PodSpec{Containers: []corev1.Container{{Name: "pgbouncer"}}}, + }, } } diff --git a/pkg/postgresql/cluster/core/events.go b/pkg/postgresql/cluster/core/events.go index b60f3f643..afcfd768e 100644 --- a/pkg/postgresql/cluster/core/events.go +++ b/pkg/postgresql/cluster/core/events.go @@ -10,25 +10,25 @@ import ( ) const ( - EventSecretReady = "SecretReady" - EventConfigMapReady = "ConfigMapReady" - EventClusterAdopted = "ClusterAdopted" - EventClusterCreationStarted = "ClusterCreationStarted" - EventClusterUpdateStarted = "ClusterUpdateStarted" - EventClusterReady = "ClusterReady" - EventPoolerCreationStarted = "PoolerCreationStarted" - EventPoolerReady = "PoolerReady" - EventCleanupComplete = "CleanupComplete" - EventClusterClassNotFound = "ClusterClassNotFound" - EventConfigMergeFailed = "ConfigMergeFailed" - EventSecretReconcileFailed = "SecretReconcileFailed" - EventClusterCreateFailed = "ClusterCreateFailed" - EventClusterUpdateFailed = "ClusterUpdateFailed" - EventManagedRolesFailed = "ManagedRolesFailed" - EventPoolerReconcileFailed = "PoolerReconcileFailed" - 
EventConfigMapReconcileFailed = "ConfigMapReconcileFailed" - EventClusterDegraded = "ClusterDegraded" - EventCleanupFailed = "CleanupFailed" + EventSecretReady = "SecretReady" + EventConfigMapReady = "ConfigMapReady" + EventClusterAdopted = "ClusterAdopted" + EventClusterCreationStarted = "ClusterCreationStarted" + EventClusterUpdateStarted = "ClusterUpdateStarted" + EventClusterReady = "ClusterReady" + EventPoolerCreationStarted = "PoolerCreationStarted" + EventPoolerReady = "PoolerReady" + EventCleanupComplete = "CleanupComplete" + EventClusterClassNotFound = "ClusterClassNotFound" + EventConfigMergeFailed = "ConfigMergeFailed" + EventSecretReconcileFailed = "SecretReconcileFailed" + EventClusterCreateFailed = "ClusterCreateFailed" + EventClusterUpdateFailed = "ClusterUpdateFailed" + EventManagedRolesFailed = "ManagedRolesFailed" + EventPoolerReconcileFailed = "PoolerReconcileFailed" + EventConfigMapReconcileFailed = "ConfigMapReconcileFailed" + EventClusterDegraded = "ClusterDegraded" + EventCleanupFailed = "CleanupFailed" ) func (rc *ReconcileContext) emitNormal(obj client.Object, reason, message string) { diff --git a/pkg/postgresql/cluster/core/monitoring.go b/pkg/postgresql/cluster/core/monitoring.go index 2b18ba497..725549b24 100644 --- a/pkg/postgresql/cluster/core/monitoring.go +++ b/pkg/postgresql/cluster/core/monitoring.go @@ -37,31 +37,31 @@ func buildPoolerScrapeAnnotations() map[string]string { } func isPostgreSQLMetricsEnabled(cluster *enterprisev4.PostgresCluster, class *enterprisev4.PostgresClusterClass) bool { - if class == nil || class.Spec.Config == nil || class.Spec.Config.Monitoring == nil { - return false - } - classCfg := class.Spec.Config.Monitoring.PostgreSQLMetrics - if classCfg == nil || classCfg.Enabled == nil || !*classCfg.Enabled { - return false - } - if cluster == nil || cluster.Spec.Monitoring == nil || cluster.Spec.Monitoring.PostgreSQLMetrics == nil { - return true + classEnabled := class != nil && + class.Spec.Config != nil && + class.Spec.Config.Monitoring != nil && + class.Spec.Config.Monitoring.PostgreSQLMetrics != nil && + class.Spec.Config.Monitoring.PostgreSQLMetrics.Enabled != nil && + *class.Spec.Config.Monitoring.PostgreSQLMetrics.Enabled + + if cluster != nil && cluster.Spec.Monitoring != nil && cluster.Spec.Monitoring.PostgreSQLMetrics != nil { + return *cluster.Spec.Monitoring.PostgreSQLMetrics } - override := cluster.Spec.Monitoring.PostgreSQLMetrics.Disabled - return override == nil || !*override + + return classEnabled } func isConnectionPoolerMetricsEnabled(cluster *enterprisev4.PostgresCluster, class *enterprisev4.PostgresClusterClass) bool { - if class == nil || class.Spec.Config == nil || class.Spec.Config.Monitoring == nil { - return false - } - classCfg := class.Spec.Config.Monitoring.ConnectionPoolerMetrics - if classCfg == nil || classCfg.Enabled == nil || !*classCfg.Enabled { - return false - } - if cluster == nil || cluster.Spec.Monitoring == nil || cluster.Spec.Monitoring.ConnectionPoolerMetrics == nil { - return true + classEnabled := class != nil && + class.Spec.Config != nil && + class.Spec.Config.Monitoring != nil && + class.Spec.Config.Monitoring.ConnectionPoolerMetrics != nil && + class.Spec.Config.Monitoring.ConnectionPoolerMetrics.Enabled != nil && + *class.Spec.Config.Monitoring.ConnectionPoolerMetrics.Enabled + + if cluster != nil && cluster.Spec.Monitoring != nil && cluster.Spec.Monitoring.ConnectionPoolerMetrics != nil { + return *cluster.Spec.Monitoring.ConnectionPoolerMetrics } - override := 
cluster.Spec.Monitoring.ConnectionPoolerMetrics.Disabled - return override == nil || !*override + + return classEnabled } diff --git a/pkg/postgresql/cluster/core/monitoring_unit_test.go b/pkg/postgresql/cluster/core/monitoring_unit_test.go index 917052efc..ae9feb73d 100644 --- a/pkg/postgresql/cluster/core/monitoring_unit_test.go +++ b/pkg/postgresql/cluster/core/monitoring_unit_test.go @@ -39,7 +39,7 @@ func TestIsPostgreSQLMetricsEnabled(t *testing.T) { cluster: &enterprisev4.PostgresCluster{ Spec: enterprisev4.PostgresClusterSpec{ Monitoring: &enterprisev4.PostgresClusterMonitoring{ - PostgreSQLMetrics: &enterprisev4.FeatureDisableOverride{Disabled: ptr.To(true)}, + PostgreSQLMetrics: ptr.To(false), }, }, }, @@ -51,11 +51,11 @@ func TestIsPostgreSQLMetricsEnabled(t *testing.T) { want: false, }, { - name: "disabled when class disables even if cluster has override struct", + name: "enabled when cluster overrides class that has it disabled", cluster: &enterprisev4.PostgresCluster{ Spec: enterprisev4.PostgresClusterSpec{ Monitoring: &enterprisev4.PostgresClusterMonitoring{ - PostgreSQLMetrics: &enterprisev4.FeatureDisableOverride{Disabled: ptr.To(false)}, + PostgreSQLMetrics: ptr.To(true), }, }, }, @@ -64,7 +64,7 @@ func TestIsPostgreSQLMetricsEnabled(t *testing.T) { nil, nil, ), - want: false, + want: true, }, } @@ -165,7 +165,7 @@ func TestIsConnectionPoolerMetricsEnabled(t *testing.T) { cluster: &enterprisev4.PostgresCluster{ Spec: enterprisev4.PostgresClusterSpec{ Monitoring: &enterprisev4.PostgresClusterMonitoring{ - ConnectionPoolerMetrics: &enterprisev4.FeatureDisableOverride{Disabled: ptr.To(true)}, + ConnectionPoolerMetrics: ptr.To(false), }, }, }, From 34097e959dacdd925fe34d179f863c5879aca598 Mon Sep 17 00:00:00 2001 From: dpishchenkov Date: Thu, 2 Apr 2026 15:30:46 +0200 Subject: [PATCH 32/36] remove observedGeneration from postgresDatabase --- .../controller/postgresdatabase_controller.go | 43 +- .../postgresdatabase_controller_test.go | 350 ++++++++++++++- pkg/postgresql/database/core/database.go | 316 +++++++++++--- .../database/core/database_unit_test.go | 411 +++++++++++++++--- pkg/postgresql/database/core/events.go | 3 +- pkg/postgresql/database/core/types.go | 22 +- 6 files changed, 985 insertions(+), 160 deletions(-) diff --git a/internal/controller/postgresdatabase_controller.go b/internal/controller/postgresdatabase_controller.go index 3d984e969..b0cf7e82b 100644 --- a/internal/controller/postgresdatabase_controller.go +++ b/internal/controller/postgresdatabase_controller.go @@ -18,7 +18,6 @@ package controller import ( "context" - "reflect" cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" enterprisev4 "github.com/splunk/splunk-operator/api/v4" @@ -29,6 +28,7 @@ import ( sharedreconcile "github.com/splunk/splunk-operator/pkg/postgresql/shared/reconcile" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/equality" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -106,31 +106,28 @@ func (r *PostgresDatabaseReconciler) SetupWithManager(mgr ctrl.Manager) error { return err } return ctrl.NewControllerManagedBy(mgr). - For(&enterprisev4.PostgresDatabase{}, builder.WithPredicates( - predicate.Or( - predicate.GenerationChangedPredicate{}, - predicate.Funcs{ - UpdateFunc: func(e event.UpdateEvent) bool { - return !reflect.DeepEqual( - e.ObjectOld.GetFinalizers(), - e.ObjectNew.GetFinalizers(), - ) - }, - }, - ), - )). 
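The SetupWithManager rewiring below replaces the hand-rolled predicates with stock controller-runtime ones. A self-contained illustration of why the owned Secrets and ConfigMaps move to ResourceVersionChangedPredicate: data edits never bump metadata.generation, so a generation-based filter would hide exactly the content drift the new tests repair (the drifted value mirrors the test fixture; the resource versions are illustrative):

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/controller-runtime/pkg/event"
	"sigs.k8s.io/controller-runtime/pkg/predicate"
)

func main() {
	old := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{ResourceVersion: "100"}}
	drifted := &corev1.ConfigMap{
		ObjectMeta: metav1.ObjectMeta{ResourceVersion: "101"},
		Data:       map[string]string{"rw-host": "unexpected.example"},
	}
	upd := event.UpdateEvent{ObjectOld: old, ObjectNew: drifted}

	// A ConfigMap data edit changes resourceVersion but not generation,
	// so only the resourceVersion-based predicate surfaces the drift.
	fmt.Println(predicate.GenerationChangedPredicate{}.Update(upd))      // false
	fmt.Println(predicate.ResourceVersionChangedPredicate{}.Update(upd)) // true
}
```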
- Owns(&cnpgv1.Database{}, builder.WithPredicates(predicate.Funcs{ - CreateFunc: func(event.CreateEvent) bool { return false }, - })). - Owns(&corev1.Secret{}, builder.WithPredicates(predicate.Funcs{ - CreateFunc: func(event.CreateEvent) bool { return false }, - })). - Owns(&corev1.ConfigMap{}, builder.WithPredicates(predicate.Funcs{ - CreateFunc: func(event.CreateEvent) bool { return false }, - })). + WithEventFilter(predicate.Funcs{GenericFunc: func(event.GenericEvent) bool { return false }}). + For(&enterprisev4.PostgresDatabase{}, builder.WithPredicates(postgresDatabasePredicator())). + Owns(&cnpgv1.Database{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})). + Owns(&corev1.Secret{}, builder.WithPredicates(predicate.ResourceVersionChangedPredicate{})). + Owns(&corev1.ConfigMap{}, builder.WithPredicates(predicate.ResourceVersionChangedPredicate{})). Named("postgresdatabase"). WithOptions(controller.Options{ MaxConcurrentReconciles: DatabaseTotalWorker, }). Complete(r) } + +func postgresDatabasePredicator() predicate.Predicate { + return predicate.Or( + predicate.GenerationChangedPredicate{}, + predicate.Funcs{ + UpdateFunc: func(e event.UpdateEvent) bool { + if !equality.Semantic.DeepEqual(e.ObjectOld.GetDeletionTimestamp(), e.ObjectNew.GetDeletionTimestamp()) { + return true + } + return !equality.Semantic.DeepEqual(e.ObjectOld.GetFinalizers(), e.ObjectNew.GetFinalizers()) + }, + }, + ) +} diff --git a/internal/controller/postgresdatabase_controller_test.go b/internal/controller/postgresdatabase_controller_test.go index 44143919f..ca23c56e7 100644 --- a/internal/controller/postgresdatabase_controller_test.go +++ b/internal/controller/postgresdatabase_controller_test.go @@ -36,6 +36,8 @@ import ( "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) @@ -53,13 +55,15 @@ const ( // condition reasons const ( - reasonClusterNotFound = "ClusterNotFound" - reasonClusterAvailable = "ClusterAvailable" - reasonSecretsCreated = "SecretsCreated" - reasonConfigMapsCreated = "ConfigMapsCreated" - reasonUsersAvailable = "UsersAvailable" - reasonDatabasesAvailable = "DatabasesAvailable" - reasonRoleConflict = "RoleConflict" + reasonClusterNotFound = "ClusterNotFound" + reasonClusterAvailable = "ClusterAvailable" + reasonClusterProvisioning = "ClusterProvisioning" + reasonSecretsCreated = "SecretsCreated" + reasonConfigMapsCreated = "ConfigMapsCreated" + reasonRolesAvailable = "RolesAvailable" + reasonDatabasesAvailable = "DatabasesAvailable" + reasonRoleConflict = "RoleConflict" + reasonWaitingForCNPG = "WaitingForCNPG" ) // phases @@ -325,6 +329,10 @@ func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, cl Namespace: namespace, OwnerReferences: ownerReferences, }, + Data: map[string][]byte{ + "username": []byte(adminRoleNameForTest(dbName)), + "password": []byte("test-password"), + }, })).To(Succeed()) Expect(k8sClient.Create(ctx, &corev1.Secret{ @@ -333,6 +341,10 @@ func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, cl Namespace: namespace, OwnerReferences: ownerReferences, }, + Data: map[string][]byte{ + "username": []byte(rwRoleNameForTest(dbName)), + "password": []byte("test-password"), + }, })).To(Succeed()) Expect(k8sClient.Create(ctx, &corev1.ConfigMap{ @@ -392,14 +404,41 @@ func expectStatusCondition(current 
*enterprisev4.PostgresDatabase, conditionType func expectReadyStatus(current *enterprisev4.PostgresDatabase, generation int64, expectedDatabase enterprisev4.DatabaseInfo) { expectStatusPhase(current, phaseReady) - Expect(current.Status.ObservedGeneration).NotTo(BeNil()) - Expect(*current.Status.ObservedGeneration).To(Equal(generation)) Expect(current.Status.Databases).To(HaveLen(1)) Expect(current.Status.Databases[0].Name).To(Equal(expectedDatabase.Name)) Expect(current.Status.Databases[0].Ready).To(Equal(expectedDatabase.Ready)) Expect(current.Status.Databases[0].AdminUserSecretRef).NotTo(BeNil()) Expect(current.Status.Databases[0].RWUserSecretRef).NotTo(BeNil()) Expect(current.Status.Databases[0].ConfigMapRef).NotTo(BeNil()) + Expect(current.Status.ObservedGeneration).NotTo(BeNil()) + Expect(*current.Status.ObservedGeneration).To(Equal(generation)) +} + +func reconcilePostgresDatabaseToReady(ctx context.Context, scenario readyClusterScenario, poolerEnabled bool) *enterprisev4.PostgresDatabase { + seedReadyClusterScenario(ctx, scenario, poolerEnabled) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + + current := expectFinalizerAdded(ctx, scenario.requestName) + seedExistingDatabaseStatus(ctx, current, scenario.dbName) + + result, err = reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + expectProvisionedArtifacts(ctx, scenario, current) + expectManagedRolesPatched(ctx, scenario) + + result, err = reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + cnpgDatabase := expectCNPGDatabaseCreated(ctx, scenario, current) + markCNPGDatabaseApplied(ctx, cnpgDatabase) + + result, err = reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + + current = fetchPostgresDatabase(ctx, scenario.requestName) + expectReadyStatus(current, current.Generation, enterprisev4.DatabaseInfo{Name: scenario.dbName, Ready: true}) + return current } var _ = Describe("PostgresDatabase Controller", Label("postgres"), func() { @@ -480,7 +519,7 @@ var _ = Describe("PostgresDatabase Controller", Label("postgres"), func() { expectStatusCondition(current, condClusterReady, metav1.ConditionTrue, reasonClusterAvailable) expectStatusCondition(current, condSecretsReady, metav1.ConditionTrue, reasonSecretsCreated) expectStatusCondition(current, condConfigMapsReady, metav1.ConditionTrue, reasonConfigMapsCreated) - expectStatusCondition(current, condRolesReady, metav1.ConditionTrue, reasonUsersAvailable) + expectStatusCondition(current, condRolesReady, metav1.ConditionTrue, reasonRolesAvailable) expectStatusCondition(current, condDatabasesReady, metav1.ConditionTrue, reasonDatabasesAvailable) Expect(meta.FindStatusCondition(current.Status.Conditions, condPrivilegesReady)).To(BeNil()) }) @@ -504,6 +543,246 @@ var _ = Describe("PostgresDatabase Controller", Label("postgres"), func() { }) }) + When("the referenced PostgresCluster exists but is not ready", func() { + It("waits for cluster to be provisioned and sets ClusterReady=False with reason ClusterProvisioning", func() { + scenario := newReadyClusterScenario(namespace, "not-ready-cluster", "not-ready-postgres", "not-ready-cnpg", dbAppdb) + createPostgresDatabaseResource(ctx, scenario.namespace, scenario.resourceName, scenario.clusterName, []enterprisev4.DatabaseDefinition{{Name: scenario.dbName}}) + createPostgresClusterResource(ctx, scenario.namespace, 
scenario.clusterName) + // Do NOT call markPostgresClusterReady to leave it in provisioning state + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + + current := expectFinalizerAdded(ctx, scenario.requestName) + + result, err = reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + + current = fetchPostgresDatabase(ctx, scenario.requestName) + expectStatusPhase(current, phasePending) + expectStatusCondition(current, condClusterReady, metav1.ConditionFalse, reasonClusterProvisioning) + }) + }) + + When("owned resource drift occurs after the PostgresDatabase is ready", func() { + It("repairs configmap content drift", func() { + scenario := newReadyClusterScenario(namespace, "configmap-drift", "tenant-cluster", "tenant-cnpg", "appdb") + owner := reconcilePostgresDatabaseToReady(ctx, scenario, false) + + configMap := &corev1.ConfigMap{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-config", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, configMap)).To(Succeed()) + configMap.Data["rw-host"] = "unexpected.example" + Expect(k8sClient.Update(ctx, configMap)).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: configMap.Name, Namespace: configMap.Namespace}, configMap)).To(Succeed()) + Expect(configMap.Data).To(HaveKeyWithValue("rw-host", "tenant-rw."+scenario.namespace+".svc.cluster.local")) + + current := fetchPostgresDatabase(ctx, scenario.requestName) + expectReadyStatus(current, current.Generation, enterprisev4.DatabaseInfo{Name: scenario.dbName, Ready: true}) + Expect(metav1.IsControlledBy(configMap, owner)).To(BeTrue()) + }) + + It("recreates a deleted configmap", func() { + scenario := newReadyClusterScenario(namespace, "configmap-delete", "tenant-cluster", "tenant-cnpg", "appdb") + reconcilePostgresDatabaseToReady(ctx, scenario, false) + + configMapName := fmt.Sprintf("%s-%s-config", scenario.resourceName, scenario.dbName) + Expect(k8sClient.Delete(ctx, &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: configMapName, Namespace: scenario.namespace}, + })).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + + configMap := &corev1.ConfigMap{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: configMapName, Namespace: scenario.namespace}, configMap)).To(Succeed()) + Expect(configMap.Data).To(HaveKeyWithValue("rw-host", "tenant-rw."+scenario.namespace+".svc.cluster.local")) + }) + + It("does not recreate a deleted managed user secret", func() { + scenario := newReadyClusterScenario(namespace, "secret-delete", "tenant-cluster", "tenant-cnpg", "appdb") + reconcilePostgresDatabaseToReady(ctx, scenario, false) + + secretName := fmt.Sprintf("%s-%s-admin", scenario.resourceName, scenario.dbName) + Expect(k8sClient.Delete(ctx, &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: secretName, Namespace: scenario.namespace}, + })).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + + current := fetchPostgresDatabase(ctx, scenario.requestName) + expectStatusPhase(current, "Provisioning") + expectStatusCondition(current, "SecretsReady", metav1.ConditionFalse, "SecretsDriftDetected") + + missing := &corev1.Secret{} + err = 
k8sClient.Get(ctx, types.NamespacedName{Name: secretName, Namespace: scenario.namespace}, missing) + Expect(apierrors.IsNotFound(err)).To(BeTrue()) + }) + + It("re-attaches ownership when a managed user secret loses its owner reference", func() { + scenario := newReadyClusterScenario(namespace, "secret-adopt", "tenant-cluster", "tenant-cnpg", "appdb") + owner := reconcilePostgresDatabaseToReady(ctx, scenario, false) + + secret := &corev1.Secret{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-admin", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, secret)).To(Succeed()) + secret.OwnerReferences = nil + Expect(k8sClient.Update(ctx, secret)).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: secret.Name, Namespace: secret.Namespace}, secret)).To(Succeed()) + Expect(metav1.IsControlledBy(secret, owner)).To(BeTrue()) + + current := fetchPostgresDatabase(ctx, scenario.requestName) + expectReadyStatus(current, current.Generation, enterprisev4.DatabaseInfo{Name: scenario.dbName, Ready: true}) + }) + + It("creates secrets and configmaps for a newly added database while preserving existing ones", func() { + scenario := newReadyClusterScenario(namespace, "new-database", "tenant-cluster", "tenant-cnpg", "appdb") + current := reconcilePostgresDatabaseToReady(ctx, scenario, false) + + current.Spec.Databases = append(current.Spec.Databases, enterprisev4.DatabaseDefinition{Name: "analytics"}) + Expect(k8sClient.Update(ctx, current)).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + + for _, secretName := range []string{ + fmt.Sprintf("%s-analytics-admin", scenario.resourceName), + fmt.Sprintf("%s-analytics-rw", scenario.resourceName), + } { + secret := &corev1.Secret{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: secretName, Namespace: scenario.namespace}, secret)).To(Succeed()) + } + + configMap := &corev1.ConfigMap{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-analytics-config", scenario.resourceName), Namespace: scenario.namespace}, configMap)).To(Succeed()) + Expect(configMap.Data).To(HaveKeyWithValue("dbname", "analytics")) + + existingSecret := &corev1.Secret{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-admin", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, existingSecret)).To(Succeed()) + }) + + It("recreates a deleted CNPG Database", func() { + scenario := newReadyClusterScenario(namespace, "cnpg-database-delete", "tenant-cluster", "tenant-cnpg", "appdb") + owner := reconcilePostgresDatabaseToReady(ctx, scenario, false) + + cnpgDatabaseName := fmt.Sprintf("%s-%s", scenario.resourceName, scenario.dbName) + Expect(k8sClient.Delete(ctx, &cnpgv1.Database{ + ObjectMeta: metav1.ObjectMeta{Name: cnpgDatabaseName, Namespace: scenario.namespace}, + })).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + + cnpgDatabase := &cnpgv1.Database{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: cnpgDatabaseName, Namespace: scenario.namespace}, cnpgDatabase)).To(Succeed()) + Expect(cnpgDatabase.Spec.Name).To(Equal(scenario.dbName)) + Expect(metav1.IsControlledBy(cnpgDatabase, owner)).To(BeTrue()) + + markCNPGDatabaseApplied(ctx, cnpgDatabase) + result, err 
= reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + }) + }) + + When("the CNPG Database exists but has not been applied yet", func() { + It("waits for CNPG to apply the database and sets DatabasesReady=False with reason WaitingForCNPG", func() { + scenario := newReadyClusterScenario(namespace, "cnpg-wait", "tenant-cluster", "tenant-cnpg", dbAppdb) + seedReadyClusterScenario(ctx, scenario, false) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + + current := expectFinalizerAdded(ctx, scenario.requestName) + seedExistingDatabaseStatus(ctx, current, scenario.dbName) + + result, err = reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + expectProvisionedArtifacts(ctx, scenario, current) + expectManagedRolesPatched(ctx, scenario) + + result, err = reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + expectCNPGDatabaseCreated(ctx, scenario, current) + // Do NOT call markCNPGDatabaseApplied to leave it waiting + + result, err = reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + + current = fetchPostgresDatabase(ctx, scenario.requestName) + expectStatusCondition(current, condDatabasesReady, metav1.ConditionFalse, reasonWaitingForCNPG) + }) + }) + + When("managed roles have been patched but CNPG has not reconciled them yet", func() { + It("waits for CNPG to reconcile roles and sets RolesReady=False with reason WaitingForCNPG", func() { + scenario := newReadyClusterScenario(namespace, "roles-wait", "tenant-cluster", "tenant-cnpg", dbAppdb) + createPostgresDatabaseResource(ctx, scenario.namespace, scenario.resourceName, scenario.clusterName, []enterprisev4.DatabaseDefinition{{Name: scenario.dbName}}) + postgresCluster := createPostgresClusterResource(ctx, scenario.namespace, scenario.clusterName) + markPostgresClusterReady(ctx, postgresCluster, scenario.cnpgClusterName, scenario.namespace, false) + cnpgCluster := createCNPGClusterResource(ctx, scenario.namespace, scenario.cnpgClusterName) + // Mark with service endpoints but no reconciled roles — ConfigMaps need hosts but roles should stay pending + markCNPGClusterReady(ctx, cnpgCluster, []string{}, "tenant-rw", "tenant-ro") + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + + current := expectFinalizerAdded(ctx, scenario.requestName) + seedExistingDatabaseStatus(ctx, current, scenario.dbName) + + result, err = reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + expectProvisionedArtifacts(ctx, scenario, current) + expectManagedRolesPatched(ctx, scenario) + + current = fetchPostgresDatabase(ctx, scenario.requestName) + expectStatusCondition(current, condRolesReady, metav1.ConditionFalse, reasonWaitingForCNPG) + }) + }) + + When("postgresdatabase secondary-resource predicates run", func() { + It("triggers on cnpg database generation change and ignores status-only updates", func() { + pred := predicate.GenerationChangedPredicate{} + + Expect(pred.Update(event.UpdateEvent{ + ObjectOld: &cnpgv1.Database{ObjectMeta: metav1.ObjectMeta{Generation: 1}}, + ObjectNew: &cnpgv1.Database{ObjectMeta: metav1.ObjectMeta{Generation: 2}}, + })).To(BeTrue()) + Expect(pred.Update(event.UpdateEvent{ + ObjectOld: &cnpgv1.Database{ObjectMeta: metav1.ObjectMeta{Generation: 1}}, + 
ObjectNew: &cnpgv1.Database{ObjectMeta: metav1.ObjectMeta{Generation: 1}}, + })).To(BeFalse()) + }) + + It("treats secret create, update, and delete events as drift triggers", func() { + pred := predicate.ResourceVersionChangedPredicate{} + + Expect(pred.Create(event.CreateEvent{})).To(BeTrue()) + Expect(pred.Update(event.UpdateEvent{ + ObjectOld: &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "secret", Namespace: "test", ResourceVersion: "1"}}, + ObjectNew: &corev1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "secret", Namespace: "test", ResourceVersion: "2"}}, + })).To(BeTrue()) + Expect(pred.Delete(event.DeleteEvent{})).To(BeTrue()) + }) + + It("treats configmap create, update, and delete events as drift triggers", func() { + pred := predicate.ResourceVersionChangedPredicate{} + + Expect(pred.Create(event.CreateEvent{})).To(BeTrue()) + Expect(pred.Update(event.UpdateEvent{ + ObjectOld: &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "config", Namespace: "test", ResourceVersion: "1"}}, + ObjectNew: &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "config", Namespace: "test", ResourceVersion: "2"}}, + })).To(BeTrue()) + Expect(pred.Delete(event.DeleteEvent{})).To(BeTrue()) + }) + }) + When("role ownership conflicts exist", func() { It("marks the resource failed and stops provisioning dependent resources", func() { resourceName := "conflict-cluster" @@ -673,4 +952,55 @@ var _ = Describe("PostgresDatabase Controller", Label("postgres"), func() { }) }) }) + + When("a retained CNPG Database exists without owner reference", func() { + It("re-adopts the resource and removes retained annotation", func() { + scenario := newReadyClusterScenario(namespace, "adopt-cnpg", "tenant-cluster", "tenant-cnpg", dbAppdb) + createPostgresDatabaseResource(ctx, scenario.namespace, scenario.resourceName, scenario.clusterName, []enterprisev4.DatabaseDefinition{{Name: scenario.dbName}}, postgresDatabaseFinalizer) + postgresCluster := createPostgresClusterResource(ctx, scenario.namespace, scenario.clusterName) + markPostgresClusterReady(ctx, postgresCluster, scenario.cnpgClusterName, scenario.namespace, false) + cnpgCluster := createCNPGClusterResource(ctx, scenario.namespace, scenario.cnpgClusterName) + markCNPGClusterReady(ctx, cnpgCluster, []string{adminRoleNameForTest(scenario.dbName), rwRoleNameForTest(scenario.dbName)}, "tenant-rw", "tenant-ro") + + // Create a CNPG Database with retained annotation but no owner reference + retainedCNPGDb := &cnpgv1.Database{ + ObjectMeta: metav1.ObjectMeta{ + Name: cnpgDatabaseNameForTest(scenario.resourceName, scenario.dbName), + Namespace: scenario.namespace, + Annotations: map[string]string{ + retainedFromAnnotation: scenario.resourceName, + }, + }, + Spec: cnpgv1.DatabaseSpec{ + ClusterRef: corev1.LocalObjectReference{Name: scenario.cnpgClusterName}, + Name: scenario.dbName, + Owner: adminRoleNameForTest(scenario.dbName), + }, + } + Expect(k8sClient.Create(ctx, retainedCNPGDb)).To(Succeed()) + + // Finalizer already present — first reconcile goes straight to provisioning + current := fetchPostgresDatabase(ctx, scenario.requestName) + seedExistingDatabaseStatus(ctx, current, scenario.dbName) + + result, err := reconcilePostgresDatabase(ctx, scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + expectProvisionedArtifacts(ctx, scenario, current) + expectManagedRolesPatched(ctx, scenario) + + // Second reconcile: roles ready, re-adopts the retained CNPG Database, waits for applied + result, err = reconcilePostgresDatabase(ctx, 
scenario.requestName) + expectReconcileResult(result, err, 15*time.Second) + + adoptedDb := &cnpgv1.Database{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: cnpgDatabaseNameForTest(scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, adoptedDb)).To(Succeed()) + Expect(metav1.IsControlledBy(adoptedDb, current)).To(BeTrue()) + _, hasRetainedAnnotation := adoptedDb.Annotations[retainedFromAnnotation] + Expect(hasRetainedAnnotation).To(BeFalse()) + + markCNPGDatabaseApplied(ctx, adoptedDb) + result, err = reconcilePostgresDatabase(ctx, scenario.requestName) + expectEmptyReconcileResult(result, err) + }) + }) }) diff --git a/pkg/postgresql/database/core/database.go b/pkg/postgresql/database/core/database.go index 362b2939c..8dd110d9e 100644 --- a/pkg/postgresql/database/core/database.go +++ b/pkg/postgresql/database/core/database.go @@ -30,6 +30,36 @@ import ( // Injected by the controller so the core never imports the pgx adapter directly. type NewDBRepoFunc func(ctx context.Context, host, dbName, password string) (DBRepo, error) +type secretReconcileError struct { + message string + reason conditionReasons +} + +type secretMissingPolicy int + +const ( + createSecretIfMissing secretMissingPolicy = iota + reportSecretDriftIfMissing +) + +func (e *secretReconcileError) Error() string { + return e.message +} + +func requeueOnConflict(ctx context.Context, err error, category reconcileConflictCategory, action string) (ctrl.Result, error, bool) { + if !errors.IsConflict(err) { + return ctrl.Result{}, err, false + } + + // Keep the category stable so future metrics or events can aggregate conflict sources. + log.FromContext(ctx).Info( + "Conflict during PostgresDatabase reconciliation, will requeue", + "category", category, + "action", action, + ) + return ctrl.Result{Requeue: true}, nil, true +} + // PostgresDatabaseService is the application service entry point called by the primary adapter (reconciler). // newDBRepo is injected to keep the core free of pgx imports. func PostgresDatabaseService( @@ -42,6 +72,8 @@ func PostgresDatabaseService( logger := log.FromContext(ctx).WithValues("postgresDatabase", postgresDB.Name) ctx = log.IntoContext(ctx, logger) logger.Info("Reconciling PostgresDatabase") + wasReady := postgresDB.Status.Phase != nil && *postgresDB.Status.Phase == string(readyDBPhase) + previouslyProvisionedDatabases := existingDatabaseStatus(postgresDB) updateStatus := func(conditionType conditionTypes, conditionStatus metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileDBPhases) error { return persistStatus(ctx, c, rc.Metrics, postgresDB, conditionType, conditionStatus, reason, message, phase) @@ -50,6 +82,9 @@ func PostgresDatabaseService( // Finalizer: cleanup on deletion, register on creation. 
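The finalizer and deletion paths below are the first call sites of the requeueOnConflict helper introduced earlier in this hunk. A compact, hypothetical reconcile step showing the same conflict-to-requeue conversion the helper standardizes — the group/resource names match this patch's API group, but the step itself is illustrative:

```go
package main

import (
	"context"
	"fmt"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime/schema"
	ctrl "sigs.k8s.io/controller-runtime"
)

// persistStatus stands in for any optimistic-concurrency write; here it
// always fails with a 409 Conflict to exercise the handling path.
func persistStatus(_ context.Context) error {
	return apierrors.NewConflict(
		schema.GroupResource{Group: "enterprise.splunk.com", Resource: "postgresdatabases"},
		"example", fmt.Errorf("object was modified"))
}

func reconcileStep(ctx context.Context) (ctrl.Result, error) {
	if err := persistStatus(ctx); err != nil {
		if apierrors.IsConflict(err) {
			// Conflicts are routine during status updates: requeue for a
			// fresh read instead of counting them as reconcile errors.
			return ctrl.Result{Requeue: true}, nil
		}
		return ctrl.Result{}, err
	}
	return ctrl.Result{}, nil
}

func main() {
	res, err := reconcileStep(context.Background())
	fmt.Println(res.Requeue, err) // true <nil>
}
```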
if postgresDB.GetDeletionTimestamp() != nil { if err := handleDeletion(ctx, rc, postgresDB); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictDeletion, "handling deletion"); ok { + return result, conflictErr + } logger.Error(err, "Failed to clean up PostgresDatabase") rc.emitWarning(postgresDB, EventCleanupFailed, fmt.Sprintf("Cleanup failed: %v", err)) return ctrl.Result{}, err @@ -60,16 +95,13 @@ func PostgresDatabaseService( if !controllerutil.ContainsFinalizer(postgresDB, postgresDatabaseFinalizerName) { controllerutil.AddFinalizer(postgresDB, postgresDatabaseFinalizerName) if err := c.Update(ctx, postgresDB); err != nil { - logger.Error(err, "Failed to add finalizer to PostgresDatabase") + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictFinalizer, "adding finalizer"); ok { + return result, conflictErr + } + logger.Error(err, "Failed to add finalizer") return ctrl.Result{}, fmt.Errorf("failed to add finalizer: %w", err) } - logger.Info("Finalizer added") - return ctrl.Result{}, nil - } - - // ObservedGeneration equality means all phases completed on the current spec — nothing to do. - if postgresDB.Status.ObservedGeneration != nil && *postgresDB.Status.ObservedGeneration == postgresDB.Generation { - logger.Info("Spec unchanged and all phases complete, skipping") + logger.Info("Finalizer added successfully") return ctrl.Result{}, nil } @@ -79,23 +111,32 @@ func PostgresDatabaseService( if errors.IsNotFound(err) { rc.emitWarning(postgresDB, EventClusterNotFound, fmt.Sprintf("PostgresCluster %s not found", postgresDB.Spec.ClusterRef.Name)) if err := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterNotFound, "Cluster CR not found", pendingDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictClusterStatus, "persisting cluster not found status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } return ctrl.Result{RequeueAfter: clusterNotFoundRetryDelay}, nil } if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterInfoFetchFailed, "Can't reach Cluster CR due to transient errors", pendingDBPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + if result, conflictErr, ok := requeueOnConflict(ctx, statusErr, conflictClusterStatus, "persisting cluster fetch failure status"); ok { + return result, conflictErr + } + logger.Error(statusErr, "Failed to persist cluster status") } return ctrl.Result{}, err } clusterStatus := getClusterReadyStatus(cluster) - logger.Info("Cluster validation complete", "clusterName", postgresDB.Spec.ClusterRef.Name, "status", clusterStatus) + logger.Info("Cluster validation completed", "clusterRef", postgresDB.Spec.ClusterRef.Name, "status", clusterStatus) switch clusterStatus { case ClusterNotReady, ClusterNoProvisionerRef: rc.emitWarning(postgresDB, EventClusterNotReady, "Referenced PostgresCluster is not ready yet") if err := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterProvisioning, "Cluster is not in ready state yet", pendingDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictClusterStatus, "persisting cluster provisioning status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } return ctrl.Result{RequeueAfter: retryDelay}, nil @@ -103,6 +144,9 @@ func PostgresDatabaseService( case ClusterReady: rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, clusterReady, EventClusterValidated, "Referenced 
PostgresCluster is ready") if err := updateStatus(clusterReady, metav1.ConditionTrue, reasonClusterAvailable, "Cluster is operational", provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictClusterStatus, "persisting cluster ready status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } } @@ -114,11 +158,14 @@ func PostgresDatabaseService( "If you deleted a previous PostgresDatabase, recreate it with the original name to re-adopt the orphaned resources.", strings.Join(roleConflicts, ", ")) conflictErr := fmt.Errorf("role conflict detected: %s", strings.Join(roleConflicts, ", ")) - logger.Error(conflictErr, conflictMsg) + logger.Error(conflictErr, "Failed to validate managed role ownership", "conflicts", roleConflicts) rc.emitWarning(postgresDB, EventRoleConflict, conflictMsg) errs := []error{conflictErr} if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonRoleConflict, conflictMsg, failedDBPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + if result, conflictErr, ok := requeueOnConflict(ctx, statusErr, conflictRoleConflictStatus, "persisting role conflict status"); ok { + return result, conflictErr + } + logger.Error(statusErr, "Failed to persist role conflict status") errs = append(errs, fmt.Errorf("failed to update status: %w", statusErr)) } return ctrl.Result{}, stderrors.Join(errs...) @@ -131,23 +178,44 @@ func PostgresDatabaseService( Name: cluster.Status.ProvisionerRef.Name, Namespace: cluster.Status.ProvisionerRef.Namespace, }, cnpgCluster); err != nil { - logger.Error(err, "Failed to fetch CNPG Cluster") + logger.Error(err, "Failed to fetch CNPG Cluster", "cluster", cluster.Status.ProvisionerRef.Name) return ctrl.Result{}, err } // Phase: CredentialProvisioning — secrets must exist before roles are patched. // CNPG rejects a PasswordSecretRef pointing at a missing secret. 
- if err := reconcileUserSecrets(ctx, c, rc.Scheme, postgresDB); err != nil { - rc.emitWarning(postgresDB, EventUserSecretsFailed, fmt.Sprintf("Failed to reconcile user secrets: %v", err)) + if err := reconcileRoleSecrets(ctx, c, rc.Scheme, postgresDB, previouslyProvisionedDatabases); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictSecretsReconcile, "reconciling user secrets"); ok { + return result, conflictErr + } + var secretErr *secretReconcileError + if stderrors.As(err, &secretErr) { + rc.emitWarning(postgresDB, EventRolesSecretsDriftDetected, secretErr.message) + if statusErr := updateStatus(secretsReady, metav1.ConditionFalse, secretErr.reason, + secretErr.message, provisioningDBPhase); statusErr != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, statusErr, conflictSecretsStatus, "persisting secret drift status"); ok { + return result, conflictErr + } + logger.Error(statusErr, "Failed to persist secret drift status") + } + return ctrl.Result{RequeueAfter: retryDelay}, nil + } + rc.emitWarning(postgresDB, EventRoleSecretsFailed, fmt.Sprintf("Failed to reconcile user secrets: %v", err)) if statusErr := updateStatus(secretsReady, metav1.ConditionFalse, reasonSecretsCreationFailed, fmt.Sprintf("Failed to reconcile user secrets: %v", err), provisioningDBPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + if result, conflictErr, ok := requeueOnConflict(ctx, statusErr, conflictSecretsStatus, "persisting secret failure status"); ok { + return result, conflictErr + } + logger.Error(statusErr, "Failed to persist secrets status") } return ctrl.Result{}, err } rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, secretsReady, EventSecretsReady, fmt.Sprintf("All secrets provisioned for %d databases", len(postgresDB.Spec.Databases))) if err := updateStatus(secretsReady, metav1.ConditionTrue, reasonSecretsCreated, fmt.Sprintf("All secrets provisioned for %d databases", len(postgresDB.Spec.Databases)), provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictSecretsStatus, "persisting secrets ready status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } @@ -155,16 +223,25 @@ func PostgresDatabaseService( // as databases are ready, so they are created alongside secrets. 
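The ConfigMaps need only the cluster's service endpoints, not database readiness, which is why they are provisioned alongside the secrets. A small sketch of the payload implied by the tests in this patch — rw-host and dbname are asserted there; any further keys (port, ro-host, and so on) would be assumptions:

```go
package main

import "fmt"

// connectionData sketches the per-database ConfigMap contents: rw-host
// points at the CNPG read-write Service DNS name and dbname names the
// logical database. Derived from the test assertions, not the operator code.
func connectionData(rwService, namespace, dbName string) map[string]string {
	return map[string]string{
		"rw-host": fmt.Sprintf("%s.%s.svc.cluster.local", rwService, namespace),
		"dbname":  dbName,
	}
}

func main() {
	fmt.Println(connectionData("tenant-rw", "test", "appdb"))
	// map[dbname:appdb rw-host:tenant-rw.test.svc.cluster.local]
}
```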
endpoints := resolveClusterEndpoints(cluster, cnpgCluster, postgresDB.Namespace) if err := reconcileRoleConfigMaps(ctx, c, rc.Scheme, postgresDB, endpoints); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictConfigMapsReconcile, "reconciling configmaps"); ok { + return result, conflictErr + } rc.emitWarning(postgresDB, EventAccessConfigFailed, fmt.Sprintf("Failed to reconcile ConfigMaps: %v", err)) if statusErr := updateStatus(configMapsReady, metav1.ConditionFalse, reasonConfigMapsCreationFailed, fmt.Sprintf("Failed to reconcile ConfigMaps: %v", err), provisioningDBPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + if result, conflictErr, ok := requeueOnConflict(ctx, statusErr, conflictConfigMapsStatus, "persisting configmaps failure status"); ok { + return result, conflictErr + } + logger.Error(statusErr, "Failed to persist configmaps status") } return ctrl.Result{}, err } rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, configMapsReady, EventConfigMapsReady, fmt.Sprintf("All ConfigMaps provisioned for %d databases", len(postgresDB.Spec.Databases))) if err := updateStatus(configMapsReady, metav1.ConditionTrue, reasonConfigMapsCreated, fmt.Sprintf("All ConfigMaps provisioned for %d databases", len(postgresDB.Spec.Databases)), provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictConfigMapsStatus, "persisting configmaps ready status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } @@ -176,55 +253,74 @@ func PostgresDatabaseService( allRoles := append(desired, rolesToRemove...) if len(rolesToAdd) > 0 || len(rolesToRemove) > 0 { - logger.Info("CNPG Cluster patch started, role drift detected", "toAdd", len(rolesToAdd), "toRemove", len(rolesToRemove)) + logger.Info("Managed roles patch started", "addCount", len(rolesToAdd), "removeCount", len(rolesToRemove)) if err := patchManagedRoles(ctx, c, fieldManager, cluster, allRoles); err != nil { - logger.Error(err, "Failed to patch users in CNPG Cluster") + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictManagedRolesPatch, "patching managed roles"); ok { + return result, conflictErr + } + logger.Error(err, "Failed to patch managed roles", "roleCount", len(allRoles)) rc.emitWarning(postgresDB, EventManagedRolesPatchFailed, fmt.Sprintf("Failed to patch managed roles: %v", err)) - if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonUsersCreationFailed, + if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonRolesCreationFailed, fmt.Sprintf("Failed to patch managed roles: %v", err), failedDBPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + logger.Error(statusErr, "Failed to persist roles status") } return ctrl.Result{}, err } + logger.Info("Managed roles patched", "roleCount", len(allRoles)) rc.emitNormal(postgresDB, EventRoleReconciliationStarted, fmt.Sprintf("Patched managed roles: %d to add, %d to remove", len(rolesToAdd), len(rolesToRemove))) if err := updateStatus(rolesReady, metav1.ConditionFalse, reasonWaitingForCNPG, fmt.Sprintf("Waiting for roles to be reconciled: %d to add, %d to remove", len(rolesToAdd), len(rolesToRemove)), provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictRolesStatus, "persisting roles waiting status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } return ctrl.Result{RequeueAfter: retryDelay}, nil } - roleNames := 
getDesiredUsers(postgresDB) + roleNames := getDesiredRoles(postgresDB) notReadyRoles, err := verifyRolesReady(ctx, roleNames, cnpgCluster) if err != nil { rc.emitWarning(postgresDB, EventRoleFailed, fmt.Sprintf("Role reconciliation failed: %v", err)) - if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonUsersCreationFailed, + if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonRolesCreationFailed, fmt.Sprintf("Role creation failed: %v", err), failedDBPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + if result, conflictErr, ok := requeueOnConflict(ctx, statusErr, conflictRolesStatus, "persisting role failure status"); ok { + return result, conflictErr + } + logger.Error(statusErr, "Failed to persist roles status") } return ctrl.Result{}, err } if len(notReadyRoles) > 0 { if err := updateStatus(rolesReady, metav1.ConditionFalse, reasonWaitingForCNPG, fmt.Sprintf("Waiting for roles to be reconciled: %v", notReadyRoles), provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictRolesStatus, "persisting roles pending status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } return ctrl.Result{RequeueAfter: retryDelay}, nil } rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, rolesReady, EventRolesReady, fmt.Sprintf("Roles reconciled: %d active, %d removed", len(rolesToAdd), len(rolesToRemove))) - if err := updateStatus(rolesReady, metav1.ConditionTrue, reasonUsersAvailable, + if err := updateStatus(rolesReady, metav1.ConditionTrue, reasonRolesAvailable, fmt.Sprintf("Roles reconciled: %d active, %d removed", len(rolesToAdd), len(rolesToRemove)), provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictRolesStatus, "persisting roles ready status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } // Phase: DatabaseProvisioning adopted, err := reconcileCNPGDatabases(ctx, c, rc.Scheme, postgresDB, cluster) if err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictCNPGDatabasesReconcile, "reconciling CNPG databases"); ok { + return result, conflictErr + } logger.Error(err, "Failed to reconcile CNPG Databases") rc.emitWarning(postgresDB, EventDatabasesReconcileFailed, fmt.Sprintf("Failed to reconcile databases: %v", err)) if statusErr := updateStatus(databasesReady, metav1.ConditionFalse, reasonDatabaseReconcileFailed, fmt.Sprintf("Failed to reconcile databases: %v", err), failedDBPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + logger.Error(statusErr, "Failed to persist databases status") } return ctrl.Result{}, err } @@ -234,13 +330,16 @@ func PostgresDatabaseService( notReadyDBs, err := verifyDatabasesReady(ctx, c, postgresDB) if err != nil { - logger.Error(err, "Failed to verify database status") + logger.Error(err, "Failed to verify database readiness") return ctrl.Result{}, err } if len(notReadyDBs) > 0 { rc.emitOnceBeforeWait(postgresDB, postgresDB.Status.Conditions, databasesReady, EventDatabaseReconciliationStarted, fmt.Sprintf("Reconciling %d databases, waiting for readiness", len(postgresDB.Spec.Databases))) if err := updateStatus(databasesReady, metav1.ConditionFalse, reasonWaitingForCNPG, fmt.Sprintf("Waiting for databases to be ready: %v", notReadyDBs), provisioningDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictDatabasesStatus, "persisting databases pending status"); ok { + 
return result, conflictErr + } return ctrl.Result{}, err } return ctrl.Result{RequeueAfter: retryDelay}, nil @@ -248,6 +347,9 @@ func PostgresDatabaseService( rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, databasesReady, EventDatabasesReady, fmt.Sprintf("All %d databases ready", len(postgresDB.Spec.Databases))) if err := updateStatus(databasesReady, metav1.ConditionTrue, reasonDatabasesAvailable, fmt.Sprintf("All %d databases ready", len(postgresDB.Spec.Databases)), readyDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictDatabasesStatus, "persisting databases ready status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } @@ -284,26 +386,37 @@ func PostgresDatabaseService( rc.emitWarning(postgresDB, EventPrivilegesGrantFailed, fmt.Sprintf("Failed to grant RW role privileges: %v", err)) if statusErr := updateStatus(privilegesReady, metav1.ConditionFalse, reasonPrivilegesGrantFailed, fmt.Sprintf("Failed to grant RW role privileges: %v", err), provisioningDBPhase); statusErr != nil { - logger.Error(statusErr, "Failed to update status") + if result, conflictErr, ok := requeueOnConflict(ctx, statusErr, conflictPrivilegesStatus, "persisting privileges failure status"); ok { + return result, conflictErr + } + logger.Error(statusErr, "Failed to persist privileges status") } return ctrl.Result{}, err } rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, privilegesReady, EventPrivilegesReady, fmt.Sprintf("RW role privileges granted for all %d databases", len(postgresDB.Spec.Databases))) if err := updateStatus(privilegesReady, metav1.ConditionTrue, reasonPrivilegesGranted, fmt.Sprintf("RW role privileges granted for all %d databases", len(postgresDB.Spec.Databases)), readyDBPhase); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictPrivilegesStatus, "persisting privileges ready status"); ok { + return result, conflictErr + } return ctrl.Result{}, err } } - rc.emitNormal(postgresDB, EventPostgresDatabaseReady, fmt.Sprintf("PostgresDatabase %s is ready", postgresDB.Name)) + if !wasReady { + rc.emitNormal(postgresDB, EventPostgresDatabaseReady, fmt.Sprintf("PostgresDatabase %s is ready", postgresDB.Name)) + } postgresDB.Status.Databases = populateDatabaseStatus(postgresDB) postgresDB.Status.ObservedGeneration = &postgresDB.Generation if err := c.Status().Update(ctx, postgresDB); err != nil { + if result, conflictErr, ok := requeueOnConflict(ctx, err, conflictFinalStatus, "persisting final status"); ok { + return result, conflictErr + } return ctrl.Result{}, fmt.Errorf("failed to persist final status: %w", err) } - logger.Info("All phases complete") + logger.Info("PostgresDatabase reconciliation completed") return ctrl.Result{}, nil } @@ -350,7 +463,7 @@ func getClusterReadyStatus(cluster *enterprisev4.PostgresCluster) clusterReadySt return ClusterReady } -func getDesiredUsers(postgresDB *enterprisev4.PostgresDatabase) []string { +func getDesiredRoles(postgresDB *enterprisev4.PostgresDatabase) []string { users := make([]string, 0, len(postgresDB.Spec.Databases)*2) for _, dbSpec := range postgresDB.Spec.Databases { users = append(users, adminRoleName(dbSpec.Name), rwRoleName(dbSpec.Name)) @@ -358,12 +471,15 @@ func getDesiredUsers(postgresDB *enterprisev4.PostgresDatabase) []string { return users } -func getUsersInClusterSpec(cluster *enterprisev4.PostgresCluster) []string { - users := make([]string, 0, len(cluster.Spec.ManagedRoles)) - for _, role := range 
cluster.Spec.ManagedRoles { - users = append(users, role.Name) +func existingDatabaseStatus(postgresDB *enterprisev4.PostgresDatabase) map[string]struct{} { + if postgresDB.Status.Phase == nil || *postgresDB.Status.Phase != string(readyDBPhase) { + return map[string]struct{}{} } - return users + existing := make(map[string]struct{}, len(postgresDB.Status.Databases)) + for _, database := range postgresDB.Status.Databases { + existing[database.Name] = struct{}{} + } + return existing } // rolesMatchClusterSpec returns true if desired and actual contain the same roles @@ -417,7 +533,6 @@ func parseRoleNames(raw []byte) []string { } func patchManagedRoles(ctx context.Context, c client.Client, fieldManager string, cluster *enterprisev4.PostgresCluster, roles []enterprisev4.ManagedRole) error { - logger := log.FromContext(ctx) rolePatch, err := buildManagedRolesPatch(cluster, roles, c.Scheme()) if err != nil { return fmt.Errorf("building managed roles patch: %w", err) @@ -425,14 +540,12 @@ func patchManagedRoles(ctx context.Context, c client.Client, fieldManager string if err := c.Patch(ctx, rolePatch, client.Apply, client.FieldOwner(fieldManager)); err != nil { return fmt.Errorf("patching managed roles: %w", err) } - logger.Info("Managed roles patched", "count", len(roles)) return nil } -func verifyRolesReady(ctx context.Context, expectedUsers []string, cnpgCluster *cnpgv1.Cluster) ([]string, error) { - logger := log.FromContext(ctx) +func verifyRolesReady(_ context.Context, expectedRoles []string, cnpgCluster *cnpgv1.Cluster) ([]string, error) { if cnpgCluster.Status.ManagedRolesStatus.CannotReconcile != nil { - for _, userName := range expectedUsers { + for _, userName := range expectedRoles { if errs, exists := cnpgCluster.Status.ManagedRolesStatus.CannotReconcile[userName]; exists { return nil, fmt.Errorf("reconciling user %s: %v", userName, errs) } @@ -440,14 +553,11 @@ func verifyRolesReady(ctx context.Context, expectedUsers []string, cnpgCluster * } reconciled := cnpgCluster.Status.ManagedRolesStatus.ByStatus[cnpgv1.RoleStatusReconciled] var notReady []string - for _, userName := range expectedUsers { + for _, userName := range expectedRoles { if !slices.Contains(reconciled, userName) { notReady = append(notReady, userName) } } - if len(notReady) > 0 { - logger.Info("Users not reconciled yet", "pending", notReady) - } return notReady, nil } @@ -456,18 +566,18 @@ func reconcileCNPGDatabases(ctx context.Context, c client.Client, scheme *runtim var adopted []string for _, dbSpec := range postgresDB.Spec.Databases { cnpgDBName := cnpgDatabaseName(postgresDB.Name, dbSpec.Name) + reAdopted := false cnpgDB := &cnpgv1.Database{ ObjectMeta: metav1.ObjectMeta{Name: cnpgDBName, Namespace: postgresDB.Namespace}, } _, err := controllerutil.CreateOrUpdate(ctx, c, cnpgDB, func() error { cnpgDB.Spec = buildCNPGDatabaseSpec(cluster.Status.ProvisionerRef.Name, dbSpec) - reAdopting := cnpgDB.Annotations[annotationRetainedFrom] == postgresDB.Name - if reAdopting { - logger.Info("Orphaned CNPG Database re-adopted", "name", cnpgDBName) + reAdopted = cnpgDB.Annotations[annotationRetainedFrom] == postgresDB.Name + if reAdopted { delete(cnpgDB.Annotations, annotationRetainedFrom) adopted = append(adopted, dbSpec.Name) } - if cnpgDB.CreationTimestamp.IsZero() || reAdopting { + if cnpgDB.CreationTimestamp.IsZero() || reAdopted { return controllerutil.SetControllerReference(postgresDB, cnpgDB, scheme) } return nil @@ -475,6 +585,9 @@ func reconcileCNPGDatabases(ctx context.Context, c client.Client, scheme *runtim 
if err != nil { return adopted, fmt.Errorf("reconciling CNPG Database %s: %w", cnpgDBName, err) } + if reAdopted { + logger.Info("CNPG Database re-adopted", "name", cnpgDBName) + } } return adopted, nil } @@ -485,6 +598,10 @@ func verifyDatabasesReady(ctx context.Context, c client.Client, postgresDB *ente cnpgDBName := cnpgDatabaseName(postgresDB.Name, dbSpec.Name) cnpgDB := &cnpgv1.Database{} if err := c.Get(ctx, types.NamespacedName{Name: cnpgDBName, Namespace: postgresDB.Namespace}, cnpgDB); err != nil { + if errors.IsNotFound(err) { + notReady = append(notReady, dbSpec.Name) + continue + } return nil, fmt.Errorf("getting CNPG Database %s: %w", cnpgDBName, err) } if cnpgDB.Status.Applied == nil || !*cnpgDB.Status.Applied { @@ -510,6 +627,7 @@ func applyStatus(db *enterprisev4.PostgresDatabase, conditionType conditionTypes }) p := string(phase) db.Status.Phase = &p + db.Status.ObservedGeneration = &db.Generation } func buildDeletionPlan(databases []enterprisev4.DatabaseDefinition) deletionPlan { @@ -545,7 +663,7 @@ func handleDeletion(ctx context.Context, rc *ReconcileContext, postgresDB *enter return fmt.Errorf("removing finalizer: %w", err) } rc.emitNormal(postgresDB, EventCleanupComplete, fmt.Sprintf("Cleanup complete (%d retained, %d deleted)", len(plan.retained), len(plan.deleted))) - logger.Info("Cleanup complete", "retained", len(plan.retained), "deleted", len(plan.deleted)) + logger.Info("Cleanup completed", "retained", len(plan.retained), "deleted", len(plan.deleted)) return nil } @@ -579,7 +697,7 @@ func cleanupManagedRoles(ctx context.Context, c client.Client, postgresDB *enter if !errors.IsNotFound(err) { return fmt.Errorf("getting PostgresCluster for role cleanup: %w", err) } - logger.Info("PostgresCluster already deleted, skipping role cleanup") + logger.Info("PostgresCluster already deleted, skipping managed roles cleanup") return nil } fieldManager := fieldManagerName(postgresDB.Name) @@ -589,7 +707,6 @@ func cleanupManagedRoles(ctx context.Context, c client.Client, postgresDB *enter if err := patchManagedRoles(ctx, c, fieldManager, cluster, allRoles); err != nil { return err } - logger.Info("Managed roles patched on deletion", "retained", len(retainedRoles), "removed", len(rolesToRemove)) return nil } @@ -615,7 +732,7 @@ func orphanCNPGDatabases(ctx context.Context, c client.Client, postgresDB *enter if err := c.Update(ctx, db); err != nil { return fmt.Errorf("orphaning CNPG Database %s: %w", name, err) } - logger.Info("Orphaned CNPG Database CR", "name", name) + logger.Info("CNPG Database orphaned", "name", name) } return nil } @@ -642,7 +759,7 @@ func orphanConfigMaps(ctx context.Context, c client.Client, postgresDB *enterpri if err := c.Update(ctx, cm); err != nil { return fmt.Errorf("orphaning ConfigMap %s: %w", name, err) } - logger.Info("Orphaned ConfigMap", "name", name) + logger.Info("ConfigMap orphaned", "name", name) } return nil } @@ -670,7 +787,7 @@ func orphanSecrets(ctx context.Context, c client.Client, postgresDB *enterprisev if err := c.Update(ctx, secret); err != nil { return fmt.Errorf("orphaning Secret %s: %w", name, err) } - logger.Info("Orphaned Secret", "name", name) + logger.Info("Secret orphaned", "name", name) } } return nil @@ -687,7 +804,7 @@ func deleteCNPGDatabases(ctx context.Context, c client.Client, postgresDB *enter } return fmt.Errorf("deleting CNPG Database %s: %w", name, err) } - logger.Info("Deleted CNPG Database CR", "name", name) + logger.Info("CNPG Database deleted", "name", name) } return nil } @@ -703,7 +820,7 @@ func 
deleteConfigMaps(ctx context.Context, c client.Client, postgresDB *enterpri } return fmt.Errorf("deleting ConfigMap %s: %w", name, err) } - logger.Info("Deleted ConfigMap", "name", name) + logger.Info("ConfigMap deleted", "name", name) } return nil } @@ -720,7 +837,7 @@ func deleteSecrets(ctx context.Context, c client.Client, postgresDB *enterprisev } return fmt.Errorf("deleting Secret %s: %w", name, err) } - logger.Info("Deleted Secret", "name", name) + logger.Info("Secret deleted", "name", name) } } return nil @@ -843,33 +960,84 @@ func adoptResource(ctx context.Context, c client.Client, scheme *runtime.Scheme, return c.Update(ctx, obj) } -func reconcileUserSecrets(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase) error { +func secretMissingPolicyForDB(dbName string, existingDBs map[string]struct{}) secretMissingPolicy { + if _, exists := existingDBs[dbName]; exists { + return reportSecretDriftIfMissing + } + return createSecretIfMissing +} + +func reconcileRoleSecrets(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, existingDatabases map[string]struct{}) error { for _, dbSpec := range postgresDB.Spec.Databases { - if err := ensureSecret(ctx, c, scheme, postgresDB, adminRoleName(dbSpec.Name), roleSecretName(postgresDB.Name, dbSpec.Name, secretRoleAdmin)); err != nil { + missingPolicy := secretMissingPolicyForDB(dbSpec.Name, existingDatabases) + if err := reconcileRoleSecret(ctx, c, scheme, postgresDB, adminRoleName(dbSpec.Name), roleSecretName(postgresDB.Name, dbSpec.Name, secretRoleAdmin), missingPolicy); err != nil { return err } - if err := ensureSecret(ctx, c, scheme, postgresDB, rwRoleName(dbSpec.Name), roleSecretName(postgresDB.Name, dbSpec.Name, secretRoleRW)); err != nil { + if err := reconcileRoleSecret(ctx, c, scheme, postgresDB, rwRoleName(dbSpec.Name), roleSecretName(postgresDB.Name, dbSpec.Name, secretRoleRW), missingPolicy); err != nil { return err } } return nil } +func reconcileRoleSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, roleName, secretName string, missingPolicy secretMissingPolicy) error { + if missingPolicy == reportSecretDriftIfMissing { + return ensureProvisionedSecret(ctx, c, scheme, postgresDB, roleName, secretName) + } + return ensureSecret(ctx, c, scheme, postgresDB, roleName, secretName) +} + func ensureSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, roleName, secretName string) error { secret, err := getSecret(ctx, c, postgresDB.Namespace, secretName) if err != nil { return err } + if secret == nil { + return createRoleSecret(ctx, c, scheme, postgresDB, roleName, secretName) + } + return reconcileExistingSecret(ctx, c, scheme, postgresDB, secretName, secret) +} + +func ensureProvisionedSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, roleName, secretName string) error { + secret, err := getSecret(ctx, c, postgresDB.Namespace, secretName) + if err != nil { + return err + } + if secret == nil { + return &secretReconcileError{ + message: fmt.Sprintf("Managed Secret %s is missing for previously provisioned role %s", secretName, roleName), + reason: reasonSecretsDriftDetected, + } + } + return reconcileExistingSecret(ctx, c, scheme, postgresDB, secretName, secret) +} + +// reconcileExistingSecret only reconciles ownership — it never rewrites secret data. 
+// Passwords must not be regenerated for existing credentials; CNPG and consumers hold live references. +func reconcileExistingSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, secretName string, secret *corev1.Secret) error { logger := log.FromContext(ctx) switch { - case secret == nil: - logger.Info("User secret creation started", "name", secretName) - return createUserSecret(ctx, c, scheme, postgresDB, roleName, secretName) case secret.Annotations[annotationRetainedFrom] == postgresDB.Name: - logger.Info("Orphaned secret re-adopted", "name", secretName) - return adoptResource(ctx, c, scheme, postgresDB, secret) + if err := adoptResource(ctx, c, scheme, postgresDB, secret); err != nil { + return err + } + logger.Info("Secret re-adopted", "name", secretName) + return nil + case metav1.IsControlledBy(secret, postgresDB): + return nil + case metav1.GetControllerOf(secret) == nil: + if err := adoptResource(ctx, c, scheme, postgresDB, secret); err != nil { + return err + } + logger.Info("Secret adopted", "name", secretName) + return nil + default: + owner := metav1.GetControllerOf(secret) + return &secretReconcileError{ + message: fmt.Sprintf("Managed Secret %s is controlled by %s %s", secretName, owner.Kind, owner.Name), + reason: reasonSecretsDriftDetected, + } } - return nil } func getSecret(ctx context.Context, c client.Client, namespace, name string) (*corev1.Secret, error) { @@ -884,7 +1052,7 @@ func getSecret(ctx context.Context, c client.Client, namespace, name string) (*c return secret, nil } -func createUserSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, roleName, secretName string) error { +func createRoleSecret(ctx context.Context, c client.Client, scheme *runtime.Scheme, postgresDB *enterprisev4.PostgresDatabase, roleName, secretName string) error { pw, err := generatePassword() if err != nil { return err @@ -899,6 +1067,7 @@ func createUserSecret(ctx context.Context, c client.Client, scheme *runtime.Sche } return err } + log.FromContext(ctx).Info("Role secret created", "name", secretName) return nil } @@ -930,6 +1099,7 @@ func reconcileRoleConfigMaps(ctx context.Context, c client.Client, scheme *runti logger := log.FromContext(ctx) for _, dbSpec := range postgresDB.Spec.Databases { cmName := configMapName(postgresDB.Name, dbSpec.Name) + reAdopted := false cm := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: cmName, @@ -939,12 +1109,11 @@ func reconcileRoleConfigMaps(ctx context.Context, c client.Client, scheme *runti } _, err := controllerutil.CreateOrUpdate(ctx, c, cm, func() error { cm.Data = buildDatabaseConfigMapBody(dbSpec.Name, endpoints) - reAdopting := cm.Annotations[annotationRetainedFrom] == postgresDB.Name - if reAdopting { - logger.Info("Orphaned ConfigMap re-adopted", "name", cmName) + reAdopted = cm.Annotations[annotationRetainedFrom] == postgresDB.Name + if reAdopted { delete(cm.Annotations, annotationRetainedFrom) } - if cm.CreationTimestamp.IsZero() || reAdopting { + if !metav1.IsControlledBy(cm, postgresDB) { return controllerutil.SetControllerReference(postgresDB, cm, scheme) } return nil @@ -952,6 +1121,9 @@ func reconcileRoleConfigMaps(ctx context.Context, c client.Client, scheme *runti if err != nil { return fmt.Errorf("reconciling ConfigMap %s: %w", cmName, err) } + if reAdopted { + logger.Info("ConfigMap re-adopted", "name", cmName) + } } return nil } diff --git a/pkg/postgresql/database/core/database_unit_test.go 
b/pkg/postgresql/database/core/database_unit_test.go index c41d2dd59..a755eac6f 100644 --- a/pkg/postgresql/database/core/database_unit_test.go +++ b/pkg/postgresql/database/core/database_unit_test.go @@ -3,7 +3,6 @@ package core // The following functions are intentionally not tested directly here. // Their business logic is covered by narrower helper tests where practical, // and the remaining behavior is mostly controller-runtime orchestration: -// - PostgresDatabaseService // - patchManagedRoles // - reconcileCNPGDatabases // - handleDeletion @@ -27,9 +26,12 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/tools/record" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/client/interceptor" @@ -124,7 +126,160 @@ func testClient(t *testing.T, scheme *runtime.Scheme, objs ...client.Object) cli return builder.Build() } -func TestGetDesiredUsers(t *testing.T) { +func postgresDatabaseConflict(name string) error { + return apierrors.NewConflict( + schema.GroupResource{ + Group: enterprisev4.GroupVersion.Group, + Resource: "postgresdatabases", + }, + name, + errors.New("resource version conflict"), + ) +} + +func TestPostgresDatabaseServiceRequeuesOnConflict(t *testing.T) { + scheme := testScheme(t) + tests := []struct { + name string + existing *enterprisev4.PostgresDatabase + build func(*enterprisev4.PostgresDatabase) client.Client + }{ + { + name: "when adding the finalizer", + existing: &enterprisev4.PostgresDatabase{ + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + }, + }, + build: func(existing *enterprisev4.PostgresDatabase) client.Client { + return fake.NewClientBuilder(). + WithScheme(scheme). + WithStatusSubresource(&enterprisev4.PostgresDatabase{}). + WithObjects(existing). + WithInterceptorFuncs(interceptor.Funcs{ + Update: func(_ context.Context, _ client.WithWatch, obj client.Object, _ ...client.UpdateOption) error { + return postgresDatabaseConflict(obj.GetName()) + }, + }). + Build() + }, + }, + { + name: "when persisting status", + existing: &enterprisev4.PostgresDatabase{ + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + Finalizers: []string{postgresDatabaseFinalizerName}, + }, + Spec: enterprisev4.PostgresDatabaseSpec{ + ClusterRef: corev1.LocalObjectReference{Name: "missing-cluster"}, + }, + }, + build: func(existing *enterprisev4.PostgresDatabase) client.Client { + return fake.NewClientBuilder(). + WithScheme(scheme). + WithStatusSubresource(&enterprisev4.PostgresDatabase{}). + WithObjects(existing). + WithInterceptorFuncs(interceptor.Funcs{ + SubResourceUpdate: func(_ context.Context, _ client.Client, subResourceName string, obj client.Object, _ ...client.SubResourceUpdateOption) error { + if subResourceName != "status" { + return nil + } + return postgresDatabaseConflict(obj.GetName()) + }, + }). 
+ Build() + }, + }, + { + name: "when status update conflicts while handling another error", + existing: &enterprisev4.PostgresDatabase{ + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + Finalizers: []string{postgresDatabaseFinalizerName}, + }, + Spec: enterprisev4.PostgresDatabaseSpec{ + ClusterRef: corev1.LocalObjectReference{Name: "primary"}, + }, + }, + build: func(existing *enterprisev4.PostgresDatabase) client.Client { + return fake.NewClientBuilder(). + WithScheme(scheme). + WithStatusSubresource(&enterprisev4.PostgresDatabase{}). + WithObjects(existing). + WithInterceptorFuncs(interceptor.Funcs{ + Get: func(ctx context.Context, client client.WithWatch, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + if _, ok := obj.(*enterprisev4.PostgresCluster); ok { + return errors.New("temporary get failure") + } + return client.Get(ctx, key, obj, opts...) + }, + SubResourceUpdate: func(_ context.Context, _ client.Client, subResourceName string, obj client.Object, _ ...client.SubResourceUpdateOption) error { + if subResourceName != "status" { + return nil + } + return postgresDatabaseConflict(obj.GetName()) + }, + }). + Build() + }, + }, + } + + for _, tst := range tests { + t.Run(tst.name, func(t *testing.T) { + c := tst.build(tst.existing) + + postgresDB := &enterprisev4.PostgresDatabase{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: tst.existing.Name, Namespace: tst.existing.Namespace}, postgresDB)) + + result, err := PostgresDatabaseService( + context.Background(), + &ReconcileContext{Client: c, Scheme: scheme, Recorder: record.NewFakeRecorder(10), Metrics: &pgprometheus.NoopRecorder{}}, + postgresDB, + nil, + ) + + require.NoError(t, err) + assert.Equal(t, ctrl.Result{Requeue: true}, result) + }) + } +} + +func TestSecretMissingPolicyForDB(t *testing.T) { + tests := []struct { + name string + dbName string + existingDBs map[string]struct{} + want secretMissingPolicy + }{ + { + name: "creates secrets for new databases", + dbName: "payments", + existingDBs: map[string]struct{}{}, + want: createSecretIfMissing, + }, + { + name: "reports drift for previously provisioned databases", + dbName: "payments", + existingDBs: map[string]struct{}{ + "payments": {}, + }, + want: reportSecretDriftIfMissing, + }, + } + + for _, tst := range tests { + t.Run(tst.name, func(t *testing.T) { + assert.Equal(t, tst.want, secretMissingPolicyForDB(tst.dbName, tst.existingDBs)) + }) + } +} + +func TestGetDesiredRoles(t *testing.T) { postgresDB := &enterprisev4.PostgresDatabase{ Spec: enterprisev4.PostgresDatabaseSpec{ Databases: []enterprisev4.DatabaseDefinition{ @@ -140,23 +295,7 @@ func TestGetDesiredUsers(t *testing.T) { "secondary_db_rw", } - got := getDesiredUsers(postgresDB) - - assert.Equal(t, want, got) -} - -func TestGetUsersInClusterSpec(t *testing.T) { - cluster := &enterprisev4.PostgresCluster{ - Spec: enterprisev4.PostgresClusterSpec{ - ManagedRoles: []enterprisev4.ManagedRole{ - {Name: "main_db_admin"}, - {Name: "main_db_rw"}, - }, - }, - } - want := []string{"main_db_admin", "main_db_rw"} - - got := getUsersInClusterSpec(cluster) + got := getDesiredRoles(postgresDB) assert.Equal(t, want, got) } @@ -290,14 +429,14 @@ func TestGetRoleConflicts(t *testing.T) { func TestVerifyRolesReady(t *testing.T) { tests := []struct { name string - expectedUsers []string + expectedRoles []string cluster *cnpgv1.Cluster wantNotReady []string wantErr string }{ { name: "returns error when a role cannot reconcile", - expectedUsers: 
[]string{"main_db_admin", "main_db_rw"}, + expectedRoles: []string{"main_db_admin", "main_db_rw"}, cluster: &cnpgv1.Cluster{ Status: cnpgv1.ClusterStatus{ ManagedRolesStatus: cnpgv1.ManagedRoles{ @@ -311,7 +450,7 @@ func TestVerifyRolesReady(t *testing.T) { }, { name: "returns missing roles that are not reconciled yet", - expectedUsers: []string{"main_db_admin", "main_db_rw", "analytics_admin"}, + expectedRoles: []string{"main_db_admin", "main_db_rw", "analytics_admin"}, cluster: &cnpgv1.Cluster{ Status: cnpgv1.ClusterStatus{ ManagedRolesStatus: cnpgv1.ManagedRoles{ @@ -325,7 +464,7 @@ func TestVerifyRolesReady(t *testing.T) { }, { name: "returns pending reconciliation roles as not ready", - expectedUsers: []string{"main_db_admin", "main_db_rw"}, + expectedRoles: []string{"main_db_admin", "main_db_rw"}, cluster: &cnpgv1.Cluster{ Status: cnpgv1.ClusterStatus{ ManagedRolesStatus: cnpgv1.ManagedRoles{ @@ -340,7 +479,7 @@ func TestVerifyRolesReady(t *testing.T) { }, { name: "returns empty when all roles are reconciled", - expectedUsers: []string{"main_db_admin"}, + expectedRoles: []string{"main_db_admin"}, cluster: &cnpgv1.Cluster{ Status: cnpgv1.ClusterStatus{ ManagedRolesStatus: cnpgv1.ManagedRoles{ @@ -357,7 +496,7 @@ func TestVerifyRolesReady(t *testing.T) { for _, tst := range tests { t.Run(tst.name, func(t *testing.T) { - gotNotReady, err := verifyRolesReady(context.Background(), tst.expectedUsers, tst.cluster) + gotNotReady, err := verifyRolesReady(context.Background(), tst.expectedRoles, tst.cluster) if tst.wantErr != "" { require.Error(t, err) assert.Equal(t, tst.wantErr, err.Error()) @@ -666,14 +805,14 @@ func TestVerifyDatabasesReady(t *testing.T) { wantNotReady: []string{"payments", "analytics"}, }, { - name: "returns error when a database is missing", + name: "returns not ready when a database is missing", objects: []client.Object{ &cnpgv1.Database{ ObjectMeta: metav1.ObjectMeta{Name: "primary-payments", Namespace: "dbs"}, Status: cnpgv1.DatabaseStatus{Applied: boolPtr(true)}, }, }, - wantErr: "getting CNPG Database primary-analytics", + wantNotReady: []string{"analytics"}, }, } @@ -858,7 +997,7 @@ func TestGeneratePassword(t *testing.T) { } // Uses a fake client because the helper creates Secret objects and persists owner references through the Kubernetes API. 
-func TestCreateUserSecret(t *testing.T) { +func TestCreateRoleSecret(t *testing.T) { scheme := testScheme(t) postgresDB := &enterprisev4.PostgresDatabase{ TypeMeta: metav1.TypeMeta{ @@ -877,13 +1016,13 @@ func TestCreateUserSecret(t *testing.T) { secretName := "primary-payments-admin" wantManagedBy := "splunk-operator" wantReload := "true" - wantUsername := roleName + wantRolename := roleName wantOwnerUID := postgresDB.UID wantPasswordLength := passwordLength wantPasswordDigits := passwordDigits c := testClient(t, scheme) - err := createUserSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) + err := createRoleSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) require.NoError(t, err) @@ -893,7 +1032,7 @@ func TestCreateUserSecret(t *testing.T) { assert.Equal(t, postgresDB.Namespace, got.Namespace) assert.Equal(t, wantManagedBy, got.Labels[labelManagedBy]) assert.Equal(t, wantReload, got.Labels[labelCNPGReload]) - assert.Equal(t, wantUsername, string(got.Data["username"])) + assert.Equal(t, wantRolename, string(got.Data["username"])) assertGeneratedPassword(t, string(got.Data[secretKeyPassword]), wantPasswordLength, wantPasswordDigits) require.Len(t, got.OwnerReferences, 1) assert.Equal(t, wantOwnerUID, got.OwnerReferences[0].UID) @@ -902,18 +1041,18 @@ func TestCreateUserSecret(t *testing.T) { t.Run("returns nil when secret already exists", func(t *testing.T) { roleName := "payments_admin" secretName := "primary-payments-admin" - wantUsername := roleName + wantRolename := roleName wantPassword := "existing-password" - existing := buildPasswordSecret(postgresDB, secretName, wantUsername, wantPassword) + existing := buildPasswordSecret(postgresDB, secretName, wantRolename, wantPassword) c := testClient(t, scheme, existing) - err := createUserSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) + err := createRoleSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) require.NoError(t, err) got := &corev1.Secret{} require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: secretName, Namespace: postgresDB.Namespace}, got)) - assert.Equal(t, wantUsername, string(got.Data["username"])) + assert.Equal(t, wantRolename, string(got.Data["username"])) assert.Equal(t, wantPassword, string(got.Data[secretKeyPassword])) assert.Empty(t, got.OwnerReferences) }) @@ -939,7 +1078,7 @@ func TestEnsureSecret(t *testing.T) { secretName := "primary-payments-admin" wantManagedBy := "splunk-operator" wantReload := "true" - wantUsername := roleName + wantRolename := roleName wantOwnerUID := postgresDB.UID wantPasswordLength := passwordLength wantPasswordDigits := passwordDigits @@ -953,7 +1092,7 @@ func TestEnsureSecret(t *testing.T) { require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: secretName, Namespace: postgresDB.Namespace}, got)) assert.Equal(t, wantManagedBy, got.Labels[labelManagedBy]) assert.Equal(t, wantReload, got.Labels[labelCNPGReload]) - assert.Equal(t, wantUsername, string(got.Data["username"])) + assert.Equal(t, wantRolename, string(got.Data["username"])) assertGeneratedPassword(t, string(got.Data[secretKeyPassword]), wantPasswordLength, wantPasswordDigits) require.Len(t, got.OwnerReferences, 1) assert.Equal(t, wantOwnerUID, got.OwnerReferences[0].UID) @@ -962,7 +1101,7 @@ func TestEnsureSecret(t *testing.T) { t.Run("re-adopts retained secret", func(t *testing.T) { roleName := "payments_admin" secretName := "primary-payments-admin" - wantUsername := roleName + wantRolename := 
roleName wantPassword := "existing-password" wantOwnerUID := postgresDB.UID wantKeep := "true" @@ -979,7 +1118,7 @@ func TestEnsureSecret(t *testing.T) { }, }, Data: map[string][]byte{ - "username": []byte(wantUsername), + "username": []byte(wantRolename), secretKeyPassword: []byte(wantPassword), }, } @@ -994,7 +1133,7 @@ func TestEnsureSecret(t *testing.T) { assert.Equal(t, wantKeep, got.Annotations["keep"]) _, hasRetainedAnnotation := got.Annotations[annotationRetainedFrom] assert.False(t, hasRetainedAnnotation) - assert.Equal(t, wantUsername, string(got.Data["username"])) + assert.Equal(t, wantRolename, string(got.Data["username"])) assert.Equal(t, wantPassword, string(got.Data[secretKeyPassword])) assert.Contains(t, got.OwnerReferences, metav1.OwnerReference{ APIVersion: enterprisev4.GroupVersion.String(), @@ -1009,7 +1148,7 @@ func TestEnsureSecret(t *testing.T) { t.Run("does nothing for existing managed secret", func(t *testing.T) { roleName := "payments_admin" secretName := "primary-payments-admin" - wantUsername := roleName + wantRolename := roleName wantPassword := "existing-password" wantKeep := "true" wantOwnerUID := postgresDB.UID @@ -1021,11 +1160,18 @@ func TestEnsureSecret(t *testing.T) { "keep": wantKeep, }, OwnerReferences: []metav1.OwnerReference{ - {UID: wantOwnerUID, Name: postgresDB.Name}, + { + APIVersion: enterprisev4.GroupVersion.String(), + Kind: "PostgresDatabase", + Name: postgresDB.Name, + UID: wantOwnerUID, + Controller: boolPtr(true), + BlockOwnerDeletion: boolPtr(true), + }, }, }, Data: map[string][]byte{ - "username": []byte(wantUsername), + "username": []byte(wantRolename), secretKeyPassword: []byte(wantPassword), }, } @@ -1038,15 +1184,124 @@ func TestEnsureSecret(t *testing.T) { got := &corev1.Secret{} require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: secretName, Namespace: postgresDB.Namespace}, got)) assert.Equal(t, wantKeep, got.Annotations["keep"]) - assert.Equal(t, wantUsername, string(got.Data["username"])) + assert.Equal(t, wantRolename, string(got.Data["username"])) assert.Equal(t, wantPassword, string(got.Data[secretKeyPassword])) require.Len(t, got.OwnerReferences, 1) assert.Equal(t, wantOwnerUID, got.OwnerReferences[0].UID) }) + + t.Run("returns drift error when a previously provisioned secret is missing", func(t *testing.T) { + roleName := "payments_admin" + secretName := "primary-payments-admin" + c := testClient(t, scheme) + + err := ensureProvisionedSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) + + require.Error(t, err) + var driftErr *secretReconcileError + require.ErrorAs(t, err, &driftErr) + assert.Equal(t, reasonSecretsDriftDetected, driftErr.reason) + assert.ErrorContains(t, err, secretName) + }) + + t.Run("re-attaches owner reference when ownership was manually stripped", func(t *testing.T) { + roleName := "payments_admin" + secretName := "primary-payments-admin" + existing := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: postgresDB.Namespace, + Labels: map[string]string{ + labelManagedBy: "splunk-operator", + labelCNPGReload: "true", + }, + Annotations: map[string]string{"keep": "true"}, + }, + Data: map[string][]byte{ + "username": []byte(roleName), + secretKeyPassword: []byte("existing-password"), + }, + } + c := testClient(t, scheme, existing) + + err := ensureProvisionedSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) + + require.NoError(t, err) + + got := &corev1.Secret{} + require.NoError(t, 
c.Get(context.Background(), types.NamespacedName{Name: secretName, Namespace: postgresDB.Namespace}, got)) + assert.Equal(t, "true", got.Annotations["keep"]) + require.Len(t, got.OwnerReferences, 1) + assert.Equal(t, postgresDB.UID, got.OwnerReferences[0].UID) + }) + + t.Run("accepts an existing secret with mutated data without rewriting it", func(t *testing.T) { + roleName := "payments_admin" + secretName := "primary-payments-admin" + wantUsername := "wrong_user" + existing := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: postgresDB.Namespace, + OwnerReferences: []metav1.OwnerReference{ + {UID: postgresDB.UID, Name: postgresDB.Name}, + }, + }, + Data: map[string][]byte{ + "username": []byte(wantUsername), + secretKeyPassword: []byte("existing-password"), + }, + } + c := testClient(t, scheme, existing) + + err := ensureProvisionedSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) + + require.NoError(t, err) + + got := &corev1.Secret{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: secretName, Namespace: postgresDB.Namespace}, got)) + assert.Equal(t, wantUsername, string(got.Data["username"])) + assert.Equal(t, "existing-password", string(got.Data[secretKeyPassword])) + }) + + t.Run("returns drift error when secret is owned by a different controller", func(t *testing.T) { + roleName := "payments_admin" + secretName := "primary-payments-admin" + otherOwnerUID := types.UID("other-owner-uid") + existing := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: postgresDB.Namespace, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "v1", + Kind: "SomeOtherController", + Name: "other-controller", + UID: otherOwnerUID, + Controller: boolPtr(true), + BlockOwnerDeletion: boolPtr(true), + }, + }, + }, + Data: map[string][]byte{ + "username": []byte(roleName), + secretKeyPassword: []byte("existing-password"), + }, + } + c := testClient(t, scheme, existing) + + err := ensureProvisionedSecret(context.Background(), c, scheme, postgresDB, roleName, secretName) + + require.Error(t, err) + var driftErr *secretReconcileError + require.ErrorAs(t, err, &driftErr) + assert.Equal(t, reasonSecretsDriftDetected, driftErr.reason) + assert.ErrorContains(t, err, secretName) + }) } // Uses a fake client because the helper reconciles multiple Secret objects through the Kubernetes API. 
-func TestReconcileUserSecrets(t *testing.T) { +func TestReconcileRoleSecrets(t *testing.T) { scheme := testScheme(t) postgresDB := &enterprisev4.PostgresDatabase{ TypeMeta: metav1.TypeMeta{ @@ -1078,7 +1333,7 @@ func TestReconcileUserSecrets(t *testing.T) { {name: "primary-analytics-rw", username: "analytics_rw"}, } - err := reconcileUserSecrets(context.Background(), c, scheme, postgresDB) + err := reconcileRoleSecrets(context.Background(), c, scheme, postgresDB, existingDatabaseStatus(postgresDB)) require.NoError(t, err) for _, want := range wantSecrets { @@ -1094,13 +1349,13 @@ func TestReconcileUserSecrets(t *testing.T) { t.Run("is idempotent when secrets already exist", func(t *testing.T) { c := testClient(t, scheme) - require.NoError(t, reconcileUserSecrets(context.Background(), c, scheme, postgresDB)) + require.NoError(t, reconcileRoleSecrets(context.Background(), c, scheme, postgresDB, existingDatabaseStatus(postgresDB))) before := &corev1.Secret{} require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: "primary-payments-admin", Namespace: postgresDB.Namespace}, before)) beforePassword := append([]byte(nil), before.Data[secretKeyPassword]...) - err := reconcileUserSecrets(context.Background(), c, scheme, postgresDB) + err := reconcileRoleSecrets(context.Background(), c, scheme, postgresDB, existingDatabaseStatus(postgresDB)) require.NoError(t, err) @@ -1110,6 +1365,19 @@ func TestReconcileUserSecrets(t *testing.T) { require.Len(t, after.OwnerReferences, 1) assert.Equal(t, postgresDB.UID, after.OwnerReferences[0].UID) }) + + t.Run("does not recreate missing secrets for previously provisioned databases", func(t *testing.T) { + postgresDB.Status.Phase = strPtr(string(readyDBPhase)) + postgresDB.Status.Databases = []enterprisev4.DatabaseInfo{{Name: "payments"}} + c := testClient(t, scheme) + + err := reconcileRoleSecrets(context.Background(), c, scheme, postgresDB, existingDatabaseStatus(postgresDB)) + + require.Error(t, err) + var driftErr *secretReconcileError + require.ErrorAs(t, err, &driftErr) + assert.Equal(t, reasonSecretsDriftDetected, driftErr.reason) + }) } // Uses a fake client because the helper reconciles ConfigMaps through CreateOrUpdate and persists re-adoption metadata. 
@@ -1228,6 +1496,47 @@ func TestReconcileRoleConfigMaps(t *testing.T) { BlockOwnerDeletion: boolPtr(true), }) }) + + t.Run("re-attaches owner reference when configmap ownership was manually stripped", func(t *testing.T) { + postgresDB := &enterprisev4.PostgresDatabase{ + TypeMeta: metav1.TypeMeta{ + APIVersion: enterprisev4.GroupVersion.String(), + Kind: "PostgresDatabase", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "primary", + Namespace: "dbs", + UID: types.UID("postgresdb-uid"), + }, + Spec: enterprisev4.PostgresDatabaseSpec{ + Databases: []enterprisev4.DatabaseDefinition{ + {Name: "payments"}, + }, + }, + } + cmName := "primary-payments-config" + existing := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: cmName, + Namespace: postgresDB.Namespace, + Labels: map[string]string{labelManagedBy: "splunk-operator"}, + Annotations: map[string]string{"keep": "true"}, + }, + Data: map[string]string{"dbname": "payments"}, + } + c := testClient(t, scheme, existing) + + err := reconcileRoleConfigMaps(context.Background(), c, scheme, postgresDB, endpoints) + + require.NoError(t, err) + + got := &corev1.ConfigMap{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: cmName, Namespace: postgresDB.Namespace}, got)) + assert.Equal(t, "true", got.Annotations["keep"]) + require.Len(t, got.OwnerReferences, 1) + assert.Equal(t, postgresDB.UID, got.OwnerReferences[0].UID) + assert.Equal(t, buildDatabaseConfigMapBody("payments", endpoints), got.Data) + }) } func TestBuildDeletionPlan(t *testing.T) { @@ -1485,16 +1794,16 @@ func TestBuildPasswordSecret(t *testing.T) { wantNamespace := "dbs" wantManagedBy := "splunk-operator" wantReload := "true" - wantUsername := "payments_admin" + wantRolename := "payments_admin" wantPassword := "topsecret" - got := buildPasswordSecret(postgresDB, wantName, wantUsername, wantPassword) + got := buildPasswordSecret(postgresDB, wantName, wantRolename, wantPassword) assert.Equal(t, wantName, got.Name) assert.Equal(t, wantNamespace, got.Namespace) assert.Equal(t, wantManagedBy, got.Labels[labelManagedBy]) assert.Equal(t, wantReload, got.Labels[labelCNPGReload]) - assert.Equal(t, wantUsername, string(got.Data["username"])) + assert.Equal(t, wantRolename, string(got.Data["username"])) assert.Equal(t, wantPassword, string(got.Data[secretKeyPassword])) } diff --git a/pkg/postgresql/database/core/events.go b/pkg/postgresql/database/core/events.go index 987b8bbfb..eb5e08d81 100644 --- a/pkg/postgresql/database/core/events.go +++ b/pkg/postgresql/database/core/events.go @@ -22,7 +22,8 @@ const ( EventClusterNotFound = "ClusterNotFound" EventClusterNotReady = "ClusterNotReady" EventRoleConflict = "RoleConflict" - EventUserSecretsFailed = "UserSecretsFailed" + EventRoleSecretsFailed = "RoleSecretsFailed" + EventRolesSecretsDriftDetected = "RolesSecretsDriftDetected" EventAccessConfigFailed = "AccessConfigFailed" EventManagedRolesPatchFailed = "ManagedRolesPatchFailed" EventRoleFailed = "RoleFailed" diff --git a/pkg/postgresql/database/core/types.go b/pkg/postgresql/database/core/types.go index 6511b502f..e9b327f99 100644 --- a/pkg/postgresql/database/core/types.go +++ b/pkg/postgresql/database/core/types.go @@ -22,6 +22,7 @@ type reconcileDBPhases string type conditionTypes string type conditionReasons string type clusterReadyStatus string +type reconcileConflictCategory string const ( retryDelay = time.Second * 15 @@ -33,7 +34,6 @@ const ( readWriteEndpoint string = "rw" deletionPolicyRetain string = "Retain" - deletionPolicyDelete string = "Delete" 
postgresDatabaseFinalizerName string = "postgresdatabases.enterprise.splunk.com/finalizer" annotationRetainedFrom string = "enterprise.splunk.com/retained-from" @@ -74,9 +74,10 @@ const ( reasonDatabasesAvailable conditionReasons = "DatabasesAvailable" reasonSecretsCreated conditionReasons = "SecretsCreated" reasonSecretsCreationFailed conditionReasons = "SecretsCreationFailed" + reasonSecretsDriftDetected conditionReasons = "SecretsDriftDetected" reasonWaitingForCNPG conditionReasons = "WaitingForCNPG" - reasonUsersCreationFailed conditionReasons = "UsersCreationFailed" - reasonUsersAvailable conditionReasons = "UsersAvailable" + reasonRolesCreationFailed conditionReasons = "RolesCreationFailed" + reasonRolesAvailable conditionReasons = "RolesAvailable" reasonRoleConflict conditionReasons = "RoleConflict" reasonConfigMapsCreationFailed conditionReasons = "ConfigMapsCreationFailed" reasonConfigMapsCreated conditionReasons = "ConfigMapsCreated" @@ -90,6 +91,21 @@ const ( ClusterNotReady clusterReadyStatus = "NotReady" ClusterNoProvisionerRef clusterReadyStatus = "NoProvisionerRef" ClusterReady clusterReadyStatus = "Ready" + + conflictDeletion reconcileConflictCategory = "deletion" + conflictFinalizer reconcileConflictCategory = "finalizer" + conflictClusterStatus reconcileConflictCategory = "cluster_status" + conflictRoleConflictStatus reconcileConflictCategory = "role_conflict_status" + conflictSecretsReconcile reconcileConflictCategory = "secrets_reconcile" + conflictSecretsStatus reconcileConflictCategory = "secrets_status" + conflictConfigMapsReconcile reconcileConflictCategory = "configmaps_reconcile" + conflictConfigMapsStatus reconcileConflictCategory = "configmaps_status" + conflictManagedRolesPatch reconcileConflictCategory = "managed_roles_patch" + conflictRolesStatus reconcileConflictCategory = "roles_status" + conflictCNPGDatabasesReconcile reconcileConflictCategory = "cnpg_databases_reconcile" + conflictDatabasesStatus reconcileConflictCategory = "databases_status" + conflictPrivilegesStatus reconcileConflictCategory = "privileges_status" + conflictFinalStatus reconcileConflictCategory = "final_status" ) // clusterEndpoints holds fully-resolved connection hostnames for a cluster. 
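Note on the conflict handling above: the requeueOnConflict helper that the PostgresDatabase reconciler calls after every status write is referenced throughout these hunks but never defined in this excerpt. The following is a minimal sketch of the contract its call sites and the TestPostgresDatabaseServiceRequeuesOnConflict cases pin down (a resource-version conflict is swallowed and surfaces as ctrl.Result{Requeue: true} with a nil error); the body is an assumption, and the real helper may additionally record the reconcileConflictCategory as a metric:

import (
	"context"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/log"
)

// requeueOnConflict converts optimistic-concurrency failures into a silent
// immediate requeue so the reconciler retries against fresh state.
// Hypothetical sketch; the actual implementation is not part of this excerpt.
func requeueOnConflict(ctx context.Context, err error, category reconcileConflictCategory, action string) (ctrl.Result, error, bool) {
	if err == nil || !apierrors.IsConflict(err) {
		// Not a conflict: the caller handles err through its normal error path.
		return ctrl.Result{}, nil, false
	}
	log.FromContext(ctx).Info("Update conflict, requeueing",
		"category", string(category), "action", action)
	return ctrl.Result{Requeue: true}, nil, true
}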
From 675ce20fc4549b2bea71b74daa65aab25c07 Mon Sep 17 00:00:00 2001
From: Jakub Koterba
Date: Wed, 8 Apr 2026 10:02:21 +0200
Subject: [PATCH 33/36] sync logic rewrite + tests and constants

logging changed, pureness fix attempt
removed redundant sync at the end
incremental state building with limited redundancy
logging align
cluster unit adjustments
merge adjustments
event emission placed back, fortified with tests
align with requirements on state building
review and rebase changes
merge alignment
review changes

--- .../postgrescluster_controller_test.go | 91 +- pkg/postgresql/cluster/core/cluster.go | 1518 +++++++++++++---- .../cluster/core/cluster_unit_test.go | 896 +++++++++- pkg/postgresql/cluster/core/events.go | 11 + pkg/postgresql/cluster/core/types.go | 87 +- .../core/types/constants/components.go | 9 + .../cluster/core/types/constants/state.go | 24 + 7 files changed, 2214 insertions(+), 422 deletions(-) create mode 100644 pkg/postgresql/cluster/core/types/constants/components.go create mode 100644 pkg/postgresql/cluster/core/types/constants/state.go diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go index 80b412562..b7becdff5 100644 --- a/internal/controller/postgrescluster_controller_test.go +++ b/internal/controller/postgrescluster_controller_test.go @@ -19,7 +19,9 @@ package controller import ( "context" "fmt" + "strings" + v1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/api/resource" @@ -52,6 +54,20 @@ import ( * PC-09 ignores no-op updates */ +func containsEvents(events *[]string, recorder *record.FakeRecorder, eventType string, event string) bool { + for { + select { + case e := <-recorder.Events: + *events = append(*events, e) + if strings.Contains(e, eventType) && strings.Contains(e, event) { + return true + } + default: + return false + } + } +} + var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { const ( @@ -79,6 +95,7 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { pgClusterClassKey types.NamespacedName reconciler *PostgresClusterReconciler req reconcile.Request + fakeRecorder *record.FakeRecorder ) reconcileNTimes := func(times int) { @@ -162,13 +179,17 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Spec: enterprisev4.PostgresClusterSpec{ Class: className, ClusterDeletionPolicy: ptr.To(deletePolicy), + ManagedRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + {Name: "app_user_rw", Exists: true}, + }, }, } - + fakeRecorder = record.NewFakeRecorder(100) reconciler = &PostgresClusterReconciler{ Client: k8sClient, Scheme: k8sClient.Scheme(), - Recorder: record.NewFakeRecorder(100), + Recorder: fakeRecorder, Metrics: &pgprometheus.NoopRecorder{}, FleetCollector: pgprometheus.NewFleetCollector(), } @@ -255,12 +276,17 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { cond := meta.FindStatusCondition(pc.Status.Conditions, "ClusterReady") Expect(cond).NotTo(BeNil()) Expect(cond.Status).To(Equal(metav1.ConditionFalse)) - Expect(cond.Reason).To(Equal("ClusterBuildSucceeded")) + Expect(cond.Reason).To(Equal("CNPGClusterProvisioning")) // Simulate external CNPG controller status progression.
cnpg := &cnpgv1.Cluster{} Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed()) cnpg.Status.Phase = cnpgv1.PhaseHealthy + cnpg.Status.ManagedRolesStatus = cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusReconciled: {"app_user", "app_user_rw"}, + }, + } Expect(k8sClient.Status().Update(ctx, cnpg)).To(Succeed()) reconcileNTimes(1) @@ -270,9 +296,47 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Expect(cond).NotTo(BeNil()) Expect(cond.Status).To(Equal(metav1.ConditionTrue)) Expect(cond.Reason).To(Equal("CNPGClusterHealthy")) + + secretCond := meta.FindStatusCondition(pc.Status.Conditions, "SecretsReady") + Expect(secretCond).NotTo(BeNil()) + Expect(secretCond.Status).To(Equal(metav1.ConditionTrue)) + Expect(secretCond.Reason).To(Equal("SuperUserSecretReady")) + + configMapCond := meta.FindStatusCondition(pc.Status.Conditions, "ConfigMapsReady") + Expect(configMapCond).NotTo(BeNil()) + Expect(configMapCond.Status).To(Equal(metav1.ConditionTrue)) + Expect(configMapCond.Reason).To(Equal("ConfigMapReconciled")) + + managedRolesCond := meta.FindStatusCondition(pc.Status.Conditions, "ManagedRolesReady") + Expect(managedRolesCond).NotTo(BeNil()) + Expect(managedRolesCond.Status).To(Equal(metav1.ConditionTrue)) + Expect(managedRolesCond.Reason).To(Equal("ManagedRolesReconciled")) + + // Pooler is disabled in this suite fixture, but converge publishes PoolerReady=True with disabled message. + poolerCond := meta.FindStatusCondition(pc.Status.Conditions, "PoolerReady") + Expect(poolerCond).NotTo(BeNil()) + Expect(poolerCond.Status).To(Equal(metav1.ConditionTrue)) + Expect(poolerCond.Reason).To(Equal("AllInstancesReady")) + Expect(poolerCond.Message).To(Equal("Connection pooler disabled")) + + Expect(pc.Status.ManagedRolesStatus).NotTo(BeNil()) + Expect(pc.Status.ManagedRolesStatus.Reconciled).To(ContainElements("app_user", "app_user_rw")) + + Expect(pc.Status.Phase).NotTo(BeNil()) + Expect(*pc.Status.Phase).To(Equal("Ready")) + Expect(pc.Status.ProvisionerRef).NotTo(BeNil()) + Expect(pc.Status.ProvisionerRef.Kind).To(Equal("Cluster")) + Expect(pc.Status.ProvisionerRef.Name).To(Equal(clusterName)) + Expect(pc.Status.Resources).NotTo(BeNil()) Expect(pc.Status.Resources.SuperUserSecretRef).NotTo(BeNil()) Expect(pc.Status.Resources.ConfigMapRef).NotTo(BeNil()) + + received := make([]string, 0, 8) + Expect(containsEvents( + &received, fakeRecorder, + v1.EventTypeNormal, core.EventClusterReady, + )).To(BeTrue(), "events seen: %v", received) }) // PC-07 @@ -308,6 +372,7 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Context("with PostgreSQL metrics enabled in class", func() { BeforeEach(func() { pgCluster.Spec.Class = classNameMetrics + pgCluster.Spec.ManagedRoles = nil }) It("adds scrape annotations to the CNPG Cluster", func() { @@ -316,6 +381,17 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { cnpg := &cnpgv1.Cluster{} Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed()) + + cnpg.Status.Phase = cnpgv1.PhaseHealthy + cnpg.Status.ManagedRolesStatus = cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusReconciled: {"app_user", "app_user_rw"}, + }, + } + Expect(k8sClient.Status().Update(ctx, cnpg)).To(Succeed()) + + reconcileNTimes(1) + Expect(cnpg.Spec.InheritedMetadata).NotTo(BeNil()) Expect(cnpg.Spec.InheritedMetadata.Annotations).To(HaveKeyWithValue(scrapeAnnotationKey, "true")) 
Expect(cnpg.Spec.InheritedMetadata.Annotations).To(HaveKeyWithValue(pathAnnotationKey, metricsPath)) @@ -353,6 +429,7 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Context("with connection pooler metrics enabled in class", func() { BeforeEach(func() { pgCluster.Spec.Class = classNamePooler + pgCluster.Spec.ManagedRoles = nil }) It("adds scrape annotations to poolers only after the CNPG cluster becomes healthy", func() { @@ -439,7 +516,7 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { When("reconciling with invalid or drifted dependencies", func() { // PC-05 Context("when referenced class does not exist", func() { - It("fails with class-not-found condition", func() { + It("fails with class-not-found condition and emits a warning event", func() { badName := "bad-" + clusterName badKey := types.NamespacedName{Name: badName, Namespace: namespace} @@ -464,6 +541,12 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { cond := meta.FindStatusCondition(current.Status.Conditions, "ClusterReady") return cond != nil && cond.Reason == "ClusterClassNotFound" }, "20s", "250ms").Should(BeTrue()) + + received := make([]string, 0, 8) + Expect(containsEvents( + &received, fakeRecorder, + v1.EventTypeWarning, core.EventClusterClassNotFound, + )).To(BeTrue(), "events seen: %v", received) }) }) diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 8aefdea45..f0efc1926 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -20,10 +20,13 @@ import ( "context" "errors" "fmt" + "sort" + "strings" cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" password "github.com/sethvargo/go-password/password" enterprisev4 "github.com/splunk/splunk-operator/api/v4" + pgcConstants "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core/types/constants" "github.com/splunk/splunk-operator/pkg/postgresql/shared/ports" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" @@ -67,8 +70,36 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
logger = logger.WithValues("postgresCluster", postgresCluster.Name) ctx = log.IntoContext(ctx, logger) + currentPhase := func() string { + if postgresCluster.Status.Phase == nil { + return "" + } + return *postgresCluster.Status.Phase + } + updateStatus := func(conditionType conditionTypes, status metav1.ConditionStatus, reason conditionReasons, message string, phase reconcileClusterPhases) error { - return setStatus(ctx, c, rc.Metrics, postgresCluster, conditionType, status, reason, message, phase) + oldPhase := currentPhase() + if err := setStatus(ctx, c, rc.Metrics, postgresCluster, conditionType, status, reason, message, phase); err != nil { + return err + } + rc.emitClusterPhaseTransition(postgresCluster, oldPhase, currentPhase()) + return nil + } + updateComponentHealthStatus := func(health componentHealth) error { + oldPhase := currentPhase() + if err := setStatusFromHealth(ctx, c, rc.Metrics, postgresCluster, health); err != nil { + return err + } + rc.emitClusterPhaseTransition(postgresCluster, oldPhase, currentPhase()) + return nil + } + updatePhaseStatus := func(phase reconcileClusterPhases) error { + oldPhase := currentPhase() + if err := setPhaseStatus(ctx, c, postgresCluster, phase); err != nil { + return err + } + rc.emitClusterPhaseTransition(postgresCluster, oldPhase, currentPhase()) + return nil } // Finalizer handling must come before any other processing. @@ -119,8 +150,6 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. return ctrl.Result{}, errors.Join(err, statusErr) } - postgresMetricsEnabled := isPostgreSQLMetricsEnabled(postgresCluster, clusterClass) - poolerMetricsEnabled := isConnectionPoolerMetricsEnabled(postgresCluster, clusterClass) // Resolve or derive the superuser secret name. if postgresCluster.Status.Resources != nil && postgresCluster.Status.Resources.SuperUserSecretRef != nil { postgresSecretName = postgresCluster.Status.Resources.SuperUserSecretRef.Name @@ -130,302 +159,1131 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
logger.Info("Superuser secret name derived", "name", postgresSecretName) } - secretExists, secretErr := clusterSecretExists(ctx, c, postgresCluster.Namespace, postgresSecretName, secret) - if secretErr != nil { - logger.Error(secretErr, "Failed to check if PostgresCluster secret exists", "name", postgresSecretName) - rc.emitWarning(postgresCluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to check secret existence: %v", secretErr)) - statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonUserSecretFailed, - fmt.Sprintf("Failed to check secret existence: %v", secretErr), failedClusterPhase) - return ctrl.Result{}, errors.Join(secretErr, statusErr) + poolerEnabled = mergedConfig.Spec.ConnectionPoolerEnabled != nil && *mergedConfig.Spec.ConnectionPoolerEnabled + poolerConfigPresent := mergedConfig.CNPG != nil && mergedConfig.CNPG.ConnectionPooler != nil + + secretComponent := newSecretModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, postgresSecretName) + clusterComponent := newClusterModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, clusterClass, mergedConfig, postgresSecretName) + + bootstrapComponents := []component{ + secretComponent, + clusterComponent, } - if !secretExists { - logger.Info("Superuser secret creation started", "name", postgresSecretName) - if err := ensureClusterSecret(ctx, c, rc.Scheme, postgresCluster, postgresSecretName, secret); err != nil { - logger.Error(err, "Failed to ensure PostgresCluster secret", "name", postgresSecretName) - rc.emitWarning(postgresCluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to generate cluster secret: %v", err)) - statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonUserSecretFailed, - fmt.Sprintf("Failed to generate PostgresCluster secret: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) + + phase := func(component component) (ctrl.Result, error) { + componentLogger := logger.WithValues("component", component.Name()) + gate, gateErr := component.EvaluatePrerequisites(ctx) + if gateErr != nil { + if isTransientError(gateErr) { + componentLogger.Error(gateErr, "Component prerequisite transient error, requeueing", "step", "prerequisites") + return transientResult(gateErr), nil + } + componentLogger.Error(gateErr, "Component prerequisite evaluation failed", "step", "prerequisites") + return ctrl.Result{}, fmt.Errorf("%s prerequisites: %w", component.Name(), gateErr) } - if err := c.Status().Update(ctx, postgresCluster); err != nil { - logger.Error(err, "Failed to update status after secret creation") - return ctrl.Result{}, err + if !gate.Allowed { + componentLogger.Info("Component blocked by prerequisites", + "step", "prerequisites", + "condition", gate.Health.Condition, + "reason", gate.Health.Reason, + "phase", gate.Health.Phase, + "requeueAfter", gate.Health.Result.RequeueAfter) + health, err := component.Converge(ctx) + if err != nil && isTransientError(err) { + return transientResult(err), nil + } + if err != nil { + componentLogger.Error(err, "Blocked component convergence failed", "step", "converge") + return health.Result, fmt.Errorf("%s converge (blocked): %w", component.Name(), err) + } + return health.Result, nil } - rc.emitNormal(postgresCluster, EventSecretReady, fmt.Sprintf("Superuser secret %s created", postgresSecretName)) - logger.Info("Superuser secret ref persisted to status") - } - // Re-attach ownerRef if it was stripped (e.g. by a Retain-policy deletion of a previous cluster). 
- hasOwnerRef, ownerRefErr := controllerutil.HasOwnerReference(secret.GetOwnerReferences(), postgresCluster, rc.Scheme) - if ownerRefErr != nil { - logger.Error(ownerRefErr, "Failed to check owner reference on Secret") - return ctrl.Result{}, fmt.Errorf("failed to check owner reference on secret: %w", ownerRefErr) + if err := component.Actuate(ctx); err != nil { + if isTransientError(err) { + componentLogger.Error(err, "Component actuation transient error, requeueing", "step", "actuate") + return transientResult(err), nil + } + componentLogger.Error(err, "Component actuation failed", "step", "actuate") + return ctrl.Result{}, fmt.Errorf("%s actuate: %w", component.Name(), err) + } + componentLogger.Info("Component actuation completed", "step", "actuate") + + health, err := component.Converge(ctx) + if err != nil && isTransientError(err) { + componentLogger.Error(err, "Component convergence transient error, requeueing", "step", "converge") + return transientResult(err), nil + } + + if err != nil { + componentLogger.Error(err, "Component convergence failed", + "step", "converge", + "condition", health.Condition, + "reason", health.Reason, + "phase", health.Phase) + return health.Result, fmt.Errorf("%s converge: %w", component.Name(), err) + } + if isIntermediateState(health.State) { + componentLogger.Info("Component convergence pending", + "step", "converge", + "condition", health.Condition, + "reason", health.Reason, + "phase", health.Phase, + "requeueAfter", health.Result.RequeueAfter) + return health.Result, nil + } + componentLogger.Info("Component convergence ready", + "step", "converge", + "condition", health.Condition, + "reason", health.Reason, + "phase", health.Phase) + if health.Result != (ctrl.Result{}) { + componentLogger.Info("Component requested explicit result", + "step", "converge", + "requeueAfter", health.Result.RequeueAfter) + return health.Result, nil + } + return ctrl.Result{}, nil } - if secretExists && !hasOwnerRef { - logger.Info("Existing secret linked to PostgresCluster", "name", postgresSecretName) - rc.emitNormal(postgresCluster, EventClusterAdopted, fmt.Sprintf("Adopted existing CNPG cluster and secret %s", postgresSecretName)) - originalSecret := secret.DeepCopy() - if err := ctrl.SetControllerReference(postgresCluster, secret, rc.Scheme); err != nil { - return ctrl.Result{}, fmt.Errorf("failed to set controller reference on existing secret: %w", err) + + for _, component := range bootstrapComponents { + result, err := phase(component) + if err != nil { + return result, err } - if err := patchObject(ctx, c, originalSecret, secret, "Secret"); err != nil { - logger.Error(err, "Failed to patch existing secret with controller reference") - rc.emitWarning(postgresCluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to patch existing secret: %v", err)) - statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonSuperUserSecretFailed, - fmt.Sprintf("Failed to patch existing secret: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) + if result != (ctrl.Result{}) { + return result, nil } } - if postgresCluster.Status.Resources.SuperUserSecretRef == nil { - postgresCluster.Status.Resources.SuperUserSecretRef = &corev1.SecretKeySelector{ - LocalObjectReference: corev1.LocalObjectReference{Name: postgresSecretName}, - Key: secretKeyPassword, + cnpgCluster = clusterComponent.cnpgCluster + runtimeView := clusterRuntimeViewAdapter{model: clusterComponent} + runtimeComponents := []component{ + newManagedRolesModel(c, rc.Scheme, 
rc, updateComponentHealthStatus, runtimeView, postgresCluster, postgresSecretName), + newPoolerModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, clusterClass, mergedConfig, cnpgCluster, poolerEnabled, poolerConfigPresent), + newConfigMapModel(c, rc.Scheme, rc, updateComponentHealthStatus, runtimeView, postgresCluster, postgresSecretName), + } + + for _, component := range runtimeComponents { + result, err := phase(component) + if err != nil { + return result, err + } + if result != (ctrl.Result{}) { + return result, nil } } + logger.Info("Reconciliation complete") + if err := updatePhaseStatus(readyClusterPhase); err != nil { + if apierrors.IsConflict(err) { + return ctrl.Result{Requeue: true}, nil + } + return ctrl.Result{}, err + } + return ctrl.Result{}, nil +} + +// isTransientError reports whether err is a retryable API-server error. +func isTransientError(err error) bool { + return apierrors.IsConflict(err) || + apierrors.IsServerTimeout(err) || + apierrors.IsTooManyRequests(err) || + apierrors.IsTimeout(err) +} + +// transientResult requeues immediately on conflicts and after retryDelay otherwise. +func transientResult(err error) ctrl.Result { + if apierrors.IsConflict(err) { + return ctrl.Result{Requeue: true} + } + return ctrl.Result{RequeueAfter: retryDelay} +} + +// writeComponentStatus forwards component health to the status updater; a nil updater is a no-op. +func writeComponentStatus(updateStatus healthStatusUpdater, health componentHealth) error { + if updateStatus == nil { + return nil + } + return updateStatus(health) +} + +// componentHealth is the reconciliation outcome a component reports; candidate for extraction into a shared types/DTO package. +type componentHealth struct { + State pgcConstants.State + Condition conditionTypes + Reason conditionReasons + Message string + Phase reconcileClusterPhases + Result ctrl.Result +} + +// component is a single reconciliation unit: EvaluatePrerequisites gates it, Actuate mutates cluster state, and Converge reports health. +type component interface { + Actuate(ctx context.Context) error + Converge(ctx context.Context) (componentHealth, error) + EvaluatePrerequisites(ctx context.Context) (prerequisiteDecision, error) + Name() string +} + +type prerequisiteDecision struct { + Allowed bool + Health componentHealth +} + +type healthStatusUpdater func(health componentHealth) error - // Build desired CNPG Cluster spec. - desiredSpec := buildCNPGClusterSpec(mergedConfig, postgresSecretName, postgresMetricsEnabled) +type eventEmitter interface { + emitNormal(obj client.Object, reason, message string) + emitWarning(obj client.Object, reason, message string) +} + +type poolerEmitter interface { + eventEmitter + emitPoolerReadyTransition(obj client.Object, conditions []metav1.Condition) + emitPoolerCreationTransition(obj client.Object, conditions []metav1.Condition) +} + +type clusterRuntimeView interface { + Cluster() *cnpgv1.Cluster + IsHealthy() bool +} + +type clusterRuntimeViewAdapter struct { + model *clusterModel +} + +func (v clusterRuntimeViewAdapter) Cluster() *cnpgv1.Cluster { + return v.model.cnpgCluster +} + +func (v clusterRuntimeViewAdapter) IsHealthy() bool { + return v.model.cnpgCluster != nil && v.model.cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy +} - // Fetch existing CNPG Cluster or create it. 
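+// clusterModel owns the backing CNPG Cluster resource: Actuate creates it or patches spec drift and records the provisioner reference, while Converge maps CNPG phases onto component health.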
+type clusterModel struct { + client client.Client + scheme *runtime.Scheme + events eventEmitter + updateStatus healthStatusUpdater + cluster *enterprisev4.PostgresCluster + clusterClass *enterprisev4.PostgresClusterClass + mergedConfig *MergedConfig + secretName string + cnpgCluster *cnpgv1.Cluster + cnpgCreated bool + cnpgPatched bool + + metricsEnabled bool + health componentHealth +} + +func newClusterModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, cluster *enterprisev4.PostgresCluster, clusterClass *enterprisev4.PostgresClusterClass, mergedConfig *MergedConfig, secretName string) *clusterModel { + model := &clusterModel{ + client: c, scheme: scheme, + events: events, updateStatus: updateStatus, + cluster: cluster, clusterClass: clusterClass, mergedConfig: mergedConfig, + secretName: secretName, + } + model.metricsEnabled = isPostgreSQLMetricsEnabled(cluster, clusterClass) + return model +} + +func (p *clusterModel) Name() string { return pgcConstants.ComponentProvisioner } + +func (p *clusterModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { + if health, missing := p.getHealthOnMissingSecretRef(); missing { + return prerequisiteDecision{ + Allowed: false, + Health: health, + }, nil + } + return prerequisiteDecision{Allowed: true}, nil +} + +func (p *clusterModel) Actuate(ctx context.Context) error { + p.cnpgCreated = false + p.cnpgPatched = false + + desiredSpec := buildCNPGClusterSpec(p.mergedConfig, p.secretName, p.metricsEnabled) existingCNPG := &cnpgv1.Cluster{} - err = c.Get(ctx, types.NamespacedName{Name: postgresCluster.Name, Namespace: postgresCluster.Namespace}, existingCNPG) + err := p.client.Get(ctx, types.NamespacedName{Name: p.cluster.Name, Namespace: p.cluster.Namespace}, existingCNPG) switch { case apierrors.IsNotFound(err): - logger.Info("CNPG Cluster creation started", "name", postgresCluster.Name) - newCluster, err := buildCNPGCluster(rc.Scheme, postgresCluster, mergedConfig, postgresSecretName, postgresMetricsEnabled) + newCluster, err := buildCNPGCluster(p.scheme, p.cluster, p.mergedConfig, p.secretName, p.metricsEnabled) if err != nil { - logger.Error(err, "Failed to build CNPG Cluster", "name", postgresCluster.Name) - return ctrl.Result{}, err - } - if err := c.Create(ctx, newCluster); err != nil { - logger.Error(err, "Failed to create CNPG Cluster") - rc.emitWarning(postgresCluster, EventClusterCreateFailed, fmt.Sprintf("Failed to create CNPG cluster: %v", err)) - statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterBuildFailed, - fmt.Sprintf("Failed to create CNPG Cluster: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) - } - rc.emitNormal(postgresCluster, EventClusterCreationStarted, "CNPG cluster created, waiting for healthy state") - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterBuildSucceeded, - "CNPG Cluster created", pendingClusterPhase); statusErr != nil { - return ctrl.Result{}, statusErr - } - logger.Info("CNPG Cluster created, requeueing for status update", "name", postgresCluster.Name) - return ctrl.Result{RequeueAfter: retryDelay}, nil + p.events.emitWarning(p.cluster, EventClusterCreateFailed, fmt.Sprintf("Failed to build CNPG cluster: %v", err)) + return err + } + if err = p.client.Create(ctx, newCluster); err != nil { + p.events.emitWarning(p.cluster, EventClusterCreateFailed, fmt.Sprintf("Failed to create CNPG cluster: %v", err)) + return err + } + 
p.events.emitNormal(p.cluster, EventClusterCreationStarted, "CNPG cluster created, waiting for healthy state") + p.cnpgCluster = newCluster + p.cnpgCreated = true case err != nil: - logger.Error(err, "Failed to get CNPG Cluster") - statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterGetFailed, - fmt.Sprintf("Failed to get CNPG Cluster: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) + return err + default: + p.cnpgCluster = existingCNPG + currentNormalized := normalizeCNPGClusterSpec(p.cnpgCluster.Spec, p.mergedConfig.Spec.PostgreSQLConfig) + desiredNormalized := normalizeCNPGClusterSpec(desiredSpec, p.mergedConfig.Spec.PostgreSQLConfig) + if !equality.Semantic.DeepEqual(currentNormalized, desiredNormalized) { + originalCluster := p.cnpgCluster.DeepCopy() + p.cnpgCluster.Spec = desiredSpec + if patchErr := patchObject(ctx, p.client, originalCluster, p.cnpgCluster, "CNPGCluster"); patchErr != nil { + p.events.emitWarning(p.cluster, EventClusterUpdateFailed, fmt.Sprintf("Failed to patch CNPG cluster: %v", patchErr)) + return patchErr + } + p.events.emitNormal(p.cluster, EventClusterUpdateStarted, "CNPG cluster spec updated, waiting for healthy state") + p.cnpgPatched = true + } } - // Patch CNPG Cluster spec if drift detected. - cnpgCluster = existingCNPG - currentNormalized := normalizeCNPGClusterSpec(cnpgCluster.Spec, mergedConfig.Spec.PostgreSQLConfig) - desiredNormalized := normalizeCNPGClusterSpec(desiredSpec, mergedConfig.Spec.PostgreSQLConfig) - - if !equality.Semantic.DeepEqual(currentNormalized, desiredNormalized) { - logger.Info("CNPG Cluster spec drift detected, patch started", "name", cnpgCluster.Name) - originalCluster := cnpgCluster.DeepCopy() - cnpgCluster.Spec = desiredSpec - - switch patchErr := patchObject(ctx, c, originalCluster, cnpgCluster, "CNPGCluster"); { - case patchErr != nil: - logger.Error(patchErr, "Failed to patch CNPG Cluster", "name", cnpgCluster.Name) - rc.emitWarning(postgresCluster, EventClusterUpdateFailed, fmt.Sprintf("Failed to patch CNPG cluster: %v", patchErr)) - statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterPatchFailed, - fmt.Sprintf("Failed to patch CNPG Cluster: %v", patchErr), failedClusterPhase) - return ctrl.Result{}, errors.Join(patchErr, statusErr) - default: - if statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonClusterBuildSucceeded, - "CNPG Cluster spec updated, waiting for healthy state", provisioningClusterPhase); statusErr != nil { - return ctrl.Result{}, statusErr + if p.cnpgCluster != nil { + p.cluster.Status.ProvisionerRef = &corev1.ObjectReference{ + APIVersion: "postgresql.cnpg.io/v1", + Kind: "Cluster", + Namespace: p.cnpgCluster.Namespace, + Name: p.cnpgCluster.Name, + UID: p.cnpgCluster.UID, + } + } + return nil +} + +func (p *clusterModel) Converge(_ context.Context) (health componentHealth, err error) { + p.health.Condition = clusterReady + defer func() { + statusErr := writeComponentStatus(p.updateStatus, p.health) + if statusErr != nil { + if err != nil { + err = errors.Join(err, statusErr) + } else { + err = statusErr } - rc.emitNormal(postgresCluster, EventClusterUpdateStarted, "CNPG cluster spec updated, waiting for healthy state") - logger.Info("CNPG Cluster patched, requeueing for status update", "name", cnpgCluster.Name) - return ctrl.Result{RequeueAfter: retryDelay}, nil } + health = p.health + }() + + if missingHealth, missing := p.getHealthOnMissingSecretRef(); missing { + p.health = missingHealth + return 
p.health, nil } - // Reconcile ManagedRoles. - if err := reconcileManagedRoles(ctx, c, postgresCluster, cnpgCluster); err != nil { - logger.Error(err, "Failed to reconcile managed roles") - rc.emitWarning(postgresCluster, EventManagedRolesFailed, fmt.Sprintf("Failed to reconcile managed roles: %v", err)) - statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonManagedRolesFailed, - fmt.Sprintf("Failed to reconcile managed roles: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) + if p.cnpgCluster == nil { + p.health.State = pgcConstants.Pending + p.health.Reason = reasonCNPGProvisioning + p.health.Message = msgCNPGPendingCreation + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil } - // Reconcile Connection Pooler. - poolerEnabled = mergedConfig.Spec.ConnectionPoolerEnabled != nil && *mergedConfig.Spec.ConnectionPoolerEnabled + if p.cnpgCreated { + p.health.State = pgcConstants.Pending + p.health.Reason = reasonCNPGProvisioning + p.health.Message = msgCNPGPendingCreation + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + } + + if p.cnpgPatched { + p.health.State = pgcConstants.Provisioning + p.health.Reason = reasonCNPGProvisioning + p.health.Message = fmt.Sprintf(msgFmtCNPGClusterPhase, p.cnpgCluster.Status.Phase) + p.health.Phase = provisioningClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + } + + switch p.cnpgCluster.Status.Phase { + case cnpgv1.PhaseHealthy: + p.health.State = pgcConstants.Ready + p.health.Reason = reasonCNPGClusterHealthy + p.health.Message = msgProvisionerHealthy + p.health.Phase = readyClusterPhase + p.health.Result = ctrl.Result{} + return p.health, nil + case cnpgv1.PhaseFirstPrimary, cnpgv1.PhaseCreatingReplica, cnpgv1.PhaseWaitingForInstancesToBeActive: + p.health.State = pgcConstants.Provisioning + p.health.Reason = reasonCNPGProvisioning + p.health.Message = fmt.Sprintf(msgFmtCNPGProvisioning, p.cnpgCluster.Status.Phase) + p.health.Phase = provisioningClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + case cnpgv1.PhaseSwitchover: + p.health.State = pgcConstants.Configuring + p.health.Reason = reasonCNPGSwitchover + p.health.Message = msgCNPGSwitchover + p.health.Phase = configuringClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + case cnpgv1.PhaseFailOver: + p.health.State = pgcConstants.Configuring + p.health.Reason = reasonCNPGFailingOver + p.health.Message = msgCNPGFailingOver + p.health.Phase = configuringClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + case cnpgv1.PhaseInplacePrimaryRestart, cnpgv1.PhaseInplaceDeletePrimaryRestart: + p.health.State = pgcConstants.Configuring + p.health.Reason = reasonCNPGRestarting + p.health.Message = fmt.Sprintf(msgFmtCNPGRestarting, p.cnpgCluster.Status.Phase) + p.health.Phase = configuringClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + case cnpgv1.PhaseUpgrade, cnpgv1.PhaseMajorUpgrade, cnpgv1.PhaseUpgradeDelayed, cnpgv1.PhaseOnlineUpgrading: + p.health.State = pgcConstants.Configuring + p.health.Reason = reasonCNPGUpgrading + p.health.Message = fmt.Sprintf(msgFmtCNPGUpgrading, p.cnpgCluster.Status.Phase) + p.health.Phase = configuringClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + 
return p.health, nil + case cnpgv1.PhaseApplyingConfiguration: + p.health.State = pgcConstants.Configuring + p.health.Reason = reasonCNPGApplyingConfig + p.health.Message = msgCNPGApplyingConfiguration + p.health.Phase = configuringClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + case cnpgv1.PhaseReplicaClusterPromotion: + p.health.State = pgcConstants.Configuring + p.health.Reason = reasonCNPGPromoting + p.health.Message = msgCNPGPromoting + p.health.Phase = configuringClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + case cnpgv1.PhaseWaitingForUser: + p.health.State = pgcConstants.Failed + p.health.Reason = reasonCNPGWaitingForUser + p.health.Message = msgCNPGWaitingForUser + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, fmt.Errorf("provisioner requires user action") + case cnpgv1.PhaseUnrecoverable: + p.health.State = pgcConstants.Failed + p.health.Reason = reasonCNPGUnrecoverable + p.health.Message = msgCNPGUnrecoverable + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, fmt.Errorf("provisioner unrecoverable") + case cnpgv1.PhaseCannotCreateClusterObjects: + p.health.State = pgcConstants.Failed + p.health.Reason = reasonCNPGProvisioningFailed + p.health.Message = msgCNPGCannotCreateObjects + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, fmt.Errorf("provisioner cannot create cluster objects") + case cnpgv1.PhaseUnknownPlugin, cnpgv1.PhaseFailurePlugin: + p.health.State = pgcConstants.Failed + p.health.Reason = reasonCNPGPluginError + p.health.Message = fmt.Sprintf(msgFmtCNPGPluginError, p.cnpgCluster.Status.Phase) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, fmt.Errorf("provisioner plugin error") + case cnpgv1.PhaseImageCatalogError, cnpgv1.PhaseArchitectureBinaryMissing: + p.health.State = pgcConstants.Failed + p.health.Reason = reasonCNPGImageError + p.health.Message = fmt.Sprintf(msgFmtCNPGImageError, p.cnpgCluster.Status.Phase) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, fmt.Errorf("provisioner image error") + case "": + p.health.State = pgcConstants.Pending + p.health.Reason = reasonCNPGProvisioning + p.health.Message = msgCNPGPendingCreation + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + default: + p.health.State = pgcConstants.Provisioning + p.health.Reason = reasonCNPGProvisioning + p.health.Message = fmt.Sprintf(msgFmtCNPGClusterPhase, p.cnpgCluster.Status.Phase) + p.health.Phase = provisioningClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + } +} + +func (p *clusterModel) getHealthOnMissingSecretRef() (componentHealth, bool) { + if p.cluster.Status.Resources == nil || p.cluster.Status.Resources.SuperUserSecretRef == nil { + return componentHealth{ + State: pgcConstants.Pending, + Condition: clusterReady, + Reason: reasonUserSecretPending, + Message: msgSecretRefNotPublished, + Phase: pendingClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, true + } + return componentHealth{}, false +} + +type managedRolesModel struct { + client client.Client + scheme *runtime.Scheme + events eventEmitter + updateStatus healthStatusUpdater + runtime clusterRuntimeView + cluster *enterprisev4.PostgresCluster + secret string + + health componentHealth +} 
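+ +// managedRolesModel keeps spec.ManagedRoles in sync with the CNPG cluster and surfaces their reconciled/pending/failed state as component health.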
+ +func newManagedRolesModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, runtime clusterRuntimeView, cluster *enterprisev4.PostgresCluster, secret string) *managedRolesModel { + return &managedRolesModel{client: c, scheme: scheme, events: events, updateStatus: updateStatus, runtime: runtime, cluster: cluster, secret: secret} +} + +func (m *managedRolesModel) Name() string { return pgcConstants.ComponentManagedRoles } + +// runtimeGateHealth returns a blocking health entry while the CNPG cluster is not yet healthy. +func (m *managedRolesModel) runtimeGateHealth() (componentHealth, bool) { + if m.runtime == nil || !m.runtime.IsHealthy() { + return componentHealth{ + State: pgcConstants.Pending, + Condition: managedRolesReady, + Reason: reasonManagedRolesPending, + Message: "Managed roles blocked until CNPG cluster is healthy", + Phase: pendingClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, true + } + return componentHealth{}, false +} + +func (m *managedRolesModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { + if gateHealth, blocked := m.runtimeGateHealth(); blocked { + return prerequisiteDecision{ + Allowed: false, + Health: gateHealth, + }, nil + } + return prerequisiteDecision{Allowed: true}, nil +} + +func (m *managedRolesModel) Actuate(ctx context.Context) error { + if rolesErr := reconcileManagedRoles(ctx, m.client, m.cluster, m.runtime.Cluster()); rolesErr != nil { + m.events.emitWarning(m.cluster, EventManagedRolesFailed, fmt.Sprintf("Failed to reconcile managed roles: %v", rolesErr)) + m.health.State = pgcConstants.Failed + m.health.Reason = reasonManagedRolesFailed + m.health.Message = fmt.Sprintf("Failed to reconcile managed roles: %v", rolesErr) + m.health.Phase = failedClusterPhase + m.health.Result = ctrl.Result{} + return rolesErr + } + return nil +} + +func (m *managedRolesModel) Converge(_ context.Context) (health componentHealth, err error) { + m.health = componentHealth{Condition: managedRolesReady} + defer func() { + statusErr := writeComponentStatus(m.updateStatus, m.health) + if statusErr != nil { + if err != nil { + err = errors.Join(err, statusErr) + } else { + err = statusErr + } + } + health = m.health + }() + + if gateHealth, blocked := m.runtimeGateHealth(); blocked { + m.health = gateHealth + return m.health, nil + } + + syncManagedRolesStatusFromCNPG(m.cluster, m.runtime.Cluster()) + status := m.cluster.Status.ManagedRolesStatus + if status == nil { + m.health.State = pgcConstants.Failed + m.health.Reason = reasonManagedRolesFailed + m.health.Message = "Managed roles status not published yet" + m.health.Phase = failedClusterPhase + m.health.Result = ctrl.Result{RequeueAfter: retryDelay} + m.emitManagedRolesConvergeFailure(m.health.Message) + return m.health, fmt.Errorf("managed roles status not published") + } + + if len(status.Failed) > 0 { + m.health.State = pgcConstants.Failed + m.health.Reason = reasonManagedRolesFailed + m.health.Message = fmt.Sprintf("Managed roles reconciliation failed for %d role(s)", len(status.Failed)) + m.health.Phase = failedClusterPhase + m.health.Result = ctrl.Result{RequeueAfter: retryDelay} + m.emitManagedRolesConvergeFailure(m.health.Message) + return m.health, fmt.Errorf("managed roles have failed entries") + } + + if len(status.Pending) > 0 { + m.health.State = pgcConstants.Pending + m.health.Reason = reasonManagedRolesPending + m.health.Message = fmt.Sprintf("Managed roles pending for %d role(s)", len(status.Pending)) + m.health.Phase = pendingClusterPhase + m.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return m.health, nil + } + + m.health.State = pgcConstants.Ready + m.health.Reason = reasonManagedRolesReady + m.health.Message = "Managed roles are reconciled" + m.health.Phase = readyClusterPhase + m.health.Result = ctrl.Result{} + if !meta.IsStatusConditionTrue(m.cluster.Status.Conditions, string(managedRolesReady)) { + m.events.emitNormal(m.cluster, EventManagedRolesReady, m.health.Message) + } + return m.health, nil +} + +func (m *managedRolesModel) emitManagedRolesConvergeFailure(message string) { + cond := meta.FindStatusCondition(m.cluster.Status.Conditions, string(managedRolesReady)) + if cond != nil && + cond.Status == metav1.ConditionFalse && + cond.Reason == string(reasonManagedRolesFailed) && + cond.Message == message { + return + } + m.events.emitWarning(m.cluster, EventManagedRolesFailed, message) +} + +// syncManagedRolesStatusFromCNPG projects the CNPG-reported role status onto the PostgresCluster status; expected roles CNPG has not reported yet are counted as pending. +// TODO: expose this CNPG-originated data through a port to decouple the package from cnpgv1. +func syncManagedRolesStatusFromCNPG(cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster) { + if cluster == nil || cnpgCluster == nil { + return + } + + expectedRoles := make([]string, 0, len(cluster.Spec.ManagedRoles)) + for _, role := range cluster.Spec.ManagedRoles { + expectedRoles = append(expectedRoles, role.Name) + } + + cnpgStatus := cnpgCluster.Status.ManagedRolesStatus + reconciled := append([]string(nil), cnpgStatus.ByStatus[cnpgv1.RoleStatusReconciled]...) + pending := append([]string(nil), cnpgStatus.ByStatus[cnpgv1.RoleStatusPendingReconciliation]...) + + reconciledSet := make(map[string]struct{}, len(reconciled)) + for _, roleName := range reconciled { + reconciledSet[roleName] = struct{}{} + } + pendingSet := make(map[string]struct{}, len(pending)) + for _, roleName := range pending { + pendingSet[roleName] = struct{}{} + } + + failed := make(map[string]string, len(cnpgStatus.CannotReconcile)) + for roleName, errs := range cnpgStatus.CannotReconcile { + if len(errs) == 0 { + failed[roleName] = "role cannot be reconciled" + continue + } + failed[roleName] = strings.Join(errs, "; ") + } + + for _, roleName := range expectedRoles { + if _, ok := reconciledSet[roleName]; ok { + continue + } + if _, ok := failed[roleName]; ok { + continue + } + if _, ok := pendingSet[roleName]; ok { + continue + } + pending = append(pending, roleName) + } + + sort.Strings(reconciled) + sort.Strings(pending) + if len(failed) == 0 { + failed = nil + } - rwPoolerExists, err := poolerExists(ctx, c, postgresCluster, readWriteEndpoint) + cluster.Status.ManagedRolesStatus = &enterprisev4.ManagedRolesStatus{ + Reconciled: reconciled, + Pending: pending, + Failed: failed, + } +} + +type poolerModel struct { + client client.Client + scheme *runtime.Scheme + events poolerEmitter + updateStatus healthStatusUpdater + cluster *enterprisev4.PostgresCluster + clusterClass *enterprisev4.PostgresClusterClass + mergedConfig *MergedConfig + cnpgCluster *cnpgv1.Cluster + poolerEnabled bool + poolerConfigPresent bool + + metricsEnabled bool + health componentHealth +} + +func newPoolerModel(c client.Client, scheme *runtime.Scheme, events poolerEmitter, updateStatus healthStatusUpdater, cluster *enterprisev4.PostgresCluster, clusterClass *enterprisev4.PostgresClusterClass, mergedConfig *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerEnabled bool, poolerConfigPresent bool) *poolerModel { + model := &poolerModel{ + client: c, + scheme: scheme, + events: events, + updateStatus: updateStatus, + cluster: cluster, + clusterClass: clusterClass, + mergedConfig: mergedConfig, + cnpgCluster: cnpgCluster, + poolerEnabled: 
poolerEnabled, + poolerConfigPresent: poolerConfigPresent, + } + model.metricsEnabled = isConnectionPoolerMetricsEnabled(cluster, clusterClass) + return model +} + +func (p *poolerModel) Name() string { return pgcConstants.ComponentPooler } + +func (p *poolerModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { + if !p.poolerEnabled || !p.poolerConfigPresent { + return prerequisiteDecision{Allowed: true}, nil + } + if p.cnpgCluster == nil { + return prerequisiteDecision{ + Allowed: false, + Health: componentHealth{ + State: pgcConstants.Pending, + Condition: poolerReady, + Reason: reasonCNPGProvisioning, + Message: msgCNPGPendingCreation, + Phase: pendingClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, + }, nil + } + if p.cnpgCluster.Status.Phase != cnpgv1.PhaseHealthy { + return prerequisiteDecision{ + Allowed: false, + Health: componentHealth{ + State: pgcConstants.Provisioning, + Condition: poolerReady, + Reason: reasonCNPGProvisioning, + Message: fmt.Sprintf(msgFmtCNPGClusterPhase, p.cnpgCluster.Status.Phase), + Phase: provisioningClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, + }, nil + } + return prerequisiteDecision{Allowed: true}, nil +} + +func (p *poolerModel) Actuate(ctx context.Context) error { + switch { + case !p.poolerEnabled: + if err := deleteConnectionPoolers(ctx, p.client, p.cluster); err != nil { + return err + } + p.cluster.Status.ConnectionPoolerStatus = nil + meta.RemoveStatusCondition(&p.cluster.Status.Conditions, string(poolerReady)) + return nil + case !p.poolerConfigPresent: + return nil + case p.cnpgCluster == nil || p.cnpgCluster.Status.Phase != cnpgv1.PhaseHealthy: + return nil + default: + if err := createOrUpdateConnectionPoolers(ctx, p.client, p.scheme, p.cluster, p.mergedConfig, p.cnpgCluster, p.metricsEnabled); err != nil { + p.events.emitWarning(p.cluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to reconcile connection pooler: %v", err)) + return err + } + return nil + } +} + +func (p *poolerModel) Converge(ctx context.Context) (health componentHealth, err error) { + p.health = componentHealth{Condition: poolerReady} + defer func() { + statusErr := writeComponentStatus(p.updateStatus, p.health) + if statusErr != nil { + if err != nil { + err = errors.Join(err, statusErr) + } else { + err = statusErr + } + } + health = p.health + }() + + if !p.poolerEnabled { + p.health.State = pgcConstants.Ready + p.health.Reason = reasonAllInstancesReady + p.health.Message = msgPoolerDisabled + p.health.Phase = readyClusterPhase + p.health.Result = ctrl.Result{} + return p.health, nil + } + if !p.poolerConfigPresent { + p.health.State = pgcConstants.Failed + p.health.Reason = reasonPoolerConfigMissing + p.health.Message = msgPoolerConfigMissing + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, fmt.Errorf("pooler config missing") + } + if p.cnpgCluster == nil { + p.health.State = pgcConstants.Pending + p.health.Reason = reasonCNPGProvisioning + p.health.Message = msgCNPGPendingCreation + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + } + if p.cnpgCluster.Status.Phase != cnpgv1.PhaseHealthy { + p.health.State = pgcConstants.Provisioning + p.health.Reason = reasonCNPGProvisioning + p.health.Message = fmt.Sprintf(msgFmtCNPGClusterPhase, p.cnpgCluster.Status.Phase) + p.health.Phase = provisioningClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + 
} + + // TODO: move these direct CNPG pooler lookups behind a port. + rwExists, err := poolerExists(ctx, p.client, p.cluster, readWriteEndpoint) if err != nil { - logger.Error(err, "Failed to check RW pooler existence") - statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, - fmt.Sprintf("Failed to check pooler existence: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) + p.events.emitWarning(p.cluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to check RW pooler existence: %v", err)) + p.health.State = pgcConstants.Failed + p.health.Reason = reasonPoolerReconciliationFailed + p.health.Message = fmt.Sprintf("Failed to check RW pooler existence: %v", err) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, err + } + roExists, err := poolerExists(ctx, p.client, p.cluster, readOnlyEndpoint) + if err != nil { + p.events.emitWarning(p.cluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to check RO pooler existence: %v", err)) + p.health.State = pgcConstants.Failed + p.health.Reason = reasonPoolerReconciliationFailed + p.health.Message = fmt.Sprintf("Failed to check RO pooler existence: %v", err) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + return p.health, err + } + if !rwExists || !roExists { + p.events.emitPoolerCreationTransition(p.cluster, p.cluster.Status.Conditions) + p.health.State = pgcConstants.Provisioning + p.health.Reason = reasonPoolerCreating + p.health.Message = msgPoolersProvisioning + p.health.Phase = provisioningClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + } + + rwPooler := &cnpgv1.Pooler{} + if err := p.client.Get(ctx, types.NamespacedName{ + Name: poolerResourceName(p.cluster.Name, readWriteEndpoint), + Namespace: p.cluster.Namespace, + }, rwPooler); err != nil { + p.events.emitPoolerCreationTransition(p.cluster, p.cluster.Status.Conditions) + p.health.State = pgcConstants.Pending + p.health.Reason = reasonPoolerCreating + p.health.Message = msgWaitRWPoolerObject + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil } - roPoolerExists, err := poolerExists(ctx, c, postgresCluster, readOnlyEndpoint) + roPooler := &cnpgv1.Pooler{} + if err := p.client.Get(ctx, types.NamespacedName{ + Name: poolerResourceName(p.cluster.Name, readOnlyEndpoint), + Namespace: p.cluster.Namespace, + }, roPooler); err != nil { + p.events.emitPoolerCreationTransition(p.cluster, p.cluster.Status.Conditions) + p.health.State = pgcConstants.Pending + p.health.Reason = reasonPoolerCreating + p.health.Message = msgWaitROPoolerObject + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + } + if !arePoolersReady(rwPooler, roPooler) { + p.events.emitPoolerCreationTransition(p.cluster, p.cluster.Status.Conditions) + p.health.State = pgcConstants.Pending + p.health.Reason = reasonPoolerCreating + p.health.Message = msgPoolersNotReady + p.health.Phase = pendingClusterPhase + p.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return p.health, nil + } + + p.cluster.Status.ConnectionPoolerStatus = &enterprisev4.ConnectionPoolerStatus{Enabled: true} + p.health.State = pgcConstants.Ready + p.health.Reason = reasonAllInstancesReady + p.health.Message = msgPoolersReady + p.health.Phase = readyClusterPhase + p.health.Result = ctrl.Result{} + p.events.emitPoolerReadyTransition(p.cluster, p.cluster.Status.Conditions) + return
p.health, nil +} + +type configMapModel struct { + client client.Client + scheme *runtime.Scheme + events eventEmitter + updateStatus healthStatusUpdater + runtime clusterRuntimeView + cluster *enterprisev4.PostgresCluster + secret string + + health componentHealth +} + +func newConfigMapModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, runtime clusterRuntimeView, cluster *enterprisev4.PostgresCluster, secret string) *configMapModel { + return &configMapModel{client: c, scheme: scheme, events: events, updateStatus: updateStatus, runtime: runtime, cluster: cluster, secret: secret} +} + +func (c *configMapModel) Name() string { return pgcConstants.ComponentConfigMap } + +func (c *configMapModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { + return prerequisiteDecision{Allowed: true}, nil +} + +func (c *configMapModel) Actuate(ctx context.Context) error { + cnpgCluster := c.runtime.Cluster() + if cnpgCluster == nil { + return nil + } + desiredCM, err := generateConfigMap(ctx, c.client, c.scheme, c.cluster, cnpgCluster, c.secret) if err != nil { - logger.Error(err, "Failed to check RO pooler existence") - statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, - fmt.Sprintf("Failed to check pooler existence: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) + c.events.emitWarning(c.cluster, EventConfigMapReconcileFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err)) + return err + } + cm := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: desiredCM.Name, Namespace: desiredCM.Namespace}} + _, err = controllerutil.CreateOrUpdate(ctx, c.client, cm, func() error { + cm.Data = desiredCM.Data + cm.Annotations = desiredCM.Annotations + cm.Labels = desiredCM.Labels + if !metav1.IsControlledBy(cm, c.cluster) { + if setErr := ctrl.SetControllerReference(c.cluster, cm, c.scheme); setErr != nil { + return fmt.Errorf("setting controller reference: %w", setErr) + } + } + return nil + }) + if err != nil { + c.events.emitWarning(c.cluster, EventConfigMapReconcileFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err)) + return err } + if c.cluster.Status.Resources.ConfigMapRef == nil { + c.cluster.Status.Resources.ConfigMapRef = &corev1.LocalObjectReference{Name: desiredCM.Name} + } + return nil +} - switch { - case !poolerEnabled: - if err := deleteConnectionPoolers(ctx, c, postgresCluster); err != nil { - logger.Error(err, "Failed to delete connection poolers") - statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, - fmt.Sprintf("Failed to delete connection poolers: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) - } - postgresCluster.Status.ConnectionPoolerStatus = nil - meta.RemoveStatusCondition(&postgresCluster.Status.Conditions, string(poolerReady)) - - case !rwPoolerExists || !roPoolerExists: - if mergedConfig.CNPG == nil || mergedConfig.CNPG.ConnectionPooler == nil { - logger.Info("Connection pooler enabled but no config found in class or cluster spec, skipping", - "class", postgresCluster.Spec.Class, "cluster", postgresCluster.Name) - statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerConfigMissing, - fmt.Sprintf("Connection pooler is enabled but no config found in class %q or cluster %q", - postgresCluster.Spec.Class, postgresCluster.Name), failedClusterPhase) - return ctrl.Result{}, statusErr - } - if cnpgCluster.Status.Phase 
!= cnpgv1.PhaseHealthy { - logger.Info("CNPG Cluster not healthy yet, pending pooler creation", "clusterPhase", cnpgCluster.Status.Phase) - statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonCNPGClusterNotHealthy, - "Waiting for CNPG cluster to become healthy before creating poolers", pendingClusterPhase) - return ctrl.Result{RequeueAfter: retryDelay}, statusErr - } - if err := createOrUpdateConnectionPoolers(ctx, c, rc.Scheme, postgresCluster, mergedConfig, cnpgCluster, poolerMetricsEnabled); err != nil { - logger.Error(err, "Failed to reconcile connection pooler") - rc.emitWarning(postgresCluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to reconcile connection pooler: %v", err)) - statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, - fmt.Sprintf("Failed to reconcile connection pooler: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) - } - rc.emitNormal(postgresCluster, EventPoolerCreationStarted, "Connection poolers created, waiting for readiness") - logger.Info("Connection pooler creation started, requeueing") - if statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerCreating, - "Connection poolers are being provisioned", provisioningClusterPhase); statusErr != nil { - return ctrl.Result{}, statusErr - } - return ctrl.Result{RequeueAfter: retryDelay}, nil - - case func() bool { - rwPooler := &cnpgv1.Pooler{} - rwErr := c.Get(ctx, types.NamespacedName{ - Name: poolerResourceName(postgresCluster.Name, readWriteEndpoint), - Namespace: postgresCluster.Namespace, - }, rwPooler) - roPooler := &cnpgv1.Pooler{} - roErr := c.Get(ctx, types.NamespacedName{ - Name: poolerResourceName(postgresCluster.Name, readOnlyEndpoint), - Namespace: postgresCluster.Namespace, - }, roPooler) - return rwErr != nil || roErr != nil || !arePoolersReady(rwPooler, roPooler) - }(): - logger.Info("Connection Poolers are not ready yet, requeueing") - statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerCreating, - "Connection poolers are being provisioned", pendingClusterPhase) - return ctrl.Result{RequeueAfter: retryDelay}, statusErr +func (c *configMapModel) Converge(ctx context.Context) (health componentHealth, err error) { + c.health = componentHealth{Condition: configMapsReady} + defer func() { + statusErr := writeComponentStatus(c.updateStatus, c.health) + if statusErr != nil { + if err != nil { + err = errors.Join(err, statusErr) + } else { + err = statusErr + } + } + health = c.health + }() + + if c.runtime == nil || !c.runtime.IsHealthy() { + c.health.State = pgcConstants.Provisioning + c.health.Reason = reasonCNPGProvisioning + c.health.Message = msgCNPGPendingCreation + c.health.Phase = provisioningClusterPhase + c.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return c.health, nil + } + + if c.cluster.Status.Resources == nil || c.cluster.Status.Resources.ConfigMapRef == nil { + c.health.State = pgcConstants.Provisioning + c.health.Reason = reasonConfigMapFailed + c.health.Message = msgConfigMapRefNotPublished + c.health.Phase = provisioningClusterPhase + c.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return c.health, nil + } + + cm := &corev1.ConfigMap{} + key := types.NamespacedName{Name: c.cluster.Status.Resources.ConfigMapRef.Name, Namespace: c.cluster.Namespace} + if err := c.client.Get(ctx, key, cm); err != nil { + if apierrors.IsNotFound(err) { + c.health.State = pgcConstants.Provisioning + c.health.Reason = reasonConfigMapFailed + 
c.health.Message = msgConfigMapNotFoundYet + c.health.Phase = provisioningClusterPhase + c.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return c.health, nil + } + c.health.State = pgcConstants.Failed + c.health.Reason = reasonConfigMapFailed + c.health.Message = fmt.Sprintf("Failed to fetch ConfigMap: %v", err) + c.health.Phase = failedClusterPhase + c.health.Result = ctrl.Result{} + return c.health, err + } + + requiredKeys := []string{ + configKeyClusterRWEndpoint, + configKeyClusterROEndpoint, + configKeyClusterREndpoint, + configKeyDefaultClusterPort, + configKeySuperUserSecretRef, + } + for _, requiredKey := range requiredKeys { + if _, ok := cm.Data[requiredKey]; !ok { + c.health.State = pgcConstants.Failed + c.health.Reason = reasonConfigMapFailed + c.health.Message = fmt.Sprintf(msgFmtConfigMapMissingRequiredKey, requiredKey) + c.health.Phase = failedClusterPhase + c.health.Result = ctrl.Result{} + return c.health, fmt.Errorf("configmap missing key %s", requiredKey) + } + } - default: - oldConditions := make([]metav1.Condition, len(postgresCluster.Status.Conditions)) - copy(oldConditions, postgresCluster.Status.Conditions) - if err := syncPoolerStatus(ctx, c, rc.Metrics, postgresCluster); err != nil { - logger.Error(err, "Failed to sync pooler status") - rc.emitWarning(postgresCluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to sync pooler status: %v", err)) - statusErr := updateStatus(poolerReady, metav1.ConditionFalse, reasonPoolerReconciliationFailed, - fmt.Sprintf("Failed to sync pooler status: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) - } - rc.emitPoolerReadyTransition(postgresCluster, oldConditions) - } - - // Reconcile ConfigMap when CNPG cluster is healthy. - if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy { - logger.Info("CNPG Cluster healthy, reconciling ConfigMap") - desiredCM, err := generateConfigMap(ctx, c, rc.Scheme, postgresCluster, cnpgCluster, postgresSecretName) - if err != nil { - logger.Error(err, "Failed to generate ConfigMap") - rc.emitWarning(postgresCluster, EventConfigMapReconcileFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err)) - statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonConfigMapFailed, - fmt.Sprintf("Failed to generate ConfigMap: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) - } - cm := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: desiredCM.Name, Namespace: desiredCM.Namespace}} - createOrUpdateResult, err := controllerutil.CreateOrUpdate(ctx, c, cm, func() error { - cm.Data = desiredCM.Data - cm.Annotations = desiredCM.Annotations - cm.Labels = desiredCM.Labels - if !metav1.IsControlledBy(cm, postgresCluster) { - if err := ctrl.SetControllerReference(postgresCluster, cm, rc.Scheme); err != nil { - return fmt.Errorf("setting controller reference: %w", err) - } + c.health.State = pgcConstants.Ready + c.health.Reason = reasonConfigMapReady + c.health.Message = msgAccessConfigMapReady + c.health.Phase = readyClusterPhase + c.health.Result = ctrl.Result{} + if !meta.IsStatusConditionTrue(c.cluster.Status.Conditions, string(configMapsReady)) { + c.events.emitNormal(c.cluster, EventConfigMapReady, c.health.Message) + } + return c.health, nil +} + +type secretModel struct { + client client.Client + scheme *runtime.Scheme + events eventEmitter + updateStatus healthStatusUpdater + cluster *enterprisev4.PostgresCluster + name string + + health componentHealth +} + +func newSecretModel(c client.Client, scheme 
*runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, cluster *enterprisev4.PostgresCluster, name string) *secretModel { + return &secretModel{client: c, scheme: scheme, events: events, updateStatus: updateStatus, cluster: cluster, name: name} +} + +func (s *secretModel) Name() string { return pgcConstants.ComponentSecret } + +func (s *secretModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { + return prerequisiteDecision{Allowed: true}, nil +} + +// Actuate ensures the superuser Secret exists, re-adopts it when its owner reference was stripped, and publishes its ref to status. +func (s *secretModel) Actuate(ctx context.Context) error { + secret := &corev1.Secret{} + secretExists, secretErr := clusterSecretExists(ctx, s.client, s.cluster.Namespace, s.name, secret) + if secretErr != nil { + s.events.emitWarning(s.cluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to check secret existence: %v", secretErr)) + return secretErr + } + if !secretExists { + if err := ensureClusterSecret(ctx, s.client, s.scheme, s.cluster, s.name, secret); err != nil { + s.events.emitWarning(s.cluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to generate cluster secret: %v", err)) + return err + } + } + hasOwnerRef, ownerRefErr := controllerutil.HasOwnerReference(secret.GetOwnerReferences(), s.cluster, s.scheme) + if ownerRefErr != nil { + return fmt.Errorf("failed to check owner reference on secret: %w", ownerRefErr) + } + if secretExists && !hasOwnerRef { + originalSecret := secret.DeepCopy() + if err := ctrl.SetControllerReference(s.cluster, secret, s.scheme); err != nil { + return fmt.Errorf("failed to set controller reference on existing secret: %w", err) + } + if err := patchObject(ctx, s.client, originalSecret, secret, "Secret"); err != nil { + s.events.emitWarning(s.cluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to patch existing secret: %v", err)) + return err + } + s.events.emitNormal(s.cluster, EventClusterAdopted, fmt.Sprintf("Adopted existing CNPG cluster and secret %s", s.name)) + } + // Status.Resources can be nil on the first reconcile; initialize it before publishing the ref. + if s.cluster.Status.Resources == nil { + s.cluster.Status.Resources = &enterprisev4.PostgresClusterResources{} + } + if s.cluster.Status.Resources.SuperUserSecretRef == nil { + s.cluster.Status.Resources.SuperUserSecretRef = &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: s.name}, + Key: secretKeyPassword, + } + } + return nil +} + +func (s *secretModel) Converge(ctx context.Context) (health componentHealth, err error) { + s.health = componentHealth{Condition: secretsReady} + defer func() { + statusErr := writeComponentStatus(s.updateStatus, s.health) + if statusErr != nil { + if err != nil { + err = errors.Join(err, statusErr) + } else { + err = statusErr } - return nil - }) - if err != nil { - logger.Error(err, "Failed to reconcile ConfigMap", "name", desiredCM.Name) - rc.emitWarning(postgresCluster, EventConfigMapReconcileFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err)) - statusErr := updateStatus(clusterReady, metav1.ConditionFalse, reasonConfigMapFailed, - fmt.Sprintf("Failed to reconcile ConfigMap: %v", err), failedClusterPhase) - return ctrl.Result{}, errors.Join(err, statusErr) - } - switch createOrUpdateResult { - case controllerutil.OperationResultCreated: - rc.emitNormal(postgresCluster, EventConfigMapReady, fmt.Sprintf("ConfigMap %s created", desiredCM.Name)) - logger.Info("ConfigMap created", "name", desiredCM.Name) - case controllerutil.OperationResultUpdated: - rc.emitNormal(postgresCluster, EventConfigMapReady, fmt.Sprintf("ConfigMap %s updated", desiredCM.Name)) - logger.Info("ConfigMap updated", "name", desiredCM.Name) - default: - logger.Info("ConfigMap unchanged", "name", desiredCM.Name) - } - if 
postgresCluster.Status.Resources.ConfigMapRef == nil { - postgresCluster.Status.Resources.ConfigMapRef = &corev1.LocalObjectReference{Name: desiredCM.Name} - } - } - - // Final status sync. - var oldPhase string - if postgresCluster.Status.Phase != nil { - oldPhase = *postgresCluster.Status.Phase - } - if err := syncStatus(ctx, c, rc.Metrics, postgresCluster, cnpgCluster); err != nil { - logger.Error(err, "Failed to sync status") - return ctrl.Result{}, err + } + health = s.health + }() + + if s.cluster.Status.Resources == nil || s.cluster.Status.Resources.SuperUserSecretRef == nil { + s.health.State = pgcConstants.Provisioning + s.health.Reason = reasonUserSecretPending + s.health.Message = msgSecretRefNotPublished + s.health.Phase = provisioningClusterPhase + s.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return s.health, nil } - var newPhase string - if postgresCluster.Status.Phase != nil { - newPhase = *postgresCluster.Status.Phase - } - rc.emitClusterPhaseTransition(postgresCluster, oldPhase, newPhase) - if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy { - rwPooler := &cnpgv1.Pooler{} - rwErr := c.Get(ctx, types.NamespacedName{ - Name: poolerResourceName(postgresCluster.Name, readWriteEndpoint), - Namespace: postgresCluster.Namespace, - }, rwPooler) - roPooler := &cnpgv1.Pooler{} - roErr := c.Get(ctx, types.NamespacedName{ - Name: poolerResourceName(postgresCluster.Name, readOnlyEndpoint), - Namespace: postgresCluster.Namespace, - }, roPooler) - if rwErr == nil && roErr == nil && arePoolersReady(rwPooler, roPooler) { - logger.Info("Poolers ready, syncing status") - poolerOldConditions := make([]metav1.Condition, len(postgresCluster.Status.Conditions)) - copy(poolerOldConditions, postgresCluster.Status.Conditions) - _ = syncPoolerStatus(ctx, c, rc.Metrics, postgresCluster) - rc.emitPoolerReadyTransition(postgresCluster, poolerOldConditions) + + secret := &corev1.Secret{} + key := types.NamespacedName{Name: s.cluster.Status.Resources.SuperUserSecretRef.Name, Namespace: s.cluster.Namespace} + if err := s.client.Get(ctx, key, secret); err != nil { + if apierrors.IsNotFound(err) { + s.health.State = pgcConstants.Provisioning + s.health.Reason = reasonUserSecretPending + s.health.Message = msgSecretNotFoundYet + s.health.Phase = provisioningClusterPhase + s.health.Result = ctrl.Result{RequeueAfter: retryDelay} + return s.health, nil } + s.health.State = pgcConstants.Failed + s.health.Reason = reasonUserSecretFailed + s.health.Message = fmt.Sprintf("Failed to fetch superuser secret: %v", err) + s.health.Phase = failedClusterPhase + s.health.Result = ctrl.Result{} + return s.health, err + } + + refKey := s.cluster.Status.Resources.SuperUserSecretRef.Key + if refKey == "" { + refKey = secretKeyPassword + } + if _, ok := secret.Data[refKey]; !ok { + s.health.State = pgcConstants.Failed + s.health.Reason = reasonSuperUserSecretFailed + s.health.Message = fmt.Sprintf(msgFmtSecretMissingKey, refKey) + s.health.Phase = failedClusterPhase + s.health.Result = ctrl.Result{} + return s.health, fmt.Errorf("secret missing key %s", refKey) + } + + s.health.State = pgcConstants.Ready + s.health.Reason = reasonSuperUserSecretReady + s.health.Message = msgSuperuserSecretReady + s.health.Phase = readyClusterPhase + s.health.Result = ctrl.Result{} + if !meta.IsStatusConditionTrue(s.cluster.Status.Conditions, string(secretsReady)) { + s.events.emitNormal(s.cluster, EventSecretReady, s.health.Message) + } + return s.health, nil +} + +func isIntermediateState(state pgcConstants.State) bool { + switch 
state { + case pgcConstants.Pending, + pgcConstants.Provisioning, + pgcConstants.Configuring: + return true + default: + return false } - logger.Info("Reconciliation complete") - return ctrl.Result{}, nil } // getMergedConfig overlays PostgresCluster spec on top of the class defaults. @@ -626,14 +1484,6 @@ func isPoolerReady(pooler *cnpgv1.Pooler) bool { return pooler.Status.Instances >= desired } -func poolerInstanceCount(p *cnpgv1.Pooler) (desired, scheduled int32) { - desired = 1 - if p.Spec.Instances != nil { - desired = *p.Spec.Instances - } - return desired, p.Status.Instances -} - // createOrUpdateConnectionPoolers creates RW and RO poolers if they don't exist. func createOrUpdateConnectionPoolers(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cfg *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerMetricsEnabled bool) error { if err := createConnectionPooler(ctx, c, scheme, cluster, cfg, cnpgCluster, readWriteEndpoint, poolerMetricsEnabled); err != nil { @@ -727,90 +1577,6 @@ func deleteConnectionPoolers(ctx context.Context, c client.Client, cluster *ente return nil } -// syncPoolerStatus populates ConnectionPoolerStatus and the PoolerReady condition. -func syncPoolerStatus(ctx context.Context, c client.Client, metrics ports.Recorder, cluster *enterprisev4.PostgresCluster) error { - rwPooler := &cnpgv1.Pooler{} - if err := c.Get(ctx, types.NamespacedName{ - Name: poolerResourceName(cluster.Name, readWriteEndpoint), - Namespace: cluster.Namespace, - }, rwPooler); err != nil { - return err - } - - roPooler := &cnpgv1.Pooler{} - if err := c.Get(ctx, types.NamespacedName{ - Name: poolerResourceName(cluster.Name, readOnlyEndpoint), - Namespace: cluster.Namespace, - }, roPooler); err != nil { - return err - } - - cluster.Status.ConnectionPoolerStatus = &enterprisev4.ConnectionPoolerStatus{Enabled: true} - rwDesired, rwScheduled := poolerInstanceCount(rwPooler) - roDesired, roScheduled := poolerInstanceCount(roPooler) - - return setStatus(ctx, c, metrics, cluster, poolerReady, metav1.ConditionTrue, reasonAllInstancesReady, - fmt.Sprintf("%s: %d/%d, %s: %d/%d", readWriteEndpoint, rwScheduled, rwDesired, readOnlyEndpoint, roScheduled, roDesired), - readyClusterPhase) -} - -// syncStatus maps CNPG Cluster state to PostgresCluster status. 
-func syncStatus(ctx context.Context, c client.Client, metrics ports.Recorder, cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster) error { - cluster.Status.ProvisionerRef = &corev1.ObjectReference{ - APIVersion: "postgresql.cnpg.io/v1", - Kind: "Cluster", - Namespace: cnpgCluster.Namespace, - Name: cnpgCluster.Name, - UID: cnpgCluster.UID, - } - - var phase reconcileClusterPhases - var condStatus metav1.ConditionStatus - var reason conditionReasons - var message string - - switch cnpgCluster.Status.Phase { - case cnpgv1.PhaseHealthy: - phase, condStatus, reason, message = readyClusterPhase, metav1.ConditionTrue, reasonCNPGClusterHealthy, "Cluster is up and running" - case cnpgv1.PhaseFirstPrimary, cnpgv1.PhaseCreatingReplica, cnpgv1.PhaseWaitingForInstancesToBeActive: - phase, condStatus, reason = provisioningClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioning - message = fmt.Sprintf("CNPG cluster provisioning: %s", cnpgCluster.Status.Phase) - case cnpgv1.PhaseSwitchover: - phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGSwitchover, "Cluster changing primary node" - case cnpgv1.PhaseFailOver: - phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGFailingOver, "Pod missing, need to change primary" - case cnpgv1.PhaseInplacePrimaryRestart, cnpgv1.PhaseInplaceDeletePrimaryRestart: - phase, condStatus, reason = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGRestarting - message = fmt.Sprintf("CNPG cluster restarting: %s", cnpgCluster.Status.Phase) - case cnpgv1.PhaseUpgrade, cnpgv1.PhaseMajorUpgrade, cnpgv1.PhaseUpgradeDelayed, cnpgv1.PhaseOnlineUpgrading: - phase, condStatus, reason = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGUpgrading - message = fmt.Sprintf("CNPG cluster upgrading: %s", cnpgCluster.Status.Phase) - case cnpgv1.PhaseApplyingConfiguration: - phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGApplyingConfig, "Configuration change is being applied" - case cnpgv1.PhaseReplicaClusterPromotion: - phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGPromoting, "Replica is being promoted to primary" - case cnpgv1.PhaseWaitingForUser: - phase, condStatus, reason, message = failedClusterPhase, metav1.ConditionFalse, reasonCNPGWaitingForUser, "Action from the user is required" - case cnpgv1.PhaseUnrecoverable: - phase, condStatus, reason, message = failedClusterPhase, metav1.ConditionFalse, reasonCNPGUnrecoverable, "Cluster failed, needs manual intervention" - case cnpgv1.PhaseCannotCreateClusterObjects: - phase, condStatus, reason, message = failedClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioningFailed, "Cluster resources cannot be created" - case cnpgv1.PhaseUnknownPlugin, cnpgv1.PhaseFailurePlugin: - phase, condStatus, reason = failedClusterPhase, metav1.ConditionFalse, reasonCNPGPluginError - message = fmt.Sprintf("CNPG plugin error: %s", cnpgCluster.Status.Phase) - case cnpgv1.PhaseImageCatalogError, cnpgv1.PhaseArchitectureBinaryMissing: - phase, condStatus, reason = failedClusterPhase, metav1.ConditionFalse, reasonCNPGImageError - message = fmt.Sprintf("CNPG image error: %s", cnpgCluster.Status.Phase) - case "": - phase, condStatus, reason, message = pendingClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioning, "CNPG cluster is pending creation" - default: - phase, condStatus, reason = provisioningClusterPhase, metav1.ConditionFalse, 
reasonCNPGProvisioning - message = fmt.Sprintf("CNPG cluster phase: %s", cnpgCluster.Status.Phase) - } - - return setStatus(ctx, c, metrics, cluster, clusterReady, condStatus, reason, message, phase) -} - // setStatus sets the phase, condition and persists the status. // It skips the API write when the resulting status is identical to the current // state, avoiding unnecessary etcd churn and ResourceVersion bumps on stable clusters. @@ -831,7 +1597,9 @@ func setStatus(ctx context.Context, c client.Client, metrics ports.Recorder, clu return nil } - metrics.IncStatusTransition(ports.ControllerCluster, string(condType), string(status), string(reason)) + if metrics != nil { + metrics.IncStatusTransition(ports.ControllerCluster, string(condType), string(status), string(reason)) + } if err := c.Status().Update(ctx, cluster); err != nil { return fmt.Errorf("failed to update PostgresCluster status: %w", err) @@ -839,6 +1607,27 @@ func setStatus(ctx context.Context, c client.Client, metrics ports.Recorder, clu return nil } +func setStatusFromHealth(ctx context.Context, c client.Client, metrics ports.Recorder, cluster *enterprisev4.PostgresCluster, health componentHealth) error { + conditionStatus := metav1.ConditionFalse + if health.State == pgcConstants.Ready { + conditionStatus = metav1.ConditionTrue + } + return setStatus(ctx, c, metrics, cluster, health.Condition, conditionStatus, health.Reason, health.Message, health.Phase) +} + +func setPhaseStatus(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, phase reconcileClusterPhases) error { + before := cluster.Status.DeepCopy() + p := string(phase) + cluster.Status.Phase = &p + if equality.Semantic.DeepEqual(*before, cluster.Status) { + return nil + } + if err := c.Status().Update(ctx, cluster); err != nil { + return fmt.Errorf("failed to update PostgresCluster status phase: %w", err) + } + return nil +} + // generateConfigMap builds a ConfigMap with connection details for the PostgresCluster. 
func generateConfigMap(ctx context.Context, c client.Client, scheme *runtime.Scheme, cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster, secretName string) (*corev1.ConfigMap, error) { cmName := fmt.Sprintf("%s%s", cluster.Name, defaultConfigMapSuffix) @@ -847,12 +1636,12 @@ func generateConfigMap(ctx context.Context, c client.Client, scheme *runtime.Sch } data := map[string]string{ - "CLUSTER_RW_ENDPOINT": fmt.Sprintf("%s-rw.%s", cnpgCluster.Name, cnpgCluster.Namespace), - "CLUSTER_RO_ENDPOINT": fmt.Sprintf("%s-ro.%s", cnpgCluster.Name, cnpgCluster.Namespace), - "CLUSTER_R_ENDPOINT": fmt.Sprintf("%s-r.%s", cnpgCluster.Name, cnpgCluster.Namespace), - "DEFAULT_CLUSTER_PORT": defaultPort, - "SUPER_USER_NAME": superUsername, - "SUPER_USER_SECRET_REF": secretName, + configKeyClusterRWEndpoint: fmt.Sprintf("%s-rw.%s", cnpgCluster.Name, cnpgCluster.Namespace), + configKeyClusterROEndpoint: fmt.Sprintf("%s-ro.%s", cnpgCluster.Name, cnpgCluster.Namespace), + configKeyClusterREndpoint: fmt.Sprintf("%s-r.%s", cnpgCluster.Name, cnpgCluster.Namespace), + configKeyDefaultClusterPort: defaultPort, + configKeySuperUserName: superUsername, + configKeySuperUserSecretRef: secretName, } rwExists, err := poolerExists(ctx, c, cluster, readWriteEndpoint) if err != nil { @@ -863,8 +1652,8 @@ func generateConfigMap(ctx context.Context, c client.Client, scheme *runtime.Sch return nil, fmt.Errorf("failed to check RO pooler existence: %w", err) } if rwExists && roExists { - data["CLUSTER_POOLER_RW_ENDPOINT"] = fmt.Sprintf("%s.%s", poolerResourceName(cnpgCluster.Name, readWriteEndpoint), cnpgCluster.Namespace) - data["CLUSTER_POOLER_RO_ENDPOINT"] = fmt.Sprintf("%s.%s", poolerResourceName(cnpgCluster.Name, readOnlyEndpoint), cnpgCluster.Namespace) + data[configKeyPoolerRWEndpoint] = fmt.Sprintf("%s.%s", poolerResourceName(cnpgCluster.Name, readWriteEndpoint), cnpgCluster.Namespace) + data[configKeyPoolerROEndpoint] = fmt.Sprintf("%s.%s", poolerResourceName(cnpgCluster.Name, readOnlyEndpoint), cnpgCluster.Namespace) } cm := &corev1.ConfigMap{ @@ -1056,15 +1845,12 @@ func removeOwnerRef(scheme *runtime.Scheme, owner, obj client.Object) (bool, err // patchObject patches obj from original; treats NotFound as a no-op. 
func patchObject(ctx context.Context, c client.Client, original, obj client.Object, kind objectKind) error { - logger := log.FromContext(ctx) if err := c.Patch(ctx, obj, client.MergeFrom(original)); err != nil { if apierrors.IsNotFound(err) { - logger.Info("Object not found, skipping patch", "kind", kind, "name", obj.GetName()) return nil } return fmt.Errorf("patching %s: %w", kind, err) } - logger.Info("Object patched", "kind", kind, "name", obj.GetName()) return nil } diff --git a/pkg/postgresql/cluster/core/cluster_unit_test.go b/pkg/postgresql/cluster/core/cluster_unit_test.go index 72b30205b..85eeba7e2 100644 --- a/pkg/postgresql/cluster/core/cluster_unit_test.go +++ b/pkg/postgresql/cluster/core/cluster_unit_test.go @@ -6,18 +6,67 @@ import ( cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" enterprisev4 "github.com/splunk/splunk-operator/api/v4" + pgcConstants "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core/types/constants" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/utils/ptr" client "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) +type configMapNotFoundClient struct { + client.Client +} + +type noopEventEmitter struct{} + +func (noopEventEmitter) emitNormal(_ client.Object, _, _ string) {} +func (noopEventEmitter) emitWarning(_ client.Object, _, _ string) {} +func (noopEventEmitter) emitPoolerReadyTransition(_ client.Object, _ []metav1.Condition) {} +func (noopEventEmitter) emitPoolerCreationTransition(_ client.Object, _ []metav1.Condition) { +} + +type captureEventEmitter struct { + normals []string + warnings []string +} + +func (c *captureEventEmitter) emitNormal(_ client.Object, reason, message string) { + c.normals = append(c.normals, reason+":"+message) +} + +func (c *captureEventEmitter) emitWarning(_ client.Object, reason, message string) { + c.warnings = append(c.warnings, reason+":"+message) +} + +func (c *captureEventEmitter) emitPoolerReadyTransition(_ client.Object, conditions []metav1.Condition) { + if !meta.IsStatusConditionTrue(conditions, string(poolerReady)) { + c.normals = append(c.normals, EventPoolerReady+":Connection poolers are ready") + } +} + +func (c *captureEventEmitter) emitPoolerCreationTransition(_ client.Object, conditions []metav1.Condition) { + cond := meta.FindStatusCondition(conditions, string(poolerReady)) + if cond != nil && cond.Status == metav1.ConditionFalse && cond.Reason == string(reasonPoolerCreating) { + return + } + c.normals = append(c.normals, EventPoolerCreationStarted+":Connection poolers created, waiting for readiness") +} + +func (c configMapNotFoundClient) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + if _, ok := obj.(*corev1.ConfigMap); ok { + return apierrors.NewNotFound(schema.GroupResource{Resource: "configmaps"}, key.Name) + } + return c.Client.Get(ctx, key, obj, opts...) 
+} + func TestPoolerResourceName(t *testing.T) { tests := []struct { name string @@ -94,6 +143,46 @@ func TestIsPoolerReady(t *testing.T) { } } +func TestPoolerInstanceCountManual(t *testing.T) { + tests := []struct { + name string + pooler *cnpgv1.Pooler + expectedDesired int32 + expectedScheduled int32 + }{ + { + name: "nil instances defaults desired to 1", + pooler: &cnpgv1.Pooler{ + Status: cnpgv1.PoolerStatus{Instances: 3}, + }, + expectedDesired: 1, + expectedScheduled: 3, + }, + { + name: "explicit instances uses spec value", + pooler: &cnpgv1.Pooler{ + Spec: cnpgv1.PoolerSpec{Instances: ptr.To(int32(5))}, + Status: cnpgv1.PoolerStatus{Instances: 2}, + }, + expectedDesired: 5, + expectedScheduled: 2, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + desired := int32(1) + if tt.pooler.Spec.Instances != nil { + desired = *tt.pooler.Spec.Instances + } + scheduled := tt.pooler.Status.Instances + + assert.Equal(t, tt.expectedDesired, desired) + assert.Equal(t, tt.expectedScheduled, scheduled) + }) + } +} + func TestNormalizeCNPGClusterSpec(t *testing.T) { tests := []struct { name string @@ -1044,42 +1133,6 @@ func TestGenerateConfigMap(t *testing.T) { }) } -func TestPoolerInstanceCount(t *testing.T) { - tests := []struct { - name string - pooler *cnpgv1.Pooler - expectedDesired int32 - expectedScheduled int32 - }{ - { - name: "nil instances defaults desired to 1", - pooler: &cnpgv1.Pooler{ - Status: cnpgv1.PoolerStatus{Instances: 3}, - }, - expectedDesired: 1, - expectedScheduled: 3, - }, - { - name: "explicit instances returns spec value", - pooler: &cnpgv1.Pooler{ - Spec: cnpgv1.PoolerSpec{Instances: ptr.To(int32(5))}, - Status: cnpgv1.PoolerStatus{Instances: 2}, - }, - expectedDesired: 5, - expectedScheduled: 2, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - desired, scheduled := poolerInstanceCount(tt.pooler) - - assert.Equal(t, tt.expectedDesired, desired) - assert.Equal(t, tt.expectedScheduled, scheduled) - }) - } -} - func TestGeneratePassword(t *testing.T) { pw, err := generatePassword() @@ -1206,3 +1259,774 @@ func TestCreateOrUpdateConnectionPoolers(t *testing.T) { assert.Equal(t, poolerMetricsPortString, ro.Spec.Template.ObjectMeta.Annotations[prometheusPortAnnotation]) }) } + +func TestComponentStateTriggerConditions(t *testing.T) { + t.Parallel() + + ctx := t.Context() + scheme := runtime.NewScheme() + require.NoError(t, corev1.AddToScheme(scheme)) + require.NoError(t, enterprisev4.AddToScheme(scheme)) + require.NoError(t, cnpgv1.AddToScheme(scheme)) + + exampleClusterClass := &enterprisev4.PostgresClusterClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1-class", + Namespace: "default", + }, + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{ + ConnectionPoolerEnabled: ptr.To(true), + }, + }, + Status: enterprisev4.PostgresClusterClassStatus{ + Phase: ptr.To(string(enterprisev4.PhaseReady)), + }, + } + + exampleCm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1-config", + Namespace: "default", + }, + Data: map[string]string{ + "CLUSTER_RW_ENDPOINT": "pg1-rw.default", + "CLUSTER_RO_ENDPOINT": "pg1-ro.default", + "DEFAULT_CLUSTER_PORT": "5432", + "SUPER_USER_SECRET_REF": "pg1-secret", + }, + } + examplePgCluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1", + Namespace: "default", + }, + Status: enterprisev4.PostgresClusterStatus{ + Resources: &enterprisev4.PostgresClusterResources{ + ConfigMapRef: 
&corev1.LocalObjectReference{Name: "pg1-config"}, + SuperUserSecretRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "pg1-secret"}, + Key: "password", + }, + }, + }, + } + exampleSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1-secret", + Namespace: "default", + }, + Data: map[string][]byte{ + "password": []byte("s3cr3t"), + }, + } + + instances := int32(1) + version := "16" + storageSize := resource.MustParse("10Gi") + mergedConfig := &MergedConfig{ + Spec: &enterprisev4.PostgresClusterSpec{ + Instances: &instances, + PostgresVersion: &version, + Storage: &storageSize, + Resources: &corev1.ResourceRequirements{}, + PostgreSQLConfig: map[string]string{}, + PgHBA: []string{}, + }, + } + + makeReadyProvisioner := func(cluster *enterprisev4.PostgresCluster) *clusterModel { + cnpg := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: cluster.Name, + Namespace: cluster.Namespace, + }, + Spec: buildCNPGClusterSpec(mergedConfig, "pg1-secret", false), + Status: cnpgv1.ClusterStatus{ + Phase: cnpgv1.PhaseHealthy, + }, + } + c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cnpg).Build() + return newClusterModel(c, scheme, noopEventEmitter{}, nil, cluster, exampleClusterClass, mergedConfig, "pg1-secret") + } + + makeRuntimeView := func(healthy bool) clusterRuntimeView { + if !healthy { + return clusterRuntimeViewAdapter{model: &clusterModel{}} + } + return clusterRuntimeViewAdapter{model: &clusterModel{ + cnpgCluster: &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}, + }, + }} + } + + // TODO: as soon as coupling is addressed, remove this monster of a test. + combinations := []struct { + name string + components []component + conditions []conditionTypes + requeue []bool + expectAll bool + message string + }{ + { + name: "Provisioner ready, pooler blocked by prerequisites", + components: func() []component { + cluster := examplePgCluster.DeepCopy() + provisioner := makeReadyProvisioner(cluster) + pooler := newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).Build(), + scheme, + noopEventEmitter{}, + nil, + cluster, + exampleClusterClass, + mergedConfig, + nil, + true, + true, + ) + return []component{provisioner, pooler} + }(), + conditions: []conditionTypes{clusterReady, poolerReady}, + requeue: []bool{false, true}, + expectAll: false, + message: "Provisioner ready but pooler gate is blocked until CNPG is healthy", + }, + { + name: "Provisioner ready, pooler ready, configMap pending from NotFound", + components: func() []component { + cluster := examplePgCluster.DeepCopy() + provisioner := makeReadyProvisioner(cluster) + pooler := newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).Build(), + scheme, + noopEventEmitter{}, + nil, + cluster, + exampleClusterClass, + mergedConfig, + nil, + false, + false, + ) + configMap := newConfigMapModel( + configMapNotFoundClient{ + Client: fake.NewClientBuilder(). + WithScheme(scheme). 
+ Build(), + }, + scheme, + noopEventEmitter{}, + nil, + makeRuntimeView(true), + cluster, + "pg1-secret", + ) + return []component{provisioner, pooler, configMap} + }(), + conditions: []conditionTypes{clusterReady, poolerReady, configMapsReady}, + requeue: []bool{false, false, true}, + expectAll: false, + message: "Provisioner and pooler ready are not enough when ConfigMap check returns NotFound/pending", + }, + { + name: "Flow successful, all components ready", + components: func() []component { + cluster := examplePgCluster.DeepCopy() + provisioner := makeReadyProvisioner(cluster) + pooler := newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).Build(), + scheme, + noopEventEmitter{}, + nil, + cluster, + exampleClusterClass, + mergedConfig, + nil, + false, + false, + ) + configMap := newConfigMapModel( + fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(exampleCm). + Build(), + scheme, + noopEventEmitter{}, + nil, + makeRuntimeView(true), + cluster, + "pg1-secret", + ) + secret := newSecretModel( + fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(exampleSecret). + Build(), + scheme, + noopEventEmitter{}, + nil, + cluster, + "pg1-secret", + ) + return []component{provisioner, pooler, configMap, secret} + }(), + conditions: []conditionTypes{clusterReady, poolerReady, configMapsReady, secretsReady}, + requeue: []bool{false, false, false, false}, + expectAll: true, + message: "", + }, + } + + for _, tt := range combinations { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + state := pgcConstants.Empty + for i, check := range tt.components { + gate, gateErr := check.EvaluatePrerequisites(ctx) + require.NoError(t, gateErr) + if !gate.Allowed { + info := gate.Health + state = info.State + assert.Equal(t, tt.conditions[i], info.Condition) + assert.Equal(t, tt.requeue[i], info.Result.RequeueAfter > 0) + continue + } + + require.NoError(t, check.Actuate(ctx)) + info, err := check.Converge(ctx) + require.NoError(t, err) + state = info.State + assert.Equal(t, tt.conditions[i], info.Condition) + assert.Equal(t, tt.requeue[i], info.Result.RequeueAfter > 0) + } + assert.Equal(t, tt.expectAll, state&pgcConstants.Ready == pgcConstants.Ready, + tt.message) + }) + } +} + +func TestSyncManagedRolesStatusFromCNPG(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + specRoles []enterprisev4.ManagedRole + cnpgStatus cnpgv1.ManagedRoles + reconciled []string + pending []string + failed map[string]string + }{ + { + name: "marks unreconciled desired role as pending", + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + cnpgStatus: cnpgv1.ManagedRoles{}, + reconciled: nil, + pending: []string{"app_user"}, + failed: nil, + }, + { + name: "maps reconciled and pending roles from CNPG status", + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + {Name: "app_rw", Exists: true}, + }, + cnpgStatus: cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusReconciled: {"app_user"}, + cnpgv1.RoleStatusPendingReconciliation: {"app_rw"}, + }, + }, + reconciled: []string{"app_user"}, + pending: []string{"app_rw"}, + failed: nil, + }, + { + name: "maps cannot reconcile errors as failed", + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + cnpgStatus: cnpgv1.ManagedRoles{ + CannotReconcile: map[string][]string{ + "app_user": {"reserved role"}, + }, + }, + reconciled: nil, + pending: nil, + failed: map[string]string{ + "app_user": "reserved role", + }, + }, + 
} + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ManagedRoles: tt.specRoles, + }, + } + cnpgCluster := &cnpgv1.Cluster{ + Status: cnpgv1.ClusterStatus{ + ManagedRolesStatus: tt.cnpgStatus, + }, + } + + syncManagedRolesStatusFromCNPG(cluster, cnpgCluster) + + require.NotNil(t, cluster.Status.ManagedRolesStatus) + assert.Equal(t, tt.reconciled, cluster.Status.ManagedRolesStatus.Reconciled) + assert.Equal(t, tt.pending, cluster.Status.ManagedRolesStatus.Pending) + assert.Equal(t, tt.failed, cluster.Status.ManagedRolesStatus.Failed) + }) + } +} + +func TestManagedRolesModelConverge(t *testing.T) { + t.Parallel() + + makeRuntimeView := func(phase string, managedRoles cnpgv1.ManagedRoles) clusterRuntimeView { + return clusterRuntimeViewAdapter{model: &clusterModel{ + cnpgCluster: &cnpgv1.Cluster{ + Status: cnpgv1.ClusterStatus{ + Phase: phase, + ManagedRolesStatus: managedRoles, + }, + }, + }} + } + + tests := []struct { + name string + runtimeView clusterRuntimeView + specRoles []enterprisev4.ManagedRole + expectedState pgcConstants.State + expectedReason conditionReasons + expectErr bool + expectStatusPublished bool + expectPending []string + expectFailed map[string]string + }{ + { + name: "returns pending when runtime is not healthy", + runtimeView: makeRuntimeView(cnpgv1.PhaseFirstPrimary, cnpgv1.ManagedRoles{}), + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + expectedState: pgcConstants.Pending, + expectedReason: reasonManagedRolesPending, + expectErr: false, + expectStatusPublished: false, + }, + { + name: "returns pending when role is still pending reconciliation", + runtimeView: makeRuntimeView(cnpgv1.PhaseHealthy, cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusPendingReconciliation: {"app_user"}, + }, + }), + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + expectedState: pgcConstants.Pending, + expectedReason: reasonManagedRolesPending, + expectErr: false, + expectStatusPublished: true, + expectPending: []string{"app_user"}, + }, + { + name: "returns failed when role cannot reconcile", + runtimeView: makeRuntimeView(cnpgv1.PhaseHealthy, cnpgv1.ManagedRoles{ + CannotReconcile: map[string][]string{ + "app_user": {"reserved role"}, + }, + }), + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + expectedState: pgcConstants.Failed, + expectedReason: reasonManagedRolesFailed, + expectErr: true, + expectStatusPublished: true, + expectFailed: map[string]string{ + "app_user": "reserved role", + }, + }, + { + name: "returns ready when all desired roles are reconciled", + runtimeView: makeRuntimeView(cnpgv1.PhaseHealthy, cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusReconciled: {"app_user", "app_user_rw"}, + }, + }), + specRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + {Name: "app_user_rw", Exists: true}, + }, + expectedState: pgcConstants.Ready, + expectedReason: reasonManagedRolesReady, + expectErr: false, + expectStatusPublished: true, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ManagedRoles: tt.specRoles, + }, + } + model := newManagedRolesModel( + fake.NewClientBuilder().Build(), + nil, + noopEventEmitter{}, + 
nil, + tt.runtimeView, + cluster, + "pg1-secret", + ) + + health, err := model.Converge(context.Background()) + if tt.expectErr { + require.Error(t, err) + } else { + require.NoError(t, err) + } + + assert.Equal(t, managedRolesReady, health.Condition) + assert.Equal(t, tt.expectedState, health.State) + assert.Equal(t, tt.expectedReason, health.Reason) + if tt.expectStatusPublished { + require.NotNil(t, cluster.Status.ManagedRolesStatus) + assert.Equal(t, tt.expectPending, cluster.Status.ManagedRolesStatus.Pending) + assert.Equal(t, tt.expectFailed, cluster.Status.ManagedRolesStatus.Failed) + } else { + assert.Nil(t, cluster.Status.ManagedRolesStatus) + } + }) + } +} + +func TestManagedRolesRuntimeGateHealthMatchesConverge(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ManagedRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + }, + } + model := newManagedRolesModel( + fake.NewClientBuilder().Build(), + nil, + noopEventEmitter{}, + nil, + clusterRuntimeViewAdapter{model: &clusterModel{ + cnpgCluster: &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseFirstPrimary}}, + }}, + cluster, + "pg1-secret", + ) + + gate, err := model.EvaluatePrerequisites(context.Background()) + require.NoError(t, err) + require.False(t, gate.Allowed) + + health, err := model.Converge(context.Background()) + require.NoError(t, err) + assert.Equal(t, gate.Health, health) +} + +func TestPoolerModelConvergeSetsConnectionPoolerStatus(t *testing.T) { + t.Parallel() + + scheme := runtime.NewScheme() + require.NoError(t, enterprisev4.AddToScheme(scheme)) + require.NoError(t, cnpgv1.AddToScheme(scheme)) + require.NoError(t, corev1.AddToScheme(scheme)) + + t.Run("does not set enabled true while pooler is pending", func(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + } + clusterClass := &enterprisev4.PostgresClusterClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1-class", + Namespace: "default", + }, + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{ + ConnectionPoolerEnabled: ptr.To(true), + }, + }, + } + model := newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).Build(), + scheme, + noopEventEmitter{}, + nil, + cluster, + clusterClass, + &MergedConfig{}, + nil, + true, + true, + ) + + health, err := model.Converge(context.Background()) + require.NoError(t, err) + assert.Nil(t, cluster.Status.ConnectionPoolerStatus) + assert.Equal(t, pgcConstants.Pending, health.State) + }) + + t.Run("sets enabled true when pooler converges ready", func(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + } + clusterClass := &enterprisev4.PostgresClusterClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1-class", + Namespace: "default", + }, + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{ + ConnectionPoolerEnabled: ptr.To(true), + }, + }, + } + rwPooler := &cnpgv1.Pooler{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolerResourceName(cluster.Name, readWriteEndpoint), + Namespace: cluster.Namespace, + }, + Status: cnpgv1.PoolerStatus{Instances: 1}, + } + roPooler := &cnpgv1.Pooler{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolerResourceName(cluster.Name, readOnlyEndpoint), + Namespace: cluster.Namespace, + }, + Status: 
cnpgv1.PoolerStatus{Instances: 1}, + } + model := newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).WithObjects(rwPooler, roPooler).Build(), + scheme, + noopEventEmitter{}, + nil, + cluster, + clusterClass, + &MergedConfig{}, + &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}, + true, + true, + ) + + health, err := model.Converge(context.Background()) + require.NoError(t, err) + assert.Equal(t, &enterprisev4.ConnectionPoolerStatus{Enabled: true}, cluster.Status.ConnectionPoolerStatus) + assert.Equal(t, pgcConstants.Ready, health.State) + }) + + t.Run("sets status nil when pooler disabled", func(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + Status: enterprisev4.PostgresClusterStatus{ + ConnectionPoolerStatus: &enterprisev4.ConnectionPoolerStatus{Enabled: true}, + }, + } + clusterClass := &enterprisev4.PostgresClusterClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1-class", + Namespace: "default", + }, + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{ + ConnectionPoolerEnabled: ptr.To(true), + }, + }, + } + model := newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).Build(), + scheme, + noopEventEmitter{}, + nil, + cluster, + clusterClass, + &MergedConfig{}, + nil, + false, + false, + ) + + require.NoError(t, model.Actuate(context.Background())) + health, err := model.Converge(context.Background()) + require.NoError(t, err) + assert.Nil(t, cluster.Status.ConnectionPoolerStatus) + assert.Equal(t, pgcConstants.Ready, health.State) + }) +} + +func TestPoolerConvergeEmitsReadyEventOnTransition(t *testing.T) { + t.Parallel() + + scheme := runtime.NewScheme() + require.NoError(t, enterprisev4.AddToScheme(scheme)) + require.NoError(t, cnpgv1.AddToScheme(scheme)) + require.NoError(t, corev1.AddToScheme(scheme)) + + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + } + clusterClass := &enterprisev4.PostgresClusterClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1-class", + Namespace: "default", + }, + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{ + ConnectionPoolerEnabled: ptr.To(true), + }, + }, + } + events := &captureEventEmitter{} + rwPooler := &cnpgv1.Pooler{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolerResourceName(cluster.Name, readWriteEndpoint), + Namespace: cluster.Namespace, + }, + Status: cnpgv1.PoolerStatus{Instances: 1}, + } + roPooler := &cnpgv1.Pooler{ + ObjectMeta: metav1.ObjectMeta{ + Name: poolerResourceName(cluster.Name, readOnlyEndpoint), + Namespace: cluster.Namespace, + }, + Status: cnpgv1.PoolerStatus{Instances: 1}, + } + model := newPoolerModel( + fake.NewClientBuilder().WithScheme(scheme).WithObjects(rwPooler, roPooler).Build(), + scheme, + events, + nil, + cluster, + clusterClass, + &MergedConfig{}, + &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}, + true, + true, + ) + + _, err := model.Converge(context.Background()) + require.NoError(t, err) + require.NotEmpty(t, events.normals) + assert.Contains(t, events.normals[0], EventPoolerReady) + + // No re-emission when condition already True. 
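+	// (emitPoolerReadyTransition keys off the prior conditions, so seeding
+	// PoolerReady=True below is expected to keep the second Converge silent.)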
+ cluster.Status.Conditions = []metav1.Condition{{ + Type: string(poolerReady), + Status: metav1.ConditionTrue, + }} + events.normals = nil + _, err = model.Converge(context.Background()) + require.NoError(t, err) + assert.Empty(t, events.normals) +} + +func TestManagedRolesConvergeDoesNotEmitFailureForPending(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ManagedRoles: []enterprisev4.ManagedRole{{Name: "app_user", Exists: true}}, + }, + } + events := &captureEventEmitter{} + model := newManagedRolesModel( + fake.NewClientBuilder().Build(), + nil, + events, + nil, + clusterRuntimeViewAdapter{model: &clusterModel{ + cnpgCluster: &cnpgv1.Cluster{ + Status: cnpgv1.ClusterStatus{ + Phase: cnpgv1.PhaseHealthy, + ManagedRolesStatus: cnpgv1.ManagedRoles{}, + }, + }, + }}, + cluster, + "pg1-secret", + ) + + _, err := model.Converge(context.Background()) + require.NoError(t, err) + assert.Empty(t, events.warnings) +} + +func TestManagedRolesConvergeEmitsReadyEventOnTransition(t *testing.T) { + t.Parallel() + + cluster := &enterprisev4.PostgresCluster{ + Spec: enterprisev4.PostgresClusterSpec{ + ManagedRoles: []enterprisev4.ManagedRole{ + {Name: "app_user", Exists: true}, + }, + }, + } + events := &captureEventEmitter{} + model := newManagedRolesModel( + fake.NewClientBuilder().Build(), + nil, + events, + nil, + clusterRuntimeViewAdapter{model: &clusterModel{ + cnpgCluster: &cnpgv1.Cluster{ + Status: cnpgv1.ClusterStatus{ + Phase: cnpgv1.PhaseHealthy, + ManagedRolesStatus: cnpgv1.ManagedRoles{ + ByStatus: map[cnpgv1.RoleStatus][]string{ + cnpgv1.RoleStatusReconciled: {"app_user"}, + }, + }, + }, + }, + }}, + cluster, + "pg1-secret", + ) + + _, err := model.Converge(context.Background()) + require.NoError(t, err) + require.NotEmpty(t, events.normals) + assert.Contains(t, events.normals[0], EventManagedRolesReady) + + // No re-emission when condition already True. + cluster.Status.Conditions = []metav1.Condition{{ + Type: string(managedRolesReady), + Status: metav1.ConditionTrue, + }} + events.normals = nil + _, err = model.Converge(context.Background()) + require.NoError(t, err) + assert.Empty(t, events.normals) +} diff --git a/pkg/postgresql/cluster/core/events.go b/pkg/postgresql/cluster/core/events.go index afcfd768e..551ce9147 100644 --- a/pkg/postgresql/cluster/core/events.go +++ b/pkg/postgresql/cluster/core/events.go @@ -25,6 +25,7 @@ const ( EventClusterCreateFailed = "ClusterCreateFailed" EventClusterUpdateFailed = "ClusterUpdateFailed" EventManagedRolesFailed = "ManagedRolesFailed" + EventManagedRolesReady = "ManagedRolesReady" EventPoolerReconcileFailed = "PoolerReconcileFailed" EventConfigMapReconcileFailed = "ConfigMapReconcileFailed" EventClusterDegraded = "ClusterDegraded" @@ -60,3 +61,13 @@ func (rc *ReconcileContext) emitPoolerReadyTransition(obj client.Object, conditi rc.emitNormal(obj, EventPoolerReady, "Connection poolers are ready") } } + +// emitPoolerCreationTransition emits PoolerCreationStarted only when the +// pooler condition is not already in the creating state. 
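+// It is therefore safe to call on every reconcile pass, e.g.
+//
+//	rc.emitPoolerCreationTransition(cluster, cluster.Status.Conditions)
+//
+// only the first transition into PoolerCreating produces an event.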
+func (rc *ReconcileContext) emitPoolerCreationTransition(obj client.Object, conditions []metav1.Condition) { + cond := meta.FindStatusCondition(conditions, string(poolerReady)) + if cond != nil && cond.Status == metav1.ConditionFalse && cond.Reason == string(reasonPoolerCreating) { + return + } + rc.emitNormal(obj, EventPoolerCreationStarted, "Connection poolers created, waiting for readiness") +} diff --git a/pkg/postgresql/cluster/core/types.go b/pkg/postgresql/cluster/core/types.go index 7684e6df0..b0f0ed0d7 100644 --- a/pkg/postgresql/cluster/core/types.go +++ b/pkg/postgresql/cluster/core/types.go @@ -45,6 +45,7 @@ type MergedConfig struct { type reconcileClusterPhases string type conditionTypes string type conditionReasons string +type statusMessage = string type objectKind string const ( @@ -53,9 +54,17 @@ const ( readOnlyEndpoint string = "ro" readWriteEndpoint string = "rw" - defaultDatabaseName string = "postgres" - superUsername string = "postgres" - defaultPort string = "5432" + defaultDatabaseName string = "postgres" + superUsername string = "postgres" + defaultPort string = "5432" + configKeyClusterRWEndpoint string = "CLUSTER_RW_ENDPOINT" + configKeyClusterROEndpoint string = "CLUSTER_RO_ENDPOINT" + configKeyClusterREndpoint string = "CLUSTER_R_ENDPOINT" + configKeyDefaultClusterPort string = "DEFAULT_CLUSTER_PORT" + configKeySuperUserName string = "SUPER_USER_NAME" + configKeySuperUserSecretRef string = "SUPER_USER_SECRET_REF" + configKeyPoolerRWEndpoint string = "CLUSTER_POOLER_RW_ENDPOINT" + configKeyPoolerROEndpoint string = "CLUSTER_POOLER_RO_ENDPOINT" secretKeyPassword string = "password" defaultSecretSuffix string = "-secret" @@ -77,21 +86,33 @@ const ( failedClusterPhase reconcileClusterPhases = "Failed" // condition types - clusterReady conditionTypes = "ClusterReady" - poolerReady conditionTypes = "PoolerReady" - - // condition reasons — clusterReady - reasonClusterClassNotFound conditionReasons = "ClusterClassNotFound" - reasonManagedRolesFailed conditionReasons = "ManagedRolesReconciliationFailed" - reasonClusterBuildFailed conditionReasons = "ClusterBuildFailed" - reasonClusterBuildSucceeded conditionReasons = "ClusterBuildSucceeded" - reasonClusterGetFailed conditionReasons = "ClusterGetFailed" - reasonClusterPatchFailed conditionReasons = "ClusterPatchFailed" - reasonInvalidConfiguration conditionReasons = "InvalidConfiguration" - reasonConfigMapFailed conditionReasons = "ConfigMapReconciliationFailed" + clusterReady conditionTypes = "ClusterReady" + poolerReady conditionTypes = "PoolerReady" + managedRolesReady conditionTypes = "ManagedRolesReady" + secretsReady conditionTypes = "SecretsReady" + configMapsReady conditionTypes = "ConfigMapsReady" + + // condition reasons — cluster/provisioner + reasonClusterClassNotFound conditionReasons = "ClusterClassNotFound" + reasonInvalidConfiguration conditionReasons = "InvalidConfiguration" + + // condition reasons — managedRolesReady + reasonManagedRolesReady conditionReasons = "ManagedRolesReconciled" + reasonManagedRolesPending conditionReasons = "ManagedRolesPending" + reasonManagedRolesFailed conditionReasons = "ManagedRolesReconciliationFailed" + + // condition reasons — configMapsReady + reasonConfigMapReady conditionReasons = "ConfigMapReconciled" + reasonConfigMapFailed conditionReasons = "ConfigMapReconciliationFailed" + + // condition reasons — secretsReady + reasonUserSecretPending conditionReasons = "UserSecretPending" reasonUserSecretFailed conditionReasons = "UserSecretReconciliationFailed" + 
reasonSuperUserSecretReady conditionReasons = "SuperUserSecretReady" reasonSuperUserSecretFailed conditionReasons = "SuperUserSecretFailed" - reasonClusterDeleteFailed conditionReasons = "ClusterDeleteFailed" + + // condition reasons — lifecycle/finalizer + reasonClusterDeleteFailed conditionReasons = "ClusterDeleteFailed" // condition reasons — poolerReady reasonPoolerReconciliationFailed conditionReasons = "PoolerReconciliationFailed" @@ -114,4 +135,38 @@ const ( reasonCNPGProvisioningFailed conditionReasons = "CNPGProvisioningFailed" reasonCNPGPluginError conditionReasons = "CNPGPluginError" reasonCNPGImageError conditionReasons = "CNPGImageError" + + // status messages — provisioner health check + msgProvisionerHealthy statusMessage = "Provisioner cluster is healthy" + msgCNPGPendingCreation statusMessage = "CNPG cluster is pending creation" + msgFmtCNPGProvisioning statusMessage = "CNPG cluster provisioning: %s" + msgCNPGSwitchover statusMessage = "Cluster changing primary node" + msgCNPGFailingOver statusMessage = "Pod missing, need to change primary" + msgFmtCNPGRestarting statusMessage = "CNPG cluster restarting: %s" + msgFmtCNPGUpgrading statusMessage = "CNPG cluster upgrading: %s" + msgCNPGApplyingConfiguration statusMessage = "Configuration change is being applied" + msgCNPGPromoting statusMessage = "Replica is being promoted to primary" + msgCNPGWaitingForUser statusMessage = "Action from the user is required" + msgCNPGUnrecoverable statusMessage = "Cluster failed, needs manual intervention" + msgCNPGCannotCreateObjects statusMessage = "Cluster resources cannot be created" + msgFmtCNPGPluginError statusMessage = "CNPG plugin error: %s" + msgFmtCNPGImageError statusMessage = "CNPG image error: %s" + msgFmtCNPGClusterPhase statusMessage = "CNPG cluster phase: %s" + + // status messages — aggregate and component readiness checks + msgPoolerDisabled statusMessage = "Connection pooler disabled" + msgPoolerConfigMissing statusMessage = "Connection pooler enabled but configuration is missing" + msgPoolersProvisioning statusMessage = "Connection poolers are being provisioned" + msgWaitRWPoolerObject statusMessage = "Waiting for RW pooler object" + msgWaitROPoolerObject statusMessage = "Waiting for RO pooler object" + msgPoolersNotReady statusMessage = "Connection poolers are not ready yet" + msgPoolersReady statusMessage = "Connection poolers are ready" + msgConfigMapRefNotPublished statusMessage = "ConfigMap reference not published yet" + msgConfigMapNotFoundYet statusMessage = "ConfigMap not found yet" + msgFmtConfigMapMissingRequiredKey statusMessage = "ConfigMap missing required key %q" + msgAccessConfigMapReady statusMessage = "Access ConfigMap is ready" + msgSecretRefNotPublished statusMessage = "Superuser secret reference not published yet" + msgSecretNotFoundYet statusMessage = "Superuser secret not found yet" + msgFmtSecretMissingKey statusMessage = "Superuser secret missing key %q" + msgSuperuserSecretReady statusMessage = "Superuser secret is ready" ) diff --git a/pkg/postgresql/cluster/core/types/constants/components.go b/pkg/postgresql/cluster/core/types/constants/components.go new file mode 100644 index 000000000..f6dcdfb7b --- /dev/null +++ b/pkg/postgresql/cluster/core/types/constants/components.go @@ -0,0 +1,9 @@ +package pgcConstants + +const ( + ComponentManagedRoles = "managedRoles" + ComponentProvisioner = "provisioner" + ComponentPooler = "pooler" + ComponentConfigMap = "configMap" + ComponentSecret = "secret" +) diff --git 
a/pkg/postgresql/cluster/core/types/constants/state.go b/pkg/postgresql/cluster/core/types/constants/state.go new file mode 100644 index 000000000..7f4da47e9 --- /dev/null +++ b/pkg/postgresql/cluster/core/types/constants/state.go @@ -0,0 +1,24 @@ +package pgcConstants + +type State uint64 + +const ( + Empty State = 0 + Ready State = 1 << iota + Pending + Provisioning + Configuring + Failed +) + +func (s State) Contains(state State) bool { + return s&state == state +} + +func (s State) Add(state State) State { + return s | state +} + +func (s State) Remove(state State) State { + return s &^ state +} From 95fcb2deaa7b09258eb3a7ce5dcc7ca8eec84e77 Mon Sep 17 00:00:00 2001 From: Jakub Koterba Date: Mon, 20 Apr 2026 15:50:56 +0200 Subject: [PATCH 34/36] review changes --- .../postgrescluster_controller_test.go | 152 ++++++- pkg/postgresql/cluster/core/cluster.go | 386 +++++++++++------- .../cluster/core/cluster_unit_test.go | 277 ++++++++++++- pkg/postgresql/cluster/core/events.go | 1 + pkg/postgresql/cluster/core/types.go | 3 + 5 files changed, 652 insertions(+), 167 deletions(-) diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go index b7becdff5..9aeed63eb 100644 --- a/internal/controller/postgrescluster_controller_test.go +++ b/internal/controller/postgrescluster_controller_test.go @@ -54,18 +54,24 @@ import ( * PC-09 ignores no-op updates */ -func containsEvents(events *[]string, recorder *record.FakeRecorder, eventType string, event string) bool { +func CollectEvents(events *[]string, recorder *record.FakeRecorder) { for { select { case e := <-recorder.Events: *events = append(*events, e) - if strings.Contains(e, eventType) && strings.Contains(e, event) { - return true - } default: - return false + return + } + } +} + +func ContainsEvent(events []string, eventType string, event string) bool { + for _, e := range events { + if strings.Contains(e, eventType) && strings.Contains(e, event) { + return true } } + return false } var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { @@ -278,10 +284,31 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Expect(cond.Status).To(Equal(metav1.ConditionFalse)) Expect(cond.Reason).To(Equal("CNPGClusterProvisioning")) - // Simulate external CNPG controller status progression. + secretCond := meta.FindStatusCondition(pc.Status.Conditions, "SecretsReady") + Expect(secretCond).NotTo(BeNil()) + Expect(secretCond.Status).To(Equal(metav1.ConditionTrue)) + Expect(secretCond.Reason).To(Equal("SuperUserSecretReady")) + + configMapCond := meta.FindStatusCondition(pc.Status.Conditions, "ConfigMapsReady") + // ConfigMap converge runs in the runtime phase; at this point reconcile may + // still be returning from provisioner pending and not have written it yet. + Expect(configMapCond).To(BeNil()) + + // Simulate CNPG becoming healthy first, but without managed roles status published yet. 
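+			// This is the window in which managedRolesModel.Converge must report
+			// ManagedRolesPending: the cluster is healthy, but CNPG has not yet
+			// published a status for the requested roles.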
cnpg := &cnpgv1.Cluster{} Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed()) cnpg.Status.Phase = cnpgv1.PhaseHealthy + Expect(k8sClient.Status().Update(ctx, cnpg)).To(Succeed()) + reconcileNTimes(1) + + Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed()) + managedRolesCond := meta.FindStatusCondition(pc.Status.Conditions, "ManagedRolesReady") + Expect(managedRolesCond).NotTo(BeNil()) + Expect(managedRolesCond.Status).To(Equal(metav1.ConditionFalse)) + Expect(managedRolesCond.Reason).To(Equal("ManagedRolesPending")) + + // Simulate external CNPG controller publishing managed roles status. + Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed()) cnpg.Status.ManagedRolesStatus = cnpgv1.ManagedRoles{ ByStatus: map[cnpgv1.RoleStatus][]string{ cnpgv1.RoleStatusReconciled: {"app_user", "app_user_rw"}, @@ -297,17 +324,17 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Expect(cond.Status).To(Equal(metav1.ConditionTrue)) Expect(cond.Reason).To(Equal("CNPGClusterHealthy")) - secretCond := meta.FindStatusCondition(pc.Status.Conditions, "SecretsReady") + secretCond = meta.FindStatusCondition(pc.Status.Conditions, "SecretsReady") Expect(secretCond).NotTo(BeNil()) Expect(secretCond.Status).To(Equal(metav1.ConditionTrue)) Expect(secretCond.Reason).To(Equal("SuperUserSecretReady")) - configMapCond := meta.FindStatusCondition(pc.Status.Conditions, "ConfigMapsReady") + configMapCond = meta.FindStatusCondition(pc.Status.Conditions, "ConfigMapsReady") Expect(configMapCond).NotTo(BeNil()) Expect(configMapCond.Status).To(Equal(metav1.ConditionTrue)) Expect(configMapCond.Reason).To(Equal("ConfigMapReconciled")) - managedRolesCond := meta.FindStatusCondition(pc.Status.Conditions, "ManagedRolesReady") + managedRolesCond = meta.FindStatusCondition(pc.Status.Conditions, "ManagedRolesReady") Expect(managedRolesCond).NotTo(BeNil()) Expect(managedRolesCond.Status).To(Equal(metav1.ConditionTrue)) Expect(managedRolesCond.Reason).To(Equal("ManagedRolesReconciled")) @@ -333,8 +360,13 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Expect(pc.Status.Resources.ConfigMapRef).NotTo(BeNil()) received := make([]string, 0, 8) - Expect(containsEvents( - &received, fakeRecorder, + CollectEvents(&received, fakeRecorder) + Expect(ContainsEvent( + received, + v1.EventTypeNormal, core.EventConfigMapReconciled, + )).To(BeTrue(), "events seen: %v", received) + Expect(ContainsEvent( + received, v1.EventTypeNormal, core.EventClusterReady, )).To(BeTrue(), "events seen: %v", received) }) @@ -442,6 +474,13 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { Expect(apierrors.IsNotFound(k8sClient.Get(ctx, rwKey, &cnpgv1.Pooler{}))).To(BeTrue()) Expect(apierrors.IsNotFound(k8sClient.Get(ctx, roKey, &cnpgv1.Pooler{}))).To(BeTrue()) + pc := &enterprisev4.PostgresCluster{} + Expect(k8sClient.Get(ctx, pgClusterKey, pc)).To(Succeed()) + poolerCond := meta.FindStatusCondition(pc.Status.Conditions, "PoolerReady") + // Pooler component is gated behind provisioner readiness, so before CNPG + // becomes healthy the condition may not be written yet. 
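+			// (BeNil rather than ConditionFalse: reconcile returns before the
+			// pooler component ever runs, so nothing has written the condition.)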
+			Expect(poolerCond).To(BeNil())
+
 			cnpg := &cnpgv1.Cluster{}
 			Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed())
 			cnpg.Status.Phase = cnpgv1.PhaseHealthy
@@ -464,6 +503,28 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() {
 				g.Expect(ro.Spec.Template.ObjectMeta.Annotations).To(HaveKeyWithValue(scrapeAnnotationKey, "true"))
 				g.Expect(ro.Spec.Template.ObjectMeta.Annotations).To(HaveKeyWithValue(pathAnnotationKey, metricsPath))
 				g.Expect(ro.Spec.Template.ObjectMeta.Annotations).To(HaveKeyWithValue(portAnnotationKey, poolerPort))
+
+				// Simulate CNPG pooler controller publishing status progression.
+				if rw.Status.Instances < 2 {
+					rw.Status.Instances = 2
+					g.Expect(k8sClient.Status().Update(ctx, rw)).To(Succeed())
+				}
+				if ro.Status.Instances < 2 {
+					ro.Status.Instances = 2
+					g.Expect(k8sClient.Status().Update(ctx, ro)).To(Succeed())
+				}
+			}, "20s", "250ms").Should(Succeed())
+
+			Eventually(func(g Gomega) {
+				_, err := reconciler.Reconcile(ctx, req)
+				g.Expect(err).NotTo(HaveOccurred())
+
+				updated := &enterprisev4.PostgresCluster{}
+				g.Expect(k8sClient.Get(ctx, pgClusterKey, updated)).To(Succeed())
+				poolerReadyCond := meta.FindStatusCondition(updated.Status.Conditions, "PoolerReady")
+				g.Expect(poolerReadyCond).NotTo(BeNil())
+				g.Expect(poolerReadyCond.Status).To(Equal(metav1.ConditionTrue))
+				g.Expect(poolerReadyCond.Reason).To(Equal("AllInstancesReady"))
 			}, "20s", "250ms").Should(Succeed())
 		})
 	})
@@ -543,8 +604,9 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() {
 			}, "20s", "250ms").Should(BeTrue())
 			received := make([]string, 0, 8)
-			Expect(containsEvents(
-				&received, fakeRecorder,
+			CollectEvents(&received, fakeRecorder)
+			Expect(ContainsEvent(
+				received,
 				v1.EventTypeWarning, core.EventClusterClassNotFound,
 			)).To(BeTrue(), "events seen: %v", received)
 		})
@@ -566,5 +628,69 @@ var _ = Describe("PostgresCluster Controller", Label("postgres"), func() {
 			Expect(cnpg.Spec.Instances).To(Equal(int(clusterMemberCount)))
 		})
 	})
+
+	Context("when a configmap spec changes", func() {
+		BeforeEach(func() {
+			// Keep this test focused on ConfigMap behavior; otherwise reconcile can
+			// stop on ManagedRolesPending before ConfigMap status is written.
+			pgCluster.Spec.ManagedRoles = nil
+		})
+
+		It("emits ConfigMapReconciled event on configmap update", func() {
+			Expect(k8sClient.Create(ctx, pgCluster)).To(Succeed())
+			reconcileNTimes(2)
+
+			// Mark the CNPG cluster healthy so the runtime phase can reach the ConfigMap component.
+			cnpg := &cnpgv1.Cluster{}
+			Expect(k8sClient.Get(ctx, pgClusterKey, cnpg)).To(Succeed())
+			cnpg.Status.Phase = cnpgv1.PhaseHealthy
+			Expect(k8sClient.Status().Update(ctx, cnpg)).To(Succeed())
+			reconcileNTimes(1)
+
+			// Drain baseline events so we don't match the initial "created" event.
+			received := make([]string, 0, 16)
+			CollectEvents(&received, fakeRecorder)
+			received = received[:0]
+
+			// Drift the managed ConfigMap.
+			pc := &enterprisev4.PostgresCluster{}
+			Eventually(func() bool {
+				if err := k8sClient.Get(ctx, pgClusterKey, pc); err != nil {
+					return false
+				}
+				return pc.Status.Resources != nil && pc.Status.Resources.ConfigMapRef != nil
+			}, "5s", "100ms").Should(BeTrue())
+
+			cmKey := types.NamespacedName{
+				Name:      pc.Status.Resources.ConfigMapRef.Name,
+				Namespace: namespace,
+			}
+			cm := &v1.ConfigMap{}
+			Expect(k8sClient.Get(ctx, cmKey, cm)).To(Succeed())
+			delete(cm.Data, "CLUSTER_RW_ENDPOINT") // force reconciliation update
+			Expect(k8sClient.Update(ctx, cm)).To(Succeed())
+
+			// Reconcile and assert updated event.
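+			// One pass should suffice: configMapModel.Actuate re-applies the
+			// desired data via CreateOrUpdate, which reports OperationResultUpdated
+			// and emits the ConfigMapReconciled event we wait for below.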
+ reconcileNTimes(1) + + Eventually(func() bool { + CollectEvents(&received, fakeRecorder) + + // reason match + if !ContainsEvent(received, v1.EventTypeNormal, core.EventConfigMapReconciled) { + return false + } + // message-level match for update (not create) + for _, e := range received { + if strings.Contains(e, v1.EventTypeNormal) && + strings.Contains(e, core.EventConfigMapReconciled) && + strings.Contains(e, "updated") { + return true + } + } + return false + }, "5s", "100ms").Should(BeTrue(), "events seen: %v", received) + }) + }) }) }) diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index f0efc1926..aa566b9bd 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -24,6 +24,7 @@ import ( "strings" cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" + "github.com/go-logr/logr" password "github.com/sethvargo/go-password/password" enterprisev4 "github.com/splunk/splunk-operator/api/v4" pgcConstants "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core/types/constants" @@ -165,49 +166,92 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. secretComponent := newSecretModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, postgresSecretName) clusterComponent := newClusterModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, clusterClass, mergedConfig, postgresSecretName) - bootstrapComponents := []component{ - secretComponent, - clusterComponent, + bootstrapManager := &componentManager{ + components: []component{ + secretComponent, + clusterComponent, + }, + logger: logger, + } + result, err := bootstrapManager.Handle(ctx) + if err != nil { + return result, err + } + if result != (ctrl.Result{}) { + return result, nil } - phase := func(component component) (ctrl.Result, error) { - componentLogger := logger.WithValues("component", component.Name()) - gate, gateErr := component.EvaluatePrerequisites(ctx) - if gateErr != nil { - if isTransientError(gateErr) { - componentLogger.Error(gateErr, "Component prerequisite transient error, requeueing", "step", "prerequisites") - return transientResult(gateErr), nil - } - componentLogger.Error(gateErr, "Component prerequisite evaluation failed", "step", "prerequisites") - return ctrl.Result{}, fmt.Errorf("%s prerequisites: %w", component.Name(), gateErr) + cnpgCluster = clusterComponent.cnpgCluster + runtimeView := clusterRuntimeViewAdapter{model: clusterComponent} + + runtimeManager := &componentManager{ + components: []component{ + newManagedRolesModel(c, rc.Scheme, rc, updateComponentHealthStatus, runtimeView, postgresCluster, postgresSecretName), + newPoolerModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, clusterClass, mergedConfig, cnpgCluster, poolerEnabled, poolerConfigPresent), + newConfigMapModel(c, rc.Scheme, rc, updateComponentHealthStatus, runtimeView, postgresCluster, postgresSecretName), + }, + logger: logger, + } + + result, err = runtimeManager.Handle(ctx) + if err != nil { + return result, err + } + if result != (ctrl.Result{}) { + return result, nil + } + + logger.Info("Reconciliation complete") + if err := updatePhaseStatus(readyClusterPhase); err != nil { + if apierrors.IsConflict(err) { + return ctrl.Result{Requeue: true}, nil } - if !gate.Allowed { + return ctrl.Result{}, err + } + return ctrl.Result{}, nil +} + +func isTransientError(err error) bool { + return apierrors.IsConflict(err) || + apierrors.IsServerTimeout(err) || + 
apierrors.IsTooManyRequests(err) || + apierrors.IsTimeout(err) +} + +func transientResult(err error) ctrl.Result { + if apierrors.IsConflict(err) { + return ctrl.Result{Requeue: true} + } + return ctrl.Result{RequeueAfter: retryDelay} +} + +func writeComponentStatus(updateStatus healthStatusUpdater, health componentHealth) error { + if updateStatus == nil { + return nil + } + return updateStatus(health) +} + +type componentManager struct { + components []component + logger logr.Logger +} + +func (m *componentManager) Handle(ctx context.Context) (ctrl.Result, error) { + for _, component := range m.components { + componentLogger := m.logger.WithValues("component", component.Name()) + gate := component.EvaluatePrerequisites(ctx) + + if gate.Allowed { + component.Actuate(ctx) + } else { componentLogger.Info("Component blocked by prerequisites", "step", "prerequisites", "condition", gate.Health.Condition, "reason", gate.Health.Reason, "phase", gate.Health.Phase, "requeueAfter", gate.Health.Result.RequeueAfter) - health, err := component.Converge(ctx) - if err != nil && isTransientError(err) { - return transientResult(err), nil - } - if err != nil { - componentLogger.Error(err, "Blocked component convergence failed", "step", "converge") - return health.Result, fmt.Errorf("%s converge (blocked): %w", component.Name(), err) - } - return health.Result, nil - } - - if err := component.Actuate(ctx); err != nil { - if isTransientError(err) { - componentLogger.Error(err, "Component actuation transient error, requeueing", "step", "actuate") - return transientResult(err), nil - } - componentLogger.Error(err, "Component actuation failed", "step", "actuate") - return ctrl.Result{}, fmt.Errorf("%s actuate: %w", component.Name(), err) } - componentLogger.Info("Component actuation completed", "step", "actuate") health, err := component.Converge(ctx) if err != nil && isTransientError(err) { @@ -243,67 +287,10 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. 
"requeueAfter", health.Result.RequeueAfter) return health.Result, nil } - return ctrl.Result{}, nil - } - - for _, component := range bootstrapComponents { - result, err := phase(component) - if err != nil { - return result, err - } - if result != (ctrl.Result{}) { - return result, nil - } - } - - cnpgCluster = clusterComponent.cnpgCluster - runtimeView := clusterRuntimeViewAdapter{model: clusterComponent} - runtimeComponents := []component{ - newManagedRolesModel(c, rc.Scheme, rc, updateComponentHealthStatus, runtimeView, postgresCluster, postgresSecretName), - newPoolerModel(c, rc.Scheme, rc, updateComponentHealthStatus, postgresCluster, clusterClass, mergedConfig, cnpgCluster, poolerEnabled, poolerConfigPresent), - newConfigMapModel(c, rc.Scheme, rc, updateComponentHealthStatus, runtimeView, postgresCluster, postgresSecretName), - } - - for _, component := range runtimeComponents { - result, err := phase(component) - if err != nil { - return result, err - } - if result != (ctrl.Result{}) { - return result, nil - } - } - logger.Info("Reconciliation complete") - if err := updatePhaseStatus(readyClusterPhase); err != nil { - if apierrors.IsConflict(err) { - return ctrl.Result{Requeue: true}, nil - } - return ctrl.Result{}, err } return ctrl.Result{}, nil } -func isTransientError(err error) bool { - return apierrors.IsConflict(err) || - apierrors.IsServerTimeout(err) || - apierrors.IsTooManyRequests(err) || - apierrors.IsTimeout(err) -} - -func transientResult(err error) ctrl.Result { - if apierrors.IsConflict(err) { - return ctrl.Result{Requeue: true} - } - return ctrl.Result{RequeueAfter: retryDelay} -} - -func writeComponentStatus(updateStatus healthStatusUpdater, health componentHealth) error { - if updateStatus == nil { - return nil - } - return updateStatus(health) -} - // types/dto candidate type componentHealth struct { State pgcConstants.State @@ -315,9 +302,9 @@ type componentHealth struct { } type component interface { - Actuate(ctx context.Context) error + Actuate(ctx context.Context) Converge(ctx context.Context) (componentHealth, error) - EvaluatePrerequisites(ctx context.Context) (prerequisiteDecision, error) + EvaluatePrerequisites(ctx context.Context) prerequisiteDecision Name() string } @@ -371,6 +358,7 @@ type clusterModel struct { metricsEnabled bool health componentHealth + actuateErr error } func newClusterModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, cluster *enterprisev4.PostgresCluster, clusterClass *enterprisev4.PostgresClusterClass, mergedConfig *MergedConfig, secretName string) *clusterModel { @@ -386,17 +374,18 @@ func newClusterModel(c client.Client, scheme *runtime.Scheme, events eventEmitte func (p *clusterModel) Name() string { return pgcConstants.ComponentProvisioner } -func (p *clusterModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { +func (p *clusterModel) EvaluatePrerequisites(_ context.Context) prerequisiteDecision { if health, missing := p.getHealthOnMissingSecretRef(); missing { return prerequisiteDecision{ Allowed: false, Health: health, - }, nil + } } - return prerequisiteDecision{Allowed: true}, nil + return prerequisiteDecision{Allowed: true} } -func (p *clusterModel) Actuate(ctx context.Context) error { +func (p *clusterModel) Actuate(ctx context.Context) { + p.actuateErr = nil p.cnpgCreated = false p.cnpgPatched = false @@ -408,17 +397,35 @@ func (p *clusterModel) Actuate(ctx context.Context) error { newCluster, err := buildCNPGCluster(p.scheme, p.cluster, 
p.mergedConfig, p.secretName, p.metricsEnabled) if err != nil { p.events.emitWarning(p.cluster, EventClusterCreateFailed, fmt.Sprintf("Failed to build CNPG cluster: %v", err)) - return err + p.health.State = pgcConstants.Failed + p.health.Reason = reasonClusterBuildFailed + p.health.Message = fmt.Sprintf("Failed to build CNPG cluster: %v", err) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + p.actuateErr = err + return } if err = p.client.Create(ctx, newCluster); err != nil { p.events.emitWarning(p.cluster, EventClusterCreateFailed, fmt.Sprintf("Failed to create CNPG cluster: %v", err)) - return err + p.health.State = pgcConstants.Failed + p.health.Reason = reasonClusterBuildFailed + p.health.Message = fmt.Sprintf("Failed to create CNPG cluster: %v", err) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + p.actuateErr = err + return } p.events.emitNormal(p.cluster, EventClusterCreationStarted, "CNPG cluster created, waiting for healthy state") p.cnpgCluster = newCluster p.cnpgCreated = true case err != nil: - return err + p.health.State = pgcConstants.Failed + p.health.Reason = reasonClusterGetFailed + p.health.Message = fmt.Sprintf("Failed to get CNPG cluster: %v", err) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + p.actuateErr = err + return default: p.cnpgCluster = existingCNPG currentNormalized := normalizeCNPGClusterSpec(p.cnpgCluster.Spec, p.mergedConfig.Spec.PostgreSQLConfig) @@ -428,7 +435,13 @@ func (p *clusterModel) Actuate(ctx context.Context) error { p.cnpgCluster.Spec = desiredSpec if patchErr := patchObject(ctx, p.client, originalCluster, p.cnpgCluster, "CNPGCluster"); patchErr != nil { p.events.emitWarning(p.cluster, EventClusterUpdateFailed, fmt.Sprintf("Failed to patch CNPG cluster: %v", patchErr)) - return patchErr + p.health.State = pgcConstants.Failed + p.health.Reason = reasonClusterPatchFailed + p.health.Message = fmt.Sprintf("Failed to patch CNPG cluster: %v", patchErr) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + p.actuateErr = patchErr + return } p.events.emitNormal(p.cluster, EventClusterUpdateStarted, "CNPG cluster spec updated, waiting for healthy state") p.cnpgPatched = true @@ -444,7 +457,7 @@ func (p *clusterModel) Actuate(ctx context.Context) error { UID: p.cnpgCluster.UID, } } - return nil + return } func (p *clusterModel) Converge(_ context.Context) (health componentHealth, err error) { @@ -465,6 +478,9 @@ func (p *clusterModel) Converge(_ context.Context) (health componentHealth, err p.health = missingHealth return p.health, nil } + if p.actuateErr != nil { + return p.health, p.actuateErr + } if p.cnpgCluster == nil { p.health.State = pgcConstants.Pending @@ -625,7 +641,8 @@ type managedRolesModel struct { cluster *enterprisev4.PostgresCluster secret string - health componentHealth + health componentHealth + actuateErr error } func newManagedRolesModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, runtime clusterRuntimeView, cluster *enterprisev4.PostgresCluster, secret string) *managedRolesModel { @@ -648,17 +665,18 @@ func (m *managedRolesModel) runtimeGateHealth() (componentHealth, bool) { return componentHealth{}, false } -func (m *managedRolesModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { +func (m *managedRolesModel) EvaluatePrerequisites(_ context.Context) prerequisiteDecision { if gateHealth, blocked := m.runtimeGateHealth(); blocked { return 
prerequisiteDecision{ Allowed: false, Health: gateHealth, - }, nil + } } - return prerequisiteDecision{Allowed: true}, nil + return prerequisiteDecision{Allowed: true} } -func (m *managedRolesModel) Actuate(ctx context.Context) error { +func (m *managedRolesModel) Actuate(ctx context.Context) { + m.actuateErr = nil if rolesErr := reconcileManagedRoles(ctx, m.client, m.cluster, m.runtime.Cluster()); rolesErr != nil { m.events.emitWarning(m.cluster, EventManagedRolesFailed, fmt.Sprintf("Failed to reconcile managed roles: %v", rolesErr)) m.health.State = pgcConstants.Failed @@ -666,14 +684,15 @@ func (m *managedRolesModel) Actuate(ctx context.Context) error { m.health.Message = fmt.Sprintf("Failed to reconcile managed roles: %v", rolesErr) m.health.Phase = failedClusterPhase m.health.Result = ctrl.Result{} - return rolesErr + m.actuateErr = rolesErr + return } - return nil + return } func (m *managedRolesModel) Converge(ctx context.Context) (health componentHealth, err error) { _ = ctx - m.health = componentHealth{Condition: managedRolesReady} + m.health.Condition = managedRolesReady defer func() { statusErr := writeComponentStatus(m.updateStatus, m.health) if statusErr != nil { @@ -690,6 +709,9 @@ func (m *managedRolesModel) Converge(ctx context.Context) (health componentHealt m.health = gateHealth return m.health, nil } + if m.actuateErr != nil { + return m.health, m.actuateErr + } syncManagedRolesStatusFromCNPG(m.cluster, m.runtime.Cluster()) status := m.cluster.Status.ManagedRolesStatus @@ -817,6 +839,7 @@ type poolerModel struct { metricsEnabled bool health componentHealth + actuateErr error } func newPoolerModel(c client.Client, scheme *runtime.Scheme, events poolerEmitter, updateStatus healthStatusUpdater, cluster *enterprisev4.PostgresCluster, clusterClass *enterprisev4.PostgresClusterClass, mergedConfig *MergedConfig, cnpgCluster *cnpgv1.Cluster, poolerEnabled bool, poolerConfigPresent bool) *poolerModel { @@ -838,9 +861,9 @@ func newPoolerModel(c client.Client, scheme *runtime.Scheme, events poolerEmitte func (p *poolerModel) Name() string { return pgcConstants.ComponentPooler } -func (p *poolerModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { +func (p *poolerModel) EvaluatePrerequisites(_ context.Context) prerequisiteDecision { if !p.poolerEnabled || !p.poolerConfigPresent { - return prerequisiteDecision{Allowed: true}, nil + return prerequisiteDecision{Allowed: true} } if p.cnpgCluster == nil { return prerequisiteDecision{ @@ -853,7 +876,7 @@ func (p *poolerModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDeci Phase: pendingClusterPhase, Result: ctrl.Result{RequeueAfter: retryDelay}, }, - }, nil + } } if p.cnpgCluster.Status.Phase != cnpgv1.PhaseHealthy { return prerequisiteDecision{ @@ -866,35 +889,49 @@ func (p *poolerModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDeci Phase: provisioningClusterPhase, Result: ctrl.Result{RequeueAfter: retryDelay}, }, - }, nil + } } - return prerequisiteDecision{Allowed: true}, nil + return prerequisiteDecision{Allowed: true} } -func (p *poolerModel) Actuate(ctx context.Context) error { +func (p *poolerModel) Actuate(ctx context.Context) { + p.actuateErr = nil switch { case !p.poolerEnabled: if err := deleteConnectionPoolers(ctx, p.client, p.cluster); err != nil { - return err + p.health.State = pgcConstants.Failed + p.health.Reason = reasonPoolerReconciliationFailed + p.health.Message = fmt.Sprintf("Failed to delete poolers: %v", err) + p.health.Phase = failedClusterPhase + p.health.Result 
= ctrl.Result{} + p.actuateErr = err + return } p.cluster.Status.ConnectionPoolerStatus = nil meta.RemoveStatusCondition(&p.cluster.Status.Conditions, string(poolerReady)) - return nil + return case !p.poolerConfigPresent: - return nil + return case p.cnpgCluster == nil || p.cnpgCluster.Status.Phase != cnpgv1.PhaseHealthy: - return nil + return default: if err := createOrUpdateConnectionPoolers(ctx, p.client, p.scheme, p.cluster, p.mergedConfig, p.cnpgCluster, p.metricsEnabled); err != nil { p.events.emitWarning(p.cluster, EventPoolerReconcileFailed, fmt.Sprintf("Failed to reconcile connection pooler: %v", err)) - return err + p.health.State = pgcConstants.Failed + p.health.Reason = reasonPoolerReconciliationFailed + p.health.Message = fmt.Sprintf("Failed to reconcile connection pooler: %v", err) + p.health.Phase = failedClusterPhase + p.health.Result = ctrl.Result{} + p.actuateErr = err + return } - return nil + return } } func (p *poolerModel) Converge(ctx context.Context) (health componentHealth, err error) { - p.health = componentHealth{Condition: poolerReady} + p.health.Condition = poolerReady + oldConditions := append([]metav1.Condition(nil), p.cluster.Status.Conditions...) defer func() { statusErr := writeComponentStatus(p.updateStatus, p.health) if statusErr != nil { @@ -923,6 +960,9 @@ func (p *poolerModel) Converge(ctx context.Context) (health componentHealth, err p.health.Result = ctrl.Result{} return p.health, fmt.Errorf("pooler config missing") } + if p.actuateErr != nil { + return p.health, p.actuateErr + } if p.cnpgCluster == nil { p.health.State = pgcConstants.Pending p.health.Reason = reasonCNPGProvisioning @@ -1013,7 +1053,7 @@ func (p *poolerModel) Converge(ctx context.Context) (health componentHealth, err p.health.Message = msgPoolersReady p.health.Phase = readyClusterPhase p.health.Result = ctrl.Result{} - p.events.emitPoolerReadyTransition(p.cluster, p.cluster.Status.Conditions) + p.events.emitPoolerReadyTransition(p.cluster, oldConditions) return p.health, nil } @@ -1026,7 +1066,8 @@ type configMapModel struct { cluster *enterprisev4.PostgresCluster secret string - health componentHealth + health componentHealth + actuateErr error } func newConfigMapModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, runtime clusterRuntimeView, cluster *enterprisev4.PostgresCluster, secret string) *configMapModel { @@ -1035,22 +1076,29 @@ func newConfigMapModel(c client.Client, scheme *runtime.Scheme, events eventEmit func (c *configMapModel) Name() string { return pgcConstants.ComponentConfigMap } -func (c *configMapModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { - return prerequisiteDecision{Allowed: true}, nil +func (c *configMapModel) EvaluatePrerequisites(_ context.Context) prerequisiteDecision { + return prerequisiteDecision{Allowed: true} } -func (c *configMapModel) Actuate(ctx context.Context) error { +func (c *configMapModel) Actuate(ctx context.Context) { + c.actuateErr = nil cnpgCluster := c.runtime.Cluster() if cnpgCluster == nil { - return nil + return } desiredCM, err := generateConfigMap(ctx, c.client, c.scheme, c.cluster, cnpgCluster, c.secret) if err != nil { c.events.emitWarning(c.cluster, EventConfigMapReconcileFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err)) - return err + c.health.State = pgcConstants.Failed + c.health.Reason = reasonConfigMapFailed + c.health.Message = fmt.Sprintf("Failed to reconcile ConfigMap: %v", err) + c.health.Phase = failedClusterPhase + 
c.health.Result = ctrl.Result{} + c.actuateErr = err + return } cm := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: desiredCM.Name, Namespace: desiredCM.Namespace}} - _, err = controllerutil.CreateOrUpdate(ctx, c.client, cm, func() error { + op, err := controllerutil.CreateOrUpdate(ctx, c.client, cm, func() error { cm.Data = desiredCM.Data cm.Annotations = desiredCM.Annotations cm.Labels = desiredCM.Labels @@ -1063,16 +1111,27 @@ func (c *configMapModel) Actuate(ctx context.Context) error { }) if err != nil { c.events.emitWarning(c.cluster, EventConfigMapReconcileFailed, fmt.Sprintf("Failed to reconcile ConfigMap: %v", err)) - return err + c.health.State = pgcConstants.Failed + c.health.Reason = reasonConfigMapFailed + c.health.Message = fmt.Sprintf("Failed to reconcile ConfigMap: %v", err) + c.health.Phase = failedClusterPhase + c.health.Result = ctrl.Result{} + c.actuateErr = err + return + } + if op == controllerutil.OperationResultCreated { + c.events.emitNormal(c.cluster, EventConfigMapReconciled, fmt.Sprintf("ConfigMap %s created", desiredCM.Name)) + } else if op == controllerutil.OperationResultUpdated { + c.events.emitNormal(c.cluster, EventConfigMapReconciled, fmt.Sprintf("ConfigMap %s updated", desiredCM.Name)) } if c.cluster.Status.Resources.ConfigMapRef == nil { c.cluster.Status.Resources.ConfigMapRef = &corev1.LocalObjectReference{Name: desiredCM.Name} } - return nil + return } func (c *configMapModel) Converge(ctx context.Context) (health componentHealth, err error) { - c.health = componentHealth{Condition: configMapsReady} + c.health.Condition = configMapsReady defer func() { statusErr := writeComponentStatus(c.updateStatus, c.health) if statusErr != nil { @@ -1093,6 +1152,9 @@ func (c *configMapModel) Converge(ctx context.Context) (health componentHealth, c.health.Result = ctrl.Result{RequeueAfter: retryDelay} return c.health, nil } + if c.actuateErr != nil { + return c.health, c.actuateErr + } if c.cluster.Status.Resources == nil || c.cluster.Status.Resources.ConfigMapRef == nil { c.health.State = pgcConstants.Provisioning @@ -1159,7 +1221,8 @@ type secretModel struct { cluster *enterprisev4.PostgresCluster name string - health componentHealth + health componentHealth + actuateErr error } func newSecretModel(c client.Client, scheme *runtime.Scheme, events eventEmitter, updateStatus healthStatusUpdater, cluster *enterprisev4.PostgresCluster, name string) *secretModel { @@ -1168,35 +1231,66 @@ func newSecretModel(c client.Client, scheme *runtime.Scheme, events eventEmitter func (s *secretModel) Name() string { return pgcConstants.ComponentSecret } -func (s *secretModel) EvaluatePrerequisites(_ context.Context) (prerequisiteDecision, error) { - return prerequisiteDecision{Allowed: true}, nil +func (s *secretModel) EvaluatePrerequisites(_ context.Context) prerequisiteDecision { + return prerequisiteDecision{Allowed: true} } -func (s *secretModel) Actuate(ctx context.Context) error { +func (s *secretModel) Actuate(ctx context.Context) { + s.actuateErr = nil secret := &corev1.Secret{} secretExists, secretErr := clusterSecretExists(ctx, s.client, s.cluster.Namespace, s.name, secret) if secretErr != nil { s.events.emitWarning(s.cluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to check secret existence: %v", secretErr)) - return secretErr + s.health.State = pgcConstants.Failed + s.health.Reason = reasonSuperUserSecretFailed + s.health.Message = fmt.Sprintf("Failed to check secret existence: %v", secretErr) + s.health.Phase = failedClusterPhase + s.health.Result = 
ctrl.Result{} + s.actuateErr = secretErr + return } if !secretExists { if err := ensureClusterSecret(ctx, s.client, s.scheme, s.cluster, s.name, secret); err != nil { s.events.emitWarning(s.cluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to generate cluster secret: %v", err)) - return err + s.health.State = pgcConstants.Failed + s.health.Reason = reasonSuperUserSecretFailed + s.health.Message = fmt.Sprintf("Failed to generate cluster secret: %v", err) + s.health.Phase = failedClusterPhase + s.health.Result = ctrl.Result{} + s.actuateErr = err + return } } hasOwnerRef, ownerRefErr := controllerutil.HasOwnerReference(secret.GetOwnerReferences(), s.cluster, s.scheme) if ownerRefErr != nil { - return fmt.Errorf("failed to check owner reference on secret: %w", ownerRefErr) + s.health.State = pgcConstants.Failed + s.health.Reason = reasonSuperUserSecretFailed + s.health.Message = fmt.Sprintf("failed to check owner reference on secret: %v", ownerRefErr) + s.health.Phase = failedClusterPhase + s.health.Result = ctrl.Result{} + s.actuateErr = fmt.Errorf("failed to check owner reference on secret: %w", ownerRefErr) + return } if secretExists && !hasOwnerRef { originalSecret := secret.DeepCopy() if err := ctrl.SetControllerReference(s.cluster, secret, s.scheme); err != nil { - return fmt.Errorf("failed to set controller reference on existing secret: %w", err) + s.health.State = pgcConstants.Failed + s.health.Reason = reasonSuperUserSecretFailed + s.health.Message = fmt.Sprintf("failed to set controller reference on existing secret: %v", err) + s.health.Phase = failedClusterPhase + s.health.Result = ctrl.Result{} + s.actuateErr = fmt.Errorf("failed to set controller reference on existing secret: %w", err) + return } if err := patchObject(ctx, s.client, originalSecret, secret, "Secret"); err != nil { s.events.emitWarning(s.cluster, EventSecretReconcileFailed, fmt.Sprintf("Failed to patch existing secret: %v", err)) - return err + s.health.State = pgcConstants.Failed + s.health.Reason = reasonSuperUserSecretFailed + s.health.Message = fmt.Sprintf("Failed to patch existing secret: %v", err) + s.health.Phase = failedClusterPhase + s.health.Result = ctrl.Result{} + s.actuateErr = err + return } s.events.emitNormal(s.cluster, EventClusterAdopted, fmt.Sprintf("Adopted existing CNPG cluster and secret %s", s.name)) } @@ -1206,11 +1300,11 @@ func (s *secretModel) Actuate(ctx context.Context) error { Key: secretKeyPassword, } } - return nil + return } func (s *secretModel) Converge(ctx context.Context) (health componentHealth, err error) { - s.health = componentHealth{Condition: secretsReady} + s.health.Condition = secretsReady defer func() { statusErr := writeComponentStatus(s.updateStatus, s.health) if statusErr != nil { @@ -1223,6 +1317,10 @@ func (s *secretModel) Converge(ctx context.Context) (health componentHealth, err health = s.health }() + if s.actuateErr != nil { + return s.health, s.actuateErr + } + if s.cluster.Status.Resources == nil || s.cluster.Status.Resources.SuperUserSecretRef == nil { s.health.State = pgcConstants.Provisioning s.health.Reason = reasonUserSecretPending diff --git a/pkg/postgresql/cluster/core/cluster_unit_test.go b/pkg/postgresql/cluster/core/cluster_unit_test.go index 85eeba7e2..6df965cc6 100644 --- a/pkg/postgresql/cluster/core/cluster_unit_test.go +++ b/pkg/postgresql/cluster/core/cluster_unit_test.go @@ -25,6 +25,41 @@ type configMapNotFoundClient struct { client.Client } +type getErrorClient struct { + client.Client + err error + matcher func(client.Object) bool 
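+	// matcher selects which objects should fail with err; when matcher is
+	// nil no object matches, and the embedded client serves the call.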
+} + +func (c getErrorClient) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + if c.matcher != nil && c.matcher(obj) { + return c.err + } + return c.Client.Get(ctx, key, obj, opts...) +} + +type createErrorClient struct { + client.Client + err error + matcher func(client.Object) bool +} + +func (c createErrorClient) Create(ctx context.Context, obj client.Object, opts ...client.CreateOption) error { + if c.matcher != nil && c.matcher(obj) { + return c.err + } + return c.Client.Create(ctx, obj, opts...) +} + +type patchErrorClient struct { + client.Client + err error +} + +func (c patchErrorClient) Patch(_ context.Context, _ client.Object, _ client.Patch, _ ...client.PatchOption) error { + return c.err +} + type noopEventEmitter struct{} func (noopEventEmitter) emitNormal(_ client.Object, _, _ string) {} @@ -1290,10 +1325,11 @@ func TestComponentStateTriggerConditions(t *testing.T) { Namespace: "default", }, Data: map[string]string{ - "CLUSTER_RW_ENDPOINT": "pg1-rw.default", - "CLUSTER_RO_ENDPOINT": "pg1-ro.default", - "DEFAULT_CLUSTER_PORT": "5432", - "SUPER_USER_SECRET_REF": "pg1-secret", + configKeyClusterRWEndpoint: "pg1-rw.default", + configKeyClusterROEndpoint: "pg1-ro.default", + configKeyClusterREndpoint: "pg1-r.default", + configKeyDefaultClusterPort: "5432", + configKeySuperUserSecretRef: "pg1-secret", }, } examplePgCluster := &enterprisev4.PostgresCluster{ @@ -1487,8 +1523,7 @@ func TestComponentStateTriggerConditions(t *testing.T) { state := pgcConstants.Empty for i, check := range tt.components { - gate, gateErr := check.EvaluatePrerequisites(ctx) - require.NoError(t, gateErr) + gate := check.EvaluatePrerequisites(ctx) if !gate.Allowed { info := gate.Health state = info.State @@ -1497,7 +1532,7 @@ func TestComponentStateTriggerConditions(t *testing.T) { continue } - require.NoError(t, check.Actuate(ctx)) + check.Actuate(ctx) info, err := check.Converge(ctx) require.NoError(t, err) state = info.State @@ -1742,8 +1777,7 @@ func TestManagedRolesRuntimeGateHealthMatchesConverge(t *testing.T) { "pg1-secret", ) - gate, err := model.EvaluatePrerequisites(context.Background()) - require.NoError(t, err) + gate := model.EvaluatePrerequisites(context.Background()) require.False(t, gate.Allowed) health, err := model.Converge(context.Background()) @@ -1751,6 +1785,229 @@ func TestManagedRolesRuntimeGateHealthMatchesConverge(t *testing.T) { assert.Equal(t, gate.Health, health) } +func TestActuateErrorPassdownConvergeHandling(t *testing.T) { + t.Parallel() + + scheme := runtime.NewScheme() + require.NoError(t, enterprisev4.AddToScheme(scheme)) + require.NoError(t, cnpgv1.AddToScheme(scheme)) + require.NoError(t, corev1.AddToScheme(scheme)) + + instances := int32(1) + version := "16" + storageSize := resource.MustParse("10Gi") + mergedConfig := &MergedConfig{ + Spec: &enterprisev4.PostgresClusterSpec{ + Instances: &instances, + PostgresVersion: &version, + Storage: &storageSize, + Resources: &corev1.ResourceRequirements{}, + PostgreSQLConfig: map[string]string{}, + PgHBA: []string{}, + }, + } + clusterClass := &enterprisev4.PostgresClusterClass{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1-class"}, + Spec: enterprisev4.PostgresClusterClassSpec{ + Config: &enterprisev4.PostgresClusterClassConfig{ + ConnectionPoolerEnabled: ptr.To(true), + }, + }, + } + + type convergeComponent interface { + Actuate(ctx context.Context) + Converge(ctx context.Context) (componentHealth, error) + } + type testCase struct { + name string + expectedCondition 
conditionTypes + expectedReason conditionReasons + build func(updateStatus healthStatusUpdater) convergeComponent + } + + tests := []testCase{ + { + name: "cluster component passes actuate get error through converge", + expectedCondition: clusterReady, + expectedReason: reasonClusterGetFailed, + build: func(updateStatus healthStatusUpdater) convergeComponent { + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + Status: enterprisev4.PostgresClusterStatus{ + Resources: &enterprisev4.PostgresClusterResources{ + SuperUserSecretRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "pg1-secret"}, + Key: "password", + }, + }, + }, + } + base := fake.NewClientBuilder().WithScheme(scheme).Build() + errClient := getErrorClient{ + Client: base, + err: assert.AnError, + matcher: func(obj client.Object) bool { + _, ok := obj.(*cnpgv1.Cluster) + return ok + }, + } + return newClusterModel(errClient, scheme, noopEventEmitter{}, updateStatus, cluster, clusterClass, mergedConfig, "pg1-secret") + }, + }, + { + name: "managed roles component passes actuate patch error through converge", + expectedCondition: managedRolesReady, + expectedReason: reasonManagedRolesFailed, + build: func(updateStatus healthStatusUpdater) convergeComponent { + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + Spec: enterprisev4.PostgresClusterSpec{ + ManagedRoles: []enterprisev4.ManagedRole{{Name: "app_user", Exists: true}}, + }, + } + cnpg := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}, + } + base := fake.NewClientBuilder().WithScheme(scheme).Build() + errClient := patchErrorClient{Client: base, err: assert.AnError} + return newManagedRolesModel( + errClient, + scheme, + noopEventEmitter{}, + updateStatus, + clusterRuntimeViewAdapter{model: &clusterModel{cnpgCluster: cnpg}}, + cluster, + "pg1-secret", + ) + }, + }, + { + name: "pooler component passes actuate create error through converge", + expectedCondition: poolerReady, + expectedReason: reasonPoolerReconciliationFailed, + build: func(updateStatus healthStatusUpdater) convergeComponent { + poolerInstances := int32(2) + poolerMode := enterprisev4.ConnectionPoolerModeTransaction + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + } + cnpg := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}, + } + base := fake.NewClientBuilder().WithScheme(scheme).Build() + errClient := createErrorClient{ + Client: base, + err: assert.AnError, + matcher: func(obj client.Object) bool { + _, ok := obj.(*cnpgv1.Pooler) + return ok + }, + } + poolerCfg := &MergedConfig{ + Spec: mergedConfig.Spec, + CNPG: &enterprisev4.CNPGConfig{ + ConnectionPooler: &enterprisev4.ConnectionPoolerConfig{ + Instances: &poolerInstances, + Mode: &poolerMode, + Config: map[string]string{}, + }, + }, + } + return newPoolerModel(errClient, scheme, noopEventEmitter{}, updateStatus, cluster, clusterClass, poolerCfg, cnpg, true, true) + }, + }, + { + name: "configmap component passes actuate pooler lookup error through converge", + expectedCondition: configMapsReady, + expectedReason: reasonConfigMapFailed, + build: func(updateStatus healthStatusUpdater) convergeComponent { + cluster := &enterprisev4.PostgresCluster{ 
+ ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + Status: enterprisev4.PostgresClusterStatus{Resources: &enterprisev4.PostgresClusterResources{}}, + } + cnpg := &cnpgv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}, + } + base := fake.NewClientBuilder().WithScheme(scheme).Build() + errClient := getErrorClient{ + Client: base, + err: assert.AnError, + matcher: func(obj client.Object) bool { + _, ok := obj.(*cnpgv1.Pooler) + return ok + }, + } + return newConfigMapModel( + errClient, + scheme, + noopEventEmitter{}, + updateStatus, + clusterRuntimeViewAdapter{model: &clusterModel{cnpgCluster: cnpg}}, + cluster, + "pg1-secret", + ) + }, + }, + { + name: "secret component passes actuate existence-check error through converge", + expectedCondition: secretsReady, + expectedReason: reasonSuperUserSecretFailed, + build: func(updateStatus healthStatusUpdater) convergeComponent { + cluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "pg1", Namespace: "default"}, + Status: enterprisev4.PostgresClusterStatus{ + Resources: &enterprisev4.PostgresClusterResources{}, + }, + } + base := fake.NewClientBuilder().WithScheme(scheme).Build() + errClient := getErrorClient{ + Client: base, + err: assert.AnError, + matcher: func(obj client.Object) bool { + _, ok := obj.(*corev1.Secret) + return ok + }, + } + return newSecretModel(errClient, scheme, noopEventEmitter{}, updateStatus, cluster, "pg1-secret") + }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + var ( + written componentHealth + writes int + ) + updateStatus := func(health componentHealth) error { + written = health + writes++ + return nil + } + model := tt.build(updateStatus) + + model.Actuate(context.Background()) + health, err := model.Converge(context.Background()) + + require.Error(t, err) + require.ErrorIs(t, err, assert.AnError) + assert.Equal(t, tt.expectedCondition, health.Condition) + assert.Equal(t, pgcConstants.Failed, health.State) + assert.Equal(t, tt.expectedReason, health.Reason) + assert.Equal(t, failedClusterPhase, health.Phase) + assert.NotEmpty(t, health.Message) + assert.Equal(t, 1, writes) + assert.Equal(t, health, written) + }) + } +} + func TestPoolerModelConvergeSetsConnectionPoolerStatus(t *testing.T) { t.Parallel() @@ -1878,7 +2135,7 @@ func TestPoolerModelConvergeSetsConnectionPoolerStatus(t *testing.T) { false, ) - require.NoError(t, model.Actuate(context.Background())) + model.Actuate(context.Background()) health, err := model.Converge(context.Background()) require.NoError(t, err) assert.Nil(t, cluster.Status.ConnectionPoolerStatus) diff --git a/pkg/postgresql/cluster/core/events.go b/pkg/postgresql/cluster/core/events.go index 551ce9147..1472a3922 100644 --- a/pkg/postgresql/cluster/core/events.go +++ b/pkg/postgresql/cluster/core/events.go @@ -12,6 +12,7 @@ import ( const ( EventSecretReady = "SecretReady" EventConfigMapReady = "ConfigMapReady" + EventConfigMapReconciled = "ConfigMapReconciled" EventClusterAdopted = "ClusterAdopted" EventClusterCreationStarted = "ClusterCreationStarted" EventClusterUpdateStarted = "ClusterUpdateStarted" diff --git a/pkg/postgresql/cluster/core/types.go b/pkg/postgresql/cluster/core/types.go index b0f0ed0d7..79e29ce63 100644 --- a/pkg/postgresql/cluster/core/types.go +++ b/pkg/postgresql/cluster/core/types.go @@ -95,6 +95,9 @@ const ( // condition reasons — cluster/provisioner 
reasonClusterClassNotFound conditionReasons = "ClusterClassNotFound" reasonInvalidConfiguration conditionReasons = "InvalidConfiguration" + reasonClusterBuildFailed conditionReasons = "ClusterBuildFailed" + reasonClusterGetFailed conditionReasons = "ClusterGetFailed" + reasonClusterPatchFailed conditionReasons = "ClusterPatchFailed" // condition reasons — managedRolesReady reasonManagedRolesReady conditionReasons = "ManagedRolesReconciled" From 50b724f14ac5fd5873291c83deab5ccafd1c7c08 Mon Sep 17 00:00:00 2001 From: Jakub Koterba Date: Tue, 21 Apr 2026 15:42:24 +0200 Subject: [PATCH 35/36] removed smelly returns --- pkg/postgresql/cluster/core/cluster.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index aa566b9bd..a09e95780 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -457,7 +457,6 @@ func (p *clusterModel) Actuate(ctx context.Context) { UID: p.cnpgCluster.UID, } } - return } func (p *clusterModel) Converge(_ context.Context) (health componentHealth, err error) { @@ -687,7 +686,6 @@ func (m *managedRolesModel) Actuate(ctx context.Context) { m.actuateErr = rolesErr return } - return } func (m *managedRolesModel) Converge(ctx context.Context) (health componentHealth, err error) { @@ -1127,7 +1125,6 @@ func (c *configMapModel) Actuate(ctx context.Context) { if c.cluster.Status.Resources.ConfigMapRef == nil { c.cluster.Status.Resources.ConfigMapRef = &corev1.LocalObjectReference{Name: desiredCM.Name} } - return } func (c *configMapModel) Converge(ctx context.Context) (health componentHealth, err error) { @@ -1300,7 +1297,6 @@ func (s *secretModel) Actuate(ctx context.Context) { Key: secretKeyPassword, } } - return } func (s *secretModel) Converge(ctx context.Context) (health componentHealth, err error) { From a193dc2c1d6ec7a391c2d147084b179315de6daf Mon Sep 17 00:00:00 2001 From: Kamil Ubych <56136249+limak9182@users.noreply.github.com> Date: Thu, 23 Apr 2026 15:50:33 +0200 Subject: [PATCH 36/36] pg_hba validation and validation webhook (#1855) * pg_hba validation * comment fix * webhook resources fix * integration tests * fix tests * structure fix * fix tests * docs fix --- config/webhook/manifests.yaml | 2 + .../postgres_webhook_integration_test.go | 573 ++++++++++++++++++ .../webhook/postgrescluster_validation.go | 56 ++ .../postgrescluster_validation_test.go | 192 ++++++ .../postgresclusterclass_validation.go | 56 ++ .../postgresclusterclass_validation_test.go | 191 ++++++ pkg/postgresql/cluster/core/hba.go | 253 ++++++++ pkg/postgresql/cluster/core/hba_unit_test.go | 364 +++++++++++ pkg/splunk/enterprise/validation/registry.go | 35 ++ pkg/splunk/enterprise/validation/server.go | 6 +- .../enterprise/validation/server_test.go | 4 +- 11 files changed, 1727 insertions(+), 5 deletions(-) create mode 100644 pkg/postgresql/cluster/adapter/webhook/postgres_webhook_integration_test.go create mode 100644 pkg/postgresql/cluster/adapter/webhook/postgrescluster_validation.go create mode 100644 pkg/postgresql/cluster/adapter/webhook/postgrescluster_validation_test.go create mode 100644 pkg/postgresql/cluster/adapter/webhook/postgresclusterclass_validation.go create mode 100644 pkg/postgresql/cluster/adapter/webhook/postgresclusterclass_validation_test.go create mode 100644 pkg/postgresql/cluster/core/hba.go create mode 100644 pkg/postgresql/cluster/core/hba_unit_test.go diff --git a/config/webhook/manifests.yaml b/config/webhook/manifests.yaml index 
f534bd66b..8d6ea21fa 100644 --- a/config/webhook/manifests.yaml +++ b/config/webhook/manifests.yaml @@ -28,4 +28,6 @@ webhooks: - clustermanagers - licensemanagers - monitoringconsoles + - postgresclusters + - postgresclusterclasses sideEffects: None diff --git a/pkg/postgresql/cluster/adapter/webhook/postgres_webhook_integration_test.go b/pkg/postgresql/cluster/adapter/webhook/postgres_webhook_integration_test.go new file mode 100644 index 000000000..c8f2eaf3d --- /dev/null +++ b/pkg/postgresql/cluster/adapter/webhook/postgres_webhook_integration_test.go @@ -0,0 +1,573 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package webhook_test + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + admissionv1 "k8s.io/api/admission/v1" + authenticationv1 "k8s.io/api/authentication/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + + enterpriseApi "github.com/splunk/splunk-operator/api/v4" + "github.com/splunk/splunk-operator/pkg/splunk/enterprise/validation" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func mustMarshal(t *testing.T, obj interface{}) []byte { + t.Helper() + data, err := json.Marshal(obj) + if err != nil { + t.Fatalf("failed to marshal object: %v", err) + } + return data +} + +func newPostgresClusterAdmissionReview(t *testing.T, uid string, op admissionv1.Operation, obj *enterpriseApi.PostgresCluster, oldObj *enterpriseApi.PostgresCluster) *admissionv1.AdmissionReview { + t.Helper() + ar := &admissionv1.AdmissionReview{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "admission.k8s.io/v1", + Kind: "AdmissionReview", + }, + Request: &admissionv1.AdmissionRequest{ + UID: types.UID(uid), + Kind: metav1.GroupVersionKind{ + Group: "enterprise.splunk.com", + Version: "v4", + Kind: "PostgresCluster", + }, + Resource: metav1.GroupVersionResource{ + Group: "enterprise.splunk.com", + Version: "v4", + Resource: "postgresclusters", + }, + Name: obj.Name, + Namespace: obj.Namespace, + Operation: op, + Object: runtime.RawExtension{ + Raw: mustMarshal(t, obj), + }, + UserInfo: authenticationv1.UserInfo{Username: "test-user"}, + }, + } + if oldObj != nil { + ar.Request.OldObject = runtime.RawExtension{ + Raw: mustMarshal(t, oldObj), + } + } + return ar +} + +func newPostgresClusterClassAdmissionReview(t *testing.T, uid string, op admissionv1.Operation, obj *enterpriseApi.PostgresClusterClass, oldObj *enterpriseApi.PostgresClusterClass) *admissionv1.AdmissionReview { + t.Helper() + ar := &admissionv1.AdmissionReview{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "admission.k8s.io/v1", + Kind: "AdmissionReview", + }, + Request: &admissionv1.AdmissionRequest{ + UID: types.UID(uid), + Kind: metav1.GroupVersionKind{ + Group: "enterprise.splunk.com", + Version: "v4", + Kind: "PostgresClusterClass", + }, + Resource: metav1.GroupVersionResource{ + Group: "enterprise.splunk.com", + Version: "v4", + Resource: "postgresclusterclasses", + }, + Name: obj.Name, + 
Operation: op, + Object: runtime.RawExtension{ + Raw: mustMarshal(t, obj), + }, + UserInfo: authenticationv1.UserInfo{Username: "test-user"}, + }, + } + if oldObj != nil { + ar.Request.OldObject = runtime.RawExtension{ + Raw: mustMarshal(t, oldObj), + } + } + return ar +} + +func sendAdmissionReview(t *testing.T, server *validation.WebhookServer, ar *admissionv1.AdmissionReview) *admissionv1.AdmissionResponse { + t.Helper() + body, err := json.Marshal(ar) + require.NoError(t, err) + + req := httptest.NewRequest(http.MethodPost, "/validate", bytes.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + rr := httptest.NewRecorder() + + server.HandleValidate(rr, req) + require.Equal(t, http.StatusOK, rr.Code) + + var response admissionv1.AdmissionReview + require.NoError(t, json.Unmarshal(rr.Body.Bytes(), &response)) + require.NotNil(t, response.Response) + return response.Response +} + +func TestPostgresClusterPgHBAIntegration(t *testing.T) { + server := validation.NewWebhookServer(validation.WebhookServerOptions{ + Port: 9443, + Validators: validation.DefaultValidators, + }) + + tests := []struct { + name string + obj *enterpriseApi.PostgresCluster + wantAllowed bool + wantMessage string + wantMessages []string + }{ + { + name: "valid - no pgHBA rules", + obj: &enterpriseApi.PostgresCluster{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "enterprise.splunk.com/v4", + Kind: "PostgresCluster", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: enterpriseApi.PostgresClusterSpec{ + Class: "dev", + }, + }, + wantAllowed: true, + }, + { + name: "valid - correct pgHBA rules", + obj: &enterpriseApi.PostgresCluster{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "enterprise.splunk.com/v4", + Kind: "PostgresCluster", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: enterpriseApi.PostgresClusterSpec{ + Class: "dev", + PgHBA: []string{ + "hostnossl all all 0.0.0.0/0 reject", + "hostssl all all 0.0.0.0/0 scram-sha-256", + "local all all peer", + }, + }, + }, + wantAllowed: true, + }, + { + name: "rejected - bad connection type", + obj: &enterpriseApi.PostgresCluster{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "enterprise.splunk.com/v4", + Kind: "PostgresCluster", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: enterpriseApi.PostgresClusterSpec{ + Class: "dev", + PgHBA: []string{ + "hostx all all 0.0.0.0/0 md5", + }, + }, + }, + wantAllowed: false, + wantMessage: "unknown connection type", + }, + { + name: "rejected - bad CIDR", + obj: &enterpriseApi.PostgresCluster{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "enterprise.splunk.com/v4", + Kind: "PostgresCluster", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: enterpriseApi.PostgresClusterSpec{ + Class: "dev", + PgHBA: []string{ + "host all all 192.168.0.0/33 md5", + }, + }, + }, + wantAllowed: false, + wantMessage: "invalid CIDR", + }, + { + name: "rejected - unknown auth method", + obj: &enterpriseApi.PostgresCluster{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "enterprise.splunk.com/v4", + Kind: "PostgresCluster", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: enterpriseApi.PostgresClusterSpec{ + Class: "dev", + PgHBA: []string{ + "host all all 0.0.0.0/0 bogus", + }, + }, + }, + wantAllowed: false, + wantMessage: "unknown auth method", + }, + { + name: "rejected - too few fields", + obj: 
&enterpriseApi.PostgresCluster{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "enterprise.splunk.com/v4", + Kind: "PostgresCluster", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: enterpriseApi.PostgresClusterSpec{ + Class: "dev", + PgHBA: []string{ + "host all all", + }, + }, + }, + wantAllowed: false, + wantMessage: "too few fields", + }, + { + name: "rejected - multiple bad rules reports all errors", + obj: &enterpriseApi.PostgresCluster{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "enterprise.splunk.com/v4", + Kind: "PostgresCluster", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: enterpriseApi.PostgresClusterSpec{ + Class: "dev", + PgHBA: []string{ + "hostssl all all 0.0.0.0/0 scram-sha-256", + "hostx all all 0.0.0.0/0 md5", + "host all all 10.0.0.0/8 bogus", + }, + }, + }, + wantAllowed: false, + wantMessages: []string{"spec.pgHBA[1]", "spec.pgHBA[2]"}, + }, + { + name: "valid - rules with auth options and comments", + obj: &enterpriseApi.PostgresCluster{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "enterprise.splunk.com/v4", + Kind: "PostgresCluster", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: enterpriseApi.PostgresClusterSpec{ + Class: "dev", + PgHBA: []string{ + "host all all 0.0.0.0/0 ldap ldapserver=ldap.example.com ldapport=389", + "host all all 0.0.0.0/0 md5 # office access", + }, + }, + }, + wantAllowed: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ar := newPostgresClusterAdmissionReview(t, "uid-"+tt.name, admissionv1.Create, tt.obj, nil) + resp := sendAdmissionReview(t, server, ar) + + assert.Equal(t, tt.wantAllowed, resp.Allowed, "unexpected admission result") + if !tt.wantAllowed { + require.NotNil(t, resp.Result) + assert.Equal(t, metav1.StatusReasonInvalid, resp.Result.Reason) + assert.Equal(t, int32(http.StatusUnprocessableEntity), resp.Result.Code) + } + if tt.wantMessage != "" { + require.NotNil(t, resp.Result) + assert.Contains(t, resp.Result.Message, tt.wantMessage) + } + for _, msg := range tt.wantMessages { + require.NotNil(t, resp.Result) + assert.Contains(t, resp.Result.Message, msg) + } + }) + } +} + +func TestPostgresClusterPgHBAUpdateIntegration(t *testing.T) { + server := validation.NewWebhookServer(validation.WebhookServerOptions{ + Port: 9443, + Validators: validation.DefaultValidators, + }) + + oldObj := &enterpriseApi.PostgresCluster{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "enterprise.splunk.com/v4", + Kind: "PostgresCluster", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "default", + }, + Spec: enterpriseApi.PostgresClusterSpec{ + Class: "dev", + PgHBA: []string{ + "host all all 0.0.0.0/0 scram-sha-256", + }, + }, + } + + t.Run("valid update - change rules", func(t *testing.T) { + newObj := oldObj.DeepCopy() + newObj.Spec.PgHBA = []string{ + "hostssl all all 0.0.0.0/0 scram-sha-256", + "local all all peer", + } + ar := newPostgresClusterAdmissionReview(t, "uid-update-valid", admissionv1.Update, newObj, oldObj) + resp := sendAdmissionReview(t, server, ar) + assert.True(t, resp.Allowed) + }) + + t.Run("rejected update - invalid new rules", func(t *testing.T) { + newObj := oldObj.DeepCopy() + newObj.Spec.PgHBA = []string{ + "hostx all all 0.0.0.0/0 md5", + } + ar := newPostgresClusterAdmissionReview(t, "uid-update-invalid", admissionv1.Update, newObj, oldObj) + resp := sendAdmissionReview(t, server, ar) + assert.False(t, 
resp.Allowed) + assert.Equal(t, metav1.StatusReasonInvalid, resp.Result.Reason) + assert.Equal(t, int32(http.StatusUnprocessableEntity), resp.Result.Code) + assert.Contains(t, resp.Result.Message, "unknown connection type") + }) +} + +func TestPostgresClusterClassPgHBAIntegration(t *testing.T) { + server := validation.NewWebhookServer(validation.WebhookServerOptions{ + Port: 9443, + Validators: validation.DefaultValidators, + }) + + tests := []struct { + name string + obj *enterpriseApi.PostgresClusterClass + wantAllowed bool + wantMessage string + }{ + { + name: "valid - no pgHBA rules", + obj: &enterpriseApi.PostgresClusterClass{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "enterprise.splunk.com/v4", + Kind: "PostgresClusterClass", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "dev", + }, + Spec: enterpriseApi.PostgresClusterClassSpec{ + Provisioner: "postgresql.cnpg.io", + }, + }, + wantAllowed: true, + }, + { + name: "valid - correct pgHBA rules", + obj: &enterpriseApi.PostgresClusterClass{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "enterprise.splunk.com/v4", + Kind: "PostgresClusterClass", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "dev", + }, + Spec: enterpriseApi.PostgresClusterClassSpec{ + Provisioner: "postgresql.cnpg.io", + Config: &enterpriseApi.PostgresClusterClassConfig{ + PgHBA: []string{ + "hostnossl all all 0.0.0.0/0 reject", + "hostssl all all 0.0.0.0/0 scram-sha-256", + }, + }, + }, + }, + wantAllowed: true, + }, + { + name: "rejected - bad connection type", + obj: &enterpriseApi.PostgresClusterClass{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "enterprise.splunk.com/v4", + Kind: "PostgresClusterClass", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "dev", + }, + Spec: enterpriseApi.PostgresClusterClassSpec{ + Provisioner: "postgresql.cnpg.io", + Config: &enterpriseApi.PostgresClusterClassConfig{ + PgHBA: []string{ + "hostx all all 0.0.0.0/0 md5", + }, + }, + }, + }, + wantAllowed: false, + wantMessage: "unknown connection type", + }, + { + name: "rejected - invalid CIDR in class", + obj: &enterpriseApi.PostgresClusterClass{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "enterprise.splunk.com/v4", + Kind: "PostgresClusterClass", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "dev", + }, + Spec: enterpriseApi.PostgresClusterClassSpec{ + Provisioner: "postgresql.cnpg.io", + Config: &enterpriseApi.PostgresClusterClassConfig{ + PgHBA: []string{ + "host all all 256.1.1.1/24 md5", + }, + }, + }, + }, + wantAllowed: false, + wantMessage: "invalid CIDR", + }, + { + name: "rejected - unknown auth method in class", + obj: &enterpriseApi.PostgresClusterClass{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "enterprise.splunk.com/v4", + Kind: "PostgresClusterClass", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "dev", + }, + Spec: enterpriseApi.PostgresClusterClassSpec{ + Provisioner: "postgresql.cnpg.io", + Config: &enterpriseApi.PostgresClusterClassConfig{ + PgHBA: []string{ + "host all all 0.0.0.0/0 fake-method", + }, + }, + }, + }, + wantAllowed: false, + wantMessage: "unknown auth method", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ar := newPostgresClusterClassAdmissionReview(t, "uid-"+tt.name, admissionv1.Create, tt.obj, nil) + resp := sendAdmissionReview(t, server, ar) + + assert.Equal(t, tt.wantAllowed, resp.Allowed, "unexpected admission result") + if !tt.wantAllowed { + require.NotNil(t, resp.Result) + assert.Equal(t, metav1.StatusReasonInvalid, resp.Result.Reason) + assert.Equal(t, int32(http.StatusUnprocessableEntity), resp.Result.Code) + } + 
if tt.wantMessage != "" { + require.NotNil(t, resp.Result) + assert.Contains(t, resp.Result.Message, tt.wantMessage) + } + }) + } +} + +func TestPostgresClusterClassPgHBAUpdateIntegration(t *testing.T) { + server := validation.NewWebhookServer(validation.WebhookServerOptions{ + Port: 9443, + Validators: validation.DefaultValidators, + }) + + oldObj := &enterpriseApi.PostgresClusterClass{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "enterprise.splunk.com/v4", + Kind: "PostgresClusterClass", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "dev", + }, + Spec: enterpriseApi.PostgresClusterClassSpec{ + Provisioner: "postgresql.cnpg.io", + Config: &enterpriseApi.PostgresClusterClassConfig{ + PgHBA: []string{ + "host all all 0.0.0.0/0 scram-sha-256", + }, + }, + }, + } + + t.Run("valid update - change rules", func(t *testing.T) { + newObj := oldObj.DeepCopy() + newObj.Spec.Config.PgHBA = []string{ + "hostssl all all 0.0.0.0/0 scram-sha-256", + "hostnossl all all 0.0.0.0/0 reject", + } + ar := newPostgresClusterClassAdmissionReview(t, "uid-class-update-valid", admissionv1.Update, newObj, oldObj) + resp := sendAdmissionReview(t, server, ar) + assert.True(t, resp.Allowed) + }) + + t.Run("rejected update - invalid new rules", func(t *testing.T) { + newObj := oldObj.DeepCopy() + newObj.Spec.Config.PgHBA = []string{ + "host all all 0.0.0.0/0 bogus", + } + ar := newPostgresClusterClassAdmissionReview(t, "uid-class-update-invalid", admissionv1.Update, newObj, oldObj) + resp := sendAdmissionReview(t, server, ar) + assert.False(t, resp.Allowed) + assert.Equal(t, metav1.StatusReasonInvalid, resp.Result.Reason) + assert.Equal(t, int32(http.StatusUnprocessableEntity), resp.Result.Code) + assert.Contains(t, resp.Result.Message, "unknown auth method") + }) +} diff --git a/pkg/postgresql/cluster/adapter/webhook/postgrescluster_validation.go b/pkg/postgresql/cluster/adapter/webhook/postgrescluster_validation.go new file mode 100644 index 000000000..7fc724c4f --- /dev/null +++ b/pkg/postgresql/cluster/adapter/webhook/postgrescluster_validation.go @@ -0,0 +1,56 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package webhook + +import ( + "k8s.io/apimachinery/pkg/util/validation/field" + + enterpriseApi "github.com/splunk/splunk-operator/api/v4" + hba "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core" +) + +// ValidatePostgresClusterCreate validates a PostgresCluster on CREATE. +func ValidatePostgresClusterCreate(obj *enterpriseApi.PostgresCluster) field.ErrorList { + var allErrs field.ErrorList + + if len(obj.Spec.PgHBA) > 0 { + pgHBAPath := field.NewPath("spec").Child("pgHBA") + for _, re := range hba.ValidateRules(obj.Spec.PgHBA) { + allErrs = append(allErrs, field.Invalid( + pgHBAPath.Index(re.Index), + obj.Spec.PgHBA[re.Index], + re.Message)) + } + } + + return allErrs +} + +// ValidatePostgresClusterUpdate validates a PostgresCluster on UPDATE. 
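+// pg_hba rules carry no cross-object state, so UPDATE re-runs the CREATE
+// checks on the new object; oldObj is accepted only for signature symmetry
+// with the other validators and is intentionally unused.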
+func ValidatePostgresClusterUpdate(obj, oldObj *enterpriseApi.PostgresCluster) field.ErrorList { + return ValidatePostgresClusterCreate(obj) +} + +// GetPostgresClusterWarningsOnCreate returns warnings for PostgresCluster CREATE. +func GetPostgresClusterWarningsOnCreate(obj *enterpriseApi.PostgresCluster) []string { + return nil +} + +// GetPostgresClusterWarningsOnUpdate returns warnings for PostgresCluster UPDATE. +func GetPostgresClusterWarningsOnUpdate(obj, oldObj *enterpriseApi.PostgresCluster) []string { + return nil +} diff --git a/pkg/postgresql/cluster/adapter/webhook/postgrescluster_validation_test.go b/pkg/postgresql/cluster/adapter/webhook/postgrescluster_validation_test.go new file mode 100644 index 000000000..56ff34c9c --- /dev/null +++ b/pkg/postgresql/cluster/adapter/webhook/postgrescluster_validation_test.go @@ -0,0 +1,192 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package webhook + +import ( + "testing" + + enterpriseApi "github.com/splunk/splunk-operator/api/v4" + "github.com/stretchr/testify/assert" +) + +func TestValidatePostgresClusterCreate(t *testing.T) { + tests := []struct { + name string + obj *enterpriseApi.PostgresCluster + wantErrCount int + wantErrField string + }{ + { + name: "valid - no pgHBA rules", + obj: &enterpriseApi.PostgresCluster{ + Spec: enterpriseApi.PostgresClusterSpec{ + Class: "dev", + }, + }, + wantErrCount: 0, + }, + { + name: "valid - empty pgHBA", + obj: &enterpriseApi.PostgresCluster{ + Spec: enterpriseApi.PostgresClusterSpec{ + Class: "dev", + PgHBA: []string{}, + }, + }, + wantErrCount: 0, + }, + { + name: "valid - correct pgHBA rules", + obj: &enterpriseApi.PostgresCluster{ + Spec: enterpriseApi.PostgresClusterSpec{ + Class: "dev", + PgHBA: []string{ + "hostnossl all all 0.0.0.0/0 reject", + "hostssl all all 0.0.0.0/0 scram-sha-256", + }, + }, + }, + wantErrCount: 0, + }, + { + name: "invalid - bad connection type", + obj: &enterpriseApi.PostgresCluster{ + Spec: enterpriseApi.PostgresClusterSpec{ + Class: "dev", + PgHBA: []string{ + "hostx all all 0.0.0.0/0 md5", + }, + }, + }, + wantErrCount: 1, + wantErrField: "spec.pgHBA[0]", + }, + { + name: "invalid - bad CIDR", + obj: &enterpriseApi.PostgresCluster{ + Spec: enterpriseApi.PostgresClusterSpec{ + Class: "dev", + PgHBA: []string{ + "host all all 192.168.0.0/33 md5", + }, + }, + }, + wantErrCount: 1, + wantErrField: "spec.pgHBA[0]", + }, + { + name: "invalid - bad auth method", + obj: &enterpriseApi.PostgresCluster{ + Spec: enterpriseApi.PostgresClusterSpec{ + Class: "dev", + PgHBA: []string{ + "host all all 0.0.0.0/0 bogus-auth", + }, + }, + }, + wantErrCount: 1, + wantErrField: "spec.pgHBA[0]", + }, + { + name: "invalid - missing fields", + obj: &enterpriseApi.PostgresCluster{ + Spec: enterpriseApi.PostgresClusterSpec{ + Class: "dev", + PgHBA: []string{ + "host all all", + }, + }, + }, + wantErrCount: 1, + wantErrField: "spec.pgHBA[0]", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + errs := ValidatePostgresClusterCreate(tt.obj) + 
assert.Len(t, errs, tt.wantErrCount, "unexpected error count") + if tt.wantErrField != "" && len(errs) > 0 { + assert.Equal(t, tt.wantErrField, errs[0].Field, "unexpected error field") + } + }) + } +} + +func TestValidatePostgresClusterUpdate(t *testing.T) { + tests := []struct { + name string + obj *enterpriseApi.PostgresCluster + oldObj *enterpriseApi.PostgresCluster + wantErrCount int + }{ + { + name: "valid update - add pgHBA rules", + obj: &enterpriseApi.PostgresCluster{ + Spec: enterpriseApi.PostgresClusterSpec{ + Class: "dev", + PgHBA: []string{"host all all 0.0.0.0/0 scram-sha-256"}, + }, + }, + oldObj: &enterpriseApi.PostgresCluster{ + Spec: enterpriseApi.PostgresClusterSpec{ + Class: "dev", + }, + }, + wantErrCount: 0, + }, + { + name: "invalid update - bad pgHBA", + obj: &enterpriseApi.PostgresCluster{ + Spec: enterpriseApi.PostgresClusterSpec{ + Class: "dev", + PgHBA: []string{"hostx all all 0.0.0.0/0 md5"}, + }, + }, + oldObj: &enterpriseApi.PostgresCluster{ + Spec: enterpriseApi.PostgresClusterSpec{ + Class: "dev", + }, + }, + wantErrCount: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + errs := ValidatePostgresClusterUpdate(tt.obj, tt.oldObj) + assert.Len(t, errs, tt.wantErrCount, "unexpected error count") + }) + } +} + +func TestGetPostgresClusterWarningsOnCreate(t *testing.T) { + obj := &enterpriseApi.PostgresCluster{ + Spec: enterpriseApi.PostgresClusterSpec{Class: "dev"}, + } + assert.Empty(t, GetPostgresClusterWarningsOnCreate(obj)) +} + +func TestGetPostgresClusterWarningsOnUpdate(t *testing.T) { + obj := &enterpriseApi.PostgresCluster{ + Spec: enterpriseApi.PostgresClusterSpec{Class: "dev"}, + } + oldObj := &enterpriseApi.PostgresCluster{ + Spec: enterpriseApi.PostgresClusterSpec{Class: "dev"}, + } + assert.Empty(t, GetPostgresClusterWarningsOnUpdate(obj, oldObj)) +} diff --git a/pkg/postgresql/cluster/adapter/webhook/postgresclusterclass_validation.go b/pkg/postgresql/cluster/adapter/webhook/postgresclusterclass_validation.go new file mode 100644 index 000000000..28246cba4 --- /dev/null +++ b/pkg/postgresql/cluster/adapter/webhook/postgresclusterclass_validation.go @@ -0,0 +1,56 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package webhook + +import ( + "k8s.io/apimachinery/pkg/util/validation/field" + + enterpriseApi "github.com/splunk/splunk-operator/api/v4" + hba "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core" +) + +// ValidatePostgresClusterClassCreate validates a PostgresClusterClass on CREATE. 
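+// Class-level rules live under spec.config.pgHBA, so errors are reported
+// against that path; rule checking itself is delegated to the shared core
+// validator (hba.ValidateRules).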
+func ValidatePostgresClusterClassCreate(obj *enterpriseApi.PostgresClusterClass) field.ErrorList { + var allErrs field.ErrorList + + if obj.Spec.Config != nil && len(obj.Spec.Config.PgHBA) > 0 { + pgHBAPath := field.NewPath("spec").Child("config").Child("pgHBA") + for _, re := range hba.ValidateRules(obj.Spec.Config.PgHBA) { + allErrs = append(allErrs, field.Invalid( + pgHBAPath.Index(re.Index), + obj.Spec.Config.PgHBA[re.Index], + re.Message)) + } + } + + return allErrs +} + +// ValidatePostgresClusterClassUpdate validates a PostgresClusterClass on UPDATE. +func ValidatePostgresClusterClassUpdate(obj, oldObj *enterpriseApi.PostgresClusterClass) field.ErrorList { + return ValidatePostgresClusterClassCreate(obj) +} + +// GetPostgresClusterClassWarningsOnCreate returns warnings for PostgresClusterClass CREATE. +func GetPostgresClusterClassWarningsOnCreate(obj *enterpriseApi.PostgresClusterClass) []string { + return nil +} + +// GetPostgresClusterClassWarningsOnUpdate returns warnings for PostgresClusterClass UPDATE. +func GetPostgresClusterClassWarningsOnUpdate(obj, oldObj *enterpriseApi.PostgresClusterClass) []string { + return nil +} diff --git a/pkg/postgresql/cluster/adapter/webhook/postgresclusterclass_validation_test.go b/pkg/postgresql/cluster/adapter/webhook/postgresclusterclass_validation_test.go new file mode 100644 index 000000000..5f0bef95c --- /dev/null +++ b/pkg/postgresql/cluster/adapter/webhook/postgresclusterclass_validation_test.go @@ -0,0 +1,191 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package webhook + +import ( + "testing" + + enterpriseApi "github.com/splunk/splunk-operator/api/v4" + "github.com/stretchr/testify/assert" +) + +func TestValidatePostgresClusterClassCreate(t *testing.T) { + tests := []struct { + name string + obj *enterpriseApi.PostgresClusterClass + wantErrCount int + wantErrField string + }{ + { + name: "valid - no config", + obj: &enterpriseApi.PostgresClusterClass{ + Spec: enterpriseApi.PostgresClusterClassSpec{ + Provisioner: "postgresql.cnpg.io", + }, + }, + wantErrCount: 0, + }, + { + name: "valid - config without pgHBA", + obj: &enterpriseApi.PostgresClusterClass{ + Spec: enterpriseApi.PostgresClusterClassSpec{ + Provisioner: "postgresql.cnpg.io", + Config: &enterpriseApi.PostgresClusterClassConfig{}, + }, + }, + wantErrCount: 0, + }, + { + name: "valid - correct pgHBA rules", + obj: &enterpriseApi.PostgresClusterClass{ + Spec: enterpriseApi.PostgresClusterClassSpec{ + Provisioner: "postgresql.cnpg.io", + Config: &enterpriseApi.PostgresClusterClassConfig{ + PgHBA: []string{ + "hostnossl all all 0.0.0.0/0 reject", + "hostssl all all 0.0.0.0/0 scram-sha-256", + }, + }, + }, + }, + wantErrCount: 0, + }, + { + name: "invalid - bad connection type", + obj: &enterpriseApi.PostgresClusterClass{ + Spec: enterpriseApi.PostgresClusterClassSpec{ + Provisioner: "postgresql.cnpg.io", + Config: &enterpriseApi.PostgresClusterClassConfig{ + PgHBA: []string{ + "hostx all all 0.0.0.0/0 md5", + }, + }, + }, + }, + wantErrCount: 1, + wantErrField: "spec.config.pgHBA[0]", + }, + { + name: "invalid - bad CIDR in class", + obj: &enterpriseApi.PostgresClusterClass{ + Spec: enterpriseApi.PostgresClusterClassSpec{ + Provisioner: "postgresql.cnpg.io", + Config: &enterpriseApi.PostgresClusterClassConfig{ + PgHBA: []string{ + "host all all 256.1.1.1/24 md5", + }, + }, + }, + }, + wantErrCount: 1, + wantErrField: "spec.config.pgHBA[0]", + }, + { + name: "invalid - unknown auth method in class", + obj: &enterpriseApi.PostgresClusterClass{ + Spec: enterpriseApi.PostgresClusterClassSpec{ + Provisioner: "postgresql.cnpg.io", + Config: &enterpriseApi.PostgresClusterClassConfig{ + PgHBA: []string{ + "host all all 0.0.0.0/0 bogus", + }, + }, + }, + }, + wantErrCount: 1, + wantErrField: "spec.config.pgHBA[0]", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + errs := ValidatePostgresClusterClassCreate(tt.obj) + assert.Len(t, errs, tt.wantErrCount, "unexpected error count") + if tt.wantErrField != "" && len(errs) > 0 { + assert.Equal(t, tt.wantErrField, errs[0].Field, "unexpected error field") + } + }) + } +} + +func TestValidatePostgresClusterClassUpdate(t *testing.T) { + tests := []struct { + name string + obj *enterpriseApi.PostgresClusterClass + oldObj *enterpriseApi.PostgresClusterClass + wantErrCount int + }{ + { + name: "valid update", + obj: &enterpriseApi.PostgresClusterClass{ + Spec: enterpriseApi.PostgresClusterClassSpec{ + Provisioner: "postgresql.cnpg.io", + Config: &enterpriseApi.PostgresClusterClassConfig{ + PgHBA: []string{"host all all 0.0.0.0/0 scram-sha-256"}, + }, + }, + }, + oldObj: &enterpriseApi.PostgresClusterClass{ + Spec: enterpriseApi.PostgresClusterClassSpec{ + Provisioner: "postgresql.cnpg.io", + }, + }, + wantErrCount: 0, + }, + { + name: "invalid update - bad pgHBA", + obj: &enterpriseApi.PostgresClusterClass{ + Spec: enterpriseApi.PostgresClusterClassSpec{ + Provisioner: "postgresql.cnpg.io", + Config: &enterpriseApi.PostgresClusterClassConfig{ + PgHBA: []string{"host all all 0.0.0.0/0 fake-method"}, + }, + }, + }, + 
oldObj: &enterpriseApi.PostgresClusterClass{ + Spec: enterpriseApi.PostgresClusterClassSpec{ + Provisioner: "postgresql.cnpg.io", + }, + }, + wantErrCount: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + errs := ValidatePostgresClusterClassUpdate(tt.obj, tt.oldObj) + assert.Len(t, errs, tt.wantErrCount, "unexpected error count") + }) + } +} + +func TestGetPostgresClusterClassWarningsOnCreate(t *testing.T) { + obj := &enterpriseApi.PostgresClusterClass{ + Spec: enterpriseApi.PostgresClusterClassSpec{Provisioner: "postgresql.cnpg.io"}, + } + assert.Empty(t, GetPostgresClusterClassWarningsOnCreate(obj)) +} + +func TestGetPostgresClusterClassWarningsOnUpdate(t *testing.T) { + obj := &enterpriseApi.PostgresClusterClass{ + Spec: enterpriseApi.PostgresClusterClassSpec{Provisioner: "postgresql.cnpg.io"}, + } + oldObj := &enterpriseApi.PostgresClusterClass{ + Spec: enterpriseApi.PostgresClusterClassSpec{Provisioner: "postgresql.cnpg.io"}, + } + assert.Empty(t, GetPostgresClusterClassWarningsOnUpdate(obj, oldObj)) +} diff --git a/pkg/postgresql/cluster/core/hba.go b/pkg/postgresql/cluster/core/hba.go new file mode 100644 index 000000000..099597696 --- /dev/null +++ b/pkg/postgresql/cluster/core/hba.go @@ -0,0 +1,253 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package core + +import ( + "fmt" + "net" + "regexp" + "strings" +) + +var hbaConnectionTypes = map[string]bool{ + "local": true, + "host": true, + "hostssl": true, + "hostnossl": true, + "hostgssenc": true, + "hostnogssenc": true, +} + +var hbaAuthMethods = map[string]bool{ + "trust": true, + "reject": true, + "scram-sha-256": true, + "md5": true, + "password": true, + "gss": true, + "sspi": true, + "ident": true, + "peer": true, + "pam": true, + "ldap": true, + "radius": true, + "cert": true, + "oauth": true, +} + +var hbaSpecialAddresses = map[string]bool{ + "all": true, + "samehost": true, + "samenet": true, +} + +// tokenPattern splits on whitespace while keeping double-quoted strings intact. +var hbaTokenPattern = regexp.MustCompile(`(?:"+.*?"+|\S)+`) + +// hbaLabelPattern matches a valid DNS label sequence (hostname or domain suffix). +var hbaLabelPattern = regexp.MustCompile(`^[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?)*$`) + +// RuleError describes a validation error for a single pg_hba.conf rule. +type RuleError struct { + Index int + Message string +} + +// ValidateRules validates a slice of pg_hba.conf rule strings. +func ValidateRules(rules []string) []RuleError { + var errs []RuleError + for i, rule := range rules { + for _, msg := range validateRule(rule) { + errs = append(errs, RuleError{Index: i, Message: msg}) + } + } + return errs +} + +// validateRule validates a single pg_hba rule using positional parsing. 
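+// Blank rules, and rules that are empty once comments are stripped, are
+// accepted and produce no errors. A few examples, drawn from the tests in
+// this change:
+//
+//	validateRule("local all all peer")                      // valid local rule
+//	validateRule("hostssl all all 0.0.0.0/0 scram-sha-256") // valid host rule
+//	validateRule("hostx all all 0.0.0.0/0 md5")             // unknown connection type
+//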
+// pg_hba.conf has two formats:
+//
+//	local   DATABASE  USER  METHOD  [OPTIONS]
+//	host*   DATABASE  USER  ADDRESS  METHOD  [OPTIONS]
+//	host*   DATABASE  USER  IP-ADDRESS  NETMASK  METHOD  [OPTIONS]
+//
+// Validation order: connection type → minimum field count → auth method
+// (at a fixed positional index) → address for host* types. The IP+netmask
+// form is detected by checking whether tokens[4] parses as a valid IP.
+func validateRule(rule string) []string {
+	trimmed := strings.TrimSpace(rule)
+	if trimmed == "" {
+		return nil
+	}
+
+	tokens := tokenize(trimmed)
+	if len(tokens) == 0 {
+		return nil
+	}
+
+	var errs []string
+
+	connType := tokens[0]
+	if !hbaConnectionTypes[connType] {
+		return []string{fmt.Sprintf("unknown connection type %q", connType)}
+	}
+
+	isLocal := connType == "local"
+	minFields := 5 // TYPE DATABASE USER ADDRESS METHOD
+	addrHint := "ADDRESS "
+	if isLocal {
+		minFields = 4 // local DATABASE USER METHOD
+		addrHint = ""
+	}
+	if len(tokens) < minFields {
+		return []string{fmt.Sprintf("too few fields: expected at least %d (%s DATABASE USER %sMETHOD), got %d",
+			minFields, connType, addrHint, len(tokens))}
+	}
+
+	methodIdx := 3 // local: tokens[3]
+	if !isLocal {
+		if len(tokens) > 5 && net.ParseIP(tokens[4]) != nil {
+			methodIdx = 5
+		} else {
+			methodIdx = 4
+		}
+	}
+	if methodIdx >= len(tokens) {
+		return []string{"too few fields: missing auth method"}
+	}
+	method := tokens[methodIdx]
+	if !hbaAuthMethods[method] {
+		errs = append(errs, fmt.Sprintf("unknown auth method %q", method))
+	}
+
+	if !isLocal {
+		address := tokens[3]
+		if methodIdx == 5 {
+			if addrErr := validateIPNetmask(address, tokens[4]); addrErr != "" {
+				errs = append(errs, addrErr)
+			}
+		} else {
+			if addrErr := validateAddress(address); addrErr != "" {
+				errs = append(errs, addrErr)
+			}
+		}
+	}
+
+	return errs
+}
+
+// stripComment removes pg_hba.conf comments: a # outside double quotes starts
+// a comment that runs to the end of the line.
+func stripComment(line string) string {
+	inQuotes := false
+	for i, ch := range line {
+		switch ch {
+		case '"':
+			inQuotes = !inQuotes
+		case '#':
+			if !inQuotes {
+				return line[:i]
+			}
+		}
+	}
+	return line
+}
+
+// tokenize splits a rule string on whitespace, keeping double-quoted strings intact.
+// Comments (# to end of line, outside quotes) are stripped first.
+func tokenize(line string) []string {
+	stripped := stripComment(line)
+	matches := hbaTokenPattern.FindAllString(stripped, -1)
+	var tokens []string
+	for _, m := range matches {
+		if s := strings.TrimSpace(m); s != "" {
+			tokens = append(tokens, s)
+		}
+	}
+	return tokens
+}
+
+// validateAddress validates the address field for host* connection types.
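+// Accepted forms: a CIDR ("192.168.1.0/24", "::1/128"), a bare IP (paired with
+// a separate netmask field), a hostname ("db.example.com"), a leading-dot
+// domain suffix (".example.com"), or the keywords all, samehost, and samenet.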
+func validateAddress(address string) string { + if hbaSpecialAddresses[address] { + return "" + } + + // Domain suffix match: .example.com + if strings.HasPrefix(address, ".") && len(address) > 1 { + suffix := address[1:] + if hbaLabelPattern.MatchString(suffix) { + return "" + } + return fmt.Sprintf("invalid domain suffix %q", address) + } + + // CIDR notation + if strings.Contains(address, "/") { + if _, _, err := net.ParseCIDR(address); err != nil { + return fmt.Sprintf("invalid CIDR address %q: %v", address, err) + } + return "" + } + + // IP address without CIDR (used with separate netmask field) + if ip := net.ParseIP(address); ip != nil { + return "" + } + + // Hostname + if hbaLabelPattern.MatchString(address) { + return "" + } + + return fmt.Sprintf("invalid address %q: expected CIDR, IP, hostname, or special keyword (all, samehost, samenet)", address) +} + +// validateIPNetmask validates the IP + netmask form (two separate fields). +func validateIPNetmask(ip, mask string) string { + parsedIP := net.ParseIP(ip) + if parsedIP == nil { + return fmt.Sprintf("invalid IP address %q in IP/netmask pair", ip) + } + + parsedMask := net.ParseIP(mask) + if parsedMask == nil { + return fmt.Sprintf("invalid netmask %q: not a valid IP address", mask) + } + + // Verify the mask is a valid contiguous subnet mask. + // Convert to 4 or 16 bytes depending on IPv4/IPv6. + var maskBytes net.IPMask + if v4 := parsedMask.To4(); v4 != nil { + maskBytes = net.IPMask(v4) + } else { + maskBytes = net.IPMask(parsedMask.To16()) + } + + // net.IPMask.Size() returns (ones, bits); ones == 0 && bits == 0 means invalid mask + ones, bits := maskBytes.Size() + if ones == 0 && bits == 0 { + return fmt.Sprintf("invalid netmask %q: not a contiguous subnet mask", mask) + } + + // IP and mask must be the same address family + ipIs4 := parsedIP.To4() != nil + maskIs4 := parsedMask.To4() != nil + if ipIs4 != maskIs4 { + return fmt.Sprintf("IP %q and netmask %q are not the same address family", ip, mask) + } + + return "" +} diff --git a/pkg/postgresql/cluster/core/hba_unit_test.go b/pkg/postgresql/cluster/core/hba_unit_test.go new file mode 100644 index 000000000..dffcb1cec --- /dev/null +++ b/pkg/postgresql/cluster/core/hba_unit_test.go @@ -0,0 +1,364 @@ +/* +Copyright 2026. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package core
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestValidateRules(t *testing.T) {
+	t.Run("nil slice returns empty", func(t *testing.T) {
+		assert.Empty(t, ValidateRules(nil))
+	})
+
+	t.Run("empty slice returns empty", func(t *testing.T) {
+		assert.Empty(t, ValidateRules([]string{}))
+	})
+
+	t.Run("all valid rules returns empty", func(t *testing.T) {
+		rules := []string{
+			"local all all trust",
+			"host all all 0.0.0.0/0 scram-sha-256",
+			"hostssl all all 192.168.1.0/24 md5",
+		}
+		assert.Empty(t, ValidateRules(rules))
+	})
+
+	t.Run("mixed valid and invalid returns correct indices", func(t *testing.T) {
+		rules := []string{
+			"host all all 0.0.0.0/0 scram-sha-256",
+			"hostx all all 0.0.0.0/0 md5",
+			"host all all 0.0.0.0/0 md5",
+		}
+		errs := ValidateRules(rules)
+		require.Len(t, errs, 1)
+		assert.Equal(t, 1, errs[0].Index)
+		assert.Contains(t, errs[0].Message, "unknown connection type")
+	})
+
+	t.Run("multiple errors in different rules", func(t *testing.T) {
+		rules := []string{
+			"hostx all all 0.0.0.0/0 md5",
+			"host all all 192.168.0.0/33 bogus",
+		}
+		errs := ValidateRules(rules)
+		require.Len(t, errs, 3)
+		assert.Equal(t, 0, errs[0].Index)
+		assert.Equal(t, 1, errs[1].Index)
+		assert.Equal(t, 1, errs[2].Index)
+	})
+}
+
+func TestValidateRule(t *testing.T) {
+	// === Valid rules ===
+
+	validRules := []struct {
+		name string
+		rule string
+	}{
+		{"local basic", "local all all trust"},
+		{"local with peer", "local postgres postgres peer"},
+		{"host CIDR IPv4", "host all all 0.0.0.0/0 scram-sha-256"},
+		{"hostssl CIDR", "hostssl all all 192.168.1.0/24 md5"},
+		{"hostnossl reject", "hostnossl all all 0.0.0.0/0 reject"},
+		{"hostgssenc", "hostgssenc all all 0.0.0.0/0 gss"},
+		{"hostnogssenc", "hostnogssenc all all 0.0.0.0/0 scram-sha-256"},
+		{"host replication", "host replication all 10.0.0.0/8 password"},
+		{"host samehost", "host all all samehost trust"},
+		{"host samenet", "host all all samenet trust"},
+		{"host address all", "host all all all scram-sha-256"},
+		{"host domain suffix", "host all all .example.com cert"},
+		{"host IPv6", "host all all ::1/128 scram-sha-256"},
+		{"host IPv6 all", "host all all ::0/0 md5"},
+		{"host IP+netmask", "host all all 192.168.1.1 255.255.255.0 md5"},
+		{"host IP+netmask /8", "host all all 10.0.0.0 255.0.0.0 md5"},
+		{"inline comment", "host all all 0.0.0.0/0 md5 # office access"},
+		{"inline comment with spaces", "host all all 0.0.0.0/0 md5 # allow all"},
+		{"full-line comment", "# this is a comment"},
+		{"comment-only with spaces", "   # indented comment"},
+		{"host auth options", "host all all 0.0.0.0/0 ldap ldapserver=ldap.example.com ldapport=389"},
+		{"host option with equals", "host all all 0.0.0.0/0 ident map=omicron"},
+		{"host quoted value", `host all all 0.0.0.0/0 ldap ldapprefix="cn="`},
+		{"quoted db with equals", `host "db=name" all 0.0.0.0/0 md5`},
+		{"comma-separated db", "host db1,db2 all 0.0.0.0/0 md5"},
+		{"comma-separated user", "host all user1,user2 0.0.0.0/0 md5"},
+		{"host hostname", "host all all myhost.example.com md5"},
+		{"host with sspi", "host all all 0.0.0.0/0 sspi"},
+		{"host with ident", "host all all 0.0.0.0/0 ident"},
+		{"host with pam", "host all all 0.0.0.0/0 pam"},
+		{"host with radius", "host all all 0.0.0.0/0 radius"},
+		{"host with oauth (PG18)", "host all all 0.0.0.0/0 oauth"},
+		{"empty string", ""},
+		{"whitespace only", "   "},
+	}
+
+	for _, tc := range validRules {
+		t.Run("valid/"+tc.name, func(t *testing.T) {
+			errs := 
validateRule(tc.rule) + assert.Empty(t, errs, "expected no errors for rule %q, got: %v", tc.rule, errs) + }) + } + + // === Layer 0: connection type errors === + + t.Run("layer0/unknown connection type", func(t *testing.T) { + errs := validateRule("hostx all all 0.0.0.0/0 md5") + require.Len(t, errs, 1) + assert.Contains(t, errs[0], `unknown connection type "hostx"`) + }) + + t.Run("layer0/uppercase connection type", func(t *testing.T) { + errs := validateRule("HOST all all 0.0.0.0/0 md5") + require.Len(t, errs, 1) + assert.Contains(t, errs[0], `unknown connection type "HOST"`) + }) + + // === Layer 1: field count errors === + + t.Run("layer1/host missing method", func(t *testing.T) { + errs := validateRule("host all all 0.0.0.0/0") + require.Len(t, errs, 1) + assert.Contains(t, errs[0], "too few fields") + }) + + t.Run("layer1/host only three fields", func(t *testing.T) { + errs := validateRule("host all all") + require.Len(t, errs, 1) + assert.Contains(t, errs[0], "too few fields") + }) + + t.Run("layer1/local missing user and method", func(t *testing.T) { + errs := validateRule("local all") + require.Len(t, errs, 1) + assert.Contains(t, errs[0], "too few fields") + }) + + t.Run("layer1/local missing method", func(t *testing.T) { + errs := validateRule("local all all") + require.Len(t, errs, 1) + assert.Contains(t, errs[0], "too few fields") + }) + + // === Layer 2: auth method errors === + + t.Run("layer2/unknown auth method", func(t *testing.T) { + errs := validateRule("host all all 0.0.0.0/0 bogus") + require.Len(t, errs, 1) + assert.Contains(t, errs[0], `unknown auth method "bogus"`) + }) + + t.Run("layer2/typo scram-sha256", func(t *testing.T) { + errs := validateRule("host all all 0.0.0.0/0 scram-sha256") + require.Len(t, errs, 1) + assert.Contains(t, errs[0], `unknown auth method "scram-sha256"`) + }) + + t.Run("layer2/local unknown method", func(t *testing.T) { + errs := validateRule("local all all unknown") + require.Len(t, errs, 1) + assert.Contains(t, errs[0], `unknown auth method "unknown"`) + }) + + // === Layer 3: address errors === + + t.Run("layer3/invalid CIDR mask too large", func(t *testing.T) { + errs := validateRule("host all all 192.168.0.0/33 md5") + require.Len(t, errs, 1) + assert.Contains(t, errs[0], "invalid CIDR") + }) + + t.Run("layer3/invalid IP in CIDR", func(t *testing.T) { + errs := validateRule("host all all 256.1.1.1/24 md5") + require.Len(t, errs, 1) + assert.Contains(t, errs[0], "invalid CIDR") + }) + + t.Run("layer3/garbage address", func(t *testing.T) { + errs := validateRule("host all all not@valid md5") + require.Len(t, errs, 1) + assert.Contains(t, errs[0], "invalid address") + }) + + // === Layer 3: netmask errors === + + t.Run("layer3/non-contiguous netmask", func(t *testing.T) { + errs := validateRule("host all all 10.0.0.1 255.0.255.0 md5") + require.Len(t, errs, 1) + assert.Contains(t, errs[0], "not a contiguous subnet mask") + }) + + t.Run("layer3/invalid IP in netmask pair", func(t *testing.T) { + errs := validateRule("host all all 999.0.0.1 255.255.255.0 md5") + require.Len(t, errs, 1) + assert.Contains(t, errs[0], "invalid IP address") + }) + + t.Run("layer2/garbage where netmask expected", func(t *testing.T) { + errs := validateRule("host all all 10.0.0.1 notamask md5") + require.Len(t, errs, 1) + assert.Contains(t, errs[0], "unknown auth method") + }) + + // === Multiple errors in one rule === + + t.Run("multiple/bad method and bad address", func(t *testing.T) { + errs := validateRule("host all all 192.168.0.0/33 bogus") + 
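// Both the out-of-range CIDR (/33) and the unknown auth method are
+		// reported: one malformed rule can carry multiple independent errors.
+		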
assert.Len(t, errs, 2) + }) +} + +func TestTokenize(t *testing.T) { + t.Run("simple fields", func(t *testing.T) { + tokens := tokenize("host all all 0.0.0.0/0 md5") + assert.Equal(t, []string{"host", "all", "all", "0.0.0.0/0", "md5"}, tokens) + }) + + t.Run("multiple spaces", func(t *testing.T) { + tokens := tokenize("host all all 0.0.0.0/0 md5") + assert.Equal(t, []string{"host", "all", "all", "0.0.0.0/0", "md5"}, tokens) + }) + + t.Run("quoted string preserved", func(t *testing.T) { + tokens := tokenize(`host all all 0.0.0.0/0 ldap ldapprefix="cn="`) + assert.Equal(t, []string{"host", "all", "all", "0.0.0.0/0", "ldap", `ldapprefix="cn="`}, tokens) + }) + + t.Run("auth option with equals", func(t *testing.T) { + tokens := tokenize("host all all 0.0.0.0/0 ident map=omicron") + assert.Equal(t, []string{"host", "all", "all", "0.0.0.0/0", "ident", "map=omicron"}, tokens) + }) + + t.Run("empty string", func(t *testing.T) { + tokens := tokenize("") + assert.Empty(t, tokens) + }) + + t.Run("inline comment stripped", func(t *testing.T) { + tokens := tokenize("host all all 0.0.0.0/0 md5 # office access") + assert.Equal(t, []string{"host", "all", "all", "0.0.0.0/0", "md5"}, tokens) + }) + + t.Run("full-line comment", func(t *testing.T) { + tokens := tokenize("# this is a comment") + assert.Empty(t, tokens) + }) + + t.Run("hash inside quotes not treated as comment", func(t *testing.T) { + tokens := tokenize(`host all all 0.0.0.0/0 ldap ldapprefix="cn=#test"`) + assert.Equal(t, []string{"host", "all", "all", "0.0.0.0/0", "ldap", `ldapprefix="cn=#test"`}, tokens) + }) +} + +func TestStripComment(t *testing.T) { + t.Run("no comment", func(t *testing.T) { + assert.Equal(t, "host all all 0.0.0.0/0 md5", stripComment("host all all 0.0.0.0/0 md5")) + }) + + t.Run("inline comment", func(t *testing.T) { + assert.Equal(t, "host all all 0.0.0.0/0 md5 ", stripComment("host all all 0.0.0.0/0 md5 # comment")) + }) + + t.Run("full-line comment", func(t *testing.T) { + assert.Equal(t, "", stripComment("# full line comment")) + }) + + t.Run("hash inside quotes preserved", func(t *testing.T) { + assert.Equal(t, `host all all 0.0.0.0/0 ldap ldapprefix="cn=#x"`, stripComment(`host all all 0.0.0.0/0 ldap ldapprefix="cn=#x"`)) + }) + + t.Run("hash after closing quote", func(t *testing.T) { + assert.Equal(t, `host all all 0.0.0.0/0 ldap ldapprefix="cn" `, stripComment(`host all all 0.0.0.0/0 ldap ldapprefix="cn" # comment`)) + }) +} + +func TestValidateIPNetmask(t *testing.T) { + t.Run("valid IPv4", func(t *testing.T) { + assert.Empty(t, validateIPNetmask("192.168.1.0", "255.255.255.0")) + }) + + t.Run("valid /8", func(t *testing.T) { + assert.Empty(t, validateIPNetmask("10.0.0.0", "255.0.0.0")) + }) + + t.Run("invalid IP", func(t *testing.T) { + result := validateIPNetmask("999.0.0.1", "255.255.255.0") + assert.Contains(t, result, "invalid IP address") + }) + + t.Run("invalid mask not an IP", func(t *testing.T) { + result := validateIPNetmask("10.0.0.1", "notamask") + assert.Contains(t, result, "invalid netmask") + }) + + t.Run("non-contiguous mask", func(t *testing.T) { + result := validateIPNetmask("10.0.0.1", "255.0.255.0") + assert.Contains(t, result, "not a contiguous subnet mask") + }) +} + +func TestValidateAddress(t *testing.T) { + validAddresses := []string{ + "0.0.0.0/0", + "192.168.1.0/24", + "10.0.0.0/8", + "::1/128", + "::0/0", + "all", + "samehost", + "samenet", + ".example.com", + ".sub.domain.com", + "192.168.1.1", + "myhost.example.com", + "my-host", + "localhost", + } + + for _, addr := range 
validAddresses { + t.Run("valid/"+addr, func(t *testing.T) { + assert.Empty(t, validateAddress(addr)) + }) + } + + invalidAddresses := []struct { + name string + address string + errMsg string + }{ + {"CIDR mask too large", "192.168.0.0/33", "invalid CIDR"}, + {"invalid IP in CIDR", "256.1.1.1/24", "invalid CIDR"}, + {"bad CIDR format", "999.999.999.999/32", "invalid CIDR"}, + {"special chars", "host@name", "invalid address"}, + {"spaces in addr", "my host", "invalid address"}, + {"double dot hostname", "myhost..example.com", "invalid address"}, + {"leading dash hostname", "-myhost", "invalid address"}, + {"trailing dash hostname", "myhost-", "invalid address"}, + {"double dot domain suffix", ".foo..bar", "invalid domain suffix"}, + {"dash-prefixed domain suffix", ".-bad", "invalid domain suffix"}, + {"trailing dash domain suffix", ".bad-", "invalid domain suffix"}, + } + + for _, tc := range invalidAddresses { + t.Run("invalid/"+tc.name, func(t *testing.T) { + result := validateAddress(tc.address) + assert.Contains(t, result, tc.errMsg) + }) + } +} diff --git a/pkg/splunk/enterprise/validation/registry.go b/pkg/splunk/enterprise/validation/registry.go index 98b386f18..5eab98402 100644 --- a/pkg/splunk/enterprise/validation/registry.go +++ b/pkg/splunk/enterprise/validation/registry.go @@ -20,6 +20,7 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" enterpriseApi "github.com/splunk/splunk-operator/api/v4" + pgwebhook "github.com/splunk/splunk-operator/pkg/postgresql/cluster/adapter/webhook" ) // GVR constants for all Splunk Enterprise CRDs @@ -71,6 +72,18 @@ var ( Version: "v4", Resource: "monitoringconsoles", } + + PostgresClusterGVR = schema.GroupVersionResource{ + Group: "enterprise.splunk.com", + Version: "v4", + Resource: "postgresclusters", + } + + PostgresClusterClassGVR = schema.GroupVersionResource{ + Group: "enterprise.splunk.com", + Version: "v4", + Resource: "postgresclusterclasses", + } ) // DefaultValidators is the registry of validators for all Splunk Enterprise CRDs @@ -180,4 +193,26 @@ var DefaultValidators = map[schema.GroupVersionResource]Validator{ Kind: "MonitoringConsole", }, }, + + PostgresClusterGVR: &GenericValidator[*enterpriseApi.PostgresCluster]{ + ValidateCreateFunc: pgwebhook.ValidatePostgresClusterCreate, + ValidateUpdateFunc: pgwebhook.ValidatePostgresClusterUpdate, + WarningsOnCreateFunc: pgwebhook.GetPostgresClusterWarningsOnCreate, + WarningsOnUpdateFunc: pgwebhook.GetPostgresClusterWarningsOnUpdate, + GroupKind: schema.GroupKind{ + Group: "enterprise.splunk.com", + Kind: "PostgresCluster", + }, + }, + + PostgresClusterClassGVR: &GenericValidator[*enterpriseApi.PostgresClusterClass]{ + ValidateCreateFunc: pgwebhook.ValidatePostgresClusterClassCreate, + ValidateUpdateFunc: pgwebhook.ValidatePostgresClusterClassUpdate, + WarningsOnCreateFunc: pgwebhook.GetPostgresClusterClassWarningsOnCreate, + WarningsOnUpdateFunc: pgwebhook.GetPostgresClusterClassWarningsOnUpdate, + GroupKind: schema.GroupKind{ + Group: "enterprise.splunk.com", + Kind: "PostgresClusterClass", + }, + }, } diff --git a/pkg/splunk/enterprise/validation/server.go b/pkg/splunk/enterprise/validation/server.go index c94e03f45..882f89878 100644 --- a/pkg/splunk/enterprise/validation/server.go +++ b/pkg/splunk/enterprise/validation/server.go @@ -80,7 +80,7 @@ func (s *WebhookServer) Start(ctx context.Context) error { mux := http.NewServeMux() // Register validation endpoint - mux.HandleFunc("/validate", s.handleValidate) + mux.HandleFunc("/validate", s.HandleValidate) // Register health check 
endpoint mux.HandleFunc("/readyz", s.handleReadyz) @@ -140,8 +140,8 @@ func (s *WebhookServer) Start(ctx context.Context) error { } } -// handleValidate handles validation requests -func (s *WebhookServer) handleValidate(w http.ResponseWriter, r *http.Request) { +// HandleValidate handles validation requests +func (s *WebhookServer) HandleValidate(w http.ResponseWriter, r *http.Request) { reqLog := log.FromContext(r.Context()).WithName("webhook-server") reqLog.V(1).Info("Received validation request", "method", r.Method, "path", r.URL.Path) diff --git a/pkg/splunk/enterprise/validation/server_test.go b/pkg/splunk/enterprise/validation/server_test.go index 0b2543014..020825041 100644 --- a/pkg/splunk/enterprise/validation/server_test.go +++ b/pkg/splunk/enterprise/validation/server_test.go @@ -253,7 +253,7 @@ func TestHandleValidate(t *testing.T) { req.Header.Set("Content-Type", "application/json") rr := httptest.NewRecorder() - server.handleValidate(rr, req) + server.HandleValidate(rr, req) if rr.Code != tt.wantStatusCode { t.Errorf("expected status code %d, got %d", tt.wantStatusCode, rr.Code) @@ -382,7 +382,7 @@ func TestHandleValidateWithWarnings(t *testing.T) { req.Header.Set("Content-Type", "application/json") rr := httptest.NewRecorder() - server.handleValidate(rr, req) + server.HandleValidate(rr, req) if rr.Code != http.StatusOK { t.Errorf("expected status code %d, got %d", http.StatusOK, rr.Code)
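
To make the registry wiring concrete: a minimal sketch of how a create-validator in the adapter package can surface the core package's RuleError values as field errors with paths like spec.config.pgHBA[0], which is what the webhook tests above assert on. The helper name validatePgHBA and its exact placement are hypothetical; the adapter's real implementation is outside this excerpt. The import paths and the core.ValidateRules/RuleError API come from the patch itself.

package webhook

import (
	"k8s.io/apimachinery/pkg/util/validation/field"

	enterpriseApi "github.com/splunk/splunk-operator/api/v4"
	hbacore "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core"
)

// validatePgHBA (hypothetical helper) maps core rule errors onto indexed
// field paths such as spec.config.pgHBA[2].
func validatePgHBA(spec *enterpriseApi.PostgresClusterClassSpec) field.ErrorList {
	var errs field.ErrorList
	if spec.Config == nil || len(spec.Config.PgHBA) == 0 {
		return errs // pgHBA is optional; nothing to check
	}
	base := field.NewPath("spec", "config", "pgHBA")
	// Each RuleError carries the offending rule's index, so it maps 1:1
	// onto an indexed child of spec.config.pgHBA.
	for _, re := range hbacore.ValidateRules(spec.Config.PgHBA) {
		errs = append(errs, field.Invalid(base.Index(re.Index), spec.Config.PgHBA[re.Index], re.Message))
	}
	return errs
}

Because validateRule emits one message per defect, a single bad rule may yield several errors at the same index, which is exactly what the "multiple errors" unit tests exercise.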