From d5f2eb293b6ab0764069bc92a8cd0a978c241d13 Mon Sep 17 00:00:00 2001 From: Kilian Ries Date: Mon, 29 Jun 2026 17:48:36 +0200 Subject: [PATCH] Add automated backup and restore via dedicated CRDs Introduces operator-managed backup and restore for ClickHouse using clickhouse-backup, exposed through three new custom resources in the clickhouse.altinity.com/v1 API group: - ClickHouseBackup (chb): one-off backup -> Kubernetes Job - ClickHouseBackupSchedule (chbs): recurring backup -> managed CronJob - ClickHouseRestore (chr): one-off restore -> Kubernetes Job The controllers follow the existing ClickHouseKeeper controller-runtime pattern. clickhouse-backup runs as a sidecar (a documented prerequisite); the generated jobs trigger it remotely through the system.backup_actions integration table, so no backup logic is reimplemented in the operator. Cluster-aware: backs up one replica per shard for Replicated* tables (AllReplicas opt-in for non-replicated data); on restore it applies the schema on the first replica per shard via ON CLUSTER (requires the sidecar's restore_schema_on_cluster) and the data on the first replica, letting native replication synchronize the remaining replicas. Restore safety follows the conventions of mature DB operators: preflight validation (target CHI Completed, topology reachable) and an overwrite guard that refuses a non-empty target unless overwrite=true. Also adds: selective (tables/partitions) and incremental (--diff-from-remote) backups; remote-backup retention (keepLastRemote); optional post-backup verification; Prometheus metrics on the operator's existing :9999 endpoint plus Kubernetes Events; and annotation-driven bootstrap-from-backup for new installations. Compression and encryption are documented as clickhouse-backup sidecar settings. Includes the CRDs, RBAC (incl. batch jobs/cronjobs), regenerated install bundles and Helm chart, documentation and examples, Go unit tests and a TestFlows e2e test. Refs #1795, #862. 
Supersedes the gRPC-plugin approach of #1798. Co-Authored-By: Claude Opus 4.8 Signed-off-by: Kilian Ries --- cmd/operator/app/main.go | 20 + cmd/operator/app/thread_backup.go | 138 +++++ .../cat-clickhouse-operator-install-yaml.sh | 8 + ...aml-template-01-section-crd-04-backup.yaml | 416 ++++++++++++++ ...yaml-template-02-section-rbac-02-role.yaml | 54 ++ ...khousebackups.clickhouse.altinity.com.yaml | 147 +++++ ...ckupschedules.clickhouse.altinity.com.yaml | 144 +++++ ...houserestores.clickhouse.altinity.com.yaml | 123 ++++ ...rRole-clickhouse-operator-kube-system.yaml | 51 ++ .../generated/Role-clickhouse-operator.yaml | 51 ++ .../clickhouse-operator-install-ansible.yaml | 471 ++++++++++++++++ ...house-operator-install-bundle-v1beta1.yaml | 521 ++++++++++++++++- .../clickhouse-operator-install-bundle.yaml | 525 ++++++++++++++++++ ...use-operator-install-template-v1beta1.yaml | 469 +++++++++++++++- .../clickhouse-operator-install-template.yaml | 471 ++++++++++++++++ .../clickhouse-operator-install-tf.yaml | 471 ++++++++++++++++ deploy/operator/parts/crd.yaml | 417 ++++++++++++++ docs/backup.md | 173 ++++++ .../01-prerequisite-chi-with-sidecar.yaml | 101 ++++ docs/chb-examples/02-backup-once.yaml | 17 + docs/chb-examples/03-backup-schedule.yaml | 19 + docs/chb-examples/04-restore.yaml | 20 + .../v1/api_register.go | 6 + .../v1/api_resources.go | 3 + .../clickhouse.altinity.com/v1/type_chb.go | 134 +++++ .../clickhouse.altinity.com/v1/type_chbs.go | 104 ++++ .../clickhouse.altinity.com/v1/type_chr.go | 94 ++++ .../v1/zz_generated.deepcopy.go | 370 ++++++++++++ pkg/controller/chbackup/backup_controller.go | 204 +++++++ .../chbackup/backup_controller_test.go | 144 +++++ .../chbackup/bootstrap_controller.go | 106 ++++ .../chbackup/bootstrap_controller_test.go | 79 +++ pkg/controller/chbackup/helpers.go | 100 ++++ pkg/controller/chbackup/metrics/metrics.go | 134 +++++ pkg/controller/chbackup/restore_controller.go | 148 +++++ .../chbackup/schedule_controller.go | 93 ++++ 
pkg/model/chbackup/builder.go | 253 +++++++++ pkg/model/chbackup/builder_test.go | 227 ++++++++ pkg/model/chbackup/options_test.go | 90 +++ pkg/model/chbackup/script.go | 188 +++++++ pkg/model/chbackup/topology.go | 137 +++++ .../manifests/chb/test-clickhousebackup.yaml | 8 + .../chbs/test-clickhousebackupschedule.yaml | 11 + .../manifests/chr/test-clickhouserestore.yaml | 9 + tests/e2e/test_backup_restore.py | 131 +++++ tests/regression.py | 1 + 46 files changed, 7598 insertions(+), 3 deletions(-) create mode 100644 cmd/operator/app/thread_backup.go create mode 100644 deploy/builder/templates-install-bundle/clickhouse-operator-install-yaml-template-01-section-crd-04-backup.yaml create mode 100644 deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousebackups.clickhouse.altinity.com.yaml create mode 100644 deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousebackupschedules.clickhouse.altinity.com.yaml create mode 100644 deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouserestores.clickhouse.altinity.com.yaml create mode 100644 docs/backup.md create mode 100644 docs/chb-examples/01-prerequisite-chi-with-sidecar.yaml create mode 100644 docs/chb-examples/02-backup-once.yaml create mode 100644 docs/chb-examples/03-backup-schedule.yaml create mode 100644 docs/chb-examples/04-restore.yaml create mode 100644 pkg/apis/clickhouse.altinity.com/v1/type_chb.go create mode 100644 pkg/apis/clickhouse.altinity.com/v1/type_chbs.go create mode 100644 pkg/apis/clickhouse.altinity.com/v1/type_chr.go create mode 100644 pkg/controller/chbackup/backup_controller.go create mode 100644 pkg/controller/chbackup/backup_controller_test.go create mode 100644 pkg/controller/chbackup/bootstrap_controller.go create mode 100644 pkg/controller/chbackup/bootstrap_controller_test.go create mode 100644 pkg/controller/chbackup/helpers.go create mode 100644 pkg/controller/chbackup/metrics/metrics.go create mode 100644 
pkg/controller/chbackup/restore_controller.go create mode 100644 pkg/controller/chbackup/schedule_controller.go create mode 100644 pkg/model/chbackup/builder.go create mode 100644 pkg/model/chbackup/builder_test.go create mode 100644 pkg/model/chbackup/options_test.go create mode 100644 pkg/model/chbackup/script.go create mode 100644 pkg/model/chbackup/topology.go create mode 100644 tests/e2e/manifests/chb/test-clickhousebackup.yaml create mode 100644 tests/e2e/manifests/chbs/test-clickhousebackupschedule.yaml create mode 100644 tests/e2e/manifests/chr/test-clickhouserestore.yaml create mode 100644 tests/e2e/test_backup_restore.py diff --git a/cmd/operator/app/main.go b/cmd/operator/app/main.go index 61bfb132e..b98e16bb5 100644 --- a/cmd/operator/app/main.go +++ b/cmd/operator/app/main.go @@ -99,6 +99,7 @@ func Run() { launchClickHouse(ctx, &wg) launchClickHouseReconcilerMetricsExporter(ctx, &wg) launchKeeper(ctx, &wg) + launchBackup(ctx, &wg) // Wait for completion <-ctx.Done() @@ -142,6 +143,25 @@ func launchKeeper(ctx context.Context, wg *sync.WaitGroup) { }() } +func launchBackup(ctx context.Context, wg *sync.WaitGroup) { + backupErr := initBackup(ctx) + wg.Add(1) + go func() { + defer wg.Done() + if backupErr == nil { + log.Info("Starting backup") + backupErr = runBackup(ctx) + if backupErr == nil { + log.Info("Starting backup OK") + } else { + log.Warning("Starting backup FAILED with err: %v", backupErr) + } + } else { + log.Warning("Starting backup skipped due to failed initialization with err: %v", backupErr) + } + }() +} + // setupSignalsNotification sets up OS signals func setupSignalsNotification(cancel context.CancelFunc) { stopChan := make(chan os.Signal, 2) diff --git a/cmd/operator/app/thread_backup.go b/cmd/operator/app/thread_backup.go new file mode 100644 index 000000000..e85b86288 --- /dev/null +++ b/cmd/operator/app/thread_backup.go @@ -0,0 +1,138 @@ +// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package app + +import ( + "context" + + "github.com/go-logr/logr" + + batchv1 "k8s.io/api/batch/v1" + apiMachineryRuntime "k8s.io/apimachinery/pkg/runtime" + clientGoScheme "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" + ctrlRuntime "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/cache" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + + api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" + "github.com/altinity/clickhouse-operator/pkg/chop" + backup "github.com/altinity/clickhouse-operator/pkg/controller/chbackup" +) + +var ( + backupScheme *apiMachineryRuntime.Scheme + backupManager ctrlRuntime.Manager + backupLogger logr.Logger +) + +func initBackup(ctx context.Context) error { + var err error + + backupLogger = ctrl.Log.WithName("backup-runner") + + backupScheme = apiMachineryRuntime.NewScheme() + if err = clientGoScheme.AddToScheme(backupScheme); err != nil { + backupLogger.Error(err, "init backup - unable to clientGoScheme.AddToScheme") + return err + } + // Registers ClickHouseInstallation along with ClickHouseBackup/Schedule/Restore kinds. 
+ if err = api.AddToScheme(backupScheme); err != nil { + backupLogger.Error(err, "init backup - unable to api.AddToScheme") + return err + } + + defaultNamespaces := make(map[string]cache.Config) + for _, ns := range chop.Config().GetCacheNamespaces() { + defaultNamespaces[ns] = cache.Config{} + } + backupManager, err = ctrlRuntime.NewManager(ctrlRuntime.GetConfigOrDie(), ctrlRuntime.Options{ + Scheme: backupScheme, + Cache: cache.Options{ + DefaultNamespaces: defaultNamespaces, + }, + // Disable the metrics listener: the keeper manager owns it on this pod. + Metrics: metricsserver.Options{BindAddress: "0"}, + }) + if err != nil { + backupLogger.Error(err, "init backup - unable to ctrlRuntime.NewManager") + return err + } + + recorder := backupManager.GetEventRecorderFor("clickhouse-backup") + + if err = ctrlRuntime. + NewControllerManagedBy(backupManager). + For(&api.ClickHouseBackup{}). + Owns(&batchv1.Job{}). + Complete(&backup.BackupController{ + Client: backupManager.GetClient(), + Scheme: backupManager.GetScheme(), + Recorder: recorder, + }); err != nil { + backupLogger.Error(err, "init backup - unable to build ClickHouseBackup controller") + return err + } + + if err = ctrlRuntime. + NewControllerManagedBy(backupManager). + For(&api.ClickHouseBackupSchedule{}). + Owns(&batchv1.CronJob{}). + Complete(&backup.ScheduleController{ + Client: backupManager.GetClient(), + Scheme: backupManager.GetScheme(), + }); err != nil { + backupLogger.Error(err, "init backup - unable to build ClickHouseBackupSchedule controller") + return err + } + + if err = ctrlRuntime. + NewControllerManagedBy(backupManager). + For(&api.ClickHouseRestore{}). + Owns(&batchv1.Job{}). 
+ Complete(&backup.RestoreController{ + Client: backupManager.GetClient(), + Scheme: backupManager.GetScheme(), + Recorder: recorder, + }); err != nil { + backupLogger.Error(err, "init backup - unable to build ClickHouseRestore controller") + return err + } + + // Bootstrap-from-backup: watch CHIs and auto-restore when annotated. + if err = ctrlRuntime. + NewControllerManagedBy(backupManager). + For(&api.ClickHouseInstallation{}). + Complete(&backup.BootstrapController{ + Client: backupManager.GetClient(), + Scheme: backupManager.GetScheme(), + Recorder: recorder, + }); err != nil { + backupLogger.Error(err, "init backup - unable to build bootstrap controller") + return err + } + + // Initialization successful + return nil +} + +func runBackup(ctx context.Context) error { + if err := backupManager.Start(ctx); err != nil { + backupLogger.Error(err, "run backup - unable to backupManager.Start") + return err + } + // Run successful + return nil +} diff --git a/deploy/builder/cat-clickhouse-operator-install-yaml.sh b/deploy/builder/cat-clickhouse-operator-install-yaml.sh index 2e1940ae8..c54f68937 100755 --- a/deploy/builder/cat-clickhouse-operator-install-yaml.sh +++ b/deploy/builder/cat-clickhouse-operator-install-yaml.sh @@ -154,6 +154,14 @@ if [[ "${MANIFEST_PRINT_CRD}" == "yes" ]]; then cat "${TEMPLATES_DIR}/${SECTION_FILE_NAME}" | \ OPERATOR_VERSION="${OPERATOR_VERSION}" \ envsubst + + # Render Backup CRDs (ClickHouseBackup, ClickHouseBackupSchedule, ClickHouseRestore) + SECTION_FILE_NAME="clickhouse-operator-install-yaml-template-01-section-crd-04-backup.yaml" + ensure_file "${TEMPLATES_DIR}" "${SECTION_FILE_NAME}" "${REPO_PATH_TEMPLATES_PATH}" + render_separator + cat "${TEMPLATES_DIR}/${SECTION_FILE_NAME}" | \ + OPERATOR_VERSION="${OPERATOR_VERSION}" \ + envsubst fi if [[ "${MANIFEST_PRINT_RBAC_CLUSTERED}" == "yes" || "${MANIFEST_PRINT_RBAC_NAMESPACED}" == "yes" ]]; then diff --git 
a/deploy/builder/templates-install-bundle/clickhouse-operator-install-yaml-template-01-section-crd-04-backup.yaml b/deploy/builder/templates-install-bundle/clickhouse-operator-install-yaml-template-01-section-crd-04-backup.yaml new file mode 100644 index 000000000..7a45525e0 --- /dev/null +++ b/deploy/builder/templates-install-bundle/clickhouse-operator-install-yaml-template-01-section-crd-04-backup.yaml @@ -0,0 +1,416 @@ +# Template Parameters: +# +# OPERATOR_VERSION=${OPERATOR_VERSION} +# +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhousebackups.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: ${OPERATOR_VERSION} +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseBackup + singular: clickhousebackup + plural: clickhousebackups + shortNames: + - chb + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: backup + type: string + description: Remote backup name + jsonPath: .status.backupName + - name: phase + type: string + description: Backup phase + jsonPath: .status.phase + - name: duration + type: integer + description: Backup duration (seconds) + jsonPath: .status.durationSeconds + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseBackup defines a one-off backup of a ClickHouseInstallation driven by clickhouse-backup" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace" + backupName: + type: string + description: "Optional explicit remote 
backup name; generated when empty" + schemaOnly: + type: boolean + description: "Back up table/database definitions only (no data)" + replicaSelection: + type: string + description: "Which replicas to back up" + enum: + - FirstPerShard + - AllReplicas + image: + type: string + description: "Optional container image override for the trigger job" + clickHouseCredentialsSecretName: + type: string + description: "Optional Secret with CLICKHOUSE_USER/CLICKHOUSE_PASSWORD" + tables: + type: string + description: "Optional clickhouse-backup --tables pattern to back up only matching tables" + partitions: + type: array + nullable: true + items: + type: string + description: "Optional partition ids to back up" + diffFromRemote: + type: string + description: "Existing remote backup name to make this an incremental backup" + keepLastRemote: + type: integer + format: int32 + description: "Keep only the N most recent remote backups (retention)" + verify: + type: boolean + description: "Run a verification job after the backup to confirm it is restorable" + status: + type: object + properties: + phase: + type: string + backupName: + type: string + jobName: + type: string + startTime: + type: string + format: date-time + nullable: true + completionTime: + type: string + format: date-time + nullable: true + durationSeconds: + type: integer + format: int64 + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhousebackupschedules.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: ${OPERATOR_VERSION} +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: 
ClickHouseBackupSchedule + singular: clickhousebackupschedule + plural: clickhousebackupschedules + shortNames: + - chbs + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: schedule + type: string + description: Cron schedule + jsonPath: .spec.schedule + - name: suspend + type: boolean + description: Whether the schedule is suspended + jsonPath: .spec.suspend + - name: last-schedule + type: date + jsonPath: .status.lastScheduleTime + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseBackupSchedule defines a recurring backup of a ClickHouseInstallation" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + - schedule + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace" + schedule: + type: string + description: "Cron schedule in standard Kubernetes CronJob format" + suspend: + type: boolean + description: "Pause creation of new backup jobs" + concurrencyPolicy: + type: string + description: "How to treat concurrent executions" + enum: + - Allow + - Forbid + - Replace + startingDeadlineSeconds: + type: integer + format: int64 + successfulJobsHistoryLimit: + type: integer + format: int32 + failedJobsHistoryLimit: + type: integer + format: int32 + backupTemplate: + type: object + description: "Backup specification stamped out on each scheduled run" + properties: + backupNamePrefix: + type: string + schemaOnly: + type: boolean + replicaSelection: + type: string + enum: + - FirstPerShard + - AllReplicas + image: + type: string + clickHouseCredentialsSecretName: + type: string + tables: + type: 
string + partitions: + type: array + nullable: true + items: + type: string + keepLastRemote: + type: integer + format: int32 + status: + type: object + properties: + cronJobName: + type: string + lastScheduleTime: + type: string + format: date-time + nullable: true + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhouserestores.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: ${OPERATOR_VERSION} +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseRestore + singular: clickhouserestore + plural: clickhouserestores + shortNames: + - chr + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: backup + type: string + description: Remote backup name being restored + jsonPath: .spec.backupName + - name: phase + type: string + description: Restore phase + jsonPath: .status.phase + - name: duration + type: integer + description: Restore duration (seconds) + jsonPath: .status.durationSeconds + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseRestore defines a one-off restore of a remote backup into a ClickHouseInstallation" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + - backupName + properties: + clickHouseInstallation: + type: string + 
description: "Name of the target ClickHouseInstallation in the same namespace (a fresh, empty installation is recommended)" + backupName: + type: string + description: "Remote backup to restore" + schemaOnly: + type: boolean + description: "Restore table/database definitions only (no data)" + overwrite: + type: boolean + description: "Allow restoring over existing, non-empty tables. When false (default) the restore is refused if target tables already contain data" + validateTopology: + type: boolean + description: "When true (default) the restore validates the target cluster topology is reachable before touching data" + image: + type: string + description: "Optional container image override for the restore job" + clickHouseCredentialsSecretName: + type: string + description: "Optional Secret with CLICKHOUSE_USER/CLICKHOUSE_PASSWORD" + status: + type: object + properties: + phase: + type: string + jobName: + type: string + startTime: + type: string + format: date-time + nullable: true + completionTime: + type: string + format: date-time + nullable: true + durationSeconds: + type: integer + format: int64 + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 diff --git a/deploy/builder/templates-install-bundle/clickhouse-operator-install-yaml-template-02-section-rbac-02-role.yaml b/deploy/builder/templates-install-bundle/clickhouse-operator-install-yaml-template-02-section-rbac-02-role.yaml index 8e8831222..5376cd462 100644 --- a/deploy/builder/templates-install-bundle/clickhouse-operator-install-yaml-template-02-section-rbac-02-role.yaml +++ b/deploy/builder/templates-install-bundle/clickhouse-operator-install-yaml-template-02-section-rbac-02-role.yaml @@ -241,6 +241,60 @@ rules: - 
patch - create - delete + + # clickhouse backup - related resources + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups + - clickhousebackupschedules + - clickhouserestores + verbs: + - get + - list + - watch + - patch + - update + - create + - delete + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/finalizers + - clickhousebackupschedules/finalizers + - clickhouserestores/finalizers + verbs: + - update + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/status + - clickhousebackupschedules/status + - clickhouserestores/status + verbs: + - get + - update + - patch + - create + - delete + + # + # batch.* resources - backup/restore Jobs and CronJobs + # + + - apiGroups: + - batch + resources: + - jobs + - cronjobs + verbs: + - get + - list + - watch + - patch + - update + - create + - delete --- # Specifies either # ClusterRoleBinding between ClusterRole and ServiceAccount. diff --git a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousebackups.clickhouse.altinity.com.yaml b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousebackups.clickhouse.altinity.com.yaml new file mode 100644 index 000000000..8a964a733 --- /dev/null +++ b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousebackups.clickhouse.altinity.com.yaml @@ -0,0 +1,147 @@ +# Template Parameters: +# +# OPERATOR_VERSION=0.27.1 +# +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhousebackups.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseBackup + singular: clickhousebackup + plural: clickhousebackups + shortNames: + - chb + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: 
.spec.clickHouseInstallation + - name: backup + type: string + description: Remote backup name + jsonPath: .status.backupName + - name: phase + type: string + description: Backup phase + jsonPath: .status.phase + - name: duration + type: integer + description: Backup duration (seconds) + jsonPath: .status.durationSeconds + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseBackup defines a one-off backup of a ClickHouseInstallation driven by clickhouse-backup" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace" + backupName: + type: string + description: "Optional explicit remote backup name; generated when empty" + schemaOnly: + type: boolean + description: "Back up table/database definitions only (no data)" + replicaSelection: + type: string + description: "Which replicas to back up" + enum: + - FirstPerShard + - AllReplicas + image: + type: string + description: "Optional container image override for the trigger job" + clickHouseCredentialsSecretName: + type: string + description: "Optional Secret with CLICKHOUSE_USER/CLICKHOUSE_PASSWORD" + tables: + type: string + description: "Optional clickhouse-backup --tables pattern to back up only matching tables" + partitions: + type: array + nullable: true + items: + type: string + description: "Optional partition ids to back up" + diffFromRemote: + type: string + description: "Existing remote backup name to make this an incremental backup" + keepLastRemote: + type: integer + format: int32 + description: "Keep only the N most recent remote backups (retention)" + verify: + type: boolean + description: "Run a verification job after the backup to confirm it 
is restorable" + status: + type: object + properties: + phase: + type: string + backupName: + type: string + jobName: + type: string + startTime: + type: string + format: date-time + nullable: true + completionTime: + type: string + format: date-time + nullable: true + durationSeconds: + type: integer + format: int64 + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 diff --git a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousebackupschedules.clickhouse.altinity.com.yaml b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousebackupschedules.clickhouse.altinity.com.yaml new file mode 100644 index 000000000..9c4cbc8a7 --- /dev/null +++ b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhousebackupschedules.clickhouse.altinity.com.yaml @@ -0,0 +1,144 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhousebackupschedules.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseBackupSchedule + singular: clickhousebackupschedule + plural: clickhousebackupschedules + shortNames: + - chbs + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: schedule + type: string + description: Cron schedule + jsonPath: .spec.schedule + - name: suspend + type: boolean + description: Whether the schedule is suspended + jsonPath: .spec.suspend + - name: last-schedule + type: date + jsonPath: .status.lastScheduleTime + - name: age + 
type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseBackupSchedule defines a recurring backup of a ClickHouseInstallation" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + - schedule + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace" + schedule: + type: string + description: "Cron schedule in standard Kubernetes CronJob format" + suspend: + type: boolean + description: "Pause creation of new backup jobs" + concurrencyPolicy: + type: string + description: "How to treat concurrent executions" + enum: + - Allow + - Forbid + - Replace + startingDeadlineSeconds: + type: integer + format: int64 + successfulJobsHistoryLimit: + type: integer + format: int32 + failedJobsHistoryLimit: + type: integer + format: int32 + backupTemplate: + type: object + description: "Backup specification stamped out on each scheduled run" + properties: + backupNamePrefix: + type: string + schemaOnly: + type: boolean + replicaSelection: + type: string + enum: + - FirstPerShard + - AllReplicas + image: + type: string + clickHouseCredentialsSecretName: + type: string + tables: + type: string + partitions: + type: array + nullable: true + items: + type: string + keepLastRemote: + type: integer + format: int32 + status: + type: object + properties: + cronJobName: + type: string + lastScheduleTime: + type: string + format: date-time + nullable: true + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 diff 
--git a/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouserestores.clickhouse.altinity.com.yaml b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouserestores.clickhouse.altinity.com.yaml new file mode 100644 index 000000000..a6dc234de --- /dev/null +++ b/deploy/helm/clickhouse-operator/crds/CustomResourceDefinition-clickhouserestores.clickhouse.altinity.com.yaml @@ -0,0 +1,123 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhouserestores.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseRestore + singular: clickhouserestore + plural: clickhouserestores + shortNames: + - chr + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: backup + type: string + description: Remote backup name being restored + jsonPath: .spec.backupName + - name: phase + type: string + description: Restore phase + jsonPath: .status.phase + - name: duration + type: integer + description: Restore duration (seconds) + jsonPath: .status.durationSeconds + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseRestore defines a one-off restore of a remote backup into a ClickHouseInstallation" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + - backupName + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace (a fresh, empty installation is recommended)" + backupName: + type: string + description: "Remote backup to restore" + 
schemaOnly: + type: boolean + description: "Restore table/database definitions only (no data)" + overwrite: + type: boolean + description: "Allow restoring over existing, non-empty tables. When false (default) the restore is refused if target tables already contain data" + validateTopology: + type: boolean + description: "When true (default) the restore validates the target cluster topology is reachable before touching data" + image: + type: string + description: "Optional container image override for the restore job" + clickHouseCredentialsSecretName: + type: string + description: "Optional Secret with CLICKHOUSE_USER/CLICKHOUSE_PASSWORD" + status: + type: object + properties: + phase: + type: string + jobName: + type: string + startTime: + type: string + format: date-time + nullable: true + completionTime: + type: string + format: date-time + nullable: true + durationSeconds: + type: integer + format: int64 + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 diff --git a/deploy/helm/clickhouse-operator/templates/generated/ClusterRole-clickhouse-operator-kube-system.yaml b/deploy/helm/clickhouse-operator/templates/generated/ClusterRole-clickhouse-operator-kube-system.yaml index 3a87ea7af..3688f8210 100644 --- a/deploy/helm/clickhouse-operator/templates/generated/ClusterRole-clickhouse-operator-kube-system.yaml +++ b/deploy/helm/clickhouse-operator/templates/generated/ClusterRole-clickhouse-operator-kube-system.yaml @@ -219,4 +219,55 @@ rules: - patch - create - delete + # clickhouse backup - related resources + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups + - clickhousebackupschedules + - clickhouserestores + verbs: + - get + - list + - watch + - 
patch + - update + - create + - delete + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/finalizers + - clickhousebackupschedules/finalizers + - clickhouserestores/finalizers + verbs: + - update + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/status + - clickhousebackupschedules/status + - clickhouserestores/status + verbs: + - get + - update + - patch + - create + - delete + # + # batch.* resources - backup/restore Jobs and CronJobs + # + - apiGroups: + - batch + resources: + - jobs + - cronjobs + verbs: + - get + - list + - watch + - patch + - update + - create + - delete {{- end }} diff --git a/deploy/helm/clickhouse-operator/templates/generated/Role-clickhouse-operator.yaml b/deploy/helm/clickhouse-operator/templates/generated/Role-clickhouse-operator.yaml index eaa8ff45b..c2ac421db 100644 --- a/deploy/helm/clickhouse-operator/templates/generated/Role-clickhouse-operator.yaml +++ b/deploy/helm/clickhouse-operator/templates/generated/Role-clickhouse-operator.yaml @@ -219,4 +219,55 @@ rules: - patch - create - delete + # clickhouse backup - related resources + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups + - clickhousebackupschedules + - clickhouserestores + verbs: + - get + - list + - watch + - patch + - update + - create + - delete + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/finalizers + - clickhousebackupschedules/finalizers + - clickhouserestores/finalizers + verbs: + - update + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/status + - clickhousebackupschedules/status + - clickhouserestores/status + verbs: + - get + - update + - patch + - create + - delete + # + # batch.* resources - backup/restore Jobs and CronJobs + # + - apiGroups: + - batch + resources: + - jobs + - cronjobs + verbs: + - get + - list + - watch + - patch + - update + - create + - delete {{- end }} diff --git 
a/deploy/operator/clickhouse-operator-install-ansible.yaml b/deploy/operator/clickhouse-operator-install-ansible.yaml index 46a8b2125..6c21e62ed 100644 --- a/deploy/operator/clickhouse-operator-install-ansible.yaml +++ b/deploy/operator/clickhouse-operator-install-ansible.yaml @@ -5209,6 +5209,423 @@ spec: --- # Template Parameters: # +# OPERATOR_VERSION=0.27.1 +# +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhousebackups.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseBackup + singular: clickhousebackup + plural: clickhousebackups + shortNames: + - chb + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: backup + type: string + description: Remote backup name + jsonPath: .status.backupName + - name: phase + type: string + description: Backup phase + jsonPath: .status.phase + - name: duration + type: integer + description: Backup duration (seconds) + jsonPath: .status.durationSeconds + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseBackup defines a one-off backup of a ClickHouseInstallation driven by clickhouse-backup" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace" + backupName: + type: string + description: "Optional explicit remote backup name; generated when empty" + schemaOnly: + type: boolean + description: "Back up table/database definitions only (no data)" + 
replicaSelection: + type: string + description: "Which replicas to back up" + enum: + - FirstPerShard + - AllReplicas + image: + type: string + description: "Optional container image override for the trigger job" + clickHouseCredentialsSecretName: + type: string + description: "Optional Secret with CLICKHOUSE_USER/CLICKHOUSE_PASSWORD" + tables: + type: string + description: "Optional clickhouse-backup --tables pattern to back up only matching tables" + partitions: + type: array + nullable: true + items: + type: string + description: "Optional partition ids to back up" + diffFromRemote: + type: string + description: "Existing remote backup name to make this an incremental backup" + keepLastRemote: + type: integer + format: int32 + description: "Keep only the N most recent remote backups (retention)" + verify: + type: boolean + description: "Run a verification job after the backup to confirm it is restorable" + status: + type: object + properties: + phase: + type: string + backupName: + type: string + jobName: + type: string + startTime: + type: string + format: date-time + nullable: true + completionTime: + type: string + format: date-time + nullable: true + durationSeconds: + type: integer + format: int64 + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhousebackupschedules.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseBackupSchedule + singular: clickhousebackupschedule + plural: clickhousebackupschedules + shortNames: + - chbs + versions: + - name: v1 + served: true + 
storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: schedule + type: string + description: Cron schedule + jsonPath: .spec.schedule + - name: suspend + type: boolean + description: Whether the schedule is suspended + jsonPath: .spec.suspend + - name: last-schedule + type: date + jsonPath: .status.lastScheduleTime + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseBackupSchedule defines a recurring backup of a ClickHouseInstallation" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + - schedule + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace" + schedule: + type: string + description: "Cron schedule in standard Kubernetes CronJob format" + suspend: + type: boolean + description: "Pause creation of new backup jobs" + concurrencyPolicy: + type: string + description: "How to treat concurrent executions" + enum: + - Allow + - Forbid + - Replace + startingDeadlineSeconds: + type: integer + format: int64 + successfulJobsHistoryLimit: + type: integer + format: int32 + failedJobsHistoryLimit: + type: integer + format: int32 + backupTemplate: + type: object + description: "Backup specification stamped out on each scheduled run" + properties: + backupNamePrefix: + type: string + schemaOnly: + type: boolean + replicaSelection: + type: string + enum: + - FirstPerShard + - AllReplicas + image: + type: string + clickHouseCredentialsSecretName: + type: string + tables: + type: string + partitions: + type: array + nullable: true + items: + type: string + keepLastRemote: + type: integer + format: int32 + status: + type: object + 
properties: + cronJobName: + type: string + lastScheduleTime: + type: string + format: date-time + nullable: true + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhouserestores.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseRestore + singular: clickhouserestore + plural: clickhouserestores + shortNames: + - chr + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: backup + type: string + description: Remote backup name being restored + jsonPath: .spec.backupName + - name: phase + type: string + description: Restore phase + jsonPath: .status.phase + - name: duration + type: integer + description: Restore duration (seconds) + jsonPath: .status.durationSeconds + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseRestore defines a one-off restore of a remote backup into a ClickHouseInstallation" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + - backupName + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace (a fresh, empty installation is recommended)" + backupName: + type: string + description: 
"Remote backup to restore" + schemaOnly: + type: boolean + description: "Restore table/database definitions only (no data)" + overwrite: + type: boolean + description: "Allow restoring over existing, non-empty tables. When false (default) the restore is refused if target tables already contain data" + validateTopology: + type: boolean + description: "When true (default) the restore validates the target cluster topology is reachable before touching data" + image: + type: string + description: "Optional container image override for the restore job" + clickHouseCredentialsSecretName: + type: string + description: "Optional Secret with CLICKHOUSE_USER/CLICKHOUSE_PASSWORD" + status: + type: object + properties: + phase: + type: string + jobName: + type: string + startTime: + type: string + format: date-time + nullable: true + completionTime: + type: string + format: date-time + nullable: true + durationSeconds: + type: integer + format: int64 + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 +--- +# Template Parameters: +# # COMMENT= # NAMESPACE={{ namespace }} # NAME=clickhouse-operator @@ -5465,6 +5882,60 @@ rules: - patch - create - delete + + # clickhouse backup - related resources + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups + - clickhousebackupschedules + - clickhouserestores + verbs: + - get + - list + - watch + - patch + - update + - create + - delete + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/finalizers + - clickhousebackupschedules/finalizers + - clickhouserestores/finalizers + verbs: + - update + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/status + - 
clickhousebackupschedules/status + - clickhouserestores/status + verbs: + - get + - update + - patch + - create + - delete + + # + # batch.* resources - backup/restore Jobs and CronJobs + # + + - apiGroups: + - batch + resources: + - jobs + - cronjobs + verbs: + - get + - list + - watch + - patch + - update + - create + - delete --- # Specifies either # ClusterRoleBinding between ClusterRole and ServiceAccount. diff --git a/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml b/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml index a294e39af..19e68305c 100644 --- a/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml +++ b/deploy/operator/clickhouse-operator-install-bundle-v1beta1.yaml @@ -5168,6 +5168,423 @@ spec: --- # Template Parameters: # +# OPERATOR_VERSION=0.27.1 +# +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhousebackups.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseBackup + singular: clickhousebackup + plural: clickhousebackups + shortNames: + - chb + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: backup + type: string + description: Remote backup name + jsonPath: .status.backupName + - name: phase + type: string + description: Backup phase + jsonPath: .status.phase + - name: duration + type: integer + description: Backup duration (seconds) + jsonPath: .status.durationSeconds + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseBackup defines a one-off backup of a ClickHouseInstallation driven by clickhouse-backup" + type: object + required: + - spec + properties: + apiVersion: + type: string + 
kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace" + backupName: + type: string + description: "Optional explicit remote backup name; generated when empty" + schemaOnly: + type: boolean + description: "Back up table/database definitions only (no data)" + replicaSelection: + type: string + description: "Which replicas to back up" + enum: + - FirstPerShard + - AllReplicas + image: + type: string + description: "Optional container image override for the trigger job" + clickHouseCredentialsSecretName: + type: string + description: "Optional Secret with CLICKHOUSE_USER/CLICKHOUSE_PASSWORD" + tables: + type: string + description: "Optional clickhouse-backup --tables pattern to back up only matching tables" + partitions: + type: array + nullable: true + items: + type: string + description: "Optional partition ids to back up" + diffFromRemote: + type: string + description: "Existing remote backup name to make this an incremental backup" + keepLastRemote: + type: integer + format: int32 + description: "Keep only the N most recent remote backups (retention)" + verify: + type: boolean + description: "Run a verification job after the backup to confirm it is restorable" + status: + type: object + properties: + phase: + type: string + backupName: + type: string + jobName: + type: string + startTime: + type: string + format: date-time + nullable: true + completionTime: + type: string + format: date-time + nullable: true + durationSeconds: + type: integer + format: int64 + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: 
integer + format: int64 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhousebackupschedules.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseBackupSchedule + singular: clickhousebackupschedule + plural: clickhousebackupschedules + shortNames: + - chbs + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: schedule + type: string + description: Cron schedule + jsonPath: .spec.schedule + - name: suspend + type: boolean + description: Whether the schedule is suspended + jsonPath: .spec.suspend + - name: last-schedule + type: date + jsonPath: .status.lastScheduleTime + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseBackupSchedule defines a recurring backup of a ClickHouseInstallation" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + - schedule + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace" + schedule: + type: string + description: "Cron schedule in standard Kubernetes CronJob format" + suspend: + type: boolean + description: "Pause creation of new backup jobs" + concurrencyPolicy: + type: string + description: "How to treat concurrent executions" + enum: + - Allow + - Forbid + - Replace + startingDeadlineSeconds: + type: integer + format: int64 + successfulJobsHistoryLimit: + type: integer + format: int32 + failedJobsHistoryLimit: + type: integer + format: int32 + backupTemplate: + type: object + description: "Backup 
specification stamped out on each scheduled run" + properties: + backupNamePrefix: + type: string + schemaOnly: + type: boolean + replicaSelection: + type: string + enum: + - FirstPerShard + - AllReplicas + image: + type: string + clickHouseCredentialsSecretName: + type: string + tables: + type: string + partitions: + type: array + nullable: true + items: + type: string + keepLastRemote: + type: integer + format: int32 + status: + type: object + properties: + cronJobName: + type: string + lastScheduleTime: + type: string + format: date-time + nullable: true + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhouserestores.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseRestore + singular: clickhouserestore + plural: clickhouserestores + shortNames: + - chr + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: backup + type: string + description: Remote backup name being restored + jsonPath: .spec.backupName + - name: phase + type: string + description: Restore phase + jsonPath: .status.phase + - name: duration + type: integer + description: Restore duration (seconds) + jsonPath: .status.durationSeconds + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseRestore defines a one-off restore of a remote backup into a 
ClickHouseInstallation" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + - backupName + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace (a fresh, empty installation is recommended)" + backupName: + type: string + description: "Remote backup to restore" + schemaOnly: + type: boolean + description: "Restore table/database definitions only (no data)" + overwrite: + type: boolean + description: "Allow restoring over existing, non-empty tables. When false (default) the restore is refused if target tables already contain data" + validateTopology: + type: boolean + description: "When true (default) the restore validates the target cluster topology is reachable before touching data" + image: + type: string + description: "Optional container image override for the restore job" + clickHouseCredentialsSecretName: + type: string + description: "Optional Secret with CLICKHOUSE_USER/CLICKHOUSE_PASSWORD" + status: + type: object + properties: + phase: + type: string + jobName: + type: string + startTime: + type: string + format: date-time + nullable: true + completionTime: + type: string + format: date-time + nullable: true + durationSeconds: + type: integer + format: int64 + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 +--- +# Template Parameters: +# # COMMENT= # NAMESPACE=kube-system # NAME=clickhouse-operator @@ -5180,7 +5597,6 @@ metadata: namespace: kube-system labels: clickhouse.altinity.com/chop: 0.27.1 - # Template Parameters: # # NAMESPACE=kube-system @@ 
-5411,6 +5827,57 @@ rules: - patch - create - delete + # clickhouse backup - related resources + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups + - clickhousebackupschedules + - clickhouserestores + verbs: + - get + - list + - watch + - patch + - update + - create + - delete + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/finalizers + - clickhousebackupschedules/finalizers + - clickhouserestores/finalizers + verbs: + - update + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/status + - clickhousebackupschedules/status + - clickhouserestores/status + verbs: + - get + - update + - patch + - create + - delete + # + # batch.* resources - backup/restore Jobs and CronJobs + # + - apiGroups: + - batch + resources: + - jobs + - cronjobs + verbs: + - get + - list + - watch + - patch + - update + - create + - delete --- # Specifies either # ClusterRoleBinding between ClusterRole and ServiceAccount. @@ -5433,7 +5900,6 @@ subjects: - kind: ServiceAccount name: clickhouse-operator namespace: kube-system - # Template Parameters: # # NAMESPACE=kube-system @@ -5664,6 +6130,57 @@ rules: - patch - create - delete + # clickhouse backup - related resources + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups + - clickhousebackupschedules + - clickhouserestores + verbs: + - get + - list + - watch + - patch + - update + - create + - delete + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/finalizers + - clickhousebackupschedules/finalizers + - clickhouserestores/finalizers + verbs: + - update + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/status + - clickhousebackupschedules/status + - clickhouserestores/status + verbs: + - get + - update + - patch + - create + - delete + # + # batch.* resources - backup/restore Jobs and CronJobs + # + - apiGroups: + - batch + resources: + - jobs + - cronjobs + verbs: + - get + - list 
+ - watch + - patch + - update + - create + - delete --- # Specifies either # ClusterRoleBinding between ClusterRole and ServiceAccount. diff --git a/deploy/operator/clickhouse-operator-install-bundle.yaml b/deploy/operator/clickhouse-operator-install-bundle.yaml index 623d8e115..75fc7b225 100644 --- a/deploy/operator/clickhouse-operator-install-bundle.yaml +++ b/deploy/operator/clickhouse-operator-install-bundle.yaml @@ -5202,6 +5202,423 @@ spec: --- # Template Parameters: # +# OPERATOR_VERSION=0.27.1 +# +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhousebackups.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseBackup + singular: clickhousebackup + plural: clickhousebackups + shortNames: + - chb + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: backup + type: string + description: Remote backup name + jsonPath: .status.backupName + - name: phase + type: string + description: Backup phase + jsonPath: .status.phase + - name: duration + type: integer + description: Backup duration (seconds) + jsonPath: .status.durationSeconds + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseBackup defines a one-off backup of a ClickHouseInstallation driven by clickhouse-backup" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace" + backupName: + type: string + description: "Optional explicit 
remote backup name; generated when empty" + schemaOnly: + type: boolean + description: "Back up table/database definitions only (no data)" + replicaSelection: + type: string + description: "Which replicas to back up" + enum: + - FirstPerShard + - AllReplicas + image: + type: string + description: "Optional container image override for the trigger job" + clickHouseCredentialsSecretName: + type: string + description: "Optional Secret with CLICKHOUSE_USER/CLICKHOUSE_PASSWORD" + tables: + type: string + description: "Optional clickhouse-backup --tables pattern to back up only matching tables" + partitions: + type: array + nullable: true + items: + type: string + description: "Optional partition ids to back up" + diffFromRemote: + type: string + description: "Existing remote backup name to make this an incremental backup" + keepLastRemote: + type: integer + format: int32 + description: "Keep only the N most recent remote backups (retention)" + verify: + type: boolean + description: "Run a verification job after the backup to confirm it is restorable" + status: + type: object + properties: + phase: + type: string + backupName: + type: string + jobName: + type: string + startTime: + type: string + format: date-time + nullable: true + completionTime: + type: string + format: date-time + nullable: true + durationSeconds: + type: integer + format: int64 + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhousebackupschedules.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: 
ClickHouseBackupSchedule + singular: clickhousebackupschedule + plural: clickhousebackupschedules + shortNames: + - chbs + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: schedule + type: string + description: Cron schedule + jsonPath: .spec.schedule + - name: suspend + type: boolean + description: Whether the schedule is suspended + jsonPath: .spec.suspend + - name: last-schedule + type: date + jsonPath: .status.lastScheduleTime + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseBackupSchedule defines a recurring backup of a ClickHouseInstallation" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + - schedule + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace" + schedule: + type: string + description: "Cron schedule in standard Kubernetes CronJob format" + suspend: + type: boolean + description: "Pause creation of new backup jobs" + concurrencyPolicy: + type: string + description: "How to treat concurrent executions" + enum: + - Allow + - Forbid + - Replace + startingDeadlineSeconds: + type: integer + format: int64 + successfulJobsHistoryLimit: + type: integer + format: int32 + failedJobsHistoryLimit: + type: integer + format: int32 + backupTemplate: + type: object + description: "Backup specification stamped out on each scheduled run" + properties: + backupNamePrefix: + type: string + schemaOnly: + type: boolean + replicaSelection: + type: string + enum: + - FirstPerShard + - AllReplicas + image: + type: string + clickHouseCredentialsSecretName: + type: string + tables: + type: 
string + partitions: + type: array + nullable: true + items: + type: string + keepLastRemote: + type: integer + format: int32 + status: + type: object + properties: + cronJobName: + type: string + lastScheduleTime: + type: string + format: date-time + nullable: true + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhouserestores.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseRestore + singular: clickhouserestore + plural: clickhouserestores + shortNames: + - chr + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: backup + type: string + description: Remote backup name being restored + jsonPath: .spec.backupName + - name: phase + type: string + description: Restore phase + jsonPath: .status.phase + - name: duration + type: integer + description: Restore duration (seconds) + jsonPath: .status.durationSeconds + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseRestore defines a one-off restore of a remote backup into a ClickHouseInstallation" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + - backupName + properties: + clickHouseInstallation: + type: string + description: 
"Name of the target ClickHouseInstallation in the same namespace (a fresh, empty installation is recommended)" + backupName: + type: string + description: "Remote backup to restore" + schemaOnly: + type: boolean + description: "Restore table/database definitions only (no data)" + overwrite: + type: boolean + description: "Allow restoring over existing, non-empty tables. When false (default) the restore is refused if target tables already contain data" + validateTopology: + type: boolean + description: "When true (default) the restore validates the target cluster topology is reachable before touching data" + image: + type: string + description: "Optional container image override for the restore job" + clickHouseCredentialsSecretName: + type: string + description: "Optional Secret with CLICKHOUSE_USER/CLICKHOUSE_PASSWORD" + status: + type: object + properties: + phase: + type: string + jobName: + type: string + startTime: + type: string + format: date-time + nullable: true + completionTime: + type: string + format: date-time + nullable: true + durationSeconds: + type: integer + format: int64 + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 +--- +# Template Parameters: +# # COMMENT= # NAMESPACE=kube-system # NAME=clickhouse-operator @@ -5458,6 +5875,60 @@ rules: - patch - create - delete + + # clickhouse backup - related resources + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups + - clickhousebackupschedules + - clickhouserestores + verbs: + - get + - list + - watch + - patch + - update + - create + - delete + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/finalizers + - clickhousebackupschedules/finalizers + - 
clickhouserestores/finalizers + verbs: + - update + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/status + - clickhousebackupschedules/status + - clickhouserestores/status + verbs: + - get + - update + - patch + - create + - delete + + # + # batch.* resources - backup/restore Jobs and CronJobs + # + + - apiGroups: + - batch + resources: + - jobs + - cronjobs + verbs: + - get + - list + - watch + - patch + - update + - create + - delete --- # Specifies either # ClusterRoleBinding between ClusterRole and ServiceAccount. @@ -5724,6 +6195,60 @@ rules: - patch - create - delete + + # clickhouse backup - related resources + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups + - clickhousebackupschedules + - clickhouserestores + verbs: + - get + - list + - watch + - patch + - update + - create + - delete + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/finalizers + - clickhousebackupschedules/finalizers + - clickhouserestores/finalizers + verbs: + - update + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/status + - clickhousebackupschedules/status + - clickhouserestores/status + verbs: + - get + - update + - patch + - create + - delete + + # + # batch.* resources - backup/restore Jobs and CronJobs + # + + - apiGroups: + - batch + resources: + - jobs + - cronjobs + verbs: + - get + - list + - watch + - patch + - update + - create + - delete --- # Specifies either # ClusterRoleBinding between ClusterRole and ServiceAccount. 
diff --git a/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml b/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml index 4e027223e..42bdaed7d 100644 --- a/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml +++ b/deploy/operator/clickhouse-operator-install-template-v1beta1.yaml @@ -5168,6 +5168,423 @@ spec: --- # Template Parameters: # +# OPERATOR_VERSION=0.27.1 +# +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhousebackups.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseBackup + singular: clickhousebackup + plural: clickhousebackups + shortNames: + - chb + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: backup + type: string + description: Remote backup name + jsonPath: .status.backupName + - name: phase + type: string + description: Backup phase + jsonPath: .status.phase + - name: duration + type: integer + description: Backup duration (seconds) + jsonPath: .status.durationSeconds + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseBackup defines a one-off backup of a ClickHouseInstallation driven by clickhouse-backup" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace" + backupName: + type: string + description: "Optional explicit remote backup name; generated when empty" + schemaOnly: + type: boolean + description: "Back up 
table/database definitions only (no data)" + replicaSelection: + type: string + description: "Which replicas to back up" + enum: + - FirstPerShard + - AllReplicas + image: + type: string + description: "Optional container image override for the trigger job" + clickHouseCredentialsSecretName: + type: string + description: "Optional Secret with CLICKHOUSE_USER/CLICKHOUSE_PASSWORD" + tables: + type: string + description: "Optional clickhouse-backup --tables pattern to back up only matching tables" + partitions: + type: array + nullable: true + items: + type: string + description: "Optional partition ids to back up" + diffFromRemote: + type: string + description: "Existing remote backup name to make this an incremental backup" + keepLastRemote: + type: integer + format: int32 + description: "Keep only the N most recent remote backups (retention)" + verify: + type: boolean + description: "Run a verification job after the backup to confirm it is restorable" + status: + type: object + properties: + phase: + type: string + backupName: + type: string + jobName: + type: string + startTime: + type: string + format: date-time + nullable: true + completionTime: + type: string + format: date-time + nullable: true + durationSeconds: + type: integer + format: int64 + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhousebackupschedules.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseBackupSchedule + singular: clickhousebackupschedule + plural: clickhousebackupschedules + shortNames: + - 
chbs + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: schedule + type: string + description: Cron schedule + jsonPath: .spec.schedule + - name: suspend + type: boolean + description: Whether the schedule is suspended + jsonPath: .spec.suspend + - name: last-schedule + type: date + jsonPath: .status.lastScheduleTime + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseBackupSchedule defines a recurring backup of a ClickHouseInstallation" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + - schedule + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace" + schedule: + type: string + description: "Cron schedule in standard Kubernetes CronJob format" + suspend: + type: boolean + description: "Pause creation of new backup jobs" + concurrencyPolicy: + type: string + description: "How to treat concurrent executions" + enum: + - Allow + - Forbid + - Replace + startingDeadlineSeconds: + type: integer + format: int64 + successfulJobsHistoryLimit: + type: integer + format: int32 + failedJobsHistoryLimit: + type: integer + format: int32 + backupTemplate: + type: object + description: "Backup specification stamped out on each scheduled run" + properties: + backupNamePrefix: + type: string + schemaOnly: + type: boolean + replicaSelection: + type: string + enum: + - FirstPerShard + - AllReplicas + image: + type: string + clickHouseCredentialsSecretName: + type: string + tables: + type: string + partitions: + type: array + nullable: true + items: + type: string + keepLastRemote: + type: integer + 
format: int32 + status: + type: object + properties: + cronJobName: + type: string + lastScheduleTime: + type: string + format: date-time + nullable: true + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhouserestores.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseRestore + singular: clickhouserestore + plural: clickhouserestores + shortNames: + - chr + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: backup + type: string + description: Remote backup name being restored + jsonPath: .spec.backupName + - name: phase + type: string + description: Restore phase + jsonPath: .status.phase + - name: duration + type: integer + description: Restore duration (seconds) + jsonPath: .status.durationSeconds + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseRestore defines a one-off restore of a remote backup into a ClickHouseInstallation" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + - backupName + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace (a fresh, empty installation is recommended)" + 
backupName: + type: string + description: "Remote backup to restore" + schemaOnly: + type: boolean + description: "Restore table/database definitions only (no data)" + overwrite: + type: boolean + description: "Allow restoring over existing, non-empty tables. When false (default) the restore is refused if target tables already contain data" + validateTopology: + type: boolean + description: "When true (default) the restore validates the target cluster topology is reachable before touching data" + image: + type: string + description: "Optional container image override for the restore job" + clickHouseCredentialsSecretName: + type: string + description: "Optional Secret with CLICKHOUSE_USER/CLICKHOUSE_PASSWORD" + status: + type: object + properties: + phase: + type: string + jobName: + type: string + startTime: + type: string + format: date-time + nullable: true + completionTime: + type: string + format: date-time + nullable: true + durationSeconds: + type: integer + format: int64 + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 +--- +# Template Parameters: +# # COMMENT= # NAMESPACE=${OPERATOR_NAMESPACE} # NAME=clickhouse-operator @@ -5180,7 +5597,6 @@ metadata: namespace: ${OPERATOR_NAMESPACE} labels: clickhouse.altinity.com/chop: 0.27.1 - # Template Parameters: # # NAMESPACE=${OPERATOR_NAMESPACE} @@ -5411,6 +5827,57 @@ rules: - patch - create - delete + # clickhouse backup - related resources + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups + - clickhousebackupschedules + - clickhouserestores + verbs: + - get + - list + - watch + - patch + - update + - create + - delete + - apiGroups: + - clickhouse.altinity.com + resources: + - 
clickhousebackups/finalizers + - clickhousebackupschedules/finalizers + - clickhouserestores/finalizers + verbs: + - update + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/status + - clickhousebackupschedules/status + - clickhouserestores/status + verbs: + - get + - update + - patch + - create + - delete + # + # batch.* resources - backup/restore Jobs and CronJobs + # + - apiGroups: + - batch + resources: + - jobs + - cronjobs + verbs: + - get + - list + - watch + - patch + - update + - create + - delete --- # Specifies either # ClusterRoleBinding between ClusterRole and ServiceAccount. diff --git a/deploy/operator/clickhouse-operator-install-template.yaml b/deploy/operator/clickhouse-operator-install-template.yaml index 97bdd7d6f..4614ca095 100644 --- a/deploy/operator/clickhouse-operator-install-template.yaml +++ b/deploy/operator/clickhouse-operator-install-template.yaml @@ -5202,6 +5202,423 @@ spec: --- # Template Parameters: # +# OPERATOR_VERSION=0.27.1 +# +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhousebackups.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseBackup + singular: clickhousebackup + plural: clickhousebackups + shortNames: + - chb + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: backup + type: string + description: Remote backup name + jsonPath: .status.backupName + - name: phase + type: string + description: Backup phase + jsonPath: .status.phase + - name: duration + type: integer + description: Backup duration (seconds) + jsonPath: .status.durationSeconds + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: 
"ClickHouseBackup defines a one-off backup of a ClickHouseInstallation driven by clickhouse-backup" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace" + backupName: + type: string + description: "Optional explicit remote backup name; generated when empty" + schemaOnly: + type: boolean + description: "Back up table/database definitions only (no data)" + replicaSelection: + type: string + description: "Which replicas to back up" + enum: + - FirstPerShard + - AllReplicas + image: + type: string + description: "Optional container image override for the trigger job" + clickHouseCredentialsSecretName: + type: string + description: "Optional Secret with CLICKHOUSE_USER/CLICKHOUSE_PASSWORD" + tables: + type: string + description: "Optional clickhouse-backup --tables pattern to back up only matching tables" + partitions: + type: array + nullable: true + items: + type: string + description: "Optional partition ids to back up" + diffFromRemote: + type: string + description: "Existing remote backup name to make this an incremental backup" + keepLastRemote: + type: integer + format: int32 + description: "Keep only the N most recent remote backups (retention)" + verify: + type: boolean + description: "Run a verification job after the backup to confirm it is restorable" + status: + type: object + properties: + phase: + type: string + backupName: + type: string + jobName: + type: string + startTime: + type: string + format: date-time + nullable: true + completionTime: + type: string + format: date-time + nullable: true + durationSeconds: + type: integer + format: int64 + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + 
status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhousebackupschedules.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseBackupSchedule + singular: clickhousebackupschedule + plural: clickhousebackupschedules + shortNames: + - chbs + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: schedule + type: string + description: Cron schedule + jsonPath: .spec.schedule + - name: suspend + type: boolean + description: Whether the schedule is suspended + jsonPath: .spec.suspend + - name: last-schedule + type: date + jsonPath: .status.lastScheduleTime + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseBackupSchedule defines a recurring backup of a ClickHouseInstallation" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + - schedule + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace" + schedule: + type: string + description: "Cron schedule in standard Kubernetes CronJob format" + suspend: + type: boolean + description: "Pause creation of new backup jobs" + concurrencyPolicy: + type: string + description: "How to treat concurrent executions" + enum: + - Allow + - Forbid + - Replace + startingDeadlineSeconds: + type: integer + format: int64 + 
successfulJobsHistoryLimit: + type: integer + format: int32 + failedJobsHistoryLimit: + type: integer + format: int32 + backupTemplate: + type: object + description: "Backup specification stamped out on each scheduled run" + properties: + backupNamePrefix: + type: string + schemaOnly: + type: boolean + replicaSelection: + type: string + enum: + - FirstPerShard + - AllReplicas + image: + type: string + clickHouseCredentialsSecretName: + type: string + tables: + type: string + partitions: + type: array + nullable: true + items: + type: string + keepLastRemote: + type: integer + format: int32 + status: + type: object + properties: + cronJobName: + type: string + lastScheduleTime: + type: string + format: date-time + nullable: true + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhouserestores.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseRestore + singular: clickhouserestore + plural: clickhouserestores + shortNames: + - chr + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: backup + type: string + description: Remote backup name being restored + jsonPath: .spec.backupName + - name: phase + type: string + description: Restore phase + jsonPath: .status.phase + - name: duration + type: integer + description: Restore duration (seconds) + jsonPath: .status.durationSeconds + - name: age + type: date + jsonPath: 
.metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseRestore defines a one-off restore of a remote backup into a ClickHouseInstallation" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + - backupName + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace (a fresh, empty installation is recommended)" + backupName: + type: string + description: "Remote backup to restore" + schemaOnly: + type: boolean + description: "Restore table/database definitions only (no data)" + overwrite: + type: boolean + description: "Allow restoring over existing, non-empty tables. When false (default) the restore is refused if target tables already contain data" + validateTopology: + type: boolean + description: "When true (default) the restore validates the target cluster topology is reachable before touching data" + image: + type: string + description: "Optional container image override for the restore job" + clickHouseCredentialsSecretName: + type: string + description: "Optional Secret with CLICKHOUSE_USER/CLICKHOUSE_PASSWORD" + status: + type: object + properties: + phase: + type: string + jobName: + type: string + startTime: + type: string + format: date-time + nullable: true + completionTime: + type: string + format: date-time + nullable: true + durationSeconds: + type: integer + format: int64 + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 +--- +# Template Parameters: +# # COMMENT= # NAMESPACE=${OPERATOR_NAMESPACE} # 
NAME=clickhouse-operator @@ -5458,6 +5875,60 @@ rules: - patch - create - delete + + # clickhouse backup - related resources + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups + - clickhousebackupschedules + - clickhouserestores + verbs: + - get + - list + - watch + - patch + - update + - create + - delete + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/finalizers + - clickhousebackupschedules/finalizers + - clickhouserestores/finalizers + verbs: + - update + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/status + - clickhousebackupschedules/status + - clickhouserestores/status + verbs: + - get + - update + - patch + - create + - delete + + # + # batch.* resources - backup/restore Jobs and CronJobs + # + + - apiGroups: + - batch + resources: + - jobs + - cronjobs + verbs: + - get + - list + - watch + - patch + - update + - create + - delete --- # Specifies either # ClusterRoleBinding between ClusterRole and ServiceAccount. 
diff --git a/deploy/operator/clickhouse-operator-install-tf.yaml b/deploy/operator/clickhouse-operator-install-tf.yaml index 4c1ed23b5..9a0b3443f 100644 --- a/deploy/operator/clickhouse-operator-install-tf.yaml +++ b/deploy/operator/clickhouse-operator-install-tf.yaml @@ -5209,6 +5209,423 @@ spec: --- # Template Parameters: # +# OPERATOR_VERSION=0.27.1 +# +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhousebackups.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseBackup + singular: clickhousebackup + plural: clickhousebackups + shortNames: + - chb + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: backup + type: string + description: Remote backup name + jsonPath: .status.backupName + - name: phase + type: string + description: Backup phase + jsonPath: .status.phase + - name: duration + type: integer + description: Backup duration (seconds) + jsonPath: .status.durationSeconds + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseBackup defines a one-off backup of a ClickHouseInstallation driven by clickhouse-backup" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace" + backupName: + type: string + description: "Optional explicit remote backup name; generated when empty" + schemaOnly: + type: boolean + description: "Back up table/database definitions only (no data)" + 
replicaSelection: + type: string + description: "Which replicas to back up" + enum: + - FirstPerShard + - AllReplicas + image: + type: string + description: "Optional container image override for the trigger job" + clickHouseCredentialsSecretName: + type: string + description: "Optional Secret with CLICKHOUSE_USER/CLICKHOUSE_PASSWORD" + tables: + type: string + description: "Optional clickhouse-backup --tables pattern to back up only matching tables" + partitions: + type: array + nullable: true + items: + type: string + description: "Optional partition ids to back up" + diffFromRemote: + type: string + description: "Existing remote backup name to make this an incremental backup" + keepLastRemote: + type: integer + format: int32 + description: "Keep only the N most recent remote backups (retention)" + verify: + type: boolean + description: "Run a verification job after the backup to confirm it is restorable" + status: + type: object + properties: + phase: + type: string + backupName: + type: string + jobName: + type: string + startTime: + type: string + format: date-time + nullable: true + completionTime: + type: string + format: date-time + nullable: true + durationSeconds: + type: integer + format: int64 + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhousebackupschedules.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseBackupSchedule + singular: clickhousebackupschedule + plural: clickhousebackupschedules + shortNames: + - chbs + versions: + - name: v1 + served: true + 
storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: schedule + type: string + description: Cron schedule + jsonPath: .spec.schedule + - name: suspend + type: boolean + description: Whether the schedule is suspended + jsonPath: .spec.suspend + - name: last-schedule + type: date + jsonPath: .status.lastScheduleTime + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseBackupSchedule defines a recurring backup of a ClickHouseInstallation" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + - schedule + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace" + schedule: + type: string + description: "Cron schedule in standard Kubernetes CronJob format" + suspend: + type: boolean + description: "Pause creation of new backup jobs" + concurrencyPolicy: + type: string + description: "How to treat concurrent executions" + enum: + - Allow + - Forbid + - Replace + startingDeadlineSeconds: + type: integer + format: int64 + successfulJobsHistoryLimit: + type: integer + format: int32 + failedJobsHistoryLimit: + type: integer + format: int32 + backupTemplate: + type: object + description: "Backup specification stamped out on each scheduled run" + properties: + backupNamePrefix: + type: string + schemaOnly: + type: boolean + replicaSelection: + type: string + enum: + - FirstPerShard + - AllReplicas + image: + type: string + clickHouseCredentialsSecretName: + type: string + tables: + type: string + partitions: + type: array + nullable: true + items: + type: string + keepLastRemote: + type: integer + format: int32 + status: + type: object + 
properties: + cronJobName: + type: string + lastScheduleTime: + type: string + format: date-time + nullable: true + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhouserestores.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseRestore + singular: clickhouserestore + plural: clickhouserestores + shortNames: + - chr + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: backup + type: string + description: Remote backup name being restored + jsonPath: .spec.backupName + - name: phase + type: string + description: Restore phase + jsonPath: .status.phase + - name: duration + type: integer + description: Restore duration (seconds) + jsonPath: .status.durationSeconds + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseRestore defines a one-off restore of a remote backup into a ClickHouseInstallation" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + - backupName + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace (a fresh, empty installation is recommended)" + backupName: + type: string + description: 
"Remote backup to restore" + schemaOnly: + type: boolean + description: "Restore table/database definitions only (no data)" + overwrite: + type: boolean + description: "Allow restoring over existing, non-empty tables. When false (default) the restore is refused if target tables already contain data" + validateTopology: + type: boolean + description: "When true (default) the restore validates the target cluster topology is reachable before touching data" + image: + type: string + description: "Optional container image override for the restore job" + clickHouseCredentialsSecretName: + type: string + description: "Optional Secret with CLICKHOUSE_USER/CLICKHOUSE_PASSWORD" + status: + type: object + properties: + phase: + type: string + jobName: + type: string + startTime: + type: string + format: date-time + nullable: true + completionTime: + type: string + format: date-time + nullable: true + durationSeconds: + type: integer + format: int64 + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 +--- +# Template Parameters: +# # COMMENT= # NAMESPACE=${namespace} # NAME=clickhouse-operator @@ -5465,6 +5882,60 @@ rules: - patch - create - delete + + # clickhouse backup - related resources + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups + - clickhousebackupschedules + - clickhouserestores + verbs: + - get + - list + - watch + - patch + - update + - create + - delete + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/finalizers + - clickhousebackupschedules/finalizers + - clickhouserestores/finalizers + verbs: + - update + - apiGroups: + - clickhouse.altinity.com + resources: + - clickhousebackups/status + - 
clickhousebackupschedules/status + - clickhouserestores/status + verbs: + - get + - update + - patch + - create + - delete + + # + # batch.* resources - backup/restore Jobs and CronJobs + # + + - apiGroups: + - batch + resources: + - jobs + - cronjobs + verbs: + - get + - list + - watch + - patch + - update + - create + - delete --- # Specifies either # ClusterRoleBinding between ClusterRole and ServiceAccount. diff --git a/deploy/operator/parts/crd.yaml b/deploy/operator/parts/crd.yaml index 03b5dbebd..ca2cdecf2 100644 --- a/deploy/operator/parts/crd.yaml +++ b/deploy/operator/parts/crd.yaml @@ -10005,3 +10005,420 @@ spec: More info: https://kubernetes.io/docs/concepts/services-networking/service/ # nullable: true x-kubernetes-preserve-unknown-fields: true +--- +# Template Parameters: +# +# OPERATOR_VERSION=0.27.1 +# +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhousebackups.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseBackup + singular: clickhousebackup + plural: clickhousebackups + shortNames: + - chb + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: backup + type: string + description: Remote backup name + jsonPath: .status.backupName + - name: phase + type: string + description: Backup phase + jsonPath: .status.phase + - name: duration + type: integer + description: Backup duration (seconds) + jsonPath: .status.durationSeconds + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseBackup defines a one-off backup of a ClickHouseInstallation driven by clickhouse-backup" + type: object + required: + - spec + properties: + apiVersion: + type: 
string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace" + backupName: + type: string + description: "Optional explicit remote backup name; generated when empty" + schemaOnly: + type: boolean + description: "Back up table/database definitions only (no data)" + replicaSelection: + type: string + description: "Which replicas to back up" + enum: + - FirstPerShard + - AllReplicas + image: + type: string + description: "Optional container image override for the trigger job" + clickHouseCredentialsSecretName: + type: string + description: "Optional Secret with CLICKHOUSE_USER/CLICKHOUSE_PASSWORD" + tables: + type: string + description: "Optional clickhouse-backup --tables pattern to back up only matching tables" + partitions: + type: array + nullable: true + items: + type: string + description: "Optional partition ids to back up" + diffFromRemote: + type: string + description: "Existing remote backup name to make this an incremental backup" + keepLastRemote: + type: integer + format: int32 + description: "Keep only the N most recent remote backups (retention)" + verify: + type: boolean + description: "Run a verification job after the backup to confirm it is restorable" + status: + type: object + properties: + phase: + type: string + backupName: + type: string + jobName: + type: string + startTime: + type: string + format: date-time + nullable: true + completionTime: + type: string + format: date-time + nullable: true + durationSeconds: + type: integer + format: int64 + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + 
type: integer + format: int64 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhousebackupschedules.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseBackupSchedule + singular: clickhousebackupschedule + plural: clickhousebackupschedules + shortNames: + - chbs + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: schedule + type: string + description: Cron schedule + jsonPath: .spec.schedule + - name: suspend + type: boolean + description: Whether the schedule is suspended + jsonPath: .spec.suspend + - name: last-schedule + type: date + jsonPath: .status.lastScheduleTime + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseBackupSchedule defines a recurring backup of a ClickHouseInstallation" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + - schedule + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace" + schedule: + type: string + description: "Cron schedule in standard Kubernetes CronJob format" + suspend: + type: boolean + description: "Pause creation of new backup jobs" + concurrencyPolicy: + type: string + description: "How to treat concurrent executions" + enum: + - Allow + - Forbid + - Replace + startingDeadlineSeconds: + type: integer + format: int64 + successfulJobsHistoryLimit: + type: integer + format: int32 + failedJobsHistoryLimit: + type: integer + format: int32 + backupTemplate: + type: object + description: 
"Backup specification stamped out on each scheduled run" + properties: + backupNamePrefix: + type: string + schemaOnly: + type: boolean + replicaSelection: + type: string + enum: + - FirstPerShard + - AllReplicas + image: + type: string + clickHouseCredentialsSecretName: + type: string + tables: + type: string + partitions: + type: array + nullable: true + items: + type: string + keepLastRemote: + type: integer + format: int32 + status: + type: object + properties: + cronJobName: + type: string + lastScheduleTime: + type: string + format: date-time + nullable: true + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: clickhouserestores.clickhouse.altinity.com + labels: + clickhouse.altinity.com/chop: 0.27.1 +spec: + group: clickhouse.altinity.com + scope: Namespaced + names: + kind: ClickHouseRestore + singular: clickhouserestore + plural: clickhouserestores + shortNames: + - chr + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: installation + type: string + description: Target ClickHouseInstallation + jsonPath: .spec.clickHouseInstallation + - name: backup + type: string + description: Remote backup name being restored + jsonPath: .spec.backupName + - name: phase + type: string + description: Restore phase + jsonPath: .status.phase + - name: duration + type: integer + description: Restore duration (seconds) + jsonPath: .status.durationSeconds + - name: age + type: date + jsonPath: .metadata.creationTimestamp + subresources: + status: {} + schema: + openAPIV3Schema: + description: "ClickHouseRestore defines a one-off restore of a remote backup 
into a ClickHouseInstallation" + type: object + required: + - spec + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + type: object + required: + - clickHouseInstallation + - backupName + properties: + clickHouseInstallation: + type: string + description: "Name of the target ClickHouseInstallation in the same namespace (a fresh, empty installation is recommended)" + backupName: + type: string + description: "Remote backup to restore" + schemaOnly: + type: boolean + description: "Restore table/database definitions only (no data)" + overwrite: + type: boolean + description: "Allow restoring over existing, non-empty tables. When false (default) the restore is refused if target tables already contain data" + validateTopology: + type: boolean + description: "When true (default) the restore validates the target cluster topology is reachable before touching data" + image: + type: string + description: "Optional container image override for the restore job" + clickHouseCredentialsSecretName: + type: string + description: "Optional Secret with CLICKHOUSE_USER/CLICKHOUSE_PASSWORD" + status: + type: object + properties: + phase: + type: string + jobName: + type: string + startTime: + type: string + format: date-time + nullable: true + completionTime: + type: string + format: date-time + nullable: true + durationSeconds: + type: integer + format: int64 + conditions: + type: array + nullable: true + items: + type: object + required: + - type + - status + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastTransitionTime: + type: string + format: date-time + nullable: true + observedGeneration: + type: integer + format: int64 diff --git a/docs/backup.md b/docs/backup.md new file mode 100644 index 000000000..ff73d789a --- /dev/null +++ b/docs/backup.md @@ -0,0 +1,173 @@ +# Backup and Restore + +The operator can manage **automated backup and restore** for ClickHouse 
using +[`clickhouse-backup`](https://github.com/Altinity/clickhouse-backup) through three custom +resources: + +| Kind | Short | Purpose | +|------|-------|---------| +| `ClickHouseBackup` | `chb` | One-off backup of a `ClickHouseInstallation` | +| `ClickHouseBackupSchedule` | `chbs` | Recurring backups (managed Kubernetes `CronJob`) | +| `ClickHouseRestore` | `chr` | One-off restore of a remote backup | + +These resources reference a `ClickHouseInstallation` (CHI) by name in the same namespace. +The operator reconciles them into Kubernetes `Job`/`CronJob` resources that drive +`clickhouse-backup`. The operator owns those jobs, so they are garbage-collected when the +custom resource is deleted, and their status is reflected back on the custom resource. + +## Architecture + +`clickhouse-backup` must run **as a sidecar** in the ClickHouse pods, because it needs +local access to `/var/lib/clickhouse`. The operator does **not** inject this sidecar; the +generated jobs only *trigger* it remotely by inserting commands into the +`system.backup_actions` integration table of each target host (the sidecar then executes +`create_remote` / `restore_remote` locally, uploading the backup to or downloading it from remote storage). + +``` +ClickHouseBackupSchedule ──► CronJob ─┐ +ClickHouseBackup ──► Job ──────┼─► clickhouse-client ──► INSERT INTO system.backup_actions +ClickHouseRestore ──► Job ──────┘ │ + ▼ + clickhouse-backup sidecar (port 7171) ──► S3 / GCS / Azure +``` + +## Prerequisite: the clickhouse-backup sidecar + +Add the sidecar to your CHI via a `podTemplate` and set +`API_CREATE_INTEGRATION_TABLES=true`. See +[`chb-examples/01-prerequisite-chi-with-sidecar.yaml`](chb-examples/01-prerequisite-chi-with-sidecar.yaml). + +Key requirements: + +* `API_CREATE_INTEGRATION_TABLES=true` — exposes `system.backup_actions`, which the jobs use. +* A configured remote storage (`REMOTE_STORAGE`, `S3_BUCKET`, …). +* For **sharded** clusters, include the `{shard}` macro in the remote path + (e.g. 
`S3_PATH: backup/shard-{shard}`) so each shard's backup is stored separately under + the same backup name. + +If no sidecar is detected in the CHI pod templates, the custom resource reports a +`SidecarPresent=False` condition. + +## Cluster awareness + +ClickHouse replicates `Replicated*MergeTree` data across the replicas of a shard, so a +backup only needs **one replica per shard**: + +* `replicaSelection: FirstPerShard` (default) — back up the first replica of each shard. + Correct and storage-efficient for replicated tables. +* `replicaSelection: AllReplicas` — back up every replica. Use this if the cluster holds + **non-replicated** (plain `MergeTree`) or local `Distributed` tables, whose data differs + between replicas; otherwise those tables would only be captured on the first replica. + +On **restore** the operator restores both schema and data on the **first replica of each +shard**. For `Replicated*` tables the schema `CREATE` is issued **`ON CLUSTER`** so every +replica is created with an identical Keeper path; native ClickHouse replication then clones +the data to the remaining replicas. + +> **Important for replicated restore:** the sidecar must set +> `restore_schema_on_cluster` (env `RESTORE_SCHEMA_ON_CLUSTER`) to the cluster name (or the +> `{cluster}` macro). Without it, clickhouse-backup rewrites the replica path per node and +> the replicas land on **divergent Keeper paths that never sync**. This also requires +> distributed DDL, which the operator configures whenever a CHI uses ZooKeeper/Keeper. + +## Restore safety + +Restore is the most destructive operation, so the operator follows the conventions of +mature database operators (e.g. CloudNativePG): + +1. **Prefer restoring into a fresh, empty CHI.** In-place restore over a live cluster is + supported but guarded. +2. 
**Preflight validation** runs before any data is touched and is reported through + `status.conditions`: + * the target CHI must exist and be in the `Completed` state; + * with `validateTopology: true` (default) every target host must be reachable, ensuring + the full cluster is up before a `ReplicatedMergeTree` restore (mismatched topology is + the primary cause of Keeper-path corruption). +3. **Overwrite guard** — when `overwrite: false` (default) the restore is **refused** if any + target host already holds user tables. Set `overwrite: true` to drop and recreate them + (`clickhouse-backup --rm`). +4. The restore Job is one-shot (`backoffLimit: 0`, `restartPolicy: Never`). + +## Examples + +* [`01-prerequisite-chi-with-sidecar.yaml`](chb-examples/01-prerequisite-chi-with-sidecar.yaml) — CHI with the sidecar. +* [`02-backup-once.yaml`](chb-examples/02-backup-once.yaml) — one-off `ClickHouseBackup`. +* [`03-backup-schedule.yaml`](chb-examples/03-backup-schedule.yaml) — recurring `ClickHouseBackupSchedule`. +* [`04-restore.yaml`](chb-examples/04-restore.yaml) — `ClickHouseRestore`. + +```bash +kubectl apply -f docs/chb-examples/01-prerequisite-chi-with-sidecar.yaml +kubectl apply -f docs/chb-examples/03-backup-schedule.yaml + +kubectl get chb,chbs,chr +kubectl get jobs,cronjobs -l clickhouse.altinity.com/app=clickhouse-backup +``` + +## Authentication + +If the trigger jobs need credentials to connect to ClickHouse, reference a Secret with +`CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD` keys via +`spec.clickHouseCredentialsSecretName` (or `spec.backupTemplate.clickHouseCredentialsSecretName` +for schedules). + +## Backup options + +`ClickHouseBackup` (and `ClickHouseBackupSchedule.backupTemplate`) support: + +- `tables` — clickhouse-backup `--tables` pattern (e.g. `mydb.*`) to back up only matching tables. +- `partitions` — list of partition ids to back up. 
+- `diffFromRemote` (one-off backup) — name of an existing remote backup to take an **incremental** + backup against (`--diff-from-remote`); the base backup must still exist remotely. +- `keepLastRemote` — **retention**: keep only the N most recent remote backups; older ones are + pruned (best-effort, via `system.backup_list` + a `delete remote` action) after each backup. +- `verify` (one-off backup) — run a verification Job after the backup that downloads it and checks + integrity (no cluster data is touched); the result is surfaced as the `Verified` condition. + +## Compression and encryption + +These are configured on the **clickhouse-backup sidecar** (not operator fields) and apply to every +backup it runs: + +- Compression: `COMPRESSION_FORMAT` (`tar`, `lz4`, `zstd`, `gzip`, …) and `COMPRESSION_USE_MULTI_THREAD`. +- Encryption (object-storage server-side): S3 `S3_SSE` / `S3_SSE_KMS_KEY_ID` / `S3_SSE_CUSTOMER_KEY`, + Azure `AZBLOB_SSE_KEY`, GCS `GCS_ENCRYPTION_KEY` (CSEK). + +See the sidecar env in +[`chb-examples/01-prerequisite-chi-with-sidecar.yaml`](chb-examples/01-prerequisite-chi-with-sidecar.yaml). + +## Bootstrap a new cluster from a backup + +Annotate a fresh `ClickHouseInstallation` to auto-restore once it is up: + +```yaml +metadata: + annotations: + clickhouse.altinity.com/recover-from-backup: "my-backup-name" + # optional: Secret (CLICKHOUSE_USER/CLICKHOUSE_PASSWORD) for the restore to authenticate + clickhouse.altinity.com/recover-credentials-secret: "ch-backup-creds" +``` + +Once the CHI reaches `Completed`, the operator creates a one-time `ClickHouseRestore` +(`-bootstrap`) and stamps `clickhouse.altinity.com/recovered-from` so the recovery fires +exactly once. 
+ +## Monitoring + +The operator exports backup/restore metrics on its existing Prometheus endpoint (`:9999/metrics`): +`clickhouse_operator_backups_started` / `_completed` / `_failed`, +`clickhouse_operator_restores_started` / `_completed` / `_failed`, +`clickhouse_operator_backup_duration_seconds`, +`clickhouse_operator_backup_last_success_timestamp`, and +`clickhouse_operator_backup_verifications_failed` (labels: `namespace`, `clickhouse_installation`). +It also emits Kubernetes Events (`kubectl describe chb|chr`) on start, completion and failure. + +The repository additionally ships Prometheus alert rules for the `clickhouse-backup` sidecar +([`deploy/prometheus/prometheus-alert-rules-backup.yaml`](../deploy/prometheus/prometheus-alert-rules-backup.yaml)). + +## Limitations + +* Host service names are resolved from the cluster `layout` (`shardsCount`/`replicasCount`) + using the default host naming scheme. Clusters defined with explicit shard/replica lists + or custom host names are not supported yet (planned follow-up). +* Backup/restore is triggered per host through `system.backup_actions`; each target ClickHouse + server must be reachable from the trigger job on its native port (default `9000`). diff --git a/docs/chb-examples/01-prerequisite-chi-with-sidecar.yaml b/docs/chb-examples/01-prerequisite-chi-with-sidecar.yaml new file mode 100644 index 000000000..e08e1e74a --- /dev/null +++ b/docs/chb-examples/01-prerequisite-chi-with-sidecar.yaml @@ -0,0 +1,101 @@ +# Prerequisite for operator-managed backups: a ClickHouseInstallation whose pods run +# the clickhouse-backup sidecar in server mode. +# +# The ClickHouseBackup / ClickHouseBackupSchedule / ClickHouseRestore controllers do NOT +# inject this sidecar - they trigger it remotely through the system.backup_actions +# integration table. The sidecar MUST therefore be present and configured with +# API_CREATE_INTEGRATION_TABLES=true. 
+# +# For sharded clusters, configure a remote path that includes the {shard} macro so each +# shard's backup is stored in a distinct location under the same backup name. +--- +apiVersion: v1 +kind: Secret +metadata: + name: clickhouse-backup-s3 +type: Opaque +stringData: + # Object storage credentials consumed by the clickhouse-backup sidecar. + S3_ACCESS_KEY: "minio-access-key" + S3_SECRET_KEY: "minio-secret-key" +--- +apiVersion: "clickhouse.altinity.com/v1" +kind: "ClickHouseInstallation" +metadata: + name: backup-demo +spec: + defaults: + templates: + podTemplate: clickhouse-with-backup-sidecar + configuration: + clusters: + - name: default + layout: + shardsCount: 1 + replicasCount: 2 + templates: + podTemplates: + - name: clickhouse-with-backup-sidecar + metadata: + annotations: + # Expose the clickhouse-backup metrics endpoint to Prometheus. + clickhouse.backup/scrape: 'true' + clickhouse.backup/port: '7171' + clickhouse.backup/path: '/metrics' + spec: + containers: + - name: clickhouse + image: clickhouse/clickhouse-server:24.8 + + - name: clickhouse-backup + image: altinity/clickhouse-backup:2.7.2 + imagePullPolicy: IfNotPresent + command: + - bash + - -xc + - "/bin/clickhouse-backup server" + env: + - name: API_LISTEN + value: "0.0.0.0:7171" + - name: API_ENABLE_METRICS + value: "true" + # Required: lets the operator's jobs drive backups via system.backup_actions. + - name: API_CREATE_INTEGRATION_TABLES + value: "true" + # Required for restoring Replicated* tables: schema is recreated ON CLUSTER + # so all replicas share one Keeper path and replication can sync the data. + - name: RESTORE_SCHEMA_ON_CLUSTER + value: "{cluster}" + - name: REMOTE_STORAGE + value: "s3" + - name: BACKUPS_TO_KEEP_REMOTE + value: "7" + # Compression (sidecar-level, applies to every backup): tar|lz4|zstd|gzip|... + - name: COMPRESSION_FORMAT + value: "zstd" + # Encryption at rest via object-storage server-side encryption (S3 example). 
+ # For SSE-KMS also set S3_SSE_KMS_KEY_ID; Azure uses AZBLOB_SSE_KEY, GCS uses GCS_ENCRYPTION_KEY. + - name: S3_SSE + value: "AES256" + - name: S3_ENDPOINT + value: https://minio.minio + - name: S3_BUCKET + value: clickhouse-backup + # {shard} macro keeps each shard's backup in its own remote path. + - name: S3_PATH + value: backup/shard-{shard} + - name: S3_FORCE_PATH_STYLE + value: "true" + - name: S3_ACCESS_KEY + valueFrom: + secretKeyRef: + name: clickhouse-backup-s3 + key: S3_ACCESS_KEY + - name: S3_SECRET_KEY + valueFrom: + secretKeyRef: + name: clickhouse-backup-s3 + key: S3_SECRET_KEY + ports: + - name: backup-rest + containerPort: 7171 diff --git a/docs/chb-examples/02-backup-once.yaml b/docs/chb-examples/02-backup-once.yaml new file mode 100644 index 000000000..5a01ad00a --- /dev/null +++ b/docs/chb-examples/02-backup-once.yaml @@ -0,0 +1,17 @@ +# One-off backup of the "backup-demo" ClickHouseInstallation. +# The operator creates a Kubernetes Job that triggers `create_remote` on the first +# replica of each shard (correct for Replicated* tables). 
+apiVersion: "clickhouse.altinity.com/v1" +kind: "ClickHouseBackup" +metadata: + name: backup-demo-manual +spec: + clickHouseInstallation: backup-demo + # backupName: "" # optional; auto-generated when empty + schemaOnly: false + replicaSelection: FirstPerShard # FirstPerShard (default) | AllReplicas + # tables: "mydb.*" # optional: back up only matching tables + # partitions: ["202601"] # optional: only these partitions + # diffFromRemote: prev-backup # optional: incremental backup against an existing remote backup + # keepLastRemote: 7 # optional: retention - keep only the N most recent remote backups + # verify: true # optional: verify the backup is restorable afterwards diff --git a/docs/chb-examples/03-backup-schedule.yaml b/docs/chb-examples/03-backup-schedule.yaml new file mode 100644 index 000000000..5c509c364 --- /dev/null +++ b/docs/chb-examples/03-backup-schedule.yaml @@ -0,0 +1,19 @@ +# Recurring backup of the "backup-demo" ClickHouseInstallation. +# The operator manages a native Kubernetes CronJob; scheduling, suspension, concurrency +# and job history are handled by the CronJob, while remote retention is delegated to +# clickhouse-backup (BACKUPS_TO_KEEP_REMOTE in the sidecar) or set via backupTemplate.keepLastRemote. +apiVersion: "clickhouse.altinity.com/v1" +kind: "ClickHouseBackupSchedule" +metadata: + name: backup-demo-nightly +spec: + clickHouseInstallation: backup-demo + schedule: "0 2 * * *" + suspend: false + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 3 + failedJobsHistoryLimit: 1 + backupTemplate: + backupNamePrefix: nightly + schemaOnly: false + replicaSelection: FirstPerShard diff --git a/docs/chb-examples/04-restore.yaml b/docs/chb-examples/04-restore.yaml new file mode 100644 index 000000000..a7c7c27b6 --- /dev/null +++ b/docs/chb-examples/04-restore.yaml @@ -0,0 +1,20 @@ +# One-off restore of a remote backup. 
+# +# Recommended: restore into a FRESH, empty ClickHouseInstallation (here "backup-demo-restored"), +# which must also run the clickhouse-backup sidecar and point at the same remote storage. +# +# The operator's restore Job: +# 1. validates every target host is reachable (validateTopology), +# 2. refuses to overwrite a non-empty target unless overwrite=true, +# 3. restores schema (issued ON CLUSTER, so every replica gets it) and data via the first replica of each shard; +# native replication synchronizes the remaining replicas. +apiVersion: "clickhouse.altinity.com/v1" +kind: "ClickHouseRestore" +metadata: + name: restore-demo +spec: + clickHouseInstallation: backup-demo-restored + backupName: nightly-20260628-020000 + schemaOnly: false + overwrite: false # set true to drop & recreate existing tables + validateTopology: true diff --git a/pkg/apis/clickhouse.altinity.com/v1/api_register.go b/pkg/apis/clickhouse.altinity.com/v1/api_register.go index 149c64ceb..9db9e4f4e 100644 --- a/pkg/apis/clickhouse.altinity.com/v1/api_register.go +++ b/pkg/apis/clickhouse.altinity.com/v1/api_register.go @@ -45,6 +45,12 @@ func init() { &ClickHouseInstallationTemplateList{}, &ClickHouseOperatorConfiguration{}, &ClickHouseOperatorConfigurationList{}, + &ClickHouseBackup{}, + &ClickHouseBackupList{}, + &ClickHouseBackupSchedule{}, + &ClickHouseBackupScheduleList{}, + &ClickHouseRestore{}, + &ClickHouseRestoreList{}, ) } diff --git a/pkg/apis/clickhouse.altinity.com/v1/api_resources.go b/pkg/apis/clickhouse.altinity.com/v1/api_resources.go index 27e173ffd..b1a8aca4b 100644 --- a/pkg/apis/clickhouse.altinity.com/v1/api_resources.go +++ b/pkg/apis/clickhouse.altinity.com/v1/api_resources.go @@ -23,4 +23,7 @@ const ( ClickHouseInstallationCRDResourceKind = "ClickHouseInstallation" ClickHouseInstallationTemplateCRDResourceKind = "ClickHouseInstallationTemplate" ClickHouseOperatorCRDResourceKind = "ClickHouseOperator" + ClickHouseBackupCRDResourceKind = "ClickHouseBackup" + ClickHouseBackupScheduleCRDResourceKind = 
"ClickHouseBackupSchedule" + ClickHouseRestoreCRDResourceKind = "ClickHouseRestore" ) diff --git a/pkg/apis/clickhouse.altinity.com/v1/type_chb.go b/pkg/apis/clickhouse.altinity.com/v1/type_chb.go new file mode 100644 index 000000000..f87b0afe2 --- /dev/null +++ b/pkg/apis/clickhouse.altinity.com/v1/type_chb.go @@ -0,0 +1,134 @@ +// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1 + +import ( + meta "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// ReplicaSelection defines which replicas of a shard are involved in a backup. +type ReplicaSelection string + +const ( + // ReplicaSelectionFirstPerShard backs up a single (first) replica per shard. + // This is correct and storage-efficient for Replicated* table engines, whose + // data is identical across replicas. + ReplicaSelectionFirstPerShard ReplicaSelection = "FirstPerShard" + // ReplicaSelectionAllReplicas backs up every replica of every shard. + // Required for clusters that hold non-replicated (plain MergeTree) or local + // Distributed tables, which differ between replicas. + ReplicaSelectionAllReplicas ReplicaSelection = "AllReplicas" +) + +// Backup/restore phases reported in the CR Status. +const ( + BackupPhasePending = "Pending" + BackupPhaseRunning = "Running" + BackupPhaseCompleted = "Completed" + BackupPhaseFailed = "Failed" +) + +// Condition types used across backup/restore custom resources. 
+const ( + // ConditionValidated is True once spec preflight checks have passed. + ConditionValidated = "Validated" + // ConditionJobCreated is True once the operator has created the driving Job/CronJob. + ConditionJobCreated = "JobCreated" + // ConditionReady is True once the operation finished successfully. + ConditionReady = "Ready" + // ConditionVerified is True once a backup has passed verification (spec.verify). + ConditionVerified = "Verified" +) + +// +genclient +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +// ClickHouseBackup defines a one-off backup of a ClickHouseInstallation. The operator +// reconciles it into a Kubernetes Job that triggers `clickhouse-backup` (running as a +// sidecar in the ClickHouse pods) to create and upload a remote backup. +type ClickHouseBackup struct { + meta.TypeMeta `json:",inline" yaml:",inline"` + meta.ObjectMeta `json:"metadata,omitempty" yaml:"metadata,omitempty"` + + Spec ClickHouseBackupSpec `json:"spec" yaml:"spec"` + Status ClickHouseBackupStatus `json:"status,omitempty" yaml:"status,omitempty"` +} + +// ClickHouseBackupSpec defines the desired state of a ClickHouseBackup. +type ClickHouseBackupSpec struct { + // ClickHouseInstallation is the name of the target CHI in the same namespace. + ClickHouseInstallation string `json:"clickHouseInstallation" yaml:"clickHouseInstallation"` + // BackupName is the optional explicit remote backup name. When empty the operator + // generates a deterministic name from the CR name and creation timestamp. + BackupName string `json:"backupName,omitempty" yaml:"backupName,omitempty"` + // SchemaOnly backs up table/database definitions only (no data). + SchemaOnly bool `json:"schemaOnly,omitempty" yaml:"schemaOnly,omitempty"` + // ReplicaSelection controls which replicas are backed up. Defaults to FirstPerShard. 
+ ReplicaSelection ReplicaSelection `json:"replicaSelection,omitempty" yaml:"replicaSelection,omitempty"` + // Image optionally overrides the container image used by the trigger Job. + Image string `json:"image,omitempty" yaml:"image,omitempty"` + // ClickHouseCredentialsSecretName optionally references a Secret providing + // CLICKHOUSE_USER and CLICKHOUSE_PASSWORD for the trigger Job to authenticate. + ClickHouseCredentialsSecretName string `json:"clickHouseCredentialsSecretName,omitempty" yaml:"clickHouseCredentialsSecretName,omitempty"` + // Tables optionally restricts the backup to tables matching this clickhouse-backup + // pattern (e.g. "db.table", "db.*"). Empty backs up everything. + Tables string `json:"tables,omitempty" yaml:"tables,omitempty"` + // Partitions optionally restricts the backup to the given partition ids. + Partitions []string `json:"partitions,omitempty" yaml:"partitions,omitempty"` + // DiffFromRemote, when set to an existing remote backup name, makes this an + // incremental backup (clickhouse-backup --diff-from-remote). + DiffFromRemote string `json:"diffFromRemote,omitempty" yaml:"diffFromRemote,omitempty"` + // KeepLastRemote, when set, keeps only the N most recent remote backups + // (clickhouse-backup BACKUPS_TO_KEEP_REMOTE) - older ones are pruned on create. + KeepLastRemote *int32 `json:"keepLastRemote,omitempty" yaml:"keepLastRemote,omitempty"` + // Verify, when true, runs a verification job after the backup that downloads the + // remote backup and checks its integrity (without touching cluster data). + Verify bool `json:"verify,omitempty" yaml:"verify,omitempty"` +} + +// ClickHouseBackupStatus defines the observed state of a ClickHouseBackup. +type ClickHouseBackupStatus struct { + // Phase is one of Pending, Running, Completed, Failed. + Phase string `json:"phase,omitempty" yaml:"phase,omitempty"` + // BackupName is the resolved remote backup name. 
+ BackupName string `json:"backupName,omitempty" yaml:"backupName,omitempty"` + // JobName is the name of the Kubernetes Job driving the backup. + JobName string `json:"jobName,omitempty" yaml:"jobName,omitempty"` + // StartTime is when the backup Job started. + StartTime *meta.Time `json:"startTime,omitempty" yaml:"startTime,omitempty"` + // CompletionTime is when the backup Job finished. + CompletionTime *meta.Time `json:"completionTime,omitempty" yaml:"completionTime,omitempty"` + // DurationSeconds is the backup duration in seconds (completion - start). + DurationSeconds int64 `json:"durationSeconds,omitempty" yaml:"durationSeconds,omitempty"` + // Conditions represent the latest available observations of the backup state. + Conditions []meta.Condition `json:"conditions,omitempty" yaml:"conditions,omitempty"` +} + +// GetReplicaSelection returns the replica selection, defaulting to FirstPerShard. +func (spec *ClickHouseBackupSpec) GetReplicaSelection() ReplicaSelection { + if spec == nil || spec.ReplicaSelection == "" { + return ReplicaSelectionFirstPerShard + } + return spec.ReplicaSelection +} + +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +// ClickHouseBackupList defines a list of ClickHouseBackup resources. +type ClickHouseBackupList struct { + meta.TypeMeta `json:",inline" yaml:",inline"` + meta.ListMeta `json:"metadata" yaml:"metadata"` + Items []ClickHouseBackup `json:"items" yaml:"items"` +} diff --git a/pkg/apis/clickhouse.altinity.com/v1/type_chbs.go b/pkg/apis/clickhouse.altinity.com/v1/type_chbs.go new file mode 100644 index 000000000..8fd9874d0 --- /dev/null +++ b/pkg/apis/clickhouse.altinity.com/v1/type_chbs.go @@ -0,0 +1,104 @@ +// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1 + +import ( + meta "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// +genclient +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +// ClickHouseBackupSchedule defines a recurring backup of a ClickHouseInstallation. +// The operator reconciles it into a Kubernetes CronJob that triggers `clickhouse-backup` +// on the configured schedule. Native CronJob semantics handle scheduling, suspension, +// concurrency and job history; remote retention is delegated to clickhouse-backup +// (BACKUPS_TO_KEEP_REMOTE in the sidecar). +type ClickHouseBackupSchedule struct { + meta.TypeMeta `json:",inline" yaml:",inline"` + meta.ObjectMeta `json:"metadata,omitempty" yaml:"metadata,omitempty"` + + Spec ClickHouseBackupScheduleSpec `json:"spec" yaml:"spec"` + Status ClickHouseBackupScheduleStatus `json:"status,omitempty" yaml:"status,omitempty"` +} + +// ClickHouseBackupScheduleSpec defines the desired state of a ClickHouseBackupSchedule. +type ClickHouseBackupScheduleSpec struct { + // ClickHouseInstallation is the name of the target CHI in the same namespace. + ClickHouseInstallation string `json:"clickHouseInstallation" yaml:"clickHouseInstallation"` + // Schedule is a cron expression in standard Kubernetes CronJob format. + Schedule string `json:"schedule" yaml:"schedule"` + // Suspend pauses creation of new backup jobs. Existing jobs are unaffected. + Suspend *bool `json:"suspend,omitempty" yaml:"suspend,omitempty"` + // ConcurrencyPolicy controls how concurrent executions are treated. 
+ // One of Forbid (default), Allow, Replace. + ConcurrencyPolicy string `json:"concurrencyPolicy,omitempty" yaml:"concurrencyPolicy,omitempty"` + // StartingDeadlineSeconds is the deadline in seconds for starting a missed job. + StartingDeadlineSeconds *int64 `json:"startingDeadlineSeconds,omitempty" yaml:"startingDeadlineSeconds,omitempty"` + // SuccessfulJobsHistoryLimit is how many successful finished jobs to retain. Default 3. + SuccessfulJobsHistoryLimit *int32 `json:"successfulJobsHistoryLimit,omitempty" yaml:"successfulJobsHistoryLimit,omitempty"` + // FailedJobsHistoryLimit is how many failed finished jobs to retain. Default 1. + FailedJobsHistoryLimit *int32 `json:"failedJobsHistoryLimit,omitempty" yaml:"failedJobsHistoryLimit,omitempty"` + // BackupTemplate is the backup specification stamped out on each scheduled run. + BackupTemplate ClickHouseBackupTemplateSpec `json:"backupTemplate,omitempty" yaml:"backupTemplate,omitempty"` +} + +// ClickHouseBackupTemplateSpec is the subset of backup options applied to each scheduled backup. +type ClickHouseBackupTemplateSpec struct { + // BackupNamePrefix is prepended to the generated, timestamped backup name. + BackupNamePrefix string `json:"backupNamePrefix,omitempty" yaml:"backupNamePrefix,omitempty"` + // SchemaOnly backs up table/database definitions only (no data). + SchemaOnly bool `json:"schemaOnly,omitempty" yaml:"schemaOnly,omitempty"` + // ReplicaSelection controls which replicas are backed up. Defaults to FirstPerShard. + ReplicaSelection ReplicaSelection `json:"replicaSelection,omitempty" yaml:"replicaSelection,omitempty"` + // Image optionally overrides the container image used by the trigger Job. + Image string `json:"image,omitempty" yaml:"image,omitempty"` + // ClickHouseCredentialsSecretName optionally references a Secret providing + // CLICKHOUSE_USER and CLICKHOUSE_PASSWORD for the trigger Job to authenticate. 
+ ClickHouseCredentialsSecretName string `json:"clickHouseCredentialsSecretName,omitempty" yaml:"clickHouseCredentialsSecretName,omitempty"` + // Tables optionally restricts scheduled backups to tables matching this pattern. + Tables string `json:"tables,omitempty" yaml:"tables,omitempty"` + // Partitions optionally restricts scheduled backups to the given partition ids. + Partitions []string `json:"partitions,omitempty" yaml:"partitions,omitempty"` + // KeepLastRemote keeps only the N most recent remote backups (retention). + KeepLastRemote *int32 `json:"keepLastRemote,omitempty" yaml:"keepLastRemote,omitempty"` +} + +// ClickHouseBackupScheduleStatus defines the observed state of a ClickHouseBackupSchedule. +type ClickHouseBackupScheduleStatus struct { + // CronJobName is the name of the managed Kubernetes CronJob. + CronJobName string `json:"cronJobName,omitempty" yaml:"cronJobName,omitempty"` + // LastScheduleTime is the last time a backup job was scheduled. + LastScheduleTime *meta.Time `json:"lastScheduleTime,omitempty" yaml:"lastScheduleTime,omitempty"` + // Conditions represent the latest available observations of the schedule state. + Conditions []meta.Condition `json:"conditions,omitempty" yaml:"conditions,omitempty"` +} + +// GetReplicaSelection returns the replica selection, defaulting to FirstPerShard. +func (spec *ClickHouseBackupTemplateSpec) GetReplicaSelection() ReplicaSelection { + if spec == nil || spec.ReplicaSelection == "" { + return ReplicaSelectionFirstPerShard + } + return spec.ReplicaSelection +} + +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +// ClickHouseBackupScheduleList defines a list of ClickHouseBackupSchedule resources. 
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

// ClickHouseBackupScheduleList defines a list of ClickHouseBackupSchedule resources.
// Items holds the individual schedules; TypeMeta and ListMeta are the standard list envelope.
type ClickHouseBackupScheduleList struct {
	meta.TypeMeta `json:",inline" yaml:",inline"`
	meta.ListMeta `json:"metadata" yaml:"metadata"`
	Items         []ClickHouseBackupSchedule `json:"items" yaml:"items"`
}
// +genclient
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

// ClickHouseRestore defines a one-off restore of a remote backup into a ClickHouseInstallation.
// The operator reconciles it into a Kubernetes Job that restores the schema on all replicas
// (ON CLUSTER) and the data on the first replica of each shard, letting native ClickHouse
// replication synchronize the remaining replicas.
//
// For safety the operator runs preflight checks before any data is touched and, by default,
// refuses to overwrite a non-empty target. Restoring into a fresh, empty CHI is the
// recommended path.
type ClickHouseRestore struct {
	meta.TypeMeta   `json:",inline" yaml:",inline"`
	meta.ObjectMeta `json:"metadata,omitempty" yaml:"metadata,omitempty"`

	// Spec is the requested restore; Status is filled in by the operator.
	Spec   ClickHouseRestoreSpec   `json:"spec" yaml:"spec"`
	Status ClickHouseRestoreStatus `json:"status,omitempty" yaml:"status,omitempty"`
}

// ClickHouseRestoreSpec defines the desired state of a ClickHouseRestore.
type ClickHouseRestoreSpec struct {
	// ClickHouseInstallation is the name of the target CHI in the same namespace.
	// It is strongly recommended this be a fresh, empty installation.
	ClickHouseInstallation string `json:"clickHouseInstallation" yaml:"clickHouseInstallation"`
	// BackupName is the remote backup to restore. Required (serialized without omitempty).
	BackupName string `json:"backupName" yaml:"backupName"`
	// SchemaOnly restores table/database definitions only (no data).
	SchemaOnly bool `json:"schemaOnly,omitempty" yaml:"schemaOnly,omitempty"`
	// Overwrite allows restoring over existing, non-empty tables. When false (default)
	// the operator refuses the restore if target tables already contain data.
	Overwrite bool `json:"overwrite,omitempty" yaml:"overwrite,omitempty"`
	// ValidateTopology, when true (default), refuses the restore if the target cluster's
	// shard/replica layout differs from the backup, preventing ReplicatedMergeTree
	// ZooKeeper/Keeper path corruption.
	// It is a pointer so that "unset" can be told apart from an explicit false.
	ValidateTopology *bool `json:"validateTopology,omitempty" yaml:"validateTopology,omitempty"`
	// Image optionally overrides the container image used by the restore Job.
	Image string `json:"image,omitempty" yaml:"image,omitempty"`
	// ClickHouseCredentialsSecretName optionally references a Secret providing
	// CLICKHOUSE_USER and CLICKHOUSE_PASSWORD for the restore Job to authenticate.
	ClickHouseCredentialsSecretName string `json:"clickHouseCredentialsSecretName,omitempty" yaml:"clickHouseCredentialsSecretName,omitempty"`
}
+type ClickHouseRestoreStatus struct { + // Phase is one of Pending, Running, Completed, Failed. + Phase string `json:"phase,omitempty" yaml:"phase,omitempty"` + // JobName is the name of the Kubernetes Job driving the restore. + JobName string `json:"jobName,omitempty" yaml:"jobName,omitempty"` + // StartTime is when the restore Job started. + StartTime *meta.Time `json:"startTime,omitempty" yaml:"startTime,omitempty"` + // CompletionTime is when the restore Job finished. + CompletionTime *meta.Time `json:"completionTime,omitempty" yaml:"completionTime,omitempty"` + // DurationSeconds is the restore duration in seconds (completion - start). + DurationSeconds int64 `json:"durationSeconds,omitempty" yaml:"durationSeconds,omitempty"` + // Conditions represent the latest available observations of the restore state. + Conditions []meta.Condition `json:"conditions,omitempty" yaml:"conditions,omitempty"` +} + +// IsTopologyValidationEnabled reports whether topology validation is on (default true). +func (spec *ClickHouseRestoreSpec) IsTopologyValidationEnabled() bool { + if spec == nil || spec.ValidateTopology == nil { + return true + } + return *spec.ValidateTopology +} + +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +// ClickHouseRestoreList defines a list of ClickHouseRestore resources. 
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

// ClickHouseRestoreList defines a list of ClickHouseRestore resources.
// Items holds the individual restores; TypeMeta and ListMeta are the standard list envelope.
type ClickHouseRestoreList struct {
	meta.TypeMeta `json:",inline" yaml:",inline"`
	meta.ListMeta `json:"metadata" yaml:"metadata"`
	Items         []ClickHouseRestore `json:"items" yaml:"items"`
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ClickHouseBackup) DeepCopyInto(out *ClickHouseBackup) {
	// Shallow copy first; ObjectMeta, Spec and Status are then deep-copied field by field.
	*out = *in
	out.TypeMeta = in.TypeMeta
	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
	in.Spec.DeepCopyInto(&out.Spec)
	in.Status.DeepCopyInto(&out.Status)
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClickHouseBackup.
// A nil receiver yields nil.
func (in *ClickHouseBackup) DeepCopy() *ClickHouseBackup {
	if in == nil {
		return nil
	}
	out := new(ClickHouseBackup)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *ClickHouseBackup) DeepCopyObject() runtime.Object {
	// Return an untyped nil (not a typed nil pointer wrapped in the interface) for a nil receiver.
	if c := in.DeepCopy(); c != nil {
		return c
	}
	return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ClickHouseBackupSpec) DeepCopyInto(out *ClickHouseBackupSpec) {
	// Shallow copy covers all value fields; the slice and pointer fields are re-allocated
	// below so that out does not alias in.
	*out = *in
	if in.Partitions != nil {
		in, out := &in.Partitions, &out.Partitions
		*out = make([]string, len(*in))
		copy(*out, *in)
	}
	if in.KeepLastRemote != nil {
		in, out := &in.KeepLastRemote, &out.KeepLastRemote
		*out = new(int32)
		**out = **in
	}
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClickHouseBackupSpec.
// A nil receiver yields nil.
func (in *ClickHouseBackupSpec) DeepCopy() *ClickHouseBackupSpec {
	if in == nil {
		return nil
	}
	out := new(ClickHouseBackupSpec)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ClickHouseBackupStatus) DeepCopyInto(out *ClickHouseBackupStatus) {
	// Shallow copy first; the time pointers and the Conditions slice are cloned below.
	*out = *in
	if in.StartTime != nil {
		in, out := &in.StartTime, &out.StartTime
		*out = (*in).DeepCopy()
	}
	if in.CompletionTime != nil {
		in, out := &in.CompletionTime, &out.CompletionTime
		*out = (*in).DeepCopy()
	}
	if in.Conditions != nil {
		in, out := &in.Conditions, &out.Conditions
		*out = make([]metav1.Condition, len(*in))
		for i := range *in {
			(*in)[i].DeepCopyInto(&(*out)[i])
		}
	}
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClickHouseBackupStatus.
// A nil receiver yields nil.
func (in *ClickHouseBackupStatus) DeepCopy() *ClickHouseBackupStatus {
	if in == nil {
		return nil
	}
	out := new(ClickHouseBackupStatus)
	in.DeepCopyInto(out)
	return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ClickHouseBackupList) DeepCopyInto(out *ClickHouseBackupList) {
	*out = *in
	out.TypeMeta = in.TypeMeta
	in.ListMeta.DeepCopyInto(&out.ListMeta)
	if in.Items != nil {
		// Allocate a fresh slice and deep-copy every element so the lists share no items.
		in, out := &in.Items, &out.Items
		*out = make([]ClickHouseBackup, len(*in))
		for i := range *in {
			(*in)[i].DeepCopyInto(&(*out)[i])
		}
	}
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClickHouseBackupList.
// A nil receiver yields nil.
func (in *ClickHouseBackupList) DeepCopy() *ClickHouseBackupList {
	if in == nil {
		return nil
	}
	out := new(ClickHouseBackupList)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *ClickHouseBackupList) DeepCopyObject() runtime.Object {
	// Return an untyped nil (not a typed nil pointer wrapped in the interface) for a nil receiver.
	if c := in.DeepCopy(); c != nil {
		return c
	}
	return nil
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ClickHouseBackupSchedule) DeepCopyInto(out *ClickHouseBackupSchedule) {
	// Shallow copy first; ObjectMeta, Spec and Status are then deep-copied field by field.
	*out = *in
	out.TypeMeta = in.TypeMeta
	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
	in.Spec.DeepCopyInto(&out.Spec)
	in.Status.DeepCopyInto(&out.Status)
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClickHouseBackupSchedule.
// A nil receiver yields nil.
func (in *ClickHouseBackupSchedule) DeepCopy() *ClickHouseBackupSchedule {
	if in == nil {
		return nil
	}
	out := new(ClickHouseBackupSchedule)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *ClickHouseBackupSchedule) DeepCopyObject() runtime.Object {
	// Return an untyped nil (not a typed nil pointer wrapped in the interface) for a nil receiver.
	if c := in.DeepCopy(); c != nil {
		return c
	}
	return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ClickHouseBackupScheduleSpec) DeepCopyInto(out *ClickHouseBackupScheduleSpec) {
	// Shallow copy covers the value fields; every optional pointer field gets its own
	// allocation below so that out does not alias in.
	*out = *in
	if in.Suspend != nil {
		in, out := &in.Suspend, &out.Suspend
		*out = new(bool)
		**out = **in
	}
	if in.StartingDeadlineSeconds != nil {
		in, out := &in.StartingDeadlineSeconds, &out.StartingDeadlineSeconds
		*out = new(int64)
		**out = **in
	}
	if in.SuccessfulJobsHistoryLimit != nil {
		in, out := &in.SuccessfulJobsHistoryLimit, &out.SuccessfulJobsHistoryLimit
		*out = new(int32)
		**out = **in
	}
	if in.FailedJobsHistoryLimit != nil {
		in, out := &in.FailedJobsHistoryLimit, &out.FailedJobsHistoryLimit
		*out = new(int32)
		**out = **in
	}
	// The embedded template has slice/pointer fields of its own, so it is deep-copied too.
	in.BackupTemplate.DeepCopyInto(&out.BackupTemplate)
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClickHouseBackupScheduleSpec.
// A nil receiver yields nil.
func (in *ClickHouseBackupScheduleSpec) DeepCopy() *ClickHouseBackupScheduleSpec {
	if in == nil {
		return nil
	}
	out := new(ClickHouseBackupScheduleSpec)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ClickHouseBackupTemplateSpec) DeepCopyInto(out *ClickHouseBackupTemplateSpec) {
	// Shallow copy first; the Partitions slice and KeepLastRemote pointer are cloned below.
	*out = *in
	if in.Partitions != nil {
		in, out := &in.Partitions, &out.Partitions
		*out = make([]string, len(*in))
		copy(*out, *in)
	}
	if in.KeepLastRemote != nil {
		in, out := &in.KeepLastRemote, &out.KeepLastRemote
		*out = new(int32)
		**out = **in
	}
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClickHouseBackupTemplateSpec.
// A nil receiver yields nil.
func (in *ClickHouseBackupTemplateSpec) DeepCopy() *ClickHouseBackupTemplateSpec {
	if in == nil {
		return nil
	}
	out := new(ClickHouseBackupTemplateSpec)
	in.DeepCopyInto(out)
	return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ClickHouseBackupScheduleStatus) DeepCopyInto(out *ClickHouseBackupScheduleStatus) {
	// Shallow copy first; the time pointer and the Conditions slice are cloned below.
	*out = *in
	if in.LastScheduleTime != nil {
		in, out := &in.LastScheduleTime, &out.LastScheduleTime
		*out = (*in).DeepCopy()
	}
	if in.Conditions != nil {
		in, out := &in.Conditions, &out.Conditions
		*out = make([]metav1.Condition, len(*in))
		for i := range *in {
			(*in)[i].DeepCopyInto(&(*out)[i])
		}
	}
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClickHouseBackupScheduleStatus.
// A nil receiver yields nil.
func (in *ClickHouseBackupScheduleStatus) DeepCopy() *ClickHouseBackupScheduleStatus {
	if in == nil {
		return nil
	}
	out := new(ClickHouseBackupScheduleStatus)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ClickHouseBackupScheduleList) DeepCopyInto(out *ClickHouseBackupScheduleList) {
	*out = *in
	out.TypeMeta = in.TypeMeta
	in.ListMeta.DeepCopyInto(&out.ListMeta)
	if in.Items != nil {
		// Allocate a fresh slice and deep-copy every element so the lists share no items.
		in, out := &in.Items, &out.Items
		*out = make([]ClickHouseBackupSchedule, len(*in))
		for i := range *in {
			(*in)[i].DeepCopyInto(&(*out)[i])
		}
	}
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClickHouseBackupScheduleList.
// A nil receiver yields nil.
func (in *ClickHouseBackupScheduleList) DeepCopy() *ClickHouseBackupScheduleList {
	if in == nil {
		return nil
	}
	out := new(ClickHouseBackupScheduleList)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *ClickHouseBackupScheduleList) DeepCopyObject() runtime.Object {
	// Return an untyped nil (not a typed nil pointer wrapped in the interface) for a nil receiver.
	if c := in.DeepCopy(); c != nil {
		return c
	}
	return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ClickHouseRestore) DeepCopyInto(out *ClickHouseRestore) {
	// Shallow copy first; ObjectMeta, Spec and Status are then deep-copied field by field.
	*out = *in
	out.TypeMeta = in.TypeMeta
	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
	in.Spec.DeepCopyInto(&out.Spec)
	in.Status.DeepCopyInto(&out.Status)
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClickHouseRestore.
// A nil receiver yields nil.
func (in *ClickHouseRestore) DeepCopy() *ClickHouseRestore {
	if in == nil {
		return nil
	}
	out := new(ClickHouseRestore)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *ClickHouseRestore) DeepCopyObject() runtime.Object {
	// Return an untyped nil (not a typed nil pointer wrapped in the interface) for a nil receiver.
	if c := in.DeepCopy(); c != nil {
		return c
	}
	return nil
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ClickHouseRestoreSpec) DeepCopyInto(out *ClickHouseRestoreSpec) {
	// Shallow copy first; ValidateTopology is the only pointer field and is cloned below.
	*out = *in
	if in.ValidateTopology != nil {
		in, out := &in.ValidateTopology, &out.ValidateTopology
		*out = new(bool)
		**out = **in
	}
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClickHouseRestoreSpec.
// A nil receiver yields nil.
func (in *ClickHouseRestoreSpec) DeepCopy() *ClickHouseRestoreSpec {
	if in == nil {
		return nil
	}
	out := new(ClickHouseRestoreSpec)
	in.DeepCopyInto(out)
	return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ClickHouseRestoreStatus) DeepCopyInto(out *ClickHouseRestoreStatus) {
	// Shallow copy first; the time pointers and the Conditions slice are cloned below.
	*out = *in
	if in.StartTime != nil {
		in, out := &in.StartTime, &out.StartTime
		*out = (*in).DeepCopy()
	}
	if in.CompletionTime != nil {
		in, out := &in.CompletionTime, &out.CompletionTime
		*out = (*in).DeepCopy()
	}
	if in.Conditions != nil {
		in, out := &in.Conditions, &out.Conditions
		*out = make([]metav1.Condition, len(*in))
		for i := range *in {
			(*in)[i].DeepCopyInto(&(*out)[i])
		}
	}
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClickHouseRestoreStatus.
// A nil receiver yields nil.
func (in *ClickHouseRestoreStatus) DeepCopy() *ClickHouseRestoreStatus {
	if in == nil {
		return nil
	}
	out := new(ClickHouseRestoreStatus)
	in.DeepCopyInto(out)
	return out
}

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ClickHouseRestoreList) DeepCopyInto(out *ClickHouseRestoreList) {
	*out = *in
	out.TypeMeta = in.TypeMeta
	in.ListMeta.DeepCopyInto(&out.ListMeta)
	if in.Items != nil {
		// Allocate a fresh slice and deep-copy every element so the lists share no items.
		in, out := &in.Items, &out.Items
		*out = make([]ClickHouseRestore, len(*in))
		for i := range *in {
			(*in)[i].DeepCopyInto(&(*out)[i])
		}
	}
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClickHouseRestoreList.
// A nil receiver yields nil.
func (in *ClickHouseRestoreList) DeepCopy() *ClickHouseRestoreList {
	if in == nil {
		return nil
	}
	out := new(ClickHouseRestoreList)
	in.DeepCopyInto(out)
	return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *ClickHouseRestoreList) DeepCopyObject() runtime.Object {
	// Return an untyped nil (not a typed nil pointer wrapped in the interface) for a nil receiver.
	if c := in.DeepCopy(); c != nil {
		return c
	}
	return nil
}
+type BackupController struct { + client.Client + Scheme *apiruntime.Scheme + Recorder record.EventRecorder +} + +// Reconcile drives a one-off ClickHouseBackup: validate the target CHI, create the driving +// Job, track its completion, optionally verify it, and reflect everything in status, Events +// and Prometheus metrics. +func (c *BackupController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + logger := log.FromContext(ctx) + + chb := &api.ClickHouseBackup{} + if err := c.Get(ctx, req.NamespacedName, chb); err != nil { + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + // Terminal phases are not re-processed. With verification enabled the phase only becomes + // Completed once verification resolves, so this guard still lets us track the verify Job. + if chb.Status.Phase == api.BackupPhaseCompleted || chb.Status.Phase == api.BackupPhaseFailed { + return ctrl.Result{}, nil + } + + ns, chiName := chb.Namespace, chb.Spec.ClickHouseInstallation + + // Preflight: the referenced CHI must exist and be Completed. 
+ chi, err := getCHI(ctx, c.Client, ns, chiName) + if err != nil { + if apierrors.IsNotFound(err) { + chb.Status.Phase = api.BackupPhasePending + setCondition(&chb.Status.Conditions, api.ConditionValidated, metav1.ConditionFalse, + "CHINotFound", "referenced ClickHouseInstallation not found", chb.Generation) + return c.updateStatus(ctx, chb, ctrl.Result{RequeueAfter: requeueInterval}) + } + return ctrl.Result{}, err + } + if !chiCompleted(chi) { + chb.Status.Phase = api.BackupPhasePending + setCondition(&chb.Status.Conditions, api.ConditionValidated, metav1.ConditionFalse, + "CHINotReady", "referenced ClickHouseInstallation is not in Completed state", chb.Generation) + return c.updateStatus(ctx, chb, ctrl.Result{RequeueAfter: requeueInterval}) + } + setCondition(&chb.Status.Conditions, api.ConditionValidated, metav1.ConditionTrue, "Validated", "target CHI is ready", chb.Generation) + c.surfaceSidecar(chb, chi) + + // Ensure the backup Job exists. + job := &batchv1.Job{} + err = c.Get(ctx, types.NamespacedName{Namespace: ns, Name: chbackup.BackupJobName(chb)}, job) + switch { + case apierrors.IsNotFound(err): + job = chbackup.BuildBackupJob(chb, chi) + if err := ctrl.SetControllerReference(chb, job, c.Scheme); err != nil { + return ctrl.Result{}, err + } + if err := c.Create(ctx, job); err != nil { + return ctrl.Result{}, err + } + logger.Info("created backup job", "job", job.Name) + chb.Status.Phase = api.BackupPhaseRunning + chb.Status.JobName = job.Name + chb.Status.BackupName = chbackup.ResolveBackupName(chb) + if chb.Status.StartTime == nil { + now := metav1.Now() + chb.Status.StartTime = &now + } + setCondition(&chb.Status.Conditions, api.ConditionJobCreated, metav1.ConditionTrue, "JobCreated", "backup job created", chb.Generation) + metrics.BackupStarted(ctx, ns, chiName) + c.event(chb, core.EventTypeNormal, "BackupStarted", "backup job %s created", job.Name) + return c.updateStatus(ctx, chb, ctrl.Result{RequeueAfter: requeueInterval}) + case err != nil: + 
return ctrl.Result{}, err + } + + // Job exists - track its completion. + switch { + case jobComplete(job): + // First-time completion bookkeeping (runs once). + if chb.Status.CompletionTime == nil { + now := metav1.Now() + chb.Status.CompletionTime = &now + chb.Status.DurationSeconds = durationSeconds(chb.Status.StartTime, now) + setCondition(&chb.Status.Conditions, api.ConditionReady, metav1.ConditionTrue, "BackupCompleted", "backup completed successfully", chb.Generation) + metrics.BackupCompleted(ctx, ns, chiName, float64(chb.Status.DurationSeconds)) + c.event(chb, core.EventTypeNormal, "BackupCompleted", "backup %s completed in %ds", chb.Status.BackupName, chb.Status.DurationSeconds) + } + if chb.Spec.Verify { + return c.trackVerify(ctx, chb, chi) + } + chb.Status.Phase = api.BackupPhaseCompleted + return c.updateStatus(ctx, chb, ctrl.Result{}) + case jobFailed(job): + if chb.Status.Phase != api.BackupPhaseFailed { + metrics.BackupFailed(ctx, ns, chiName) + c.event(chb, core.EventTypeWarning, "BackupFailed", "backup job failed; inspect job logs") + } + chb.Status.Phase = api.BackupPhaseFailed + setCondition(&chb.Status.Conditions, api.ConditionReady, metav1.ConditionFalse, "JobFailed", "backup job failed; inspect job logs", chb.Generation) + return c.updateStatus(ctx, chb, ctrl.Result{}) + default: + chb.Status.Phase = api.BackupPhaseRunning + return c.updateStatus(ctx, chb, ctrl.Result{RequeueAfter: requeueInterval}) + } +} + +// trackVerify creates and tracks the verification Job (spec.verify). The backup data is already +// uploaded; verification is advisory, so a failed verification still leaves the backup Completed +// but with Verified=False. 
+func (c *BackupController) trackVerify(ctx context.Context, chb *api.ClickHouseBackup, chi *api.ClickHouseInstallation) (ctrl.Result, error) {
+	ns, chiName := chb.Namespace, chb.Spec.ClickHouseInstallation
+
+	// Look the verification Job up under the name chbackup.VerifyJobName returns for this CR;
+	// NotFound means it still has to be created.
+	vjob := &batchv1.Job{}
+	err := c.Get(ctx, types.NamespacedName{Namespace: ns, Name: chbackup.VerifyJobName(chb)}, vjob)
+	switch {
+	case apierrors.IsNotFound(err):
+		vjob = chbackup.BuildVerifyJob(chb, chi)
+		// Make the ClickHouseBackup CR the controlling owner of the verify Job.
+		if err := ctrl.SetControllerReference(chb, vjob, c.Scheme); err != nil {
+			return ctrl.Result{}, err
+		}
+		if err := c.Create(ctx, vjob); err != nil {
+			return ctrl.Result{}, err
+		}
+		// Verified stays Unknown and the phase stays Running until the verify Job terminates.
+		setCondition(&chb.Status.Conditions, api.ConditionVerified, metav1.ConditionUnknown, "Verifying", "verifying backup integrity", chb.Generation)
+		chb.Status.Phase = api.BackupPhaseRunning
+		c.event(chb, core.EventTypeNormal, "VerifyStarted", "verification job %s created", vjob.Name)
+		return c.updateStatus(ctx, chb, ctrl.Result{RequeueAfter: requeueInterval})
+	case err != nil:
+		return ctrl.Result{}, err
+	}
+
+	// The verify Job exists: translate its terminal condition (if any) into CR status.
+	// NOTE(review): the events and the VerificationFailed metric below are emitted on every pass
+	// through a terminal branch. Whether a pass can repeat depends on whether Reconcile stops
+	// early once Phase is Completed, which is not visible from here - confirm.
+	switch {
+	case jobComplete(vjob):
+		setCondition(&chb.Status.Conditions, api.ConditionVerified, metav1.ConditionTrue, "Verified", "backup verified restorable", chb.Generation)
+		c.event(chb, core.EventTypeNormal, "Verified", "backup %s verified", chb.Status.BackupName)
+		chb.Status.Phase = api.BackupPhaseCompleted
+		return c.updateStatus(ctx, chb, ctrl.Result{})
+	case jobFailed(vjob):
+		setCondition(&chb.Status.Conditions, api.ConditionVerified, metav1.ConditionFalse, "VerificationFailed", "backup verification failed; inspect job logs", chb.Generation)
+		metrics.VerificationFailed(ctx, ns, chiName)
+		c.event(chb, core.EventTypeWarning, "VerificationFailed", "backup %s verification failed", chb.Status.BackupName)
+		chb.Status.Phase = api.BackupPhaseCompleted // backup data exists; verification is advisory
+		return c.updateStatus(ctx, chb, ctrl.Result{})
+	default:
+		// Neither Complete nor Failed yet: keep polling.
+		chb.Status.Phase = api.BackupPhaseRunning
+		return c.updateStatus(ctx, chb, ctrl.Result{RequeueAfter: requeueInterval})
+	}
+}
+
+// surfaceSidecar records the result of hasBackupSidecar(chi) as the "SidecarPresent" condition
+// on chb. It only mutates chb in memory; persisting the status is left to the caller.
+func (c *BackupController) surfaceSidecar(chb *api.ClickHouseBackup, chi *api.ClickHouseInstallation) {
+	if hasBackupSidecar(chi) {
+		setCondition(&chb.Status.Conditions, "SidecarPresent", metav1.ConditionTrue, "SidecarFound", "clickhouse-backup sidecar detected", chb.Generation)
+	} else {
+		setCondition(&chb.Status.Conditions, "SidecarPresent", metav1.ConditionFalse, "SidecarMissing",
+			"no clickhouse-backup sidecar detected in CHI pod templates; the backup job will fail without it", chb.Generation)
+	}
+}
+
+// event emits a Kubernetes Event about chb. It is a no-op when the controller has no Recorder.
+func (c *BackupController) event(chb *api.ClickHouseBackup, eventType, reason, msgFmt string, args ...interface{}) {
+	if c.Recorder != nil {
+		c.Recorder.Eventf(chb, eventType, reason, msgFmt, args...)
+	}
+}
+
+// updateStatus writes chb through the status client (c.Status().Update). On success it returns
+// the given result unchanged; on failure it returns an empty result together with the error.
+func (c *BackupController) updateStatus(ctx context.Context, chb *api.ClickHouseBackup, result ctrl.Result) (ctrl.Result, error) {
+	if err := c.Status().Update(ctx, chb); err != nil {
+		return ctrl.Result{}, err
+	}
+	return result, nil
+}
diff --git a/pkg/controller/chbackup/backup_controller_test.go b/pkg/controller/chbackup/backup_controller_test.go
new file mode 100644
index 000000000..278fa86d0
--- /dev/null
+++ b/pkg/controller/chbackup/backup_controller_test.go
@@ -0,0 +1,144 @@
+// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package chbackup
+
+import (
+	"context"
+	"testing"
+
+	batchv1 "k8s.io/api/batch/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	meta "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/types"
+	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+
+	api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1"
+)
+
+// newScheme returns a scheme with the client-go built-in types and the operator's API types
+// registered, failing the test immediately if either registration errors.
+func newScheme(t *testing.T) *runtime.Scheme {
+	t.Helper()
+	s := runtime.NewScheme()
+	if err := clientgoscheme.AddToScheme(s); err != nil {
+		t.Fatalf("clientgoscheme: %v", err)
+	}
+	if err := api.AddToScheme(s); err != nil {
+		t.Fatalf("api scheme: %v", err)
+	}
+	return s
+}
+
+// completedCHI builds a ClickHouseInstallation named name in namespace "ns" with one cluster
+// ("default", 1 shard x 2 replicas) and status Completed.
+func completedCHI(name string) *api.ClickHouseInstallation {
+	return &api.ClickHouseInstallation{
+		ObjectMeta: meta.ObjectMeta{Name: name, Namespace: "ns"},
+		Spec: api.ChiSpec{Configuration: &api.Configuration{
+			Clusters: []*api.Cluster{{Name: "default", Layout: &api.ChiClusterLayout{ShardsCount: 1, ReplicasCount: 2}}},
+		}},
+		Status: &api.Status{Status: api.StatusCompleted},
+	}
+}
+
+// reconcileBackup seeds a fake client with objs, runs a single BackupController.Reconcile for
+// the ClickHouseBackup "ns/b1" and returns the client together with the reconcile outcome.
+// The controller is built without a Recorder.
+func reconcileBackup(t *testing.T, objs ...client.Object) (client.Client, ctrl.Result, error) {
+	t.Helper()
+	s := newScheme(t)
+	c := fake.NewClientBuilder().
+		WithScheme(s).
+		WithObjects(objs...).
+		WithStatusSubresource(&api.ClickHouseBackup{}).
+		Build()
+	r := &BackupController{Client: c, Scheme: s}
+	res, err := r.Reconcile(context.Background(), ctrl.Request{
+		NamespacedName: types.NamespacedName{Namespace: "ns", Name: "b1"},
+	})
+	return c, res, err
+}
+
+// TestBackupReconcileCreatesJob: with a Completed CHI the first reconcile must create the
+// owned "b1-backup" Job and move the CR to Running with status.jobName set.
+func TestBackupReconcileCreatesJob(t *testing.T) {
+	chb := &api.ClickHouseBackup{
+		ObjectMeta: meta.ObjectMeta{Name: "b1", Namespace: "ns"},
+		Spec:       api.ClickHouseBackupSpec{ClickHouseInstallation: "demo"},
+	}
+	c, _, err := reconcileBackup(t, completedCHI("demo"), chb)
+	if err != nil {
+		t.Fatalf("reconcile error: %v", err)
+	}
+
+	job := &batchv1.Job{}
+	if err := c.Get(context.Background(), types.NamespacedName{Namespace: "ns", Name: "b1-backup"}, job); err != nil {
+		t.Fatalf("expected backup job to be created: %v", err)
+	}
+	if len(job.OwnerReferences) != 1 || job.OwnerReferences[0].Kind != "ClickHouseBackup" {
+		t.Errorf("backup job must be owned by the ClickHouseBackup CR, got %+v", job.OwnerReferences)
+	}
+
+	got := &api.ClickHouseBackup{}
+	if err := c.Get(context.Background(), types.NamespacedName{Namespace: "ns", Name: "b1"}, got); err != nil {
+		t.Fatalf("get chb: %v", err)
+	}
+	if got.Status.Phase != api.BackupPhaseRunning {
+		t.Errorf("phase = %q, want Running", got.Status.Phase)
+	}
+	if got.Status.JobName != "b1-backup" {
+		t.Errorf("status.jobName = %q, want b1-backup", got.Status.JobName)
+	}
+}
+
+// TestBackupReconcilePendingWhenCHIMissing: a backup that references a non-existent CHI must
+// requeue, create no Job and report phase Pending.
+func TestBackupReconcilePendingWhenCHIMissing(t *testing.T) {
+	chb := &api.ClickHouseBackup{
+		ObjectMeta: meta.ObjectMeta{Name: "b1", Namespace: "ns"},
+		Spec:       api.ClickHouseBackupSpec{ClickHouseInstallation: "absent"},
+	}
+	c, res, err := reconcileBackup(t, chb)
+	if err != nil {
+		t.Fatalf("reconcile error: %v", err)
+	}
+	if res.RequeueAfter == 0 {
+		t.Errorf("expected requeue while waiting for CHI")
+	}
+	// No job should be created.
+	job := &batchv1.Job{}
+	err = c.Get(context.Background(), types.NamespacedName{Namespace: "ns", Name: "b1-backup"}, job)
+	if !apierrors.IsNotFound(err) {
+		t.Errorf("no job expected when CHI is missing, got err=%v", err)
+	}
+	got := &api.ClickHouseBackup{}
+	// Fail on a read error instead of discarding it: otherwise a failed Get would only show up
+	// as a confusing `phase = ""` mismatch below.
+	if err := c.Get(context.Background(), types.NamespacedName{Namespace: "ns", Name: "b1"}, got); err != nil {
+		t.Fatalf("get chb: %v", err)
+	}
+	if got.Status.Phase != api.BackupPhasePending {
+		t.Errorf("phase = %q, want Pending", got.Status.Phase)
+	}
+}
+
+// TestBackupReconcilePendingWhenCHINotCompleted: while the CHI is still InProgress the
+// reconcile must requeue and must not create the backup Job.
+func TestBackupReconcilePendingWhenCHINotCompleted(t *testing.T) {
+	chi := completedCHI("demo")
+	chi.Status = &api.Status{Status: api.StatusInProgress}
+	chb := &api.ClickHouseBackup{
+		ObjectMeta: meta.ObjectMeta{Name: "b1", Namespace: "ns"},
+		Spec:       api.ClickHouseBackupSpec{ClickHouseInstallation: "demo"},
+	}
+	c, res, err := reconcileBackup(t, chi, chb)
+	if err != nil {
+		t.Fatalf("reconcile error: %v", err)
+	}
+	if res.RequeueAfter == 0 {
+		t.Errorf("expected requeue while CHI not completed")
+	}
+	job := &batchv1.Job{}
+	if err := c.Get(context.Background(), types.NamespacedName{Namespace: "ns", Name: "b1-backup"}, job); !apierrors.IsNotFound(err) {
+		t.Errorf("no job expected when CHI not completed, got err=%v", err)
+	}
+}
diff --git a/pkg/controller/chbackup/bootstrap_controller.go b/pkg/controller/chbackup/bootstrap_controller.go
new file mode 100644
index 000000000..cc6ef3739
--- /dev/null
+++ b/pkg/controller/chbackup/bootstrap_controller.go
@@ -0,0 +1,106 @@
+// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package chbackup
+
+import (
+	"context"
+
+	core "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	meta "k8s.io/apimachinery/pkg/apis/meta/v1"
+	apiruntime "k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/client-go/tools/record"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/log"
+
+	api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1"
+)
+
+const (
+	// AnnotationRecoverFromBackup, set on a fresh ClickHouseInstallation, makes the operator
+	// auto-restore the named remote backup once the cluster is up (CloudNativePG-style bootstrap).
+	// The annotation value is used verbatim as the restore's spec.backupName.
+	AnnotationRecoverFromBackup = "clickhouse.altinity.com/recover-from-backup"
+	// AnnotationRecoveredFrom is the guard the operator stamps after triggering the recovery,
+	// so the bootstrap restore fires exactly once. Any non-empty value suppresses the bootstrap.
+	AnnotationRecoveredFrom = "clickhouse.altinity.com/recovered-from"
+	// AnnotationRecoverCredentialsSecret optionally names the Secret (CLICKHOUSE_USER/PASSWORD)
+	// the bootstrap restore should use to authenticate to ClickHouse.
+	AnnotationRecoverCredentialsSecret = "clickhouse.altinity.com/recover-credentials-secret"
+)
+
+// BootstrapController watches ClickHouseInstallations and, when one carries the
+// recover-from-backup annotation and has finished its first reconcile, creates a one-time
+// ClickHouseRestore. It only ever reads the CHI and stamps a guard annotation; it never
+// mutates the CHI spec or its children, so it does not conflict with the main CHI controller.
+type BootstrapController struct {
+	// Client is the embedded API client used to read the CHI, create the restore and patch
+	// the guard annotation.
+	client.Client
+	// Scheme is passed to ctrl.SetControllerReference when the CHI is made owner of the restore.
+	Scheme *apiruntime.Scheme
+	// Recorder is optional; Reconcile skips event emission when it is nil.
+	Recorder record.EventRecorder
+}
+
+// Reconcile implements the bootstrap-from-backup behavior.
+func (c *BootstrapController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	// Flow: skip CHIs that are not annotated or already stamped, wait for the CHI to be
+	// Completed, create the "<chi>-bootstrap" ClickHouseRestore owned by the CHI, then stamp
+	// the guard annotation. A CHI that no longer exists is not an error.
+	logger := log.FromContext(ctx)
+
+	chi := &api.ClickHouseInstallation{}
+	if err := c.Get(ctx, req.NamespacedName, chi); err != nil {
+		return ctrl.Result{}, client.IgnoreNotFound(err)
+	}
+
+	backupName := chi.GetAnnotations()[AnnotationRecoverFromBackup]
+	if backupName == "" {
+		return ctrl.Result{}, nil // not a bootstrap CHI
+	}
+	if chi.GetAnnotations()[AnnotationRecoveredFrom] != "" {
+		return ctrl.Result{}, nil // already recovered once
+	}
+	if !chiCompleted(chi) {
+		// Wait until the cluster (and its sidecars) are up before restoring.
+		return ctrl.Result{RequeueAfter: requeueInterval}, nil
+	}
+
+	restoreName := chi.Name + "-bootstrap"
+	restore := &api.ClickHouseRestore{
+		ObjectMeta: meta.ObjectMeta{Name: restoreName, Namespace: chi.Namespace},
+		Spec: api.ClickHouseRestoreSpec{
+			ClickHouseInstallation:          chi.Name,
+			BackupName:                      backupName,
+			ClickHouseCredentialsSecretName: chi.GetAnnotations()[AnnotationRecoverCredentialsSecret],
+		},
+	}
+	if err := ctrl.SetControllerReference(chi, restore, c.Scheme); err != nil {
+		return ctrl.Result{}, err
+	}
+	// AlreadyExists is tolerated so that a retry after a failed annotation patch (below) still
+	// reaches the stamping step. The log line distinguishes the two outcomes so it never
+	// claims a creation that did not happen.
+	switch err := c.Create(ctx, restore); {
+	case err == nil:
+		logger.Info("created bootstrap restore", "restore", restoreName, "backup", backupName)
+	case apierrors.IsAlreadyExists(err):
+		logger.Info("bootstrap restore already exists", "restore", restoreName, "backup", backupName)
+	default:
+		return ctrl.Result{}, err
+	}
+
+	// Stamp the guard annotation so this fires exactly once.
+	base := chi.DeepCopy()
+	if chi.Annotations == nil {
+		chi.Annotations = map[string]string{}
+	}
+	chi.Annotations[AnnotationRecoveredFrom] = backupName
+	// Merge patch against the pre-mutation copy, so only the annotation change is sent.
+	if err := c.Patch(ctx, chi, client.MergeFrom(base)); err != nil {
+		return ctrl.Result{}, err
+	}
+	if c.Recorder != nil {
+		c.Recorder.Eventf(chi, core.EventTypeNormal, "BootstrapRestore",
+			"created ClickHouseRestore %s from backup %s", restoreName, backupName)
+	}
+	return ctrl.Result{}, nil
+}
diff --git a/pkg/controller/chbackup/bootstrap_controller_test.go b/pkg/controller/chbackup/bootstrap_controller_test.go
new file mode 100644
index 000000000..972e76e30
--- /dev/null
+++ b/pkg/controller/chbackup/bootstrap_controller_test.go
@@ -0,0 +1,79 @@
+// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package chbackup
+
+import (
+	"context"
+	"testing"
+
+	meta "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+
+	api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1"
+)
+
+// TestBootstrapCreatesRestoreOnce: a Completed CHI carrying the recover-from-backup annotation
+// must get a "<chi>-bootstrap" ClickHouseRestore and the recovered-from guard annotation.
+func TestBootstrapCreatesRestoreOnce(t *testing.T) {
+	s := newScheme(t)
+	chi := &api.ClickHouseInstallation{
+		ObjectMeta: meta.ObjectMeta{
+			Name:        "demo",
+			Namespace:   "ns",
+			Annotations: map[string]string{AnnotationRecoverFromBackup: "bk"},
+		},
+		Status: &api.Status{Status: api.StatusCompleted},
+	}
+	c := fake.NewClientBuilder().WithScheme(s).WithObjects(chi).Build()
+	r := &BootstrapController{Client: c, Scheme: s}
+
+	if _, err := r.Reconcile(context.Background(), ctrl.Request{NamespacedName: types.NamespacedName{Namespace: "ns", Name: "demo"}}); err != nil {
+		t.Fatalf("reconcile: %v", err)
+	}
+
+	// A one-time restore should exist, referencing the CHI + backup.
+	restore := &api.ClickHouseRestore{}
+	if err := c.Get(context.Background(), types.NamespacedName{Namespace: "ns", Name: "demo-bootstrap"}, restore); err != nil {
+		t.Fatalf("expected bootstrap restore: %v", err)
+	}
+	if restore.Spec.ClickHouseInstallation != "demo" || restore.Spec.BackupName != "bk" {
+		t.Errorf("unexpected restore spec: %+v", restore.Spec)
+	}
+
+	// The guard annotation must be stamped so it fires once.
+	got := &api.ClickHouseInstallation{}
+	// Fail on a read error instead of discarding it: otherwise a failed Get would be reported
+	// as a missing guard annotation.
+	if err := c.Get(context.Background(), types.NamespacedName{Namespace: "ns", Name: "demo"}, got); err != nil {
+		t.Fatalf("get chi: %v", err)
+	}
+	if got.Annotations[AnnotationRecoveredFrom] != "bk" {
+		t.Errorf("guard annotation not stamped: %v", got.Annotations)
+	}
+}
+
+// TestBootstrapSkipsWhenNotAnnotated: a CHI without the recover-from-backup annotation must
+// not get a bootstrap restore.
+func TestBootstrapSkipsWhenNotAnnotated(t *testing.T) {
+	s := newScheme(t)
+	chi := &api.ClickHouseInstallation{
+		ObjectMeta: meta.ObjectMeta{Name: "demo", Namespace: "ns"},
+		Status:     &api.Status{Status: api.StatusCompleted},
+	}
+	c := fake.NewClientBuilder().WithScheme(s).WithObjects(chi).Build()
+	r := &BootstrapController{Client: c, Scheme: s}
+	if _, err := r.Reconcile(context.Background(), ctrl.Request{NamespacedName: types.NamespacedName{Namespace: "ns", Name: "demo"}}); err != nil {
+		t.Fatalf("reconcile: %v", err)
+	}
+	restore := &api.ClickHouseRestore{}
+	err := c.Get(context.Background(), types.NamespacedName{Namespace: "ns", Name: "demo-bootstrap"}, restore)
+	if err == nil {
+		t.Errorf("no restore expected for an un-annotated CHI")
+	}
+}
diff --git a/pkg/controller/chbackup/helpers.go b/pkg/controller/chbackup/helpers.go
new file mode 100644
index 000000000..b4117c269
--- /dev/null
+++ b/pkg/controller/chbackup/helpers.go
@@ -0,0 +1,100 @@
+// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package chbackup contains the controller-runtime controllers that reconcile the
+// ClickHouseBackup, ClickHouseBackupSchedule and ClickHouseRestore custom resources
+// into Kubernetes Jobs/CronJobs driving clickhouse-backup.
+package chbackup
+
+import (
+	"context"
+	"strings"
+	"time"
+
+	batchv1 "k8s.io/api/batch/v1"
+	core "k8s.io/api/core/v1"
+	apimeta "k8s.io/apimachinery/pkg/api/meta"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1"
+)
+
+// requeueInterval is the delay the controllers request between polls of an unfinished Job
+// (and while they wait for a referenced CHI).
+const requeueInterval = 15 * time.Second
+
+// getCHI reads the ClickHouseInstallation namespace/name through c. The returned pointer is
+// never nil; when err is non-nil it refers to an unpopulated object.
+func getCHI(ctx context.Context, c client.Client, namespace, name string) (*api.ClickHouseInstallation, error) {
+	key := types.NamespacedName{Namespace: namespace, Name: name}
+	installation := &api.ClickHouseInstallation{}
+	err := c.Get(ctx, key, installation)
+	return installation, err
+}
+
+// chiCompleted is true only for a non-nil CHI whose status is present and equals
+// api.StatusCompleted.
+func chiCompleted(chi *api.ClickHouseInstallation) bool {
+	if chi == nil || chi.Status == nil {
+		return false
+	}
+	return chi.Status.GetStatus() == api.StatusCompleted
+}
+
+// hasBackupSidecar is a best-effort heuristic: it reports true when any container in the CHI
+// pod templates has an image containing "clickhouse-backup" or a name containing "backup".
+// The result only feeds a non-blocking warning condition.
+func hasBackupSidecar(chi *api.ClickHouseInstallation) bool {
+	if chi == nil {
+		return false
+	}
+	templates := chi.Spec.Templates
+	if templates == nil {
+		return false
+	}
+	for _, podTemplate := range templates.PodTemplates {
+		for _, ctr := range podTemplate.Spec.Containers {
+			imageMatches := strings.Contains(ctr.Image, "clickhouse-backup")
+			if imageMatches || strings.Contains(ctr.Name, "backup") {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// setCondition upserts a status condition.
+func setCondition(conditions *[]metav1.Condition, condType string, status metav1.ConditionStatus, reason, message string, generation int64) {
+	// Delegates to apimeta.SetStatusCondition; generation is stored as ObservedGeneration.
+	apimeta.SetStatusCondition(conditions, metav1.Condition{
+		Type:               condType,
+		Status:             status,
+		Reason:             reason,
+		Message:            message,
+		ObservedGeneration: generation,
+	})
+}
+
+// jobConditionTrue reports whether a Job carries the given condition with status True.
+// job must be non-nil.
+func jobConditionTrue(job *batchv1.Job, condType batchv1.JobConditionType) bool {
+	for _, cond := range job.Status.Conditions {
+		if cond.Type == condType && cond.Status == core.ConditionTrue {
+			return true
+		}
+	}
+	return false
+}
+
+// jobComplete reports whether the Job has the Complete condition set to True.
+func jobComplete(job *batchv1.Job) bool { return jobConditionTrue(job, batchv1.JobComplete) }
+
+// jobFailed reports whether the Job has the Failed condition set to True.
+func jobFailed(job *batchv1.Job) bool { return jobConditionTrue(job, batchv1.JobFailed) }
+
+// durationSeconds returns end-start in whole seconds (>=0), or 0 if start is unset.
+// "Unset" covers both a nil pointer and a non-nil zero-valued time: subtracting the zero time
+// from end would otherwise yield a meaningless, enormous duration.
+func durationSeconds(start *metav1.Time, end metav1.Time) int64 {
+	// (*metav1.Time).IsZero is nil-safe: it is true for a nil receiver and for the zero time.
+	if start.IsZero() {
+		return 0
+	}
+	d := int64(end.Time.Sub(start.Time).Seconds())
+	if d < 0 {
+		// end precedes start (e.g. clock skew): clamp instead of reporting a negative duration.
+		return 0
+	}
+	return d
+}
diff --git a/pkg/controller/chbackup/metrics/metrics.go b/pkg/controller/chbackup/metrics/metrics.go
new file mode 100644
index 000000000..729896d57
--- /dev/null
+++ b/pkg/controller/chbackup/metrics/metrics.go
@@ -0,0 +1,134 @@
+// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package metrics exposes Prometheus metrics for operator-managed backups and restores.
+// It registers on the SAME OpenTelemetry meter as the rest of the operator
+// (pkg/metrics/operator), so the metrics surface on the operator's existing /metrics
+// endpoint (:9999) with no extra wiring.
+package metrics
+
+import (
+	"context"
+	"sync"
+	"time"
+
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/metric"
+
+	"github.com/altinity/clickhouse-operator/pkg/metrics/operator"
+)
+
+// meter returns the operator's OpenTelemetry meter (pkg/metrics/operator), whose Prometheus
+// exporter serves the operator's /metrics endpoint (:9999) - that is where these metrics
+// surface, alongside the CHI metrics. The operator does NOT register a global meter provider,
+// so we must use operator.Meter() directly. It is nil until StartMetricsExporter runs (well
+// before the first backup reconcile in a real operator); the global no-op meter is only a
+// fallback so unit tests, which never start the exporter, do not panic.
+func meter() metric.Meter {
+	if m := operator.Meter(); m != nil {
+		return m
+	}
+	return otel.Meter("clickhouse-operator-backup")
+}
+
+// metrics groups every instrument this package records to.
+type metrics struct {
+	backupsStarted      metric.Int64Counter
+	backupsCompleted    metric.Int64Counter
+	backupsFailed       metric.Int64Counter
+	restoresStarted     metric.Int64Counter
+	restoresCompleted   metric.Int64Counter
+	restoresFailed      metric.Int64Counter
+	verificationsFailed metric.Int64Counter
+	backupDuration      metric.Float64Histogram
+	lastSuccess         metric.Int64Gauge
+}
+
+var (
+	// m is the lazily created instrument set; read it through ensure() only.
+	m *metrics
+	// mOnce guards the one-time creation of m. The recorder functions below are called from
+	// several controllers' reconcile goroutines, so an unsynchronised nil-check on m would be
+	// a data race.
+	mOnce sync.Once
+)
+
+// ensure returns the shared instrument set, creating it on first use. The instruments are
+// bound to whatever meter() returns at that first call and are never re-created.
+func ensure() *metrics {
+	mOnce.Do(func() { m = create() })
+	return m
+}
+
+// create registers all backup/restore instruments on the current meter.
+// NOTE(review): instrument-creation errors are discarded; confirm that the OpenTelemetry API
+// in use returns a usable instrument alongside a non-nil error.
+func create() *metrics {
+	x := &metrics{}
+	x.backupsStarted, _ = meter().Int64Counter(
+		"clickhouse_operator_backups_started", metric.WithDescription("number of backups started"), metric.WithUnit("items"))
+	x.backupsCompleted, _ = meter().Int64Counter(
+		"clickhouse_operator_backups_completed", metric.WithDescription("number of backups completed successfully"), metric.WithUnit("items"))
+	x.backupsFailed, _ = meter().Int64Counter(
+		"clickhouse_operator_backups_failed", metric.WithDescription("number of backups failed"), metric.WithUnit("items"))
+	x.restoresStarted, _ = meter().Int64Counter(
+		"clickhouse_operator_restores_started", metric.WithDescription("number of restores started"), metric.WithUnit("items"))
+	x.restoresCompleted, _ = meter().Int64Counter(
+		"clickhouse_operator_restores_completed", metric.WithDescription("number of restores completed successfully"), metric.WithUnit("items"))
+	x.restoresFailed, _ = meter().Int64Counter(
+		"clickhouse_operator_restores_failed", metric.WithDescription("number of restores failed"), metric.WithUnit("items"))
+	x.verificationsFailed, _ = meter().Int64Counter(
+		"clickhouse_operator_backup_verifications_failed", metric.WithDescription("number of backup verifications failed"), metric.WithUnit("items"))
+	x.backupDuration, _ = meter().Float64Histogram(
+		"clickhouse_operator_backup_duration_seconds", metric.WithDescription("duration of completed backups"), metric.WithUnit("s"))
+	x.lastSuccess, _ = meter().Int64Gauge(
+		"clickhouse_operator_backup_last_success_timestamp", metric.WithDescription("unix timestamp of the last successful backup"), metric.WithUnit("s"))
+	return x
+}
+
+// attrs builds the attribute set attached to every measurement: the namespace and the name of
+// the ClickHouseInstallation.
+func attrs(namespace, chi string) metric.MeasurementOption {
+	return metric.WithAttributes(
+		attribute.String("namespace", namespace),
+		attribute.String("clickhouse_installation", chi),
+	)
+}
+
+// BackupStarted increments the backups-started counter.
+func BackupStarted(ctx context.Context, namespace, chi string) {
+	ensure().backupsStarted.Add(ctx, 1, attrs(namespace, chi))
+}
+
+// BackupCompleted records a successful backup, its duration, and the last-success timestamp.
+// A non-positive durationSeconds is not added to the histogram.
+func BackupCompleted(ctx context.Context, namespace, chi string, durationSeconds float64) {
+	e := ensure()
+	e.backupsCompleted.Add(ctx, 1, attrs(namespace, chi))
+	if durationSeconds > 0 {
+		e.backupDuration.Record(ctx, durationSeconds, attrs(namespace, chi))
+	}
+	e.lastSuccess.Record(ctx, time.Now().Unix(), attrs(namespace, chi))
+}
+
+// BackupFailed increments the backups-failed counter.
+func BackupFailed(ctx context.Context, namespace, chi string) {
+	ensure().backupsFailed.Add(ctx, 1, attrs(namespace, chi))
+}
+
+// RestoreStarted increments the restores-started counter.
+func RestoreStarted(ctx context.Context, namespace, chi string) {
+	ensure().restoresStarted.Add(ctx, 1, attrs(namespace, chi))
+}
+
+// RestoreCompleted increments the restores-completed counter.
+func RestoreCompleted(ctx context.Context, namespace, chi string) {
+	ensure().restoresCompleted.Add(ctx, 1, attrs(namespace, chi))
+}
+
+// RestoreFailed increments the restores-failed counter.
+func RestoreFailed(ctx context.Context, namespace, chi string) {
+	ensure().restoresFailed.Add(ctx, 1, attrs(namespace, chi))
+}
+
+// VerificationFailed increments the backup-verifications-failed counter.
+func VerificationFailed(ctx context.Context, namespace, chi string) {
+	// Adds 1, labelled with the namespace and ClickHouseInstallation name via attrs.
+	ensure().verificationsFailed.Add(ctx, 1, attrs(namespace, chi))
+}
diff --git a/pkg/controller/chbackup/restore_controller.go b/pkg/controller/chbackup/restore_controller.go
new file mode 100644
index 000000000..45eae083a
--- /dev/null
+++ b/pkg/controller/chbackup/restore_controller.go
@@ -0,0 +1,148 @@
+// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package chbackup
+
+import (
+	"context"
+
+	batchv1 "k8s.io/api/batch/v1"
+	core "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	apiruntime "k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/client-go/tools/record"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/log"
+
+	api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1"
+	"github.com/altinity/clickhouse-operator/pkg/controller/chbackup/metrics"
+	"github.com/altinity/clickhouse-operator/pkg/model/chbackup"
+)
+
+// RestoreController reconciles a ClickHouseRestore object into a restore Job.
+type RestoreController struct {
+	// Client is the embedded API client used for all reads and writes.
+	client.Client
+	// Scheme is passed to ctrl.SetControllerReference when the restore Job is owned by the CR.
+	Scheme *apiruntime.Scheme
+	// Recorder is optional; the event helper is a no-op when it is nil.
+	Recorder record.EventRecorder
+}
+
+// Reconcile drives a one-off ClickHouseRestore.
+// It runs preflight validation, creates the
+// restore Job (which itself enforces the overwrite guard and topology check before touching
+// data), and tracks completion in the CR status.
+//
+// Completed and Failed are terminal phases: once either is recorded, later reconciles return
+// immediately. While the CHI is missing or not Completed, or while the Job is still running,
+// the request is requeued after requeueInterval.
+//
+// NOTE(review): preflight runs before the Job lookup on every non-terminal reconcile. If the
+// CHI leaves the Completed state (or is deleted) while the Job is in flight, the phase is
+// written back to Pending and the Job's outcome is not recorded until the CHI is Completed
+// again - confirm this is intended.
+func (c *RestoreController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	logger := log.FromContext(ctx)
+
+	chr := &api.ClickHouseRestore{}
+	if err := c.Get(ctx, req.NamespacedName, chr); err != nil {
+		// A CR that no longer exists is not an error.
+		return ctrl.Result{}, client.IgnoreNotFound(err)
+	}
+
+	// Terminal phases are never re-processed.
+	if chr.Status.Phase == api.BackupPhaseCompleted || chr.Status.Phase == api.BackupPhaseFailed {
+		return ctrl.Result{}, nil
+	}
+
+	// Preflight: restore requires the target CHI to exist and be Completed.
+	chi, err := getCHI(ctx, c.Client, chr.Namespace, chr.Spec.ClickHouseInstallation)
+	if err != nil {
+		if apierrors.IsNotFound(err) {
+			chr.Status.Phase = api.BackupPhasePending
+			setCondition(&chr.Status.Conditions, api.ConditionValidated, metav1.ConditionFalse,
+				"CHINotFound", "referenced ClickHouseInstallation not found", chr.Generation)
+			return c.updateStatus(ctx, chr, ctrl.Result{RequeueAfter: requeueInterval})
+		}
+		return ctrl.Result{}, err
+	}
+	if !chiCompleted(chi) {
+		chr.Status.Phase = api.BackupPhasePending
+		setCondition(&chr.Status.Conditions, api.ConditionValidated, metav1.ConditionFalse,
+			"CHINotReady", "referenced ClickHouseInstallation is not in Completed state", chr.Generation)
+		return c.updateStatus(ctx, chr, ctrl.Result{RequeueAfter: requeueInterval})
+	}
+	setCondition(&chr.Status.Conditions, api.ConditionValidated, metav1.ConditionTrue, "Validated", "target CHI is ready", chr.Generation)
+	// The sidecar check only sets a condition; it never blocks Job creation.
+	if hasBackupSidecar(chi) {
+		setCondition(&chr.Status.Conditions, "SidecarPresent", metav1.ConditionTrue, "SidecarFound", "clickhouse-backup sidecar detected", chr.Generation)
+	} else {
+		setCondition(&chr.Status.Conditions, "SidecarPresent", metav1.ConditionFalse, "SidecarMissing",
+			"no clickhouse-backup sidecar detected in CHI pod templates; the restore job will fail without it", chr.Generation)
+	}
+
+	// Ensure the restore Job exists.
+	job := &batchv1.Job{}
+	err = c.Get(ctx, types.NamespacedName{Namespace: chr.Namespace, Name: chbackup.RestoreJobName(chr)}, job)
+	switch {
+	case apierrors.IsNotFound(err):
+		job = chbackup.BuildRestoreJob(chr, chi)
+		if err := ctrl.SetControllerReference(chr, job, c.Scheme); err != nil {
+			return ctrl.Result{}, err
+		}
+		if err := c.Create(ctx, job); err != nil {
+			return ctrl.Result{}, err
+		}
+		logger.Info("created restore job", "job", job.Name)
+		chr.Status.Phase = api.BackupPhaseRunning
+		chr.Status.JobName = job.Name
+		// StartTime is set once and never overwritten.
+		if chr.Status.StartTime == nil {
+			now := metav1.Now()
+			chr.Status.StartTime = &now
+		}
+		setCondition(&chr.Status.Conditions, api.ConditionJobCreated, metav1.ConditionTrue, "JobCreated", "restore job created", chr.Generation)
+		metrics.RestoreStarted(ctx, chr.Namespace, chr.Spec.ClickHouseInstallation)
+		c.event(chr, core.EventTypeNormal, "RestoreStarted", "restore job %s created", job.Name)
+		return c.updateStatus(ctx, chr, ctrl.Result{RequeueAfter: requeueInterval})
+	case err != nil:
+		return ctrl.Result{}, err
+	}
+
+	// The Job exists: map its terminal condition (if any) onto the CR status.
+	ns, chiName := chr.Namespace, chr.Spec.ClickHouseInstallation
+	switch {
+	case jobComplete(job):
+		// Completion bookkeeping (timestamps, metric, event) runs only while CompletionTime is unset.
+		if chr.Status.CompletionTime == nil {
+			now := metav1.Now()
+			chr.Status.CompletionTime = &now
+			chr.Status.DurationSeconds = durationSeconds(chr.Status.StartTime, now)
+			metrics.RestoreCompleted(ctx, ns, chiName)
+			c.event(chr, core.EventTypeNormal, "RestoreCompleted", "restore of %s completed in %ds", chr.Spec.BackupName, chr.Status.DurationSeconds)
+		}
+		chr.Status.Phase = api.BackupPhaseCompleted
+		setCondition(&chr.Status.Conditions, api.ConditionReady, metav1.ConditionTrue, "RestoreCompleted", "restore completed successfully", chr.Generation)
+		return c.updateStatus(ctx, chr, ctrl.Result{})
+	case jobFailed(job):
+		// Metric and event are skipped when the phase is already Failed.
+		if chr.Status.Phase != api.BackupPhaseFailed {
+			metrics.RestoreFailed(ctx, ns, chiName)
+			c.event(chr, core.EventTypeWarning, "RestoreFailed", "restore job failed; inspect job logs")
+		}
+		chr.Status.Phase = api.BackupPhaseFailed
+		setCondition(&chr.Status.Conditions, api.ConditionReady, metav1.ConditionFalse, "JobFailed", "restore job failed; inspect job logs", chr.Generation)
+		return c.updateStatus(ctx, chr, ctrl.Result{})
+	default:
+		// Neither Complete nor Failed yet: keep polling.
+		chr.Status.Phase = api.BackupPhaseRunning
+		return c.updateStatus(ctx, chr, ctrl.Result{RequeueAfter: requeueInterval})
+	}
+}
+
+// event emits a Kubernetes Event about chr. It is a no-op when the controller has no Recorder.
+func (c *RestoreController) event(chr *api.ClickHouseRestore, eventType, reason, msgFmt string, args ...interface{}) {
+	if c.Recorder != nil {
+		c.Recorder.Eventf(chr, eventType, reason, msgFmt, args...)
+	}
+}
+
+// updateStatus writes chr through the status client (c.Status().Update). On success it returns
+// the given result unchanged; on failure it returns an empty result together with the error.
+func (c *RestoreController) updateStatus(ctx context.Context, chr *api.ClickHouseRestore, result ctrl.Result) (ctrl.Result, error) {
+	if err := c.Status().Update(ctx, chr); err != nil {
+		return ctrl.Result{}, err
+	}
+	return result, nil
+}
diff --git a/pkg/controller/chbackup/schedule_controller.go b/pkg/controller/chbackup/schedule_controller.go
new file mode 100644
index 000000000..5f9fb87cf
--- /dev/null
+++ b/pkg/controller/chbackup/schedule_controller.go
@@ -0,0 +1,93 @@
+// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package chbackup
+
+import (
+	"context"
+
+	batchv1 "k8s.io/api/batch/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	apiruntime "k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/types"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/log"
+
+	api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1"
+	"github.com/altinity/clickhouse-operator/pkg/model/chbackup"
+)
+
+// ScheduleController reconciles a ClickHouseBackupSchedule object into a managed CronJob.
+type ScheduleController struct {
+	// Client is the embedded API client used for all reads and writes.
+	client.Client
+	// Scheme is passed to ctrl.SetControllerReference when the CronJob is owned by the schedule.
+	Scheme *apiruntime.Scheme
+}
+
+// Reconcile ensures a CronJob exists and matches the ClickHouseBackupSchedule spec.
+//
+// While the referenced CHI does not exist, Validated is set to False and the request is
+// requeued after requeueInterval. Otherwise the desired CronJob is built; it is created when
+// absent, or its spec and labels are overwritten when it exists and is controlled by this
+// schedule. A same-named CronJob that this schedule does not control is never modified:
+// Ready is set to False (reason CronJobNotOwned) and the request is requeued.
+func (c *ScheduleController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	logger := log.FromContext(ctx)
+
+	chbs := &api.ClickHouseBackupSchedule{}
+	if err := c.Get(ctx, req.NamespacedName, chbs); err != nil {
+		// A schedule that no longer exists is not an error.
+		return ctrl.Result{}, client.IgnoreNotFound(err)
+	}
+
+	chi, err := getCHI(ctx, c.Client, chbs.Namespace, chbs.Spec.ClickHouseInstallation)
+	if err != nil {
+		if apierrors.IsNotFound(err) {
+			setCondition(&chbs.Status.Conditions, api.ConditionValidated, metav1.ConditionFalse,
+				"CHINotFound", "referenced ClickHouseInstallation not found", chbs.Generation)
+			if e := c.Status().Update(ctx, chbs); e != nil {
+				return ctrl.Result{}, e
+			}
+			return ctrl.Result{RequeueAfter: requeueInterval}, nil
+		}
+		return ctrl.Result{}, err
+	}
+	setCondition(&chbs.Status.Conditions, api.ConditionValidated, metav1.ConditionTrue, "Validated", "target CHI found", chbs.Generation)
+
+	desired := chbackup.BuildBackupCronJob(chbs, chi)
+	if err := ctrl.SetControllerReference(chbs, desired, c.Scheme); err != nil {
+		return ctrl.Result{}, err
+	}
+
+	existing := &batchv1.CronJob{}
+	err = c.Get(ctx, types.NamespacedName{Namespace: desired.Namespace, Name: desired.Name}, existing)
+	switch {
+	case apierrors.IsNotFound(err):
+		if err := c.Create(ctx, desired); err != nil {
+			return ctrl.Result{}, err
+		}
+		logger.Info("created backup cronjob", "cronjob", desired.Name)
+	case err != nil:
+		return ctrl.Result{}, err
+	default:
+		// Never overwrite a CronJob this schedule does not control. A same-named CronJob that
+		// was created by a user or another controller, or one left behind by a deleted schedule
+		// of the same name and still awaiting garbage collection, must not have its spec and
+		// labels silently replaced. Surface the conflict and retry later instead.
+		if !metav1.IsControlledBy(existing, chbs) {
+			setCondition(&chbs.Status.Conditions, api.ConditionReady, metav1.ConditionFalse,
+				"CronJobNotOwned", "a CronJob with the expected name exists but is not controlled by this schedule", chbs.Generation)
+			if e := c.Status().Update(ctx, chbs); e != nil {
+				return ctrl.Result{}, e
+			}
+			return ctrl.Result{RequeueAfter: requeueInterval}, nil
+		}
+		existing.Spec = desired.Spec
+		existing.Labels = desired.Labels
+		if err := c.Update(ctx, existing); err != nil {
+			return ctrl.Result{}, err
+		}
+		// Mirror the CronJob's last schedule time into the schedule's own status.
+		chbs.Status.LastScheduleTime = existing.Status.LastScheduleTime
+	}
+
+	chbs.Status.CronJobName = desired.Name
+	setCondition(&chbs.Status.Conditions, api.ConditionJobCreated, metav1.ConditionTrue, "CronJobReady", "backup cronjob reconciled", chbs.Generation)
+	setCondition(&chbs.Status.Conditions, api.ConditionReady, metav1.ConditionTrue, "Scheduled", "backup schedule is active", chbs.Generation)
+	if err := c.Status().Update(ctx, chbs); err != nil {
+		return ctrl.Result{}, err
+	}
+	return ctrl.Result{}, nil
+}
diff --git a/pkg/model/chbackup/builder.go b/pkg/model/chbackup/builder.go
new file mode 100644
index 000000000..4b8c0958e
--- /dev/null
+++ b/pkg/model/chbackup/builder.go
@@ -0,0 +1,253 @@
+// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +package chbackup + +import ( + "fmt" + "strconv" + + batchv1 "k8s.io/api/batch/v1" + core "k8s.io/api/core/v1" + meta "k8s.io/apimachinery/pkg/apis/meta/v1" + + api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" +) + +const ( + // DefaultClientImage is the default image used by backup/restore trigger jobs. + // It must provide the clickhouse-client binary used to submit commands to the + // clickhouse-backup sidecar via the system.backup_actions integration table. + // The clickhouse-server image is used (it bundles clickhouse-client) because the + // standalone clickhouse/clickhouse-client image is deprecated and not multi-arch. + // Overridable per CR via spec.image. + DefaultClientImage = "clickhouse/clickhouse-server:24.8" + + // containerName is the name of the trigger container in the job pod. + containerName = "clickhouse-backup-trigger" + + // LabelApp / LabelCRKind / LabelCRName tag generated jobs for observability. + LabelApp = "clickhouse.altinity.com/app" + LabelCRKind = "clickhouse.altinity.com/cr-kind" + LabelCRName = "clickhouse.altinity.com/cr-name" + LabelChi = "clickhouse.altinity.com/chi" +) + +func int32Ptr(i int32) *int32 { return &i } + +func labels(crKind, crName, chiName string) map[string]string { + return map[string]string{ + LabelApp: "clickhouse-backup", + LabelCRKind: crKind, + LabelCRName: crName, + LabelChi: chiName, + } +} + +func imageOrDefault(image string) string { + if image != "" { + return image + } + return DefaultClientImage +} + +// podSpec builds the pod spec shared by backup and restore jobs. 
+func podSpec(image, credentialsSecretName, script string) core.PodSpec { + c := core.Container{ + Name: containerName, + Image: imageOrDefault(image), + Command: []string{"bash", "-ec", script}, + } + if credentialsSecretName != "" { + c.EnvFrom = []core.EnvFromSource{ + { + SecretRef: &core.SecretEnvSource{ + LocalObjectReference: core.LocalObjectReference{Name: credentialsSecretName}, + }, + }, + } + } + return core.PodSpec{ + RestartPolicy: core.RestartPolicyNever, + Containers: []core.Container{c}, + } +} + +// ResolveBackupName returns the remote backup name for a one-off ClickHouseBackup. +// It is deterministic across reconciles (derived from the CR creation timestamp). +func ResolveBackupName(chb *api.ClickHouseBackup) string { + if chb.Spec.BackupName != "" { + return chb.Spec.BackupName + } + return fmt.Sprintf("%s-%d", chb.Name, chb.CreationTimestamp.Unix()) +} + +// BackupJobName returns the name of the Job created for a one-off backup. +func BackupJobName(chb *api.ClickHouseBackup) string { return chb.Name + "-backup" } + +// CronJobName returns the name of the CronJob created for a backup schedule. +func CronJobName(chbs *api.ClickHouseBackupSchedule) string { return chbs.Name + "-backup" } + +// RestoreJobName returns the name of the Job created for a restore. +func RestoreJobName(chr *api.ClickHouseRestore) string { return chr.Name + "-restore" } + +// VerifyJobName returns the name of the Job created to verify a one-off backup. +func VerifyJobName(chb *api.ClickHouseBackup) string { return chb.Name + "-verify" } + +// BuildBackupJob builds the one-off backup Job for a ClickHouseBackup CR. 
+func BuildBackupJob(chb *api.ClickHouseBackup, chi *api.ClickHouseInstallation) *batchv1.Job { + top := Topology(chi) + services := BackupServices(top, chb.Spec.GetReplicaSelection()) + script := BackupScript(services, strconv.Quote(ResolveBackupName(chb)), BackupOpts{ + SchemaOnly: chb.Spec.SchemaOnly, + Tables: chb.Spec.Tables, + Partitions: chb.Spec.Partitions, + DiffFromRemote: chb.Spec.DiffFromRemote, + KeepLastRemote: chb.Spec.KeepLastRemote, + }) + lbls := labels(api.ClickHouseBackupCRDResourceKind, chb.Name, chb.Spec.ClickHouseInstallation) + + return &batchv1.Job{ + ObjectMeta: meta.ObjectMeta{ + Name: BackupJobName(chb), + Namespace: chb.Namespace, + Labels: lbls, + }, + Spec: batchv1.JobSpec{ + BackoffLimit: int32Ptr(0), + Template: core.PodTemplateSpec{ + ObjectMeta: meta.ObjectMeta{Labels: lbls}, + Spec: podSpec(chb.Spec.Image, chb.Spec.ClickHouseCredentialsSecretName, script), + }, + }, + } +} + +// BuildVerifyJob builds a Job that verifies a one-off backup is restorable (downloads it and +// checks integrity, without touching cluster data). Created by the controller when spec.verify. +func BuildVerifyJob(chb *api.ClickHouseBackup, chi *api.ClickHouseInstallation) *batchv1.Job { + top := Topology(chi) + services := FirstPerShardServices(top) + script := VerifyScript(services, ResolveBackupName(chb)) + lbls := labels(api.ClickHouseBackupCRDResourceKind, chb.Name, chb.Spec.ClickHouseInstallation) + + return &batchv1.Job{ + ObjectMeta: meta.ObjectMeta{ + Name: VerifyJobName(chb), + Namespace: chb.Namespace, + Labels: lbls, + }, + Spec: batchv1.JobSpec{ + BackoffLimit: int32Ptr(0), + Template: core.PodTemplateSpec{ + ObjectMeta: meta.ObjectMeta{Labels: lbls}, + Spec: podSpec(chb.Spec.Image, chb.Spec.ClickHouseCredentialsSecretName, script), + }, + }, + } +} + +// BuildBackupCronJob builds the recurring backup CronJob for a ClickHouseBackupSchedule CR. 
+func BuildBackupCronJob(chbs *api.ClickHouseBackupSchedule, chi *api.ClickHouseInstallation) *batchv1.CronJob { + tmpl := chbs.Spec.BackupTemplate + top := Topology(chi) + services := BackupServices(top, tmpl.GetReplicaSelection()) + + prefix := tmpl.BackupNamePrefix + if prefix == "" { + prefix = chbs.Name + } + // Each scheduled run computes a unique, timestamped backup name at runtime. + backupNameExpr := "\"" + prefix + "-$(date -u +%Y%m%d-%H%M%S)\"" + script := BackupScript(services, backupNameExpr, BackupOpts{ + SchemaOnly: tmpl.SchemaOnly, + Tables: tmpl.Tables, + Partitions: tmpl.Partitions, + KeepLastRemote: tmpl.KeepLastRemote, + }) + lbls := labels(api.ClickHouseBackupScheduleCRDResourceKind, chbs.Name, chbs.Spec.ClickHouseInstallation) + + concurrency := batchv1.ForbidConcurrent + if chbs.Spec.ConcurrencyPolicy != "" { + concurrency = batchv1.ConcurrencyPolicy(chbs.Spec.ConcurrencyPolicy) + } + successLimit := int32Ptr(3) + if chbs.Spec.SuccessfulJobsHistoryLimit != nil { + successLimit = chbs.Spec.SuccessfulJobsHistoryLimit + } + failedLimit := int32Ptr(1) + if chbs.Spec.FailedJobsHistoryLimit != nil { + failedLimit = chbs.Spec.FailedJobsHistoryLimit + } + suspend := false + if chbs.Spec.Suspend != nil { + suspend = *chbs.Spec.Suspend + } + + return &batchv1.CronJob{ + ObjectMeta: meta.ObjectMeta{ + Name: CronJobName(chbs), + Namespace: chbs.Namespace, + Labels: lbls, + }, + Spec: batchv1.CronJobSpec{ + Schedule: chbs.Spec.Schedule, + Suspend: &suspend, + ConcurrencyPolicy: concurrency, + StartingDeadlineSeconds: chbs.Spec.StartingDeadlineSeconds, + SuccessfulJobsHistoryLimit: successLimit, + FailedJobsHistoryLimit: failedLimit, + JobTemplate: batchv1.JobTemplateSpec{ + ObjectMeta: meta.ObjectMeta{Labels: lbls}, + Spec: batchv1.JobSpec{ + BackoffLimit: int32Ptr(0), + Template: core.PodTemplateSpec{ + ObjectMeta: meta.ObjectMeta{Labels: lbls}, + Spec: podSpec(tmpl.Image, tmpl.ClickHouseCredentialsSecretName, script), + }, + }, + }, + }, + } +} + +// 
BuildRestoreJob builds the one-off restore Job for a ClickHouseRestore CR. +func BuildRestoreJob(chr *api.ClickHouseRestore, chi *api.ClickHouseInstallation) *batchv1.Job { + top := Topology(chi) + // Schema and data are both restored on the first replica of each shard. For + // Replicated* tables the sidecar must set restore_schema_on_cluster, so the schema + // CREATE is issued ON CLUSTER from that one node and reaches every replica with an + // identical ZooKeeper/Keeper path; native replication then clones the data to the + // other replicas. Restoring schema independently on every replica is intentionally + // avoided: clickhouse-backup rewrites the replica path per node, leaving replicas on + // divergent paths that never sync. + services := FirstPerShardServices(top) + script := RestoreScript(services, services, chr.Spec.BackupName, chr.Spec.SchemaOnly, chr.Spec.Overwrite, chr.Spec.IsTopologyValidationEnabled()) + lbls := labels(api.ClickHouseRestoreCRDResourceKind, chr.Name, chr.Spec.ClickHouseInstallation) + + return &batchv1.Job{ + ObjectMeta: meta.ObjectMeta{ + Name: RestoreJobName(chr), + Namespace: chr.Namespace, + Labels: lbls, + }, + Spec: batchv1.JobSpec{ + BackoffLimit: int32Ptr(0), + Template: core.PodTemplateSpec{ + ObjectMeta: meta.ObjectMeta{Labels: lbls}, + Spec: podSpec(chr.Spec.Image, chr.Spec.ClickHouseCredentialsSecretName, script), + }, + }, + } +} diff --git a/pkg/model/chbackup/builder_test.go b/pkg/model/chbackup/builder_test.go new file mode 100644 index 000000000..3720a6fad --- /dev/null +++ b/pkg/model/chbackup/builder_test.go @@ -0,0 +1,227 @@ +// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package chbackup + +import ( + "strings" + "testing" + + core "k8s.io/api/core/v1" + meta "k8s.io/apimachinery/pkg/apis/meta/v1" + + api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" +) + +func testCHI(name, cluster string, shards, replicas int) *api.ClickHouseInstallation { + return &api.ClickHouseInstallation{ + ObjectMeta: meta.ObjectMeta{Name: name, Namespace: "ns"}, + Spec: api.ChiSpec{ + Configuration: &api.Configuration{ + Clusters: []*api.Cluster{ + { + Name: cluster, + Layout: &api.ChiClusterLayout{ShardsCount: shards, ReplicasCount: replicas}, + }, + }, + }, + }, + } +} + +func TestTopologyServiceNames(t *testing.T) { + chi := testCHI("demo", "default", 2, 2) + top := Topology(chi) + + all := AllServices(top) + want := []string{ + "chi-demo-default-0-0", "chi-demo-default-0-1", + "chi-demo-default-1-0", "chi-demo-default-1-1", + } + if strings.Join(all, ",") != strings.Join(want, ",") { + t.Fatalf("AllServices = %v, want %v", all, want) + } + + first := FirstPerShardServices(top) + wantFirst := []string{"chi-demo-default-0-0", "chi-demo-default-1-0"} + if strings.Join(first, ",") != strings.Join(wantFirst, ",") { + t.Fatalf("FirstPerShardServices = %v, want %v", first, wantFirst) + } + + shardsN, replicasN := Counts(top) + if shardsN != 2 || replicasN != 2 { + t.Fatalf("Counts = (%d,%d), want (2,2)", shardsN, replicasN) + } +} + +func TestLayoutDefaultsToSingleHost(t *testing.T) { + // shardsCount/replicasCount omitted -> default 1x1. 
+ chi := &api.ClickHouseInstallation{ + ObjectMeta: meta.ObjectMeta{Name: "demo"}, + Spec: api.ChiSpec{Configuration: &api.Configuration{ + Clusters: []*api.Cluster{{Name: "default"}}, + }}, + } + all := AllServices(Topology(chi)) + if len(all) != 1 || all[0] != "chi-demo-default-0-0" { + t.Fatalf("AllServices = %v, want [chi-demo-default-0-0]", all) + } +} + +func TestBackupServicesSelection(t *testing.T) { + top := Topology(testCHI("demo", "default", 2, 2)) + if got := BackupServices(top, api.ReplicaSelectionAllReplicas); len(got) != 4 { + t.Fatalf("AllReplicas selection = %d services, want 4", len(got)) + } + if got := BackupServices(top, api.ReplicaSelectionFirstPerShard); len(got) != 2 { + t.Fatalf("FirstPerShard selection = %d services, want 2", len(got)) + } + // Empty selection must default to FirstPerShard. + if got := BackupServices(top, ""); len(got) != 2 { + t.Fatalf("default selection = %d services, want 2", len(got)) + } +} + +func TestBackupScript(t *testing.T) { + s := BackupScript([]string{"svc-a", "svc-b"}, `"my-backup"`, BackupOpts{}) + for _, want := range []string{"system.backup_actions", "create_remote", "svc-a svc-b", `BACKUP_NAME="my-backup"`} { + if !strings.Contains(s, want) { + t.Errorf("backup script missing %q", want) + } + } + if strings.Contains(s, "--schema") { + t.Errorf("non-schema-only backup should not pass --schema") + } + + schemaOnly := BackupScript([]string{"svc-a"}, `"x"`, BackupOpts{SchemaOnly: true}) + if !strings.Contains(schemaOnly, "create_remote --schema") { + t.Errorf("schemaOnly backup must pass --schema") + } +} + +func TestRestoreScriptSafety(t *testing.T) { + schema := []string{"chi-demo-default-0-0", "chi-demo-default-0-1"} + data := []string{"chi-demo-default-0-0"} + + // overwrite=false, validateTopology=true: guards present, no --rm. 
+ s := RestoreScript(schema, data, "bk", false, false, true) + for _, want := range []string{ + `BACKUP_NAME="bk"`, + "restore_remote --schema ${BACKUP_NAME}", + "restore_remote --data ${BACKUP_NAME}", + "overwrite guard", + "validating target topology", + } { + if !strings.Contains(s, want) { + t.Errorf("restore script missing %q", want) + } + } + if strings.Contains(s, "--rm") { + t.Errorf("non-overwrite restore must not use --rm") + } + + // overwrite=true: --rm present, guard absent. + s2 := RestoreScript(schema, data, "bk", false, true, true) + if !strings.Contains(s2, "restore_remote --schema --rm ${BACKUP_NAME}") { + t.Errorf("overwrite restore must use --rm") + } + if strings.Contains(s2, "overwrite guard") { + t.Errorf("overwrite restore must skip the non-empty guard") + } + + // schemaOnly restore: no data phase. + s3 := RestoreScript(schema, data, "bk", true, false, false) + if strings.Contains(s3, "restore_remote --data") { + t.Errorf("schemaOnly restore must not restore data") + } +} + +func TestBuildBackupJob(t *testing.T) { + chi := testCHI("demo", "default", 2, 2) + chb := &api.ClickHouseBackup{ + ObjectMeta: meta.ObjectMeta{Name: "b1", Namespace: "ns"}, + Spec: api.ClickHouseBackupSpec{ClickHouseInstallation: "demo"}, + } + job := BuildBackupJob(chb, chi) + + if job.Name != "b1-backup" || job.Namespace != "ns" { + t.Fatalf("unexpected job meta: %s/%s", job.Namespace, job.Name) + } + if job.Spec.BackoffLimit == nil || *job.Spec.BackoffLimit != 0 { + t.Errorf("backup job BackoffLimit must be 0") + } + if job.Spec.Template.Spec.RestartPolicy != core.RestartPolicyNever { + t.Errorf("backup job RestartPolicy must be Never") + } + if got := job.Spec.Template.Spec.Containers[0].Image; got != DefaultClientImage { + t.Errorf("default image = %q, want %q", got, DefaultClientImage) + } + script := strings.Join(job.Spec.Template.Spec.Containers[0].Command, " ") + if !strings.Contains(script, "chi-demo-default-0-0") || strings.Contains(script, 
"chi-demo-default-0-1") { + t.Errorf("FirstPerShard backup should target shard-first hosts only") + } +} + +func TestBuildBackupCronJob(t *testing.T) { + chi := testCHI("demo", "default", 1, 1) + suspend := true + chbs := &api.ClickHouseBackupSchedule{ + ObjectMeta: meta.ObjectMeta{Name: "s1", Namespace: "ns"}, + Spec: api.ClickHouseBackupScheduleSpec{ + ClickHouseInstallation: "demo", + Schedule: "0 2 * * *", + Suspend: &suspend, + }, + } + cj := BuildBackupCronJob(chbs, chi) + if cj.Spec.Schedule != "0 2 * * *" { + t.Errorf("schedule = %q", cj.Spec.Schedule) + } + if cj.Spec.Suspend == nil || !*cj.Spec.Suspend { + t.Errorf("suspend must propagate to CronJob") + } + if cj.Spec.ConcurrencyPolicy != "Forbid" { + t.Errorf("default ConcurrencyPolicy must be Forbid, got %q", cj.Spec.ConcurrencyPolicy) + } + // Scheduled runs compute a unique timestamped name. + script := strings.Join(cj.Spec.JobTemplate.Spec.Template.Spec.Containers[0].Command, " ") + if !strings.Contains(script, "date -u +%Y%m%d-%H%M%S") { + t.Errorf("scheduled backup must compute a timestamped name") + } +} + +func TestBuildRestoreJob(t *testing.T) { + chi := testCHI("demo", "default", 2, 2) + chr := &api.ClickHouseRestore{ + ObjectMeta: meta.ObjectMeta{Name: "r1", Namespace: "ns"}, + Spec: api.ClickHouseRestoreSpec{ClickHouseInstallation: "demo", BackupName: "bk"}, + } + job := BuildRestoreJob(chr, chi) + if job.Name != "r1-restore" { + t.Fatalf("restore job name = %q", job.Name) + } + script := strings.Join(job.Spec.Template.Spec.Containers[0].Command, " ") + // Schema and data are restored on the first replica of each shard only; the schema + // CREATE reaches the other replicas ON CLUSTER (via the sidecar's restore_schema_on_cluster). 
+ for _, h := range []string{"chi-demo-default-0-0", "chi-demo-default-1-0"} { + if !strings.Contains(script, h) { + t.Errorf("restore script must target shard-first host %q", h) + } + } + for _, h := range []string{"chi-demo-default-0-1", "chi-demo-default-1-1"} { + if strings.Contains(script, h) { + t.Errorf("restore script must NOT target non-first replica %q", h) + } + } +} diff --git a/pkg/model/chbackup/options_test.go b/pkg/model/chbackup/options_test.go new file mode 100644 index 000000000..a9e8d43cf --- /dev/null +++ b/pkg/model/chbackup/options_test.go @@ -0,0 +1,90 @@ +// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package chbackup + +import ( + "strings" + "testing" + + meta "k8s.io/apimachinery/pkg/apis/meta/v1" + + api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1" +) + +func i32(v int32) *int32 { return &v } + +func TestBackupScriptOptions(t *testing.T) { + s := BackupScript([]string{"svc"}, `"bk"`, BackupOpts{ + Tables: "db.*", + Partitions: []string{"202601", "202602"}, + DiffFromRemote: "base-backup", + KeepLastRemote: i32(2), + }) + for _, want := range []string{ + "create_remote --diff-from-remote=base-backup --tables=db.* --partitions=202601,202602 ", + "system.backup_list", + "delete remote", + "OFFSET 2", + } { + if !strings.Contains(s, want) { + t.Errorf("backup script missing %q", want) + } + } +} + +func TestBackupScriptNoRetentionWhenUnset(t *testing.T) { + s := BackupScript([]string{"svc"}, `"bk"`, BackupOpts{}) + if strings.Contains(s, "delete remote") { + t.Errorf("no retention pruning expected when KeepLastRemote is unset") + } +} + +func TestBuildVerifyJob(t *testing.T) { + chi := testCHI("demo", "default", 1, 1) + chb := &api.ClickHouseBackup{ + ObjectMeta: meta.ObjectMeta{Name: "b1", Namespace: "ns"}, + Spec: api.ClickHouseBackupSpec{ClickHouseInstallation: "demo", BackupName: "bk", Verify: true}, + } + job := BuildVerifyJob(chb, chi) + if job.Name != "b1-verify" { + t.Fatalf("verify job name = %q, want b1-verify", job.Name) + } + script := strings.Join(job.Spec.Template.Spec.Containers[0].Command, " ") + for _, want := range []string{"download ${BACKUP_NAME}", "delete local", "system.backup_list"} { + if !strings.Contains(script, want) { + t.Errorf("verify script missing %q", want) + } + } +} + +func TestBuildBackupJobPropagatesOptions(t *testing.T) { + chi := testCHI("demo", "default", 1, 1) + chb := &api.ClickHouseBackup{ + ObjectMeta: meta.ObjectMeta{Name: "b1", Namespace: "ns"}, + Spec: api.ClickHouseBackupSpec{ + ClickHouseInstallation: "demo", + BackupName: "bk", + Tables: "db.t", + KeepLastRemote: i32(3), + }, + 
} + script := strings.Join(BuildBackupJob(chb, chi).Spec.Template.Spec.Containers[0].Command, " ") + if !strings.Contains(script, "--tables=db.t") { + t.Errorf("backup job script must carry --tables") + } + if !strings.Contains(script, "OFFSET 3") { + t.Errorf("backup job script must carry retention (OFFSET 3)") + } +} diff --git a/pkg/model/chbackup/script.go b/pkg/model/chbackup/script.go new file mode 100644 index 000000000..4ee184bab --- /dev/null +++ b/pkg/model/chbackup/script.go @@ -0,0 +1,188 @@ +// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package chbackup + +import ( + "fmt" + "strings" +) + +// scriptPreamble defines the common shell helpers shared by backup and restore jobs. +// It builds optional auth flags from the CLICKHOUSE_USER/CLICKHOUSE_PASSWORD env vars +// (provided via an optional Secret) and a run_action helper that submits a command to +// the clickhouse-backup integration table (system.backup_actions) of the sidecar +// running inside the target host's pod and polls until it succeeds or fails. 
+const scriptPreamble = `set -euo pipefail +CH_PORT="${CLICKHOUSE_PORT:-9000}" +CH_AUTH="" +if [ -n "${CLICKHOUSE_USER:-}" ]; then CH_AUTH="--user=${CLICKHOUSE_USER}"; fi +if [ -n "${CLICKHOUSE_PASSWORD:-}" ]; then CH_AUTH="${CH_AUTH} --password=${CLICKHOUSE_PASSWORD}"; fi +MAX_POLLS="${MAX_POLLS:-1440}" +POLL_INTERVAL="${POLL_INTERVAL:-5}" + +run_action() { + svc="$1"; cmd="$2" + echo ">> ${svc}: ${cmd}" + clickhouse-client --host="${svc}" --port="${CH_PORT}" ${CH_AUTH} \ + --query="INSERT INTO system.backup_actions(command) VALUES('${cmd}')" + n=0 + while true; do + n=$((n+1)) + status="$(clickhouse-client --host="${svc}" --port="${CH_PORT}" ${CH_AUTH} \ + --query="SELECT status FROM system.backup_actions WHERE command='${cmd}' ORDER BY start DESC LIMIT 1")" + case "${status}" in + success) echo " done"; return 0 ;; + error) + err="$(clickhouse-client --host="${svc}" --port="${CH_PORT}" ${CH_AUTH} \ + --query="SELECT error FROM system.backup_actions WHERE command='${cmd}' ORDER BY start DESC LIMIT 1")" + echo " FAILED on ${svc}: ${err}" >&2; return 1 ;; + esac + if [ "${n}" -ge "${MAX_POLLS}" ]; then echo " TIMEOUT waiting for '${cmd}' on ${svc}" >&2; return 1; fi + sleep "${POLL_INTERVAL}" + done +} +` + +func shellList(services []string) string { + return strings.Join(services, " ") +} + +// BackupOpts collects the optional knobs for a create_remote backup. 
+type BackupOpts struct { + SchemaOnly bool + Tables string // clickhouse-backup --tables pattern + Partitions []string // clickhouse-backup --partitions ids + DiffFromRemote string // base backup name for an incremental backup + KeepLastRemote *int32 // retention: keep only the N most recent remote backups +} + +func (o BackupOpts) createRemoteFlags() string { + flags := "" + if o.SchemaOnly { + flags += "--schema " + } + if o.DiffFromRemote != "" { + flags += "--diff-from-remote=" + o.DiffFromRemote + " " + } + if o.Tables != "" { + flags += "--tables=" + o.Tables + " " + } + if len(o.Partitions) > 0 { + flags += "--partitions=" + strings.Join(o.Partitions, ",") + " " + } + return flags +} + +// BackupScript renders the shell script that triggers create_remote on each target host. +// +// backupNameExpr is the shell expression assigned to BACKUP_NAME. For one-off backups it +// is a quoted literal (e.g. "my-backup"); for scheduled backups it is a runtime expression +// (e.g. "${PREFIX}$(date -u +%Y%m%d-%H%M%S)") so each run gets a unique name. +// +// When KeepLastRemote is set, a best-effort retention step prunes remote backups beyond the +// N most recent (via system.backup_list + a `delete remote` action); retention failures never +// fail the backup itself. +func BackupScript(services []string, backupNameExpr string, opts BackupOpts) string { + flags := opts.createRemoteFlags() + var b strings.Builder + b.WriteString(scriptPreamble) + b.WriteString(fmt.Sprintf("\nBACKUP_NAME=%s\n", backupNameExpr)) + b.WriteString(fmt.Sprintf("SERVICES=%q\n", shellList(services))) + b.WriteString(fmt.Sprintf("for svc in ${SERVICES}; do run_action \"${svc}\" \"create_remote %s${BACKUP_NAME}\"; done\n", flags)) + b.WriteString("echo \"Backup ${BACKUP_NAME} completed on all shards.\"\n") + + if opts.KeepLastRemote != nil { + // Remote storage is shared per shard; prune from the first targeted host. Best-effort. 
+ b.WriteString(fmt.Sprintf("\necho \"Retention: keeping last %d remote backups\"\n", *opts.KeepLastRemote)) + b.WriteString("RETAIN_SVC=\"$(echo ${SERVICES} | awk '{print $1}')\"\n") + b.WriteString(fmt.Sprintf("OLD_BACKUPS=\"$(clickhouse-client --host=\"${RETAIN_SVC}\" --port=\"${CH_PORT}\" ${CH_AUTH} --query=\"SELECT name FROM system.backup_list WHERE location='remote' ORDER BY created DESC LIMIT 1000000 OFFSET %d\" 2>/dev/null || true)\"\n", *opts.KeepLastRemote)) + // Serialize the deletes via run_action: the sidecar runs one action at a time, so + // firing them all at once yields "another operation is currently running". Tolerant: + // a failed prune (e.g. an object store that rejects the delete) never fails the backup. + b.WriteString("for old in ${OLD_BACKUPS}; do echo \" pruning ${old}\"; run_action \"${RETAIN_SVC}\" \"delete remote ${old}\" || true; done\n") + } + return b.String() +} + +// VerifyScript renders a best-effort verification: download the remote backup to each +// shard-first host, confirm it materializes locally (system.backup_list), then drop the +// local copy. It touches no cluster data; it only proves the remote backup is pullable. +func VerifyScript(services []string, backupName string) string { + var b strings.Builder + b.WriteString(scriptPreamble) + b.WriteString(fmt.Sprintf("\nBACKUP_NAME=%q\n", backupName)) + b.WriteString(fmt.Sprintf("SERVICES=%q\n", shellList(services))) + b.WriteString("for svc in ${SERVICES}; do\n") + b.WriteString(" echo \">> verifying ${BACKUP_NAME} on ${svc}\"\n") + // create_remote leaves a local copy; drop it so the download genuinely pulls from remote. 
+ b.WriteString(" clickhouse-client --host=\"${svc}\" --port=\"${CH_PORT}\" ${CH_AUTH} --query=\"INSERT INTO system.backup_actions(command) VALUES('delete local ${BACKUP_NAME}')\" || true\n") + b.WriteString(" for i in $(seq 1 30); do lc=\"$(clickhouse-client --host=\"${svc}\" --port=\"${CH_PORT}\" ${CH_AUTH} --query=\"SELECT count() FROM system.backup_list WHERE name='${BACKUP_NAME}' AND location='local'\")\"; [ \"${lc}\" = \"0\" ] && break; sleep 2; done\n") + b.WriteString(" run_action \"${svc}\" \"download ${BACKUP_NAME}\"\n") + b.WriteString(" cnt=\"$(clickhouse-client --host=\"${svc}\" --port=\"${CH_PORT}\" ${CH_AUTH} --query=\"SELECT count() FROM system.backup_list WHERE name='${BACKUP_NAME}' AND location='local'\")\"\n") + b.WriteString(" if [ \"${cnt}\" -lt 1 ]; then echo \"verify failed: ${BACKUP_NAME} not present locally on ${svc} after download\" >&2; exit 1; fi\n") + b.WriteString(" clickhouse-client --host=\"${svc}\" --port=\"${CH_PORT}\" ${CH_AUTH} --query=\"INSERT INTO system.backup_actions(command) VALUES('delete local ${BACKUP_NAME}')\" || true\n") + b.WriteString("done\n") + b.WriteString("echo \"Backup ${BACKUP_NAME} verified.\"\n") + return b.String() +} + +// RestoreScript renders the shell script that restores a remote backup. +// +// Before touching any data it runs preflight safety checks: +// - topology check (when validateTopology is true): every schema host must be reachable, +// ensuring the full target cluster is up before a ReplicatedMergeTree restore; +// - overwrite guard (when overwrite is false): refuses if any target data host already +// holds user tables, preventing accidental data loss. +// +// Schema and data are then restored on the first replica of each shard. For Replicated* +// tables the sidecar must set restore_schema_on_cluster so the schema CREATE is issued +// ON CLUSTER from that node and reaches every replica with an identical Keeper path; +// native replication then clones the data to the remaining replicas. 
When overwrite is
+// true, existing tables are dropped first via clickhouse-backup's --rm. When overwrite is false the guard fails closed: the script aborts if the user-table count cannot be read.
+func RestoreScript(schemaServices, dataServices []string, backupName string, schemaOnly, overwrite, validateTopology bool) string {
+	rm := "" // extra flag spliced into the schema restore command; stays empty unless overwrite is set
+	if overwrite {
+		rm = "--rm "
+	}
+	var b strings.Builder
+	b.WriteString(scriptPreamble)
+	b.WriteString(fmt.Sprintf("\nBACKUP_NAME=%q\n", backupName)) // NOTE(review): %q is Go quoting, not shell quoting ($ and backticks still expand inside "..."); confirm backupName is validated upstream
+	b.WriteString(fmt.Sprintf("SCHEMA_SERVICES=%q\n", shellList(schemaServices)))
+	b.WriteString(fmt.Sprintf("DATA_SERVICES=%q\n", shellList(dataServices)))
+
+	if validateTopology {
+		// Preflight: every schema host must answer SELECT 1, otherwise the script exits 1.
+		b.WriteString("echo \"Preflight: validating target topology is reachable...\"\n")
+		b.WriteString("for svc in ${SCHEMA_SERVICES}; do\n")
+		b.WriteString(" clickhouse-client --host=\"${svc}\" --port=\"${CH_PORT}\" ${CH_AUTH} --query=\"SELECT 1\" >/dev/null \\\n")
+		b.WriteString(" || { echo \"topology check failed: host ${svc} is not reachable\" >&2; exit 1; }\n")
+		b.WriteString("done\n")
+	}
+	if !overwrite {
+		// Overwrite guard: refuse to restore onto a data host that already has user tables.
+		b.WriteString("echo \"Preflight: overwrite guard (refuse non-empty target)...\"\n")
+		b.WriteString("for svc in ${DATA_SERVICES}; do\n")
+		b.WriteString(" cnt=\"$(clickhouse-client --host=\"${svc}\" --port=\"${CH_PORT}\" ${CH_AUTH} --query=\"SELECT count() FROM system.tables WHERE database NOT IN ('system','INFORMATION_SCHEMA','information_schema') AND is_temporary=0\")\" || { echo \"overwrite guard failed: could not count user tables on ${svc}\" >&2; exit 1; }\n") // fail closed: an empty cnt would make the -gt test error out and silently skip the refusal
+		b.WriteString(" if [ \"${cnt}\" -gt 0 ]; then echo \"refusing restore: target ${svc} already has ${cnt} user table(s); set spec.overwrite=true to proceed\" >&2; exit 1; fi\n")
+		b.WriteString("done\n")
+	}
+
+	b.WriteString("echo \"Restoring schema...\"\n")
+	b.WriteString(fmt.Sprintf("for svc in ${SCHEMA_SERVICES}; do run_action \"${svc}\" \"restore_remote --schema %s${BACKUP_NAME}\"; done\n", rm))
+	if !schemaOnly {
+		b.WriteString("echo \"Restoring data...\"\n")
+		b.WriteString("for svc in ${DATA_SERVICES}; do run_action \"${svc}\" \"restore_remote --data ${BACKUP_NAME}\"; done\n")
+	}
+	b.WriteString("echo \"Restore ${BACKUP_NAME} completed.\"\n")
+	return b.String()
+}
diff --git a/pkg/model/chbackup/topology.go b/pkg/model/chbackup/topology.go
new file mode 100644
index 000000000..4e7b4d586
--- /dev/null
+++ b/pkg/model/chbackup/topology.go
@@ -0,0 +1,137 @@
+// Copyright 2019 Altinity Ltd and/or its affiliates. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package chbackup builds the Kubernetes Job/CronJob resources that drive
+// clickhouse-backup for the ClickHouseBackup, ClickHouseBackupSchedule and
+// ClickHouseRestore custom resources.
+package chbackup
+
+import (
+	"fmt"
+
+	api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1"
+)
+
+// ShardHosts holds the per-host StatefulSet Service names of a single shard,
+// indexed by replica.
+type ShardHosts struct {
+	Shard int
+	Hosts []string
+}
+
+// ClusterTopology holds the resolved per-shard host Service names of one cluster.
+type ClusterTopology struct {
+	Cluster string
+	Shards  []ShardHosts
+}
+
+// hostServiceName returns the per-host StatefulSet Service name produced by the
+// operator for the default naming scheme: "chi-{chi}-{cluster}-{shard}-{replica}".
+// The short (namespace-local) name is used on purpose: the backup/restore Job runs
+// in the same namespace as the CHI, so DNS resolves it without an FQDN and we avoid
+// depending on spec.namespaceDomainPattern.
+func hostServiceName(chiName, cluster string, shard, replica int) string {
+	return fmt.Sprintf("chi-%s-%s-%d-%d", chiName, cluster, shard, replica)
+}
+
+// layoutCounts returns the layout's shard/replica counts; a nil layout or a non-positive count yields 1.
+func layoutCounts(c *api.Cluster) (shards, replicas int) {
+	shards, replicas = 1, 1
+	if l := c.GetLayout(); l != nil {
+		if l.ShardsCount > 0 {
+			shards = l.ShardsCount
+		}
+		if l.ReplicasCount > 0 {
+			replicas = l.ReplicasCount
+		}
+	}
+	return shards, replicas
+}
+
+// Topology resolves the shard/replica host Service names of every cluster of the CHI
+// from the cluster layout counts. It is computed from the live CHI at reconcile time.
+// A nil CHI or a nil Spec.Configuration yields nil; nil cluster entries are skipped.
+// Known limitation: it assumes the default host naming scheme and a layout expressed via
+// shardsCount/replicasCount; explicit shard/replica lists or custom host names are a follow-up.
+func Topology(chi *api.ClickHouseInstallation) []ClusterTopology {
+	var out []ClusterTopology
+	if chi == nil || chi.Spec.Configuration == nil {
+		return out
+	}
+	for _, c := range chi.Spec.Configuration.Clusters {
+		if c == nil {
+			continue
+		}
+		shards, replicas := layoutCounts(c)
+		ct := ClusterTopology{Cluster: c.Name}
+		for s := 0; s < shards; s++ {
+			sh := ShardHosts{Shard: s}
+			for r := 0; r < replicas; r++ {
+				sh.Hosts = append(sh.Hosts, hostServiceName(chi.Name, c.Name, s, r))
+			}
+			ct.Shards = append(ct.Shards, sh)
+		}
+		out = append(out, ct)
+	}
+	return out
+}
+
+// AllServices returns the Service names of every host across all clusters/shards/replicas, in that nesting order.
+func AllServices(top []ClusterTopology) []string {
+	var out []string
+	for _, ct := range top {
+		for _, sh := range ct.Shards {
+			out = append(out, sh.Hosts...)
+		}
+	}
+	return out
+}
+
+// FirstPerShardServices returns the Service name of the first replica of every shard.
+// This is the correct selection for Replicated* table engines, whose data is identical
+// across replicas of a shard.
+func FirstPerShardServices(top []ClusterTopology) []string {
+	var out []string
+	for _, ct := range top {
+		for _, sh := range ct.Shards {
+			if len(sh.Hosts) > 0 { // a shard without hosts contributes nothing
+				out = append(out, sh.Hosts[0])
+			}
+		}
+	}
+	return out
+}
+
+// Counts returns the total number of shards summed over all clusters and the largest
+// per-shard replica count found in top; both are 0 for a nil or empty topology.
+func Counts(top []ClusterTopology) (shards, replicas int) {
+	for _, ct := range top {
+		shards += len(ct.Shards)
+		for _, sh := range ct.Shards {
+			if len(sh.Hosts) > replicas {
+				replicas = len(sh.Hosts)
+			}
+		}
+	}
+	return shards, replicas
+}
+
+// BackupServices returns every host Service for api.ReplicaSelectionAllReplicas and the
+// first replica of each shard for any other selection value, including the empty one.
+func BackupServices(top []ClusterTopology, selection api.ReplicaSelection) []string {
+	if selection == api.ReplicaSelectionAllReplicas {
+		return AllServices(top)
+	}
+	return FirstPerShardServices(top)
+}
diff --git a/tests/e2e/manifests/chb/test-clickhousebackup.yaml b/tests/e2e/manifests/chb/test-clickhousebackup.yaml new file mode 100644 index 000000000..a122afab4 --- /dev/null +++ b/tests/e2e/manifests/chb/test-clickhousebackup.yaml @@ -0,0 +1,8 @@ +apiVersion: "clickhouse.altinity.com/v1" +kind: "ClickHouseBackup" +metadata: + name: test-backup +spec: + clickHouseInstallation: test-cluster-for-backups + backupName: e2e-backup + replicaSelection: FirstPerShard diff --git a/tests/e2e/manifests/chbs/test-clickhousebackupschedule.yaml b/tests/e2e/manifests/chbs/test-clickhousebackupschedule.yaml new file mode 100644 index 000000000..b826bf49c --- /dev/null +++ b/tests/e2e/manifests/chbs/test-clickhousebackupschedule.yaml @@ -0,0 +1,11 @@ +apiVersion: "clickhouse.altinity.com/v1" +kind: "ClickHouseBackupSchedule" +metadata: + name: test-backup-schedule +spec: + clickHouseInstallation: test-cluster-for-backups + schedule: "0 2 * * *" + concurrencyPolicy: Forbid + backupTemplate: + backupNamePrefix: e2e-scheduled + replicaSelection: FirstPerShard
diff --git a/tests/e2e/manifests/chr/test-clickhouserestore.yaml b/tests/e2e/manifests/chr/test-clickhouserestore.yaml new file mode 100644 index 000000000..37e2952c7 --- /dev/null +++ b/tests/e2e/manifests/chr/test-clickhouserestore.yaml @@ -0,0 +1,9 @@ +apiVersion: "clickhouse.altinity.com/v1" +kind: "ClickHouseRestore" +metadata: + name: test-restore +spec: + clickHouseInstallation: test-cluster-for-backups + backupName: e2e-backup + overwrite: true + validateTopology: true
diff --git a/tests/e2e/test_backup_restore.py b/tests/e2e/test_backup_restore.py
new file mode 100644
index 000000000..83dd79bf7
--- /dev/null
+++ b/tests/e2e/test_backup_restore.py
@@ -0,0 +1,131 @@
+import time
+
+import e2e.settings as settings
+import e2e.kubectl as kubectl
+import e2e.clickhouse as clickhouse
+import e2e.util as util
+import e2e.steps as steps
+import e2e.alerts as alerts
+
+from testflows.core import *
+from testflows.asserts import error
+
+CHI_NAME = "test-cluster-for-backups"
+REPLICA_0 = "chi-test-cluster-for-backups-default-0-0"
+REPLICA_1 = "chi-test-cluster-for-backups-default-0-1"
+ROWS = 1000  # rows inserted before the backup and expected back after the restore
+
+
+def wait_backup_sidecars_ready():  # block until both replica pods report the sidecar container ready
+    for pod in (REPLICA_0, REPLICA_1):
+        with Then(f"wait {pod} clickhouse-backup sidecar ready"):
+            kubectl.wait_field("pod", pod, ".status.containerStatuses[1].ready", "true")  # NOTE(review): assumes index 1 is the sidecar - confirm against the CHI template
+
+
+def wait_cr_phase(kind, name, expected="Completed", timeout=300):
+    """Poll a backup/restore custom resource every 5s until it reaches the expected phase; fail on an unexpected Failed phase or on timeout."""
+    with Then(f'wait {kind}/{name} phase "{expected}"'):
+        start = time.monotonic()  # monotonic clock: the timeout must not be skewed by wall-clock adjustments
+        while time.monotonic() - start < timeout:
+            phase = kubectl.launch(
+                f"get {kind} {name} -n {settings.test_namespace} -o jsonpath='{{.status.phase}}'",
+                ok_to_fail=True,
+            ).strip("'")
+            if phase == expected:
+                return True
+            if phase == "Failed":
+                fail(f"{kind}/{name} reached Failed phase")
+            time.sleep(5)
+        fail(f"{kind}/{name} did not reach phase {expected} within {timeout}s")
+
+
+def create_replicated_table_with_data():  # create default.test_restore on the cluster and insert ROWS rows via REPLICA_0
+    with Given("a ReplicatedMergeTree table with data on the cluster"):
+        clickhouse.query(
+            CHI_NAME,
+            "CREATE TABLE IF NOT EXISTS default.test_restore ON CLUSTER 'default' (i UInt64) "
+            "ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/{table}', '{replica}') ORDER BY tuple();"
+            f"INSERT INTO default.test_restore SELECT number FROM numbers({ROWS})",
+            pod=REPLICA_0,
+        )
+
+
+def row_count(pod):  # count() of default.test_restore as seen from `pod`, returned as a stripped string
+    return clickhouse.query(CHI_NAME, "SELECT count() FROM default.test_restore", pod=pod).strip()
+
+
+@TestScenario
+@Name("test_operator_backup_and_restore. Operator-driven backup then restore round-trip")
+def test_operator_backup_and_restore(self, chi):
+    wait_backup_sidecars_ready()
+    create_replicated_table_with_data()
+
+    with When("a ClickHouseBackup is created"):
+        kubectl.launch(f"apply -f manifests/chb/test-clickhousebackup.yaml -n {settings.test_namespace}")
+        wait_cr_phase("clickhousebackup", "test-backup", "Completed")
+
+    with Then("the operator created an owned backup Job"):
+        kubectl.wait_field("job", "test-backup-backup", ".status.succeeded", "1")
+
+    with When("the table is dropped on the whole cluster"):
+        clickhouse.query(CHI_NAME, "DROP TABLE default.test_restore ON CLUSTER 'default' SYNC", pod=REPLICA_0)
+
+    with When("a ClickHouseRestore is created"):
+        kubectl.launch(f"apply -f manifests/chr/test-clickhouserestore.yaml -n {settings.test_namespace}")
+        wait_cr_phase("clickhouserestore", "test-restore", "Completed")
+
+    with Then("data is restored on the first replica"):
+        assert row_count(REPLICA_0) == str(ROWS), error("data not restored on first replica")
+
+    with Then("native replication synchronized the second replica"):
+        synced = False
+        for _ in range(24):  # up to 24 polls, 5s apart
+            if row_count(REPLICA_1) == str(ROWS):
+                synced = True
+                break
+            time.sleep(5)
+        assert synced, error("second replica did not catch up after restore")
+
+
+@TestScenario
+@Name("test_backup_schedule_creates_cronjob. Schedule is reconciled into a CronJob")
+def test_backup_schedule_creates_cronjob(self, chi):
+    with When("a ClickHouseBackupSchedule is created"):
+        kubectl.launch(f"apply -f manifests/chbs/test-clickhousebackupschedule.yaml -n {settings.test_namespace}")
+
+    with Then("the operator reconciles a managed CronJob"):
+        kubectl.wait_field(
+            "clickhousebackupschedule", "test-backup-schedule", ".status.cronJobName", "test-backup-schedule-backup"
+        )
+        cronjob = kubectl.get("cronjob", "test-backup-schedule-backup", ns=settings.test_namespace)
+        assert cronjob["spec"]["schedule"] == "0 2 * * *", error("unexpected cronjob schedule")
+        assert cronjob["spec"]["concurrencyPolicy"] == "Forbid", error("unexpected concurrencyPolicy")
+
+    with Finally("cleanup the schedule"):
+        kubectl.launch(
+            f"delete -f manifests/chbs/test-clickhousebackupschedule.yaml -n {settings.test_namespace}",
+            ok_to_fail=True,
+        )
+
+
+@TestModule
+@Name("e2e.test_backup_restore")
+def test(self):
+    with Given("I setup settings"):
+        steps.set_settings()
+    with Given("I create shell"):
+        self.context.shell = steps.get_shell()
+
+    util.clean_namespace(delete_chi=True)
+    util.install_operator_if_not_exist()
+
+    _, _, _, _, chi = alerts.initialize(
+        chi_file="manifests/chi/test-cluster-for-backups.yaml",
+        chi_template_file="manifests/chit/tpl-clickhouse-backups.yaml",
+        chi_name=CHI_NAME,
+        keeper_type=self.context.keeper_type,
+    )
+
+    with Module("backup_restore"):
+        for scenario in (test_operator_backup_and_restore, test_backup_schedule_creates_cronjob):
+            Scenario(test=scenario)(chi=chi)
diff --git a/tests/regression.py b/tests/regression.py index f290fec42..699d4d736 100755 --- a/tests/regression.py +++ b/tests/regression.py @@ -24,6 +24,7 @@ def run_features(): "e2e.test_metrics_exporter", "e2e.test_operator", "e2e.test_acvp", + "e2e.test_backup_restore", ] for feature_name in features: Feature(run=load(feature_name, "test"))