From 6a8e1d1ac4f74b975a6429186e18943f2728179e Mon Sep 17 00:00:00 2001 From: jmsperu Date: Fri, 27 Mar 2026 01:18:43 +0300 Subject: [PATCH 1/3] NAS backup: add infrastructure backup (database, configs, certs) Adds automated backup of CloudStack infrastructure to NAS storage: - MySQL databases (cloud + cloud_usage if enabled) - Management server configuration (/etc/cloudstack/management/) - Agent configuration (/etc/cloudstack/agent/) - SSL certificates and keystores Backup runs daily via BackgroundPollManager, configurable via global settings: - nas.infra.backup.enabled (default: false) - nas.infra.backup.location (NAS mount path) - nas.infra.backup.retention (default: 7 backup sets) - nas.infra.backup.include.usage.db (default: true) Backups are stored in the NAS backup storage under infra-backup/ with automatic retention management. Uses mysqldump --single-transaction for hot database backup (no table locks, InnoDB consistent snapshot). Database credentials are read from /etc/cloudstack/management/db.properties. --- .../backup/InfrastructureBackupTask.java | 273 ++++++++++++++++++ .../cloudstack/backup/NASBackupProvider.java | 53 +++- 2 files changed, 325 insertions(+), 1 deletion(-) create mode 100644 plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/InfrastructureBackupTask.java diff --git a/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/InfrastructureBackupTask.java b/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/InfrastructureBackupTask.java new file mode 100644 index 000000000000..99634be62429 --- /dev/null +++ b/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/InfrastructureBackupTask.java @@ -0,0 +1,273 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +package org.apache.cloudstack.backup; + +import org.apache.cloudstack.managed.context.ManagedContextRunnable; +import org.apache.cloudstack.poll.BackgroundPollTask; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.LogManager; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.Arrays; +import java.util.Comparator; +import java.util.Properties; +import java.util.concurrent.TimeUnit; + +/** + * Scheduled task that backs up CloudStack infrastructure to NAS storage: + * + * + * Database credentials are read from /etc/cloudstack/management/db.properties. + * Backups are stored under {nasBackupPath}/infra-backup/{timestamp}/ with + * automatic retention management. + */ +public class InfrastructureBackupTask extends ManagedContextRunnable implements BackgroundPollTask { + + private static final Logger LOG = LogManager.getLogger(InfrastructureBackupTask.class); + + private static final String DB_PROPERTIES_PATH = "/etc/cloudstack/management/db.properties"; + private static final String MANAGEMENT_CONFIG_PATH = "/etc/cloudstack/management"; + private static final String AGENT_CONFIG_PATH = "/etc/cloudstack/agent"; + private static final String SSL_CERT_PATH = "/etc/cloudstack/management/cert"; + + /** 24 hours in milliseconds */ + private static final long DAILY_INTERVAL_MS = 86400L * 1000L; + + private final NASBackupProvider provider; + + public InfrastructureBackupTask(NASBackupProvider provider) { + this.provider = provider; + } + + @Override + public Long getDelay() { + return DAILY_INTERVAL_MS; + } + + @Override + protected void runInContext() { + if (!Boolean.TRUE.equals(NASBackupProvider.NASInfraBackupEnabled.value())) { + LOG.debug("Infrastructure backup is disabled (nas.infra.backup.enabled=false)"); + return; + } + + String nasBackupPath = NASBackupProvider.NASInfraBackupLocation.value(); + if (nasBackupPath == null || nasBackupPath.isEmpty()) { + LOG.error("Infrastructure backup location not configured (nas.infra.backup.location is empty)"); + return; + } + + int retentionCount = NASBackupProvider.NASInfraBackupRetention.value(); + boolean includeUsageDb = Boolean.TRUE.equals(NASBackupProvider.NASInfraBackupUsageDb.value()); + + String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd-HHmmss")); + String backupDir = nasBackupPath + "/infra-backup/" + timestamp; + + LOG.info("Starting infrastructure backup to {}", backupDir); + + try { + File dir = new File(backupDir); + if (!dir.mkdirs()) { + LOG.error("Failed to create backup directory: {}", backupDir); + return; + } + + // Read database credentials from db.properties + Properties dbProps = loadDbProperties(); + if (dbProps == null) { + LOG.error("Failed to load database properties from {}", DB_PROPERTIES_PATH); + return; + } + + String dbHost = dbProps.getProperty("db.cloud.host", "localhost"); + String dbUser = dbProps.getProperty("db.cloud.username", "cloud"); + String dbPassword = dbProps.getProperty("db.cloud.password", ""); + + // 1. Backup CloudStack database + backupDatabase("cloud", backupDir, timestamp, dbHost, dbUser, dbPassword); + + // 2. Backup usage database if enabled + if (includeUsageDb) { + String usageHost = dbProps.getProperty("db.usage.host", dbHost); + String usageUser = dbProps.getProperty("db.usage.username", dbUser); + String usagePassword = dbProps.getProperty("db.usage.password", dbPassword); + backupDatabase("cloud_usage", backupDir, timestamp, usageHost, usageUser, usagePassword); + } + + // 3. Backup management server configs + backupDirectory(MANAGEMENT_CONFIG_PATH, backupDir, "management-config"); + + // 4. Backup agent configs (if present on this host) + File agentDir = new File(AGENT_CONFIG_PATH); + if (agentDir.exists()) { + backupDirectory(AGENT_CONFIG_PATH, backupDir, "agent-config"); + } + + // 5. Backup SSL certificates + File sslDir = new File(SSL_CERT_PATH); + if (sslDir.exists()) { + backupDirectory(SSL_CERT_PATH, backupDir, "ssl-certs"); + } + + // 6. Cleanup old backups based on retention policy + cleanupOldBackups(nasBackupPath, retentionCount); + + LOG.info("Infrastructure backup completed successfully: {}", backupDir); + + } catch (Exception e) { + LOG.error("Infrastructure backup failed: {}", e.getMessage(), e); + } + } + + private Properties loadDbProperties() { + File propsFile = new File(DB_PROPERTIES_PATH); + if (!propsFile.exists()) { + LOG.warn("Database properties file not found: {}", DB_PROPERTIES_PATH); + return null; + } + + Properties props = new Properties(); + try (BufferedReader reader = new BufferedReader(new FileReader(propsFile))) { + props.load(reader); + return props; + } catch (IOException e) { + LOG.error("Failed to read database properties: {}", e.getMessage()); + return null; + } + } + + private void backupDatabase(String dbName, String backupDir, String timestamp, + String dbHost, String dbUser, String dbPassword) { + String dumpFile = backupDir + "/" + dbName + "-" + timestamp + ".sql.gz"; + + // Use --single-transaction for InnoDB hot backup (no table locks, consistent snapshot) + String[] cmd = {"/bin/bash", "-c", + String.format("mysqldump --single-transaction --routines --triggers --events " + + "-h '%s' -u '%s' -p'%s' '%s' | gzip > '%s'", + dbHost, dbUser, dbPassword, dbName, dumpFile)}; + + try { + ProcessBuilder pb = new ProcessBuilder(cmd); + pb.redirectErrorStream(true); + Process process = pb.start(); + boolean completed = process.waitFor(300, TimeUnit.SECONDS); + + if (!completed) { + process.destroyForcibly(); + LOG.error("Database backup timed out for {}", dbName); + return; + } + + if (process.exitValue() != 0) { + LOG.error("Database backup failed for {} with exit code {}", dbName, process.exitValue()); + return; + } + + File dump = new File(dumpFile); + LOG.info("Database {} backed up: {} ({} bytes)", dbName, dumpFile, dump.length()); + + } catch (IOException | InterruptedException e) { + LOG.error("Failed to backup database {}: {}", dbName, e.getMessage()); + if (e instanceof InterruptedException) { + Thread.currentThread().interrupt(); + } + } + } + + private void backupDirectory(String sourcePath, String backupDir, String archiveName) { + File source = new File(sourcePath); + if (!source.exists() || !source.isDirectory()) { + LOG.debug("Directory {} does not exist, skipping", sourcePath); + return; + } + + String tarFile = backupDir + "/" + archiveName + ".tar.gz"; + String[] cmd = {"/bin/bash", "-c", + String.format("tar czf '%s' -C '%s' .", tarFile, sourcePath)}; + + try { + ProcessBuilder pb = new ProcessBuilder(cmd); + pb.redirectErrorStream(true); + Process process = pb.start(); + boolean completed = process.waitFor(60, TimeUnit.SECONDS); + + if (completed && process.exitValue() == 0) { + LOG.info("Directory {} backed up to {}", sourcePath, tarFile); + } else { + if (!completed) { + process.destroyForcibly(); + } + LOG.warn("Directory backup failed for {} (exit code: {})", + sourcePath, completed ? process.exitValue() : "timeout"); + } + } catch (IOException | InterruptedException e) { + LOG.error("Failed to backup directory {}: {}", sourcePath, e.getMessage()); + if (e instanceof InterruptedException) { + Thread.currentThread().interrupt(); + } + } + } + + private void cleanupOldBackups(String nasBackupPath, int retentionCount) { + File infraDir = new File(nasBackupPath + "/infra-backup"); + if (!infraDir.exists()) { + return; + } + + File[] backups = infraDir.listFiles(File::isDirectory); + if (backups == null || backups.length <= retentionCount) { + return; + } + + // Sort by name (timestamp-based), oldest first + Arrays.sort(backups, Comparator.comparing(File::getName)); + + int toDelete = backups.length - retentionCount; + for (int i = 0; i < toDelete; i++) { + LOG.info("Removing old infrastructure backup: {}", backups[i].getName()); + deleteDirectory(backups[i]); + } + } + + private void deleteDirectory(File dir) { + File[] files = dir.listFiles(); + if (files != null) { + for (File file : files) { + if (file.isDirectory()) { + deleteDirectory(file); + } else { + if (!file.delete()) { + LOG.warn("Failed to delete file: {}", file.getAbsolutePath()); + } + } + } + } + if (!dir.delete()) { + LOG.warn("Failed to delete directory: {}", dir.getAbsolutePath()); + } + } +} diff --git a/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupProvider.java b/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupProvider.java index f2ea8ac71c91..b0f408cc9f51 100644 --- a/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupProvider.java +++ b/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupProvider.java @@ -53,6 +53,7 @@ import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager; import org.apache.cloudstack.framework.config.ConfigKey; import org.apache.cloudstack.framework.config.Configurable; +import org.apache.cloudstack.poll.BackgroundPollManager; import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao; import org.apache.cloudstack.storage.datastore.db.StoragePoolVO; import org.apache.cloudstack.storage.to.PrimaryDataStoreTO; @@ -60,6 +61,7 @@ import org.apache.logging.log4j.LogManager; import javax.inject.Inject; +import javax.naming.ConfigurationException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Collections; @@ -84,6 +86,41 @@ public class NASBackupProvider extends AdapterBase implements BackupProvider, Co true, BackupFrameworkEnabled.key()); + static final ConfigKey NASInfraBackupEnabled = new ConfigKey<>("Advanced", Boolean.class, + "nas.infra.backup.enabled", + "false", + "Enable automated infrastructure backup (database, configs) to NAS storage. " + + "When enabled, the management server will perform a daily backup of the CloudStack " + + "database, configuration files, and SSL certificates to the configured NAS location.", + true, + ConfigKey.Scope.Global, + BackupFrameworkEnabled.key()); + + static final ConfigKey NASInfraBackupLocation = new ConfigKey<>("Advanced", String.class, + "nas.infra.backup.location", + "", + "NAS mount path where infrastructure backups are stored (e.g. /mnt/nas-backup). " + + "Backups will be written to {location}/infra-backup/{timestamp}/.", + true, + ConfigKey.Scope.Global, + BackupFrameworkEnabled.key()); + + static final ConfigKey NASInfraBackupRetention = new ConfigKey<>("Advanced", Integer.class, + "nas.infra.backup.retention", + "7", + "Number of infrastructure backup sets to retain. Older backups are automatically removed.", + true, + ConfigKey.Scope.Global, + BackupFrameworkEnabled.key()); + + static final ConfigKey NASInfraBackupUsageDb = new ConfigKey<>("Advanced", Boolean.class, + "nas.infra.backup.include.usage.db", + "true", + "Include the cloud_usage database in infrastructure backup.", + true, + ConfigKey.Scope.Global, + BackupFrameworkEnabled.key()); + @Inject private BackupDao backupDao; @@ -129,6 +166,16 @@ public class NASBackupProvider extends AdapterBase implements BackupProvider, Co @Inject private DiskOfferingDao diskOfferingDao; + @Inject + private BackgroundPollManager backgroundPollManager; + + @Override + public boolean configure(String name, Map params) throws ConfigurationException { + super.configure(name, params); + backgroundPollManager.submitTask(new InfrastructureBackupTask(this)); + return true; + } + private Long getClusterIdFromRootVolume(VirtualMachine vm) { VolumeVO rootVolume = volumeDao.getInstanceRootVolume(vm.getId()); StoragePoolVO rootDiskPool = primaryDataStoreDao.findById(rootVolume.getPoolId()); @@ -594,7 +641,11 @@ public Boolean crossZoneInstanceCreationEnabled(BackupOffering backupOffering) { @Override public ConfigKey[] getConfigKeys() { return new ConfigKey[]{ - NASBackupRestoreMountTimeout + NASBackupRestoreMountTimeout, + NASInfraBackupEnabled, + NASInfraBackupLocation, + NASInfraBackupRetention, + NASInfraBackupUsageDb }; } From 432c049a7e48a112fb30bf1d8d61abe8b738164c Mon Sep 17 00:00:00 2001 From: jmsperu Date: Sat, 23 May 2026 01:11:21 +0300 Subject: [PATCH 2/3] fix(backup): make infrastructure DB backup opt-in, add unit tests Addresses @weizhouapache's review: by default, production deployments should manage CloudStack DB backups via existing external tooling (cron + mysqldump, replication, etc), not via the in-process backup task. The DB component is now gated on a new explicit ConfigKey that defaults to false. - New ConfigKey: nas.infra.backup.include.database (Boolean, default false, Global scope). When false, the InfrastructureBackupTask runs only the configs + certs path (where there is no reasonable external alternative). - nas.infra.backup.enabled description rewritten to make the new split clear and to steer production users toward the cron-job pattern for the DB. - nas.infra.backup.include.usage.db description updated to clarify it has no effect unless include.database is also true. - New InfrastructureBackupTaskTest with 9 tests covering the gating decisions: master switch off, empty location, DB-skipped-by-default, both DBs when usage enabled, usage flag ignored when DB excluded, props unreadable, retention always runs, daily interval. Addresses codecov coverage gap on this PR (143 untested lines was the original report). - backupDatabase/backupDirectory/cleanupOldBackups + loadDbProperties + the ConfigKey accessors made protected so tests can override the side-effecting paths without standing up ProcessBuilder. --- .../backup/InfrastructureBackupTask.java | 80 ++++-- .../cloudstack/backup/NASBackupProvider.java | 27 +- .../backup/InfrastructureBackupTaskTest.java | 236 ++++++++++++++++++ 3 files changed, 312 insertions(+), 31 deletions(-) create mode 100644 plugins/backup/nas/src/test/java/org/apache/cloudstack/backup/InfrastructureBackupTaskTest.java diff --git a/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/InfrastructureBackupTask.java b/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/InfrastructureBackupTask.java index 99634be62429..25c086430fb0 100644 --- a/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/InfrastructureBackupTask.java +++ b/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/InfrastructureBackupTask.java @@ -68,26 +68,48 @@ public Long getDelay() { return DAILY_INTERVAL_MS; } + /** Indirection so tests can override without standing up the ConfigDepot. */ + protected boolean isEnabled() { + return Boolean.TRUE.equals(NASBackupProvider.NASInfraBackupEnabled.value()); + } + + protected String getBackupLocation() { + return NASBackupProvider.NASInfraBackupLocation.value(); + } + + protected int getRetentionCount() { + return NASBackupProvider.NASInfraBackupRetention.value(); + } + + protected boolean isDatabaseIncluded() { + return Boolean.TRUE.equals(NASBackupProvider.NASInfraBackupIncludeDatabase.value()); + } + + protected boolean isUsageDbIncluded() { + return Boolean.TRUE.equals(NASBackupProvider.NASInfraBackupUsageDb.value()); + } + @Override protected void runInContext() { - if (!Boolean.TRUE.equals(NASBackupProvider.NASInfraBackupEnabled.value())) { + if (!isEnabled()) { LOG.debug("Infrastructure backup is disabled (nas.infra.backup.enabled=false)"); return; } - String nasBackupPath = NASBackupProvider.NASInfraBackupLocation.value(); + String nasBackupPath = getBackupLocation(); if (nasBackupPath == null || nasBackupPath.isEmpty()) { LOG.error("Infrastructure backup location not configured (nas.infra.backup.location is empty)"); return; } - int retentionCount = NASBackupProvider.NASInfraBackupRetention.value(); - boolean includeUsageDb = Boolean.TRUE.equals(NASBackupProvider.NASInfraBackupUsageDb.value()); + int retentionCount = getRetentionCount(); + boolean includeDatabase = isDatabaseIncluded(); + boolean includeUsageDb = isUsageDbIncluded(); String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd-HHmmss")); String backupDir = nasBackupPath + "/infra-backup/" + timestamp; - LOG.info("Starting infrastructure backup to {}", backupDir); + LOG.info("Starting infrastructure backup to {} (database included: {})", backupDir, includeDatabase); try { File dir = new File(backupDir); @@ -96,26 +118,30 @@ protected void runInContext() { return; } - // Read database credentials from db.properties - Properties dbProps = loadDbProperties(); - if (dbProps == null) { - LOG.error("Failed to load database properties from {}", DB_PROPERTIES_PATH); - return; - } - - String dbHost = dbProps.getProperty("db.cloud.host", "localhost"); - String dbUser = dbProps.getProperty("db.cloud.username", "cloud"); - String dbPassword = dbProps.getProperty("db.cloud.password", ""); + // 1 & 2. Database backup — opt-in via nas.infra.backup.include.database. + // Production deployments typically run their own mysqldump cron jobs and disable this; + // it exists for small/edge deployments wanting unified DR on the same NAS as VM backups. + if (includeDatabase) { + Properties dbProps = loadDbProperties(); + if (dbProps == null) { + LOG.error("Database backup requested but failed to load properties from {} — skipping DB component", DB_PROPERTIES_PATH); + } else { + String dbHost = dbProps.getProperty("db.cloud.host", "localhost"); + String dbUser = dbProps.getProperty("db.cloud.username", "cloud"); + String dbPassword = dbProps.getProperty("db.cloud.password", ""); - // 1. Backup CloudStack database - backupDatabase("cloud", backupDir, timestamp, dbHost, dbUser, dbPassword); + backupDatabase("cloud", backupDir, timestamp, dbHost, dbUser, dbPassword); - // 2. Backup usage database if enabled - if (includeUsageDb) { - String usageHost = dbProps.getProperty("db.usage.host", dbHost); - String usageUser = dbProps.getProperty("db.usage.username", dbUser); - String usagePassword = dbProps.getProperty("db.usage.password", dbPassword); - backupDatabase("cloud_usage", backupDir, timestamp, usageHost, usageUser, usagePassword); + if (includeUsageDb) { + String usageHost = dbProps.getProperty("db.usage.host", dbHost); + String usageUser = dbProps.getProperty("db.usage.username", dbUser); + String usagePassword = dbProps.getProperty("db.usage.password", dbPassword); + backupDatabase("cloud_usage", backupDir, timestamp, usageHost, usageUser, usagePassword); + } + } + } else { + LOG.debug("Database backup skipped (nas.infra.backup.include.database=false). " + + "Manage DB backups externally for production deployments."); } // 3. Backup management server configs @@ -143,7 +169,7 @@ protected void runInContext() { } } - private Properties loadDbProperties() { + protected Properties loadDbProperties() { File propsFile = new File(DB_PROPERTIES_PATH); if (!propsFile.exists()) { LOG.warn("Database properties file not found: {}", DB_PROPERTIES_PATH); @@ -160,7 +186,7 @@ private Properties loadDbProperties() { } } - private void backupDatabase(String dbName, String backupDir, String timestamp, + protected void backupDatabase(String dbName, String backupDir, String timestamp, String dbHost, String dbUser, String dbPassword) { String dumpFile = backupDir + "/" + dbName + "-" + timestamp + ".sql.gz"; @@ -198,7 +224,7 @@ private void backupDatabase(String dbName, String backupDir, String timestamp, } } - private void backupDirectory(String sourcePath, String backupDir, String archiveName) { + protected void backupDirectory(String sourcePath, String backupDir, String archiveName) { File source = new File(sourcePath); if (!source.exists() || !source.isDirectory()) { LOG.debug("Directory {} does not exist, skipping", sourcePath); @@ -232,7 +258,7 @@ private void backupDirectory(String sourcePath, String backupDir, String archive } } - private void cleanupOldBackups(String nasBackupPath, int retentionCount) { + protected void cleanupOldBackups(String nasBackupPath, int retentionCount) { File infraDir = new File(nasBackupPath + "/infra-backup"); if (!infraDir.exists()) { return; diff --git a/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupProvider.java b/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupProvider.java index b0f408cc9f51..5e1d20324835 100644 --- a/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupProvider.java +++ b/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/NASBackupProvider.java @@ -89,9 +89,26 @@ public class NASBackupProvider extends AdapterBase implements BackupProvider, Co static final ConfigKey NASInfraBackupEnabled = new ConfigKey<>("Advanced", Boolean.class, "nas.infra.backup.enabled", "false", - "Enable automated infrastructure backup (database, configs) to NAS storage. " + - "When enabled, the management server will perform a daily backup of the CloudStack " + - "database, configuration files, and SSL certificates to the configured NAS location.", + "Enable automated infrastructure backup to NAS storage. When enabled, the management " + + "server will perform a daily backup of CloudStack configuration files and SSL " + + "certificates to the configured NAS location. The CloudStack database is NOT included " + + "by default — for production deployments, manage database backups externally (e.g. via " + + "a cron job running mysqldump). To opt in to bundling the database with this backup " + + "(useful only for small / edge / single-MS deployments without separate ops tooling), " + + "also set nas.infra.backup.include.database=true.", + true, + ConfigKey.Scope.Global, + BackupFrameworkEnabled.key()); + + static final ConfigKey NASInfraBackupIncludeDatabase = new ConfigKey<>("Advanced", Boolean.class, + "nas.infra.backup.include.database", + "false", + "Include the CloudStack database in the daily infrastructure backup. Defaults to false " + + "because production deployments typically manage DB backups via external tooling (e.g. " + + "cron + mysqldump, replication, dedicated backup appliance) and are better served " + + "doing so. Only set true when you want one-knob disaster recovery for a small/edge " + + "deployment and the same NAS that already holds your VM backups is an acceptable " + + "target. Has no effect unless nas.infra.backup.enabled is also true.", true, ConfigKey.Scope.Global, BackupFrameworkEnabled.key()); @@ -116,7 +133,8 @@ public class NASBackupProvider extends AdapterBase implements BackupProvider, Co static final ConfigKey NASInfraBackupUsageDb = new ConfigKey<>("Advanced", Boolean.class, "nas.infra.backup.include.usage.db", "true", - "Include the cloud_usage database in infrastructure backup.", + "Also include the cloud_usage database when the CloudStack database is being backed " + + "up. Has no effect unless nas.infra.backup.include.database is also true.", true, ConfigKey.Scope.Global, BackupFrameworkEnabled.key()); @@ -643,6 +661,7 @@ public ConfigKey[] getConfigKeys() { return new ConfigKey[]{ NASBackupRestoreMountTimeout, NASInfraBackupEnabled, + NASInfraBackupIncludeDatabase, NASInfraBackupLocation, NASInfraBackupRetention, NASInfraBackupUsageDb diff --git a/plugins/backup/nas/src/test/java/org/apache/cloudstack/backup/InfrastructureBackupTaskTest.java b/plugins/backup/nas/src/test/java/org/apache/cloudstack/backup/InfrastructureBackupTaskTest.java new file mode 100644 index 000000000000..7c8e3b02b2f3 --- /dev/null +++ b/plugins/backup/nas/src/test/java/org/apache/cloudstack/backup/InfrastructureBackupTaskTest.java @@ -0,0 +1,236 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +package org.apache.cloudstack.backup; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; +import java.util.concurrent.atomic.AtomicInteger; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class InfrastructureBackupTaskTest { + + private Path tmpRoot; + + @Before + public void setUp() throws IOException { + tmpRoot = Files.createTempDirectory("cs-infra-backup-test-"); + } + + @After + public void tearDown() { + if (tmpRoot != null) { + deleteRecursively(tmpRoot.toFile()); + } + } + + /** Captures backupDatabase/backupDirectory invocations instead of shelling out. */ + private static class RecordingTask extends InfrastructureBackupTask { + boolean enabled = true; + String location; + int retention = 7; + boolean databaseIncluded = false; + boolean usageDbIncluded = true; + Properties dbProps; + final List databaseBackupNames = new ArrayList<>(); + final List directoryBackupNames = new ArrayList<>(); + final AtomicInteger retentionCalls = new AtomicInteger(0); + + RecordingTask() { + super(null); + } + + @Override + protected boolean isEnabled() { return enabled; } + @Override + protected String getBackupLocation() { return location; } + @Override + protected int getRetentionCount() { return retention; } + @Override + protected boolean isDatabaseIncluded() { return databaseIncluded; } + @Override + protected boolean isUsageDbIncluded() { return usageDbIncluded; } + + @Override + protected Properties loadDbProperties() { + return dbProps; + } + + @Override + protected void backupDatabase(String dbName, String backupDir, String timestamp, + String dbHost, String dbUser, String dbPassword) { + databaseBackupNames.add(dbName); + } + + @Override + protected void backupDirectory(String sourcePath, String backupDir, String archiveName) { + directoryBackupNames.add(archiveName); + } + + @Override + protected void cleanupOldBackups(String nasBackupPath, int retentionCount) { + retentionCalls.incrementAndGet(); + } + } + + private static Properties stubDbProps() { + Properties props = new Properties(); + props.setProperty("db.cloud.host", "localhost"); + props.setProperty("db.cloud.username", "cloud"); + props.setProperty("db.cloud.password", "secret"); + return props; + } + + @Test + public void doesNothingWhenMasterSwitchDisabled() { + RecordingTask task = new RecordingTask(); + task.enabled = false; + task.location = tmpRoot.toString(); + + task.runInContext(); + + Assert.assertTrue("no DB backups when feature disabled", task.databaseBackupNames.isEmpty()); + Assert.assertTrue("no dir backups when feature disabled", task.directoryBackupNames.isEmpty()); + Assert.assertEquals("no retention when feature disabled", 0, task.retentionCalls.get()); + } + + @Test + public void doesNothingWhenLocationEmpty() { + RecordingTask task = new RecordingTask(); + task.enabled = true; + task.location = ""; + + task.runInContext(); + + Assert.assertTrue(task.databaseBackupNames.isEmpty()); + Assert.assertTrue(task.directoryBackupNames.isEmpty()); + Assert.assertEquals(0, task.retentionCalls.get()); + } + + @Test + public void skipsDatabaseByDefault() { + RecordingTask task = new RecordingTask(); + task.enabled = true; + task.location = tmpRoot.toString(); + task.databaseIncluded = false; // default + task.dbProps = stubDbProps(); + + task.runInContext(); + + Assert.assertTrue("DB component is opt-in: no DB backups by default", + task.databaseBackupNames.isEmpty()); + // Configs+certs still attempted (they're skipped silently if dirs absent) + Assert.assertEquals("retention still runs", 1, task.retentionCalls.get()); + } + + @Test + public void backsUpCloudDbWhenIncludeDatabaseTrue() { + RecordingTask task = new RecordingTask(); + task.enabled = true; + task.location = tmpRoot.toString(); + task.databaseIncluded = true; + task.usageDbIncluded = false; // suppress cloud_usage + task.dbProps = stubDbProps(); + + task.runInContext(); + + Assert.assertEquals("only cloud DB backed up", 1, task.databaseBackupNames.size()); + Assert.assertEquals("cloud", task.databaseBackupNames.get(0)); + } + + @Test + public void backsUpBothDbsWhenUsageEnabled() { + RecordingTask task = new RecordingTask(); + task.enabled = true; + task.location = tmpRoot.toString(); + task.databaseIncluded = true; + task.usageDbIncluded = true; + task.dbProps = stubDbProps(); + + task.runInContext(); + + Assert.assertEquals("both DBs backed up", 2, task.databaseBackupNames.size()); + Assert.assertEquals("cloud", task.databaseBackupNames.get(0)); + Assert.assertEquals("cloud_usage", task.databaseBackupNames.get(1)); + } + + @Test + public void usageDbFlagIgnoredWhenDatabaseExcluded() { + RecordingTask task = new RecordingTask(); + task.enabled = true; + task.location = tmpRoot.toString(); + task.databaseIncluded = false; // master DB gate off + task.usageDbIncluded = true; // should be ignored + task.dbProps = stubDbProps(); + + task.runInContext(); + + Assert.assertTrue("usage DB requires include.database=true", + task.databaseBackupNames.isEmpty()); + } + + @Test + public void skipsDbBackupWhenPropertiesUnreadable() { + RecordingTask task = new RecordingTask(); + task.enabled = true; + task.location = tmpRoot.toString(); + task.databaseIncluded = true; + task.dbProps = null; // simulate missing/unreadable db.properties + + task.runInContext(); + + Assert.assertTrue("no DB backups attempted when props can't be loaded", + task.databaseBackupNames.isEmpty()); + Assert.assertEquals("retention still runs (configs+certs path unaffected)", + 1, task.retentionCalls.get()); + } + + @Test + public void retentionRunsOnEverySuccessfulPass() { + RecordingTask task = new RecordingTask(); + task.enabled = true; + task.location = tmpRoot.toString(); + task.retention = 3; + task.databaseIncluded = false; + + task.runInContext(); + + Assert.assertEquals(1, task.retentionCalls.get()); + } + + @Test + public void dailyIntervalIs24Hours() { + InfrastructureBackupTask task = new RecordingTask(); + Assert.assertEquals(Long.valueOf(86_400_000L), task.getDelay()); + } + + private void deleteRecursively(File f) { + File[] kids = f.listFiles(); + if (kids != null) { + for (File k : kids) deleteRecursively(k); + } + f.delete(); + } +} From 5b824954761e9b9969acaa837f1510b7bca8966c Mon Sep 17 00:00:00 2001 From: jmsperu Date: Sun, 21 Jun 2026 20:48:57 +0300 Subject: [PATCH 3/3] fix(backup): harden infrastructure backup per review (#12900) Address review feedback on #12900 (with unit tests proving the deletion logic): - backupDatabase: invoke mysqldump without a shell and pass credentials via a 0600 --defaults-extra-file, so db.properties values can't be shell-interpreted (no command injection) and the password no longer appears in the process list; gzip the dump in-process. - cleanupOldBackups: clamp negative retentionCount to avoid ArrayIndexOutOfBounds. - deleteDirectory: use Files.walkFileTree (no symlink following) so a symlink in the NAS tree cannot cause deletion outside the backup path. - runInContext: guard with a cluster-wide GlobalLock so only one management server runs the infra backup at a time. Lock acquisition is an overridable seam (acquireRunLock/releaseRunLock) so unit tests need no DB-backed lock. - create the backup dir only when it does not already exist. Extend InfrastructureBackupTaskTest with retention/delete coverage: negative- retention clamp, retention keeps newest/deletes oldest, and symlink-escape safety (the last two fail against the pre-fix code). Existing orchestration tests are preserved. --- .../backup/InfrastructureBackupTask.java | 141 +++++++++++++++--- .../backup/InfrastructureBackupTaskTest.java | 94 ++++++++++++ 2 files changed, 213 insertions(+), 22 deletions(-) diff --git a/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/InfrastructureBackupTask.java b/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/InfrastructureBackupTask.java index 25c086430fb0..9c66f23c97c6 100644 --- a/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/InfrastructureBackupTask.java +++ b/plugins/backup/nas/src/main/java/org/apache/cloudstack/backup/InfrastructureBackupTask.java @@ -16,6 +16,8 @@ // under the License. package org.apache.cloudstack.backup; +import com.cloud.utils.db.GlobalLock; + import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.cloudstack.poll.BackgroundPollTask; import org.apache.logging.log4j.Logger; @@ -23,14 +25,28 @@ import java.io.BufferedReader; import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.Writer; +import java.nio.charset.StandardCharsets; +import java.nio.file.FileVisitResult; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.SimpleFileVisitor; +import java.nio.file.attribute.BasicFileAttributes; +import java.nio.file.attribute.PosixFilePermission; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.Arrays; import java.util.Comparator; +import java.util.EnumSet; import java.util.Properties; import java.util.concurrent.TimeUnit; +import java.util.zip.GZIPOutputStream; /** * Scheduled task that backs up CloudStack infrastructure to NAS storage: @@ -111,9 +127,18 @@ protected void runInContext() { LOG.info("Starting infrastructure backup to {} (database included: {})", backupDir, includeDatabase); + // Cluster-wide lock: in multi-management-server deployments only one MS should run the + // infrastructure backup at a time, otherwise they would write/delete concurrently in the + // same NAS infra-backup path (duplicate backups, retention races, accidental deletes). + GlobalLock lock = acquireRunLock(); + if (lock == null) { + LOG.info("Another management server is performing the infrastructure backup; skipping this run"); + return; + } + try { File dir = new File(backupDir); - if (!dir.mkdirs()) { + if (!dir.exists() && !dir.mkdirs()) { LOG.error("Failed to create backup directory: {}", backupDir); return; } @@ -166,6 +191,24 @@ protected void runInContext() { } catch (Exception e) { LOG.error("Infrastructure backup failed: {}", e.getMessage(), e); + } finally { + releaseRunLock(lock); + } + } + + /** + * Acquire the cluster-wide run lock so only one management server performs the infrastructure + * backup at a time. Returns null if the lock can't be taken (another MS holds it). Overridable + * so unit tests can exercise runInContext without standing up the DB-backed lock manager. + */ + protected GlobalLock acquireRunLock() { + GlobalLock lock = GlobalLock.getInternLock("infra-backup"); + return (lock != null && lock.lock(5)) ? lock : null; + } + + protected void releaseRunLock(GlobalLock lock) { + if (lock != null) { + lock.unlock(); } } @@ -190,15 +233,23 @@ protected void backupDatabase(String dbName, String backupDir, String timestamp, String dbHost, String dbUser, String dbPassword) { String dumpFile = backupDir + "/" + dbName + "-" + timestamp + ".sql.gz"; - // Use --single-transaction for InnoDB hot backup (no table locks, consistent snapshot) - String[] cmd = {"/bin/bash", "-c", - String.format("mysqldump --single-transaction --routines --triggers --events " + - "-h '%s' -u '%s' -p'%s' '%s' | gzip > '%s'", - dbHost, dbUser, dbPassword, dbName, dumpFile)}; - + File defaultsFile = null; + File rawDump = null; try { - ProcessBuilder pb = new ProcessBuilder(cmd); - pb.redirectErrorStream(true); + // Pass credentials via a 0600 --defaults-extra-file rather than the command line, so the + // password never appears in the process list, and invoke mysqldump WITHOUT a shell so that + // values read from db.properties cannot be interpreted as shell metacharacters (no injection). + defaultsFile = writeMysqlDefaultsFile(dbHost, dbUser, dbPassword); + + rawDump = new File(backupDir, dbName + "-" + timestamp + ".sql"); + // --single-transaction = InnoDB hot backup (no table locks, consistent snapshot) + ProcessBuilder pb = new ProcessBuilder( + "mysqldump", + "--defaults-extra-file=" + defaultsFile.getAbsolutePath(), + "--single-transaction", "--routines", "--triggers", "--events", + dbName); + pb.redirectOutput(ProcessBuilder.Redirect.to(rawDump)); + pb.redirectError(ProcessBuilder.Redirect.INHERIT); Process process = pb.start(); boolean completed = process.waitFor(300, TimeUnit.SECONDS); @@ -213,6 +264,12 @@ protected void backupDatabase(String dbName, String backupDir, String timestamp, return; } + // Compress in-process (no shell pipeline) into the final .sql.gz. + try (InputStream in = new FileInputStream(rawDump); + OutputStream out = new GZIPOutputStream(new FileOutputStream(dumpFile))) { + in.transferTo(out); + } + File dump = new File(dumpFile); LOG.info("Database {} backed up: {} ({} bytes)", dbName, dumpFile, dump.length()); @@ -221,7 +278,37 @@ protected void backupDatabase(String dbName, String backupDir, String timestamp, if (e instanceof InterruptedException) { Thread.currentThread().interrupt(); } + } finally { + if (rawDump != null && rawDump.exists() && !rawDump.delete()) { + LOG.warn("Failed to delete temporary dump file: {}", rawDump.getAbsolutePath()); + } + if (defaultsFile != null && defaultsFile.exists() && !defaultsFile.delete()) { + LOG.warn("Failed to delete temporary mysql defaults file: {}", defaultsFile.getAbsolutePath()); + } + } + } + + /** + * Writes a temporary MySQL option file (restricted to mode 0600 where supported) holding the + * connection credentials, so mysqldump can read them via {@code --defaults-extra-file}. Keeping + * the password out of the argument list prevents it leaking through the process list. + */ + private File writeMysqlDefaultsFile(String host, String user, String password) throws IOException { + Path tmp = Files.createTempFile("cs-infra-mysqldump-", ".cnf"); + try { + Files.setPosixFilePermissions(tmp, + EnumSet.of(PosixFilePermission.OWNER_READ, PosixFilePermission.OWNER_WRITE)); + } catch (UnsupportedOperationException ignored) { + // Non-POSIX filesystem; createTempFile already restricts access to the owner. + } + String content = "[client]\n" + + "host=" + (host == null ? "" : host) + "\n" + + "user=" + (user == null ? "" : user) + "\n" + + "password=" + (password == null ? "" : password) + "\n"; + try (Writer w = Files.newBufferedWriter(tmp, StandardCharsets.UTF_8)) { + w.write(content); } + return tmp.toFile(); } protected void backupDirectory(String sourcePath, String backupDir, String archiveName) { @@ -259,6 +346,11 @@ protected void backupDirectory(String sourcePath, String backupDir, String archi } protected void cleanupOldBackups(String nasBackupPath, int retentionCount) { + // A negative retention (misconfiguration) would make toDelete exceed backups.length below and + // throw ArrayIndexOutOfBoundsException; clamp it so we never compute a delete count > available. + if (retentionCount < 0) { + retentionCount = 0; + } File infraDir = new File(nasBackupPath + "/infra-backup"); if (!infraDir.exists()) { return; @@ -280,20 +372,25 @@ protected void cleanupOldBackups(String nasBackupPath, int retentionCount) { } private void deleteDirectory(File dir) { - File[] files = dir.listFiles(); - if (files != null) { - for (File file : files) { - if (file.isDirectory()) { - deleteDirectory(file); - } else { - if (!file.delete()) { - LOG.warn("Failed to delete file: {}", file.getAbsolutePath()); - } + try { + // walkFileTree does NOT follow symbolic links by default: a symlink placed inside the + // backup tree (NAS shares are often writable by multiple systems) is removed as a link, + // its target is never descended into or deleted — so a delete can't escape the tree. + Files.walkFileTree(dir.toPath(), new SimpleFileVisitor() { + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { + Files.deleteIfExists(file); + return FileVisitResult.CONTINUE; } - } - } - if (!dir.delete()) { - LOG.warn("Failed to delete directory: {}", dir.getAbsolutePath()); + + @Override + public FileVisitResult postVisitDirectory(Path visited, IOException exc) throws IOException { + Files.deleteIfExists(visited); + return FileVisitResult.CONTINUE; + } + }); + } catch (IOException e) { + LOG.warn("Failed to delete directory {}: {}", dir.getAbsolutePath(), e.getMessage()); } } } diff --git a/plugins/backup/nas/src/test/java/org/apache/cloudstack/backup/InfrastructureBackupTaskTest.java b/plugins/backup/nas/src/test/java/org/apache/cloudstack/backup/InfrastructureBackupTaskTest.java index 7c8e3b02b2f3..19ebf9ea288b 100644 --- a/plugins/backup/nas/src/test/java/org/apache/cloudstack/backup/InfrastructureBackupTaskTest.java +++ b/plugins/backup/nas/src/test/java/org/apache/cloudstack/backup/InfrastructureBackupTaskTest.java @@ -16,17 +16,21 @@ // under the License. package org.apache.cloudstack.backup; +import com.cloud.utils.db.GlobalLock; + import java.io.File; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Properties; import java.util.concurrent.atomic.AtomicInteger; import org.junit.After; import org.junit.Assert; +import org.junit.Assume; import org.junit.Before; import org.junit.Test; @@ -93,6 +97,17 @@ protected void backupDirectory(String sourcePath, String backupDir, String archi protected void cleanupOldBackups(String nasBackupPath, int retentionCount) { retentionCalls.incrementAndGet(); } + + @Override + protected GlobalLock acquireRunLock() { + // in-memory interned lock only; never touches the DB-backed lock manager in unit tests + return GlobalLock.getInternLock("infra-backup-test"); + } + + @Override + protected void releaseRunLock(GlobalLock lock) { + // no-op: the DB lock was never actually acquired in tests + } } private static Properties stubDbProps() { @@ -226,6 +241,85 @@ public void dailyIntervalIs24Hours() { Assert.assertEquals(Long.valueOf(86_400_000L), task.getDelay()); } + // ---- retention / delete logic (exercises the REAL cleanupOldBackups + deleteDirectory, + // which RecordingTask above stubs out) ---- + + private final InfrastructureBackupTask realTask = new InfrastructureBackupTask(null); + + private File infraBackupDir() { + return new File(tmpRoot.toFile(), "infra-backup"); + } + + private File makeBackup(String name) throws IOException { + File dir = new File(infraBackupDir(), name); + Assert.assertTrue(dir.mkdirs()); + Assert.assertTrue(new File(dir, "dump.sql.gz").createNewFile()); + return dir; + } + + private String[] remainingBackups() { + File[] dirs = infraBackupDir().listFiles(File::isDirectory); + if (dirs == null) { + return new String[0]; + } + return Arrays.stream(dirs).map(File::getName).sorted().toArray(String[]::new); + } + + @Test + public void cleanupClampsNegativeRetentionInsteadOfThrowing() throws IOException { + makeBackup("20240101-000000"); + makeBackup("20240102-000000"); + makeBackup("20240103-000000"); + + // A negative retention (misconfiguration) previously made toDelete (= count - retention) + // larger than the array length and threw ArrayIndexOutOfBoundsException. It must now clamp + // to 0 and simply remove everything, without throwing. + realTask.cleanupOldBackups(tmpRoot.toString(), -5); + + Assert.assertEquals(0, remainingBackups().length); + } + + @Test + public void cleanupKeepsNewestAndDeletesOldest() throws IOException { + makeBackup("20240101-000000"); + makeBackup("20240102-000000"); + makeBackup("20240103-000000"); + makeBackup("20240104-000000"); + makeBackup("20240105-000000"); + + realTask.cleanupOldBackups(tmpRoot.toString(), 2); + + Assert.assertArrayEquals(new String[] {"20240104-000000", "20240105-000000"}, remainingBackups()); + } + + @Test + public void deleteDoesNotFollowSymlinkOutOfTheBackupTree() throws IOException { + // A file living OUTSIDE the backup tree, reachable only via a symlink inside an old backup. + File outside = new File(tmpRoot.toFile(), "outside"); + Assert.assertTrue(outside.mkdirs()); + File precious = new File(outside, "precious.txt"); + Assert.assertTrue(precious.createNewFile()); + + File oldest = makeBackup("20240101-000000"); + makeBackup("20240102-000000"); + makeBackup("20240103-000000"); + + Path link = new File(oldest, "escape").toPath(); + try { + Files.createSymbolicLink(link, outside.toPath()); + } catch (IOException | UnsupportedOperationException e) { + Assume.assumeNoException("Filesystem does not support symbolic links", e); + } + + // Retain only the newest; the two oldest (including the one holding the symlink) are deleted. + realTask.cleanupOldBackups(tmpRoot.toString(), 1); + + Assert.assertArrayEquals(new String[] {"20240103-000000"}, remainingBackups()); + // The symlink target and its contents MUST survive — the delete must not follow the link out. + Assert.assertTrue("symlink target directory must survive", outside.exists()); + Assert.assertTrue("file behind the symlink must survive", precious.exists()); + } + private void deleteRecursively(File f) { File[] kids = f.listFiles(); if (kids != null) {