diff --git a/engine/components-api/src/main/java/com/cloud/ha/HighAvailabilityManager.java b/engine/components-api/src/main/java/com/cloud/ha/HighAvailabilityManager.java index ddc8153d7398..3ae94479cea5 100644 --- a/engine/components-api/src/main/java/com/cloud/ha/HighAvailabilityManager.java +++ b/engine/components-api/src/main/java/com/cloud/ha/HighAvailabilityManager.java @@ -21,6 +21,7 @@ import com.cloud.deploy.DeploymentPlanner; import com.cloud.host.HostVO; import com.cloud.host.Status; +import com.cloud.storage.Storage.StoragePoolType; import com.cloud.utils.component.Manager; import com.cloud.vm.VMInstanceVO; import org.apache.cloudstack.framework.config.ConfigKey; @@ -32,6 +33,8 @@ */ public interface HighAvailabilityManager extends Manager { + List LIBVIRT_STORAGE_POOL_TYPES_WITH_HA_SUPPORT = List.of(StoragePoolType.NetworkFilesystem, StoragePoolType.SharedMountPoint); + ConfigKey ForceHA = new ConfigKey<>("Advanced", Boolean.class, "force.ha", "false", "Force High-Availability to happen even if the VM says no.", true, Cluster); diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHABase.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHABase.java index 896426addca1..e9a7ac8951ce 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHABase.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHABase.java @@ -35,7 +35,6 @@ public class KVMHABase { protected Logger logger = LogManager.getLogger(getClass()); private long _timeout = 60000; /* 1 minutes */ - protected static String s_heartBeatPath; protected long _heartBeatUpdateTimeout = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.HEARTBEAT_UPDATE_TIMEOUT); protected long _heartBeatUpdateFreq = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_HEARTBEAT_UPDATE_FREQUENCY); protected long _heartBeatUpdateMaxTries = 
AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_HEARTBEAT_UPDATE_MAX_TRIES); diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java index cf407bfc08a8..aa868ff1d3f2 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java @@ -18,7 +18,7 @@ import com.cloud.agent.properties.AgentProperties; import com.cloud.agent.properties.AgentPropertiesFileHandler; -import com.cloud.storage.Storage.StoragePoolType; +import com.cloud.ha.HighAvailabilityManager; import com.cloud.utils.script.Script; import org.libvirt.Connect; import org.libvirt.LibvirtException; @@ -39,20 +39,15 @@ public class KVMHAMonitor extends KVMHABase implements Runnable { private final String hostPrivateIp; - public KVMHAMonitor(HAStoragePool pool, String host, String scriptPath) { + public KVMHAMonitor(HAStoragePool pool, String host) { if (pool != null) { storagePool.put(pool.getPoolUUID(), pool); } hostPrivateIp = host; - configureHeartBeatPath(scriptPath); rebootHostAndAlertManagementOnHeartbeatTimeout = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.REBOOT_HOST_AND_ALERT_MANAGEMENT_ON_HEARTBEAT_TIMEOUT); } - private static synchronized void configureHeartBeatPath(String scriptPath) { - KVMHABase.s_heartBeatPath = scriptPath; - } - public void addStoragePool(HAStoragePool pool) { synchronized (storagePool) { storagePool.put(pool.getPoolUUID(), pool); @@ -86,8 +81,8 @@ protected void runHeartBeat() { Set removedPools = new HashSet<>(); for (String uuid : storagePool.keySet()) { HAStoragePool primaryStoragePool = storagePool.get(uuid); - if (primaryStoragePool.getPool().getType() == StoragePoolType.NetworkFilesystem) { - checkForNotExistingPools(removedPools, uuid); + if 
(HighAvailabilityManager.LIBVIRT_STORAGE_POOL_TYPES_WITH_HA_SUPPORT.contains(primaryStoragePool.getPool().getType())) { + checkForNotExistingLibvirtStoragePools(removedPools, uuid); if (removedPools.contains(uuid)) { continue; } @@ -127,7 +122,7 @@ private String executePoolHeartBeatCommand(String uuid, HAStoragePool primarySto return result; } - private void checkForNotExistingPools(Set removedPools, String uuid) { + private void checkForNotExistingLibvirtStoragePools(Set removedPools, String uuid) { try { Connect conn = LibvirtConnection.getConnection(); StoragePool storage = conn.storagePoolLookupByUUIDString(uuid); diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java index b561cedd0183..64df98f413a6 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java @@ -1063,11 +1063,6 @@ public boolean configure(final String name, final Map params) th throw new ConfigurationException("Unable to find patch.sh"); } - heartBeatPath = Script.findScript(kvmScriptsDir, "kvmheartbeat.sh"); - if (heartBeatPath == null) { - throw new ConfigurationException("Unable to find kvmheartbeat.sh"); - } - createVmPath = Script.findScript(storageScriptsDir, "createvm.sh"); if (createVmPath == null) { throw new ConfigurationException("Unable to find the createvm.sh"); @@ -1330,7 +1325,7 @@ public boolean configure(final String name, final Map params) th final String[] info = NetUtils.getNetworkParams(privateNic); - kvmhaMonitor = new KVMHAMonitor(null, info[0], heartBeatPath); + kvmhaMonitor = new KVMHAMonitor(null, info[0]); final Thread ha = new Thread(kvmhaMonitor); ha.start(); diff --git 
a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtCheckVMActivityOnStoragePoolCommandWrapper.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtCheckVMActivityOnStoragePoolCommandWrapper.java index a708d441be59..d3f537dc9173 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtCheckVMActivityOnStoragePoolCommandWrapper.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtCheckVMActivityOnStoragePoolCommandWrapper.java @@ -48,7 +48,7 @@ public Answer execute(final CheckVMActivityOnStoragePoolCommand command, final L KVMStoragePool primaryPool = storagePoolMgr.getStoragePool(pool.getType(), pool.getUuid()); - if (primaryPool.isPoolSupportHA()){ + if (primaryPool.isPoolSupportHA()) { final HAStoragePool nfspool = monitor.getStoragePool(pool.getUuid()); final KVMHAVMActivityChecker ha = new KVMHAVMActivityChecker(nfspool, command.getHost(), command.getVolumeList(), libvirtComputingResource.getVmActivityCheckPath(), command.getSuspectTimeInSeconds()); final Future future = executors.submit(ha); diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStoragePoolManager.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStoragePoolManager.java index 6665cf625e2f..35cc864268c3 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStoragePoolManager.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStoragePoolManager.java @@ -289,6 +289,7 @@ public KVMStoragePool getStoragePool(StoragePoolType type, String uuid, boolean if (pool instanceof LibvirtStoragePool) { addPoolDetails(uuid, (LibvirtStoragePool) pool); + ((LibvirtStoragePool) pool).setType(type); } return pool; @@ -390,6 +391,9 @@ public KVMStoragePool createStoragePool(String name, String host, int port, Stri private 
synchronized KVMStoragePool createStoragePool(String name, String host, int port, String path, String userInfo, StoragePoolType type, Map details, boolean primaryStorage) { StorageAdaptor adaptor = getStorageAdaptor(type); KVMStoragePool pool = adaptor.createStoragePool(name, host, port, path, userInfo, type, details, primaryStorage); + if (pool instanceof LibvirtStoragePool) { + ((LibvirtStoragePool) pool).setType(type); + } // LibvirtStorageAdaptor-specific statement if (pool.isPoolSupportHA() && primaryStorage) { diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/LibvirtStoragePool.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/LibvirtStoragePool.java index ab39f7bc6ffd..45c22d3ac754 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/LibvirtStoragePool.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/LibvirtStoragePool.java @@ -31,6 +31,7 @@ import com.cloud.agent.api.to.HostTO; import com.cloud.agent.properties.AgentProperties; import com.cloud.agent.properties.AgentPropertiesFileHandler; +import com.cloud.ha.HighAvailabilityManager; import com.cloud.hypervisor.kvm.resource.KVMHABase.HAStoragePool; import com.cloud.storage.Storage; import com.cloud.storage.Storage.StoragePoolType; @@ -320,13 +321,24 @@ public void setDetails(Map details) { @Override public boolean isPoolSupportHA() { - return type == StoragePoolType.NetworkFilesystem; + return HighAvailabilityManager.LIBVIRT_STORAGE_POOL_TYPES_WITH_HA_SUPPORT.contains(type); } public String getHearthBeatPath() { - if (type == StoragePoolType.NetworkFilesystem) { + if (StoragePoolType.NetworkFilesystem.equals(type)) { String kvmScriptsDir = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_SCRIPTS_DIR); - return Script.findScript(kvmScriptsDir, "kvmheartbeat.sh"); + String scriptPath = Script.findScript(kvmScriptsDir, "kvmheartbeat.sh"); + if (scriptPath == null) { + 
throw new CloudRuntimeException("Unable to find heartbeat script 'kvmheartbeat.sh' in directory: " + kvmScriptsDir); + } + return scriptPath; + } else if (StoragePoolType.SharedMountPoint.equals(type)) { + String kvmScriptsDir = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_SCRIPTS_DIR); + String scriptPath = Script.findScript(kvmScriptsDir, "kvmsmpheartbeat.sh"); + if (scriptPath == null) { + throw new CloudRuntimeException("Unable to find heartbeat script 'kvmsmpheartbeat.sh' in directory: " + kvmScriptsDir); + } + return scriptPath; } return null; } @@ -410,4 +422,8 @@ public Boolean vmActivityCheck(HAStoragePool pool, HostTO host, Duration activit return true; } } + + public void setType(StoragePoolType type) { + this.type = type; + } } diff --git a/plugins/storage/volume/default/src/main/java/org/apache/cloudstack/storage/datastore/driver/CloudStackPrimaryDataStoreDriverImpl.java b/plugins/storage/volume/default/src/main/java/org/apache/cloudstack/storage/datastore/driver/CloudStackPrimaryDataStoreDriverImpl.java index 5faa377ce3d3..6a7f1d580436 100644 --- a/plugins/storage/volume/default/src/main/java/org/apache/cloudstack/storage/datastore/driver/CloudStackPrimaryDataStoreDriverImpl.java +++ b/plugins/storage/volume/default/src/main/java/org/apache/cloudstack/storage/datastore/driver/CloudStackPrimaryDataStoreDriverImpl.java @@ -27,6 +27,7 @@ import javax.inject.Inject; import com.cloud.agent.api.to.DiskTO; +import com.cloud.ha.HighAvailabilityManager; import com.cloud.storage.VolumeVO; import org.apache.cloudstack.engine.orchestration.service.VolumeOrchestrationService; import org.apache.cloudstack.engine.subsystem.api.storage.ChapInfo; @@ -587,7 +588,7 @@ private boolean anyVolumeRequiresEncryption(DataObject ... 
objects) { @Override public boolean isStorageSupportHA(StoragePoolType type) { - return StoragePoolType.NetworkFilesystem == type; + return type != null && HighAvailabilityManager.LIBVIRT_STORAGE_POOL_TYPES_WITH_HA_SUPPORT.contains(type); } @Override diff --git a/scripts/vm/hypervisor/kvm/kvmsmpheartbeat.sh b/scripts/vm/hypervisor/kvm/kvmsmpheartbeat.sh new file mode 100755 index 000000000000..b102a1a866bb --- /dev/null +++ b/scripts/vm/hypervisor/kvm/kvmsmpheartbeat.sh @@ -0,0 +1,218 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+# Prints the usage text on stdout and exits with status 1.
+help() {
+  printf '%s\n' \
+    "Usage: $0" \
+    "    -i identifier (required for CLI compatibility; value ignored by local-only heartbeat)" \
+    "    -p path (required for CLI compatibility; value ignored by local-only heartbeat)" \
+    "    -m mount point (local path where heartbeat will be written)" \
+    "    -h host (host IP/name to include in heartbeat filename)" \
+    "    -r write/read hb log (read-check mode)" \
+    "    -c cleanup (trigger emergency reboot)" \
+    "    -t interval between read hb log"
+  exit 1
+}
+
+#set -x
+NfsSvrIP=
+NfsSvrPath=
+MountPoint=
+HostIP=
+interval=
+rflag=0
+cflag=0
+
+while getopts 'i:p:m:h:t:rc' OPTION
+do
+  case $OPTION in
+  i)
+     NfsSvrIP="$OPTARG"
+     ;; # retained for CLI compatibility but unused for this script
+  p)
+     NfsSvrPath="$OPTARG"
+     ;; # retained for CLI compatibility but unused for this script
+  m)
+     MountPoint="$OPTARG"
+     ;;
+  h)
+     HostIP="$OPTARG"
+     ;;
+  r)
+     rflag=1
+     ;;
+  t)
+     interval="$OPTARG"
+     ;;
+  c)
+     cflag=1
+     ;;
+  *)
+     help
+     ;;
+  esac
+done
+
+# For heartbeat we require a mountpoint
+if [ -z "$MountPoint" ]
+then
+  echo "Mount point (-m) is required"
+  help
+fi
+
+# Strip trailing slashes (keeping a bare "/") so the path has the same form as
+# the entries in /proc/mounts. Without this, "-m /mnt/pool/" passes the
+# mountpoint check but fails the /proc/mounts lookup further down, which then
+# kills the VMs of a healthy pool.
+while [ "$MountPoint" != "/" ] && [ "${MountPoint%/}" != "$MountPoint" ]
+do
+  MountPoint="${MountPoint%/}"
+done
+
+# Validate mount point exists, is (if possible) a mounted filesystem, and is writable
+if [ ! -d "$MountPoint" ]; then
+  echo "Mount point directory does not exist: $MountPoint" >&2
+  exit 1
+fi
+
+# If the 'mountpoint' utility is available, ensure this is an actual mount
+if command -v mountpoint >/dev/null 2>&1; then
+  if ! mountpoint -q "$MountPoint"; then
+    echo "Mount point is not a mounted filesystem: $MountPoint" >&2
+    exit 1
+  fi
+fi
+
+# Ensure the mount point is writable
+if [ ! -w "$MountPoint" ]; then
+  echo "Mount point is not writable: $MountPoint" >&2
+  exit 1
+fi
+
+# Best-effort: sends SIGKILL to every qemu process whose command line mentions
+# a path under the given mount point.
+deleteVMs() {
+  local mountPoint=$1
+  local pid
+  # ensure it ends with a single trailing slash
+  mountPoint="${mountPoint%/}/"
+
+  # -F: treat the mount point as a literal path, not as a regular expression
+  vmPids=$(ps aux | grep qemu | grep -F -- "$mountPoint" | awk '{print $2}' 2> /dev/null)
+
+  if [ -z "$vmPids" ]
+  then
+    return
+  fi
+
+  for pid in $vmPids
+  do
+    kill -9 "$pid" &> /dev/null
+  done
+}
+
+# Returns 0 when $1 is listed as a mount target (second field) in /proc/mounts.
+# Exact string comparison, so regex metacharacters in the path are harmless.
+isMounted() {
+  local dev target rest
+  while read -r dev target rest
+  do
+    [ "$target" = "$1" ] && return 0
+  done < /proc/mounts
+  return 1
+}
+
+#checking is there the mount point present under $MountPoint?
+if ! isMounted "$MountPoint"
+then
+  # mount point not present
+  # if not in read-check mode, consider deleting VMs similar to original behavior
+  if [ "$rflag" == "0" ]
+  then
+    deleteVMs "$MountPoint"
+  fi
+fi
+
+hbFolder="$MountPoint/KVMHA"
+hbFile="$hbFolder/hb-$HostIP"
+
+# Writes the current epoch time (seconds) to the heartbeat file.
+# Returns 2 when the folder or the temp file cannot be written, otherwise the
+# exit status of the final rename.
+write_hbLog() {
+  local timestamp tmpfile rc
+  if ! stat "$hbFile" &> /dev/null
+  then
+    # create a new one
+    mkdir -p "$hbFolder" &> /dev/null
+    # the file is created by the atomic write below; ensure folder is writable
+    if [ ! -w "$hbFolder" ]; then
+      printf 'Folder not writable: %s\n' "$hbFolder" >&2
+      return 2
+    fi
+  fi
+
+  timestamp=$(date +%s)
+  # Write atomically to avoid partial writes (write to tmp then mv)
+  tmpfile="${hbFile}.$$"
+  if ! printf '%s\n' "$timestamp" > "$tmpfile" 2>/dev/null; then
+    rm -f "$tmpfile" 2>/dev/null
+    printf 'Failed to write heartbeat to %s\n' "$tmpfile" >&2
+    return 2
+  fi
+  mv -f "$tmpfile" "$hbFile" 2>/dev/null
+  rc=$?
+  # do not leave a stale temp file behind when the rename fails
+  [ $rc -ne 0 ] && rm -f "$tmpfile" 2>/dev/null
+  return $rc
+}
+
+# Sets the global hb_diff to the heartbeat age in seconds (or a 9999xx sentinel
+# when the file is missing, empty or not a number).
+# Returns 0 when the heartbeat is fresh enough, 1 otherwise.
+check_hbLog() {
+  local now hb age
+  hb_diff=0
+  if [ ! -f "$hbFile" ]; then
+    # signal large difference if file missing
+    hb_diff=999999
+    return 1
+  fi
+  now=$(date +%s)
+  hb=$(cat "$hbFile" 2>/dev/null)
+  if [ -z "$hb" ]; then
+    hb_diff=999998
+    return 1
+  fi
+  # anything but a plain non-negative integer is treated as unreadable
+  case "$hb" in
+    *[!0-9]*)
+      hb_diff=999997
+      return 1
+      ;;
+  esac
+  # Shell arithmetic, not expr: expr exits with status 1 when the result is 0,
+  # so a heartbeat written within the current second was reported as DEAD.
+  # 10# forces base 10 so a value with leading zeros is not read as octal.
+  age=$(( now - 10#$hb ))
+  if [ -z "$interval" ]; then
+    # if no interval provided, consider 0 as success
+    if [ "$age" -gt 0 ]; then
+      hb_diff=$age
+      return 1
+    fi
+    hb_diff=0
+    return 0
+  fi
+  if [ "$age" -gt "$interval" ]
+  then
+    hb_diff=$age
+    return 1
+  fi
+  hb_diff=0
+  return 0
+}
+
+if [ "$rflag" == "1" ]
+then
+  check_hbLog
+  status=$?
+  diff="${hb_diff:-0}"
+  if [ $status -eq 0 ]
+  then
+    echo "=====> ALIVE <====="
+  else
+    echo "=====> Considering host as DEAD because last write on [$hbFile] was [$diff] seconds ago, but the max interval is [$interval] <======"
+  fi
+  # read-check mode exits 0 for both outcomes; only the printed line differs
+  exit 0
+elif [ "$cflag" == "1" ]
+then
+  /usr/bin/logger -t heartbeat "kvmsmpheartbeat.sh will reboot system because it was unable to write the heartbeat to the storage."
+  sync &
+  sleep 5
+  echo b > /proc/sysrq-trigger
+  exit $?
+else
+  write_hbLog
+  exit $?
+fi