Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@

public class KVMHAMonitor extends KVMHABase implements Runnable {

public static final List<StoragePoolType> STORAGE_POOL_TYPES_WITH_HA_SUPPORT = List.of(StoragePoolType.NetworkFilesystem, StoragePoolType.SharedMountPoint);
Comment thread
weizhouapache marked this conversation as resolved.
Outdated
Comment thread
weizhouapache marked this conversation as resolved.
Outdated

private final Map<String, HAStoragePool> storagePool = new ConcurrentHashMap<>();
private final boolean rebootHostAndAlertManagementOnHeartbeatTimeout;

Expand Down Expand Up @@ -86,7 +88,7 @@ protected void runHeartBeat() {
Set<String> removedPools = new HashSet<>();
for (String uuid : storagePool.keySet()) {
HAStoragePool primaryStoragePool = storagePool.get(uuid);
if (primaryStoragePool.getPool().getType() == StoragePoolType.NetworkFilesystem) {
if (STORAGE_POOL_TYPES_WITH_HA_SUPPORT.contains(primaryStoragePool.getPool().getType())) {
checkForNotExistingPools(removedPools, uuid);
if (removedPools.contains(uuid)) {
continue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ public Answer execute(final CheckVMActivityOnStoragePoolCommand command, final L
final KVMStoragePoolManager storagePoolMgr = libvirtComputingResource.getStoragePoolMgr();

KVMStoragePool primaryPool = storagePoolMgr.getStoragePool(pool.getType(), pool.getUuid());
primaryPool.setType(pool.getType());

if (primaryPool.isPoolSupportHA()){
if (primaryPool.isPoolSupportHA()) {
final HAStoragePool nfspool = monitor.getStoragePool(pool.getUuid());
final KVMHAVMActivityChecker ha = new KVMHAVMActivityChecker(nfspool, command.getHost(), command.getVolumeList(), libvirtComputingResource.getVmActivityCheckPath(), command.getSuspectTimeInSeconds());
final Future<Boolean> future = executors.submit(ha);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ default Long getUsedIops() {

public StoragePoolType getType();

default void setType(StoragePoolType type) {
Comment thread
weizhouapache marked this conversation as resolved.
Outdated
}

public boolean delete();

PhysicalDiskFormat getDefaultFormat();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public class KVMStoragePoolManager {
private final Map<String, StoragePoolInformation> _storagePools = new ConcurrentHashMap<String, StoragePoolInformation>();
private final Map<String, StorageAdaptor> _storageMapper = new HashMap<String, StorageAdaptor>();

private StorageAdaptor getStorageAdaptor(StoragePoolType type) {
public StorageAdaptor getStorageAdaptor(StoragePoolType type) {
Comment thread
weizhouapache marked this conversation as resolved.
Outdated
// type can be null: LibVirtComputingResource:3238
if (type == null) {
return _storageMapper.get("libvirt");
Expand Down Expand Up @@ -390,6 +390,7 @@ public KVMStoragePool createStoragePool(String name, String host, int port, Stri
private synchronized KVMStoragePool createStoragePool(String name, String host, int port, String path, String userInfo, StoragePoolType type, Map<String, String> details, boolean primaryStorage) {
StorageAdaptor adaptor = getStorageAdaptor(type);
KVMStoragePool pool = adaptor.createStoragePool(name, host, port, path, userInfo, type, details, primaryStorage);
pool.setType(type);

Comment thread
weizhouapache marked this conversation as resolved.
// LibvirtStorageAdaptor-specific statement
if (pool.isPoolSupportHA() && primaryStorage) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import com.cloud.agent.properties.AgentProperties;
import com.cloud.agent.properties.AgentPropertiesFileHandler;
import com.cloud.hypervisor.kvm.resource.KVMHABase.HAStoragePool;
import com.cloud.hypervisor.kvm.resource.KVMHAMonitor;
import com.cloud.storage.Storage;
import com.cloud.storage.Storage.StoragePoolType;
import com.cloud.utils.exception.CloudRuntimeException;
Expand Down Expand Up @@ -320,13 +321,16 @@ public void setDetails(Map<String, String> details) {

/**
 * Reports whether this pool's type is listed in
 * {@link KVMHAMonitor#STORAGE_POOL_TYPES_WITH_HA_SUPPORT}.
 *
 * @return {@code true} when the pool type is in the HA-support list;
 *         {@code false} otherwise, including when no type has been set
 */
@Override
public boolean isPoolSupportHA() {
    // The list is built with List.of(...), which is null-hostile: contains(null)
    // throws NullPointerException. Guard an unset type explicitly.
    return type != null && KVMHAMonitor.STORAGE_POOL_TYPES_WITH_HA_SUPPORT.contains(type);
}

public String getHearthBeatPath() {
if (type == StoragePoolType.NetworkFilesystem) {
if (StoragePoolType.NetworkFilesystem.equals(type)) {
String kvmScriptsDir = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_SCRIPTS_DIR);
return Script.findScript(kvmScriptsDir, "kvmheartbeat.sh");
} else if (StoragePoolType.SharedMountPoint.equals(type)) {
String kvmScriptsDir = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_SCRIPTS_DIR);
return Script.findScript(kvmScriptsDir, "kvmsmpheartbeat.sh");
Comment thread
weizhouapache marked this conversation as resolved.
Outdated
}
return null;
}
Expand Down Expand Up @@ -410,4 +414,9 @@ public Boolean vmActivityCheck(HAStoragePool pool, HostTO host, Duration activit
return true;
}
}

@Override
public void setType(StoragePoolType type) {
Comment thread
weizhouapache marked this conversation as resolved.
this.type = type;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import static com.cloud.utils.NumbersUtil.toHumanReadableSize;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;

Expand Down Expand Up @@ -100,6 +101,8 @@ public Map<String, String> getCapabilities() {
protected Logger logger = LogManager.getLogger(getClass());
private static final String NO_REMOTE_ENDPOINT_WITH_ENCRYPTION = "No remote endpoint to send command, unable to find a valid endpoint. Requires encryption support: %s";

private static final List<StoragePoolType> STORAGE_POOL_TYPES_WITH_HA_SUPPORT = List.of(StoragePoolType.NetworkFilesystem, StoragePoolType.SharedMountPoint);

@Inject
DiskOfferingDao diskOfferingDao;
@Inject
Expand Down Expand Up @@ -587,7 +590,7 @@ private boolean anyVolumeRequiresEncryption(DataObject ... objects) {

/**
 * Reports whether the given storage pool type is in this class's
 * {@code STORAGE_POOL_TYPES_WITH_HA_SUPPORT} list.
 *
 * @param type the storage pool type to test; may be {@code null}
 * @return {@code true} when {@code type} is non-null and in the list
 */
@Override
public boolean isStorageSupportHA(StoragePoolType type) {
    // List.of(...).contains(null) throws NullPointerException, so reject null first.
    if (type == null) {
        return false;
    }
    return STORAGE_POOL_TYPES_WITH_HA_SUPPORT.contains(type);
}

@Override
Expand Down
218 changes: 218 additions & 0 deletions scripts/vm/hypervisor/kvm/kvmsmpheartbeat.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Print the command-line usage summary to stdout and terminate the script
# with exit status 1.
help() {
    # Pass the script name as an argument instead of embedding it in the
    # format string, so a '%' or '\' in the path is printed literally.
    printf 'Usage: %s\n' "$0"
    # -i is documented as required because the script exits when it is missing,
    # even though its value is never used afterwards.
    printf '%s\n' \
        '-i identifier (required for CLI compatibility; value unused by local-only heartbeat)' \
        '-p path (ignored for local-only heartbeat)' \
        '-m mount point (local path where heartbeat will be written)' \
        '-h host (host IP/name to include in heartbeat filename)' \
        '-r write/read hb log (read-check mode)' \
        '-c cleanup (trigger emergency reboot)' \
        '-t interval between read hb log'
    exit 1
}

#set -x
# Option state, filled in by the getopts loop below.
# NfsSvrIP   (-i): only checked for non-emptiness later in the script.
# NfsSvrPath (-p): stored but not referenced again in this script.
# MountPoint (-m): base directory under which the heartbeat file lives.
# HostIP     (-h): suffix of the heartbeat file name.
# interval   (-t): maximum accepted heartbeat age used by the read check.
# rflag      (-r): 1 selects read-check mode.
# cflag      (-c): 1 selects the cleanup (reboot) mode.
NfsSvrIP=
NfsSvrPath=
MountPoint=
HostIP=
interval=
rflag=0
cflag=0

# Options followed by ':' take an argument; -r and -c are plain flags.
while getopts 'i:p:m:h:t:rc' OPTION
do
case $OPTION in
i)
NfsSvrIP="$OPTARG"
;; # retained for CLI compatibility but unused for local-only script
p)
NfsSvrPath="$OPTARG"
;; # retained for CLI compatibility but unused for local-only script
m)
MountPoint="$OPTARG"
;;
h)
HostIP="$OPTARG"
;;
r)
rflag=1
;;
t)
interval="$OPTARG"
;;
c)
cflag=1
;;
*)
# Any unrecognised option prints the usage text; help exits with status 1.
help
;;
esac
done

# Match original kvmheartbeat.sh: require the -i parameter for CLI compatibility,
# even though its value is not used by this local-only script.
if [ -z "$NfsSvrIP" ]
then
    # Say why we are bailing out instead of exiting silently.
    echo "Identifier (-i) is required" >&2
    exit 1
fi

# For local-only heartbeat we require a mountpoint
if [ -z "$MountPoint" ]
then
    echo "Mount point (-m) is required"
    # help prints the usage text and exits with status 1.
    help
fi

# Ensure the mount point directory exists, creating it when it is missing.
# (Writability is checked later, when the heartbeat folder is created.)
if [ ! -d "$MountPoint" ]; then
    if ! mkdir -p -- "$MountPoint" 2>/dev/null; then
        echo "Failed to create mount point directory: $MountPoint" >&2
        exit 1
    fi
fi

Comment thread
weizhouapache marked this conversation as resolved.
Outdated
# When no -h value was given, fall back to the first address printed by
# `hostname -I`, or to the plain host name if that yields nothing.
if [ -z "$HostIP" ]; then
    ipaddr=$(hostname -I 2>/dev/null | awk '{print $1}')
    # ${var:-default} only runs `hostname` when ipaddr is empty.
    HostIP=${ipaddr:-$(hostname)}
fi

# Best-effort: send SIGKILL to every process whose `ps aux` line contains both
# "qemu" and the given mount point.
#   $1 - mount point path to look for in the process list
deleteVMs() {
    local mountPoint=$1
    local vmPids pid

    # An empty pattern would match every qemu process; refuse to do that.
    if [ -z "$mountPoint" ]
    then
        return
    fi

    # grep -F matches the path literally (no regex metacharacters) and '--'
    # protects a path that starts with '-'. The pipeline's exit status is
    # awk's, so it says nothing about whether a process matched; the emptiness
    # test below is the real check.
    vmPids=$(ps aux | grep qemu | grep -F -- "$mountPoint" | awk '{print $2}' 2> /dev/null)

    if [ -z "$vmPids" ]
    then
        return
    fi

    for pid in $vmPids
    do
        kill -9 "$pid" &> /dev/null
    done
}

# Check whether anything matching $MountPoint is listed in /proc/mounts.
# grep -F matches the path literally instead of treating it as a regex.
mounts=$(grep -F -- "$MountPoint" /proc/mounts)
if [ $? -gt 0 ]
then
    # mount point not present — we don't remount in local-only script
    # nothing to do here; keep for compatibility with original flow
    :
else
    # Mount is present: this is the normal path taken on every heartbeat.
    # Do NOT call deleteVMs here. It sends SIGKILL to every qemu process that
    # references this mount point, so running it on a healthy mount would kill
    # the running VMs on each heartbeat write.
    :
fi

Comment thread
weizhouapache marked this conversation as resolved.
Outdated
# Heartbeat directory under the mount point, and this host's heartbeat file in it.
hbFolder="${MountPoint}/KVMHA/"
hbFile="${hbFolder}/hb-${HostIP}"

# Write the current epoch time (seconds) into $hbFile.
# Returns 0 on success, 2 when the heartbeat folder is not writable or the
# temporary file cannot be written, otherwise the exit status of mv.
write_hbLog() {
    local timestamp tmpfile rc

    if ! stat "$hbFile" &> /dev/null
    then
        # No heartbeat file yet: make sure its folder exists and is writable.
        mkdir -p "$hbFolder" &> /dev/null
        if [ ! -w "$hbFolder" ]; then
            # Print the path as an argument, never as the printf format string.
            printf '%s\n' "Folder not writable: $hbFolder" >&2
            return 2
        fi
    fi

    timestamp=$(date +%s)
    # Write to a temp file next to the target, then rename it over the target,
    # so a reader never sees a partially written heartbeat.
    tmpfile="${hbFile}.$$"
    if ! printf "%s\n" "$timestamp" > "$tmpfile" 2>/dev/null; then
        printf '%s\n' "Failed to write heartbeat to $tmpfile" >&2
        # Do not leave a stray temp file behind.
        rm -f "$tmpfile" 2>/dev/null
        return 2
    fi

    mv -f "$tmpfile" "$hbFile" 2>/dev/null
    rc=$?
    if [ "$rc" -ne 0 ]; then
        rm -f "$tmpfile" 2>/dev/null
    fi
    return "$rc"
}

# Compare the timestamp stored in $hbFile with the current time.
# Returns 0 when the heartbeat is fresh: its age is not greater than $interval,
# or not greater than 0 when no interval was given.
# Otherwise returns the age in seconds, capped at 255. Shell exit statuses are
# 8-bit, so a larger value would wrap and an age that is a multiple of 256
# would be reported as 0 (fresh).
# Returns 255 when the file is missing, empty or not a plain number.
check_hbLog() {
    local now hb age limit

    if [ ! -f "$hbFile" ]; then
        # signal large difference if file missing
        return 255
    fi

    now=$(date +%s)
    hb=$(cat "$hbFile" 2>/dev/null)
    case "$hb" in
        ''|*[!0-9]*)
            # empty or non-numeric content cannot be compared
            return 255
            ;;
    esac

    # Shell arithmetic instead of expr: expr exits with status 1 when the
    # result is 0, which made a heartbeat written in the same second look like
    # an error. 10# forces base 10 so leading zeros are not read as octal.
    age=$((now - 10#$hb))

    # if no interval provided, any positive age counts as stale
    limit=${interval:-0}
    if [ "$age" -gt "$limit" ]
    then
        if [ "$age" -gt 255 ]; then
            return 255
        fi
        return "$age"
    fi
    return 0
}

# Mode dispatch: -r runs the read check, -c runs the emergency reboot,
# and the default is to write a heartbeat.

# Read-check mode: report the verdict on stdout and always exit 0.
if [ "$rflag" == "1" ]; then
    check_hbLog
    diff=$?
    case "$diff" in
        0)
            echo "=====> ALIVE <====="
            ;;
        *)
            echo "=====> Considering host as DEAD because last write on [$hbFile] was [$diff] seconds ago, but the max interval is [$interval] <======"
            ;;
    esac
    exit 0
fi

# Cleanup mode: log the reason, flush disks in the background, then force an
# immediate reboot through the sysrq trigger.
if [ "$cflag" == "1" ]; then
    /usr/bin/logger -t heartbeat "kvmsmpheartbeat.sh will reboot system because it was unable to write the heartbeat to the storage."
    sync &
    sleep 5
    echo b > /proc/sysrq-trigger
    exit $?
fi

# Default mode: write the heartbeat and pass its status through.
write_hbLog
exit $?
Loading