Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions plugin/kvm/src/main/java/org/zstack/kvm/KVMHost.java
Original file line number Diff line number Diff line change
Expand Up @@ -5803,6 +5803,13 @@ public void success(Boolean run) {

@Override
public void fail(ErrorCode errorCode) {
    // Ask the helper whether this particular failure may be tolerated for this host.
    boolean tolerated = KVMHostUtils.shouldContinueReconnectOnAnsibleFailure(info.isNewAdded(), errorCode);
    if (!tolerated) {
        // Not tolerated: propagate the original error to the flow unchanged.
        trigger.fail(errorCode);
        return;
    }

    // Tolerated: record a warning and let the flow move on to its next step.
    String warning = String.format(
            "kvm ansible failed to mask libvirt sockets because systemd dbus timed out on existing host[uuid:%s, ip:%s], continue reconnect and verify kvmagent, error: %s",
            self.getUuid(), self.getManagementIp(), errorCode);
    logger.warn(warning);
    trigger.next();
}
});
Expand Down
33 changes: 33 additions & 0 deletions plugin/kvm/src/main/java/org/zstack/kvm/KVMHostUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import org.apache.commons.codec.digest.DigestUtils;
import org.zstack.core.db.Q;
import org.zstack.header.errorcode.ErrorCode;
import org.zstack.header.network.l2.*;
import org.zstack.header.tag.SystemTagVO;
import org.zstack.header.tag.SystemTagVO_;
Expand All @@ -13,6 +14,7 @@
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

/**
Expand All @@ -21,6 +23,37 @@
public class KVMHostUtils {
private static final CLogger logger = CLoggerImpl.getLogger(KVMHostUtils.class);

/**
 * Tells whether a failed ansible run may be ignored so that reconnect can proceed.
 *
 * @param isNewAdded whether the host is flagged as newly added; when {@code true} the
 *                   result is always {@code false} and the error is not inspected
 * @param errorCode  the failure to classify
 * @return {@code true} only when {@code isNewAdded} is {@code false} and
 *         {@link #isLibvirtSocketMaskSystemdTimeout(ErrorCode)} accepts the error
 */
public static boolean shouldContinueReconnectOnAnsibleFailure(boolean isNewAdded, ErrorCode errorCode) {
    if (isNewAdded) {
        return false;
    }
    return isLibvirtSocketMaskSystemdTimeout(errorCode);
}

/**
 * Checks whether the text gathered from an error and its cause chain matches the
 * signature of a {@code systemctl mask} of libvirtd sockets that timed out talking
 * to systemd.
 *
 * <p>Matching is case-insensitive (text is lower-cased with {@link Locale#ROOT}) and
 * purely substring based: all four mandatory fragments must be present, plus at least
 * one of the two systemd failure phrases.
 *
 * @param errorCode the failure to inspect
 * @return {@code true} when every required fragment is found in the collected text
 */
public static boolean isLibvirtSocketMaskSystemdTimeout(ErrorCode errorCode) {
    String haystack = collectErrorText(errorCode).toLowerCase(Locale.ROOT);

    // Every one of these must appear somewhere in the collected text.
    String[] mandatoryFragments = {
            "systemctl mask",
            "libvirtd.socket",
            "org.freedesktop.systemd1",
            "timed out"
    };
    for (String fragment : mandatoryFragments) {
        if (!haystack.contains(fragment)) {
            return false;
        }
    }

    // In addition, at least one of the two failure phrases is required.
    if (haystack.contains("failed to get properties")) {
        return true;
    }
    return haystack.contains("failed to activate service");
}

/**
 * Concatenates the details and description of an error and of every error reachable
 * through {@code getCause()}, one entry per line, skipping {@code null} texts.
 *
 * @param errorCode head of the cause chain; {@code null} yields an empty string
 * @return the gathered text, each non-null entry followed by a newline
 */
private static String collectErrorText(ErrorCode errorCode) {
    StringBuilder collected = new StringBuilder();
    for (ErrorCode node = errorCode; node != null; node = node.getCause()) {
        // Details first, then description, for each link of the chain.
        appendIfNotNull(collected, node.getDetails());
        appendIfNotNull(collected, node.getDescription());
    }
    return collected.toString();
}

/**
 * Appends {@code text} followed by a newline to {@code sb}; does nothing when
 * {@code text} is {@code null}.
 *
 * @param sb   the builder to append to
 * @param text the text to add, may be {@code null}
 */
private static void appendIfNotNull(StringBuilder sb, String text) {
    if (text == null) {
        return;
    }
    sb.append(text);
    sb.append('\n');
}

/**
* Get normalized bridge name for l2 network, which has at most 15 chars.
* - if l2 network has L2_BRIDGE_NAME tag, then return its value directly;
Expand Down
25 changes: 25 additions & 0 deletions test/src/test/java/org/zstack/test/kvm/KVMHostUtilsTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package org.zstack.test.kvm;

import org.junit.Assert;
import org.junit.Test;
import org.zstack.header.errorcode.ErrorCode;
import org.zstack.kvm.KVMHostUtils;

public class KVMHostUtilsTest {
@Test
public void zstac86349_continueReconnectOnLibvirtSocketMaskSystemdTimeout() {
ErrorCode error = new ErrorCode();
error.setDetails("[HOST: 192.168.51.12] ERROR: run shell command: systemctl mask libvirtd.socket libvirtd-ro.socket libvirtd-admin.socket libvirtd-tls.socket libvirtd-tcp.socket failed! stderr: Failed to get properties: Failed to activate service 'org.freedesktop.systemd1': timed out (service_start_timeout=25000ms)");

Assert.assertTrue(KVMHostUtils.shouldContinueReconnectOnAnsibleFailure(false, error));
Assert.assertFalse(KVMHostUtils.shouldContinueReconnectOnAnsibleFailure(true, error));
}

@Test
public void zstac86349_doNotContinueReconnectOnOtherAnsibleFailures() {
ErrorCode error = new ErrorCode();
error.setDetails("[HOST: 192.168.51.12] ERROR: run shell command: systemctl restart libvirtd failed! stderr: Job for libvirtd.service failed");

Assert.assertFalse(KVMHostUtils.shouldContinueReconnectOnAnsibleFailure(false, error));
}
}