From b75ddc1cc417dfe04ec45a256cc896e4fe8c0fb4 Mon Sep 17 00:00:00 2001 From: DJ Gregor Date: Sat, 15 Nov 2025 16:37:15 -0800 Subject: [PATCH 1/5] ExceptionHandlers: Fix code in bytecode comment --- .../tooling/bytebuddy/ExceptionHandlers.java | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/dd-java-agent/agent-tooling/src/main/java/datadog/trace/agent/tooling/bytebuddy/ExceptionHandlers.java b/dd-java-agent/agent-tooling/src/main/java/datadog/trace/agent/tooling/bytebuddy/ExceptionHandlers.java index 20099d76a37..ddf9187fd8a 100644 --- a/dd-java-agent/agent-tooling/src/main/java/datadog/trace/agent/tooling/bytebuddy/ExceptionHandlers.java +++ b/dd-java-agent/agent-tooling/src/main/java/datadog/trace/agent/tooling/bytebuddy/ExceptionHandlers.java @@ -39,24 +39,14 @@ public Size apply(final MethodVisitor mv, final Implementation.Context context) final boolean exitOnFailure = InstrumenterConfig.get().isInternalExitOnFailure(); final String logMethod = exitOnFailure ? "error" : "debug"; - // Writes the following bytecode if exitOnFailure is false: + // Writes the following bytecode (note that two statements are conditionally written): // - // BlockingExceptionHandler.rethrowIfBlockingException(t); - // try { - // InstrumentationErrors.incrementErrorCount(); - // org.slf4j.LoggerFactory.getLogger((Class)ExceptionLogger.class) - // .debug("Failed to handle exception in instrumentation for ...", t); - // } catch (Throwable t2) { - // } - // - // And the following bytecode if exitOnFailure is true: - // - // BlockingExceptionHandler.rethrowIfBlockingException(t); + // BlockingExceptionHandler.rethrowIfBlockingException(t); // when appSecEnabled=true // try { // InstrumentationErrors.incrementErrorCount(); // org.slf4j.LoggerFactory.getLogger((Class)ExceptionLogger.class) // .error("Failed to handle exception in instrumentation for ...", t); - // System.exit(1); + // System.exit(1); // when exitOnFailure=true // } catch (Throwable t2) { // } // From b0a25705f0a66b2a1fe1c13ae90f352222e4eac4 Mon Sep 17 00:00:00 2001 From: DJ Gregor Date: Sat, 15 Nov 2025 16:50:38 -0800 Subject: [PATCH 2/5] InstrumentationContext: be more explicit about the reasons for failure --- .../trace/bootstrap/InstrumentationContext.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/InstrumentationContext.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/InstrumentationContext.java index 5a7dda111d2..5768cff4d3f 100644 --- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/InstrumentationContext.java +++ b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/InstrumentationContext.java @@ -24,7 +24,10 @@ private InstrumentationContext() {} public static ContextStore get( final Class keyClass, final Class contextClass) { throw new RuntimeException( - "Calls to this method will be rewritten by Instrumentation Context Provider (e.g. FieldBackedProvider)"); + "Calls to this method will be rewritten by Instrumentation Context Provider (e.g. FieldBackedProvider)." + + " If you get this exception, this method has not been rewritten." + + " Ensure instrumentation class has a contextStore method and the call to InstrumentationContext.get happens directly in an instrumentation Advice class." + + " See how_instrumentations_work.md for details."); } /** @@ -35,6 +38,9 @@ public static ContextStore get( */ public static ContextStore get(final String keyClass, final String contextClass) { throw new RuntimeException( - "Calls to this method will be rewritten by Instrumentation Context Provider (e.g. FieldBackedProvider)"); + "Calls to this method will be rewritten by Instrumentation Context Provider (e.g. FieldBackedProvider)." + + " If you get this exception, this method has not been rewritten." + + " Ensure instrumentation class has a contextStore method and the call to InstrumentationContext.get happens directly in an instrumentation Advice class." + + " See how_instrumentations_work.md for details."); } } From d5dd82b89d9d8d1fb8ae688fb15277705ceaded5 Mon Sep 17 00:00:00 2001 From: DJ Gregor Date: Sat, 15 Nov 2025 17:00:11 -0800 Subject: [PATCH 3/5] Add instrumentation error details on test failures This puts the details front and center in the test summary and is much easier than hunting back through log messages. Note: In InstrumentationErrors, errors are only recorded when enableRecordingAndReset has been called. This is only done from the InstrumentationSpecification test specification, so we don't need to worry about the ArrayList growing without bounds in production. I considered adding a limit to the ArrayList, but opted not to for simplicity. I also wanted to avoid silently discarding some errors. --- .../bootstrap/InstrumentationErrors.java | 33 ++++++++++++++----- .../tooling/bytebuddy/ExceptionHandlers.java | 10 +++--- .../test/InstrumentationSpecification.groovy | 16 +++++---- 3 files changed, 40 insertions(+), 19 deletions(-) diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/InstrumentationErrors.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/InstrumentationErrors.java index 580b1557f3c..382af51c62d 100644 --- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/InstrumentationErrors.java +++ b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/InstrumentationErrors.java @@ -1,21 +1,36 @@ package datadog.trace.bootstrap; -import java.util.concurrent.atomic.AtomicLong; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.CopyOnWriteArrayList; public class InstrumentationErrors { - private static final AtomicLong COUNTER = new AtomicLong(); + private static final List ERRORS = new CopyOnWriteArrayList<>(); + private static volatile boolean recordErrors = false; - public static long getErrorCount() { - return COUNTER.get(); + /** + * Record an error from {@link datadog.trace.agent.tooling.bytebuddy.ExceptionHandlers} for test + * visibility. + */ + @SuppressWarnings("unused") + public static void recordError(final Throwable throwable) { + if (recordErrors) { + StringWriter stackTrace = new StringWriter(); + throwable.printStackTrace(new PrintWriter(stackTrace)); + ERRORS.add(stackTrace.toString()); + } } - @SuppressWarnings("unused") - public static void incrementErrorCount() { - COUNTER.incrementAndGet(); + // Visible for testing + public static void enableRecordingAndReset() { + recordErrors = true; + ERRORS.clear(); } // Visible for testing - public static void resetErrorCount() { - COUNTER.set(0); + public static List getErrors() { + return Collections.unmodifiableList(ERRORS); } } diff --git a/dd-java-agent/agent-tooling/src/main/java/datadog/trace/agent/tooling/bytebuddy/ExceptionHandlers.java b/dd-java-agent/agent-tooling/src/main/java/datadog/trace/agent/tooling/bytebuddy/ExceptionHandlers.java index ddf9187fd8a..263136898fa 100644 --- a/dd-java-agent/agent-tooling/src/main/java/datadog/trace/agent/tooling/bytebuddy/ExceptionHandlers.java +++ b/dd-java-agent/agent-tooling/src/main/java/datadog/trace/agent/tooling/bytebuddy/ExceptionHandlers.java @@ -43,7 +43,7 @@ public Size apply(final MethodVisitor mv, final Implementation.Context context) // // BlockingExceptionHandler.rethrowIfBlockingException(t); // when appSecEnabled=true // try { - // InstrumentationErrors.incrementErrorCount(); + // InstrumentationErrors.recordError(t); // org.slf4j.LoggerFactory.getLogger((Class)ExceptionLogger.class) // .error("Failed to handle exception in instrumentation for ...", t); // System.exit(1); // when exitOnFailure=true @@ -71,12 +71,14 @@ public Size apply(final MethodVisitor mv, final Implementation.Context context) mv.visitTryCatchBlock(logStart, logEnd, eatException, "java/lang/Throwable"); mv.visitLabel(logStart); - // invoke incrementAndGet on our exception counter + mv.visitInsn(Opcodes.DUP); // stack: (top) throwable,throwable + // invoke recordError on our exception tracker mv.visitMethodInsn( Opcodes.INVOKESTATIC, "datadog/trace/bootstrap/InstrumentationErrors", - "incrementErrorCount", - "()V"); + "recordError", + "(Ljava/lang/Throwable;)V"); + // stack: (top) throwable mv.visitLdcInsn(Type.getType("L" + HANDLER_NAME + ";")); mv.visitMethodInsn( diff --git a/dd-java-agent/instrumentation-testing/src/main/groovy/datadog/trace/agent/test/InstrumentationSpecification.groovy b/dd-java-agent/instrumentation-testing/src/main/groovy/datadog/trace/agent/test/InstrumentationSpecification.groovy index 47e67ffbafc..5035aba0915 100644 --- a/dd-java-agent/instrumentation-testing/src/main/groovy/datadog/trace/agent/test/InstrumentationSpecification.groovy +++ b/dd-java-agent/instrumentation-testing/src/main/groovy/datadog/trace/agent/test/InstrumentationSpecification.groovy @@ -73,9 +73,9 @@ import java.lang.instrument.ClassFileTransformer import java.lang.instrument.Instrumentation import java.nio.ByteBuffer import java.util.concurrent.ConcurrentHashMap +import java.util.concurrent.CopyOnWriteArrayList import java.util.concurrent.TimeUnit import java.util.concurrent.TimeoutException -import java.util.concurrent.atomic.AtomicInteger import static datadog.communication.http.OkHttpUtils.buildHttpClient import static datadog.trace.api.ConfigDefaults.DEFAULT_AGENT_HOST @@ -165,7 +165,7 @@ abstract class InstrumentationSpecification extends DDSpecification implements A @SuppressWarnings('PropertyName') @Shared - AtomicInteger INSTRUMENTATION_ERROR_COUNT = new AtomicInteger(0) + List INSTRUMENTATION_ERRORS = new CopyOnWriteArrayList() // don't use mocks because it will break too many exhaustive interaction-verifying tests @SuppressWarnings('PropertyName') @@ -463,7 +463,7 @@ abstract class InstrumentationSpecification extends DDSpecification implements A if (forceAppSecActive) { ActiveSubsystems.APPSEC_ACTIVE = true } - InstrumentationErrors.resetErrorCount() + InstrumentationErrors.enableRecordingAndReset() ProcessTags.reset() } @@ -505,7 +505,8 @@ abstract class InstrumentationSpecification extends DDSpecification implements A spanFinishLocations.clear() originalToTrackingSpan.clear() } - assert InstrumentationErrors.errorCount == 0 + def instrumentationErrorCount = InstrumentationErrors.getErrors().size() + assert instrumentationErrorCount == 0, "${instrumentationErrorCount} instrumentation errors were seen:\n${InstrumentationErrors.getErrors().join("\n---\n")}" } private void doCheckRepeatedFinish() { @@ -547,7 +548,8 @@ abstract class InstrumentationSpecification extends DDSpecification implements A cleanupAfterAgent() // All cleanup should happen before these assertion. If not, a failing assertion may prevent cleanup - assert INSTRUMENTATION_ERROR_COUNT.get() == 0: INSTRUMENTATION_ERROR_COUNT.get() + " Instrumentation errors during test" + def instrumentationErrors = INSTRUMENTATION_ERRORS.size() + assert instrumentationErrors == 0: "${instrumentationErrors} Instrumentation errors during test:\n${INSTRUMENTATION_ERRORS.join("\n---\n")}" assert TRANSFORMED_CLASSES_TYPES.findAll { GlobalIgnores.isAdditionallyIgnored(it.getActualName()) @@ -641,7 +643,9 @@ abstract class InstrumentationSpecification extends DDSpecification implements A println "Unexpected instrumentation error when instrumenting ${typeName} on ${classLoader}" throwable.printStackTrace() - INSTRUMENTATION_ERROR_COUNT.incrementAndGet() + def stackTrace = new StringWriter() + throwable.printStackTrace(new PrintWriter(stackTrace)) + INSTRUMENTATION_ERRORS.add(stackTrace.toString()) } @Override From 332a4f4c6ba9f67c82e4e3e142c355665d41540e Mon Sep 17 00:00:00 2001 From: DJ Gregor Date: Sat, 15 Nov 2025 17:30:30 -0800 Subject: [PATCH 4/5] Check for blocked instrumentation in tests If there is a blocked instrumentation, the test will fail and the failure message will include the same information that is logged from MuzzleCheck. --- .../agent/tooling/muzzle/MuzzleCheck.java | 45 ++++++++++++++++++- .../test/InstrumentationSpecification.groovy | 4 ++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/dd-java-agent/agent-tooling/src/main/java/datadog/trace/agent/tooling/muzzle/MuzzleCheck.java b/dd-java-agent/agent-tooling/src/main/java/datadog/trace/agent/tooling/muzzle/MuzzleCheck.java index 2544e2732c8..fa408768870 100644 --- a/dd-java-agent/agent-tooling/src/main/java/datadog/trace/agent/tooling/muzzle/MuzzleCheck.java +++ b/dd-java-agent/agent-tooling/src/main/java/datadog/trace/agent/tooling/muzzle/MuzzleCheck.java @@ -3,7 +3,9 @@ import datadog.trace.agent.tooling.InstrumenterModule; import datadog.trace.agent.tooling.InstrumenterState; import datadog.trace.agent.tooling.Utils; +import java.util.Collections; import java.util.List; +import java.util.concurrent.CopyOnWriteArrayList; import net.bytebuddy.matcher.ElementMatcher; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -11,6 +13,9 @@ public class MuzzleCheck implements ElementMatcher { private static final Logger log = LoggerFactory.getLogger(MuzzleCheck.class); + private static final List ERRORS = new CopyOnWriteArrayList<>(); + private static volatile boolean recordErrors = false; + private final int instrumentationId; private final String instrumentationClass; private final ReferenceProvider runtimeMuzzleReferences; @@ -33,9 +38,13 @@ public boolean matches(ClassLoader classLoader) { InstrumenterState.applyInstrumentation(classLoader, instrumentationId); } else { InstrumenterState.blockInstrumentation(classLoader, instrumentationId); - if (log.isDebugEnabled()) { + if (recordErrors || log.isDebugEnabled()) { final List mismatches = muzzle.getMismatchedReferenceSources(classLoader); + if (recordErrors) { + recordMuzzleMismatch( + InstrumenterState.describe(instrumentationId), classLoader, mismatches); + } log.debug( "Muzzled - {} instrumentation.target.classloader={}", InstrumenterState.describe(instrumentationId), @@ -61,4 +70,38 @@ private ReferenceMatcher muzzle() { } return muzzle; } + + /** + * Record a muzzle mismatch error for test visibility. + * + * @param instrumentationDescription the description of the instrumentation that was blocked + * @param classLoader the classloader where the instrumentation was blocked + * @param mismatches the list of mismatch details + */ + private static void recordMuzzleMismatch( + String instrumentationDescription, + ClassLoader classLoader, + List mismatches) { + StringBuilder sb = new StringBuilder(); + sb.append("Muzzled - ") + .append(instrumentationDescription) + .append(" instrumentation.target.classloader=") + .append(classLoader) + .append("\n"); + for (Reference.Mismatch mismatch : mismatches) { + sb.append(" Mismatch: ").append(mismatch).append("\n"); + } + ERRORS.add(sb.toString()); + } + + // Visible for testing + public static void enableRecordingAndReset() { + recordErrors = true; + ERRORS.clear(); + } + + // Visible for testing + public static List getErrors() { + return Collections.unmodifiableList(ERRORS); + } } diff --git a/dd-java-agent/instrumentation-testing/src/main/groovy/datadog/trace/agent/test/InstrumentationSpecification.groovy b/dd-java-agent/instrumentation-testing/src/main/groovy/datadog/trace/agent/test/InstrumentationSpecification.groovy index 5035aba0915..65323d3de1a 100644 --- a/dd-java-agent/instrumentation-testing/src/main/groovy/datadog/trace/agent/test/InstrumentationSpecification.groovy +++ b/dd-java-agent/instrumentation-testing/src/main/groovy/datadog/trace/agent/test/InstrumentationSpecification.groovy @@ -25,6 +25,7 @@ import datadog.trace.agent.tooling.InstrumenterModule import datadog.trace.agent.tooling.TracerInstaller import datadog.trace.agent.tooling.bytebuddy.matcher.ClassLoaderMatchers import datadog.trace.agent.tooling.bytebuddy.matcher.GlobalIgnores +import datadog.trace.agent.tooling.muzzle.MuzzleCheck import datadog.trace.api.Config import datadog.trace.api.IdGenerationStrategy import datadog.trace.api.ProcessTags @@ -464,6 +465,7 @@ abstract class InstrumentationSpecification extends DDSpecification implements A ActiveSubsystems.APPSEC_ACTIVE = true } InstrumentationErrors.enableRecordingAndReset() + MuzzleCheck.enableRecordingAndReset() ProcessTags.reset() } @@ -507,6 +509,8 @@ abstract class InstrumentationSpecification extends DDSpecification implements A } def instrumentationErrorCount = InstrumentationErrors.getErrors().size() assert instrumentationErrorCount == 0, "${instrumentationErrorCount} instrumentation errors were seen:\n${InstrumentationErrors.getErrors().join("\n---\n")}" + def muzzleErrorCount = MuzzleCheck.getErrors().size() + assert muzzleErrorCount == 0, "${muzzleErrorCount} muzzle errors were seen:\n${MuzzleCheck.getErrors().join("\n---\n")}" } private void doCheckRepeatedFinish() { From e9388b2bab88fe86a4a4ee8742e2ba1dac6ea351 Mon Sep 17 00:00:00 2001 From: DJ Gregor Date: Sat, 15 Nov 2025 17:54:12 -0800 Subject: [PATCH 5/5] Fail fast on muzzle or instrumentation errors --- .../agent/test/asserts/ListWriterAssert.groovy | 18 +++++++++++++++++- .../trace/common/writer/ListWriter.java | 9 ++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/dd-java-agent/testing/src/main/groovy/datadog/trace/agent/test/asserts/ListWriterAssert.groovy b/dd-java-agent/testing/src/main/groovy/datadog/trace/agent/test/asserts/ListWriterAssert.groovy index 166eb92f6b6..18b336be32e 100644 --- a/dd-java-agent/testing/src/main/groovy/datadog/trace/agent/test/asserts/ListWriterAssert.groovy +++ b/dd-java-agent/testing/src/main/groovy/datadog/trace/agent/test/asserts/ListWriterAssert.groovy @@ -2,6 +2,8 @@ package datadog.trace.agent.test.asserts import static TraceAssert.assertTrace +import datadog.trace.agent.tooling.muzzle.MuzzleCheck +import datadog.trace.bootstrap.InstrumentationErrors import datadog.trace.common.writer.ListWriter import datadog.trace.core.DDSpan import groovy.transform.stc.ClosureParams @@ -47,7 +49,15 @@ class ListWriterAssert { @ClosureParams(value = SimpleType, options = ['datadog.trace.agent.test.asserts.ListWriterAssert']) @DelegatesTo(value = ListWriterAssert, strategy = Closure.DELEGATE_FIRST) Closure spec) { try { - writer.waitForTraces(expectedSize) + // Fail fast if we see any muzzle errors or instrumentation errors + for (int timeoutRemaining = ListWriter.WAIT_FOR_TRACES_TIMEOUT_SECONDS; timeoutRemaining > 0; timeoutRemaining--) { + if (writer.waitForTracesMax(expectedSize, 1)) { + break + } + assertNoErrors() + } + assertNoErrors() // even if we break immediately above, make sure we check for errors at least once + writer.waitForTraces(expectedSize, 0) def array = writer.toArray() assert array.length == expectedSize def traces = (Arrays.asList(array) as List>) @@ -91,6 +101,12 @@ class ListWriterAssert { } } + static void assertNoErrors() { + assert InstrumentationErrors.getErrors().size() == 0 : "Instrumentation errors were seen, failing fast" + assert MuzzleCheck.getErrors().size() == 0 : "Muzzle errors were seen, failing fast" + // InstrumentationSpecification.{cleanup,cleanupAfterAgent} will provide details later + } + void sortSpansByStart() { traces = traces.collect { return new ArrayList(it).sort { a, b -> diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/ListWriter.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/ListWriter.java index dec3a8eb9e5..7b2c9e72a94 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/ListWriter.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/ListWriter.java @@ -18,6 +18,8 @@ /** List writer used by tests mostly */ public class ListWriter extends CopyOnWriteArrayList> implements Writer { + public static final int WAIT_FOR_TRACES_TIMEOUT_SECONDS = 20; + private static final Logger log = LoggerFactory.getLogger(ListWriter.class); private static final Filter ACCEPT_ALL = trace -> true; @@ -85,7 +87,12 @@ public boolean waitForTracesMax(final int number, int seconds) throws Interrupte } public void waitForTraces(final int number) throws InterruptedException, TimeoutException { - if (!waitForTracesMax(number, 20)) { + waitForTraces(number, WAIT_FOR_TRACES_TIMEOUT_SECONDS); + } + + public void waitForTraces(final int number, final int seconds) + throws InterruptedException, TimeoutException { + if (!waitForTracesMax(number, seconds)) { String msg = "Timeout waiting for " + number