DataDog · sarahchen6 · Dec 12, 2025 · Dec 12, 2025 · Dec 12, 2025 · Dec 12, 2025
@@ -4,12 +4,24 @@ include:
   - local: ".gitlab/macrobenchmarks.yml"
   - local: ".gitlab/exploration-tests.yml"
   - local: ".gitlab/ci-visibility-tests.yml"
+  - project: 'DataDog/apm-reliability/apm-sdks-benchmarks'
+    file: '.gitlab/ci-java-spring-petclinic.yml'
+    ref: 'sarahchen6/run-java-microbenchmarks'
+  - project: 'DataDog/apm-reliability/apm-sdks-benchmarks'
+    file: '.gitlab/ci-java-insecure-bank.yml'
+    ref: 'sarahchen6/run-java-microbenchmarks'
 
 stages:
   - build
   - publish
   - shared-pipeline
   - benchmarks
+  - infrastructure
+  - java-spring-petclinic-tests
+  - java-spring-petclinic-macrobenchmarks
+  - java-startup-microbenchmarks
+  - java-load-microbenchmarks
+  - generate-slos
   - macrobenchmarks
   - tests
   - exploration-tests

@@ -30,82 +30,82 @@
     UPSTREAM_BRANCH: $CI_COMMIT_REF_NAME # The branch or tag name for which project is built.
     UPSTREAM_COMMIT_SHA: $CI_COMMIT_SHA # The commit revision the project is built for.
 
-benchmarks-startup:
-  extends: .benchmarks
-  script:
-    - !reference [ .benchmarks, script ]
-    - ./steps/capture-hardware-software-info.sh
-    - ./steps/run-benchmarks.sh startup
-    - ./steps/analyze-results.sh startup
+# benchmarks-startup:
+#   extends: .benchmarks
+#   script:
+#     - !reference [ .benchmarks, script ]
+#     - ./steps/capture-hardware-software-info.sh
+#     - ./steps/run-benchmarks.sh startup
+#     - ./steps/analyze-results.sh startup
 
-benchmarks-load:
-  extends: .benchmarks
-  script:
-    - !reference [ .benchmarks, script ]
-    - ./steps/capture-hardware-software-info.sh
-    - ./steps/run-benchmarks.sh load
-    - ./steps/analyze-results.sh load
+# benchmarks-load:
+#   extends: .benchmarks
+#   script:
+#     - !reference [ .benchmarks, script ]
+#     - ./steps/capture-hardware-software-info.sh
+#     - ./steps/run-benchmarks.sh load
+#     - ./steps/analyze-results.sh load
 
-benchmarks-dacapo:
-  extends: .benchmarks
-  script:
-    - !reference [ .benchmarks, script ]
-    - ./steps/capture-hardware-software-info.sh
-    - ./steps/run-benchmarks.sh dacapo
-    - ./steps/analyze-results.sh dacapo
+# benchmarks-dacapo:
+#   extends: .benchmarks
+#   script:
+#     - !reference [ .benchmarks, script ]
+#     - ./steps/capture-hardware-software-info.sh
+#     - ./steps/run-benchmarks.sh dacapo
+#     - ./steps/analyze-results.sh dacapo
 
-benchmarks-post-results:
-  extends: .benchmarks
-  tags: ["arch:amd64"]
-  script:
-    - !reference [ .benchmarks, script ]
-    - ./steps/upload-results-to-s3.sh
-    - ./steps/post-pr-comment.sh
-  needs:
-    - job: benchmarks-startup
-      artifacts: true
-    - job: benchmarks-load
-      artifacts: true
-    - job: benchmarks-dacapo
-      artifacts: true
+# benchmarks-post-results:
+#   extends: .benchmarks
+#   tags: ["arch:amd64"]
+#   script:
+#     - !reference [ .benchmarks, script ]
+#     - ./steps/upload-results-to-s3.sh
+#     - ./steps/post-pr-comment.sh
+#   needs:
+#     - job: benchmarks-startup
+#       artifacts: true
+#     - job: benchmarks-load
+#       artifacts: true
+#     - job: benchmarks-dacapo
+#       artifacts: true
 
-check-big-regressions:
-  extends: .benchmarks
-  needs:
-    - job: benchmarks-startup
-      artifacts: true
-    - job: benchmarks-dacapo
-      artifacts: true
-  when: on_success
-  tags: ["arch:amd64"]
-  rules:
-    - if: '$POPULATE_CACHE'
-      when: never
-    - if: '$CI_COMMIT_BRANCH =~ /backport-pr-/'
-      when: never
-    - if: '$CI_COMMIT_BRANCH !~ /^(master|release\/)/'
-      when: on_success
-    - when: never
-  # ARTIFACTS_DIR /go/src/github.com/DataDog/apm-reliability/dd-trace-java/reports/
-  # need to convert them
-  script:
-    - !reference [ .benchmarks, script ]
-    - | 
-      for benchmarkType in startup dacapo; do
-          find "$ARTIFACTS_DIR/$benchmarkType" -name "benchmark-baseline.json" -o -name "benchmark-candidate.json" | while read file; do
-            relpath="${file#$ARTIFACTS_DIR/$benchmarkType/}"
-            prefix="${relpath%/benchmark-*}" # Remove the trailing /benchmark-(baseline|candidate).json
-            prefix="${prefix#./}" # Remove any leading ./
-            prefix="${prefix//\//-}" # Replace / with -
-            case "$file" in
-              *benchmark-baseline.json) type="baseline" ;;
-              *benchmark-candidate.json) type="candidate" ;;
-            esac
-            echo "Moving $file to $ARTIFACTS_DIR/${type}-${prefix}.converted.json"
-            cp "$file" "$ARTIFACTS_DIR/${type}-${prefix}.converted.json"
-          done
-      done
-    - bp-runner $CI_PROJECT_DIR/.gitlab/benchmarks/bp-runner.fail-on-regression.yml --debug
+# check-big-regressions:
+#   extends: .benchmarks
+#   needs:
+#     - job: benchmarks-startup
+#       artifacts: true
+#     - job: benchmarks-dacapo
+#       artifacts: true
+#   when: on_success
+#   tags: ["arch:amd64"]
+#   rules:
+#     - if: '$POPULATE_CACHE'
+#       when: never
+#     - if: '$CI_COMMIT_BRANCH =~ /backport-pr-/'
+#       when: never
+#     - if: '$CI_COMMIT_BRANCH !~ /^(master|release\/)/'
+#       when: on_success
+#     - when: never
+#   # ARTIFACTS_DIR /go/src/github.com/DataDog/apm-reliability/dd-trace-java/reports/
+#   # need to convert them
+#   script:
+#     - !reference [ .benchmarks, script ]
+#     - | 
+#       for benchmarkType in startup dacapo; do
+#           find "$ARTIFACTS_DIR/$benchmarkType" -name "benchmark-baseline.json" -o -name "benchmark-candidate.json" | while read file; do
+#             relpath="${file#$ARTIFACTS_DIR/$benchmarkType/}"
+#             prefix="${relpath%/benchmark-*}" # Remove the trailing /benchmark-(baseline|candidate).json
+#             prefix="${prefix#./}" # Remove any leading ./
+#             prefix="${prefix//\//-}" # Replace / with -
+#             case "$file" in
+#               *benchmark-baseline.json) type="baseline" ;;
+#               *benchmark-candidate.json) type="candidate" ;;
+#             esac
+#             echo "Moving $file to $ARTIFACTS_DIR/${type}-${prefix}.converted.json"
+#             cp "$file" "$ARTIFACTS_DIR/${type}-${prefix}.converted.json"
+#           done
+#       done
+#     - bp-runner $CI_PROJECT_DIR/.gitlab/benchmarks/bp-runner.fail-on-regression.yml --debug
 
 .dsm-kafka-benchmarks:
   stage: benchmarks

@@ -92,12 +92,12 @@ check-slo-breaches:
       artifacts: true
     - job: otel-latest
       artifacts: true
-    - job: benchmarks-startup
-      artifacts: true
-    - job: benchmarks-load
-      artifacts: true
-    - job: benchmarks-dacapo
-      artifacts: true
+    # - job: benchmarks-startup
+    #   artifacts: true
+    # - job: benchmarks-load
+    #   artifacts: true
+    # - job: benchmarks-dacapo
+    #   artifacts: true
   script:
     # macrobenchmarks are located here, files are already in "converted" format
     - export ARTIFACTS_DIR="$(pwd)/platform/artifacts/" && mkdir -p "${ARTIFACTS_DIR}"

@@ -0,0 +1,36 @@
+# Thresholds set based on guidance in https://datadoghq.atlassian.net/wiki/x/LgI1LgE#How-to-choose-thresholds-for-pre-release-gates%3F
+
+experiments:
+  - name: Run SLO breach check
+    steps:
+      - name: SLO breach check
+        run: fail_on_breach
+        # https://datadoghq.atlassian.net/wiki/x/LgI1LgE#How-to-choose-a-warning-range-for-pre-release-gates%3F
+        warning_range: 10
+        # File spec
+        #   https://datadoghq.atlassian.net/wiki/x/LgI1LgE#Specification
+        # Measurements
+        #   https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario
+        scenarios:
+          # Note that the thresholds below are chosen based on the confidence interval with a 10% adjustment.
+
+          # Standard macrobenchmarks
+          # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=normal_operation%2Fonly-tracing&trendsType=scenario
+          - name: normal_operation/only-tracing
+            thresholds:
+              - agg_http_req_duration_p50 < 2.6 ms
+              - agg_http_req_duration_p99 < 8.5 ms
+          # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=normal_operation%2Fotel-latest&trendsType=scenario
+          - name: normal_operation/otel-latest
+            thresholds:
+              - agg_http_req_duration_p50 < 2.5 ms
+              - agg_http_req_duration_p99 < 10 ms
+
+          # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=high_load%2Fonly-tracing&trendsType=scenario
+          - name: high_load/only-tracing
+            thresholds:
+              - throughput > 1100.0 op/s
+          # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=high_load%2Fotel-latest&trendsType=scenario
+          - name: high_load/otel-latest
+            thresholds:
+              - throughput > 1100.0 op/s