Skip to content

Commit 24f992f

Browse files
committed
wip
Signed-off-by: Attila Mészáros <a_meszaros@apple.com>
1 parent d6f0b48 commit 24f992f

7 files changed

Lines changed: 39 additions & 48 deletions

File tree

observability/josdk-operator-metrics-dashboard.json

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@
103103
"uid": "prometheus"
104104
},
105105
"editorMode": "code",
106-
"expr": "sum(rate(operator_sdk_reconciliations_started_total{service_name=\"josdk\"}[5m])) by (controller_name)",
106+
"expr": "sum(rate(reconciliations_started_total{service_name=\"josdk\"}[5m])) by (controller_name)",
107107
"legendFormat": "{{controller_name}}",
108108
"range": true,
109109
"refId": "A"
@@ -224,7 +224,7 @@
224224
"uid": "prometheus"
225225
},
226226
"editorMode": "code",
227-
"expr": "sum(rate(operator_sdk_reconciliations_success_total{service_name=\"josdk\"}[5m])) by (controller_name)",
227+
"expr": "sum(rate(reconciliations_success_total{service_name=\"josdk\"}[5m])) by (controller_name)",
228228
"legendFormat": "Success - {{controller_name}}",
229229
"range": true,
230230
"refId": "A"
@@ -235,7 +235,7 @@
235235
"uid": "prometheus"
236236
},
237237
"editorMode": "code",
238-
"expr": "sum(rate(operator_sdk_reconciliations_failure_total{service_name=\"josdk\"}[5m])) by (controller_name)",
238+
"expr": "sum(rate(reconciliations_failure_total{service_name=\"josdk\"}[5m])) by (controller_name)",
239239
"legendFormat": "Failure - {{controller_name}}",
240240
"range": true,
241241
"refId": "B"
@@ -302,7 +302,7 @@
302302
"uid": "prometheus"
303303
},
304304
"editorMode": "code",
305-
"expr": "sum(operator_sdk_reconciliations_executions{service_name=\"josdk\"})",
305+
"expr": "sum(reconciliations_executions{service_name=\"josdk\"})",
306306
"legendFormat": "Executing",
307307
"range": true,
308308
"refId": "A"
@@ -369,7 +369,7 @@
369369
"uid": "prometheus"
370370
},
371371
"editorMode": "code",
372-
"expr": "sum(operator_sdk_reconciliations_active{service_name=\"josdk\"})",
372+
"expr": "sum(reconciliations_active{service_name=\"josdk\"})",
373373
"legendFormat": "Active",
374374
"range": true,
375375
"refId": "A"
@@ -430,7 +430,7 @@
430430
"uid": "prometheus"
431431
},
432432
"editorMode": "code",
433-
"expr": "sum(operator_sdk_reconciliations_started_total{service_name=\"josdk\"})",
433+
"expr": "sum(reconciliations_started_total{service_name=\"josdk\"})",
434434
"legendFormat": "Total",
435435
"range": true,
436436
"refId": "A"
@@ -495,7 +495,7 @@
495495
"uid": "prometheus"
496496
},
497497
"editorMode": "code",
498-
"expr": "sum(rate(operator_sdk_reconciliations_failure_total{service_name=\"josdk\"}[5m]))",
498+
"expr": "sum(rate(reconciliations_failure_total{service_name=\"josdk\"}[5m]))",
499499
"legendFormat": "Error Rate",
500500
"range": true,
501501
"refId": "A"
@@ -654,7 +654,7 @@
654654
"uid": "prometheus"
655655
},
656656
"editorMode": "code",
657-
"expr": "histogram_quantile(0.50, sum(rate(operator_sdk_reconciliations_execution_seconds_bucket{service_name=\"josdk\"}[5m])) by (le, controller_name))",
657+
"expr": "histogram_quantile(0.50, sum(rate(reconciliations_execution_seconds_bucket{service_name=\"josdk\"}[5m])) by (le, controller_name))",
658658
"legendFormat": "p50 - {{controller_name}}",
659659
"range": true,
660660
"refId": "A"
@@ -665,7 +665,7 @@
665665
"uid": "prometheus"
666666
},
667667
"editorMode": "code",
668-
"expr": "histogram_quantile(0.95, sum(rate(operator_sdk_reconciliations_execution_seconds_bucket{service_name=\"josdk\"}[5m])) by (le, controller_name))",
668+
"expr": "histogram_quantile(0.95, sum(rate(reconciliations_execution_seconds_bucket{service_name=\"josdk\"}[5m])) by (le, controller_name))",
669669
"legendFormat": "p95 - {{controller_name}}",
670670
"range": true,
671671
"refId": "B"
@@ -676,7 +676,7 @@
676676
"uid": "prometheus"
677677
},
678678
"editorMode": "code",
679-
"expr": "histogram_quantile(0.99, sum(rate(operator_sdk_reconciliations_execution_seconds_bucket{service_name=\"josdk\"}[5m])) by (le, controller_name))",
679+
"expr": "histogram_quantile(0.99, sum(rate(reconciliations_execution_seconds_bucket{service_name=\"josdk\"}[5m])) by (le, controller_name))",
680680
"legendFormat": "p99 - {{controller_name}}",
681681
"range": true,
682682
"refId": "C"
@@ -766,7 +766,7 @@
766766
"uid": "prometheus"
767767
},
768768
"editorMode": "code",
769-
"expr": "sum(rate(operator_sdk_events_received_total{service_name=\"josdk\"}[5m])) by (event, action)",
769+
"expr": "sum(rate(events_received_total{service_name=\"josdk\"}[5m])) by (event, action)",
770770
"legendFormat": "{{event}} - {{action}}",
771771
"range": true,
772772
"refId": "A"
@@ -856,7 +856,7 @@
856856
"uid": "prometheus"
857857
},
858858
"editorMode": "code",
859-
"expr": "sum(rate(operator_sdk_reconciliations_failure_total{service_name=\"josdk\"}[5m])) by (controller_name)",
859+
"expr": "sum(rate(reconciliations_failure_total{service_name=\"josdk\"}[5m])) by (controller_name)",
860860
"legendFormat": "{{controller_name}}",
861861
"range": true,
862862
"refId": "A"
@@ -946,7 +946,7 @@
946946
"uid": "prometheus"
947947
},
948948
"editorMode": "code",
949-
"expr": "sum(rate(operator_sdk_controllers_success_total{service_name=\"josdk\"}[5m])) by (controller_name)",
949+
"expr": "sum(rate(controllers_success_total{service_name=\"josdk\"}[5m])) by (controller_name)",
950950
"legendFormat": "Success - {{controller_name}}",
951951
"range": true,
952952
"refId": "A"
@@ -957,7 +957,7 @@
957957
"uid": "prometheus"
958958
},
959959
"editorMode": "code",
960-
"expr": "sum(rate(operator_sdk_controllers_failure_total{service_name=\"josdk\"}[5m])) by (controller_name)",
960+
"expr": "sum(rate(controllers_failure_total{service_name=\"josdk\"}[5m])) by (controller_name)",
961961
"legendFormat": "Failure - {{controller_name}}",
962962
"range": true,
963963
"refId": "B"
@@ -1047,7 +1047,7 @@
10471047
"uid": "prometheus"
10481048
},
10491049
"editorMode": "code",
1050-
"expr": "sum(rate(operator_sdk_events_delete_total{service_name=\"josdk\"}[5m])) by (controller_name)",
1050+
"expr": "sum(rate(events_delete_total{service_name=\"josdk\"}[5m])) by (controller_name)",
10511051
"legendFormat": "{{controller_name}}",
10521052
"range": true,
10531053
"refId": "A"
@@ -1145,7 +1145,7 @@
11451145
"uid": "prometheus"
11461146
},
11471147
"editorMode": "code",
1148-
"expr": "sum(rate(operator_sdk_reconciliations_retries_total{service_name=\"josdk\"}[5m])) by (controller_name)",
1148+
"expr": "sum(rate(reconciliations_retries_total{service_name=\"josdk\"}[5m])) by (controller_name)",
11491149
"legendFormat": "Retries - {{controller_name}}",
11501150
"range": true,
11511151
"refId": "A"

sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/MetricsHandlingReconciler1.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ public UpdateControl<MetricsHandlingCustomResource1> reconcile(
6969

7070
var spec = resource.getSpec();
7171
if (spec != null) {
72-
status.setObservedNumber(spec.getObservedNumber());
72+
status.setObservedNumber(spec.getNumber());
7373
}
7474

7575
log.info("Successfully reconciled resource: {}", name);

sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/MetricsHandlingReconciler2.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ public UpdateControl<MetricsHandlingCustomResource2> reconcile(
6262

6363
var spec = resource.getSpec();
6464
if (spec != null) {
65-
status.setObservedNumber(spec.getObservedNumber());
65+
status.setObservedNumber(spec.getNumber());
6666
}
6767

6868
log.info("Successfully reconciled resource: {}", name);

sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/MetricsHandlingSampleOperator.java

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@
4747
public class MetricsHandlingSampleOperator {
4848

4949
private static final Logger log = LoggerFactory.getLogger(MetricsHandlingSampleOperator.class);
50-
public static final String OPERATOR_SDK_METRICS_PREFIX = "operator.sdk";
5150

5251
/**
5352
* Based on env variables a different flavor of Reconciler is used, showcasing how the same logic
@@ -57,7 +56,7 @@ public static void main(String[] args) {
5756
log.info("Metrics Handling Sample Operator starting!");
5857

5958
// Load configuration from config.yaml
60-
Metrics metrics = initOTLPMetrics(false);
59+
Metrics metrics = initOTLPMetrics(true);
6160
Operator operator =
6261
new Operator(o -> o.withStopOnInformerErrorDuringStartup(false).withMetrics(metrics));
6362

@@ -77,11 +76,6 @@ public static void main(String[] args) {
7776
}
7877
var otlpConfig =
7978
new OtlpConfig() {
80-
@Override
81-
public String prefix() {
82-
return OPERATOR_SDK_METRICS_PREFIX;
83-
}
84-
8579
@Override
8680
public @Nullable String get(String key) {
8781
return configProperties.get(key);
@@ -103,25 +97,19 @@ public Map<String, String> resourceAttributes() {
10397
LoggingMeterRegistry loggingRegistry =
10498
new LoggingMeterRegistry(
10599
new LoggingRegistryConfig() {
106-
@Override
107-
public String prefix() {
108-
return OPERATOR_SDK_METRICS_PREFIX;
109-
}
110-
111100
@Override
112101
public String get(String key) {
113102
return null;
114103
}
115104

116105
@Override
117106
public Duration step() {
118-
return Duration.ofSeconds(15);
107+
return Duration.ofSeconds(10);
119108
}
120109
},
121110
Clock.SYSTEM);
122111
compositeRegistry.add(loggingRegistry);
123112
}
124-
125113
// Register JVM and system metrics
126114
log.info("Registering JVM and system metrics...");
127115

sample-operators/metrics-processing/src/main/java/io/javaoperatorsdk/operator/sample/metrics/customresource/MetricsHandlingSpec.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@ public class MetricsHandlingSpec {
1919

2020
private int number;
2121

22-
public int getObservedNumber() {
22+
public int getNumber() {
2323
return number;
2424
}
2525

26-
public void setObservedNumber(int observedNumber) {
27-
this.number = observedNumber;
26+
public void setNumber(int number) {
27+
this.number = number;
2828
}
2929
}

sample-operators/metrics-processing/src/main/resources/otlp-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,6 @@ otlp:
1818
# OTLP Collector endpoint - see observability/install-observability.sh for setup
1919
# url: "http://localhost:4318/v1/metrics"
2020
url: "http://otel-collector-collector.observability.svc.cluster.local:4318/v1/metrics"
21-
step: 15s
21+
step: 10s
2222
batchSize: 15000
2323
aggregationTemporality: "cumulative"

sample-operators/metrics-processing/src/test/java/io/javaoperatorsdk/operator/sample/metrics/MetricsHandlingE2E.java

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -80,15 +80,18 @@ boolean isLocal() {
8080
.build();
8181

8282
@BeforeAll
83-
void setupObservability() {
83+
void setupObservability() throws InterruptedException {
8484
log.info("Setting up observability stack...");
8585
installObservabilityServices();
8686
// Set up port forwarding to Prometheus
87+
log.info("Setting up port forwarding for Prometheus");
8788
setupPrometheusPortForward();
8889
if (isLocal()) {
90+
log.info("Setting up port forwarding for Otel collector and grafana");
8991
setupPortForwardForOtelCollector();
9092
setupPortForwardForGrafana();
9193
}
94+
Thread.sleep(2000);
9295
}
9396

9497
@AfterAll
@@ -140,7 +143,7 @@ private void verifyPrometheusMetrics() throws Exception {
140143
String prometheusUrl = "http://localhost:" + localPort;
141144

142145
// Verify reconciliation started metrics
143-
String startedQuery = "operator_sdk_reconciliations_started_total";
146+
String startedQuery = "reconciliations_started_total";
144147
await()
145148
.atMost(Duration.ofSeconds(60))
146149
.pollInterval(Duration.ofSeconds(5))
@@ -149,35 +152,35 @@ private void verifyPrometheusMetrics() throws Exception {
149152
String result = queryPrometheus(prometheusUrl, startedQuery);
150153
log.info("Reconciliations started metric: {}", result);
151154
assertThat(result).contains("\"status\":\"success\"");
152-
assertThat(result).contains("operator_sdk_reconciliations_started_total");
155+
assertThat(result).contains("reconciliations_started_total");
153156
});
154157

155158
// Verify success metrics
156-
String successQuery = "operator_sdk_reconciliations_success_total";
159+
String successQuery = "reconciliations_success_total";
157160
await()
158161
.atMost(Duration.ofSeconds(30))
159162
.untilAsserted(
160163
() -> {
161164
String result = queryPrometheus(prometheusUrl, successQuery);
162165
log.info("Reconciliations success metric: {}", result);
163166
assertThat(result).contains("\"status\":\"success\"");
164-
assertThat(result).contains("operator_sdk_reconciliations_success_total");
167+
assertThat(result).contains("reconciliations_success_total");
165168
});
166169

167170
// Verify failure metrics
168-
String failureQuery = "operator_sdk_reconciliations_failure_total";
171+
String failureQuery = "reconciliations_failure_total";
169172
await()
170173
.atMost(Duration.ofSeconds(30))
171174
.untilAsserted(
172175
() -> {
173176
String result = queryPrometheus(prometheusUrl, failureQuery);
174177
log.info("Reconciliations failure metric: {}", result);
175178
assertThat(result).contains("\"status\":\"success\"");
176-
assertThat(result).contains("operator_sdk_reconciliations_failure_total");
179+
assertThat(result).contains("reconciliations_failure_total");
177180
});
178181

179182
// Verify controller execution metrics
180-
String controllerQuery = "operator_sdk_controllers_success_total";
183+
String controllerQuery = "controllers_success_total";
181184
await()
182185
.atMost(Duration.ofSeconds(30))
183186
.untilAsserted(
@@ -188,7 +191,7 @@ private void verifyPrometheusMetrics() throws Exception {
188191
});
189192

190193
// Verify execution time metrics
191-
String executionTimeQuery = "operator_sdk_reconciliations_execution_seconds_count";
194+
String executionTimeQuery = "reconciliations_execution_seconds_count";
192195
await()
193196
.atMost(Duration.ofSeconds(30))
194197
.untilAsserted(
@@ -231,7 +234,7 @@ private MetricsHandlingCustomResource1 createResource1(String name, int number)
231234
resource.getMetadata().setName(name);
232235

233236
MetricsHandlingSpec spec = new MetricsHandlingSpec();
234-
spec.setObservedNumber(number);
237+
spec.setNumber(number);
235238
resource.setSpec(spec);
236239

237240
return resource;
@@ -242,7 +245,7 @@ private MetricsHandlingCustomResource2 createResource2(String name, int number)
242245
resource.getMetadata().setName(name);
243246

244247
MetricsHandlingSpec spec = new MetricsHandlingSpec();
245-
spec.setObservedNumber(number);
248+
spec.setNumber(number);
246249
resource.setSpec(spec);
247250

248251
return resource;
@@ -345,7 +348,7 @@ private LocalPortForward setupPortForward(String appName, int port) {
345348
.pods()
346349
.inNamespace(OBSERVABILITY_NAMESPACE)
347350
.withName(pod.getMetadata().getName())
348-
.portForward(port);
351+
.portForward(port, port);
349352

350353
log.info(
351354
"{} port forward established on local port: {}", appName, portForward.getLocalPort());

0 commit comments

Comments
 (0)