-
Notifications
You must be signed in to change notification settings - Fork 134
feat: add initial opentelemetry tracing to big query HTTP requests #4126
Changes from 1 commit
f5fa8ea
4faaa82
4202fb3
0372db5
de0ee8a
68704bd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,195 @@ | ||
| /* | ||
| * Copyright 2026 Google LLC | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package com.google.cloud.bigquery.spi.v2; | ||
|
|
||
| import com.google.api.client.http.*; | ||
| import com.google.api.core.InternalApi; | ||
| import com.google.cloud.bigquery.telemetry.BigQueryTelemetryTracer; | ||
| import io.opentelemetry.api.common.AttributeKey; | ||
| import io.opentelemetry.api.trace.Span; | ||
| import io.opentelemetry.api.trace.StatusCode; | ||
| import io.opentelemetry.api.trace.Tracer; | ||
| import org.checkerframework.checker.nullness.qual.Nullable; | ||
|
|
||
| import java.io.IOException; | ||
|
|
||
| /** | ||
| * HttpRequestInitializer that wraps a delegate initializer, intercepts all HTTP requests, | ||
| * adds OpenTelemetry tracing and then invokes delegate interceptor. | ||
| */ | ||
| @InternalApi | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's mark all new classes as |
||
| public class HttpTracingRequestInitializer implements HttpRequestInitializer { | ||
|
|
||
| // HTTP Specific Telemetry Attributes | ||
| public static final AttributeKey<String> HTTP_REQUEST_METHOD = | ||
| AttributeKey.stringKey("http.request.method"); | ||
| public static final AttributeKey<String> URL_FULL = AttributeKey.stringKey("url.full"); | ||
| public static final AttributeKey<String> URL_TEMPLATE = AttributeKey.stringKey("url.template"); | ||
| public static final AttributeKey<String> URL_DOMAIN = AttributeKey.stringKey("url.domain"); | ||
| public static final AttributeKey<Long> HTTP_RESPONSE_STATUS_CODE = | ||
| AttributeKey.longKey("http.response.status_code"); | ||
| public static final AttributeKey<Long> HTTP_REQUEST_RESEND_COUNT = | ||
| AttributeKey.longKey("http.request.resend_count"); | ||
| public static final AttributeKey<Long> HTTP_REQUEST_BODY_SIZE = | ||
| AttributeKey.longKey("http.request.body.size"); | ||
| public static final AttributeKey<Long> HTTP_RESPONSE_BODY_SIZE = | ||
| AttributeKey.longKey("http.response.body.size"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The OpenTelemetry attribute keys being defined here are based on an older version of the semantic conventions. The conventions have been stabilized and it's recommended to use the new key names to ensure compatibility with standard OpenTelemetry tooling. For example:
Consider adding a dependency on public static final AttributeKey<String> HTTP_REQUEST_METHOD =
AttributeKey.stringKey("http.method");
public static final AttributeKey<String> URL_FULL = AttributeKey.stringKey("url.full");
public static final AttributeKey<String> URL_TEMPLATE = AttributeKey.stringKey("url.template");
public static final AttributeKey<String> URL_DOMAIN = AttributeKey.stringKey("url.domain");
public static final AttributeKey<Long> HTTP_RESPONSE_STATUS_CODE =
AttributeKey.longKey("http.status_code");
public static final AttributeKey<Long> HTTP_REQUEST_RESEND_COUNT =
AttributeKey.longKey("http.request.resend_count");
public static final AttributeKey<Long> HTTP_REQUEST_BODY_SIZE =
AttributeKey.longKey("http.request_content_length");
public static final AttributeKey<Long> HTTP_RESPONSE_BODY_SIZE =
AttributeKey.longKey("http.response_content_length");
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FYI I doubled, checked and it looks like what we're using is correct: https://opentelemetry.io/docs/specs/semconv/http/http-spans/ |
||
|
|
||
| private final HttpRequestInitializer delegate; | ||
| private final Tracer tracer; | ||
| private static final String BIGQUERY_DOMAIN = "bigquery.googleapis.com"; | ||
|
|
||
| public HttpTracingRequestInitializer(HttpRequestInitializer delegate, Tracer tracer) { | ||
| this.delegate = delegate; | ||
| this.tracer = tracer; | ||
| } | ||
|
|
||
| @Override | ||
| public void initialize(HttpRequest request) throws IOException { | ||
| if (delegate != null) { | ||
| delegate.initialize(request); | ||
| } | ||
|
|
||
| if (tracer == null) { | ||
| return; | ||
| } | ||
|
|
||
| String httpMethod = request.getRequestMethod(); | ||
| String url = request.getUrl().build(); | ||
| String host = request.getUrl().getHost(); | ||
| Integer port = request.getUrl().getPort(); | ||
|
|
||
| Long requestBodySize = getRequestBodySize(request); | ||
|
|
||
| Span span = getSpan(httpMethod, url, host, port, requestBodySize); | ||
|
|
||
| // Wrap the existing response interceptor | ||
| HttpResponseInterceptor originalInterceptor = request.getResponseInterceptor(); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are exactly one of these guaranteed to run to close the span? |
||
| request.setResponseInterceptor( | ||
| response -> { | ||
| try { | ||
| addSuccessResponseToSpan(response, httpMethod, span); | ||
| if (originalInterceptor != null) { | ||
| originalInterceptor.interceptResponse(response); | ||
| } | ||
| } finally { | ||
| span.end(); | ||
| } | ||
| }); | ||
|
|
||
| // Wrap the existing unsuccessful response handler | ||
| HttpUnsuccessfulResponseHandler originalHandler = request.getUnsuccessfulResponseHandler(); | ||
| request.setUnsuccessfulResponseHandler( | ||
| (request1, response, supportsRetry) -> { | ||
| addErrorResponseToSpan(response, span); | ||
| try { | ||
| if (originalHandler != null) { | ||
| return originalHandler.handleResponse(request1, response, supportsRetry); | ||
| } | ||
| return false; | ||
| } catch (IOException e) { | ||
| addExceptionToSpan(e, span); | ||
| throw e; | ||
| } finally { | ||
| span.end(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will this result in ending the span before a retry can occur? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can this happen twice if the request is retried? |
||
| } | ||
| }); | ||
| } | ||
|
|
||
| private static void addExceptionToSpan(IOException e, Span span) { | ||
| span.recordException(e); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What exactly does Same question for
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. recordException adds an event type "exception" to a span (does not affect attributes). See screenshot. For setStatus, it sets the overall status of the span (also not an attribute), default is Unset. See screenshot. Let me know if you want me to flag either of these for confirmation from Wes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. recordException -- please skip it, we will use logs for this. The span could have the exception type if it is convenient. |
||
| span.setAttribute(BigQueryTelemetryTracer.EXCEPTION_TYPE, e.getClass().getName()); | ||
| span.setAttribute(BigQueryTelemetryTracer.ERROR_TYPE, e.getClass().getSimpleName()); | ||
| span.setAttribute(BigQueryTelemetryTracer.STATUS_MESSAGE, e.getMessage() != null ? e.getMessage() : e.getClass().getName()); | ||
| span.setStatus(StatusCode.ERROR, e.getMessage()); | ||
| } | ||
|
|
||
| private static void addErrorResponseToSpan(HttpResponse response, Span span) { | ||
| int statusCode = response.getStatusCode(); | ||
| span.setAttribute(HTTP_RESPONSE_STATUS_CODE, statusCode); | ||
| String errorMessage = "HTTP " + statusCode; | ||
| try { | ||
| String statusMessage = response.getStatusMessage(); | ||
| if (statusMessage != null && !statusMessage.isEmpty()) { | ||
| errorMessage = statusMessage; | ||
| } | ||
| } catch (Exception ex) { | ||
| // Ignore | ||
| } | ||
| span.setAttribute(BigQueryTelemetryTracer.STATUS_MESSAGE, errorMessage); | ||
|
ldetmer marked this conversation as resolved.
Outdated
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are there any data privacy concerns with exporting the error message? I assume BigQuery error messages could potentially contain sensitive details like SQL query snippets, table names, or row data. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good thought! if it's in OpenTelemetry semconv it should be OK. Otherwise let's document it at the enablement site. see go/client-libraries:o11y-controls |
||
| span.setAttribute(BigQueryTelemetryTracer.ERROR_TYPE, String.valueOf(statusCode)); | ||
| span.setStatus(StatusCode.ERROR, errorMessage); | ||
| } | ||
|
|
||
| private static void addSuccessResponseToSpan(HttpResponse response, String httpMethod, Span span) { | ||
| String actualMethod = response.getRequest().getRequestMethod(); | ||
| if (actualMethod != null && httpMethod == null) { | ||
| span.updateName(actualMethod); | ||
| span.setAttribute(HTTP_REQUEST_METHOD, actualMethod); | ||
| } | ||
| int statusCode = response.getStatusCode(); | ||
| span.setAttribute(HTTP_RESPONSE_STATUS_CODE, statusCode); | ||
| try { | ||
| long contentLength = response.getHeaders().getContentLength(); | ||
| if (contentLength > 0) { | ||
| span.setAttribute(HTTP_RESPONSE_BODY_SIZE, contentLength); | ||
| } | ||
| } catch (Exception e) { | ||
| // Ignore - body size not available | ||
| } | ||
| if (statusCode >= 400) { | ||
| addErrorResponseToSpan(response, span); | ||
| } else { | ||
| span.setStatus(StatusCode.OK); | ||
| } | ||
| } | ||
|
|
||
| private Span getSpan(String httpMethod, String url, String host, Integer port, Long requestBodySize) { | ||
| //TODO: Determine span name: {method} {url.template} or {method} | ||
| Span span = | ||
| BigQueryTelemetryTracer.newSpanBuilder(tracer, httpMethod) | ||
| // OpenTelemetry semantic convention attributes | ||
| .setAttribute(HTTP_REQUEST_METHOD, httpMethod) | ||
| .setAttribute(URL_FULL, url) | ||
|
ldetmer marked this conversation as resolved.
Outdated
|
||
| .setAttribute(BigQueryTelemetryTracer.SERVER_ADDRESS, host) | ||
| .setAttribute(URL_DOMAIN, BIGQUERY_DOMAIN) | ||
|
ldetmer marked this conversation as resolved.
Outdated
|
||
| .setAttribute(BigQueryTelemetryTracer.RPC_SYSTEM_NAME , "http") | ||
| .startSpan(); | ||
|
|
||
| // TODO: add url template && resource name | ||
| if (port != null && port > 0) { | ||
| span.setAttribute(BigQueryTelemetryTracer.SERVER_PORT, port.longValue()); | ||
| } | ||
| if (requestBodySize != null && requestBodySize > 0) { | ||
| span.setAttribute(HTTP_REQUEST_BODY_SIZE, requestBodySize); | ||
| } | ||
| return span; | ||
| } | ||
|
|
||
| private static @Nullable Long getRequestBodySize(HttpRequest request) { | ||
| Long requestBodySize = null; | ||
| try { | ||
| HttpContent content = request.getContent(); | ||
| if (content != null) { | ||
| requestBodySize = content.getLength(); | ||
| } | ||
| } catch (Exception e) { | ||
| // Ignore - body size not available | ||
| } | ||
| return requestBodySize; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,71 @@ | ||
| /* | ||
| * Copyright 2026 Google LLC | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package com.google.cloud.bigquery.telemetry; | ||
|
|
||
| import com.google.api.core.InternalApi; | ||
| import io.opentelemetry.api.common.AttributeKey; | ||
| import io.opentelemetry.api.trace.SpanBuilder; | ||
| import io.opentelemetry.api.trace.SpanKind; | ||
| import io.opentelemetry.api.trace.Tracer; | ||
|
|
||
| /** General BigQuery Telemetry class that stores generic telemetry attributes | ||
| * and any associated logic to calculate. | ||
| */ | ||
| @InternalApi | ||
| public final class BigQueryTelemetryTracer { | ||
|
|
||
| private BigQueryTelemetryTracer() {} | ||
|
|
||
| // Common GCP Attributes | ||
| public static final AttributeKey<String> GCP_CLIENT_SERVICE = | ||
| AttributeKey.stringKey("gcp.client.service"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's OK to hardcode these keys in Bigquery for now, we may want to use them from Gax once they are stabilized. |
||
| public static final AttributeKey<String> GCP_CLIENT_VERSION = | ||
| AttributeKey.stringKey("gcp.client.version"); | ||
| public static final AttributeKey<String> GCP_CLIENT_REPO = | ||
| AttributeKey.stringKey("gcp.client.repo"); | ||
| public static final AttributeKey<String> GCP_CLIENT_ARTIFACT = | ||
| AttributeKey.stringKey("gcp.client.artifact"); | ||
| public static final AttributeKey<String> GCP_CLIENT_LANGUAGE = | ||
| AttributeKey.stringKey("gcp.client.language"); | ||
| public static final AttributeKey<String> GCP_RESOURCE_NAME = | ||
| AttributeKey.stringKey("gcp.resource.name"); | ||
| public static final AttributeKey<String> RPC_SYSTEM_NAME = AttributeKey.stringKey("rpc.system.name"); | ||
|
|
||
|
|
||
| // Common Error Attributes | ||
| public static final AttributeKey<String> ERROR_TYPE = AttributeKey.stringKey("error.type"); | ||
| public static final AttributeKey<String> EXCEPTION_TYPE = | ||
| AttributeKey.stringKey("exception.type"); | ||
| public static final AttributeKey<String> STATUS_MESSAGE = | ||
| AttributeKey.stringKey("status.message"); | ||
|
|
||
| // Common Server Attributes | ||
| public static final AttributeKey<String> SERVER_ADDRESS = | ||
| AttributeKey.stringKey("server.address"); | ||
| public static final AttributeKey<Long> SERVER_PORT = AttributeKey.longKey("server.port"); | ||
|
|
||
| public static SpanBuilder newSpanBuilder(Tracer tracer, String spanName) { | ||
| return tracer | ||
| .spanBuilder(spanName) | ||
| .setSpanKind(SpanKind.CLIENT) | ||
| .setAttribute(GCP_CLIENT_SERVICE, "bigquery") | ||
| .setAttribute(GCP_CLIENT_REPO, "googleapis/java-bigquery") | ||
| .setAttribute(GCP_CLIENT_ARTIFACT, "google-cloud-bigquery") | ||
| .setAttribute(GCP_CLIENT_LANGUAGE, "java"); | ||
| // TODO: add version | ||
| } | ||
|
ldetmer marked this conversation as resolved.
|
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,109 @@ | ||
| /* | ||
| * Copyright 2026 Google LLC | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package com.google.cloud.bigquery.spi.v2; | ||
|
|
||
| import static org.junit.jupiter.api.Assertions.assertEquals; | ||
|
|
||
|
|
||
| import com.google.api.client.http.GenericUrl; | ||
| import com.google.api.client.http.HttpRequest; | ||
| import com.google.api.client.http.HttpRequestFactory; | ||
| import com.google.api.client.http.HttpResponse; | ||
| import com.google.api.client.http.javanet.NetHttpTransport; | ||
| import com.google.cloud.bigquery.RetryContext; | ||
| import com.sun.net.httpserver.HttpServer; | ||
| import io.opentelemetry.api.common.AttributeKey; | ||
| import io.opentelemetry.api.trace.Tracer; | ||
| import io.opentelemetry.sdk.OpenTelemetrySdk; | ||
| import io.opentelemetry.sdk.testing.exporter.InMemorySpanExporter; | ||
| import io.opentelemetry.sdk.trace.SdkTracerProvider; | ||
| import io.opentelemetry.sdk.trace.data.SpanData; | ||
| import io.opentelemetry.sdk.trace.export.SimpleSpanProcessor; | ||
| import java.io.IOException; | ||
| import java.io.OutputStream; | ||
| import java.net.InetSocketAddress; | ||
| import java.util.List; | ||
| import org.junit.jupiter.api.AfterEach; | ||
| import org.junit.jupiter.api.BeforeEach; | ||
| import org.junit.jupiter.api.Test; | ||
|
|
||
| /** | ||
| * Integration test for HTTP tracing with real HTTP transport and server. | ||
| * This test verifies that OpenTelemetry tracing works correctly with actual network calls. | ||
| */ | ||
| public class HttpTracingIntegrationTest { | ||
|
|
||
| private InMemorySpanExporter spanExporter; | ||
| private HttpTracingRequestInitializer initializer; | ||
| private HttpServer testServer; | ||
| private int serverPort; | ||
|
|
||
| @BeforeEach | ||
| public void setUp() throws IOException { | ||
| // Set up OpenTelemetry with in-memory exporter | ||
| spanExporter = InMemorySpanExporter.create(); | ||
| SdkTracerProvider tracerProvider = | ||
| SdkTracerProvider.builder() | ||
| .addSpanProcessor(SimpleSpanProcessor.create(spanExporter)) | ||
| .build(); | ||
| OpenTelemetrySdk openTelemetry = | ||
| OpenTelemetrySdk.builder().setTracerProvider(tracerProvider).build(); | ||
| Tracer tracer = openTelemetry.getTracer("test-tracer"); | ||
| initializer = new HttpTracingRequestInitializer(null, tracer); | ||
|
|
||
| // Start a test HTTP server | ||
| testServer = HttpServer.create(new InetSocketAddress("localhost", 0), 0); | ||
| serverPort = testServer.getAddress().getPort(); | ||
| testServer.start(); | ||
| } | ||
|
|
||
| @AfterEach | ||
| public void tearDown() { | ||
| if (testServer != null) { | ||
| testServer.stop(0); | ||
| } | ||
| RetryContext.clearRetryAttempt(); | ||
| } | ||
|
|
||
| @Test | ||
| public void testHttpTracingWithRealServer() throws IOException { | ||
| testServer.createContext("/api/test", exchange -> { | ||
| String response = "{\"status\": \"ok\"}"; | ||
| exchange.getResponseHeaders().add("Content-Type", "application/json"); | ||
| exchange.sendResponseHeaders(200, response.getBytes().length); | ||
| try (OutputStream os = exchange.getResponseBody()) { | ||
| os.write(response.getBytes()); | ||
| } | ||
| }); | ||
|
|
||
| NetHttpTransport transport = new NetHttpTransport(); | ||
| HttpRequestFactory requestFactory = transport.createRequestFactory(initializer); | ||
| HttpRequest request = requestFactory.buildGetRequest( | ||
| new GenericUrl("http://localhost:" + serverPort + "/api/test")); | ||
|
|
||
| HttpResponse response = request.execute(); | ||
| assertEquals(200, response.getStatusCode()); | ||
| response.disconnect(); | ||
|
|
||
| List<SpanData> spans = spanExporter.getFinishedSpanItems(); | ||
| assertEquals(1, spans.size()); | ||
| SpanData span = spans.get(0); | ||
| assertEquals("GET", span.getAttributes().get(AttributeKey.stringKey("http.request.method"))); | ||
| assertEquals(200L, span.getAttributes().get(AttributeKey.longKey("http.response.status_code"))); | ||
| assertEquals("localhost", span.getAttributes().get(AttributeKey.stringKey("server.address"))); | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we need to put more thoughts about how to enable this feature.
This seems to be reusing the existing options which may or may not be a good practice. Because 1. Existing customers will also automatically get new Spans which they may not want. 2. Customers will get two different Spans which are not related at this moment.
Can we understand more about the use cases of the existing Spans?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1 to Blake's concerns. I'm leaning against reusing the existing options for the reasons he listed. Would it be possible to have something like
isHttpTelemetryTracingEnabled()to focus on the network-level tracing?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Our product decision today: we will preserve the existing spans and enablement and make non-breaking changes (adding attributes).
This continues to provide product-specific attributes and naming and avoids breaking BigQuery customers.
We considered creating a new enablement and forking spans but (1) the existing enablement in Beta allows for change and (2) it is very difficult to explain when a customer might want each type of CLIENT span.
In the future, we can decide whether it's worth it to adjust things like span name, structural hierarchy or enablement API by reviewing with the BQ team.