From a79ea3ba1d50fff57272667230b16d472d15fdb3 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Fri, 13 Feb 2026 19:13:04 +0000 Subject: [PATCH] feat: add Storage API Acceleration via Apache Arrow Deserialization This commit adds support for picosecond timestamp precision in the BigQuery Storage Read API with Apache Arrow. Due to limitations in the Apache Arrow JavaScript library, a validation hook is added to fall back to microsecond precision when picosecond precision is requested, preventing deserialization errors. Changes: - Updated `arrow.proto` to include `PicosecondTimestampPrecision` enum. - Regenerated protos with new field. - Injected validation hook in `BigQueryReadClient.createReadSession` to handle fallback. - Added documentation about the limitation in `README`. - Added unit tests to verify the fallback mechanism. Co-authored-by: danieljbruce <8935272+danieljbruce@users.noreply.github.com> --- .readme-partials.yaml | 6 +- .../cloud/bigquery/storage/v1/arrow.proto | 18 +++ protos/protos.d.ts | 16 +- protos/protos.js | 75 +++++++++- protos/protos.json | 21 ++- src/v1/big_query_read_client.ts | 20 +++ test/picosecond_precision.ts | 140 ++++++++++++++++++ 7 files changed, 291 insertions(+), 5 deletions(-) create mode 100644 test/picosecond_precision.ts diff --git a/.readme-partials.yaml b/.readme-partials.yaml index 12bd9210..2b982a12 100644 --- a/.readme-partials.yaml +++ b/.readme-partials.yaml @@ -104,4 +104,8 @@ introduction: |- Read more how to [use the BigQuery Storage Read API](https://cloud.google.com/bigquery/docs/reference/storage). - See sample code on the [Quickstart section](#quickstart). \ No newline at end of file + See sample code on the [Quickstart section](#quickstart). + + ### Apache Arrow Picosecond Precision Limitation + Please note that the Apache Arrow JavaScript library currently does not support picosecond precision (`Timestamp[ps]`) logical types. 
+ When using the Storage Read API with Arrow format, if picosecond precision is requested, the Node.js client library will automatically fall back to microsecond precision and log a warning to prevent deserialization errors. \ No newline at end of file diff --git a/protos/google/cloud/bigquery/storage/v1/arrow.proto b/protos/google/cloud/bigquery/storage/v1/arrow.proto index f4f17c3c..4f8f4a91 100644 --- a/protos/google/cloud/bigquery/storage/v1/arrow.proto +++ b/protos/google/cloud/bigquery/storage/v1/arrow.proto @@ -58,7 +58,25 @@ message ArrowSerializationOptions { ZSTD = 2; } + // Precision of the TIMESTAMP type. + enum PicosecondTimestampPrecision { + // If unspecified, microsecond precision will be used. + PICOSECOND_TIMESTAMP_PRECISION_UNSPECIFIED = 0; + + // Use microsecond precision. + MICROSECOND = 1; + + // Use nanosecond precision. + NANOSECOND = 2; + + // Use picosecond precision. + PICOSECOND = 3; + } + // The compression codec to use for Arrow buffers in serialized record // batches. CompressionCodec buffer_compression = 2; + + // The precision of the TIMESTAMP type. + PicosecondTimestampPrecision timestamp_precision = 3; } diff --git a/protos/protos.d.ts b/protos/protos.d.ts index 0f9ad177..8c26512c 100644 --- a/protos/protos.d.ts +++ b/protos/protos.d.ts @@ -1,4 +1,4 @@ -// Copyright 2025 Google LLC +// Copyright 2026 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -234,6 +234,9 @@ export namespace google { /** ArrowSerializationOptions bufferCompression */ bufferCompression?: (google.cloud.bigquery.storage.v1.ArrowSerializationOptions.CompressionCodec|keyof typeof google.cloud.bigquery.storage.v1.ArrowSerializationOptions.CompressionCodec|null); + + /** ArrowSerializationOptions timestampPrecision */ + timestampPrecision?: (google.cloud.bigquery.storage.v1.ArrowSerializationOptions.PicosecondTimestampPrecision|keyof typeof google.cloud.bigquery.storage.v1.ArrowSerializationOptions.PicosecondTimestampPrecision|null); } /** Represents an ArrowSerializationOptions. */ @@ -248,6 +251,9 @@ export namespace google { /** ArrowSerializationOptions bufferCompression. */ public bufferCompression: (google.cloud.bigquery.storage.v1.ArrowSerializationOptions.CompressionCodec|keyof typeof google.cloud.bigquery.storage.v1.ArrowSerializationOptions.CompressionCodec); + /** ArrowSerializationOptions timestampPrecision. */ + public timestampPrecision: (google.cloud.bigquery.storage.v1.ArrowSerializationOptions.PicosecondTimestampPrecision|keyof typeof google.cloud.bigquery.storage.v1.ArrowSerializationOptions.PicosecondTimestampPrecision); + /** * Creates a new ArrowSerializationOptions instance using the specified properties. * @param [properties] Properties to set @@ -334,6 +340,14 @@ export namespace google { LZ4_FRAME = 1, ZSTD = 2 } + + /** PicosecondTimestampPrecision enum. */ + enum PicosecondTimestampPrecision { + PICOSECOND_TIMESTAMP_PRECISION_UNSPECIFIED = 0, + MICROSECOND = 1, + NANOSECOND = 2, + PICOSECOND = 3 + } } /** Properties of an AvroSchema. */ diff --git a/protos/protos.js b/protos/protos.js index e5b18720..69c4dcdb 100644 --- a/protos/protos.js +++ b/protos/protos.js @@ -1,4 +1,4 @@ -// Copyright 2025 Google LLC +// Copyright 2026 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -548,6 +548,7 @@ * @memberof google.cloud.bigquery.storage.v1 * @interface IArrowSerializationOptions * @property {google.cloud.bigquery.storage.v1.ArrowSerializationOptions.CompressionCodec|null} [bufferCompression] ArrowSerializationOptions bufferCompression + * @property {google.cloud.bigquery.storage.v1.ArrowSerializationOptions.PicosecondTimestampPrecision|null} [timestampPrecision] ArrowSerializationOptions timestampPrecision */ /** @@ -573,6 +574,14 @@ */ ArrowSerializationOptions.prototype.bufferCompression = 0; + /** + * ArrowSerializationOptions timestampPrecision. + * @member {google.cloud.bigquery.storage.v1.ArrowSerializationOptions.PicosecondTimestampPrecision} timestampPrecision + * @memberof google.cloud.bigquery.storage.v1.ArrowSerializationOptions + * @instance + */ + ArrowSerializationOptions.prototype.timestampPrecision = 0; + /** * Creates a new ArrowSerializationOptions instance using the specified properties. * @function create @@ -599,6 +608,8 @@ writer = $Writer.create(); if (message.bufferCompression != null && Object.hasOwnProperty.call(message, "bufferCompression")) writer.uint32(/* id 2, wireType 0 =*/16).int32(message.bufferCompression); + if (message.timestampPrecision != null && Object.hasOwnProperty.call(message, "timestampPrecision")) + writer.uint32(/* id 3, wireType 0 =*/24).int32(message.timestampPrecision); return writer; }; @@ -639,6 +650,10 @@ message.bufferCompression = reader.int32(); break; } + case 3: { + message.timestampPrecision = reader.int32(); + break; + } default: reader.skipType(tag & 7); break; @@ -683,6 +698,16 @@ case 2: break; } + if (message.timestampPrecision != null && message.hasOwnProperty("timestampPrecision")) + switch (message.timestampPrecision) { + default: + return "timestampPrecision: enum value expected"; + case 0: + case 1: + case 2: + case 3: + break; + } return null; }; @@ -718,6 +743,30 @@ message.bufferCompression = 2; break; } + switch (object.timestampPrecision) { + default: + if (typeof 
object.timestampPrecision === "number") { + message.timestampPrecision = object.timestampPrecision; + break; + } + break; + case "PICOSECOND_TIMESTAMP_PRECISION_UNSPECIFIED": + case 0: + message.timestampPrecision = 0; + break; + case "MICROSECOND": + case 1: + message.timestampPrecision = 1; + break; + case "NANOSECOND": + case 2: + message.timestampPrecision = 2; + break; + case "PICOSECOND": + case 3: + message.timestampPrecision = 3; + break; + } return message; }; @@ -734,10 +783,14 @@ if (!options) options = {}; var object = {}; - if (options.defaults) + if (options.defaults) { object.bufferCompression = options.enums === String ? "COMPRESSION_UNSPECIFIED" : 0; + object.timestampPrecision = options.enums === String ? "PICOSECOND_TIMESTAMP_PRECISION_UNSPECIFIED" : 0; + } if (message.bufferCompression != null && message.hasOwnProperty("bufferCompression")) object.bufferCompression = options.enums === String ? $root.google.cloud.bigquery.storage.v1.ArrowSerializationOptions.CompressionCodec[message.bufferCompression] === undefined ? message.bufferCompression : $root.google.cloud.bigquery.storage.v1.ArrowSerializationOptions.CompressionCodec[message.bufferCompression] : message.bufferCompression; + if (message.timestampPrecision != null && message.hasOwnProperty("timestampPrecision")) + object.timestampPrecision = options.enums === String ? $root.google.cloud.bigquery.storage.v1.ArrowSerializationOptions.PicosecondTimestampPrecision[message.timestampPrecision] === undefined ? message.timestampPrecision : $root.google.cloud.bigquery.storage.v1.ArrowSerializationOptions.PicosecondTimestampPrecision[message.timestampPrecision] : message.timestampPrecision; return object; }; @@ -783,6 +836,24 @@ return values; })(); + /** + * PicosecondTimestampPrecision enum. 
+ * @name google.cloud.bigquery.storage.v1.ArrowSerializationOptions.PicosecondTimestampPrecision + * @enum {number} + * @property {number} PICOSECOND_TIMESTAMP_PRECISION_UNSPECIFIED=0 PICOSECOND_TIMESTAMP_PRECISION_UNSPECIFIED value + * @property {number} MICROSECOND=1 MICROSECOND value + * @property {number} NANOSECOND=2 NANOSECOND value + * @property {number} PICOSECOND=3 PICOSECOND value + */ + ArrowSerializationOptions.PicosecondTimestampPrecision = (function() { + var valuesById = {}, values = Object.create(valuesById); + values[valuesById[0] = "PICOSECOND_TIMESTAMP_PRECISION_UNSPECIFIED"] = 0; + values[valuesById[1] = "MICROSECOND"] = 1; + values[valuesById[2] = "NANOSECOND"] = 2; + values[valuesById[3] = "PICOSECOND"] = 3; + return values; + })(); + return ArrowSerializationOptions; })(); diff --git a/protos/protos.json b/protos/protos.json index 0307c1da..042d3483 100644 --- a/protos/protos.json +++ b/protos/protos.json @@ -61,6 +61,10 @@ "bufferCompression": { "type": "CompressionCodec", "id": 2 + }, + "timestampPrecision": { + "type": "PicosecondTimestampPrecision", + "id": 3 } }, "nested": { @@ -70,6 +74,14 @@ "LZ4_FRAME": 1, "ZSTD": 2 } + }, + "PicosecondTimestampPrecision": { + "values": { + "PICOSECOND_TIMESTAMP_PRECISION_UNSPECIFIED": 0, + "MICROSECOND": 1, + "NANOSECOND": 2, + "PICOSECOND": 3 + } } } }, @@ -3150,7 +3162,14 @@ "type": "ServiceOptions", "id": 3 } - } + }, + "reserved": [ + [ + 4, + 4 + ], + "stream" + ] }, "MethodDescriptorProto": { "edition": "proto2", diff --git a/src/v1/big_query_read_client.ts b/src/v1/big_query_read_client.ts index b34ec292..628c7e36 100644 --- a/src/v1/big_query_read_client.ts +++ b/src/v1/big_query_read_client.ts @@ -542,6 +542,26 @@ export class BigQueryReadClient { this._gaxModule.routingHeader.fromParams({ 'read_session.table': request.readSession!.table ?? '', }); + + // Apache Arrow does not currently support picosecond precision in JavaScript. 
+ // If picosecond precision is requested, we fall back to microsecond precision + // to avoid deserialization errors. + const timestampPrecision = + request.readSession?.readOptions?.arrowSerializationOptions + ?.timestampPrecision; + if ( + timestampPrecision === + protos.google.cloud.bigquery.storage.v1.ArrowSerializationOptions + .PicosecondTimestampPrecision.PICOSECOND || + timestampPrecision === 'PICOSECOND' + ) { + console.warn( + 'Apache Arrow does not support picosecond precision. Falling back to microsecond precision.' + ); + request.readSession!.readOptions!.arrowSerializationOptions!.timestampPrecision = + protos.google.cloud.bigquery.storage.v1.ArrowSerializationOptions.PicosecondTimestampPrecision.MICROSECOND; + } + this.initialize().catch(err => { throw err; }); diff --git a/test/picosecond_precision.ts b/test/picosecond_precision.ts new file mode 100644 index 00000000..bd1884a9 --- /dev/null +++ b/test/picosecond_precision.ts @@ -0,0 +1,140 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+import * as protos from '../protos/protos';
+import * as assert from 'assert';
+import * as sinon from 'sinon';
+import {afterEach, describe, it} from 'mocha';
+import * as bigqueryreadModule from '../src';
+
+// Value-level shorthand for the generated BigQuery Storage v1 messages.
+const storageV1 = protos.google.cloud.bigquery.storage.v1;
+const Precision =
+  storageV1.ArrowSerializationOptions.PicosecondTimestampPrecision;
+
+// Every form the generated request type accepts for `timestampPrecision`
+// (numeric enum member or its string name).
+type PrecisionInput =
+  protos.google.cloud.bigquery.storage.v1.IArrowSerializationOptions['timestampPrecision'];
+
+// Exact text the client passes to console.warn when it downgrades a request.
+const FALLBACK_WARNING =
+  'Apache Arrow does not support picosecond precision. Falling back to microsecond precision.';
+
+describe('Picosecond Precision Support', () => {
+  const sandbox = sinon.createSandbox();
+
+  // Restore in a hook, not at the end of each test body: a failing assertion
+  // would otherwise skip the restore and leave console.warn stubbed for
+  // every test that runs afterwards.
+  afterEach(() => {
+    sandbox.restore();
+  });
+
+  /**
+   * Issues a createReadSession call for an Arrow read session that requests
+   * `timestampPrecision`, with the transport replaced by a stub so nothing
+   * leaves the process.
+   *
+   * @param timestampPrecision Precision to place in arrowSerializationOptions.
+   * @returns The console.warn stub and the precision found on the request
+   *     that reached the stubbed transport.
+   */
+  async function requestSession(timestampPrecision: PrecisionInput) {
+    const client = new bigqueryreadModule.v1.BigQueryReadClient({
+      credentials: {client_email: 'bogus', private_key: 'bogus'},
+      projectId: 'bogus',
+    });
+    await client.initialize();
+
+    const request: protos.google.cloud.bigquery.storage.v1.ICreateReadSessionRequest =
+      {
+        parent: 'projects/bogus',
+        readSession: {
+          table: 'projects/bogus/datasets/bogus/tables/bogus',
+          dataFormat: storageV1.DataFormat.ARROW,
+          readOptions: {
+            arrowSerializationOptions: {timestampPrecision},
+          },
+        },
+      };
+
+    const transportStub = sandbox
+      .stub()
+      .resolves([new storageV1.ReadSession()]);
+    client.innerApiCalls.createReadSession = transportStub;
+    // Stub console.warn last so only createReadSession's output is recorded.
+    const warnStub = sandbox.stub(console, 'warn');
+
+    await client.createReadSession(request);
+
+    const sentRequest = transportStub.getCall(0).args[0];
+    return {
+      warnStub,
+      sentPrecision:
+        sentRequest.readSession.readOptions.arrowSerializationOptions
+          .timestampPrecision,
+    };
+  }
+
+  it('falls back to microsecond precision when picosecond precision is requested for Arrow', async () => {
+    const {warnStub, sentPrecision} = await requestSession(
+      Precision.PICOSECOND
+    );
+
+    sinon.assert.calledOnce(warnStub);
+    sinon.assert.calledWithExactly(warnStub, FALLBACK_WARNING);
+    assert.strictEqual(sentPrecision, Precision.MICROSECOND);
+  });
+
+  it('falls back to microsecond precision when picosecond precision is requested as a string for Arrow', async () => {
+    // The string enum name is part of the generated request type, so no
+    // `any` annotation is needed to exercise this path.
+    const {warnStub, sentPrecision} = await requestSession('PICOSECOND');
+
+    sinon.assert.calledOnce(warnStub);
+    sinon.assert.calledWithExactly(warnStub, FALLBACK_WARNING);
+    assert.strictEqual(sentPrecision, Precision.MICROSECOND);
+  });
+
+  it('does not fall back when microsecond precision is requested', async () => {
+    const {warnStub, sentPrecision} = await requestSession(
+      Precision.MICROSECOND
+    );
+
+    sinon.assert.notCalled(warnStub);
+    assert.strictEqual(sentPrecision, Precision.MICROSECOND);
+  });
+
+  it('does not fall back when nanosecond precision is requested', async () => {
+    const {warnStub, sentPrecision} = await requestSession(
+      Precision.NANOSECOND
+    );
+
+    sinon.assert.notCalled(warnStub);
+    assert.strictEqual(sentPrecision, Precision.NANOSECOND);
+  });
+
+  it('does not fall back when the precision is left unspecified', async () => {
+    const {warnStub, sentPrecision} = await requestSession(
+      Precision.PICOSECOND_TIMESTAMP_PRECISION_UNSPECIFIED
+    );
+
+    sinon.assert.notCalled(warnStub);
+    assert.strictEqual(
+      sentPrecision,
+      Precision.PICOSECOND_TIMESTAMP_PRECISION_UNSPECIFIED
+    );
+  });
+});