From aa007ce7939032537bd4112d0e5b336482f2e845 Mon Sep 17 00:00:00 2001 From: Mattie Fu Date: Fri, 1 May 2026 10:31:51 -0400 Subject: [PATCH 1/4] feat: add a onLargeRow option on RowAdapter Change-Id: I7b8f76c0ba11efefd8c17c3882e6435388e32cb7 --- .../com/google/cloud/bigtable/data/v2/models/RowAdapter.java | 2 ++ .../data/v2/stub/readrows/LargeReadRowsResumptionStrategy.java | 1 + 2 files changed, 3 insertions(+) diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/models/RowAdapter.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/models/RowAdapter.java index 874529bbe1..8a53f7fcc0 100644 --- a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/models/RowAdapter.java +++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/models/RowAdapter.java @@ -35,6 +35,8 @@ public interface RowAdapter { ByteString getKey(RowT row); + default void onLargeRow(ByteString rowKey) {} + /** * A SAX style row factory. It is responsible for creating two types of rows: standard data rows * and special marker rows. Marker rows are emitted when skipping lots of rows due to filters. 
The diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeReadRowsResumptionStrategy.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeReadRowsResumptionStrategy.java index 93b6b548dd..8d36d1b088 100644 --- a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeReadRowsResumptionStrategy.java +++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeReadRowsResumptionStrategy.java @@ -91,6 +91,7 @@ public RowT processResponse(RowT response) { public Throwable processError(Throwable throwable) { ByteString rowKeyExtracted = extractLargeRowKey(throwable); if (rowKeyExtracted != null) { + rowAdapter.onLargeRow(rowKeyExtracted); LOGGER.warning("skipping large row " + rowKeyExtracted); this.largeRowKey = rowKeyExtracted; numProcessed = numProcessed + 1; From 526bf8bb76e268c8ee655158963bd894ad84cb04 Mon Sep 17 00:00:00 2001 From: Mattie Fu Date: Fri, 1 May 2026 11:13:47 -0400 Subject: [PATCH 2/4] add the utils Change-Id: I643390642ecbf3fd6867d6defd33e3de12d0c40a --- .../stub/readrows/LargeRowPaginationUtil.java | 97 +++++++++ .../data/v2/it/LargeRowPaginationUtilIT.java | 187 ++++++++++++++++++ 2 files changed, 284 insertions(+) create mode 100644 google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginationUtil.java create mode 100644 google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/it/LargeRowPaginationUtilIT.java diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginationUtil.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginationUtil.java new file mode 100644 index 0000000000..301a44f621 --- /dev/null +++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginationUtil.java @@ -0,0 +1,97 @@ 
+/* + * Copyright 2026 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.bigtable.data.v2.stub.readrows; + +import com.google.api.gax.rpc.ApiException; +import com.google.api.gax.rpc.StatusCode.Code; +import com.google.cloud.bigtable.data.v2.BigtableDataClient; +import com.google.cloud.bigtable.data.v2.models.Filters; +import com.google.cloud.bigtable.data.v2.models.Row; +import com.google.cloud.bigtable.data.v2.models.RowCell; +import com.google.cloud.bigtable.data.v2.models.TableId; +import com.google.common.collect.Lists; +import com.google.protobuf.ByteString; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; +import javax.annotation.Nullable; + +/** Public utility class to read a large row by paginating over cells. */ +public final class LargeRowPaginationUtil { + private static final Logger LOGGER = Logger.getLogger(LargeRowPaginationUtil.class.getName()); + + private LargeRowPaginationUtil() {} + + /** + * Reads a large row by paginating over cells. 1. Reads the total count of cells without fetching + * values using strip filter. 2. Reads cells in chunks using limit and offset filters. 3. Divides + * the chunk size by half if a failure occurs with FAILED_PRECONDITION. 
+ */ + public static Row readLargeRow( + BigtableDataClient client, + String tableId, + ByteString rowKey, + @Nullable Filters.Filter rowFilter) { + // Step 1: Count the number of cells with a strip value filter + Filters.ChainFilter countFilter = + Filters.FILTERS.chain().filter(Filters.FILTERS.value().strip()); + if (rowFilter != null) { + countFilter.filter(rowFilter); + } + Row countRow = client.readRow(TableId.of(tableId), rowKey, countFilter); + if (countRow == null) { + return null; // row not found + } + int totalCells = countRow.getCells().size(); + + List resultCells = Lists.newArrayList(); + int offset = 0; + int limit = totalCells; // start with trying to read all cells + + while (offset < totalCells) { + try { + Filters.ChainFilter chain = Filters.FILTERS.chain(); + if (rowFilter != null) { + chain.filter(rowFilter); + } + if (offset > 0) { + chain.filter(Filters.FILTERS.offset().cellsPerRow(offset)); + } + chain.filter(Filters.FILTERS.limit().cellsPerRow(limit)); + + Row partialRow = client.readRow(TableId.of(tableId), rowKey, chain); + if (partialRow == null) { + break; + } + resultCells.addAll(partialRow.getCells()); + offset += partialRow.getCells().size(); + } catch (ApiException e) { + if (e.getStatusCode().getCode() != Code.FAILED_PRECONDITION) { + throw e; + } + limit = limit / 2; + if (limit == 0) { + throw new RuntimeException("Cannot divide limit further. Cell might be too large."); + } + LOGGER.log( + Level.FINE, + "Failed to read chunk with limit {0} at offset {1}. 
Dividing limit by half.", + new Object[] {limit, offset}); + } + } + return Row.create(rowKey, resultCells); + } +} diff --git a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/it/LargeRowPaginationUtilIT.java b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/it/LargeRowPaginationUtilIT.java new file mode 100644 index 0000000000..3bcbc7b893 --- /dev/null +++ b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/it/LargeRowPaginationUtilIT.java @@ -0,0 +1,187 @@ +/* + * Copyright 2026 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.cloud.bigtable.data.v2.it; + +import static com.google.common.truth.Truth.assertThat; +import static com.google.common.truth.TruthJUnit.assume; + +import com.google.api.gax.grpc.GrpcStatusCode; +import com.google.api.gax.rpc.ApiException; +import com.google.cloud.bigtable.admin.v2.BigtableTableAdminClient; +import com.google.cloud.bigtable.admin.v2.models.CreateTableRequest; +import com.google.cloud.bigtable.admin.v2.models.Table; +import com.google.cloud.bigtable.data.v2.BigtableDataClient; +import com.google.cloud.bigtable.data.v2.models.Filters; +import com.google.cloud.bigtable.data.v2.models.Row; +import com.google.cloud.bigtable.data.v2.models.RowMutation; +import com.google.cloud.bigtable.data.v2.models.TableId; +import com.google.cloud.bigtable.data.v2.stub.readrows.LargeRowPaginationUtil; +import com.google.cloud.bigtable.test_helpers.env.EmulatorEnv; +import com.google.cloud.bigtable.test_helpers.env.PrefixGenerator; +import com.google.cloud.bigtable.test_helpers.env.TestEnvRule; +import com.google.protobuf.ByteString; +import io.grpc.Status; +import java.util.Random; +import java.util.concurrent.TimeUnit; +import org.junit.After; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class LargeRowPaginationUtilIT { + + @ClassRule public static final TestEnvRule testEnvRule = new TestEnvRule(); + + private BigtableTableAdminClient tableAdminClient; + private BigtableDataClient dataClient; + private Table table; + private final String familyId1 = "cf1"; + private final String familyId2 = "cf2"; + private ByteString rowKey; + private ByteString largeValue; + + @Before + public void setup() throws Exception { + tableAdminClient = testEnvRule.env().getTableAdminClient(); + dataClient = testEnvRule.env().getDataClient(); + + // Skip population for emulator as it doesn't support the expected failure + if 
(testEnvRule.env() instanceof EmulatorEnv) { + return; + } + + String tableId = PrefixGenerator.newPrefix("LargeRowPagination"); + table = + tableAdminClient.createTable( + CreateTableRequest.of(tableId).addFamily(familyId1).addFamily(familyId2)); + + rowKey = ByteString.copyFromUtf8("large-row-key"); + byte[] largeValueBytes = new byte[100 * 1024 * 1024]; + new Random().nextBytes(largeValueBytes); + largeValue = ByteString.copyFrom(largeValueBytes); + + // Populate cf1: 1 qualifier, 2 versions + for (int i = 0; i < 2; i++) { + dataClient + .mutateRowAsync( + RowMutation.create(TableId.of(table.getId()), rowKey) + .setCell(familyId1, ByteString.copyFromUtf8("q1"), largeValue)) + .get(10, TimeUnit.MINUTES); + } + + // Populate cf2: 2 qualifiers, 2 versions each + for (int q = 1; q <= 2; q++) { + for (int v = 0; v < 2; v++) { + dataClient + .mutateRowAsync( + RowMutation.create(TableId.of(table.getId()), rowKey) + .setCell(familyId2, ByteString.copyFromUtf8("q" + q), largeValue)) + .get(10, TimeUnit.MINUTES); + } + } + } + + @After + public void tearDown() { + if (table != null) { + tableAdminClient.deleteTable(table.getId()); + } + } + + @Test + public void testReadLargeRow() throws Exception { + assume() + .withMessage("Large row read errors are not supported by emulator") + .that(testEnvRule.env()) + .isNotInstanceOf(EmulatorEnv.class); + + // Verify it fails with standard read + try { + dataClient.readRow(TableId.of(table.getId()), rowKey); + org.junit.Assert.fail("Should have failed with FAILED_PRECONDITION"); + } catch (ApiException e) { + assertThat(e.getStatusCode()).isEqualTo(GrpcStatusCode.of(Status.Code.FAILED_PRECONDITION)); + } + + // Read without filter: 2 (cf1) + 4 (cf2) = 6 cells + Row row = LargeRowPaginationUtil.readLargeRow(dataClient, table.getId(), rowKey, null); + assertThat(row).isNotNull(); + assertThat(row.getKey()).isEqualTo(rowKey); + assertThat(row.getCells()).hasSize(6); + + // Verify cell content + // cf1:q1 (2 versions) + 
assertThat(row.getCells().get(0).getFamily()).isEqualTo(familyId1); + assertThat(row.getCells().get(0).getQualifier().toStringUtf8()).isEqualTo("q1"); + assertThat(row.getCells().get(0).getValue()).isEqualTo(largeValue); + + assertThat(row.getCells().get(1).getFamily()).isEqualTo(familyId1); + assertThat(row.getCells().get(1).getQualifier().toStringUtf8()).isEqualTo("q1"); + assertThat(row.getCells().get(1).getValue()).isEqualTo(largeValue); + + // cf2:q1 (2 versions) + assertThat(row.getCells().get(2).getFamily()).isEqualTo(familyId2); + assertThat(row.getCells().get(2).getQualifier().toStringUtf8()).isEqualTo("q1"); + assertThat(row.getCells().get(2).getValue()).isEqualTo(largeValue); + + assertThat(row.getCells().get(3).getFamily()).isEqualTo(familyId2); + assertThat(row.getCells().get(3).getQualifier().toStringUtf8()).isEqualTo("q1"); + assertThat(row.getCells().get(3).getValue()).isEqualTo(largeValue); + + // cf2:q2 (2 versions) + assertThat(row.getCells().get(4).getFamily()).isEqualTo(familyId2); + assertThat(row.getCells().get(4).getQualifier().toStringUtf8()).isEqualTo("q2"); + assertThat(row.getCells().get(4).getValue()).isEqualTo(largeValue); + + assertThat(row.getCells().get(5).getFamily()).isEqualTo(familyId2); + assertThat(row.getCells().get(5).getQualifier().toStringUtf8()).isEqualTo("q2"); + assertThat(row.getCells().get(5).getValue()).isEqualTo(largeValue); + } + + @Test + public void testReadLargeRowWithFilter() throws Exception { + assume() + .withMessage("Large row read errors are not supported by emulator") + .that(testEnvRule.env()) + .isNotInstanceOf(EmulatorEnv.class); + + // Filter for most recent version: 1 (cf1) + 2 (cf2) = 3 cells + Filters.Filter filter = Filters.FILTERS.limit().cellsPerColumn(1); + Row row = LargeRowPaginationUtil.readLargeRow(dataClient, table.getId(), rowKey, filter); + + assertThat(row).isNotNull(); + assertThat(row.getKey()).isEqualTo(rowKey); + assertThat(row.getCells()).hasSize(3); + + // cf1:q1 + 
assertThat(row.getCells().get(0).getFamily()).isEqualTo(familyId1); + assertThat(row.getCells().get(0).getQualifier().toStringUtf8()).isEqualTo("q1"); + assertThat(row.getCells().get(0).getValue()).isEqualTo(largeValue); + + // cf2:q1 + assertThat(row.getCells().get(1).getFamily()).isEqualTo(familyId2); + assertThat(row.getCells().get(1).getQualifier().toStringUtf8()).isEqualTo("q1"); + assertThat(row.getCells().get(1).getValue()).isEqualTo(largeValue); + + // cf2:q2 + assertThat(row.getCells().get(2).getFamily()).isEqualTo(familyId2); + assertThat(row.getCells().get(2).getQualifier().toStringUtf8()).isEqualTo("q2"); + assertThat(row.getCells().get(2).getValue()).isEqualTo(largeValue); + } +} From c992e24d73f1feb45917b6e342d059905eb4efbb Mon Sep 17 00:00:00 2001 From: Mattie Fu Date: Tue, 5 May 2026 02:06:45 +0000 Subject: [PATCH 3/4] update --- .../stub/readrows/LargeRowPaginationUtil.java | 97 --------- .../v2/stub/readrows/LargeRowPaginator.java | 78 ++++++++ .../data/v2/it/LargeRowPaginationUtilIT.java | 187 ------------------ .../stub/readrows/LargeRowPaginatorTest.java | 69 +++++++ 4 files changed, 147 insertions(+), 284 deletions(-) delete mode 100644 google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginationUtil.java create mode 100644 google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginator.java delete mode 100644 google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/it/LargeRowPaginationUtilIT.java create mode 100644 google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginatorTest.java diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginationUtil.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginationUtil.java deleted file mode 100644 index 301a44f621..0000000000 --- 
a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginationUtil.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright 2026 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.google.cloud.bigtable.data.v2.stub.readrows; - -import com.google.api.gax.rpc.ApiException; -import com.google.api.gax.rpc.StatusCode.Code; -import com.google.cloud.bigtable.data.v2.BigtableDataClient; -import com.google.cloud.bigtable.data.v2.models.Filters; -import com.google.cloud.bigtable.data.v2.models.Row; -import com.google.cloud.bigtable.data.v2.models.RowCell; -import com.google.cloud.bigtable.data.v2.models.TableId; -import com.google.common.collect.Lists; -import com.google.protobuf.ByteString; -import java.util.List; -import java.util.logging.Level; -import java.util.logging.Logger; -import javax.annotation.Nullable; - -/** Public utility class to read a large row by paginating over cells. */ -public final class LargeRowPaginationUtil { - private static final Logger LOGGER = Logger.getLogger(LargeRowPaginationUtil.class.getName()); - - private LargeRowPaginationUtil() {} - - /** - * Reads a large row by paginating over cells. 1. Reads the total count of cells without fetching - * values using strip filter. 2. Reads cells in chunks using limit and offset filters. 3. Divides - * the chunk size by half if a failure occurs with FAILED_PRECONDITION. 
- */ - public static Row readLargeRow( - BigtableDataClient client, - String tableId, - ByteString rowKey, - @Nullable Filters.Filter rowFilter) { - // Step 1: Count the number of cells with a strip value filter - Filters.ChainFilter countFilter = - Filters.FILTERS.chain().filter(Filters.FILTERS.value().strip()); - if (rowFilter != null) { - countFilter.filter(rowFilter); - } - Row countRow = client.readRow(TableId.of(tableId), rowKey, countFilter); - if (countRow == null) { - return null; // row not found - } - int totalCells = countRow.getCells().size(); - - List resultCells = Lists.newArrayList(); - int offset = 0; - int limit = totalCells; // start with trying to read all cells - - while (offset < totalCells) { - try { - Filters.ChainFilter chain = Filters.FILTERS.chain(); - if (rowFilter != null) { - chain.filter(rowFilter); - } - if (offset > 0) { - chain.filter(Filters.FILTERS.offset().cellsPerRow(offset)); - } - chain.filter(Filters.FILTERS.limit().cellsPerRow(limit)); - - Row partialRow = client.readRow(TableId.of(tableId), rowKey, chain); - if (partialRow == null) { - break; - } - resultCells.addAll(partialRow.getCells()); - offset += partialRow.getCells().size(); - } catch (ApiException e) { - if (e.getStatusCode().getCode() != Code.FAILED_PRECONDITION) { - throw e; - } - limit = limit / 2; - if (limit == 0) { - throw new RuntimeException("Cannot divide limit further. Cell might be too large."); - } - LOGGER.log( - Level.FINE, - "Failed to read chunk with limit {0} at offset {1}. 
Dividing limit by half.", - new Object[] {limit, offset}); - } - } - return Row.create(rowKey, resultCells); - } -} diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginator.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginator.java new file mode 100644 index 0000000000..8ae3f478eb --- /dev/null +++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginator.java @@ -0,0 +1,78 @@ +/* + * Copyright 2026 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.bigtable.data.v2.stub.readrows; + +import com.google.api.core.InternalApi; +import com.google.cloud.bigtable.data.v2.models.Filters; + +/** + * A paginator for fetching extremely large rows from Bigtable chunk by chunk to avoid the 256MB gRPC size limit. + * It yields Filters that chunk the row by cell limits and handles limit halving if FAILED_PRECONDITION occurs. + */ +@InternalApi("For internal usage only") +public class LargeRowPaginator { + private int currentLimit; + private int currentOffset; + private boolean hasMore; + + public LargeRowPaginator(int initialLimit) { + this.currentLimit = initialLimit; + this.currentOffset = 0; + this.hasMore = true; + } + + /** + * Yields the filter required to fetch the next chunk of cells for the large row. 
+ */ + public Filters.Filter getNextFilter() { + Filters.ChainFilter chain = Filters.FILTERS.chain(); + if (currentOffset > 0) { + chain.filter(Filters.FILTERS.offset().cellsPerRow(currentOffset)); + } + chain.filter(Filters.FILTERS.limit().cellsPerRow(currentLimit)); + return chain; + } + + /** + * Advances the internal offset. Call this after a successful Bigtable API call. + * @param cellsReadInLastChunk The number of cells returned in the last chunk. + * @return true if there are potentially more cells to fetch. + */ + public boolean advance(int cellsReadInLastChunk) { + this.currentOffset += cellsReadInLastChunk; + + // If we read fewer cells than requested, we've hit the end of the row. + if (cellsReadInLastChunk < currentLimit) { + this.hasMore = false; + } + return this.hasMore; + } + + /** + * Call this if the Bigtable API call fails with a FAILED_PRECONDITION due to size limits. + * It reduces the batch size to fetch a smaller chunk on the next attempt. + */ + public void halveLimit() { + this.currentLimit /= 2; + if (this.currentLimit == 0) { + throw new RuntimeException("Cannot divide limit further. A single cell might be too large."); + } + } + + public boolean hasNext() { + return this.hasMore; + } +} diff --git a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/it/LargeRowPaginationUtilIT.java b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/it/LargeRowPaginationUtilIT.java deleted file mode 100644 index 3bcbc7b893..0000000000 --- a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/it/LargeRowPaginationUtilIT.java +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Copyright 2026 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.google.cloud.bigtable.data.v2.it; - -import static com.google.common.truth.Truth.assertThat; -import static com.google.common.truth.TruthJUnit.assume; - -import com.google.api.gax.grpc.GrpcStatusCode; -import com.google.api.gax.rpc.ApiException; -import com.google.cloud.bigtable.admin.v2.BigtableTableAdminClient; -import com.google.cloud.bigtable.admin.v2.models.CreateTableRequest; -import com.google.cloud.bigtable.admin.v2.models.Table; -import com.google.cloud.bigtable.data.v2.BigtableDataClient; -import com.google.cloud.bigtable.data.v2.models.Filters; -import com.google.cloud.bigtable.data.v2.models.Row; -import com.google.cloud.bigtable.data.v2.models.RowMutation; -import com.google.cloud.bigtable.data.v2.models.TableId; -import com.google.cloud.bigtable.data.v2.stub.readrows.LargeRowPaginationUtil; -import com.google.cloud.bigtable.test_helpers.env.EmulatorEnv; -import com.google.cloud.bigtable.test_helpers.env.PrefixGenerator; -import com.google.cloud.bigtable.test_helpers.env.TestEnvRule; -import com.google.protobuf.ByteString; -import io.grpc.Status; -import java.util.Random; -import java.util.concurrent.TimeUnit; -import org.junit.After; -import org.junit.Before; -import org.junit.ClassRule; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -@RunWith(JUnit4.class) -public class LargeRowPaginationUtilIT { - - @ClassRule public static final TestEnvRule testEnvRule = new TestEnvRule(); - - private BigtableTableAdminClient tableAdminClient; - private BigtableDataClient dataClient; 
- private Table table; - private final String familyId1 = "cf1"; - private final String familyId2 = "cf2"; - private ByteString rowKey; - private ByteString largeValue; - - @Before - public void setup() throws Exception { - tableAdminClient = testEnvRule.env().getTableAdminClient(); - dataClient = testEnvRule.env().getDataClient(); - - // Skip population for emulator as it doesn't support the expected failure - if (testEnvRule.env() instanceof EmulatorEnv) { - return; - } - - String tableId = PrefixGenerator.newPrefix("LargeRowPagination"); - table = - tableAdminClient.createTable( - CreateTableRequest.of(tableId).addFamily(familyId1).addFamily(familyId2)); - - rowKey = ByteString.copyFromUtf8("large-row-key"); - byte[] largeValueBytes = new byte[100 * 1024 * 1024]; - new Random().nextBytes(largeValueBytes); - largeValue = ByteString.copyFrom(largeValueBytes); - - // Populate cf1: 1 qualifier, 2 versions - for (int i = 0; i < 2; i++) { - dataClient - .mutateRowAsync( - RowMutation.create(TableId.of(table.getId()), rowKey) - .setCell(familyId1, ByteString.copyFromUtf8("q1"), largeValue)) - .get(10, TimeUnit.MINUTES); - } - - // Populate cf2: 2 qualifiers, 2 versions each - for (int q = 1; q <= 2; q++) { - for (int v = 0; v < 2; v++) { - dataClient - .mutateRowAsync( - RowMutation.create(TableId.of(table.getId()), rowKey) - .setCell(familyId2, ByteString.copyFromUtf8("q" + q), largeValue)) - .get(10, TimeUnit.MINUTES); - } - } - } - - @After - public void tearDown() { - if (table != null) { - tableAdminClient.deleteTable(table.getId()); - } - } - - @Test - public void testReadLargeRow() throws Exception { - assume() - .withMessage("Large row read errors are not supported by emulator") - .that(testEnvRule.env()) - .isNotInstanceOf(EmulatorEnv.class); - - // Verify it fails with standard read - try { - dataClient.readRow(TableId.of(table.getId()), rowKey); - org.junit.Assert.fail("Should have failed with FAILED_PRECONDITION"); - } catch (ApiException e) { - 
assertThat(e.getStatusCode()).isEqualTo(GrpcStatusCode.of(Status.Code.FAILED_PRECONDITION)); - } - - // Read without filter: 2 (cf1) + 4 (cf2) = 6 cells - Row row = LargeRowPaginationUtil.readLargeRow(dataClient, table.getId(), rowKey, null); - assertThat(row).isNotNull(); - assertThat(row.getKey()).isEqualTo(rowKey); - assertThat(row.getCells()).hasSize(6); - - // Verify cell content - // cf1:q1 (2 versions) - assertThat(row.getCells().get(0).getFamily()).isEqualTo(familyId1); - assertThat(row.getCells().get(0).getQualifier().toStringUtf8()).isEqualTo("q1"); - assertThat(row.getCells().get(0).getValue()).isEqualTo(largeValue); - - assertThat(row.getCells().get(1).getFamily()).isEqualTo(familyId1); - assertThat(row.getCells().get(1).getQualifier().toStringUtf8()).isEqualTo("q1"); - assertThat(row.getCells().get(1).getValue()).isEqualTo(largeValue); - - // cf2:q1 (2 versions) - assertThat(row.getCells().get(2).getFamily()).isEqualTo(familyId2); - assertThat(row.getCells().get(2).getQualifier().toStringUtf8()).isEqualTo("q1"); - assertThat(row.getCells().get(2).getValue()).isEqualTo(largeValue); - - assertThat(row.getCells().get(3).getFamily()).isEqualTo(familyId2); - assertThat(row.getCells().get(3).getQualifier().toStringUtf8()).isEqualTo("q1"); - assertThat(row.getCells().get(3).getValue()).isEqualTo(largeValue); - - // cf2:q2 (2 versions) - assertThat(row.getCells().get(4).getFamily()).isEqualTo(familyId2); - assertThat(row.getCells().get(4).getQualifier().toStringUtf8()).isEqualTo("q2"); - assertThat(row.getCells().get(4).getValue()).isEqualTo(largeValue); - - assertThat(row.getCells().get(5).getFamily()).isEqualTo(familyId2); - assertThat(row.getCells().get(5).getQualifier().toStringUtf8()).isEqualTo("q2"); - assertThat(row.getCells().get(5).getValue()).isEqualTo(largeValue); - } - - @Test - public void testReadLargeRowWithFilter() throws Exception { - assume() - .withMessage("Large row read errors are not supported by emulator") - .that(testEnvRule.env()) - 
.isNotInstanceOf(EmulatorEnv.class); - - // Filter for most recent version: 1 (cf1) + 2 (cf2) = 3 cells - Filters.Filter filter = Filters.FILTERS.limit().cellsPerColumn(1); - Row row = LargeRowPaginationUtil.readLargeRow(dataClient, table.getId(), rowKey, filter); - - assertThat(row).isNotNull(); - assertThat(row.getKey()).isEqualTo(rowKey); - assertThat(row.getCells()).hasSize(3); - - // cf1:q1 - assertThat(row.getCells().get(0).getFamily()).isEqualTo(familyId1); - assertThat(row.getCells().get(0).getQualifier().toStringUtf8()).isEqualTo("q1"); - assertThat(row.getCells().get(0).getValue()).isEqualTo(largeValue); - - // cf2:q1 - assertThat(row.getCells().get(1).getFamily()).isEqualTo(familyId2); - assertThat(row.getCells().get(1).getQualifier().toStringUtf8()).isEqualTo("q1"); - assertThat(row.getCells().get(1).getValue()).isEqualTo(largeValue); - - // cf2:q2 - assertThat(row.getCells().get(2).getFamily()).isEqualTo(familyId2); - assertThat(row.getCells().get(2).getQualifier().toStringUtf8()).isEqualTo("q2"); - assertThat(row.getCells().get(2).getValue()).isEqualTo(largeValue); - } -} diff --git a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginatorTest.java b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginatorTest.java new file mode 100644 index 0000000000..edd0665f25 --- /dev/null +++ b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginatorTest.java @@ -0,0 +1,69 @@ +/* + * Copyright 2026 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.bigtable.data.v2.stub.readrows; + +import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.assertThrows; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class LargeRowPaginatorTest { + + @Test + public void testPaginatorAdvancesProperly() { + LargeRowPaginator paginator = new LargeRowPaginator(10); + + assertThat(paginator.hasNext()).isTrue(); + + // Simulate reading exactly the limit (10 cells). Paginator assumes more data exists. 
+ boolean hasMore = paginator.advance(10); + assertThat(hasMore).isTrue(); + assertThat(paginator.hasNext()).isTrue(); + + // Simulate reading 5 cells (less than limit, indicating end of row) + hasMore = paginator.advance(5); + assertThat(hasMore).isFalse(); + assertThat(paginator.hasNext()).isFalse(); + } + + @Test + public void testPaginatorHalvesLimit() { + LargeRowPaginator paginator = new LargeRowPaginator(10); + + paginator.halveLimit(); // Internal limit becomes 5 + + // Simulate reading exactly the new limit (5 cells) + boolean hasMore = paginator.advance(5); + assertThat(hasMore).isTrue(); + + paginator.halveLimit(); // Internal limit becomes 2 + + // Simulate reading 1 cell (less than the new limit of 2) + hasMore = paginator.advance(1); + assertThat(hasMore).isFalse(); + } + + @Test + public void testPaginatorThrowsOnZeroLimit() { + LargeRowPaginator paginator = new LargeRowPaginator(1); + + RuntimeException exception = assertThrows(RuntimeException.class, () -> paginator.halveLimit()); + assertThat(exception).hasMessageThat().contains("Cannot divide limit further"); + } +} From 8b5188d9134d307fe6ba85270079b86f5be7a001 Mon Sep 17 00:00:00 2001 From: Mattie Fu Date: Mon, 4 May 2026 22:23:47 -0400 Subject: [PATCH 4/4] update test Change-Id: I8bd49e79fb8d9c368a5126e7fa0bb4f8d73cff3c --- .../v2/stub/readrows/LargeRowPaginator.java | 96 ++++++++++--------- .../stub/readrows/LargeRowPaginatorTest.java | 43 ++++++--- 2 files changed, 81 insertions(+), 58 deletions(-) diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginator.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginator.java index 8ae3f478eb..fdc580fb06 100644 --- a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginator.java +++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginator.java @@ -17,62 
+17,68 @@ import com.google.api.core.InternalApi; import com.google.cloud.bigtable.data.v2.models.Filters; +import javax.annotation.Nullable; /** - * A paginator for fetching extremely large rows from Bigtable chunk by chunk to avoid the 256MB gRPC size limit. - * It yields Filters that chunk the row by cell limits and handles limit halving if FAILED_PRECONDITION occurs. + * A paginator for fetching large rows from Bigtable chunk by chunk to avoid the 256MB size limit. + * It yields Filters that chunk the row by cell limits and handles limit halving if + * FAILED_PRECONDITION occurs. */ @InternalApi("For internal usage only") public class LargeRowPaginator { - private int currentLimit; - private int currentOffset; - private boolean hasMore; + private int currentLimit; + private int currentOffset; + private boolean hasMore; + @Nullable private final Filters.Filter baseFilter; - public LargeRowPaginator(int initialLimit) { - this.currentLimit = initialLimit; - this.currentOffset = 0; - this.hasMore = true; - } + public LargeRowPaginator(int initialLimit, @Nullable Filters.Filter filter) { + this.currentLimit = initialLimit; + this.currentOffset = 0; + this.hasMore = true; + this.baseFilter = filter; + } - /** - * Yields the filter required to fetch the next chunk of cells for the large row. - */ - public Filters.Filter getNextFilter() { - Filters.ChainFilter chain = Filters.FILTERS.chain(); - if (currentOffset > 0) { - chain.filter(Filters.FILTERS.offset().cellsPerRow(currentOffset)); - } - chain.filter(Filters.FILTERS.limit().cellsPerRow(currentLimit)); - return chain; + /** Yields the filter required to fetch the next chunk of cells for the large row. */ + public Filters.Filter getNextFilter() { + Filters.ChainFilter chain = Filters.FILTERS.chain(); + if (baseFilter != null) { + chain.filter(baseFilter); } - - /** - * Advances the internal offset. Call this after a successful Bigtable API call. 
- * @param cellsReadInLastChunk The number of cells returned in the last chunk. - * @return true if there are potentially more cells to fetch. - */ - public boolean advance(int cellsReadInLastChunk) { - this.currentOffset += cellsReadInLastChunk; - - // If we read fewer cells than requested, we've hit the end of the row. - if (cellsReadInLastChunk < currentLimit) { - this.hasMore = false; - } - return this.hasMore; + if (currentOffset > 0) { + chain.filter(Filters.FILTERS.offset().cellsPerRow(currentOffset)); } + chain.filter(Filters.FILTERS.limit().cellsPerRow(currentLimit)); + return chain; + } - /** - * Call this if the Bigtable API call fails with a FAILED_PRECONDITION due to size limits. - * It reduces the batch size to fetch a smaller chunk on the next attempt. - */ - public void halveLimit() { - this.currentLimit /= 2; - if (this.currentLimit == 0) { - throw new RuntimeException("Cannot divide limit further. A single cell might be too large."); - } + /** + * Advances the internal offset. Call this after a successful Bigtable API call. + * + * @param cellsReadInLastChunk The number of cells returned in the last chunk. + * @return true if there are potentially more cells to fetch. + */ + public boolean advance(int cellsReadInLastChunk) { + this.currentOffset += cellsReadInLastChunk; + + // If we read fewer cells than requested, we've hit the end of the row. + if (cellsReadInLastChunk < currentLimit) { + this.hasMore = false; } + return this.hasMore; + } - public boolean hasNext() { - return this.hasMore; + /** + * Call this if the Bigtable API call fails with a FAILED_PRECONDITION due to size limits. It + * reduces the batch size to fetch a smaller chunk on the next attempt. + */ + public void halveLimit() { + this.currentLimit /= 2; + if (this.currentLimit == 0) { + throw new RuntimeException("Cannot divide limit further. 
A single cell might be too large."); } + } + + public boolean hasNext() { + return this.hasMore; + } } diff --git a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginatorTest.java b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginatorTest.java index edd0665f25..da5e14673b 100644 --- a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginatorTest.java +++ b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/readrows/LargeRowPaginatorTest.java @@ -18,6 +18,7 @@ import static com.google.common.truth.Truth.assertThat; import static org.junit.Assert.assertThrows; +import com.google.cloud.bigtable.data.v2.models.Filters; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -27,15 +28,15 @@ public class LargeRowPaginatorTest { @Test public void testPaginatorAdvancesProperly() { - LargeRowPaginator paginator = new LargeRowPaginator(10); - + LargeRowPaginator paginator = new LargeRowPaginator(10, null); + assertThat(paginator.hasNext()).isTrue(); - + // Simulate reading exactly the limit (10 cells). Paginator assumes more data exists. 
boolean hasMore = paginator.advance(10); assertThat(hasMore).isTrue(); assertThat(paginator.hasNext()).isTrue(); - + // Simulate reading 5 cells (less than limit, indicating end of row) hasMore = paginator.advance(5); assertThat(hasMore).isFalse(); @@ -44,26 +45,42 @@ public void testPaginatorAdvancesProperly() { @Test public void testPaginatorHalvesLimit() { - LargeRowPaginator paginator = new LargeRowPaginator(10); - + LargeRowPaginator paginator = new LargeRowPaginator(10, null); + paginator.halveLimit(); // Internal limit becomes 5 - + // Simulate reading exactly the new limit (5 cells) boolean hasMore = paginator.advance(5); - assertThat(hasMore).isTrue(); - + assertThat(hasMore).isTrue(); + paginator.halveLimit(); // Internal limit becomes 2 - + // Simulate reading 1 cell (less than the new limit of 2) - hasMore = paginator.advance(1); + hasMore = paginator.advance(1); assertThat(hasMore).isFalse(); } @Test public void testPaginatorThrowsOnZeroLimit() { - LargeRowPaginator paginator = new LargeRowPaginator(1); - + LargeRowPaginator paginator = new LargeRowPaginator(1, null); + RuntimeException exception = assertThrows(RuntimeException.class, () -> paginator.halveLimit()); assertThat(exception).hasMessageThat().contains("Cannot divide limit further"); } + + @Test + public void testPaginatorWithBaseFilter() { + LargeRowPaginator paginator = + new LargeRowPaginator(10, Filters.FILTERS.family().exactMatch("cf")); + + Filters.Filter nextFilter = paginator.getNextFilter(); + + Filters.Filter expectedFilter = + Filters.FILTERS + .chain() + .filter(Filters.FILTERS.family().exactMatch("cf")) + .filter(Filters.FILTERS.limit().cellsPerRow(10)); + + assertThat(nextFilter.toProto()).isEqualTo(expectedFilter.toProto()); + } }