From 4d9920289999b731bb517a1d815f18db4d0d3d64 Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Mon, 11 May 2026 10:46:59 +0530 Subject: [PATCH 1/4] HBASE-30150: Delegate getHintForRejectedRow and getSkipHint in composite filters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FilterList (AND/OR), SkipFilter, and WhileMatchFilter now delegate getHintForRejectedRow() and getSkipHint() to their sub-filters, using maximal-step merging for AND and minimal-step merging for OR — consistent with the existing getNextCellHint() convention. ColumnRangeFilter and ColumnPrefixFilter gain stateless getSkipHint() implementations. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../hbase/filter/ColumnPrefixFilter.java | 8 + .../hbase/filter/ColumnRangeFilter.java | 8 + .../apache/hadoop/hbase/filter/Filter.java | 14 +- .../hadoop/hbase/filter/FilterList.java | 10 + .../hbase/filter/FilterListWithAND.java | 49 ++ .../hadoop/hbase/filter/FilterListWithOR.java | 52 ++ .../hadoop/hbase/filter/SkipFilter.java | 10 + .../hadoop/hbase/filter/WhileMatchFilter.java | 10 + .../filter/TestFilterHintForRejectedRow.java | 566 ++++++++++++++++++ .../filter/TestFilterListHintDelegation.java | 485 +++++++++++++++ 10 files changed, 1206 insertions(+), 6 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListHintDelegation.java diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/ColumnPrefixFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/ColumnPrefixFilter.java index 9b477ec06cc7..4dd89c505dd2 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/ColumnPrefixFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/ColumnPrefixFilter.java @@ -148,6 +148,14 @@ public Cell getNextCellHint(Cell cell) { return PrivateCellUtil.createFirstOnRowCol(cell, prefix, 0, prefix.length); } + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + if (this.prefix == null) { + return null; + } + return getNextCellHint(skippedCell); + } + @Override public String toString() { return this.getClass().getSimpleName() + " " + Bytes.toStringBinary(this.prefix); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/ColumnRangeFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/ColumnRangeFilter.java index bbfec008c2c5..3ab7e2575429 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/ColumnRangeFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/ColumnRangeFilter.java @@ -200,6 +200,14 @@ public Cell getNextCellHint(Cell cell) { return PrivateCellUtil.createFirstOnRowCol(cell, this.minColumn, 0, len(this.minColumn)); } + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + if (this.minColumn == null) { + return null; + } + return getNextCellHint(skippedCell); + } + @Override public String toString() { return this.getClass().getSimpleName() + " " + (this.minColumnInclusive ? "[" : "(") diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java index 423e5b20aeed..e99821f96c49 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java @@ -216,9 +216,10 @@ public enum ReturnCode { * must point to a smaller row key (earlier in reverse-scan direction). The scanner * validates hint direction and falls back to {@code nextRow()} if the hint does not advance in * the scan direction. - *
  • Composite filter limitation: {@code FilterList}, {@code SkipFilter}, and - * {@code WhileMatchFilter} do not currently delegate this method to wrapped sub-filters. Hints - * from filters used inside these wrappers will be silently ignored.
  • + *
  • Composite filter support: {@code FilterList} (both {@code MUST_PASS_ALL} + * and {@code MUST_PASS_ONE}), {@code SkipFilter}, and {@code WhileMatchFilter} delegate this + * method to their sub-filters and merge the results (maximal step for AND, minimal step for + * OR).
  • * * @param firstRowCell the first cell encountered in the rejected row; contains the row key that * was passed to {@code filterRowKey} @@ -255,9 +256,10 @@ public Cell getHintForRejectedRow(final Cell firstRowCell) throws IOException { *
  • For reversed scans, the returned cell must have a smaller row key (i.e., earlier * in reverse-scan direction) than the {@code skippedCell}. Hints that do not advance in the scan * direction are silently ignored.
  • - *
  • Composite filter limitation: {@code FilterList}, {@code SkipFilter}, and - * {@code WhileMatchFilter} do not currently delegate this method to wrapped sub-filters. Hints - * from filters used inside these wrappers will be silently ignored.
  • + *
  • Composite filter support: {@code FilterList} (both {@code MUST_PASS_ALL} + * and {@code MUST_PASS_ONE}), {@code SkipFilter}, and {@code WhileMatchFilter} delegate this + * method to their sub-filters and merge the results (maximal step for AND, minimal step for + * OR).
  • * * @param skippedCell the cell that was rejected by the time-range, column, or version gate before * {@code filterCell} could be consulted diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java index cb42072e1d80..aba5c2424754 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java @@ -242,6 +242,16 @@ public Cell getNextCellHint(Cell currentCell) throws IOException { return this.filterListBase.getNextCellHint(currentCell); } + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException { + return this.filterListBase.getHintForRejectedRow(firstRowCell); + } + + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + return this.filterListBase.getSkipHint(skippedCell); + } + @Override public boolean isFamilyEssential(byte[] name) throws IOException { return this.filterListBase.isFamilyEssential(name); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithAND.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithAND.java index a5e1eec45401..52600fe2de17 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithAND.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithAND.java @@ -318,6 +318,55 @@ public Cell getNextCellHint(Cell currentCell) throws IOException { return maxHint; } + /** + * Maximal step: return the farthest hint among sub-filters. Null hints are ignored; if no + * sub-filter provides a hint, return null. + */ + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException { + if (isEmpty()) { + return super.getHintForRejectedRow(firstRowCell); + } + Cell maxHint = null; + for (int i = 0, n = filters.size(); i < n; i++) { + Filter filter = filters.get(i); + if (filter.filterAllRemaining()) { + continue; + } + Cell hint = filter.getHintForRejectedRow(firstRowCell); + if (hint == null) { + continue; + } + if (maxHint == null || this.compareCell(maxHint, hint) < 0) { + maxHint = hint; + } + } + return maxHint; + } + + /** Maximal step: return the farthest skip hint among sub-filters. */ + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + if (isEmpty()) { + return super.getSkipHint(skippedCell); + } + Cell maxHint = null; + for (int i = 0, n = filters.size(); i < n; i++) { + Filter filter = filters.get(i); + if (filter.filterAllRemaining()) { + continue; + } + Cell hint = filter.getSkipHint(skippedCell); + if (hint == null) { + continue; + } + if (maxHint == null || this.compareCell(maxHint, hint) < 0) { + maxHint = hint; + } + } + return maxHint; + } + @Override public boolean equals(Object obj) { if (this == obj) { diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithOR.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithOR.java index fbe68ab13527..40da51b4ba29 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithOR.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithOR.java @@ -392,6 +392,58 @@ public Cell getNextCellHint(Cell currentCell) throws IOException { return minKeyHint; } + /** + * Minimal step: return the nearest hint. If any non-terminated sub-filter returns null, the + * composite cannot safely skip, so return null. + */ + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException { + if (isEmpty()) { + return super.getHintForRejectedRow(firstRowCell); + } + Cell minHint = null; + for (int i = 0, n = filters.size(); i < n; i++) { + Filter filter = filters.get(i); + if (filter.filterAllRemaining()) { + continue; + } + Cell hint = filter.getHintForRejectedRow(firstRowCell); + if (hint == null) { + return null; + } + if (minHint == null || this.compareCell(minHint, hint) > 0) { + minHint = hint; + } + } + return minHint; + } + + /** + * Minimal step: return the nearest skip hint. Null from any sub-filter collapses the entire + * result to null. + */ + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + if (isEmpty()) { + return super.getSkipHint(skippedCell); + } + Cell minHint = null; + for (int i = 0, n = filters.size(); i < n; i++) { + Filter filter = filters.get(i); + if (filter.filterAllRemaining()) { + continue; + } + Cell hint = filter.getSkipHint(skippedCell); + if (hint == null) { + return null; + } + if (minHint == null || this.compareCell(minHint, hint) > 0) { + minHint = hint; + } + } + return minHint; + } + @Override public boolean equals(Object obj) { if (this == obj) { diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/SkipFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/SkipFilter.java index a5149592f614..5e4afeec5499 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/SkipFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/SkipFilter.java @@ -141,6 +141,16 @@ boolean areSerializedFieldsEqual(Filter o) { return getFilter().areSerializedFieldsEqual(other.getFilter()); } + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException { + return filter.getHintForRejectedRow(firstRowCell); + } + + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + return filter.getSkipHint(skippedCell); + } + @Override public boolean isFamilyEssential(byte[] name) throws IOException { return filter.isFamilyEssential(name); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/WhileMatchFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/WhileMatchFilter.java index 65cd03042b0c..1117c9ec6dba 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/WhileMatchFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/WhileMatchFilter.java @@ -139,6 +139,16 @@ boolean areSerializedFieldsEqual(Filter o) { return getFilter().areSerializedFieldsEqual(other.getFilter()); } + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException { + return filter.getHintForRejectedRow(firstRowCell); + } + + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + return filter.getSkipHint(skippedCell); + } + @Override public boolean isFamilyEssential(byte[] name) throws IOException { return filter.isFamilyEssential(name); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java index ba57ff563b74..accb8fc9ed64 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; import org.apache.hadoop.hbase.Cell; @@ -643,4 +644,569 @@ public Cell getSkipHint(Cell skippedCell) { assertTrue(skipHintCalls.get() > 0, "getSkipHint must be called at least once for reversed scan"); } + + // ---- FilterList AND hint delegation integration tests ---- + + @Test + public void testFilterListANDHintDelegation() throws IOException { + final String prefix = "row"; + final int rejectedCount = 5; + final int acceptedCount = 5; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + final AtomicInteger hintCallsA = new AtomicInteger(0); + final AtomicInteger hintCallsB = new AtomicInteger(0); + + // Both filters reject the same rows; both provide hints to the accepted start. + // AND merging takes max — both point to the same target here. + FilterBase filterA = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallsA.incrementAndGet(); + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterBase filterB = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallsB.incrementAndGet(); + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterList andFilter = + new FilterList(FilterList.Operator.MUST_PASS_ALL, Arrays.asList(filterA, filterB)); + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + List hintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(andFilter)); + List noHintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(noHintFilter)); + + assertEquals(noHintResults.size(), hintResults.size(), + "AND FilterList with hints must return same cells as no-hint path"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(acceptedCount * CELLS_PER_ROW, hintResults.size()); + assertTrue(hintCallsA.get() > 0, "Sub-filter A hint must be consulted"); + assertTrue(hintCallsB.get() > 0, "Sub-filter B hint must be consulted"); + } + + @Test + public void testFilterListORHintDelegation() throws IOException { + final String prefix = "row"; + final int rejectedCount = 5; + final int acceptedCount = 5; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + + // Both filters reject the same rows, OR requires ALL to reject. + // Both provide hints to the accepted start. OR merging takes min — same target. + FilterBase filterA = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterBase filterB = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterList orFilter = + new FilterList(FilterList.Operator.MUST_PASS_ONE, Arrays.asList(filterA, filterB)); + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + List hintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(orFilter)); + List noHintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(noHintFilter)); + + assertEquals(noHintResults.size(), hintResults.size(), + "OR FilterList with hints must return same cells as no-hint path"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(acceptedCount * CELLS_PER_ROW, hintResults.size()); + } + + @Test + public void testFilterListANDWithOneNullHintSubFilter() throws IOException { + final String prefix = "row"; + final int rejectedCount = 3; + final int acceptedCount = 3; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + final AtomicInteger hintCalls = new AtomicInteger(0); + + // One sub-filter provides a hint, the other returns null. + // AND ignores nulls, so the non-null hint should be used. + FilterBase hintProvider = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCalls.incrementAndGet(); + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterBase noHintProvider = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + FilterList andFilter = new FilterList(FilterList.Operator.MUST_PASS_ALL, + Arrays.asList(hintProvider, noHintProvider)); + + List results = scanAll(new Scan().addFamily(FAMILY).setFilter(andFilter)); + assertEquals(acceptedCount * CELLS_PER_ROW, results.size()); + assertEquals(1, hintCalls.get(), + "Hint provider must be called; AND ignores the null from the other sub-filter"); + } + + @Test + public void testFilterListORWithOneNullHintSubFilter() throws IOException { + final String prefix = "row"; + final int rejectedCount = 3; + final int acceptedCount = 3; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + + // One sub-filter provides a hint, the other returns null. + // OR returns null if ANY sub-filter returns null, so no hint optimization. + FilterBase hintProvider = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterBase noHintProvider = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + FilterList orFilter = new FilterList(FilterList.Operator.MUST_PASS_ONE, + Arrays.asList(hintProvider, noHintProvider)); + + FilterBase baseline = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + List orResults = scanAll(new Scan().addFamily(FAMILY).setFilter(orFilter)); + List baselineResults = scanAll(new Scan().addFamily(FAMILY).setFilter(baseline)); + + assertEquals(baselineResults.size(), orResults.size(), + "OR with one null hint must still return correct results (falls back to no-hint path)"); + for (int i = 0; i < orResults.size(); i++) { + assertTrue(CellUtil.equals(orResults.get(i), baselineResults.get(i)), + "Cell mismatch at index " + i); + } + } + + @Test + public void testNestedFilterListHintDelegation() throws IOException { + final String prefix = "row"; + final int rejectedCount = 4; + final int acceptedCount = 4; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + + // Nested: AND(OR(hintA, hintB), hintC) + // All filters reject the same rows and hint to the same target. + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterBase hintFilter2 = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterBase hintFilter3 = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterList innerOR = + new FilterList(FilterList.Operator.MUST_PASS_ONE, Arrays.asList(hintFilter, hintFilter2)); + FilterList outerAND = + new FilterList(FilterList.Operator.MUST_PASS_ALL, Arrays.asList(innerOR, hintFilter3)); + + FilterBase baseline = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + List nestedResults = scanAll(new Scan().addFamily(FAMILY).setFilter(outerAND)); + List baselineResults = scanAll(new Scan().addFamily(FAMILY).setFilter(baseline)); + + assertEquals(baselineResults.size(), nestedResults.size(), + "Nested FilterList must return same results as baseline"); + for (int i = 0; i < nestedResults.size(); i++) { + assertTrue(CellUtil.equals(nestedResults.get(i), baselineResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(acceptedCount * CELLS_PER_ROW, nestedResults.size()); + } + + @Test + public void testWhileMatchFilterHintDelegation() throws IOException { + final String prefix = "row"; + final int rejectedCount = 3; + final int acceptedCount = 3; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + final AtomicInteger hintCalls = new AtomicInteger(0); + + FilterBase innerHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCalls.incrementAndGet(); + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + WhileMatchFilter wmFilter = new WhileMatchFilter(innerHintFilter); + + // WhileMatchFilter delegates filterRowKey and sets filterAllRemaining on first true. + // After the first row is rejected, the scan terminates. So we expect 0 results. + // The hint should still be consulted before termination. + List results = scanAll(new Scan().addFamily(FAMILY).setFilter(wmFilter)); + + // WhileMatchFilter ends the scan on first filterRowKey=true, so no data returned. + assertTrue(results.isEmpty(), "WhileMatchFilter terminates scan on first rejection"); + } + + @Test + public void testFilterListANDReversedScanHint() throws IOException { + final String prefix = "row"; + final int totalRows = 10; + writeRows(prefix, totalRows); + + // Accept rows 00-04, reject rows 05-09. + // In reversed scan, scanner starts at row-09 and moves backward. + final byte[] rejectThreshold = Bytes.toBytes(String.format("%s-%02d", prefix, 5)); + final byte[] hintTarget = Bytes.toBytes(String.format("%s-%02d", prefix, 4)); + + FilterBase rejectFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThreshold, 0, rejectThreshold.length) >= 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(hintTarget); + } + }; + + FilterList andFilter = + new FilterList(FilterList.Operator.MUST_PASS_ALL, Arrays.asList(rejectFilter)); + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThreshold, 0, rejectThreshold.length) >= 0; + } + }; + + Scan hintScan = new Scan().addFamily(FAMILY).setReversed(true).setFilter(andFilter); + Scan noHintScan = new Scan().addFamily(FAMILY).setReversed(true).setFilter(noHintFilter); + + List hintResults = scanAll(hintScan); + List noHintResults = scanAll(noHintScan); + + assertEquals(noHintResults.size(), hintResults.size(), + "Reversed AND FilterList must return same cells"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(5 * CELLS_PER_ROW, hintResults.size()); + } + + @Test + public void testFilterListORReversedScanHint() throws IOException { + final String prefix = "row"; + final int totalRows = 10; + writeRows(prefix, totalRows); + + final byte[] rejectThreshold = Bytes.toBytes(String.format("%s-%02d", prefix, 5)); + final byte[] hintTarget = Bytes.toBytes(String.format("%s-%02d", prefix, 4)); + + FilterBase rejectFilterA = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThreshold, 0, rejectThreshold.length) >= 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(hintTarget); + } + }; + + FilterBase rejectFilterB = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThreshold, 0, rejectThreshold.length) >= 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(hintTarget); + } + }; + + FilterList orFilter = new FilterList(FilterList.Operator.MUST_PASS_ONE, + Arrays.asList(rejectFilterA, rejectFilterB)); + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThreshold, 0, rejectThreshold.length) >= 0; + } + }; + + Scan hintScan = new Scan().addFamily(FAMILY).setReversed(true).setFilter(orFilter); + Scan noHintScan = new Scan().addFamily(FAMILY).setReversed(true).setFilter(noHintFilter); + + List hintResults = scanAll(hintScan); + List noHintResults = scanAll(noHintScan); + + assertEquals(noHintResults.size(), hintResults.size(), + "Reversed OR FilterList must return same cells"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(5 * CELLS_PER_ROW, hintResults.size()); + } + + @Test + public void testColumnRangeFilterGetSkipHintIntegration() throws IOException { + final long insideTs = 2000; + final long outsideTs = 500; + final int rowCount = 5; + + for (int i = 0; i < rowCount; i++) { + byte[] row = Bytes.toBytes(String.format("colrange-%02d", i)); + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + // Qualifiers: "a", "b", "c", "d" — ColumnRangeFilter will select "b" to "c". + p.addColumn(FAMILY, Bytes.toBytes("a"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("a"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("b"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("b"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("c"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("c"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("d"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("d"), outsideTs, VALUE); + region.put(p); + } + region.flush(true); + + ColumnRangeFilter colFilter = + new ColumnRangeFilter(Bytes.toBytes("b"), true, Bytes.toBytes("c"), true); + + // Time range [1000, 3000): insideTs cells pass, outsideTs cells hit the time-range gate. + // ColumnRangeFilter.getSkipHint() should be consulted for structurally skipped cells. + Scan hintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setFilter(colFilter); + Scan noHintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setFilter(colFilter); + + List hintResults = scanAll(hintScan); + List noHintResults = scanAll(noHintScan); + + assertEquals(noHintResults.size(), hintResults.size()); + // Should get "b" and "c" qualifiers for each row, only insideTs versions. + assertEquals(rowCount * 2, hintResults.size()); + } + + @Test + public void testColumnPrefixFilterGetSkipHintIntegration() throws IOException { + final long insideTs = 2000; + final long outsideTs = 500; + final int rowCount = 5; + + for (int i = 0; i < rowCount; i++) { + byte[] row = Bytes.toBytes(String.format("colpfx-%02d", i)); + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + // Qualifiers: "aaa", "abc", "abd", "xyz" + p.addColumn(FAMILY, Bytes.toBytes("aaa"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("aaa"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("abc"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("abc"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("abd"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("abd"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("xyz"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("xyz"), outsideTs, VALUE); + region.put(p); + } + region.flush(true); + + // ColumnPrefixFilter with prefix "ab" should match "abc" and "abd". + ColumnPrefixFilter prefixFilter = new ColumnPrefixFilter(Bytes.toBytes("ab")); + + Scan hintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setFilter(prefixFilter); + Scan noHintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setFilter(prefixFilter); + + List hintResults = scanAll(hintScan); + List noHintResults = scanAll(noHintScan); + + assertEquals(noHintResults.size(), hintResults.size()); + // Should get "abc" and "abd" qualifiers for each row, only insideTs versions. + assertEquals(rowCount * 2, hintResults.size()); + } + + @Test + public void testFilterListANDGetSkipHintComposition() throws IOException { + final long insideTs = 2000; + final long outsideTs = 500; + final int rowCount = 5; + + for (int i = 0; i < rowCount; i++) { + byte[] row = Bytes.toBytes(String.format("composed-%02d", i)); + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + p.addColumn(FAMILY, Bytes.toBytes("a"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("a"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("b"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("b"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("c"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("c"), outsideTs, VALUE); + region.put(p); + } + region.flush(true); + + // Compose ColumnRangeFilter("b","c") AND a custom skip-hint filter. + // The AND composition should take the max hint. + final AtomicInteger skipHintCalls = new AtomicInteger(0); + ColumnRangeFilter colRange = + new ColumnRangeFilter(Bytes.toBytes("b"), true, Bytes.toBytes("c"), true); + FilterBase customSkipHint = new FilterBase() { + @Override + public Cell getSkipHint(Cell skippedCell) { + skipHintCalls.incrementAndGet(); + return PrivateCellUtil.createFirstOnNextRow(skippedCell); + } + }; + + FilterList andFilter = + new FilterList(FilterList.Operator.MUST_PASS_ALL, Arrays.asList(colRange, customSkipHint)); + + Scan scan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setFilter(andFilter); + List results = scanAll(scan); + + // Should get "b" and "c" for each row, only insideTs. + assertEquals(rowCount * 2, results.size()); + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListHintDelegation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListHintDelegation.java new file mode 100644 index 000000000000..4878b6fc09a5 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListHintDelegation.java @@ -0,0 +1,485 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.filter; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; + +import java.io.IOException; +import java.util.Arrays; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.filter.FilterList.Operator; +import org.apache.hadoop.hbase.testclassification.FilterTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +/** + * Unit tests for {@code getHintForRejectedRow} and {@code getSkipHint} delegation in composite + * filters ({@link FilterList}, {@link SkipFilter}, {@link WhileMatchFilter}). + */ +@Tag(FilterTests.TAG) +@Tag(SmallTests.TAG) +public class TestFilterListHintDelegation { + + private static final byte[] ROW_A = Bytes.toBytes("rowA"); + private static final byte[] ROW_B = Bytes.toBytes("rowB"); + private static final byte[] ROW_C = Bytes.toBytes("rowC"); + private static final byte[] FAMILY = Bytes.toBytes("f"); + private static final byte[] QUALIFIER = Bytes.toBytes("q"); + + private static KeyValue kv(byte[] row) { + return new KeyValue(row, FAMILY, QUALIFIER, 1L, KeyValue.Type.Put, Bytes.toBytes("v")); + } + + /** Filter that returns a fixed hint from {@code getHintForRejectedRow}. */ + private static FilterBase fixedRejectedRowHintFilter(Cell hint) { + return new FilterBase() { + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return hint; + } + }; + } + + /** Filter that returns a fixed hint from {@code getSkipHint}. */ + private static FilterBase fixedSkipHintFilter(Cell hint) { + return new FilterBase() { + @Override + public Cell getSkipHint(Cell skippedCell) { + return hint; + } + }; + } + + /** Filter that claims {@code filterAllRemaining() == true}. */ + private static FilterBase terminatedFilter(Cell hint) { + return new FilterBase() { + @Override + public boolean filterAllRemaining() { + return true; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return hint; + } + + @Override + public Cell getSkipHint(Cell skippedCell) { + return hint; + } + }; + } + + // ---- AND (MUST_PASS_ALL) getHintForRejectedRow ---- + + @Test + public void testANDGetHintForRejectedRow_takesMax() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(fixedRejectedRowHintFilter(hintA), fixedRejectedRowHintFilter(hintC))); + + Cell result = fl.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintC, result), + "AND must return the farthest (max) hint"); + } + + @Test + public void testANDGetHintForRejectedRow_ignoresNull() throws IOException { + Cell hintB = kv(ROW_B); + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(fixedRejectedRowHintFilter(null), fixedRejectedRowHintFilter(hintB))); + + Cell result = fl.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintB, result), + "AND must ignore null hints and return the non-null one"); + } + + @Test + public void testANDGetHintForRejectedRow_allNull() throws IOException { + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(fixedRejectedRowHintFilter(null), fixedRejectedRowHintFilter(null))); + + assertNull(fl.getHintForRejectedRow(kv(ROW_A)), "AND with all-null hints must return null"); + } + + @Test + public void testANDGetHintForRejectedRow_reversed() throws IOException { + // In reversed scan, "max" in scan direction means the smaller row key. + Cell hintA = kv(ROW_A); + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(fixedRejectedRowHintFilter(hintA), fixedRejectedRowHintFilter(hintC))); + fl.setReversed(true); + + Cell result = fl.getHintForRejectedRow(kv(ROW_C)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintA, result), + "Reversed AND must return the smaller row key (farthest in reverse direction)"); + } + + // ---- AND (MUST_PASS_ALL) getSkipHint ---- + + @Test + public void testANDGetSkipHint_takesMax() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(fixedSkipHintFilter(hintA), fixedSkipHintFilter(hintC))); + + Cell result = fl.getSkipHint(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintC, result), + "AND getSkipHint must return the farthest (max) hint"); + } + + @Test + public void testANDGetSkipHint_ignoresNull() throws IOException { + Cell hintB = kv(ROW_B); + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(fixedSkipHintFilter(null), fixedSkipHintFilter(hintB))); + + Cell result = fl.getSkipHint(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintB, result), + "AND getSkipHint must ignore null and return the non-null hint"); + } + + @Test + public void testANDGetSkipHint_allNull() throws IOException { + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(fixedSkipHintFilter(null), fixedSkipHintFilter(null))); + + assertNull(fl.getSkipHint(kv(ROW_A)), "AND with all-null skip hints must return null"); + } + + @Test + public void testANDGetSkipHint_reversed() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(fixedSkipHintFilter(hintA), fixedSkipHintFilter(hintC))); + fl.setReversed(true); + + Cell result = fl.getSkipHint(kv(ROW_C)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintA, result), + "Reversed AND getSkipHint must return the smaller row key (farthest in reverse direction)"); + } + + // ---- OR (MUST_PASS_ONE) getHintForRejectedRow ---- + + @Test + public void testORGetHintForRejectedRow_takesMin() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(fixedRejectedRowHintFilter(hintA), fixedRejectedRowHintFilter(hintC))); + + Cell result = fl.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintA, result), + "OR must return the nearest (min) hint"); + } + + @Test + public void testORGetHintForRejectedRow_nullReturnsNull() throws IOException { + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(fixedRejectedRowHintFilter(null), fixedRejectedRowHintFilter(hintC))); + + assertNull(fl.getHintForRejectedRow(kv(ROW_A)), + "OR must return null if any sub-filter returns null (can't safely skip)"); + } + + @Test + public void testORGetHintForRejectedRow_allHints() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintB = kv(ROW_B); + Cell hintC = kv(ROW_C); + FilterList fl = + new FilterList(Operator.MUST_PASS_ONE, Arrays.asList(fixedRejectedRowHintFilter(hintB), + fixedRejectedRowHintFilter(hintA), fixedRejectedRowHintFilter(hintC))); + + Cell result = fl.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintA, result), + "OR with all hints must return the minimum"); + } + + @Test + public void testORGetHintForRejectedRow_reversed() throws IOException { + // In reversed scan, "min" in scan direction means the larger row key. + Cell hintA = kv(ROW_A); + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(fixedRejectedRowHintFilter(hintA), fixedRejectedRowHintFilter(hintC))); + fl.setReversed(true); + + Cell result = fl.getHintForRejectedRow(kv(ROW_C)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintC, result), + "Reversed OR must return the larger row key (nearest in reverse direction)"); + } + + // ---- OR (MUST_PASS_ONE) getSkipHint ---- + + @Test + public void testORGetSkipHint_takesMin() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(fixedSkipHintFilter(hintA), fixedSkipHintFilter(hintC))); + + Cell result = fl.getSkipHint(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintA, result), + "OR getSkipHint must return the nearest (min) hint"); + } + + @Test + public void testORGetSkipHint_nullReturnsNull() throws IOException { + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(fixedSkipHintFilter(null), fixedSkipHintFilter(hintC))); + + assertNull(fl.getSkipHint(kv(ROW_A)), + "OR getSkipHint must return null if any sub-filter returns null"); + } + + @Test + public void testORGetSkipHint_allHints() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintB = kv(ROW_B); + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, Arrays.asList(fixedSkipHintFilter(hintB), + fixedSkipHintFilter(hintA), fixedSkipHintFilter(hintC))); + + Cell result = fl.getSkipHint(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintA, result), + "OR getSkipHint with all hints must return the minimum"); + } + + @Test + public void testORGetSkipHint_reversed() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(fixedSkipHintFilter(hintA), fixedSkipHintFilter(hintC))); + fl.setReversed(true); + + Cell result = fl.getSkipHint(kv(ROW_C)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintC, result), + "Reversed OR getSkipHint must return the larger row key (nearest in reverse direction)"); + } + + // ---- filterAllRemaining for getSkipHint ---- + + @Test + public void testFilterAllRemainingSubFilterSkippedForGetSkipHint() throws IOException { + Cell terminatedHint = kv(ROW_C); + Cell activeHint = kv(ROW_B); + + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(terminatedFilter(terminatedHint), fixedSkipHintFilter(activeHint))); + + Cell result = fl.getSkipHint(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(activeHint, result), + "Terminated sub-filters must be skipped for getSkipHint too"); + } + + @Test + public void testORFilterAllRemainingSubFilterSkippedForGetHintForRejectedRow() + throws IOException { + Cell terminatedHint = kv(ROW_C); + Cell activeHint = kv(ROW_B); + + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(terminatedFilter(terminatedHint), fixedRejectedRowHintFilter(activeHint))); + + Cell result = fl.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(activeHint, result), + "OR must skip terminated sub-filters and return the active filter's hint"); + } + + @Test + public void testORFilterAllRemainingSubFilterSkippedForGetSkipHint() throws IOException { + Cell terminatedHint = kv(ROW_C); + Cell activeHint = kv(ROW_B); + + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(terminatedFilter(terminatedHint), fixedSkipHintFilter(activeHint))); + + Cell result = fl.getSkipHint(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(activeHint, result), + "OR must skip terminated sub-filters for getSkipHint too"); + } + + // ---- SkipFilter delegation ---- + + @Test + public void testSkipFilterDelegatesGetHintForRejectedRow() throws IOException { + Cell hint = kv(ROW_B); + SkipFilter sf = new SkipFilter(fixedRejectedRowHintFilter(hint)); + + Cell result = sf.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hint, result), + "SkipFilter must delegate getHintForRejectedRow to wrapped filter"); + } + + @Test + public void testSkipFilterDelegatesGetSkipHint() throws IOException { + Cell hint = kv(ROW_B); + SkipFilter sf = new SkipFilter(fixedSkipHintFilter(hint)); + + Cell result = sf.getSkipHint(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hint, result), + "SkipFilter must delegate getSkipHint to wrapped filter"); + } + + // ---- WhileMatchFilter delegation ---- + + @Test + public void testWhileMatchFilterDelegatesGetHintForRejectedRow() throws IOException { + Cell hint = kv(ROW_B); + WhileMatchFilter wmf = new WhileMatchFilter(fixedRejectedRowHintFilter(hint)); + + Cell result = wmf.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hint, result), + "WhileMatchFilter must delegate getHintForRejectedRow to wrapped filter"); + } + + @Test + public void testWhileMatchFilterDelegatesGetSkipHint() throws IOException { + Cell hint = kv(ROW_B); + WhileMatchFilter wmf = new WhileMatchFilter(fixedSkipHintFilter(hint)); + + Cell result = wmf.getSkipHint(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hint, result), + "WhileMatchFilter must delegate getSkipHint to wrapped filter"); + } + + // ---- FilterList facade delegation ---- + + @Test + public void testFilterListDelegatesToFilterListBase() throws IOException { + Cell hint = kv(ROW_B); + // AND variant + FilterList andList = new FilterList(Operator.MUST_PASS_ALL, fixedRejectedRowHintFilter(hint)); + assertNotNull(andList.getHintForRejectedRow(kv(ROW_A)), + "FilterList(AND) must delegate getHintForRejectedRow"); + // OR variant + FilterList orList = new FilterList(Operator.MUST_PASS_ONE, fixedSkipHintFilter(hint)); + assertNotNull(orList.getSkipHint(kv(ROW_A)), "FilterList(OR) must delegate getSkipHint"); + } + + // ---- Nested FilterList ---- + + @Test + public void testNestedFilterList() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintB = kv(ROW_B); + Cell hintC = kv(ROW_C); + + // Inner OR: returns min(hintA, hintC) = hintA + FilterList innerOR = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(fixedRejectedRowHintFilter(hintA), fixedRejectedRowHintFilter(hintC))); + // Outer AND: returns max(innerOR=hintA, hintB) = hintB + FilterList outerAND = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(innerOR, fixedRejectedRowHintFilter(hintB))); + + Cell result = outerAND.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintB, result), + "Nested AND(OR(A,C), B) must return max(min(A,C), B) = max(A, B) = B"); + } + + // ---- filterAllRemaining sub-filter is skipped ---- + + @Test + public void testFilterAllRemainingSubFilterSkipped() throws IOException { + Cell terminatedHint = kv(ROW_C); + Cell activeHint = kv(ROW_B); + + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(terminatedFilter(terminatedHint), fixedRejectedRowHintFilter(activeHint))); + + Cell result = fl.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(activeHint, result), + "Terminated sub-filters (filterAllRemaining=true) must be skipped"); + } + + // ---- Empty FilterList ---- + + @Test + public void testEmptyFilterListReturnsNull() throws IOException { + FilterList emptyAND = new FilterList(Operator.MUST_PASS_ALL); + FilterList emptyOR = new FilterList(Operator.MUST_PASS_ONE); + + assertNull(emptyAND.getHintForRejectedRow(kv(ROW_A)), + "Empty AND FilterList must return null for getHintForRejectedRow"); + assertNull(emptyAND.getSkipHint(kv(ROW_A)), + "Empty AND FilterList must return null for getSkipHint"); + assertNull(emptyOR.getHintForRejectedRow(kv(ROW_A)), + "Empty OR FilterList must return null for getHintForRejectedRow"); + assertNull(emptyOR.getSkipHint(kv(ROW_A)), + "Empty OR FilterList must return null for getSkipHint"); + } + + // ---- Single filter pass-through ---- + + @Test + public void testSingleFilterAND() throws IOException { + Cell hint = kv(ROW_B); + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, fixedRejectedRowHintFilter(hint)); + + Cell result = fl.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hint, result), + "Single-filter AND must pass through the hint unchanged"); + } + + @Test + public void testSingleFilterOR() throws IOException { + Cell hint = kv(ROW_B); + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, fixedRejectedRowHintFilter(hint)); + + Cell result = fl.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hint, result), + "Single-filter OR must pass through the hint unchanged"); + } +} From b847a408cd2ab2f9707db6e9849ee2dc63a3b532 Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Mon, 11 May 2026 14:34:12 +0530 Subject: [PATCH 2/4] HBASE-30150 Addendum: add getSkipHint to MultipleColumnPrefixFilter and strengthen test coverage - Add getSkipHint() to MultipleColumnPrefixFilter so it participates in the structural-skip hint optimization alongside ColumnPrefixFilter and ColumnRangeFilter. - Add unit tests for the all-sub-filters-terminated edge case in both AND and OR FilterList for getHintForRejectedRow and getSkipHint. - Add integration test for MultipleColumnPrefixFilter.getSkipHint with time-range gating. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../filter/MultipleColumnPrefixFilter.java | 23 ++++++++++ .../filter/TestFilterHintForRejectedRow.java | 40 ++++++++++++++++++ .../filter/TestFilterListHintDelegation.java | 42 +++++++++++++++++++ 3 files changed, 105 insertions(+) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultipleColumnPrefixFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultipleColumnPrefixFilter.java index d2b9396ed98a..4fbf43ed99b5 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultipleColumnPrefixFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultipleColumnPrefixFilter.java @@ -173,6 +173,29 @@ public Cell getNextCellHint(Cell cell) { return PrivateCellUtil.createFirstOnRowCol(cell, hint, 0, hint.length); } + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + if (sortedPrefixes.isEmpty()) { + return null; + } + byte[] qualifier = CellUtil.cloneQualifier(skippedCell); + TreeSet lesserOrEqual = (TreeSet) sortedPrefixes.headSet(qualifier, true); + byte[] target; + if (lesserOrEqual.isEmpty()) { + target = sortedPrefixes.first(); + } else { + byte[] largest = lesserOrEqual.last(); + if (Bytes.startsWith(qualifier, largest)) { + return null; + } + target = sortedPrefixes.higher(largest); + if (target == null) { + return null; + } + } + return PrivateCellUtil.createFirstOnRowCol(skippedCell, target, 0, target.length); + } + public TreeSet createTreeSet() { return new TreeSet<>(new Comparator() { @Override diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java index accb8fc9ed64..dd0767ba6a29 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java @@ -1167,6 +1167,46 @@ public void testColumnPrefixFilterGetSkipHintIntegration() throws IOException { assertEquals(rowCount * 2, hintResults.size()); } + @Test + public void testMultipleColumnPrefixFilterGetSkipHintIntegration() throws IOException { + final long insideTs = 2000; + final long outsideTs = 500; + final int rowCount = 5; + + for (int i = 0; i < rowCount; i++) { + byte[] row = Bytes.toBytes(String.format("mcpfx-%02d", i)); + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + // Qualifiers: "aaa", "abc", "abd", "bbb", "xyz" + p.addColumn(FAMILY, Bytes.toBytes("aaa"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("aaa"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("abc"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("abc"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("abd"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("abd"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("bbb"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("bbb"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("xyz"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("xyz"), outsideTs, VALUE); + region.put(p); + } + region.flush(true); + + // MultipleColumnPrefixFilter with prefixes "ab" and "bb" should match "abc", "abd", "bbb". + MultipleColumnPrefixFilter mcpFilter = + new MultipleColumnPrefixFilter(new byte[][] { Bytes.toBytes("ab"), Bytes.toBytes("bb") }); + + Scan hintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setFilter(mcpFilter); + Scan noHintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setFilter(mcpFilter); + + List hintResults = scanAll(hintScan); + List noHintResults = scanAll(noHintScan); + + assertEquals(noHintResults.size(), hintResults.size()); + // Should get "abc", "abd", "bbb" qualifiers for each row, only insideTs versions. + assertEquals(rowCount * 3, hintResults.size()); + } + @Test public void testFilterListANDGetSkipHintComposition() throws IOException { final long insideTs = 2000; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListHintDelegation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListHintDelegation.java index 4878b6fc09a5..1c21d54f020d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListHintDelegation.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListHintDelegation.java @@ -442,6 +442,48 @@ public void testFilterAllRemainingSubFilterSkipped() throws IOException { "Terminated sub-filters (filterAllRemaining=true) must be skipped"); } + // ---- All sub-filters terminated ---- + + @Test + public void testORAllSubFiltersTerminated_getHintForRejectedRow() throws IOException { + Cell hint = kv(ROW_B); + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(terminatedFilter(hint), terminatedFilter(hint))); + + assertNull(fl.getHintForRejectedRow(kv(ROW_A)), + "OR with all terminated sub-filters must return null for getHintForRejectedRow"); + } + + @Test + public void testORAllSubFiltersTerminated_getSkipHint() throws IOException { + Cell hint = kv(ROW_B); + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(terminatedFilter(hint), terminatedFilter(hint))); + + assertNull(fl.getSkipHint(kv(ROW_A)), + "OR with all terminated sub-filters must return null for getSkipHint"); + } + + @Test + public void testANDAllSubFiltersTerminated_getHintForRejectedRow() throws IOException { + Cell hint = kv(ROW_B); + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(terminatedFilter(hint), terminatedFilter(hint))); + + assertNull(fl.getHintForRejectedRow(kv(ROW_A)), + "AND with all terminated sub-filters must return null for getHintForRejectedRow"); + } + + @Test + public void testANDAllSubFiltersTerminated_getSkipHint() throws IOException { + Cell hint = kv(ROW_B); + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(terminatedFilter(hint), terminatedFilter(hint))); + + assertNull(fl.getSkipHint(kv(ROW_A)), + "AND with all terminated sub-filters must return null for getSkipHint"); + } + // ---- Empty FilterList ---- @Test From 68147e433859137f4c31689a7d7ac8a4b3ab3ce3 Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Tue, 12 May 2026 09:59:17 +0530 Subject: [PATCH 3/4] HBASE-30150 Addendum: fix contract violation in FilterListWithAND.getHintForRejectedRow - Track which sub-filters actually rejected via filterRowKey() using a boolean[] rejectedByFilterRowKey array (mirrors seekHintFilters pattern). getHintForRejectedRow now only consults sub-filters that individually returned true from filterRowKey, honouring the per-filter contract. - Clarify Filter.java javadoc: OR's null-collapse semantic is now explicitly documented for both getHintForRejectedRow and getSkipHint. - Add unit test proving the contract: a non-rejecting sub-filter that throws IllegalStateException from getHintForRejectedRow is never called. - Add divergent-hint tests (unit + integration) asserting AND correctly returns max when sub-filters hint to different targets. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../apache/hadoop/hbase/filter/Filter.java | 10 +- .../hbase/filter/FilterListWithAND.java | 15 +- .../filter/TestFilterHintForRejectedRow.java | 65 +++++++ .../filter/TestFilterListHintDelegation.java | 162 ++++++++++++++++-- 4 files changed, 234 insertions(+), 18 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java index e99821f96c49..bf64b1d70e27 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java @@ -218,8 +218,9 @@ public enum ReturnCode { * the scan direction. *
  • Composite filter support: {@code FilterList} (both {@code MUST_PASS_ALL} * and {@code MUST_PASS_ONE}), {@code SkipFilter}, and {@code WhileMatchFilter} delegate this - * method to their sub-filters and merge the results (maximal step for AND, minimal step for - * OR).
  • + * method to their sub-filters and merge the results (maximal step for AND; for OR, the nearest + * hint is returned only when every non-terminated sub-filter provides one — any null collapses + * the OR result to null). * * @param firstRowCell the first cell encountered in the rejected row; contains the row key that * was passed to {@code filterRowKey} @@ -258,8 +259,9 @@ public Cell getHintForRejectedRow(final Cell firstRowCell) throws IOException { * direction are silently ignored. *
  • Composite filter support: {@code FilterList} (both {@code MUST_PASS_ALL} * and {@code MUST_PASS_ONE}), {@code SkipFilter}, and {@code WhileMatchFilter} delegate this - * method to their sub-filters and merge the results (maximal step for AND, minimal step for - * OR).
  • + * method to their sub-filters and merge the results (maximal step for AND; for OR, the nearest + * hint is returned only when every non-terminated sub-filter provides one — any null collapses + * the OR result to null). * * @param skippedCell the cell that was rejected by the time-range, column, or version gate before * {@code filterCell} could be consulted diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithAND.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithAND.java index 52600fe2de17..4aacecac1efa 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithAND.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithAND.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Objects; @@ -34,6 +35,7 @@ public class FilterListWithAND extends FilterListBase { private List seekHintFilters = new ArrayList<>(); private boolean[] hintingFilters; + private boolean[] rejectedByFilterRowKey; public FilterListWithAND(List filters) { super(filters); @@ -41,6 +43,7 @@ public FilterListWithAND(List filters) { // sub-filters (because all sub-filters return INCLUDE*). So here, fill this array with true. we // keep this in FilterListWithAND for abstracting the transformCell() in FilterListBase. subFiltersIncludedCell = new ArrayList<>(Collections.nCopies(filters.size(), true)); + rejectedByFilterRowKey = new boolean[filters.size()]; cacheHintingFilters(); } @@ -51,6 +54,7 @@ public void addFilterLists(List filters) { } this.filters.addAll(filters); this.subFiltersIncludedCell.addAll(Collections.nCopies(filters.size(), true)); + this.rejectedByFilterRowKey = new boolean[this.filters.size()]; this.cacheHintingFilters(); } @@ -237,6 +241,7 @@ public void reset() throws IOException { filters.get(i).reset(); } seekHintFilters.clear(); + Arrays.fill(rejectedByFilterRowKey, false); } @Override @@ -258,6 +263,7 @@ public boolean filterRowKey(Cell firstRowCell) throws IOException { // will catch the row changed event by filterRowKey(). If we return early here, those // filters will have no chance to update their row state. anyRowKeyFiltered = true; + rejectedByFilterRowKey[i] = true; } else if (hintingFilters[i]) { // If filterRowKey returns false and this is a hinting filter, then we must not filter this // rowkey. @@ -319,8 +325,10 @@ public Cell getNextCellHint(Cell currentCell) throws IOException { } /** - * Maximal step: return the farthest hint among sub-filters. Null hints are ignored; if no - * sub-filter provides a hint, return null. + * Maximal step: return the farthest hint among sub-filters that actually rejected the row. Only + * sub-filters whose {@link Filter#filterRowKey(Cell)} returned {@code true} are consulted, + * honouring the per-filter contract. Null hints are ignored; if no rejecting sub-filter provides + * a hint, return null. */ @Override public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException { @@ -329,6 +337,9 @@ public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException { } Cell maxHint = null; for (int i = 0, n = filters.size(); i < n; i++) { + if (!rejectedByFilterRowKey[i]) { + continue; + } Filter filter = filters.get(i); if (filter.filterAllRemaining()) { continue; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java index dd0767ba6a29..1c428cdae530 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java @@ -1249,4 +1249,69 @@ public Cell getSkipHint(Cell skippedCell) { // Should get "b" and "c" for each row, only insideTs. assertEquals(rowCount * 2, results.size()); } + + @Test + public void testFilterListANDDivergentHints() throws IOException { + final String prefix = "row"; + final int totalRows = 10; + writeRows(prefix, totalRows); + + // Filter A rejects rows 0-4, hints to row-03 (a conservative hint). + // Filter B rejects rows 0-6, hints to row-07 (a more aggressive hint). + // Both reject rows 0-4 (overlap). AND merges => takes max => row-07. + // Rows 0-6 are rejected by the composite (at least one rejects each). + // Scan should return rows 07-09. + final byte[] rejectThresholdA = Bytes.toBytes(String.format("%s-%02d", prefix, 5)); + final byte[] hintTargetA = Bytes.toBytes(String.format("%s-%02d", prefix, 3)); + final byte[] rejectThresholdB = Bytes.toBytes(String.format("%s-%02d", prefix, 7)); + final byte[] hintTargetB = Bytes.toBytes(String.format("%s-%02d", prefix, 7)); + + FilterBase filterA = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThresholdA, 0, rejectThresholdA.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(hintTargetA); + } + }; + + FilterBase filterB = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThresholdB, 0, rejectThresholdB.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(hintTargetB); + } + }; + + FilterList andFilter = + new FilterList(FilterList.Operator.MUST_PASS_ALL, Arrays.asList(filterA, filterB)); + + FilterBase baseline = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThresholdB, 0, rejectThresholdB.length) < 0; + } + }; + + List hintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(andFilter)); + List baselineResults = scanAll(new Scan().addFamily(FAMILY).setFilter(baseline)); + + assertEquals(baselineResults.size(), hintResults.size(), + "AND with divergent hints must return same cells as baseline"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), baselineResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(3 * CELLS_PER_ROW, hintResults.size()); + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListHintDelegation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListHintDelegation.java index 1c21d54f020d..d165e4d289a8 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListHintDelegation.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListHintDelegation.java @@ -20,6 +20,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; import java.util.Arrays; @@ -61,6 +62,21 @@ public Cell getHintForRejectedRow(Cell firstRowCell) { }; } + /** Filter that rejects every row via {@code filterRowKey} and returns a fixed hint. */ + private static FilterBase rejectingRowHintFilter(Cell hint) { + return new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return true; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return hint; + } + }; + } + /** Filter that returns a fixed hint from {@code getSkipHint}. */ private static FilterBase fixedSkipHintFilter(Cell hint) { return new FilterBase() { @@ -98,7 +114,8 @@ public void testANDGetHintForRejectedRow_takesMax() throws IOException { Cell hintA = kv(ROW_A); Cell hintC = kv(ROW_C); FilterList fl = new FilterList(Operator.MUST_PASS_ALL, - Arrays.asList(fixedRejectedRowHintFilter(hintA), fixedRejectedRowHintFilter(hintC))); + Arrays.asList(rejectingRowHintFilter(hintA), rejectingRowHintFilter(hintC))); + fl.filterRowKey(kv(ROW_A)); Cell result = fl.getHintForRejectedRow(kv(ROW_A)); assertNotNull(result); @@ -110,7 +127,8 @@ public void testANDGetHintForRejectedRow_takesMax() throws IOException { public void testANDGetHintForRejectedRow_ignoresNull() throws IOException { Cell hintB = kv(ROW_B); FilterList fl = new FilterList(Operator.MUST_PASS_ALL, - Arrays.asList(fixedRejectedRowHintFilter(null), fixedRejectedRowHintFilter(hintB))); + Arrays.asList(rejectingRowHintFilter(null), rejectingRowHintFilter(hintB))); + fl.filterRowKey(kv(ROW_A)); Cell result = fl.getHintForRejectedRow(kv(ROW_A)); assertNotNull(result); @@ -121,19 +139,20 @@ public void testANDGetHintForRejectedRow_ignoresNull() throws IOException { @Test public void testANDGetHintForRejectedRow_allNull() throws IOException { FilterList fl = new FilterList(Operator.MUST_PASS_ALL, - Arrays.asList(fixedRejectedRowHintFilter(null), fixedRejectedRowHintFilter(null))); + Arrays.asList(rejectingRowHintFilter(null), rejectingRowHintFilter(null))); + fl.filterRowKey(kv(ROW_A)); assertNull(fl.getHintForRejectedRow(kv(ROW_A)), "AND with all-null hints must return null"); } @Test public void testANDGetHintForRejectedRow_reversed() throws IOException { - // In reversed scan, "max" in scan direction means the smaller row key. Cell hintA = kv(ROW_A); Cell hintC = kv(ROW_C); FilterList fl = new FilterList(Operator.MUST_PASS_ALL, - Arrays.asList(fixedRejectedRowHintFilter(hintA), fixedRejectedRowHintFilter(hintC))); + Arrays.asList(rejectingRowHintFilter(hintA), rejectingRowHintFilter(hintC))); fl.setReversed(true); + fl.filterRowKey(kv(ROW_C)); Cell result = fl.getHintForRejectedRow(kv(ROW_C)); assertNotNull(result); @@ -397,7 +416,8 @@ public void testWhileMatchFilterDelegatesGetSkipHint() throws IOException { public void testFilterListDelegatesToFilterListBase() throws IOException { Cell hint = kv(ROW_B); // AND variant - FilterList andList = new FilterList(Operator.MUST_PASS_ALL, fixedRejectedRowHintFilter(hint)); + FilterList andList = new FilterList(Operator.MUST_PASS_ALL, rejectingRowHintFilter(hint)); + andList.filterRowKey(kv(ROW_A)); assertNotNull(andList.getHintForRejectedRow(kv(ROW_A)), "FilterList(AND) must delegate getHintForRejectedRow"); // OR variant @@ -413,12 +433,13 @@ public void testNestedFilterList() throws IOException { Cell hintB = kv(ROW_B); Cell hintC = kv(ROW_C); - // Inner OR: returns min(hintA, hintC) = hintA + // Inner OR: all sub-filters reject, so OR rejects too. Returns min(hintA, hintC) = hintA. FilterList innerOR = new FilterList(Operator.MUST_PASS_ONE, - Arrays.asList(fixedRejectedRowHintFilter(hintA), fixedRejectedRowHintFilter(hintC))); + Arrays.asList(rejectingRowHintFilter(hintA), rejectingRowHintFilter(hintC))); // Outer AND: returns max(innerOR=hintA, hintB) = hintB - FilterList outerAND = new FilterList(Operator.MUST_PASS_ALL, - Arrays.asList(innerOR, fixedRejectedRowHintFilter(hintB))); + FilterList outerAND = + new FilterList(Operator.MUST_PASS_ALL, Arrays.asList(innerOR, rejectingRowHintFilter(hintB))); + outerAND.filterRowKey(kv(ROW_A)); Cell result = outerAND.getHintForRejectedRow(kv(ROW_A)); assertNotNull(result); @@ -433,8 +454,11 @@ public void testFilterAllRemainingSubFilterSkipped() throws IOException { Cell terminatedHint = kv(ROW_C); Cell activeHint = kv(ROW_B); + // Place the active rejecting filter first so filterRowKey reaches it before + // encountering the terminated filter (which causes early return). FilterList fl = new FilterList(Operator.MUST_PASS_ALL, - Arrays.asList(terminatedFilter(terminatedHint), fixedRejectedRowHintFilter(activeHint))); + Arrays.asList(rejectingRowHintFilter(activeHint), terminatedFilter(terminatedHint))); + fl.filterRowKey(kv(ROW_A)); Cell result = fl.getHintForRejectedRow(kv(ROW_A)); assertNotNull(result); @@ -506,7 +530,8 @@ public void testEmptyFilterListReturnsNull() throws IOException { @Test public void testSingleFilterAND() throws IOException { Cell hint = kv(ROW_B); - FilterList fl = new FilterList(Operator.MUST_PASS_ALL, fixedRejectedRowHintFilter(hint)); + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, rejectingRowHintFilter(hint)); + fl.filterRowKey(kv(ROW_A)); Cell result = fl.getHintForRejectedRow(kv(ROW_A)); assertNotNull(result); @@ -524,4 +549,117 @@ public void testSingleFilterOR() throws IOException { assertEquals(0, CellComparator.getInstance().compare(hint, result), "Single-filter OR must pass through the hint unchanged"); } + + // ---- AND contract: only consult rejecting sub-filters for getHintForRejectedRow ---- + + @Test + public void testANDGetHintForRejectedRow_onlyConsultsRejectingSubFilters() throws IOException { + Cell hint = kv(ROW_C); + FilterBase rejectingFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return true; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return hint; + } + }; + FilterBase acceptingFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return false; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + throw new IllegalStateException( + "Contract violation: getHintForRejectedRow called on non-rejecting filter"); + } + }; + + FilterList fl = + new FilterList(Operator.MUST_PASS_ALL, Arrays.asList(rejectingFilter, acceptingFilter)); + assertTrue(fl.filterRowKey(kv(ROW_A)), "AND must reject when at least one sub-filter rejects"); + + Cell result = fl.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hint, result), + "AND must return the hint from the rejecting sub-filter only"); + } + + @Test + public void testANDGetHintForRejectedRow_takesMaxFromRejectingFilters() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintC = kv(ROW_C); + FilterBase rejectToA = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return true; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return hintA; + } + }; + FilterBase rejectToC = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return true; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return hintC; + } + }; + + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, Arrays.asList(rejectToA, rejectToC)); + fl.filterRowKey(kv(ROW_A)); + + Cell result = fl.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintC, result), + "AND must return max hint from rejecting sub-filters"); + } + + @Test + public void testANDGetHintForRejectedRow_reversedTakesMaxFromRejectingFilters() + throws IOException { + Cell hintA = kv(ROW_A); + Cell hintC = kv(ROW_C); + FilterBase rejectToA = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return true; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return hintA; + } + }; + FilterBase rejectToC = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return true; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return hintC; + } + }; + + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, Arrays.asList(rejectToA, rejectToC)); + fl.setReversed(true); + fl.filterRowKey(kv(ROW_C)); + + Cell result = fl.getHintForRejectedRow(kv(ROW_C)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintA, result), + "Reversed AND must return smallest row key (farthest in reverse) from rejecting filters"); + } } From 19065e2d3077719f5effa876ced3aa0fbf461a29 Mon Sep 17 00:00:00 2001 From: Shubham Roy Date: Wed, 13 May 2026 07:17:08 +0530 Subject: [PATCH 4/4] HBASE-30150 Addendum: address follow-up review nits - Preserve existing rejection state in addFilterLists via Arrays.copyOf instead of clobbering with a fresh zero array. - Add javadoc on rejectedByFilterRowKey field documenting the reset invariant (cleared only by reset(), callers must invoke between rows). - Update Filter.getHintForRejectedRow javadoc to make AND's per-sub-filter gating visible to filter authors. - Add reset-invariant unit test: reject row1 -> reset -> accept row2 -> assert getHintForRejectedRow returns null (no stale state). - Clarify fixedRejectedRowHintFilter vs rejectingRowHintFilter helper comments for future contributors. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../apache/hadoop/hbase/filter/Filter.java | 8 ++-- .../hbase/filter/FilterListWithAND.java | 7 ++- .../filter/TestFilterListHintDelegation.java | 48 ++++++++++++++++++- 3 files changed, 57 insertions(+), 6 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java index bf64b1d70e27..4e21a877fb6b 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java @@ -218,9 +218,11 @@ public enum ReturnCode { * the scan direction. *
  • Composite filter support: {@code FilterList} (both {@code MUST_PASS_ALL} * and {@code MUST_PASS_ONE}), {@code SkipFilter}, and {@code WhileMatchFilter} delegate this - * method to their sub-filters and merge the results (maximal step for AND; for OR, the nearest - * hint is returned only when every non-terminated sub-filter provides one — any null collapses - * the OR result to null).
  • + * method to their sub-filters and merge the results. For AND ({@code MUST_PASS_ALL}), only + * sub-filters whose {@code filterRowKey} individually returned {@code true} are consulted, and + * the farthest (maximal-step) hint among them is returned. For OR ({@code MUST_PASS_ONE}), the + * nearest hint is returned only when every non-terminated sub-filter provides one — any null + * collapses the OR result to null. * * @param firstRowCell the first cell encountered in the rejected row; contains the row key that * was passed to {@code filterRowKey} diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithAND.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithAND.java index 4aacecac1efa..6ef850145302 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithAND.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithAND.java @@ -35,6 +35,11 @@ public class FilterListWithAND extends FilterListBase { private List seekHintFilters = new ArrayList<>(); private boolean[] hintingFilters; + /** + * Tracks which sub-filters returned {@code true} from {@link Filter#filterRowKey(Cell)}. Set in + * {@code filterRowKey()}, consumed by {@code getHintForRejectedRow()}, cleared only by + * {@code reset()} — callers must invoke {@code reset()} between rows to avoid stale state. + */ private boolean[] rejectedByFilterRowKey; public FilterListWithAND(List filters) { @@ -54,7 +59,7 @@ public void addFilterLists(List filters) { } this.filters.addAll(filters); this.subFiltersIncludedCell.addAll(Collections.nCopies(filters.size(), true)); - this.rejectedByFilterRowKey = new boolean[this.filters.size()]; + this.rejectedByFilterRowKey = Arrays.copyOf(this.rejectedByFilterRowKey, this.filters.size()); this.cacheHintingFilters(); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListHintDelegation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListHintDelegation.java index d165e4d289a8..b4e760a7afe2 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListHintDelegation.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListHintDelegation.java @@ -52,7 +52,11 @@ private static KeyValue kv(byte[] row) { return new KeyValue(row, FAMILY, QUALIFIER, 1L, KeyValue.Type.Put, Bytes.toBytes("v")); } - /** Filter that returns a fixed hint from {@code getHintForRejectedRow}. */ + /** + * Filter that returns a fixed hint from {@code getHintForRejectedRow} without overriding + * {@code filterRowKey}. Used in OR / SkipFilter / WhileMatchFilter tests where the AND + * per-sub-filter rejection tracking does not apply. + */ private static FilterBase fixedRejectedRowHintFilter(Cell hint) { return new FilterBase() { @Override @@ -62,7 +66,11 @@ public Cell getHintForRejectedRow(Cell firstRowCell) { }; } - /** Filter that rejects every row via {@code filterRowKey} and returns a fixed hint. */ + /** + * Filter that rejects every row via {@code filterRowKey} and returns a fixed hint. Used in AND + * tests where {@code FilterListWithAND.getHintForRejectedRow} requires sub-filters to have + * individually rejected via {@code filterRowKey} before being consulted. + */ private static FilterBase rejectingRowHintFilter(Cell hint) { return new FilterBase() { @Override @@ -589,6 +597,42 @@ public Cell getHintForRejectedRow(Cell firstRowCell) { "AND must return the hint from the rejecting sub-filter only"); } + @Test + public void testANDGetHintForRejectedRow_resetClearsRejectionState() throws IOException { + Cell hint = kv(ROW_C); + FilterBase sometimesRejects = new FilterBase() { + private boolean shouldReject = true; + + @Override + public boolean filterRowKey(Cell cell) { + if (shouldReject) { + shouldReject = false; + return true; + } + return false; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return hint; + } + }; + + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, sometimesRejects); + + // Row 1: filter rejects, hint is available + assertTrue(fl.filterRowKey(kv(ROW_A))); + assertNotNull(fl.getHintForRejectedRow(kv(ROW_A))); + + // Reset between rows (as the scanner does) + fl.reset(); + + // Row 2: filter accepts, no rejection state should remain + fl.filterRowKey(kv(ROW_B)); + assertNull(fl.getHintForRejectedRow(kv(ROW_B)), + "After reset(), rejection state must be cleared — no stale hints"); + } + @Test public void testANDGetHintForRejectedRow_takesMaxFromRejectingFilters() throws IOException { Cell hintA = kv(ROW_A);