diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/ColumnPrefixFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/ColumnPrefixFilter.java index 9b477ec06cc7..4dd89c505dd2 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/ColumnPrefixFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/ColumnPrefixFilter.java @@ -148,6 +148,14 @@ public Cell getNextCellHint(Cell cell) { return PrivateCellUtil.createFirstOnRowCol(cell, prefix, 0, prefix.length); } + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + if (this.prefix == null) { + return null; + } + return getNextCellHint(skippedCell); + } + @Override public String toString() { return this.getClass().getSimpleName() + " " + Bytes.toStringBinary(this.prefix); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/ColumnRangeFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/ColumnRangeFilter.java index bbfec008c2c5..3ab7e2575429 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/ColumnRangeFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/ColumnRangeFilter.java @@ -200,6 +200,14 @@ public Cell getNextCellHint(Cell cell) { return PrivateCellUtil.createFirstOnRowCol(cell, this.minColumn, 0, len(this.minColumn)); } + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + if (this.minColumn == null) { + return null; + } + return getNextCellHint(skippedCell); + } + @Override public String toString() { return this.getClass().getSimpleName() + " " + (this.minColumnInclusive ? "[" : "(") diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java index 423e5b20aeed..4e21a877fb6b 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java @@ -216,9 +216,13 @@ public enum ReturnCode { * must point to a smaller row key (earlier in reverse-scan direction). The scanner * validates hint direction and falls back to {@code nextRow()} if the hint does not advance in * the scan direction. - *
  • Composite filter limitation: {@code FilterList}, {@code SkipFilter}, and - * {@code WhileMatchFilter} do not currently delegate this method to wrapped sub-filters. Hints - * from filters used inside these wrappers will be silently ignored.
  • + *
  • Composite filter support: {@code FilterList} (both {@code MUST_PASS_ALL} + * and {@code MUST_PASS_ONE}), {@code SkipFilter}, and {@code WhileMatchFilter} delegate this + * method to their sub-filters and merge the results. For AND ({@code MUST_PASS_ALL}), only + * sub-filters whose {@code filterRowKey} individually returned {@code true} are consulted, and + * the farthest (maximal-step) hint among them is returned. For OR ({@code MUST_PASS_ONE}), the + * nearest hint is returned only when every non-terminated sub-filter provides one — any null + * collapses the OR result to null.
  • * * @param firstRowCell the first cell encountered in the rejected row; contains the row key that * was passed to {@code filterRowKey} @@ -255,9 +259,11 @@ public Cell getHintForRejectedRow(final Cell firstRowCell) throws IOException { *
  • For reversed scans, the returned cell must have a smaller row key (i.e., earlier * in reverse-scan direction) than the {@code skippedCell}. Hints that do not advance in the scan * direction are silently ignored.
  • - *
  • Composite filter limitation: {@code FilterList}, {@code SkipFilter}, and - * {@code WhileMatchFilter} do not currently delegate this method to wrapped sub-filters. Hints - * from filters used inside these wrappers will be silently ignored.
  • + *
  • Composite filter support: {@code FilterList} (both {@code MUST_PASS_ALL} + * and {@code MUST_PASS_ONE}), {@code SkipFilter}, and {@code WhileMatchFilter} delegate this + * method to their sub-filters and merge the results (maximal step for AND; for OR, the nearest + * hint is returned only when every non-terminated sub-filter provides one — any null collapses + * the OR result to null).
  • * * @param skippedCell the cell that was rejected by the time-range, column, or version gate before * {@code filterCell} could be consulted diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java index cb42072e1d80..aba5c2424754 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterList.java @@ -242,6 +242,16 @@ public Cell getNextCellHint(Cell currentCell) throws IOException { return this.filterListBase.getNextCellHint(currentCell); } + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException { + return this.filterListBase.getHintForRejectedRow(firstRowCell); + } + + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + return this.filterListBase.getSkipHint(skippedCell); + } + @Override public boolean isFamilyEssential(byte[] name) throws IOException { return this.filterListBase.isFamilyEssential(name); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithAND.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithAND.java index a5e1eec45401..6ef850145302 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithAND.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithAND.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Objects; @@ -34,6 +35,12 @@ public class FilterListWithAND extends FilterListBase { private List seekHintFilters = new ArrayList<>(); private boolean[] hintingFilters; + /** + * Tracks which sub-filters returned {@code true} from {@link Filter#filterRowKey(Cell)}. Set in + * {@code filterRowKey()}, consumed by {@code getHintForRejectedRow()}, cleared only by + * {@code reset()} — callers must invoke {@code reset()} between rows to avoid stale state. + */ + private boolean[] rejectedByFilterRowKey; public FilterListWithAND(List filters) { super(filters); @@ -41,6 +48,7 @@ public FilterListWithAND(List filters) { // sub-filters (because all sub-filters return INCLUDE*). So here, fill this array with true. we // keep this in FilterListWithAND for abstracting the transformCell() in FilterListBase. subFiltersIncludedCell = new ArrayList<>(Collections.nCopies(filters.size(), true)); + rejectedByFilterRowKey = new boolean[filters.size()]; cacheHintingFilters(); } @@ -51,6 +59,7 @@ public void addFilterLists(List filters) { } this.filters.addAll(filters); this.subFiltersIncludedCell.addAll(Collections.nCopies(filters.size(), true)); + this.rejectedByFilterRowKey = Arrays.copyOf(this.rejectedByFilterRowKey, this.filters.size()); this.cacheHintingFilters(); } @@ -237,6 +246,7 @@ public void reset() throws IOException { filters.get(i).reset(); } seekHintFilters.clear(); + Arrays.fill(rejectedByFilterRowKey, false); } @Override @@ -258,6 +268,7 @@ public boolean filterRowKey(Cell firstRowCell) throws IOException { // will catch the row changed event by filterRowKey(). If we return early here, those // filters will have no chance to update their row state. anyRowKeyFiltered = true; + rejectedByFilterRowKey[i] = true; } else if (hintingFilters[i]) { // If filterRowKey returns false and this is a hinting filter, then we must not filter this // rowkey. @@ -318,6 +329,60 @@ public Cell getNextCellHint(Cell currentCell) throws IOException { return maxHint; } + /** + * Maximal step: return the farthest hint among sub-filters that actually rejected the row. Only + * sub-filters whose {@link Filter#filterRowKey(Cell)} returned {@code true} are consulted, + * honouring the per-filter contract. Null hints are ignored; if no rejecting sub-filter provides + * a hint, return null. + */ + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException { + if (isEmpty()) { + return super.getHintForRejectedRow(firstRowCell); + } + Cell maxHint = null; + for (int i = 0, n = filters.size(); i < n; i++) { + if (!rejectedByFilterRowKey[i]) { + continue; + } + Filter filter = filters.get(i); + if (filter.filterAllRemaining()) { + continue; + } + Cell hint = filter.getHintForRejectedRow(firstRowCell); + if (hint == null) { + continue; + } + if (maxHint == null || this.compareCell(maxHint, hint) < 0) { + maxHint = hint; + } + } + return maxHint; + } + + /** Maximal step: return the farthest skip hint among sub-filters. */ + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + if (isEmpty()) { + return super.getSkipHint(skippedCell); + } + Cell maxHint = null; + for (int i = 0, n = filters.size(); i < n; i++) { + Filter filter = filters.get(i); + if (filter.filterAllRemaining()) { + continue; + } + Cell hint = filter.getSkipHint(skippedCell); + if (hint == null) { + continue; + } + if (maxHint == null || this.compareCell(maxHint, hint) < 0) { + maxHint = hint; + } + } + return maxHint; + } + @Override public boolean equals(Object obj) { if (this == obj) { diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithOR.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithOR.java index fbe68ab13527..40da51b4ba29 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithOR.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterListWithOR.java @@ -392,6 +392,58 @@ public Cell getNextCellHint(Cell currentCell) throws IOException { return minKeyHint; } + /** + * Minimal step: return the nearest hint. If any non-terminated sub-filter returns null, the + * composite cannot safely skip, so return null. + */ + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException { + if (isEmpty()) { + return super.getHintForRejectedRow(firstRowCell); + } + Cell minHint = null; + for (int i = 0, n = filters.size(); i < n; i++) { + Filter filter = filters.get(i); + if (filter.filterAllRemaining()) { + continue; + } + Cell hint = filter.getHintForRejectedRow(firstRowCell); + if (hint == null) { + return null; + } + if (minHint == null || this.compareCell(minHint, hint) > 0) { + minHint = hint; + } + } + return minHint; + } + + /** + * Minimal step: return the nearest skip hint. Null from any sub-filter collapses the entire + * result to null. + */ + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + if (isEmpty()) { + return super.getSkipHint(skippedCell); + } + Cell minHint = null; + for (int i = 0, n = filters.size(); i < n; i++) { + Filter filter = filters.get(i); + if (filter.filterAllRemaining()) { + continue; + } + Cell hint = filter.getSkipHint(skippedCell); + if (hint == null) { + return null; + } + if (minHint == null || this.compareCell(minHint, hint) > 0) { + minHint = hint; + } + } + return minHint; + } + @Override public boolean equals(Object obj) { if (this == obj) { diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultipleColumnPrefixFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultipleColumnPrefixFilter.java index d2b9396ed98a..4fbf43ed99b5 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultipleColumnPrefixFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultipleColumnPrefixFilter.java @@ -173,6 +173,29 @@ public Cell getNextCellHint(Cell cell) { return PrivateCellUtil.createFirstOnRowCol(cell, hint, 0, hint.length); } + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + if (sortedPrefixes.isEmpty()) { + return null; + } + byte[] qualifier = CellUtil.cloneQualifier(skippedCell); + TreeSet lesserOrEqual = (TreeSet) sortedPrefixes.headSet(qualifier, true); + byte[] target; + if (lesserOrEqual.isEmpty()) { + target = sortedPrefixes.first(); + } else { + byte[] largest = lesserOrEqual.last(); + if (Bytes.startsWith(qualifier, largest)) { + return null; + } + target = sortedPrefixes.higher(largest); + if (target == null) { + return null; + } + } + return PrivateCellUtil.createFirstOnRowCol(skippedCell, target, 0, target.length); + } + public TreeSet createTreeSet() { return new TreeSet<>(new Comparator() { @Override diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/SkipFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/SkipFilter.java index a5149592f614..5e4afeec5499 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/SkipFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/SkipFilter.java @@ -141,6 +141,16 @@ boolean areSerializedFieldsEqual(Filter o) { return getFilter().areSerializedFieldsEqual(other.getFilter()); } + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException { + return filter.getHintForRejectedRow(firstRowCell); + } + + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + return filter.getSkipHint(skippedCell); + } + @Override public boolean isFamilyEssential(byte[] name) throws IOException { return filter.isFamilyEssential(name); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/WhileMatchFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/WhileMatchFilter.java index 65cd03042b0c..1117c9ec6dba 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/WhileMatchFilter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/WhileMatchFilter.java @@ -139,6 +139,16 @@ boolean areSerializedFieldsEqual(Filter o) { return getFilter().areSerializedFieldsEqual(other.getFilter()); } + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException { + return filter.getHintForRejectedRow(firstRowCell); + } + + @Override + public Cell getSkipHint(Cell skippedCell) throws IOException { + return filter.getSkipHint(skippedCell); + } + @Override public boolean isFamilyEssential(byte[] name) throws IOException { return filter.isFamilyEssential(name); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java index ba57ff563b74..1c428cdae530 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; import org.apache.hadoop.hbase.Cell; @@ -643,4 +644,674 @@ public Cell getSkipHint(Cell skippedCell) { assertTrue(skipHintCalls.get() > 0, "getSkipHint must be called at least once for reversed scan"); } + + // ---- FilterList AND hint delegation integration tests ---- + + @Test + public void testFilterListANDHintDelegation() throws IOException { + final String prefix = "row"; + final int rejectedCount = 5; + final int acceptedCount = 5; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + final AtomicInteger hintCallsA = new AtomicInteger(0); + final AtomicInteger hintCallsB = new AtomicInteger(0); + + // Both filters reject the same rows; both provide hints to the accepted start. + // AND merging takes max — both point to the same target here. + FilterBase filterA = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallsA.incrementAndGet(); + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterBase filterB = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCallsB.incrementAndGet(); + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterList andFilter = + new FilterList(FilterList.Operator.MUST_PASS_ALL, Arrays.asList(filterA, filterB)); + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + List hintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(andFilter)); + List noHintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(noHintFilter)); + + assertEquals(noHintResults.size(), hintResults.size(), + "AND FilterList with hints must return same cells as no-hint path"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(acceptedCount * CELLS_PER_ROW, hintResults.size()); + assertTrue(hintCallsA.get() > 0, "Sub-filter A hint must be consulted"); + assertTrue(hintCallsB.get() > 0, "Sub-filter B hint must be consulted"); + } + + @Test + public void testFilterListORHintDelegation() throws IOException { + final String prefix = "row"; + final int rejectedCount = 5; + final int acceptedCount = 5; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + + // Both filters reject the same rows, OR requires ALL to reject. + // Both provide hints to the accepted start. OR merging takes min — same target. + FilterBase filterA = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterBase filterB = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterList orFilter = + new FilterList(FilterList.Operator.MUST_PASS_ONE, Arrays.asList(filterA, filterB)); + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + List hintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(orFilter)); + List noHintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(noHintFilter)); + + assertEquals(noHintResults.size(), hintResults.size(), + "OR FilterList with hints must return same cells as no-hint path"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(acceptedCount * CELLS_PER_ROW, hintResults.size()); + } + + @Test + public void testFilterListANDWithOneNullHintSubFilter() throws IOException { + final String prefix = "row"; + final int rejectedCount = 3; + final int acceptedCount = 3; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + final AtomicInteger hintCalls = new AtomicInteger(0); + + // One sub-filter provides a hint, the other returns null. + // AND ignores nulls, so the non-null hint should be used. + FilterBase hintProvider = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCalls.incrementAndGet(); + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterBase noHintProvider = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + FilterList andFilter = new FilterList(FilterList.Operator.MUST_PASS_ALL, + Arrays.asList(hintProvider, noHintProvider)); + + List results = scanAll(new Scan().addFamily(FAMILY).setFilter(andFilter)); + assertEquals(acceptedCount * CELLS_PER_ROW, results.size()); + assertEquals(1, hintCalls.get(), + "Hint provider must be called; AND ignores the null from the other sub-filter"); + } + + @Test + public void testFilterListORWithOneNullHintSubFilter() throws IOException { + final String prefix = "row"; + final int rejectedCount = 3; + final int acceptedCount = 3; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + + // One sub-filter provides a hint, the other returns null. + // OR returns null if ANY sub-filter returns null, so no hint optimization. + FilterBase hintProvider = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterBase noHintProvider = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + FilterList orFilter = new FilterList(FilterList.Operator.MUST_PASS_ONE, + Arrays.asList(hintProvider, noHintProvider)); + + FilterBase baseline = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + List orResults = scanAll(new Scan().addFamily(FAMILY).setFilter(orFilter)); + List baselineResults = scanAll(new Scan().addFamily(FAMILY).setFilter(baseline)); + + assertEquals(baselineResults.size(), orResults.size(), + "OR with one null hint must still return correct results (falls back to no-hint path)"); + for (int i = 0; i < orResults.size(); i++) { + assertTrue(CellUtil.equals(orResults.get(i), baselineResults.get(i)), + "Cell mismatch at index " + i); + } + } + + @Test + public void testNestedFilterListHintDelegation() throws IOException { + final String prefix = "row"; + final int rejectedCount = 4; + final int acceptedCount = 4; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + + // Nested: AND(OR(hintA, hintB), hintC) + // All filters reject the same rows and hint to the same target. + FilterBase hintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterBase hintFilter2 = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterBase hintFilter3 = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + FilterList innerOR = + new FilterList(FilterList.Operator.MUST_PASS_ONE, Arrays.asList(hintFilter, hintFilter2)); + FilterList outerAND = + new FilterList(FilterList.Operator.MUST_PASS_ALL, Arrays.asList(innerOR, hintFilter3)); + + FilterBase baseline = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + }; + + List nestedResults = scanAll(new Scan().addFamily(FAMILY).setFilter(outerAND)); + List baselineResults = scanAll(new Scan().addFamily(FAMILY).setFilter(baseline)); + + assertEquals(baselineResults.size(), nestedResults.size(), + "Nested FilterList must return same results as baseline"); + for (int i = 0; i < nestedResults.size(); i++) { + assertTrue(CellUtil.equals(nestedResults.get(i), baselineResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(acceptedCount * CELLS_PER_ROW, nestedResults.size()); + } + + @Test + public void testWhileMatchFilterHintDelegation() throws IOException { + final String prefix = "row"; + final int rejectedCount = 3; + final int acceptedCount = 3; + writeRows(prefix, rejectedCount + acceptedCount); + + final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d", prefix, rejectedCount)); + final AtomicInteger hintCalls = new AtomicInteger(0); + + FilterBase innerHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + acceptedStartRow, 0, acceptedStartRow.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + hintCalls.incrementAndGet(); + return PrivateCellUtil.createFirstOnRow(acceptedStartRow); + } + }; + + WhileMatchFilter wmFilter = new WhileMatchFilter(innerHintFilter); + + // WhileMatchFilter delegates filterRowKey and sets filterAllRemaining on first true. + // After the first row is rejected, the scan terminates. So we expect 0 results. + // The hint should still be consulted before termination. + List results = scanAll(new Scan().addFamily(FAMILY).setFilter(wmFilter)); + + // WhileMatchFilter ends the scan on first filterRowKey=true, so no data returned. + assertTrue(results.isEmpty(), "WhileMatchFilter terminates scan on first rejection"); + } + + @Test + public void testFilterListANDReversedScanHint() throws IOException { + final String prefix = "row"; + final int totalRows = 10; + writeRows(prefix, totalRows); + + // Accept rows 00-04, reject rows 05-09. + // In reversed scan, scanner starts at row-09 and moves backward. + final byte[] rejectThreshold = Bytes.toBytes(String.format("%s-%02d", prefix, 5)); + final byte[] hintTarget = Bytes.toBytes(String.format("%s-%02d", prefix, 4)); + + FilterBase rejectFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThreshold, 0, rejectThreshold.length) >= 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(hintTarget); + } + }; + + FilterList andFilter = + new FilterList(FilterList.Operator.MUST_PASS_ALL, Arrays.asList(rejectFilter)); + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThreshold, 0, rejectThreshold.length) >= 0; + } + }; + + Scan hintScan = new Scan().addFamily(FAMILY).setReversed(true).setFilter(andFilter); + Scan noHintScan = new Scan().addFamily(FAMILY).setReversed(true).setFilter(noHintFilter); + + List hintResults = scanAll(hintScan); + List noHintResults = scanAll(noHintScan); + + assertEquals(noHintResults.size(), hintResults.size(), + "Reversed AND FilterList must return same cells"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(5 * CELLS_PER_ROW, hintResults.size()); + } + + @Test + public void testFilterListORReversedScanHint() throws IOException { + final String prefix = "row"; + final int totalRows = 10; + writeRows(prefix, totalRows); + + final byte[] rejectThreshold = Bytes.toBytes(String.format("%s-%02d", prefix, 5)); + final byte[] hintTarget = Bytes.toBytes(String.format("%s-%02d", prefix, 4)); + + FilterBase rejectFilterA = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThreshold, 0, rejectThreshold.length) >= 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(hintTarget); + } + }; + + FilterBase rejectFilterB = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThreshold, 0, rejectThreshold.length) >= 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(hintTarget); + } + }; + + FilterList orFilter = new FilterList(FilterList.Operator.MUST_PASS_ONE, + Arrays.asList(rejectFilterA, rejectFilterB)); + + FilterBase noHintFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThreshold, 0, rejectThreshold.length) >= 0; + } + }; + + Scan hintScan = new Scan().addFamily(FAMILY).setReversed(true).setFilter(orFilter); + Scan noHintScan = new Scan().addFamily(FAMILY).setReversed(true).setFilter(noHintFilter); + + List hintResults = scanAll(hintScan); + List noHintResults = scanAll(noHintScan); + + assertEquals(noHintResults.size(), hintResults.size(), + "Reversed OR FilterList must return same cells"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(5 * CELLS_PER_ROW, hintResults.size()); + } + + @Test + public void testColumnRangeFilterGetSkipHintIntegration() throws IOException { + final long insideTs = 2000; + final long outsideTs = 500; + final int rowCount = 5; + + for (int i = 0; i < rowCount; i++) { + byte[] row = Bytes.toBytes(String.format("colrange-%02d", i)); + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + // Qualifiers: "a", "b", "c", "d" — ColumnRangeFilter will select "b" to "c". + p.addColumn(FAMILY, Bytes.toBytes("a"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("a"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("b"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("b"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("c"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("c"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("d"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("d"), outsideTs, VALUE); + region.put(p); + } + region.flush(true); + + ColumnRangeFilter colFilter = + new ColumnRangeFilter(Bytes.toBytes("b"), true, Bytes.toBytes("c"), true); + + // Time range [1000, 3000): insideTs cells pass, outsideTs cells hit the time-range gate. + // ColumnRangeFilter.getSkipHint() should be consulted for structurally skipped cells. + Scan hintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setFilter(colFilter); + Scan noHintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setFilter(colFilter); + + List hintResults = scanAll(hintScan); + List noHintResults = scanAll(noHintScan); + + assertEquals(noHintResults.size(), hintResults.size()); + // Should get "b" and "c" qualifiers for each row, only insideTs versions. + assertEquals(rowCount * 2, hintResults.size()); + } + + @Test + public void testColumnPrefixFilterGetSkipHintIntegration() throws IOException { + final long insideTs = 2000; + final long outsideTs = 500; + final int rowCount = 5; + + for (int i = 0; i < rowCount; i++) { + byte[] row = Bytes.toBytes(String.format("colpfx-%02d", i)); + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + // Qualifiers: "aaa", "abc", "abd", "xyz" + p.addColumn(FAMILY, Bytes.toBytes("aaa"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("aaa"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("abc"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("abc"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("abd"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("abd"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("xyz"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("xyz"), outsideTs, VALUE); + region.put(p); + } + region.flush(true); + + // ColumnPrefixFilter with prefix "ab" should match "abc" and "abd". + ColumnPrefixFilter prefixFilter = new ColumnPrefixFilter(Bytes.toBytes("ab")); + + Scan hintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setFilter(prefixFilter); + Scan noHintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setFilter(prefixFilter); + + List hintResults = scanAll(hintScan); + List noHintResults = scanAll(noHintScan); + + assertEquals(noHintResults.size(), hintResults.size()); + // Should get "abc" and "abd" qualifiers for each row, only insideTs versions. + assertEquals(rowCount * 2, hintResults.size()); + } + + @Test + public void testMultipleColumnPrefixFilterGetSkipHintIntegration() throws IOException { + final long insideTs = 2000; + final long outsideTs = 500; + final int rowCount = 5; + + for (int i = 0; i < rowCount; i++) { + byte[] row = Bytes.toBytes(String.format("mcpfx-%02d", i)); + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + // Qualifiers: "aaa", "abc", "abd", "bbb", "xyz" + p.addColumn(FAMILY, Bytes.toBytes("aaa"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("aaa"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("abc"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("abc"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("abd"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("abd"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("bbb"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("bbb"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("xyz"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("xyz"), outsideTs, VALUE); + region.put(p); + } + region.flush(true); + + // MultipleColumnPrefixFilter with prefixes "ab" and "bb" should match "abc", "abd", "bbb". + MultipleColumnPrefixFilter mcpFilter = + new MultipleColumnPrefixFilter(new byte[][] { Bytes.toBytes("ab"), Bytes.toBytes("bb") }); + + Scan hintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setFilter(mcpFilter); + Scan noHintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setFilter(mcpFilter); + + List hintResults = scanAll(hintScan); + List noHintResults = scanAll(noHintScan); + + assertEquals(noHintResults.size(), hintResults.size()); + // Should get "abc", "abd", "bbb" qualifiers for each row, only insideTs versions. + assertEquals(rowCount * 3, hintResults.size()); + } + + @Test + public void testFilterListANDGetSkipHintComposition() throws IOException { + final long insideTs = 2000; + final long outsideTs = 500; + final int rowCount = 5; + + for (int i = 0; i < rowCount; i++) { + byte[] row = Bytes.toBytes(String.format("composed-%02d", i)); + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + p.addColumn(FAMILY, Bytes.toBytes("a"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("a"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("b"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("b"), outsideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("c"), insideTs, VALUE); + p.addColumn(FAMILY, Bytes.toBytes("c"), outsideTs, VALUE); + region.put(p); + } + region.flush(true); + + // Compose ColumnRangeFilter("b","c") AND a custom skip-hint filter. + // The AND composition should take the max hint. + final AtomicInteger skipHintCalls = new AtomicInteger(0); + ColumnRangeFilter colRange = + new ColumnRangeFilter(Bytes.toBytes("b"), true, Bytes.toBytes("c"), true); + FilterBase customSkipHint = new FilterBase() { + @Override + public Cell getSkipHint(Cell skippedCell) { + skipHintCalls.incrementAndGet(); + return PrivateCellUtil.createFirstOnNextRow(skippedCell); + } + }; + + FilterList andFilter = + new FilterList(FilterList.Operator.MUST_PASS_ALL, Arrays.asList(colRange, customSkipHint)); + + Scan scan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000).setFilter(andFilter); + List results = scanAll(scan); + + // Should get "b" and "c" for each row, only insideTs. + assertEquals(rowCount * 2, results.size()); + } + + @Test + public void testFilterListANDDivergentHints() throws IOException { + final String prefix = "row"; + final int totalRows = 10; + writeRows(prefix, totalRows); + + // Filter A rejects rows 0-4, hints to row-03 (a conservative hint). + // Filter B rejects rows 0-6, hints to row-07 (a more aggressive hint). + // Both reject rows 0-4 (overlap). AND merges => takes max => row-07. + // Rows 0-6 are rejected by the composite (at least one rejects each). + // Scan should return rows 07-09. + final byte[] rejectThresholdA = Bytes.toBytes(String.format("%s-%02d", prefix, 5)); + final byte[] hintTargetA = Bytes.toBytes(String.format("%s-%02d", prefix, 3)); + final byte[] rejectThresholdB = Bytes.toBytes(String.format("%s-%02d", prefix, 7)); + final byte[] hintTargetB = Bytes.toBytes(String.format("%s-%02d", prefix, 7)); + + FilterBase filterA = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThresholdA, 0, rejectThresholdA.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(hintTargetA); + } + }; + + FilterBase filterB = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThresholdB, 0, rejectThresholdB.length) < 0; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return PrivateCellUtil.createFirstOnRow(hintTargetB); + } + }; + + FilterList andFilter = + new FilterList(FilterList.Operator.MUST_PASS_ALL, Arrays.asList(filterA, filterB)); + + FilterBase baseline = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(), + rejectThresholdB, 0, rejectThresholdB.length) < 0; + } + }; + + List hintResults = scanAll(new Scan().addFamily(FAMILY).setFilter(andFilter)); + List baselineResults = scanAll(new Scan().addFamily(FAMILY).setFilter(baseline)); + + assertEquals(baselineResults.size(), hintResults.size(), + "AND with divergent hints must return same cells as baseline"); + for (int i = 0; i < hintResults.size(); i++) { + assertTrue(CellUtil.equals(hintResults.get(i), baselineResults.get(i)), + "Cell mismatch at index " + i); + } + assertEquals(3 * CELLS_PER_ROW, hintResults.size()); + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListHintDelegation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListHintDelegation.java new file mode 100644 index 000000000000..b4e760a7afe2 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterListHintDelegation.java @@ -0,0 +1,709 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.filter; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.IOException; +import java.util.Arrays; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.filter.FilterList.Operator; +import org.apache.hadoop.hbase.testclassification.FilterTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +/** + * Unit tests for {@code getHintForRejectedRow} and {@code getSkipHint} delegation in composite + * filters ({@link FilterList}, {@link SkipFilter}, {@link WhileMatchFilter}). + */ +@Tag(FilterTests.TAG) +@Tag(SmallTests.TAG) +public class TestFilterListHintDelegation { + + private static final byte[] ROW_A = Bytes.toBytes("rowA"); + private static final byte[] ROW_B = Bytes.toBytes("rowB"); + private static final byte[] ROW_C = Bytes.toBytes("rowC"); + private static final byte[] FAMILY = Bytes.toBytes("f"); + private static final byte[] QUALIFIER = Bytes.toBytes("q"); + + private static KeyValue kv(byte[] row) { + return new KeyValue(row, FAMILY, QUALIFIER, 1L, KeyValue.Type.Put, Bytes.toBytes("v")); + } + + /** + * Filter that returns a fixed hint from {@code getHintForRejectedRow} without overriding + * {@code filterRowKey}. Used in OR / SkipFilter / WhileMatchFilter tests where the AND + * per-sub-filter rejection tracking does not apply. + */ + private static FilterBase fixedRejectedRowHintFilter(Cell hint) { + return new FilterBase() { + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return hint; + } + }; + } + + /** + * Filter that rejects every row via {@code filterRowKey} and returns a fixed hint. Used in AND + * tests where {@code FilterListWithAND.getHintForRejectedRow} requires sub-filters to have + * individually rejected via {@code filterRowKey} before being consulted. + */ + private static FilterBase rejectingRowHintFilter(Cell hint) { + return new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return true; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return hint; + } + }; + } + + /** Filter that returns a fixed hint from {@code getSkipHint}. */ + private static FilterBase fixedSkipHintFilter(Cell hint) { + return new FilterBase() { + @Override + public Cell getSkipHint(Cell skippedCell) { + return hint; + } + }; + } + + /** Filter that claims {@code filterAllRemaining() == true}. */ + private static FilterBase terminatedFilter(Cell hint) { + return new FilterBase() { + @Override + public boolean filterAllRemaining() { + return true; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return hint; + } + + @Override + public Cell getSkipHint(Cell skippedCell) { + return hint; + } + }; + } + + // ---- AND (MUST_PASS_ALL) getHintForRejectedRow ---- + + @Test + public void testANDGetHintForRejectedRow_takesMax() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(rejectingRowHintFilter(hintA), rejectingRowHintFilter(hintC))); + fl.filterRowKey(kv(ROW_A)); + + Cell result = fl.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintC, result), + "AND must return the farthest (max) hint"); + } + + @Test + public void testANDGetHintForRejectedRow_ignoresNull() throws IOException { + Cell hintB = kv(ROW_B); + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(rejectingRowHintFilter(null), rejectingRowHintFilter(hintB))); + fl.filterRowKey(kv(ROW_A)); + + Cell result = fl.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintB, result), + "AND must ignore null hints and return the non-null one"); + } + + @Test + public void testANDGetHintForRejectedRow_allNull() throws IOException { + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(rejectingRowHintFilter(null), rejectingRowHintFilter(null))); + fl.filterRowKey(kv(ROW_A)); + + assertNull(fl.getHintForRejectedRow(kv(ROW_A)), "AND with all-null hints must return null"); + } + + @Test + public void testANDGetHintForRejectedRow_reversed() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(rejectingRowHintFilter(hintA), rejectingRowHintFilter(hintC))); + fl.setReversed(true); + fl.filterRowKey(kv(ROW_C)); + + Cell result = fl.getHintForRejectedRow(kv(ROW_C)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintA, result), + "Reversed AND must return the smaller row key (farthest in reverse direction)"); + } + + // ---- AND (MUST_PASS_ALL) getSkipHint ---- + + @Test + public void testANDGetSkipHint_takesMax() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(fixedSkipHintFilter(hintA), fixedSkipHintFilter(hintC))); + + Cell result = fl.getSkipHint(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintC, result), + "AND getSkipHint must return the farthest (max) hint"); + } + + @Test + public void testANDGetSkipHint_ignoresNull() throws IOException { + Cell hintB = kv(ROW_B); + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(fixedSkipHintFilter(null), fixedSkipHintFilter(hintB))); + + Cell result = fl.getSkipHint(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintB, result), + "AND getSkipHint must ignore null and return the non-null hint"); + } + + @Test + public void testANDGetSkipHint_allNull() throws IOException { + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(fixedSkipHintFilter(null), fixedSkipHintFilter(null))); + + assertNull(fl.getSkipHint(kv(ROW_A)), "AND with all-null skip hints must return null"); + } + + @Test + public void testANDGetSkipHint_reversed() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(fixedSkipHintFilter(hintA), fixedSkipHintFilter(hintC))); + fl.setReversed(true); + + Cell result = fl.getSkipHint(kv(ROW_C)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintA, result), + "Reversed AND getSkipHint must return the smaller row key (farthest in reverse direction)"); + } + + // ---- OR (MUST_PASS_ONE) getHintForRejectedRow ---- + + @Test + public void testORGetHintForRejectedRow_takesMin() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(fixedRejectedRowHintFilter(hintA), fixedRejectedRowHintFilter(hintC))); + + Cell result = fl.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintA, result), + "OR must return the nearest (min) hint"); + } + + @Test + public void testORGetHintForRejectedRow_nullReturnsNull() throws IOException { + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(fixedRejectedRowHintFilter(null), fixedRejectedRowHintFilter(hintC))); + + assertNull(fl.getHintForRejectedRow(kv(ROW_A)), + "OR must return null if any sub-filter returns null (can't safely skip)"); + } + + @Test + public void testORGetHintForRejectedRow_allHints() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintB = kv(ROW_B); + Cell hintC = kv(ROW_C); + FilterList fl = + new FilterList(Operator.MUST_PASS_ONE, Arrays.asList(fixedRejectedRowHintFilter(hintB), + fixedRejectedRowHintFilter(hintA), fixedRejectedRowHintFilter(hintC))); + + Cell result = fl.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintA, result), + "OR with all hints must return the minimum"); + } + + @Test + public void testORGetHintForRejectedRow_reversed() throws IOException { + // In reversed scan, "min" in scan direction means the larger row key. + Cell hintA = kv(ROW_A); + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(fixedRejectedRowHintFilter(hintA), fixedRejectedRowHintFilter(hintC))); + fl.setReversed(true); + + Cell result = fl.getHintForRejectedRow(kv(ROW_C)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintC, result), + "Reversed OR must return the larger row key (nearest in reverse direction)"); + } + + // ---- OR (MUST_PASS_ONE) getSkipHint ---- + + @Test + public void testORGetSkipHint_takesMin() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(fixedSkipHintFilter(hintA), fixedSkipHintFilter(hintC))); + + Cell result = fl.getSkipHint(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintA, result), + "OR getSkipHint must return the nearest (min) hint"); + } + + @Test + public void testORGetSkipHint_nullReturnsNull() throws IOException { + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(fixedSkipHintFilter(null), fixedSkipHintFilter(hintC))); + + assertNull(fl.getSkipHint(kv(ROW_A)), + "OR getSkipHint must return null if any sub-filter returns null"); + } + + @Test + public void testORGetSkipHint_allHints() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintB = kv(ROW_B); + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, Arrays.asList(fixedSkipHintFilter(hintB), + fixedSkipHintFilter(hintA), fixedSkipHintFilter(hintC))); + + Cell result = fl.getSkipHint(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintA, result), + "OR getSkipHint with all hints must return the minimum"); + } + + @Test + public void testORGetSkipHint_reversed() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintC = kv(ROW_C); + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(fixedSkipHintFilter(hintA), fixedSkipHintFilter(hintC))); + fl.setReversed(true); + + Cell result = fl.getSkipHint(kv(ROW_C)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintC, result), + "Reversed OR getSkipHint must return the larger row key (nearest in reverse direction)"); + } + + // ---- filterAllRemaining for getSkipHint ---- + + @Test + public void testFilterAllRemainingSubFilterSkippedForGetSkipHint() throws IOException { + Cell terminatedHint = kv(ROW_C); + Cell activeHint = kv(ROW_B); + + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(terminatedFilter(terminatedHint), fixedSkipHintFilter(activeHint))); + + Cell result = fl.getSkipHint(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(activeHint, result), + "Terminated sub-filters must be skipped for getSkipHint too"); + } + + @Test + public void testORFilterAllRemainingSubFilterSkippedForGetHintForRejectedRow() + throws IOException { + Cell terminatedHint = kv(ROW_C); + Cell activeHint = kv(ROW_B); + + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(terminatedFilter(terminatedHint), fixedRejectedRowHintFilter(activeHint))); + + Cell result = fl.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(activeHint, result), + "OR must skip terminated sub-filters and return the active filter's hint"); + } + + @Test + public void testORFilterAllRemainingSubFilterSkippedForGetSkipHint() throws IOException { + Cell terminatedHint = kv(ROW_C); + Cell activeHint = kv(ROW_B); + + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(terminatedFilter(terminatedHint), fixedSkipHintFilter(activeHint))); + + Cell result = fl.getSkipHint(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(activeHint, result), + "OR must skip terminated sub-filters for getSkipHint too"); + } + + // ---- SkipFilter delegation ---- + + @Test + public void testSkipFilterDelegatesGetHintForRejectedRow() throws IOException { + Cell hint = kv(ROW_B); + SkipFilter sf = new SkipFilter(fixedRejectedRowHintFilter(hint)); + + Cell result = sf.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hint, result), + "SkipFilter must delegate getHintForRejectedRow to wrapped filter"); + } + + @Test + public void testSkipFilterDelegatesGetSkipHint() throws IOException { + Cell hint = kv(ROW_B); + SkipFilter sf = new SkipFilter(fixedSkipHintFilter(hint)); + + Cell result = sf.getSkipHint(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hint, result), + "SkipFilter must delegate getSkipHint to wrapped filter"); + } + + // ---- WhileMatchFilter delegation ---- + + @Test + public void testWhileMatchFilterDelegatesGetHintForRejectedRow() throws IOException { + Cell hint = kv(ROW_B); + WhileMatchFilter wmf = new WhileMatchFilter(fixedRejectedRowHintFilter(hint)); + + Cell result = wmf.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hint, result), + "WhileMatchFilter must delegate getHintForRejectedRow to wrapped filter"); + } + + @Test + public void testWhileMatchFilterDelegatesGetSkipHint() throws IOException { + Cell hint = kv(ROW_B); + WhileMatchFilter wmf = new WhileMatchFilter(fixedSkipHintFilter(hint)); + + Cell result = wmf.getSkipHint(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hint, result), + "WhileMatchFilter must delegate getSkipHint to wrapped filter"); + } + + // ---- FilterList facade delegation ---- + + @Test + public void testFilterListDelegatesToFilterListBase() throws IOException { + Cell hint = kv(ROW_B); + // AND variant + FilterList andList = new FilterList(Operator.MUST_PASS_ALL, rejectingRowHintFilter(hint)); + andList.filterRowKey(kv(ROW_A)); + assertNotNull(andList.getHintForRejectedRow(kv(ROW_A)), + "FilterList(AND) must delegate getHintForRejectedRow"); + // OR variant + FilterList orList = new FilterList(Operator.MUST_PASS_ONE, fixedSkipHintFilter(hint)); + assertNotNull(orList.getSkipHint(kv(ROW_A)), "FilterList(OR) must delegate getSkipHint"); + } + + // ---- Nested FilterList ---- + + @Test + public void testNestedFilterList() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintB = kv(ROW_B); + Cell hintC = kv(ROW_C); + + // Inner OR: all sub-filters reject, so OR rejects too. Returns min(hintA, hintC) = hintA. + FilterList innerOR = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(rejectingRowHintFilter(hintA), rejectingRowHintFilter(hintC))); + // Outer AND: returns max(innerOR=hintA, hintB) = hintB + FilterList outerAND = + new FilterList(Operator.MUST_PASS_ALL, Arrays.asList(innerOR, rejectingRowHintFilter(hintB))); + outerAND.filterRowKey(kv(ROW_A)); + + Cell result = outerAND.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintB, result), + "Nested AND(OR(A,C), B) must return max(min(A,C), B) = max(A, B) = B"); + } + + // ---- filterAllRemaining sub-filter is skipped ---- + + @Test + public void testFilterAllRemainingSubFilterSkipped() throws IOException { + Cell terminatedHint = kv(ROW_C); + Cell activeHint = kv(ROW_B); + + // Place the active rejecting filter first so filterRowKey reaches it before + // encountering the terminated filter (which causes early return). + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(rejectingRowHintFilter(activeHint), terminatedFilter(terminatedHint))); + fl.filterRowKey(kv(ROW_A)); + + Cell result = fl.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(activeHint, result), + "Terminated sub-filters (filterAllRemaining=true) must be skipped"); + } + + // ---- All sub-filters terminated ---- + + @Test + public void testORAllSubFiltersTerminated_getHintForRejectedRow() throws IOException { + Cell hint = kv(ROW_B); + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(terminatedFilter(hint), terminatedFilter(hint))); + + assertNull(fl.getHintForRejectedRow(kv(ROW_A)), + "OR with all terminated sub-filters must return null for getHintForRejectedRow"); + } + + @Test + public void testORAllSubFiltersTerminated_getSkipHint() throws IOException { + Cell hint = kv(ROW_B); + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, + Arrays.asList(terminatedFilter(hint), terminatedFilter(hint))); + + assertNull(fl.getSkipHint(kv(ROW_A)), + "OR with all terminated sub-filters must return null for getSkipHint"); + } + + @Test + public void testANDAllSubFiltersTerminated_getHintForRejectedRow() throws IOException { + Cell hint = kv(ROW_B); + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(terminatedFilter(hint), terminatedFilter(hint))); + + assertNull(fl.getHintForRejectedRow(kv(ROW_A)), + "AND with all terminated sub-filters must return null for getHintForRejectedRow"); + } + + @Test + public void testANDAllSubFiltersTerminated_getSkipHint() throws IOException { + Cell hint = kv(ROW_B); + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(terminatedFilter(hint), terminatedFilter(hint))); + + assertNull(fl.getSkipHint(kv(ROW_A)), + "AND with all terminated sub-filters must return null for getSkipHint"); + } + + // ---- Empty FilterList ---- + + @Test + public void testEmptyFilterListReturnsNull() throws IOException { + FilterList emptyAND = new FilterList(Operator.MUST_PASS_ALL); + FilterList emptyOR = new FilterList(Operator.MUST_PASS_ONE); + + assertNull(emptyAND.getHintForRejectedRow(kv(ROW_A)), + "Empty AND FilterList must return null for getHintForRejectedRow"); + assertNull(emptyAND.getSkipHint(kv(ROW_A)), + "Empty AND FilterList must return null for getSkipHint"); + assertNull(emptyOR.getHintForRejectedRow(kv(ROW_A)), + "Empty OR FilterList must return null for getHintForRejectedRow"); + assertNull(emptyOR.getSkipHint(kv(ROW_A)), + "Empty OR FilterList must return null for getSkipHint"); + } + + // ---- Single filter pass-through ---- + + @Test + public void testSingleFilterAND() throws IOException { + Cell hint = kv(ROW_B); + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, rejectingRowHintFilter(hint)); + fl.filterRowKey(kv(ROW_A)); + + Cell result = fl.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hint, result), + "Single-filter AND must pass through the hint unchanged"); + } + + @Test + public void testSingleFilterOR() throws IOException { + Cell hint = kv(ROW_B); + FilterList fl = new FilterList(Operator.MUST_PASS_ONE, fixedRejectedRowHintFilter(hint)); + + Cell result = fl.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hint, result), + "Single-filter OR must pass through the hint unchanged"); + } + + // ---- AND contract: only consult rejecting sub-filters for getHintForRejectedRow ---- + + @Test + public void testANDGetHintForRejectedRow_onlyConsultsRejectingSubFilters() throws IOException { + Cell hint = kv(ROW_C); + FilterBase rejectingFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return true; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return hint; + } + }; + FilterBase acceptingFilter = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return false; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + throw new IllegalStateException( + "Contract violation: getHintForRejectedRow called on non-rejecting filter"); + } + }; + + FilterList fl = + new FilterList(Operator.MUST_PASS_ALL, Arrays.asList(rejectingFilter, acceptingFilter)); + assertTrue(fl.filterRowKey(kv(ROW_A)), "AND must reject when at least one sub-filter rejects"); + + Cell result = fl.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hint, result), + "AND must return the hint from the rejecting sub-filter only"); + } + + @Test + public void testANDGetHintForRejectedRow_resetClearsRejectionState() throws IOException { + Cell hint = kv(ROW_C); + FilterBase sometimesRejects = new FilterBase() { + private boolean shouldReject = true; + + @Override + public boolean filterRowKey(Cell cell) { + if (shouldReject) { + shouldReject = false; + return true; + } + return false; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return hint; + } + }; + + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, sometimesRejects); + + // Row 1: filter rejects, hint is available + assertTrue(fl.filterRowKey(kv(ROW_A))); + assertNotNull(fl.getHintForRejectedRow(kv(ROW_A))); + + // Reset between rows (as the scanner does) + fl.reset(); + + // Row 2: filter accepts, no rejection state should remain + fl.filterRowKey(kv(ROW_B)); + assertNull(fl.getHintForRejectedRow(kv(ROW_B)), + "After reset(), rejection state must be cleared — no stale hints"); + } + + @Test + public void testANDGetHintForRejectedRow_takesMaxFromRejectingFilters() throws IOException { + Cell hintA = kv(ROW_A); + Cell hintC = kv(ROW_C); + FilterBase rejectToA = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return true; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return hintA; + } + }; + FilterBase rejectToC = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return true; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return hintC; + } + }; + + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, Arrays.asList(rejectToA, rejectToC)); + fl.filterRowKey(kv(ROW_A)); + + Cell result = fl.getHintForRejectedRow(kv(ROW_A)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintC, result), + "AND must return max hint from rejecting sub-filters"); + } + + @Test + public void testANDGetHintForRejectedRow_reversedTakesMaxFromRejectingFilters() + throws IOException { + Cell hintA = kv(ROW_A); + Cell hintC = kv(ROW_C); + FilterBase rejectToA = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return true; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return hintA; + } + }; + FilterBase rejectToC = new FilterBase() { + @Override + public boolean filterRowKey(Cell cell) { + return true; + } + + @Override + public Cell getHintForRejectedRow(Cell firstRowCell) { + return hintC; + } + }; + + FilterList fl = new FilterList(Operator.MUST_PASS_ALL, Arrays.asList(rejectToA, rejectToC)); + fl.setReversed(true); + fl.filterRowKey(kv(ROW_C)); + + Cell result = fl.getHintForRejectedRow(kv(ROW_C)); + assertNotNull(result); + assertEquals(0, CellComparator.getInstance().compare(hintA, result), + "Reversed AND must return smallest row key (farthest in reverse) from rejecting filters"); + } +}