Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.PriorityQueue;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
Expand Down Expand Up @@ -127,39 +126,53 @@ public FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData) {
p.setFirst(Arrays.copyOf(aFuzzyKeysData.getFirst(), aFuzzyKeysData.getFirst().length));
p.setSecond(Arrays.copyOf(aFuzzyKeysData.getSecond(), aFuzzyKeysData.getSecond().length));

// update mask ( 0 -> -1 (0xff), 1 -> [0 or 2 depending on processedWildcardMask value])
// Normalize the mask, zero the non-fixed key bytes, then fix the unsafe mask to its final
// {-1, 0} form once here so it is never mutated during scanning.
p.setSecond(preprocessMask(p.getSecond()));
preprocessSearchKey(p);
preprocessSearchKey(p, UNSAFE_UNALIGNED);
preprocessMaskForSatisfies(p.getSecond(), UNSAFE_UNALIGNED);

fuzzyKeyDataCopy.add(p);
}
this.fuzzyKeysData = fuzzyKeyDataCopy;
this.tracker = new RowTracker();
}

private void preprocessSearchKey(Pair<byte[], byte[]> p) {
if (!UNSAFE_UNALIGNED) {
// do nothing
return;
}
/**
* Zeroes the non-fixed ("don't care") positions of the search key (on both paths) so the
* next-cell hint from {@link #getNextForFuzzyRule} is the smallest matching row. The byte at a
* non-fixed position is never compared, so this affects neither matching nor deserialization.
*/
static void preprocessSearchKey(Pair<byte[], byte[]> p, boolean unsafeUnaligned) {
byte[] key = p.getFirst();
byte[] mask = p.getSecond();
for (int i = 0; i < mask.length; i++) {
// set non-fixed part of a search key to 0.
if (mask[i] == processedWildcardMask) {
// non-fixed = anything but -1 on unsafe ({-1, 0/2}); 1 on no-unsafe ({0, 1})
if ((unsafeUnaligned && mask[i] != -1) || (!unsafeUnaligned && mask[i] == 1)) {
key[i] = 0;
}
}
}

/**
* We need to preprocess mask array, as since we treat 2's as unfixed positions and -1 (0xff) as
* fixed positions
* Normalizes the incoming mask to the active path's encoding. Input is the public {0, 1} form, or
* the already-preprocessed unsafe form ({-1, 0} v1 / {-1, 2} v2) when {@link #parseFrom}
* deserializes a filter from a legacy (pre-HBASE-30256) unsafe peer -- current peers emit the
* public {0, 1} form via {@link #toByteArray}; accepting both keeps such legacy filters working
* across platforms. Unsafe keeps/produces {-1, 0/2}; no-unsafe keeps/produces {0, 1}.
* @return mask array
*/
private byte[] preprocessMask(byte[] mask) {
if (!UNSAFE_UNALIGNED) {
// do nothing
// A mask from an unsafe peer is in internal form (-1 fixed; non-fixed 0 v1 / 2 v2). A byte
// outside the public {0, 1} marks internal form -> normalize to {0, 1}. An all-zeros mask is
// kept as public all-fixed: ambiguous with a legacy unsafe all-wildcard mask (HBASE-15676),
// resolved as all-fixed to avoid over-matching.
if (!isPublicMask(mask)) {
for (int i = 0; i < mask.length; i++) {
mask[i] = (byte) (mask[i] == -1 ? 0 : 1);
}
}
return mask;
}
if (isPreprocessedMask(mask)) return mask;
Expand All @@ -173,6 +186,19 @@ private byte[] preprocessMask(byte[] mask) {
return mask;
}

/**
* Returns true if {@code mask} is already in the public {0, 1} form, i.e. it has no internal-form
* byte (a -1 or a 2). An all-zeros mask counts as public (all-fixed).
*/
private static boolean isPublicMask(byte[] mask) {
for (int i = 0; i < mask.length; i++) {
if (mask[i] != 0 && mask[i] != 1) {
return false;
}
}
return true;
}

private boolean isPreprocessedMask(byte[] mask) {
for (int i = 0; i < mask.length; i++) {
if (mask[i] != -1 && mask[i] != processedWildcardMask) {
Expand All @@ -182,6 +208,23 @@ private boolean isPreprocessedMask(byte[] mask) {
return true;
}

/**
* Converts a stored mask back to the public {0 (fixed), 1 (non-fixed)} form as a new array, so
* serialization, {@link #getFuzzyKeys}, equals and hashCode never expose the internal encoding.
* No-unsafe already stores {0, 1}; unsafe stores {-1, 0}, where -1 is fixed and anything else is
* non-fixed.
* @return a new array in {0, 1} form
*/
private static byte[] toConstructorMask(byte[] mask, boolean unsafeUnaligned) {
byte[] out = Arrays.copyOf(mask, mask.length);
if (unsafeUnaligned) {
for (int i = 0; i < out.length; i++) {
out[i] = (byte) (out[i] == -1 ? 0 : 1);
}
}
return out;
}

/**
* Returns the Fuzzy keys in the format expected by the constructor.
* @return the Fuzzy keys in the format expected by the constructor
Expand All @@ -192,18 +235,7 @@ public List<Pair<byte[], byte[]>> getFuzzyKeys() {
Pair<byte[], byte[]> returnKey = new Pair<>();
// This won't revert the original key's don't care values, but we don't care.
returnKey.setFirst(Arrays.copyOf(fuzzyKey.getFirst(), fuzzyKey.getFirst().length));
byte[] returnMask = Arrays.copyOf(fuzzyKey.getSecond(), fuzzyKey.getSecond().length);
if (UNSAFE_UNALIGNED && isPreprocessedMask(returnMask)) {
// Revert the preprocessing.
for (int i = 0; i < returnMask.length; i++) {
if (returnMask[i] == -1) {
returnMask[i] = 0; // -1 >> 0
} else if (returnMask[i] == processedWildcardMask) {
returnMask[i] = 1; // 0 or 2 >> 1 depending on mask version
}
}
}
returnKey.setSecond(returnMask);
returnKey.setSecond(toConstructorMask(fuzzyKey.getSecond(), UNSAFE_UNALIGNED));
returnList.add(returnKey);
}
return returnList;
Expand Down Expand Up @@ -232,7 +264,6 @@ public ReturnCode filterCell(final Cell c) {
for (int i = startIndex; i < size + startIndex; i++) {
final int index = i % size;
Pair<byte[], byte[]> fuzzyData = fuzzyKeysData.get(index);
idempotentMaskShift(fuzzyData.getSecond());
SatisfiesCode satisfiesCode = satisfies(isReversed(), c.getRowArray(), c.getRowOffset(),
c.getRowLength(), fuzzyData.getFirst(), fuzzyData.getSecond());
if (satisfiesCode == SatisfiesCode.YES) {
Expand Down Expand Up @@ -348,9 +379,11 @@ boolean isNextRowSameAs(Cell currentCell) {
}

byte[] updateWith(Cell currentCell, Pair<byte[], byte[]> fuzzyData) {
byte[] nextRowKeyCandidate =
getNextForFuzzyRule(isReversed(), currentCell.getRowArray(), currentCell.getRowOffset(),
currentCell.getRowLength(), fuzzyData.getFirst(), fuzzyData.getSecond());
// getNextForFuzzyRule needs {-1, 0}: a converted copy on no-unsafe, the stored mask on
// unsafe.
byte[] fuzzyKeyMeta = preprocessMaskForHinting(fuzzyData.getSecond(), UNSAFE_UNALIGNED);
byte[] nextRowKeyCandidate = getNextForFuzzyRule(isReversed(), currentCell.getRowArray(),
currentCell.getRowOffset(), currentCell.getRowLength(), fuzzyData.getFirst(), fuzzyKeyMeta);
if (nextRowKeyCandidate != null) {
nextRows.add(new Pair<>(nextRowKeyCandidate, fuzzyData));
}
Expand All @@ -372,7 +405,9 @@ public byte[] toByteArray() {
for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
BytesBytesPair.Builder bbpBuilder = BytesBytesPair.newBuilder();
bbpBuilder.setFirst(UnsafeByteOperations.unsafeWrap(fuzzyData.getFirst()));
bbpBuilder.setSecond(UnsafeByteOperations.unsafeWrap(fuzzyData.getSecond()));
// Emit the public {0, 1} mask, not the internal form, so the wire is platform-independent.
bbpBuilder.setSecond(UnsafeByteOperations
.unsafeWrap(toConstructorMask(fuzzyData.getSecond(), UNSAFE_UNALIGNED)));
builder.addFuzzyKeysData(bbpBuilder);
}
return builder.build().toByteArray();
Expand Down Expand Up @@ -505,6 +540,40 @@ static SatisfiesCode satisfies(boolean reverse, byte[] row, int offset, int leng
return SatisfiesCode.YES;
}

/**
* Mutates {@code mask} in place into the form {@link #satisfies} expects. Called once from the
* constructor so the stored mask is fixed up front and never mutated during scanning. Unsafe:
* shift {-1, 0/2} -&gt; {-1, 0} (the word-based satisfies wants non-fixed = 0). No-unsafe: no-op,
* {@link #satisfiesNoUnsafe} already wants {0, 1}.
*/
static void preprocessMaskForSatisfies(byte[] mask, boolean unsafeUnaligned) {
if (!unsafeUnaligned) {
return;
}
// unsafe stores {-1, <wildcard>}; shift to {-1, 0} for satisfies (works for v1 and v2)
idempotentMaskShift(mask);
}

/**
* Returns the mask in the {-1 (fixed), 0 (non-fixed)} form {@link #getNextForFuzzyRule} expects.
* No-unsafe converts {0, 1} into a NEW array (the stored {0, 1} is still needed by
* {@link #satisfiesNoUnsafe}); unsafe is already {-1, 0}, returned as is.
*/
static byte[] preprocessMaskForHinting(byte[] mask, boolean unsafeUnaligned) {
if (unsafeUnaligned) {
return mask;
}
byte[] converted = Arrays.copyOf(mask, mask.length);
for (int i = 0; i < converted.length; i++) {
if (converted[i] == 0) {
converted[i] = -1;
} else if (converted[i] == 1) {
converted[i] = 0;
}
}
return converted;
}

static SatisfiesCode satisfiesNoUnsafe(boolean reverse, byte[] row, int offset, int length,
byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
if (row == null) {
Expand Down Expand Up @@ -754,9 +823,11 @@ boolean areSerializedFieldsEqual(Filter o) {
for (int i = 0; i < fuzzyKeysData.size(); ++i) {
Pair<byte[], byte[]> thisData = this.fuzzyKeysData.get(i);
Pair<byte[], byte[]> otherData = other.fuzzyKeysData.get(i);
// Compare masks in the normalized {0, 1} form, so equality matches the serialized bytes.
if (
!(Bytes.equals(thisData.getFirst(), otherData.getFirst())
&& Bytes.equals(thisData.getSecond(), otherData.getSecond()))
&& Bytes.equals(toConstructorMask(thisData.getSecond(), UNSAFE_UNALIGNED),
toConstructorMask(otherData.getSecond(), UNSAFE_UNALIGNED)))
) {
return false;
}
Expand All @@ -771,6 +842,12 @@ public boolean equals(Object obj) {

@Override
public int hashCode() {
return Objects.hash(this.fuzzyKeysData);
int result = 1;
for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
result = 31 * result + Bytes.hashCode(fuzzyData.getFirst());
result =
31 * result + Bytes.hashCode(toConstructorMask(fuzzyData.getSecond(), UNSAFE_UNALIGNED));
}
return result;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assumptions.assumeTrue;

import java.io.IOException;
import java.util.Arrays;
Expand All @@ -29,12 +32,18 @@
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.testclassification.FilterTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.unsafe.HBasePlatformDependent;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.opentest4j.AssertionFailedError;

import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;

import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.BytesBytesPair;

@Tag(FilterTests.TAG)
@Tag(SmallTests.TAG)
public class TestFuzzyRowFilter {
Expand Down Expand Up @@ -420,6 +429,122 @@ public void testReverseFilterListSkipsSameRowFuzzyHint() throws IOException {
}
}

/**
* Serializing a filter must not leak the internal mask encoding. On the unsafe path the stored
* mask is {-1 (fixed), 0 (non-fixed)}; {@code toByteArray} must emit the public constructor {0,
* 1} form so the round-tripped filter behaves identically (otherwise the leaked {-1, 0} is
* reparsed as an all-fixed mask). Processing a cell first must not change this.
*/
@Test
public void testSerializationAfterFilterCellPreservesBehavior() throws Exception {
FuzzyRowFilter original =
new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 })));
// Process a cell first to prove serialization is unaffected by scanning.
original.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 50, 3 }));

FuzzyRowFilter parsed = FuzzyRowFilter.parseFrom(original.toByteArray());
// A row matching only via the wildcard position must still be INCLUDED after the round-trip.
assertEquals(Filter.ReturnCode.INCLUDE,
parsed.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 3 })));
}

/**
* Two filters built from the same rule (distinct array instances) must be equal and hash equally,
* i.e. {@code equals}/{@code hashCode} must be content-based, not identity-based, and consistent
* with each other and with the serialized ({0, 1}) form. This must also hold after one of them
* has processed a cell.
*/
@Test
public void testEqualsConsistentAfterFilterCell() {
FuzzyRowFilter fresh =
new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 })));
FuzzyRowFilter scanned =
new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 })));
scanned.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 50, 3 }));

assertEquals(fresh, scanned);
assertEquals(fresh.hashCode(), scanned.hashCode());
}

/**
* Unsafe-path coverage for branch-2's is_mask_v2 plumbing: the flag round-trips (v1 stays v1,
* v2/client stays v2) and toByteArray always emits the public {0, 1} mask.
*/
@Test
public void testIsMaskV2WireRoundTrip() throws Exception {
// v1 in (no flag) -> stays v1 on the wire, mask normalized to public {0, 1, 0}.
byte[] v1Wire = FilterProtos.FuzzyRowFilter.newBuilder()
.addFuzzyKeysData(BytesBytesPair.newBuilder()
.setFirst(UnsafeByteOperations.unsafeWrap(new byte[] { 1, 0, 3 }))
.setSecond(UnsafeByteOperations.unsafeWrap(new byte[] { -1, 0, -1 })))
.build().toByteArray();
FilterProtos.FuzzyRowFilter v1Out =
FilterProtos.FuzzyRowFilter.parseFrom(FuzzyRowFilter.parseFrom(v1Wire).toByteArray());
assertFalse(v1Out.getIsMaskV2());
assertArrayEquals(new byte[] { 0, 1, 0 }, v1Out.getFuzzyKeysData(0).getSecond().toByteArray());

// v2 in -> stays v2, mask normalized to public {0, 1, 0}.
byte[] v2Wire = FilterProtos.FuzzyRowFilter.newBuilder().setIsMaskV2(true)
.addFuzzyKeysData(BytesBytesPair.newBuilder()
.setFirst(UnsafeByteOperations.unsafeWrap(new byte[] { 1, 0, 3 }))
.setSecond(UnsafeByteOperations.unsafeWrap(new byte[] { -1, 2, -1 })))
.build().toByteArray();
FilterProtos.FuzzyRowFilter v2Out =
FilterProtos.FuzzyRowFilter.parseFrom(FuzzyRowFilter.parseFrom(v2Wire).toByteArray());
assertTrue(v2Out.getIsMaskV2());
assertArrayEquals(new byte[] { 0, 1, 0 }, v2Out.getFuzzyKeysData(0).getSecond().toByteArray());

// A client-built filter is always v2 on the wire.
FuzzyRowFilter client =
new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 })));
assertTrue(FilterProtos.FuzzyRowFilter.parseFrom(client.toByteArray()).getIsMaskV2());
}

/**
* getFuzzyKeys must expose the public {0, 1} form, not the internal stored mask (unsafe stores
* {-1, 0}) -- REST ScannerModel feeds it straight back into the constructor.
*/
@Test
public void testGetFuzzyKeysReturnsPublicForm() {
FuzzyRowFilter filter =
new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 })));
assertArrayEquals(new byte[] { 0, 1, 0 }, filter.getFuzzyKeys().get(0).getSecond());
}

/**
* Unsafe-path companion to the no-unsafe legacy-v1 test: a no-flag, internal {-1, 0} v1 wire must
* still enforce its fixed positions after parseFrom.
*/
@Test
public void testParseFromLegacyV1EncodedFilterEnforcesFixedPositions() throws Exception {
byte[] wire = FilterProtos.FuzzyRowFilter.newBuilder()
.addFuzzyKeysData(BytesBytesPair.newBuilder()
.setFirst(UnsafeByteOperations.unsafeWrap(new byte[] { 1, 0, 3 }))
.setSecond(UnsafeByteOperations.unsafeWrap(new byte[] { -1, 0, -1 })))
.build().toByteArray();
assertEquals(Filter.ReturnCode.INCLUDE, FuzzyRowFilter.parseFrom(wire)
.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 3 })));
assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, FuzzyRowFilter.parseFrom(wire)
.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 9 })));
}

/**
* Twin of the no-unsafe all-fixed test: a no-flag all-zeros wire {0, 0, 0} is the legacy unsafe
* v1 all-wildcard encoding (HBASE-15676), so on the unsafe path it matches every row -- the
* deliberate opposite of the no-unsafe all-fixed reading. Guarded to the unsafe path.
*/
@Test
public void testParseFromLegacyV1AllZerosMaskIsAllWildcardOnUnsafe() throws Exception {
assumeTrue(HBasePlatformDependent.unaligned());
byte[] wire = FilterProtos.FuzzyRowFilter.newBuilder()
.addFuzzyKeysData(BytesBytesPair.newBuilder()
.setFirst(UnsafeByteOperations.unsafeWrap(new byte[] { 1, 2, 3 }))
.setSecond(UnsafeByteOperations.unsafeWrap(new byte[] { 0, 0, 0 })))
.build().toByteArray();
assertEquals(Filter.ReturnCode.INCLUDE, FuzzyRowFilter.parseFrom(wire)
.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 9, 9, 9 })));
}

private static FuzzyRowFilter newReverseFuzzyRowFilter() {
FuzzyRowFilter filter =
new FuzzyRowFilter(Arrays.asList(new Pair<>(Bytes.toBytes("aaa"), new byte[] { 0, 1, 0 })));
Expand Down
Loading