From f303756c34e8981d64ec8630ef7cb57760a2c88e Mon Sep 17 00:00:00 2001 From: liuxiaocs7 Date: Sun, 21 Jun 2026 21:42:29 +0800 Subject: [PATCH] HBASE-30241 TestMobCloneSnapshotFromClientAfterSplittingRegion is flaky because a split may build whole-file HFileLinks instead of References --- ...romClientAfterSplittingRegionTestBase.java | 17 +- ...AfterSplittingRegionWithLinksTestBase.java | 238 ++++++++++++++++++ ...omClientAfterSplittingRegionWithLinks.java | 41 +++ ...omClientAfterSplittingRegionWithLinks.java | 70 ++++++ 4 files changed, 365 insertions(+), 1 deletion(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/client/CloneSnapshotFromClientAfterSplittingRegionWithLinksTestBase.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestCloneSnapshotFromClientAfterSplittingRegionWithLinks.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMobCloneSnapshotFromClientAfterSplittingRegionWithLinks.java diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/CloneSnapshotFromClientAfterSplittingRegionTestBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/CloneSnapshotFromClientAfterSplittingRegionTestBase.java index 5027da6762f9..089a16967d8d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/CloneSnapshotFromClientAfterSplittingRegionTestBase.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/CloneSnapshotFromClientAfterSplittingRegionTestBase.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.master.assignment.RegionStates; +import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.junit.jupiter.api.TestTemplate; @@ -41,10 +42,24 @@ protected CloneSnapshotFromClientAfterSplittingRegionTestBase(int numReplicas) { private void splitRegion() throws IOException { int numRegions = admin.getRegions(tableName).size(); + // Major-compact every region into a single store file that spans the whole key range of the + // region. This guarantees that the split row falls inside an existing store file, so the split + // produces a reference file rather than a whole-file link (see HBASE-26421, which builds an + // HFileLink instead of a Reference when a store file lies entirely on one side of the split + // point). Without this, the randomly generated row keys may all fall on one side of the split + // point, leaving the snapshot with no reference files. The cloned table's meta would then be + // missing the parent split information, which is what HBASE-29111 guards against below. The + // region server has compaction disabled (see CloneSnapshotFromClientTestBase), so we compact + // the regions directly to bypass that; auto-compaction stays disabled afterwards so the + // post-split reference files are not compacted away. + for (HRegion region : TEST_UTIL.getHBaseCluster().getRegions(tableName)) { + if (RegionReplicaUtil.isDefaultReplica(region.getRegionInfo())) { + region.compact(true); + } + } try (Table k = TEST_UTIL.getConnection().getTable(tableName); ResultScanner scanner = k.getScanner(new Scan())) { // Split on the second row to make sure that the snapshot contains reference files. - // We also disable the compaction so that the reference files are not compacted away. scanner.next(); admin.split(tableName, scanner.next().getRow()); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/CloneSnapshotFromClientAfterSplittingRegionWithLinksTestBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/CloneSnapshotFromClientAfterSplittingRegionWithLinksTestBase.java new file mode 100644 index 000000000000..319572279b55 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/CloneSnapshotFromClientAfterSplittingRegionWithLinksTestBase.java @@ -0,0 +1,238 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.client; + +import static org.awaitility.Awaitility.await; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.IOException; +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.hbase.MetaTableAccessor; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.master.RegionState; +import org.apache.hadoop.hbase.master.assignment.RegionStates; +import org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.HStore; +import org.apache.hadoop.hbase.regionserver.HStoreFile; +import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.PairOfSameType; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.TestInfo; +import org.junit.jupiter.api.TestTemplate; + +/** + * Base class for testing the clone-snapshot flow when the snapshot was taken after a split that + * produced whole-file {@code HFileLink}s for the daughters instead of {@code Reference} files. + *

+ * Since HBASE-26421, a split builds an {@code HFileLink} (not a {@code Reference}) for a daughter + * whenever a store file lies entirely on one side of the split point. When every store file falls + * on one side, the snapshot contains no reference files at all, so the daughters link directly to + * the snapshot files and do not depend on the cloned parent region. This is the complement of the + * {@code Reference} case verified by {@code CloneSnapshotFromClientAfterSplittingRegionTestBase} + * (the regression HBASE-29111 guards against): here there is no parent-to-daughter mapping to + * record, and the cloned table must still be safe after the source table and snapshot are removed. + */ +public class CloneSnapshotFromClientAfterSplittingRegionWithLinksTestBase + extends CloneSnapshotFromClientTestBase { + + private static final byte[] QUALIFIER = Bytes.toBytes("q"); + + private static final int ROWS_PER_BATCH = 10; + + // Two disjoint key ranges and a split point strictly between them. After splitting, the "low" + // store file lies entirely below SPLIT_KEY and the "high" store file entirely above it, so each + // daughter receives one whole store file as an HFileLink and neither side gets a Reference. + private static final String LOW_PREFIX = "a"; + private static final String HIGH_PREFIX = "z"; + private static final byte[] SPLIT_KEY = Bytes.toBytes("m"); + + private TableName clonedTableName; + private String snapshotName; + + protected CloneSnapshotFromClientAfterSplittingRegionWithLinksTestBase(int numReplicas) { + super(numReplicas); + } + + protected static void setupConfiguration() { + CloneSnapshotFromClientTestBase.setupConfiguration(); + // CloneSnapshotFromClientTestBase already disables compaction, which keeps the two store files + // we create from being merged into one that would straddle the split point. On top of that, + // make archived files immediately eligible for cleaning so that the data-survival check below + // is meaningful: only the cloned table's HFileLink back-references should keep them alive. + TEST_UTIL.getConfiguration().setLong(TimeToLiveHFileCleaner.TTL_CONF_KEY, 0); + } + + /** + * Unlike the parent, build a controlled single-region table with two disjoint store files (in the + * test itself) rather than the default randomly-populated, pre-split table and snapshots, so the + * split deterministically produces whole-file HFileLinks. Hence we do not call {@code super}. + */ + @Override + @BeforeEach + public void setUp(TestInfo testInfo) throws Exception { + this.admin = TEST_UTIL.getAdmin(); + long tid = EnvironmentEdgeManager.currentTime(); + String methodName = testInfo.getTestMethod().get().getName() + + testInfo.getDisplayName().replaceAll("[^0-9A-Za-z_]", "_"); + tableName = TableName.valueOf(methodName + tid); + clonedTableName = TableName.valueOf(methodName + "-clone-" + tid); + snapshotName = "snaptb-links-" + tid; + // Keep the split parent around so that the snapshot (and therefore the clone) contains it. + admin.catalogJanitorSwitch(false); + } + + @Override + @AfterEach + public void tearDown() throws Exception { + admin.catalogJanitorSwitch(true); + if (clonedTableName != null && admin.tableExists(clonedTableName)) { + TEST_UTIL.deleteTable(clonedTableName); + } + super.tearDown(); + } + + /** + * Create a single-region table so that we fully control its store files before splitting. + */ + @Override + protected void createTable() throws IOException, InterruptedException { + TableDescriptor tableDescriptor = + TableDescriptorBuilder.newBuilder(tableName).setRegionReplication(numReplicas) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build(); + TEST_UTIL.getAdmin().createTable(tableDescriptor); + TEST_UTIL.waitUntilAllRegionsAssigned(tableName); + } + + @TestTemplate + public void testClonedTableWithLinksSurvivesSourceDeletion() throws Exception { + createTable(); + + // Write two store files whose key ranges are disjoint and sit on opposite sides of SPLIT_KEY. + int totalRows = loadTwoDisjointStoreFiles(); + + // Split between the two ranges. Each daughter receives one whole store file as an HFileLink, + // so the snapshot contains only HFileLinks and no Reference files. + int numRegions = admin.getRegions(tableName).size(); + admin.split(tableName, SPLIT_KEY); + await().atMost(Duration.ofSeconds(60)).untilAsserted( + () -> assertEquals(numRegions + numReplicas, admin.getRegions(tableName).size())); + + // Guard: the split must have produced only HFileLinks (no Reference files) for the daughters, + // otherwise this test would silently degrade into the Reference case. + assertDaughtersHaveOnlyLinks(); + + // Take a snapshot and clone it. + admin.snapshot(snapshotName, tableName); + admin.cloneSnapshot(snapshotName, clonedTableName); + SnapshotTestingUtils.waitForTableToBeOnline(TEST_UTIL, clonedTableName); + + // The cloned table must contain all rows. + verifyRowCount(TEST_UTIL, clonedTableName, totalRows); + + // Guard: because the daughters were cloned from whole-file HFileLinks (not References), the + // cloned table's meta does not record the parent's split daughters (SPLITA/SPLITB). The + // daughters link directly to the snapshot files and do not depend on the cloned parent. + assertClonedSplitParentHasNoDaughters(); + + // Remove the source table and the snapshot, then run the HFile cleaner. The cloned table's + // HFileLinks (and their back-references) must keep the underlying files alive in the archive. + TEST_UTIL.deleteTable(tableName); + admin.deleteSnapshot(snapshotName); + runHFileCleaner(); + + // Reopen the cloned table to force the store files to be re-resolved from disk, then verify + // that no data was lost. + admin.disableTable(clonedTableName); + admin.enableTable(clonedTableName); + SnapshotTestingUtils.waitForTableToBeOnline(TEST_UTIL, clonedTableName); + verifyRowCount(TEST_UTIL, clonedTableName, totalRows); + } + + private int loadTwoDisjointStoreFiles() throws IOException { + try (Table table = TEST_UTIL.getConnection().getTable(tableName)) { + putBatch(table, LOW_PREFIX); + TEST_UTIL.flush(tableName); + putBatch(table, HIGH_PREFIX); + TEST_UTIL.flush(tableName); + return countRows(table); + } + } + + private void putBatch(Table table, String prefix) throws IOException { + List puts = new ArrayList<>(); + for (int i = 0; i < ROWS_PER_BATCH; i++) { + Put put = new Put(Bytes.toBytes(prefix + String.format("%03d", i))); + put.addColumn(FAMILY, QUALIFIER, Bytes.toBytes(prefix + i)); + puts.add(put); + } + table.put(puts); + } + + private void assertDaughtersHaveOnlyLinks() throws IOException { + int linkFiles = 0; + for (HRegion region : TEST_UTIL.getHBaseCluster().getRegions(tableName)) { + if ( + !RegionReplicaUtil.isDefaultReplica(region.getRegionInfo()) + || region.getRegionInfo().isSplitParent() + ) { + continue; + } + for (HStore store : region.getStores()) { + for (HStoreFile sf : store.getStorefiles()) { + assertTrue(sf.getFileInfo().isLink(), + "Expected only whole-file HFileLinks after the split, but found a non-link store file: " + + sf.getPath()); + linkFiles++; + } + } + } + assertTrue(linkFiles >= 2, + "Expected at least two HFileLink files across the daughter regions, found " + linkFiles); + } + + private void assertClonedSplitParentHasNoDaughters() throws IOException { + RegionStates regionStates = + TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates(); + List splitParents = + regionStates.getRegionByStateOfTable(clonedTableName).get(RegionState.State.SPLIT); + assertNotNull(splitParents); + assertFalse(splitParents.isEmpty(), "The cloned table should contain a split parent region"); + for (RegionInfo splitParent : splitParents) { + Result result = MetaTableAccessor.getRegionResult(TEST_UTIL.getConnection(), splitParent); + PairOfSameType daughters = MetaTableAccessor.getDaughterRegions(result); + assertNull(daughters.getFirst(), + "Did not expect SPLITA to be recorded for an all-HFileLink clone"); + assertNull(daughters.getSecond(), + "Did not expect SPLITB to be recorded for an all-HFileLink clone"); + } + } + + private void runHFileCleaner() throws IOException { + TEST_UTIL.getMiniHBaseCluster().getMaster().getHFileCleaner().choreForTesting(); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestCloneSnapshotFromClientAfterSplittingRegionWithLinks.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestCloneSnapshotFromClientAfterSplittingRegionWithLinks.java new file mode 100644 index 000000000000..b4677614a9ca --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestCloneSnapshotFromClientAfterSplittingRegionWithLinks.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.client; + +import org.apache.hadoop.hbase.HBaseParameterizedTestTemplate; +import org.apache.hadoop.hbase.testclassification.ClientTests; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Tag; + +@Tag(LargeTests.TAG) +@Tag(ClientTests.TAG) +@HBaseParameterizedTestTemplate(name = "{index}: regionReplication={0}") +public class TestCloneSnapshotFromClientAfterSplittingRegionWithLinks + extends CloneSnapshotFromClientAfterSplittingRegionWithLinksTestBase { + + public TestCloneSnapshotFromClientAfterSplittingRegionWithLinks(int numReplicas) { + super(numReplicas); + } + + @BeforeAll + public static void setUpBeforeClass() throws Exception { + setupConfiguration(); + TEST_UTIL.startMiniCluster(3); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMobCloneSnapshotFromClientAfterSplittingRegionWithLinks.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMobCloneSnapshotFromClientAfterSplittingRegionWithLinks.java new file mode 100644 index 000000000000..cf69752561c8 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMobCloneSnapshotFromClientAfterSplittingRegionWithLinks.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.client; + +import java.io.IOException; +import org.apache.hadoop.hbase.HBaseParameterizedTestTemplate; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.mob.MobConstants; +import org.apache.hadoop.hbase.snapshot.MobSnapshotTestingUtils; +import org.apache.hadoop.hbase.testclassification.ClientTests; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Tag; + +@Tag(LargeTests.TAG) +@Tag(ClientTests.TAG) +@HBaseParameterizedTestTemplate(name = "{index}: regionReplication={0}") +public class TestMobCloneSnapshotFromClientAfterSplittingRegionWithLinks + extends CloneSnapshotFromClientAfterSplittingRegionWithLinksTestBase { + + public TestMobCloneSnapshotFromClientAfterSplittingRegionWithLinks(int numReplicas) { + super(numReplicas); + } + + protected static void setupConfiguration() { + CloneSnapshotFromClientAfterSplittingRegionWithLinksTestBase.setupConfiguration(); + TEST_UTIL.getConfiguration().setInt(MobConstants.MOB_FILE_CACHE_SIZE_KEY, 0); + } + + @BeforeAll + public static void setUpBeforeClass() throws Exception { + setupConfiguration(); + TEST_UTIL.startMiniCluster(3); + } + + @Override + protected void createTable() throws IOException, InterruptedException { + MobSnapshotTestingUtils.createMobTable(TEST_UTIL, tableName, new byte[0][], numReplicas, + FAMILY); + } + + @Override + protected int countRows(Table table) throws IOException { + return MobSnapshotTestingUtils.countMobRows(table); + } + + @Override + protected void verifyRowCount(final HBaseTestingUtil util, final TableName tableName, + long expectedRows) throws IOException { + // Read every cell value so the MOB files are actually resolved: the point of this test is that + // the MOB data is still readable after the source table and snapshot are removed. + MobSnapshotTestingUtils.verifyMobRowCount(util, tableName, expectedRows); + } +}