diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetrics.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetrics.java index ab2ecd88e0b3..20eaafa06fbb 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetrics.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetrics.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hbase; import edu.umd.cs.findbugs.annotations.Nullable; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; @@ -106,4 +107,21 @@ default String getVersion() { * rounded to MB */ Map getRegionCachedInfo(); + + /** + * The available cache space on this region server (bytes), if reported in the server load. + */ + default long getCacheFreeSize() { + return 0L; + } + + /** + * Returns the region cold data information for the regions hosted on this server. Here, cold data + * refers only to region data that is classified as cold by the DataTieringManager according to + * the configured priority logic. These data should be kept out of block cache. + * @return map of region encoded name and its total cold data size, rounded to MB + */ + default Map getRegionColdDataSize() { + return Collections.emptyMap(); + } } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetricsBuilder.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetricsBuilder.java index f1893186cee0..36bc701241e0 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetricsBuilder.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetricsBuilder.java @@ -64,7 +64,7 @@ public static ServerMetrics toServerMetrics(ServerName serverName, public static ServerMetrics toServerMetrics(ServerName serverName, int versionNumber, String version, ClusterStatusProtos.ServerLoad serverLoadPB) { - return ServerMetricsBuilder.newBuilder(serverName) + ServerMetricsBuilder builder = ServerMetricsBuilder.newBuilder(serverName) .setRequestCountPerSecond(serverLoadPB.getNumberOfRequests()) .setRequestCount(serverLoadPB.getTotalNumberOfRequests()) .setInfoServerPort(serverLoadPB.getInfoServerPort()) @@ -86,7 +86,10 @@ public static ServerMetrics toServerMetrics(ServerName serverName, int versionNu .setRegionCachedInfo(serverLoadPB.getRegionCachedInfoMap()) .setReportTimestamp(serverLoadPB.getReportEndTime()) .setLastReportTimestamp(serverLoadPB.getReportStartTime()).setVersionNumber(versionNumber) - .setVersion(version).build(); + .setVersion(version) + .setCacheFreeSize(serverLoadPB.hasCacheFreeSize() ? serverLoadPB.getCacheFreeSize() : 0L) + .setRegionColdDataInfo(serverLoadPB.getRegionColdDataMap()); + return builder.build(); } public static List toCoprocessor(Collection names) { @@ -116,6 +119,7 @@ public static ClusterStatusProtos.ServerLoad toServerLoad(ServerMetrics metrics) if (metrics.getReplicationLoadSink() != null) { builder.setReplLoadSink(ProtobufUtil.toReplicationLoadSink(metrics.getReplicationLoadSink())); } + builder.setCacheFreeSize(metrics.getCacheFreeSize()); return builder.build(); } @@ -141,6 +145,8 @@ public static ServerMetricsBuilder newBuilder(ServerName sn) { private long lastReportTimestamp = 0; private final List tasks = new ArrayList<>(); private Map regionCachedInfo = new HashMap<>(); + private long cacheFreeSize; + private Map regionColdDataInfo = Collections.emptyMap(); private ServerMetricsBuilder(ServerName serverName) { this.serverName = serverName; @@ -226,10 +232,21 @@ public ServerMetricsBuilder setRegionCachedInfo(Map value) { return this; } + public ServerMetricsBuilder setCacheFreeSize(long blockCacheFreeSize) { + this.cacheFreeSize = blockCacheFreeSize; + return this; + } + + public ServerMetricsBuilder setRegionColdDataInfo(Map regionColdDataInfo) { + this.regionColdDataInfo = regionColdDataInfo; + return this; + } + public ServerMetrics build() { return new ServerMetricsImpl(serverName, versionNumber, version, requestCountPerSecond, requestCount, usedHeapSize, maxHeapSize, infoServerPort, sources, sink, regionStatus, - coprocessorNames, reportTimestamp, lastReportTimestamp, userMetrics, tasks, regionCachedInfo); + coprocessorNames, reportTimestamp, lastReportTimestamp, userMetrics, tasks, regionCachedInfo, + cacheFreeSize, regionColdDataInfo); } private static class ServerMetricsImpl implements ServerMetrics { @@ -251,13 +268,16 @@ private static class ServerMetricsImpl implements ServerMetrics { private final Map userMetrics; private final List tasks; private final Map regionCachedInfo; + private final long cacheFreeSize; + private final Map regionColdDataInfo; ServerMetricsImpl(ServerName serverName, int versionNumber, String version, long requestCountPerSecond, long requestCount, Size usedHeapSize, Size maxHeapSize, int infoServerPort, List sources, ReplicationLoadSink sink, Map regionStatus, Set coprocessorNames, long reportTimestamp, long lastReportTimestamp, Map userMetrics, List tasks, - Map regionCachedInfo) { + Map regionCachedInfo, long cacheFreeSize, + Map regionColdDataInfo) { this.serverName = Preconditions.checkNotNull(serverName); this.versionNumber = versionNumber; this.version = version; @@ -275,6 +295,8 @@ private static class ServerMetricsImpl implements ServerMetrics { this.lastReportTimestamp = lastReportTimestamp; this.tasks = tasks; this.regionCachedInfo = regionCachedInfo; + this.cacheFreeSize = cacheFreeSize; + this.regionColdDataInfo = regionColdDataInfo; } @Override @@ -372,6 +394,17 @@ public Map getRegionCachedInfo() { return Collections.unmodifiableMap(regionCachedInfo); } + @Override + public long getCacheFreeSize() { + return cacheFreeSize; + } + + @Override + public Map getRegionColdDataSize() { + return Collections + .unmodifiableMap(regionColdDataInfo != null ? regionColdDataInfo : Collections.emptyMap()); + } + @Override public String toString() { int storeCount = 0; diff --git a/hbase-protocol-shaded/src/main/protobuf/ClusterStatus.proto b/hbase-protocol-shaded/src/main/protobuf/ClusterStatus.proto index 3357b6103085..154bd5c5d458 100644 --- a/hbase-protocol-shaded/src/main/protobuf/ClusterStatus.proto +++ b/hbase-protocol-shaded/src/main/protobuf/ClusterStatus.proto @@ -313,15 +313,23 @@ message ServerLoad { */ repeated UserLoad userLoads = 12; - /** - * The metrics for region cached on this region server - */ - map regionCachedInfo = 13; - /** * The active monitored tasks */ repeated ServerTask tasks = 15; /* 15 here to stay in sync with master branch */ + + map regionCachedInfo = 16; + + /** +* Unallocated block cache capacity on this RegionServer, in bytes. +* Used by the master for cache-aware load balancing (optional). +*/ + optional uint64 cacheFreeSize = 17; + + /** + * The metrics for total region cold data size + */ + map regionColdData = 18; } message LiveServerInfo { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java index efba0aee733b..8ebcf8e983cd 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java @@ -128,6 +128,8 @@ class BalancerClusterState { private int[] regionServerIndexWithBestRegionCachedRatio; // Maps regionName -> oldServerName -> cache ratio of the region on the old server Map> regionCacheRatioOnOldServerMap; + // cache free space available on each server, aligned to the "servers" array indices; + long[] serverBlockCacheFreeSize; private final Supplier> shuffledServerIndicesSupplier = Suppliers.memoizeWithExpiration(() -> { @@ -148,20 +150,23 @@ public String getRack(ServerName server) { BalancerClusterState(Map> clusterState, Map> loads, RegionLocationFinder regionFinder, RackManager rackManager) { - this(null, clusterState, loads, regionFinder, rackManager, null); + this(null, clusterState, loads, regionFinder, rackManager, null, null); } protected BalancerClusterState(Map> clusterState, Map> loads, RegionLocationFinder regionFinder, - RackManager rackManager, Map> oldRegionServerRegionCacheRatio) { - this(null, clusterState, loads, regionFinder, rackManager, oldRegionServerRegionCacheRatio); + RackManager rackManager, Map> oldRegionServerRegionCacheRatio, + Map serverBlockCacheFreeByServer) { + this(null, clusterState, loads, regionFinder, rackManager, oldRegionServerRegionCacheRatio, + serverBlockCacheFreeByServer); } @SuppressWarnings("unchecked") BalancerClusterState(Collection unassignedRegions, Map> clusterState, Map> loads, RegionLocationFinder regionFinder, RackManager rackManager, - Map> oldRegionServerRegionCacheRatio) { + Map> oldRegionServerRegionCacheRatio, + Map serverBlockCacheFreeByServer) { if (unassignedRegions == null) { unassignedRegions = Collections.emptyList(); } @@ -385,6 +390,15 @@ protected BalancerClusterState(Map> clusterState, populateRegionPerLocationFromServer(regionsPerRack, colocatedReplicaCountsPerRack, serversPerRack); } + + this.serverBlockCacheFreeSize = new long[numServers]; + if (serverBlockCacheFreeByServer != null) { + for (int i = 0; i < numServers; i++) { + ServerName sn = servers[i]; + this.serverBlockCacheFreeSize[i] = + sn == null ? 0L : serverBlockCacheFreeByServer.getOrDefault(sn, 0L); + } + } } private void populateRegionPerLocationFromServer(int[][] regionsPerLocation, @@ -705,6 +719,18 @@ public int[] getOrComputeServerWithBestRegionCachedRatio() { return regionServerIndexWithBestRegionCachedRatio; } + /** + * Finds and return the latest reported cache ratio for the region on the RegionServer it's + * currently online. + */ + float getObservedRegionCacheRatio(int region) { + Deque dq = regionLoads[region]; + if (dq == null || dq.isEmpty()) { + return 0.0f; + } + return dq.getLast().getCurrentRegionCacheRatio(); + } + /** * Maps region index to rack index */ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java index b82c68b37da3..37d7631bb14a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java @@ -186,7 +186,7 @@ private boolean isViolating(RegionPlan regionPlan) { private RegionPlanConditional createConditional(Class clazz, BalancerClusterState cluster) { if (cluster == null) { - cluster = new BalancerClusterState(Collections.emptyMap(), null, null, null, null); + cluster = new BalancerClusterState(Collections.emptyMap(), null, null, null, null, null); } try { Constructor ctor = diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java index fac0d82fe013..8efaf1952490 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java @@ -371,7 +371,7 @@ private BalancerClusterState createCluster(List servers, } } return new BalancerClusterState(regions, clusterState, null, this.regionFinder, rackManager, - null); + null, null); } private List findIdleServers(List servers) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java index 39486c3b4c35..a821c4061fdd 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java @@ -47,6 +47,7 @@ import org.apache.hadoop.hbase.Size; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.master.RackManager; import org.apache.hadoop.hbase.master.RegionPlan; import org.apache.hadoop.hbase.util.Pair; import org.apache.yetus.audience.InterfaceAudience; @@ -61,6 +62,30 @@ public class CacheAwareLoadBalancer extends StochasticLoadBalancer { "hbase.master.balancer.stochastic.throttling.cacheRatio"; public static final float CACHE_RATIO_THRESHOLD_DEFAULT = 0.8f; + /** + * Below this cache ratio on the current host, a move may be considered for the free-space + * heuristic. + */ + public static final String LOW_CACHE_RATIO_FOR_RELOCATION_KEY = + "hbase.master.balancer.cacheaware.lowCacheRatioThreshold"; + public static final float LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT = 0.35f; + + /** + * Optimistic region cache ratio assumed for cost purposes when a better host has free cache space + * (actual warmup is not modeled). + */ + public static final String POTENTIAL_CACHE_RATIO_AFTER_MOVE_KEY = + "hbase.master.balancer.cacheaware.potentialCacheRatioAfterMove"; + public static final float POTENTIAL_CACHE_RATIO_AFTER_MOVE_DEFAULT = 0.95f; + + /** + * Minimum free block cache on a target server, as a multiple of the region's on-disk size in + * bytes, required to count that server as a relocation opportunity. + */ + public static final String MIN_FREE_CACHE_SPACE_FACTOR_KEY = + "hbase.master.balancer.cacheaware.minFreeCacheSpaceFactor"; + public static final float MIN_FREE_CACHE_SPACE_FACTOR_DEFAULT = 1.0f; + public Float ratioThreshold; private Long sleepTime; @@ -167,6 +192,23 @@ public void updateClusterMetrics(ClusterMetrics clusterMetrics) { updateRegionLoad(); } + protected Map getServerBlockCacheFreeBytes() { + if (clusterStatus == null) { + return null; + } + Map map = new HashMap<>(); + clusterStatus.getLiveServerMetrics().forEach((sn, sm) -> map.put(sn, sm.getCacheFreeSize())); + return map; + } + + @Override + protected BalancerClusterState createState(Map> clusterState, + Map> loads, RegionLocationFinder finder, + RackManager rackManager) { + return new BalancerClusterState(clusterState, loads, finder, rackManager, + regionCacheRatioOnOldServerMap, getServerBlockCacheFreeBytes()); + } + /** * Collect the amount of region cached for all the regions from all the active region servers. */ @@ -207,8 +249,16 @@ private void updateRegionLoad() { if (!ServerName.isSameAddress(currentServer, sn)) { int regionSizeMB = regionCacheRatioOnCurrentServerMap.get(regionEncodedName).getSecond(); + // The coldDataSize accounts for data size classified as "cold" by DataTieringManager, + // which should be kept out of cache. We sum cold region size in the cache ratio, as we + // don't want to move regions with low cache ratio due to data classified as cold. float regionCacheRatioOnOldServer = - regionSizeMB == 0 ? 0.0f : (float) regionSizeInCache / regionSizeMB; + regionSizeMB + == 0 + ? 0.0f + : (float) (regionSizeInCache + + sm.getRegionColdDataSize().getOrDefault(regionEncodedName, 0)) + / regionSizeMB; regionCacheRatioOnOldServerMap.put(regionEncodedName, new Pair<>(sn, regionCacheRatioOnOldServer)); } @@ -542,6 +592,9 @@ static class CacheAwareCostFunction extends CostFunction { private static final String CACHE_COST_KEY = "hbase.master.balancer.stochastic.cacheCost"; private double cacheRatio; private double bestCacheRatio; + private final float lowCacheRatioThreshold; + private final float potentialCacheRatioAfterMove; + private final float minFreeCacheSpaceFactor; private static final float DEFAULT_CACHE_COST = 20; @@ -552,25 +605,87 @@ static class CacheAwareCostFunction extends CostFunction { !isPersistentCache ? 0.0f : conf.getFloat(CACHE_COST_KEY, DEFAULT_CACHE_COST)); bestCacheRatio = 0.0; cacheRatio = 0.0; + lowCacheRatioThreshold = + conf.getFloat(LOW_CACHE_RATIO_FOR_RELOCATION_KEY, LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT); + potentialCacheRatioAfterMove = Math.min(1.0f, conf + .getFloat(POTENTIAL_CACHE_RATIO_AFTER_MOVE_KEY, POTENTIAL_CACHE_RATIO_AFTER_MOVE_DEFAULT)); + minFreeCacheSpaceFactor = + conf.getFloat(MIN_FREE_CACHE_SPACE_FACTOR_KEY, MIN_FREE_CACHE_SPACE_FACTOR_DEFAULT); } @Override void prepare(BalancerClusterState cluster) { super.prepare(cluster); - cacheRatio = 0.0; - bestCacheRatio = 0.0; + recomputeCacheRatio(cluster); + if (LOG.isDebugEnabled()) { + LOG.debug("CacheAwareCostFunction: Cost: {}", 1 - cacheRatio); + } + } + private void recomputeCacheRatio(BalancerClusterState cluster) { + double[] currentWeighted = computeCurrentWeightedContributions(cluster); + double currentSum = 0.0; + double bestCacheSum = 0.0; for (int region = 0; region < cluster.numRegions; region++) { - cacheRatio += cluster.getOrComputeWeightedRegionCacheRatio(region, - cluster.regionIndexToServerIndex[region]); - bestCacheRatio += cluster.getOrComputeWeightedRegionCacheRatio(region, - getServerWithBestCacheRatioForRegion(region)); + currentSum += currentWeighted[region]; + // here we only get the server index where this region cache ratio is the highest + int serverIndexBestCache = cluster.getOrComputeServerWithBestRegionCachedRatio()[region]; + double currentHighestCache = + cluster.getOrComputeWeightedRegionCacheRatio(region, serverIndexBestCache); + // Get a hypothetical best cache ratio for this region if any server has enough free cache + // to host it. + double potentialHighestCache = + potentialBestWeightedFromFreeCache(cluster, region, currentHighestCache); + double actualHighest = Math.max(currentHighestCache, potentialHighestCache); + bestCacheSum += actualHighest; } + bestCacheRatio = bestCacheSum; + if (bestCacheSum <= 0.0) { + cacheRatio = cluster.numRegions == 0 ? 1.0 : 0.0; + } else { + cacheRatio = Math.min(1.0, currentSum / bestCacheSum); + } + } - cacheRatio = bestCacheRatio == 0 ? 1.0 : cacheRatio / bestCacheRatio; - if (LOG.isDebugEnabled()) { - LOG.debug("CacheAwareCostFunction: Cost: {}", 1 - cacheRatio); + private double[] computeCurrentWeightedContributions(BalancerClusterState cluster) { + int totalRegions = cluster.numRegions; + double[] contrib = new double[totalRegions]; + for (int r = 0; r < totalRegions; r++) { + int s = cluster.regionIndexToServerIndex[r]; + int sizeMb = cluster.getTotalRegionHFileSizeMB(r); + if (sizeMb <= 0) { + contrib[r] = 0.0; + continue; + } + contrib[r] = cluster.getOrComputeWeightedRegionCacheRatio(r, s); + } + return contrib; + } + + /* + * If this region is cold in metrics and at least one RS (including its current host) reports + * enough free block cache to hold it, return an optimistic weighted cache score ({@link + * #potentialCacheRatioAfterMove} * region MB) so placement is not considered optimal solely + * from low ratios when capacity exists somewhere in the cluster. + */ + private double potentialBestWeightedFromFreeCache(BalancerClusterState cluster, int region, + double currentHighestCache) { + float observedRatio = cluster.getObservedRegionCacheRatio(region); + if (observedRatio >= lowCacheRatioThreshold) { + return 0.0; + } + int regionSizeMb = cluster.getTotalRegionHFileSizeMB(region); + if (regionSizeMb <= 0) { + return 0.0; + } + long regionSizeBytes = (long) regionSizeMb * 1024L * 1024L; + long requiredFree = (long) (regionSizeBytes * minFreeCacheSpaceFactor); + for (int s = 0; s < cluster.numServers; s++) { + if (cluster.serverBlockCacheFreeSize[s] >= requiredFree) { + return Math.max(currentHighestCache, regionSizeMb * potentialCacheRatioAfterMove); + } } + return 0.0; } @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 821825ce0cef..24ba11325449 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -355,8 +355,7 @@ private void updateBalancerTableLoadInfo(TableName tableName, ) { finder = this.regionFinder; } - BalancerClusterState cluster = - new BalancerClusterState(loadOfOneTable, loads, finder, rackManager); + BalancerClusterState cluster = createState(loadOfOneTable, loads, finder, rackManager); initCosts(cluster); curOverallCost = computeCost(cluster, Double.MAX_VALUE); @@ -364,6 +363,12 @@ private void updateBalancerTableLoadInfo(TableName tableName, updateStochasticCosts(tableName, curOverallCost, curFunctionCosts); } + protected BalancerClusterState createState(Map> clusterState, + Map> loads, RegionLocationFinder finder, + RackManager rackManager) { + return new BalancerClusterState(clusterState, loads, finder, rackManager); + } + @Override public void updateBalancerLoadInfo(Map>> loadOfAllTable) { @@ -624,8 +629,7 @@ protected List balanceTable(TableName tableName, // The clusterState that is given to this method contains the state // of all the regions in the table(s) (that's true today) // Keep track of servers to iterate through them. - BalancerClusterState cluster = new BalancerClusterState(loadOfOneTable, loads, finder, - rackManager, regionCacheRatioOnOldServerMap); + BalancerClusterState cluster = createState(loadOfOneTable, loads, finder, rackManager); long startTime = EnvironmentEdgeManager.currentTime(); cluster.setStopRequestedAt(startTime + maxRunningTime); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 686d2ceda0e0..4bd6e9f67f84 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -1538,6 +1538,12 @@ private ClusterStatusProtos.ServerLoad buildServerLoad(long reportStartTime, lon }); }); }); + serverLoad.setCacheFreeSize(regionServerWrapper.getBlockCacheFreeSize()); + if (DataTieringManager.getInstance() != null) { + DataTieringManager.getInstance().getRegionColdDataSize() + .forEach((regionName, coldDataSize) -> serverLoad.putRegionColdData(regionName, + roundSize(coldDataSize.getSecond(), unitMB))); + } serverLoad.setReportStartTime(reportStartTime); serverLoad.setReportEndTime(reportEndTime); if (this.infoServer != null) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java index b973a7e956e5..7e3910f2ac56 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java @@ -385,6 +385,11 @@ public Map getRegionCachedInfo() { return new HashMap<>(); } + @Override + public Map getRegionColdDataSize() { + return new HashMap<>(); + } + }; return serverMetrics; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java index d2a9d17cdba0..13137e78f49a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java @@ -224,7 +224,7 @@ static StochasticLoadBalancer buildStochasticLoadBalancer(BalancerClusterState c static BalancerClusterState createMockBalancerClusterState(Map> serverToRegions) { - return new BalancerClusterState(serverToRegions, null, null, null, null); + return new BalancerClusterState(serverToRegions, null, null, null, null, null); } /** diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java index 61bb59565ccf..946518dc9de2 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java @@ -23,12 +23,22 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; +import java.util.ArrayDeque; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Deque; import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.RegionMetrics; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.Size; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; @@ -262,12 +272,104 @@ public void testCacheCost() { } } + /** + * When block-cache persistence, cold regions (below + * {@link CacheAwareLoadBalancer#LOW_CACHE_RATIO_FOR_RELOCATION_KEY}) together with RS-reported + * block-cache free bytes inflate plausible best placement so weighted cache cost crosses + * {@code minCostNeedBalance}; {@link StochasticLoadBalancer#needsBalance} returns true even with + * evenly spread region-count skew. + */ + @Test + public void testNeedsBalanceWhenLowCacheRatioRegionsAndFreeBlockCacheSpace() { + conf.set(HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY, "/tmp/prefetch.persistence"); + CacheAwareLoadBalancer lb = newCacheAwareBalancer(conf); + int regionSizeMb = 64; + long cacheFreeInBytes = regionSizeMb * 1024L * 1024L; + // simulates a cache ratio lower than + // CacheAwareLoadBalancer.LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT + float simulatedCacheRatio = 0.1f; + Map> clusterServers = + mockClusterServersUnsorted(new int[] { 1, 1 }, 1); + List regions = new ArrayList<>(); + clusterServers.values().forEach(regions::addAll); + List serversList = getServersInInsertionOrder(clusterServers); + Map blockCacheFree = new HashMap<>(); + blockCacheFree.put(serversList.get(0), 0L); + blockCacheFree.put(serversList.get(1), cacheFreeInBytes); + BalancerClusterState cluster = new BalancerClusterState(clusterServers, + buildRegionLoads(regions, simulatedCacheRatio, regionSizeMb), null, null, + Collections.emptyMap(), blockCacheFree); + lb.initCosts(cluster); + assertTrue(lb.needsBalance( + TableName.valueOf("testNeedsBalanceWhenLowCacheRatioRegionsAndFreeBlockCacheSpace"), + cluster)); + } + + /** + * Checks that needsBalance isn't true when regions report high cache ratios + */ + @Test + public void testNeedsBalanceFalseWhenWarmRegionsDespiteFreeBlockCacheSpace() { + conf.set(HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY, "/tmp/prefetch.persistence"); + CacheAwareLoadBalancer lb = newCacheAwareBalancer(conf); + int regionSizeMb = 64; + long cacheFreeInBytes = regionSizeMb * 1024L * 1024L; + Map> clusterServers = + mockClusterServersUnsorted(new int[] { 1, 1 }, 1); + List all = new ArrayList<>(); + clusterServers.values().forEach(all::addAll); + List serversList = getServersInInsertionOrder(clusterServers); + Map blockCacheFree = new HashMap<>(); + blockCacheFree.put(serversList.get(0), cacheFreeInBytes + 1024 * 1024); + blockCacheFree.put(serversList.get(1), cacheFreeInBytes + 1024 * 1024); + BalancerClusterState cluster = + new BalancerClusterState(clusterServers, buildRegionLoads(all, 1.0f, regionSizeMb), null, + null, Collections.emptyMap(), blockCacheFree); + lb.initCosts(cluster); + assertFalse(lb.needsBalance( + TableName.valueOf("testNeedsBalanceFalseWhenWarmRegionsDespiteFreeBlockCacheSpace"), + cluster)); + } + + private CacheAwareLoadBalancer newCacheAwareBalancer(Configuration cfg) { + CacheAwareLoadBalancer lb = new CacheAwareLoadBalancer(); + lb.setMasterServices(services); + lb.loadConf(cfg); + return lb; + } + + private static Map> + buildRegionLoads(Collection regions, float cachedRatio, int regionSizeMb) { + RegionMetrics rm = mock(RegionMetrics.class); + when(rm.getReadRequestCount()).thenReturn(0L); + when(rm.getWriteRequestCount()).thenReturn(0L); + when(rm.getMemStoreSize()).thenReturn(Size.ZERO); + when(rm.getStoreFileSize()).thenReturn(Size.ZERO); + when(rm.getRegionSizeMB()).thenReturn(new Size(regionSizeMb, Size.Unit.MEGABYTE)); + when(rm.getCurrentRegionCachedRatio()).thenReturn(cachedRatio); + + BalancerRegionLoad brl = new BalancerRegionLoad(rm); + Map> loads = new HashMap<>(); + for (RegionInfo ri : regions) { + ArrayDeque dq = new ArrayDeque<>(1); + dq.add(brl); + loads.put(ri.getRegionNameAsString(), dq); + loads.put(ri.getEncodedName(), dq); + } + return loads; + } + + private static List + getServersInInsertionOrder(Map> cluster) { + return new ArrayList<>(cluster.keySet()); + } + private class MockClusterForCacheCost extends BalancerClusterState { private final Map, Float> regionServerCacheRatio = new HashMap<>(); public MockClusterForCacheCost(int[][] regionsArray) { // regions[0] is an array where index = serverIndex and value = number of regions - super(mockClusterServersUnsorted(regionsArray[0], 1), null, null, null, null); + super(mockClusterServersUnsorted(regionsArray[0], 1), null, null, null, null, null); Map> oldCacheRatio = new HashMap<>(); for (int i = 1; i < regionsArray.length; i++) { int regionIndex = i - 1; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStoreFileTableSkewCostFunction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStoreFileTableSkewCostFunction.java index 3977ad96dd9a..b3419586f8bf 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStoreFileTableSkewCostFunction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStoreFileTableSkewCostFunction.java @@ -198,7 +198,7 @@ private static class DummyBalancerClusterState extends BalancerClusterState { private final RegionInfo[] testRegions; DummyBalancerClusterState(BalancerClusterState bcs) { - super(bcs.clusterState, null, null, null, null); + super(bcs.clusterState, null, null, null, null, null); this.testRegions = bcs.regions; }