From 4def487cdef6f4fe1f44aaaa7d869e313f430d92 Mon Sep 17 00:00:00 2001 From: Tejas Lodaya <159110854+tejaslodayadd@users.noreply.github.com> Date: Sun, 17 May 2026 18:03:38 -0700 Subject: [PATCH] CASSANALYTICS-52: Downgrade noisy TokenPartitioner partition/range maps to DEBUG The partition map, reverse partition map, and initial-ranges log lines in TokenPartitioner emit one entry per token; with the default 256 vnodes per node this produces tens of thousands of characters of log output at INFO on every job startup, drowning out useful messages. Lower these to DEBUG in both TokenPartitioner implementations (cassandra-analytics-core bulk writer and cassandra-analytics-common bulk reader). Also switch the two concatenated info strings in the common variant to SLF4J placeholder form so the map's toString() is not built when DEBUG is suppressed. Scalar summaries ("Number of ranges", "Tasks to run", "Calculated number of splits") remain at INFO. Patch by Tejas Lodaya for CASSANALYTICS-52 --- CHANGES.txt | 1 + .../spark/data/partitioner/TokenPartitioner.java | 8 ++++---- .../cassandra/spark/bulkwriter/TokenPartitioner.java | 8 ++++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 4e5f27ed1..e0924e163 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,5 +1,6 @@ 0.5.0 ----- + * Downgrade noisy TokenPartitioner partition/range maps to DEBUG (CASSANALYTICS-52) * Make BulkWriterConfig extensible (CASSANALYTICS-168) 0.4.0 diff --git a/cassandra-analytics-common/src/main/java/org/apache/cassandra/spark/data/partitioner/TokenPartitioner.java b/cassandra-analytics-common/src/main/java/org/apache/cassandra/spark/data/partitioner/TokenPartitioner.java index 6f63c357a..11aa26c5a 100644 --- a/cassandra-analytics-common/src/main/java/org/apache/cassandra/spark/data/partitioner/TokenPartitioner.java +++ b/cassandra-analytics-common/src/main/java/org/apache/cassandra/spark/data/partitioner/TokenPartitioner.java @@ -116,9 +116,9 @@ private void calculateTokenRangeMap() validateCompleteRangeCoverage(); validateRangesDoNotOverlap(); - LOGGER.info("Number of partitions {}", reversePartitionMap.size()); - LOGGER.info("Partition map " + partitionMap); - LOGGER.info("Reverse partition map " + reversePartitionMap); + LOGGER.debug("Number of partitions {}", reversePartitionMap.size()); + LOGGER.debug("Partition map {}", partitionMap); + LOGGER.debug("Reverse partition map {}", reversePartitionMap); } private static int calculateSplits(CassandraRing ring, int defaultParallelism, Integer cores) @@ -126,7 +126,7 @@ private static int calculateSplits(CassandraRing ring, int defaultParallelism, I int tasksToRun = Math.max(cores, defaultParallelism); LOGGER.info("Tasks to run: {}", tasksToRun); Map, List> rangeListMap = ring.rangeMap().asMapOfRanges(); - LOGGER.info("Initial ranges: {}", rangeListMap); + LOGGER.debug("Initial ranges: {}", rangeListMap); int ranges = rangeListMap.size(); LOGGER.info("Number of ranges: {}", ranges); int calculatedSplits = TokenPartitioner.divCeil(tasksToRun, ranges); diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/TokenPartitioner.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/TokenPartitioner.java index 479ddcae0..c0b049eda 100644 --- a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/TokenPartitioner.java +++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/TokenPartitioner.java @@ -104,9 +104,9 @@ public TokenPartitioner(TokenRangeMapping tokenRangeMapping, private void logPartitionInfo() { - LOGGER.info("Number of partitions: {}", nrPartitions); - LOGGER.info("Partition map: {}", partitionMap); - LOGGER.info("Reverse partition map: {}", reversePartitionMap); + LOGGER.debug("Number of partitions: {}", nrPartitions); + LOGGER.debug("Partition map: {}", partitionMap); + LOGGER.debug("Reverse partition map: {}", reversePartitionMap); } /** @@ -278,7 +278,7 @@ public int calculateSplits(TokenRangeMapping tokenRangeMapping, } int tasksToRun = Math.max(cores, defaultParallelism); Map, List> rangeListMap = tokenRangeMapping.getRangeMap().asMapOfRanges(); - LOGGER.info("Initial ranges: {}", rangeListMap); + LOGGER.debug("Initial ranges: {}", rangeListMap); int ranges = rangeListMap.size(); LOGGER.info("Number of ranges: {}", ranges); int calculatedSplits = divCeil(tasksToRun, ranges);