Skip to content

Commit a53b4dd

Browse files
[FLUSS] Fix RocksDB JNI memory leak and add statistics config option
- Use getLongProperty() instead of getProperty() to avoid native string allocation in JNI layer during high-frequency Prometheus metrics scraping - Add kv.rocksdb.statistics.enabled config option (default: false) to allow disabling RocksDB statistics collection for reduced overhead - Refactor getPropertyValue to getPropertyLongValue and eliminate code duplication between overloaded methods - Add configuration documentation for the new option
1 parent 864bbe6 commit a53b4dd

5 files changed

Lines changed: 43 additions & 39 deletions

File tree

fluss-common/src/main/java/org/apache/fluss/config/ConfigOptions.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1590,6 +1590,19 @@ public class ConfigOptions {
15901590
+ "The RateLimiter is always enabled. The default value is Long.MAX_VALUE (effectively unlimited). "
15911591
+ "Set to a lower value (e.g., 100MB) to limit the rate.");
15921592

1593+
public static final ConfigOption<Boolean> KV_STATISTICS_ENABLED =
1594+
key("kv.rocksdb.statistics.enabled")
1595+
.booleanType()
1596+
.defaultValue(false)
1597+
.withDescription(
1598+
"Whether to enable RocksDB statistics collection for metrics. "
1599+
+ "When enabled, RocksDB will collect various statistics like bytes read/written, "
1600+
+ "compaction time, flush time, etc., which can be exposed through Fluss metrics. "
1601+
+ "Enabling statistics has a small performance overhead (typically < 5%). "
1602+
+ "If you experience performance issues or don't need RocksDB-level metrics, "
1603+
+ "you can disable this option to reduce overhead. "
1604+
+ "The default value is `true`.");
1605+
15931606
// --------------------------------------------------------------------------
15941607
// Provided configurable ColumnFamilyOptions within Fluss
15951608
// --------------------------------------------------------------------------

fluss-server/src/main/java/org/apache/fluss/server/kv/KvTablet.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -227,9 +227,11 @@ public static KvTablet create(
227227
private static RocksDBKv buildRocksDBKv(
228228
Configuration configuration, File kvDir, RateLimiter sharedRateLimiter)
229229
throws IOException {
230-
// Enable statistics to support RocksDB statistics collection
230+
// Check if statistics collection is enabled from configuration
231+
boolean enableStatistics = configuration.get(ConfigOptions.KV_STATISTICS_ENABLED);
231232
RocksDBResourceContainer rocksDBResourceContainer =
232-
new RocksDBResourceContainer(configuration, kvDir, true, sharedRateLimiter);
233+
new RocksDBResourceContainer(
234+
configuration, kvDir, enableStatistics, sharedRateLimiter);
233235
RocksDBKvBuilder rocksDBKvBuilder =
234236
new RocksDBKvBuilder(
235237
kvDir,

fluss-server/src/main/java/org/apache/fluss/server/kv/rocksdb/RocksDBStatistics.java

Lines changed: 21 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ public long getWriteLatencyMicros() {
167167
* @return number of L0 files, or 0 if not available
168168
*/
169169
public long getNumFilesAtLevel0() {
170-
return getPropertyValue(defaultColumnFamilyHandle, "rocksdb.num-files-at-level0");
170+
return getPropertyLongValue(defaultColumnFamilyHandle, "rocksdb.num-files-at-level0");
171171
}
172172

173173
/**
@@ -176,7 +176,7 @@ public long getNumFilesAtLevel0() {
176176
* @return 1 if flush is pending, 0 otherwise
177177
*/
178178
public long getFlushPending() {
179-
return getPropertyValue("rocksdb.mem-table-flush-pending");
179+
return getPropertyLongValue("rocksdb.mem-table-flush-pending");
180180
}
181181

182182
/**
@@ -185,7 +185,7 @@ public long getFlushPending() {
185185
* @return 1 if compaction is pending, 0 otherwise
186186
*/
187187
public long getCompactionPending() {
188-
return getPropertyValue("rocksdb.compaction-pending");
188+
return getPropertyLongValue("rocksdb.compaction-pending");
189189
}
190190

191191
/**
@@ -308,60 +308,44 @@ private long getHistogramValue(HistogramType histogramType) {
308308
}
309309

310310
/**
311-
* Get property value from RocksDB with resource guard protection.
311+
* Get long property value from RocksDB with resource guard protection.
312+
*
313+
* <p>This method uses getLongProperty() instead of getProperty() to avoid string allocation in
314+
* the JNI layer.
312315
*
313316
* @param propertyName the property name to query
314317
* @return the property value as long, or 0 if not available or RocksDB is closed
315318
*/
316-
private long getPropertyValue(String propertyName) {
317-
try (ResourceGuard.Lease lease = resourceGuard.acquireResource()) {
318-
String value = db.getProperty(propertyName);
319-
if (value != null && !value.isEmpty()) {
320-
return Long.parseLong(value);
321-
}
322-
} catch (RocksDBException e) {
323-
LOG.debug(
324-
"Failed to get property {} from RocksDB (possibly closed or unavailable)",
325-
propertyName,
326-
e);
327-
} catch (NumberFormatException e) {
328-
LOG.debug("Failed to parse property {} value as long", propertyName, e);
329-
} catch (Exception e) {
330-
// ResourceGuard may throw exception if RocksDB is closed
331-
LOG.debug(
332-
"Failed to access RocksDB for property {} (possibly closed)", propertyName, e);
333-
}
334-
return 0L;
319+
private long getPropertyLongValue(String propertyName) {
320+
return getPropertyLongValue(null, propertyName);
335321
}
336322

337323
/**
338-
* Get property value from RocksDB for a specific column family with resource guard protection.
324+
* Get long property value from RocksDB for a specific column family with resource guard
325+
* protection.
339326
*
340327
* <p>Some RocksDB properties are column family specific and must be accessed through the column
341-
* family handle.
328+
* family handle. This method uses getLongProperty() instead of getProperty() to avoid string
329+
* allocation in the JNI layer.
342330
*
343-
* @param columnFamilyHandle the column family handle
331+
* @param columnFamilyHandle the column family handle, null for DB-level properties
344332
* @param propertyName the property name to query
345333
* @return the property value as long, or 0 if not available or RocksDB is closed
346334
*/
347-
private long getPropertyValue(ColumnFamilyHandle columnFamilyHandle, String propertyName) {
335+
private long getPropertyLongValue(
336+
@Nullable ColumnFamilyHandle columnFamilyHandle, String propertyName) {
348337
try (ResourceGuard.Lease lease = resourceGuard.acquireResource()) {
349-
if (columnFamilyHandle == null) {
350-
return 0L;
351-
}
352-
String value = db.getProperty(columnFamilyHandle, propertyName);
353-
if (value != null && !value.isEmpty()) {
354-
return Long.parseLong(value);
338+
if (columnFamilyHandle != null) {
339+
return db.getLongProperty(columnFamilyHandle, propertyName);
340+
} else {
341+
return db.getLongProperty(propertyName);
355342
}
356343
} catch (RocksDBException e) {
357344
LOG.debug(
358-
"Failed to get property {} from RocksDB column family (possibly closed or unavailable)",
345+
"Failed to get property {} from RocksDB (possibly closed or unavailable)",
359346
propertyName,
360347
e);
361-
} catch (NumberFormatException e) {
362-
LOG.debug("Failed to parse property {} value as long", propertyName, e);
363348
} catch (Exception e) {
364-
// ResourceGuard may throw exception if RocksDB is closed
365349
LOG.debug(
366350
"Failed to access RocksDB for property {} (possibly closed)", propertyName, e);
367351
}

fluss-server/src/test/java/org/apache/fluss/server/kv/KvTabletTest.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
package org.apache.fluss.server.kv;
1919

20+
import org.apache.fluss.config.ConfigOptions;
2021
import org.apache.fluss.config.Configuration;
2122
import org.apache.fluss.config.TableConfig;
2223
import org.apache.fluss.exception.InvalidTargetColumnException;
@@ -1419,6 +1420,9 @@ private Value valueOf(BinaryRow row) {
14191420

14201421
@Test
14211422
void testRocksDBMetrics() throws Exception {
1423+
// Enable RocksDB statistics for this test
1424+
conf.set(ConfigOptions.KV_STATISTICS_ENABLED, true);
1425+
14221426
// Initialize tablet with schema
14231427
initLogTabletAndKvTablet(DATA1_SCHEMA_PK, new HashMap<>());
14241428

website/docs/maintenance/configuration.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ during the Fluss cluster working.
162162
| kv.rocksdb.bloom-filter.bits-per-key | Double | 10.0 | Bits per key that bloom filter will use, this only take effect when bloom filter is used. The default value is 10.0. |
163163
| kv.rocksdb.bloom-filter.block-based-mode | Boolean | false | If true, RocksDB will use block-based filter instead of full filter, this only take effect when bloom filter is used. The default value is `false`. |
164164
| kv.rocksdb.shared-rate-limiter-bytes-per-sec | MemorySize | Long.MAX_VALUE | The bytes per second rate limit for RocksDB flush and compaction operations shared across all RocksDB instances on the TabletServer. The rate limiter is always enabled. The default value is Long.MAX_VALUE (effectively unlimited). Set to a lower value (e.g., 100MB) to limit the rate. This configuration can be updated dynamically without server restart. See [Updating Configs](operations/updating-configs.md) for more details. |
165+
| kv.rocksdb.statistics.enabled | Boolean | true | Whether to enable RocksDB statistics collection for metrics. When enabled, RocksDB will collect various statistics like bytes read/written, compaction time, flush time, etc., which can be exposed through Fluss metrics. Enabling statistics has a small performance overhead (typically < 5%). If you experience performance issues or don't need RocksDB-level metrics, you can disable this option to reduce overhead. |
165166
| kv.recover.log-record-batch.max-size | MemorySize | 16mb | The max fetch size for fetching log to apply to kv during recovering kv. |
166167

167168
## Metrics

0 commit comments

Comments
 (0)