Skip to content

Commit 0be881d

Browse files
committed
TEZ-4503: Warn about large conf properties in payload
1 parent 7855c1f commit 0be881d

2 files changed

Lines changed: 43 additions & 0 deletions

File tree

tez-api/src/main/java/org/apache/tez/common/TezUtils.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import com.google.protobuf.ByteString;
3030

3131
import com.google.protobuf.CodedInputStream;
32+
import org.apache.tez.dag.api.TezConfiguration;
3233
import org.apache.tez.runtime.api.TaskContext;
3334
import org.slf4j.Logger;
3435
import org.slf4j.LoggerFactory;
@@ -43,6 +44,11 @@
4344
import org.xerial.snappy.SnappyInputStream;
4445
import org.xerial.snappy.SnappyOutputStream;
4546

47+
import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_MASK;
48+
import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_MASK_DEFAULT;
49+
import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD;
50+
import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD_DEFAULT;
51+
4652
/**
4753
* Utility methods for setting up a DAG. Has helpers for setting up log4j configuration, converting
4854
* {@link org.apache.hadoop.conf.Configuration} to {@link org.apache.tez.dag.api.UserPayload} etc.
@@ -51,6 +57,14 @@
5157
public final class TezUtils {
5258

5359
private static final Logger LOG = LoggerFactory.getLogger(TezUtils.class);
60+
/** Length above which a property value is reported as unusually large; resolved once at class load. */
private static final int PROPERTY_THRESHOLD;
/** When true, only the key and size of a large property are logged, never its value. */
private static final boolean PROPERTY_MASK;

// Both settings are read a single time, during class initialization, from a freshly constructed
// TezConfiguration. Because they are static finals, configuration changes made afterwards are
// not reflected here.
static {
  final TezConfiguration loggingConf = new TezConfiguration();
  PROPERTY_THRESHOLD =
      loggingConf.getInt(TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD, TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD_DEFAULT);
  PROPERTY_MASK =
      loggingConf.getBoolean(TEZ_LOGGING_PROPERTY_MASK, TEZ_LOGGING_PROPERTY_MASK_DEFAULT);
}
5468

5569
private TezUtils() {}
5670

@@ -211,10 +225,19 @@ public static void populateConfProtoFromEntries(Iterable<Map.Entry<String, Strin
211225
kvp.setKey(key);
212226
kvp.setValue(val);
213227
confBuilder.addConfKeyValues(kvp);
228+
logEntryIfLarge(key, val);
214229
} else {
215230
LOG.debug("null value for key={}. Skipping.", key);
216231
}
217232
}
218233
}
219234

235+
private static void logEntryIfLarge(String key, String value) {
236+
if (value.length() > PROPERTY_THRESHOLD) {
237+
LOG.warn("Property '{}' is unusually big ({} bytes); large payload may lead to OOM.", key, value.length());
238+
if (!PROPERTY_MASK) {
239+
LOG.warn("Large property '{}': {}", key, value);
240+
}
241+
}
242+
}
220243
}

tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1580,6 +1580,26 @@ public TezConfiguration(boolean loadDefaults) {
15801580
TEZ_PREFIX + "generate.debug.artifacts";
15811581
public static final boolean TEZ_GENERATE_DEBUG_ARTIFACTS_DEFAULT = false;
15821582

1583+
/**
1584+
* Int value. Property size threshold (in bytes) for logging during payload serialization. Properties exceeding the
1585+
* threshold are considered unusually large and potentially problematic thus they should be logged.
1586+
*/
1587+
@ConfigurationScope(Scope.VERTEX)
1588+
@ConfigurationProperty(type="integer")
1589+
public static final String TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD =
1590+
TEZ_PREFIX + "logging.property.size.threshold";
1591+
public static final int TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD_DEFAULT = 512 * 1024;
1592+
/**
1593+
* Boolean value. Whether property masking is enabled for logging. Properties may contain sensitive user information
1594+
* such as passwords, credentials, secrets, etc., so they shouldn't be logged unconditionally. When masking is
1595+
* enabled, the property value (content) is not displayed in the logs.
1596+
*/
1597+
@ConfigurationScope(Scope.VERTEX)
1598+
@ConfigurationProperty
1599+
public static final String TEZ_LOGGING_PROPERTY_MASK =
1600+
TEZ_PREFIX + "logging.property.mask";
1601+
public static final boolean TEZ_LOGGING_PROPERTY_MASK_DEFAULT = true;
1602+
15831603
/**
15841604
* Set of tasks for which specific launch command options need to be added.
15851605
* Format: "vertexName[csv of task ids];vertexName[csv of task ids].."

0 commit comments

Comments
 (0)