Skip to content

Commit 183fad5

Browse files
committed
[SYSTEMDS-3541] Exploratory workload-aware compression on intermediates
Added a config option for aggressive compression and extended the compression workload analyzer to detect aggregation operations and binary matrix-vector operations when inputs are compressed as a single column group. Updated the cost estimation for compression of already-compressed inputs and removed scalars from the compressible intermediate candidates. Added support for double-compressed binary matrix-matrix operations and implemented both single-threaded and multi-threaded compressed binary matrix-vector operations with single-column-group encoding. Removed the relaxed compression threshold and added a logging statement for potential improvements in compressed binary matrix-vector operations. Made CLALibBinaryCellOp always sample for binary matrix-vector operations, expanded test coverage, and introduced a new compression algorithm test case for k-means with intermediate compression enabled. Also extended the CLALibBinaryCellOp binary matrix-vector (sparse and dense) op task to support left and right operations.
1 parent 78b23cf commit 183fad5

8 files changed

Lines changed: 481 additions & 92 deletions

File tree

src/main/java/org/apache/sysds/conf/DMLConfig.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ public class DMLConfig
7979
public static final String PARALLEL_TOKENIZE = "sysds.parallel.tokenize";
8080
public static final String PARALLEL_TOKENIZE_NUM_BLOCKS = "sysds.parallel.tokenize.numBlocks";
8181
public static final String COMPRESSED_LINALG = "sysds.compressed.linalg";
82+
public static final String COMPRESSED_LINALG_INTERMEDIATE = "sysds.compressed.linalg.intermediate";
8283
public static final String COMPRESSED_LOSSY = "sysds.compressed.lossy";
8384
public static final String COMPRESSED_VALID_COMPRESSIONS = "sysds.compressed.valid.compressions";
8485
public static final String COMPRESSED_OVERLAPPING = "sysds.compressed.overlapping";

src/main/java/org/apache/sysds/hops/rewrite/RewriteCompressedReblock.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,11 +171,12 @@ public static boolean satisfiesAggressiveCompressionCondition(Hop hop) {
171171
satisfies |= HopRewriteUtils.isTernary(hop, OpOp3.CTABLE)
172172
&& hop.getInput(0).getDataType().isMatrix()
173173
&& hop.getInput(1).getDataType().isMatrix();
174-
satisfies |= HopRewriteUtils.isData(hop, OpOpData.PERSISTENTREAD) && !hop.isScalar();
174+
satisfies |= HopRewriteUtils.isData(hop, OpOpData.PERSISTENTREAD);
175175
satisfies |= HopRewriteUtils.isUnary(hop, OpOp1.ROUND, OpOp1.FLOOR, OpOp1.NOT, OpOp1.CEIL);
176176
satisfies |= HopRewriteUtils.isBinary(hop, OpOp2.EQUAL, OpOp2.NOTEQUAL, OpOp2.LESS,
177177
OpOp2.LESSEQUAL, OpOp2.GREATER, OpOp2.GREATEREQUAL, OpOp2.AND, OpOp2.OR, OpOp2.MODULUS);
178178
satisfies |= HopRewriteUtils.isTernary(hop, OpOp3.CTABLE);
179+
satisfies &= !hop.isScalar();
179180
}
180181
if(LOG.isDebugEnabled() && satisfies)
181182
LOG.debug("Operation Satisfies: " + hop);

src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlockFactory.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,8 @@ private void classifyPhase() {
344344
// final int nRows = mb.getNumRows();
345345
final int nCols = mb.getNumColumns();
346346
// Assume the scaling of cocoding is at maximum square root good relative to number of columns.
347-
final double scale = Math.sqrt(nCols);
347+
final double scale = mb instanceof CompressedMatrixBlock &&
348+
((CompressedMatrixBlock) mb).getColGroups().size() == 1 ? 1 : Math.sqrt(nCols);
348349
final double threshold = _stats.estimatedCostCols / scale;
349350

350351
if(threshold < _stats.originalCost *

0 commit comments

Comments
 (0)