You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by ba...@apache.org on 2021/08/27 19:17:37 UTC

[systemds] branch master updated: [SYSTEMDS-3107] Missing WorkloadTree if recompiled Hops

This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 67774d3  [SYSTEMDS-3107] Missing WorkloadTree if recompiled Hops
67774d3 is described below

commit 67774d3f56a1b4f880b04b3716b7e6ee1b1c0f21
Author: baunsgaard <ba...@tugraz.at>
AuthorDate: Fri Aug 27 21:14:39 2021 +0200

    [SYSTEMDS-3107] Missing WorkloadTree if recompiled Hops
    
    This commit fixes the copy constructor of hops to include the workload tree.
    Unfortunately, because the workload tree was not included in the copy
    constructor, I had a bunch of experiments running with a default cost
    calculation based on the number of unique elements.
    
    Also contained in this commit is a change of the default sampling rate from
    0.01 to 0.05, which improves the sample estimation in critical large joins
    of multiple columns.
---
 src/main/java/org/apache/sysds/conf/DMLConfig.java           |  2 +-
 src/main/java/org/apache/sysds/hops/Hop.java                 |  1 +
 .../sysds/hops/ipa/IPAPassCompressionWorkloadAnalysis.java   |  2 +-
 .../apache/sysds/runtime/compress/cocode/CoCodeGreedy.java   | 12 ++++++------
 .../sysds/runtime/compress/colgroup/ColGroupFactory.java     |  2 +-
 5 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/main/java/org/apache/sysds/conf/DMLConfig.java b/src/main/java/org/apache/sysds/conf/DMLConfig.java
index e095297..0b7692b 100644
--- a/src/main/java/org/apache/sysds/conf/DMLConfig.java
+++ b/src/main/java/org/apache/sysds/conf/DMLConfig.java
@@ -129,7 +129,7 @@ public class DMLConfig
 		_defaultVals.put(COMPRESSED_LOSSY,       "false" );
 		_defaultVals.put(COMPRESSED_VALID_COMPRESSIONS, "SDC,DDC");
 		_defaultVals.put(COMPRESSED_OVERLAPPING, "true" );
-		_defaultVals.put(COMPRESSED_SAMPLING_RATIO, "0.01");
+		_defaultVals.put(COMPRESSED_SAMPLING_RATIO, "0.05");
 		_defaultVals.put(COMPRESSED_COCODE,      "AUTO");
 		_defaultVals.put(COMPRESSED_COST_MODEL,  "AUTO");
 		_defaultVals.put(COMPRESSED_TRANSPOSE,   "auto");
diff --git a/src/main/java/org/apache/sysds/hops/Hop.java b/src/main/java/org/apache/sysds/hops/Hop.java
index 397952b..ececf52 100644
--- a/src/main/java/org/apache/sysds/hops/Hop.java
+++ b/src/main/java/org/apache/sysds/hops/Hop.java
@@ -1460,6 +1460,7 @@ public abstract class Hop implements ParseInfo {
 		_requiresCompression = that._requiresCompression;
 		_requiresDeCompression = that._requiresDeCompression;
 		_requiresLineageCaching = that._requiresLineageCaching;
+		_compressedWorkloadTree = that._compressedWorkloadTree;
 		_outputEmptyBlocks = that._outputEmptyBlocks;
 		
 		_beginLine = that._beginLine;
diff --git a/src/main/java/org/apache/sysds/hops/ipa/IPAPassCompressionWorkloadAnalysis.java b/src/main/java/org/apache/sysds/hops/ipa/IPAPassCompressionWorkloadAnalysis.java
index 71d4904..324b272 100644
--- a/src/main/java/org/apache/sysds/hops/ipa/IPAPassCompressionWorkloadAnalysis.java
+++ b/src/main/java/org/apache/sysds/hops/ipa/IPAPassCompressionWorkloadAnalysis.java
@@ -59,7 +59,7 @@ public class IPAPassCompressionWorkloadAnalysis extends IPAPass {
 			CostEstimatorBuilder b = new CostEstimatorBuilder(tree);
 			// filter out compression plans that is known bad
 			if(b.shouldTryToCompress()){
-				tree.getRoot().setRequiresCompression(e.getValue());
+				tree.getRoot().setRequiresCompression(tree);
 				for(Hop h : tree.getDecompressList())
 					h.setRequiresDeCompression();
 			}
diff --git a/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeGreedy.java b/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeGreedy.java
index 74797e2..51bfa46 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeGreedy.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeGreedy.java
@@ -60,10 +60,9 @@ public class CoCodeGreedy extends AColumnCoCoder {
 
 		for(int i = 0; i < inputColumns.size(); i++)
 			workset.add(new ColIndexes(inputColumns.get(i).getColumns()));
-
 		// process merging iterations until no more change
 		while(workset.size() > 1) {
-			double changeInSize = 0;
+			double changeInCost = 0;
 			CompressedSizeInfoColGroup tmp = null;
 			ColIndexes selected1 = null, selected2 = null;
 			for(int i = 0; i < workset.size(); i++) {
@@ -77,7 +76,7 @@ public class CoCodeGreedy extends AColumnCoCoder {
 					// pruning filter : skip dominated candidates
 					// Since even if the entire size of one of the column lists is removed,
 					// it still does not improve compression
-					if(-Math.min(costC1, costC2) > changeInSize)
+					if(-Math.min(costC1, costC2) > changeInCost)
 						continue;
 
 					// Join the two column groups.
@@ -86,12 +85,13 @@ public class CoCodeGreedy extends AColumnCoCoder {
 					final double costC1C2 = _cest.getCostOfColumnGroup(c1c2Inf);
 
 					final double newSizeChangeIfSelected = costC1C2 - costC1 - costC2;
+
 					// Select the best join of either the currently selected
 					// or keep the old one.
-					if((tmp == null && newSizeChangeIfSelected < changeInSize) || tmp != null &&
-						(newSizeChangeIfSelected < changeInSize || newSizeChangeIfSelected == changeInSize &&
+					if((tmp == null && newSizeChangeIfSelected < changeInCost) || tmp != null &&
+						(newSizeChangeIfSelected < changeInCost || newSizeChangeIfSelected == changeInCost &&
 							c1c2Inf.getColumns().length < tmp.getColumns().length)) {
-						changeInSize = newSizeChangeIfSelected;
+						changeInCost = newSizeChangeIfSelected;
 						tmp = c1c2Inf;
 						selected1 = c1;
 						selected2 = c2;
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java
index f42e382..2b95dba 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java
@@ -218,7 +218,7 @@ public final class ColGroupFactory {
 			CompressionType estimatedBestCompressionType = cg.getBestCompressionType();
 			
 			if(estimatedBestCompressionType == CompressionType.SDC && cs.costComputationType == CostType.W_TREE) {
-				if(cg.getCompressionSize(CompressionType.DDC) * 3 < cg.getCompressionSize(CompressionType.SDC))
+				if(cg.getCompressionSize(CompressionType.DDC) < cg.getCompressionSize(CompressionType.SDC) * 3)
 					estimatedBestCompressionType = CompressionType.DDC;
 			}