You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by ba...@apache.org on 2021/08/27 19:17:37 UTC
[systemds] branch master updated: [SYSTEMDS-3107] Missing
WorkloadTree if recompiled Hops
This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 67774d3 [SYSTEMDS-3107] Missing WorkloadTree if recompiled Hops
67774d3 is described below
commit 67774d3f56a1b4f880b04b3716b7e6ee1b1c0f21
Author: baunsgaard <ba...@tugraz.at>
AuthorDate: Fri Aug 27 21:14:39 2021 +0200
[SYSTEMDS-3107] Missing WorkloadTree if recompiled Hops
This commit fixes the copy constructor of hops, to include the workloadtree.
Unfortunately, because the workload tree wasn't included in the copy constructor,
I had a bunch of experiments running with a default cost calculation based
on the number of unique elements.
Also contained in this commit is a change of the default sampling ratio from
0.01 to 0.05, which improves the sample estimation in critical large joins of
multiple columns.
---
src/main/java/org/apache/sysds/conf/DMLConfig.java | 2 +-
src/main/java/org/apache/sysds/hops/Hop.java | 1 +
.../sysds/hops/ipa/IPAPassCompressionWorkloadAnalysis.java | 2 +-
.../apache/sysds/runtime/compress/cocode/CoCodeGreedy.java | 12 ++++++------
.../sysds/runtime/compress/colgroup/ColGroupFactory.java | 2 +-
5 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/src/main/java/org/apache/sysds/conf/DMLConfig.java b/src/main/java/org/apache/sysds/conf/DMLConfig.java
index e095297..0b7692b 100644
--- a/src/main/java/org/apache/sysds/conf/DMLConfig.java
+++ b/src/main/java/org/apache/sysds/conf/DMLConfig.java
@@ -129,7 +129,7 @@ public class DMLConfig
_defaultVals.put(COMPRESSED_LOSSY, "false" );
_defaultVals.put(COMPRESSED_VALID_COMPRESSIONS, "SDC,DDC");
_defaultVals.put(COMPRESSED_OVERLAPPING, "true" );
- _defaultVals.put(COMPRESSED_SAMPLING_RATIO, "0.01");
+ _defaultVals.put(COMPRESSED_SAMPLING_RATIO, "0.05");
_defaultVals.put(COMPRESSED_COCODE, "AUTO");
_defaultVals.put(COMPRESSED_COST_MODEL, "AUTO");
_defaultVals.put(COMPRESSED_TRANSPOSE, "auto");
diff --git a/src/main/java/org/apache/sysds/hops/Hop.java b/src/main/java/org/apache/sysds/hops/Hop.java
index 397952b..ececf52 100644
--- a/src/main/java/org/apache/sysds/hops/Hop.java
+++ b/src/main/java/org/apache/sysds/hops/Hop.java
@@ -1460,6 +1460,7 @@ public abstract class Hop implements ParseInfo {
_requiresCompression = that._requiresCompression;
_requiresDeCompression = that._requiresDeCompression;
_requiresLineageCaching = that._requiresLineageCaching;
+ _compressedWorkloadTree = that._compressedWorkloadTree;
_outputEmptyBlocks = that._outputEmptyBlocks;
_beginLine = that._beginLine;
diff --git a/src/main/java/org/apache/sysds/hops/ipa/IPAPassCompressionWorkloadAnalysis.java b/src/main/java/org/apache/sysds/hops/ipa/IPAPassCompressionWorkloadAnalysis.java
index 71d4904..324b272 100644
--- a/src/main/java/org/apache/sysds/hops/ipa/IPAPassCompressionWorkloadAnalysis.java
+++ b/src/main/java/org/apache/sysds/hops/ipa/IPAPassCompressionWorkloadAnalysis.java
@@ -59,7 +59,7 @@ public class IPAPassCompressionWorkloadAnalysis extends IPAPass {
CostEstimatorBuilder b = new CostEstimatorBuilder(tree);
// filter out compression plans that is known bad
if(b.shouldTryToCompress()){
- tree.getRoot().setRequiresCompression(e.getValue());
+ tree.getRoot().setRequiresCompression(tree);
for(Hop h : tree.getDecompressList())
h.setRequiresDeCompression();
}
diff --git a/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeGreedy.java b/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeGreedy.java
index 74797e2..51bfa46 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeGreedy.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeGreedy.java
@@ -60,10 +60,9 @@ public class CoCodeGreedy extends AColumnCoCoder {
for(int i = 0; i < inputColumns.size(); i++)
workset.add(new ColIndexes(inputColumns.get(i).getColumns()));
-
// process merging iterations until no more change
while(workset.size() > 1) {
- double changeInSize = 0;
+ double changeInCost = 0;
CompressedSizeInfoColGroup tmp = null;
ColIndexes selected1 = null, selected2 = null;
for(int i = 0; i < workset.size(); i++) {
@@ -77,7 +76,7 @@ public class CoCodeGreedy extends AColumnCoCoder {
// pruning filter : skip dominated candidates
// Since even if the entire size of one of the column lists is removed,
// it still does not improve compression
- if(-Math.min(costC1, costC2) > changeInSize)
+ if(-Math.min(costC1, costC2) > changeInCost)
continue;
// Join the two column groups.
@@ -86,12 +85,13 @@ public class CoCodeGreedy extends AColumnCoCoder {
final double costC1C2 = _cest.getCostOfColumnGroup(c1c2Inf);
final double newSizeChangeIfSelected = costC1C2 - costC1 - costC2;
+
// Select the best join of either the currently selected
// or keep the old one.
- if((tmp == null && newSizeChangeIfSelected < changeInSize) || tmp != null &&
- (newSizeChangeIfSelected < changeInSize || newSizeChangeIfSelected == changeInSize &&
+ if((tmp == null && newSizeChangeIfSelected < changeInCost) || tmp != null &&
+ (newSizeChangeIfSelected < changeInCost || newSizeChangeIfSelected == changeInCost &&
c1c2Inf.getColumns().length < tmp.getColumns().length)) {
- changeInSize = newSizeChangeIfSelected;
+ changeInCost = newSizeChangeIfSelected;
tmp = c1c2Inf;
selected1 = c1;
selected2 = c2;
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java
index f42e382..2b95dba 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java
@@ -218,7 +218,7 @@ public final class ColGroupFactory {
CompressionType estimatedBestCompressionType = cg.getBestCompressionType();
if(estimatedBestCompressionType == CompressionType.SDC && cs.costComputationType == CostType.W_TREE) {
- if(cg.getCompressionSize(CompressionType.DDC) * 3 < cg.getCompressionSize(CompressionType.SDC))
+ if(cg.getCompressionSize(CompressionType.DDC) < cg.getCompressionSize(CompressionType.SDC) * 3)
estimatedBestCompressionType = CompressionType.DDC;
}