You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/09/02 23:30:24 UTC
[2/2] systemml git commit: [SYSTEMML-1881] Tuning parfor degree of
parallelism (over-provisioning)
[SYSTEMML-1881] Tuning parfor degree of parallelism (over-provisioning)
This patch addresses issues of under-utilized CPU resources in parfor
contexts. For example, on Kmeans or MSVM with few runs or classes that
are not a factor of the number of hardware threads, we assign the
remaining parallelism too conservatively. Consider Kmeans with 10 runs
and 16 hardware threads - in this case, we assign k=10 to parfor and k=1
to the operations in the parfor body. This patch fine-tunes this
assignment by slightly over-provisioning CPU resources, which is usually
a good idea due to barriers between operations. We now assign the
remaining operation parallelism with k=max(round(maxK/parforK),1).
On the perftest Kmeans 100K x 1K scenario with 50 classes, 10 runs, and
16 hardware threads, this patch improved performance from 196s to 168s.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/ba73291c
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/ba73291c
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/ba73291c
Branch: refs/heads/master
Commit: ba73291c985d876eaeeb5719623461131bcc7f66
Parents: 2c57cf7
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sat Sep 2 14:30:11 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat Sep 2 14:30:11 2017 -0700
----------------------------------------------------------------------
.../parfor/opt/OptimizerRuleBased.java | 34 +++++++++++++-------
1 file changed, 22 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/systemml/blob/ba73291c/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
index b8da25a..9dada01 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
@@ -1247,12 +1247,12 @@ public class OptimizerRuleBased extends Optimizer
//constrain max parfor parallelism by problem size
int parforK = (int)((_N<kMax)? _N : kMax);
-
-
+
// if gpu mode is enabled, the amount of parallelism is set to
// the smaller of the number of iterations and the number of GPUs
// otherwise it default to the number of CPU cores and the
// operations are run in CP mode
+ //FIXME rework for nested parfor parallelism and body w/o gpu ops
if (DMLScript.USE_ACCELERATOR) {
long perGPUBudget = GPUContextPool.initialGPUMemBudget();
double maxMemUsage = getMaxCPOnlyBudget(n);
@@ -1264,15 +1264,14 @@ public class OptimizerRuleBased extends Optimizer
parforK + "]");
}
}
-
//set parfor degree of parallelism
pfpb.setDegreeOfParallelism(parforK);
- n.setK(parforK);
+ n.setK(parforK);
//distribute remaining parallelism
- int remainParforK = (int)Math.ceil(((double)(kMax-parforK+1))/parforK);
- int remainOpsK = Math.max(_lkmaxCP / parforK, 1);
+ int remainParforK = getRemainingParallelismParFor(kMax, parforK);
+ int remainOpsK = getRemainingParallelismOps(_lkmaxCP, parforK);
rAssignRemainingParallelism( n, remainParforK, remainOpsK );
}
else // ExecType.MR/ExecType.SPARK
@@ -1334,13 +1333,13 @@ public class OptimizerRuleBased extends Optimizer
//set parfor degree of parallelism
long id = c.getID();
c.setK(tmpK);
- ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter
- .getAbstractPlanMapping().getMappedProg(id)[1];
+ ParForProgramBlock pfpb = (ParForProgramBlock)
+ OptTreeConverter.getAbstractPlanMapping().getMappedProg(id)[1];
pfpb.setDegreeOfParallelism(tmpK);
- //distribute remaining parallelism
- int remainParforK = (int)Math.ceil(((double)(parforK-tmpK+1))/tmpK);
- int remainOpsK = Math.max(opsK / tmpK, 1);
+ //distribute remaining parallelism
+ int remainParforK = getRemainingParallelismParFor(parforK, tmpK);
+ int remainOpsK = getRemainingParallelismOps(opsK, tmpK);
rAssignRemainingParallelism(c, remainParforK, remainOpsK);
}
else if( c.getNodeType() == NodeType.HOP )
@@ -1387,7 +1386,18 @@ public class OptimizerRuleBased extends Optimizer
}
}
}
-
+
+ private static int getRemainingParallelismParFor(int parforK, int tmpK) {
+ //compute max remaining parfor parallelism k such that k * tmpK <= parforK, i.e., floor(parforK/tmpK) for tmpK > 0
+ return (int)Math.ceil((double)(parforK-tmpK+1) / tmpK);
+ }
+
+ private static int getRemainingParallelismOps(int opsK, int tmpK) {
+ //compute remaining operations parallelism k = max(round(opsK/tmpK), 1), i.e., never below 1 and with
+ //slight over-provisioning such that k * tmpK <= 1.5 * opsK whenever tmpK <= opsK; note that if parfor
+ //already exploits the maximum parallelism (tmpK == opsK), this yields k = 1 without any over-provisioning.
+ return (int)Math.max(Math.round((double)opsK / tmpK), 1);
+ }
///////
//REWRITE set task partitioner