You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/09/11 02:28:51 UTC

systemml git commit: [SYSTEMML-1899] Fix preads in jmlc/mlcontext w/ non-default formats

Repository: systemml
Updated Branches:
  refs/heads/master 9f7fae6e6 -> ee2aa4e13


[SYSTEMML-1899] Fix preads in jmlc/mlcontext w/ non-default formats

So far, JMLC and MLContext disabled the reading of metadata files for
persistent reads because these metadata files do not exist when data is
passed through the programmatic APIs. However, both APIs also support
traditional persistent reads, which currently fail because the metadata
files are not read and the format therefore falls back to the default
textcell. This patch makes persistent reads more robust by reading the
metadata file whenever it exists.

Furthermore, this patch includes a minor modification of the codegen
optimizer to apply the partial cost bound (stopping plan costing early
once the best known cost is exceeded) only when cost-based pruning is
enabled.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/ee2aa4e1
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/ee2aa4e1
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/ee2aa4e1

Branch: refs/heads/master
Commit: ee2aa4e1335756470d084b0bd715209407ac1f78
Parents: 9f7fae6
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sun Sep 10 19:28:22 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sun Sep 10 19:28:22 2017 -0700

----------------------------------------------------------------------
 .../sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java  | 7 ++++---
 src/main/java/org/apache/sysml/parser/DataExpression.java     | 5 +++--
 2 files changed, 7 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/ee2aa4e1/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
index 60cfd0c..a8f7365 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
@@ -92,8 +92,8 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection
 	private static final double SPARSE_SAFE_SPARSITY_EST = 0.1;
 	
 	//optimizer configuration
-	private static final boolean USE_COST_PRUNING = true;
-	private static final boolean USE_STRUCTURAL_PRUNING = true;
+	public static boolean USE_COST_PRUNING = true;
+	public static boolean USE_STRUCTURAL_PRUNING = true;
 	
 	private static final IDSequence COST_ID = new IDSequence();
 	private static final TemplateRow ROW_TPL = new TemplateRow();
@@ -235,7 +235,8 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection
 			}
 			
 			//cost assignment on hops. Stop early if exceeds bestC.
-			double C = getPlanCost(memo, part, matPoints, plan, costs._computeCosts, bestC);
+			double pCBound = USE_COST_PRUNING ? bestC : Double.MAX_VALUE;
+			double C = getPlanCost(memo, part, matPoints, plan, costs._computeCosts, pCBound);
 			if (LOG.isTraceEnabled())
 				LOG.trace("Enum: " + Arrays.toString(plan) + " -> " + C);
 			numEvalPartPlans += (C==Double.POSITIVE_INFINITY) ? 1 : 0;

http://git-wip-us.apache.org/repos/asf/systemml/blob/ee2aa4e1/src/main/java/org/apache/sysml/parser/DataExpression.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/DataExpression.java b/src/main/java/org/apache/sysml/parser/DataExpression.java
index d487020..d1d896c 100644
--- a/src/main/java/org/apache/sysml/parser/DataExpression.java
+++ b/src/main/java/org/apache/sysml/parser/DataExpression.java
@@ -599,8 +599,9 @@ public class DataExpression extends DataIdentifier
 			String mtdFileName = getMTDFileName(inputFileName);
 
 			// track whether should attempt to read MTD file or not
-			boolean shouldReadMTD = _checkMetadata && !ConfigurationManager
-					.getCompilerConfigFlag(ConfigType.IGNORE_READ_WRITE_METADATA);
+			boolean shouldReadMTD = _checkMetadata
+				&& (!ConfigurationManager.getCompilerConfigFlag(ConfigType.IGNORE_READ_WRITE_METADATA)
+					|| MapReduceTool.existsFileOnHDFS(mtdFileName)); // existing mtd file
 
 			// Check for file existence (before metadata parsing for meaningful error messages)
 			if( shouldReadMTD //skip check for jmlc/mlcontext