You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/02/05 20:15:36 UTC

[1/2] incubator-systemml git commit: Fix backwards compatibility mr v1 (jobconf get instead of getdouble)

Repository: incubator-systemml
Updated Branches:
  refs/heads/master dc5de196f -> 603b4ca22


Fix backwards compatibility mr v1 (jobconf get instead of getdouble)

The method JobConf.getDouble(arg) did not exist in MR v1, which led to
backwards compatibility issues. We now use JobConf.get(arg) for double
values but continue to use getInt/getLong as before because those
already existed in MR v1.

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/154f1660
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/154f1660
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/154f1660

Branch: refs/heads/master
Commit: 154f1660fc9636dce26b315837207ad738c23728
Parents: dc5de19
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Thu Feb 4 19:24:39 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Fri Feb 5 11:09:05 2016 -0800

----------------------------------------------------------------------
 .../apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/154f1660/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java b/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java
index a4de41a..f2a7897 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java
@@ -1190,8 +1190,8 @@ public class MRJobConfiguration
 			//(the single input constraint stems from internal runtime assumptions used to relate meta data to inputs)
 			long sizeSortBuff = InfrastructureAnalyzer.getRemoteMaxMemorySortBuffer();
 			long sizeHDFSBlk = InfrastructureAnalyzer.getHDFSBlockSize();
-			long newSplitSize = sizeHDFSBlk * 2;
-			double spillPercent = job.getDouble(MRConfigurationNames.MR_MAP_SORT_SPILL_PERCENT, 1.0);
+			long newSplitSize = sizeHDFSBlk * 2; //use generic config api for backwards compatibility
+			double spillPercent = Double.parseDouble(job.get(MRConfigurationNames.MR_MAP_SORT_SPILL_PERCENT, "1.0"));
 			int numPMap = OptimizerUtils.getNumMappers();
 			if( numPMap < totalInputSize/newSplitSize && sizeSortBuff*spillPercent >= newSplitSize && lpaths.size()==1 ) {
 				job.setLong(MRConfigurationNames.MR_INPUT_FILEINPUTFORMAT_SPLIT_MAXSIZE, newSplitSize);


[2/2] incubator-systemml git commit: Fix csv reblock multiple inputs w/ same name (new cse persistent reads)

Posted by mb...@apache.org.
Fix csv reblock multiple inputs w/ same name (new cse persistent reads)

If the same input file is passed to two different reads, until now, we
redundantly read and reblocked both inputs. This led to specific
anomalies when two csv inputs with the same file name were piggybacked
into a single csvreblock job because unknown size handling and row id
handling are done with respect to filenames. Since redundant reads are highly
undesirable anyway, we now perform common subexpression elimination also for
persistent reads, which merges reads if possible. In combination with
other IPA rewrites, we are even able to propagate this information over
the entire program allowing various simplifications. 

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/603b4ca2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/603b4ca2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/603b4ca2

Branch: refs/heads/master
Commit: 603b4ca224069a088644a1066fa94113a2daed76
Parents: 154f166
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Thu Feb 4 22:28:00 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Fri Feb 5 11:09:12 2016 -0800

----------------------------------------------------------------------
 src/main/java/org/apache/sysml/hops/DataOp.java | 31 +++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/603b4ca2/src/main/java/org/apache/sysml/hops/DataOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/DataOp.java b/src/main/java/org/apache/sysml/hops/DataOp.java
index 0104879..0008a34 100644
--- a/src/main/java/org/apache/sysml/hops/DataOp.java
+++ b/src/main/java/org/apache/sysml/hops/DataOp.java
@@ -526,6 +526,35 @@ public class DataOp extends Hop
 	@Override
 	public boolean compare( Hop that )
 	{
-		return false;
+		if( !(that instanceof DataOp) )
+			return false;
+		
+		//common subexpression elimination for redundant persistent reads, in order
+		//to avoid unnecessary read and reblocks as well as to prevent specific anomalies, e.g., 
+		//with multiple piggybacked csvreblock of the same input w/ unknown input sizes
+		
+		DataOp that2 = (DataOp)that;	
+		boolean ret = (  _dataop == that2._dataop
+				      && _dataop == DataOpTypes.PERSISTENTREAD
+					  && _fileName.equals(that2._fileName)
+					  && _inFormat == that2._inFormat
+					  && _inRowsInBlock == that2._inRowsInBlock
+					  && _inColsInBlock == that2._inColsInBlock
+					  && _paramIndexMap!=null && that2._paramIndexMap!=null );
+		
+		//above conditions also ensure consistency with regard to 
+		//(1) checkpointing, (2) reblock and (3) recompile.
+		
+		if( ret ) {
+			for( Entry<String,Integer> e : _paramIndexMap.entrySet() ) {
+				String key1 = e.getKey();
+				int pos1 = e.getValue();
+				int pos2 = that2._paramIndexMap.get(key1);
+				ret &= (   that2.getInput().get(pos2)!=null
+					    && getInput().get(pos1) == that2.getInput().get(pos2) );
+			}
+		}
+		
+		return ret;
 	}
 }