You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/02/05 20:15:37 UTC

[2/2] incubator-systemml git commit: Fix csv reblock multiple inputs w/ same name (new cse persistent reads)

Fix csv reblock multiple inputs w/ same name (new cse persistent reads)

If the same input file is passed to two different reads, until now, we
redundantly read and reblocked both inputs. This led to specific
anomalies when two csv inputs with the same file name were piggybacked
into a single csvreblock job because unknown size handling and row id
handling are done wrt filenames. Since redundant reads are highly
undesirable anyway, we now also perform common subexpression elimination for
persistent reads, which merges reads if possible. In combination with
other IPA rewrites, we are even able to propagate this information over
the entire program allowing various simplifications. 

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/603b4ca2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/603b4ca2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/603b4ca2

Branch: refs/heads/master
Commit: 603b4ca224069a088644a1066fa94113a2daed76
Parents: 154f166
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Thu Feb 4 22:28:00 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Fri Feb 5 11:09:12 2016 -0800

----------------------------------------------------------------------
 src/main/java/org/apache/sysml/hops/DataOp.java | 31 +++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/603b4ca2/src/main/java/org/apache/sysml/hops/DataOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/DataOp.java b/src/main/java/org/apache/sysml/hops/DataOp.java
index 0104879..0008a34 100644
--- a/src/main/java/org/apache/sysml/hops/DataOp.java
+++ b/src/main/java/org/apache/sysml/hops/DataOp.java
@@ -526,6 +526,35 @@ public class DataOp extends Hop
 	@Override
 	public boolean compare( Hop that )
 	{
-		return false;
+		if( !(that instanceof DataOp) )
+			return false;
+		
+		//common subexpression elimination for redundant persistent reads, in order
+		//to avoid unnecessary read and reblocks as well as to prevent specific anomalies, e.g., 
+		//with multiple piggybacked csvreblock of the same input w/ unknown input sizes
+		
+		DataOp that2 = (DataOp)that;	
+		boolean ret = (  _dataop == that2._dataop
+				      && _dataop == DataOpTypes.PERSISTENTREAD
+					  && _fileName.equals(that2._fileName)
+					  && _inFormat == that2._inFormat
+					  && _inRowsInBlock == that2._inRowsInBlock
+					  && _inColsInBlock == that2._inColsInBlock
+					  && _paramIndexMap!=null && that2._paramIndexMap!=null );
+		
+		//above conditions also ensure consistency with regard to 
+		//(1) checkpointing, (2) reblock and (3) recompile.
+		
+		if( ret ) {
+			for( Entry<String,Integer> e : _paramIndexMap.entrySet() ) {
+				String key1 = e.getKey();
+				int pos1 = e.getValue();
+				int pos2 = that2._paramIndexMap.get(key1);
+				ret &= (   that2.getInput().get(pos2)!=null
+					    && getInput().get(pos1) == that2.getInput().get(pos2) );
+			}
+		}
+		
+		return ret;
 	}
 }