You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/09/12 06:46:02 UTC

systemml git commit: [SYSTEMML-1903] Fix codegen row and outer templates, autoencoder

Repository: systemml
Updated Branches:
  refs/heads/master 816a900b5 -> 8b6ebbcce


[SYSTEMML-1903] Fix codegen row and outer templates, autoencoder

This patch fixes special cases of cplan construction for row and outer
templates, where partial fusion plans of type row or outer point to
groups that contain multiple template types, all of which differ from
the type of the referencing plan (e.g., row -> outer, cell). In such
cases, we now define a preferred (i.e., compatible) secondary template.

Furthermore, this patch fixes the merge condition of outer templates to
check for joint inputs on the second matrix multiply.

Since we encountered these issues for different batch sizes of the
autoencoder script, we now also include this algorithm (with different
batch sizes) in the codegen test suite.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/8b6ebbcc
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/8b6ebbcc
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/8b6ebbcc

Branch: refs/heads/master
Commit: 8b6ebbcceb5379cb9fba3df345ca8ccd99748eb1
Parents: 816a900
Author: Matthias Boehm <mb...@gmail.com>
Authored: Mon Sep 11 23:45:56 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Mon Sep 11 23:45:56 2017 -0700

----------------------------------------------------------------------
 .../hops/codegen/template/CPlanMemoTable.java   |  11 ++
 .../codegen/template/TemplateOuterProduct.java  |  11 +-
 .../hops/codegen/template/TemplateRow.java      |   4 +-
 .../hops/codegen/template/TemplateUtils.java    |  33 +++-
 .../sysml/hops/rewrite/HopRewriteUtils.java     |  10 +-
 .../functions/codegen/AlgorithmAutoEncoder.java | 180 +++++++++++++++++++
 .../functions/codegen/ZPackageSuite.java        |   1 +
 7 files changed, 239 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/8b6ebbcc/src/main/java/org/apache/sysml/hops/codegen/template/CPlanMemoTable.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/CPlanMemoTable.java b/src/main/java/org/apache/sysml/hops/codegen/template/CPlanMemoTable.java
index 4adec25..6de3ec6 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/CPlanMemoTable.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/CPlanMemoTable.java
@@ -303,6 +303,17 @@ public class CPlanMemoTable
 			p -> (p.type==pref) ? -p.countPlanRefs() : p.type.getRank()+1));
 	}
 	
+	public MemoTableEntry getBest(long hopID, TemplateType pref1, TemplateType pref2) {
+		List<MemoTableEntry> tmp = get(hopID);
+		if( tmp == null || tmp.isEmpty() )
+			return null;
+
+		//single plan per type, get plan w/ best rank in preferred order
+		return Collections.min(tmp, Comparator.comparing(
+			p -> (p.type==pref1) ? -p.countPlanRefs()-4 :
+				(p.type==pref2) ? -p.countPlanRefs() : p.type.getRank()+1));
+	}
+	
 	public long[] getAllRefs(long hopID) {
 		long[] refs = new long[3];
 		for( MemoTableEntry me : get(hopID) )

http://git-wip-us.apache.org/repos/asf/systemml/blob/8b6ebbcc/src/main/java/org/apache/sysml/hops/codegen/template/TemplateOuterProduct.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateOuterProduct.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateOuterProduct.java
index ec2ee3b..e4fb464 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateOuterProduct.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateOuterProduct.java
@@ -69,8 +69,9 @@ public class TemplateOuterProduct extends TemplateBase {
 			|| (hop instanceof BinaryOp && TemplateUtils.isOperationSupported(hop)
 				&& (TemplateUtils.isBinaryMatrixColVector(hop) || HopRewriteUtils.isBinaryMatrixScalarOperation(hop)
 				|| (HopRewriteUtils.isBinaryMatrixMatrixOperation(hop) && HopRewriteUtils.isBinary(hop, OpOp2.MULT, OpOp2.DIV)) )) 
-			|| HopRewriteUtils.isTransposeOperation(hop) 
-			|| (hop instanceof AggBinaryOp && !HopRewriteUtils.isOuterProductLikeMM(hop))
+			|| (HopRewriteUtils.isTransposeOperation(hop) && !HopRewriteUtils.isOuterProductLikeMM(input)) 
+			|| (hop instanceof AggBinaryOp && !HopRewriteUtils.isOuterProductLikeMM(hop)
+				&& TemplateUtils.containsOuterProduct(input, HopRewriteUtils.getOtherInput(hop, input)))
 			|| (hop instanceof AggUnaryOp && ((AggUnaryOp)hop).getDirection()==Direction.RowCol));
 	}
 
@@ -81,7 +82,7 @@ public class TemplateOuterProduct extends TemplateBase {
 			|| HopRewriteUtils.isBinaryMatrixScalarOperation(hop)
 			|| (HopRewriteUtils.isBinary(hop, OpOp2.MULT) 
 				&& HopRewriteUtils.isBinarySparseSafe(input)
-				&& !TemplateUtils.rContainsOuterProduct(input)));
+				&& !TemplateUtils.containsOuterProduct(input)));
 	}
 
 	@Override
@@ -144,13 +145,13 @@ public class TemplateOuterProduct extends TemplateBase {
 			return;
 		
 		//recursively process required childs
-		MemoTableEntry me = memo.getBest(hop.getHopID(), TemplateType.OUTER);
+		MemoTableEntry me = memo.getBest(hop.getHopID(), TemplateType.OUTER, TemplateType.CELL);
 		for( int i=0; i<hop.getInput().size(); i++ ) {
 			Hop c = hop.getInput().get(i);
 			if( me.isPlanRef(i) )
 				rConstructCplan(c, memo, tmp, inHops, inHops2, compileLiterals);
 			else {
-				CNodeData cdata = TemplateUtils.createCNodeData(c, compileLiterals);	
+				CNodeData cdata = TemplateUtils.createCNodeData(c, compileLiterals);
 				tmp.put(c.getHopID(), cdata);
 				inHops.add(c);
 			}

http://git-wip-us.apache.org/repos/asf/systemml/blob/8b6ebbcc/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
index a3037ec..864dd33 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
@@ -208,13 +208,13 @@ public class TemplateRow extends TemplateBase
 			return;
 		
 		//recursively process required childs
-		MemoTableEntry me = memo.getBest(hop.getHopID(), TemplateType.ROW);
+		MemoTableEntry me = memo.getBest(hop.getHopID(), TemplateType.ROW, TemplateType.CELL);
 		for( int i=0; i<hop.getInput().size(); i++ ) {
 			Hop c = hop.getInput().get(i);
 			if( me!=null && me.isPlanRef(i) )
 				rConstructCplan(c, memo, tmp, inHops, inHops2, compileLiterals);
 			else {
-				CNodeData cdata = TemplateUtils.createCNodeData(c, compileLiterals);	
+				CNodeData cdata = TemplateUtils.createCNodeData(c, compileLiterals);
 				tmp.put(c.getHopID(), cdata);
 				inHops.add(c);
 			}

http://git-wip-us.apache.org/repos/asf/systemml/blob/8b6ebbcc/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
index ec7bf19..1924914 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
@@ -19,6 +19,7 @@
 
 package org.apache.sysml.hops.codegen.template;
 
+import java.util.ArrayList;
 import java.util.HashMap;
 
 import org.apache.commons.lang.ArrayUtils;
@@ -206,7 +207,7 @@ public class TemplateUtils
 		else if( B1 != null && output.getDim1()==B1.getDim2() && output.getDim2()==X.getDim2())
 			return RowType.COL_AGG_B1;
 		else
-			throw new RuntimeException("Unknown row type.");
+			throw new RuntimeException("Unknown row type for hop "+output.getHopID()+".");
 	}
 	
 	public static AggOp getAggOp(Hop hop) {
@@ -438,7 +439,14 @@ public class TemplateUtils
 		return ret;
 	}
 	
-	public static boolean rContainsOuterProduct(Hop current) {
+	public static boolean containsOuterProduct(Hop hop, Hop probe) {
+		hop.resetVisitStatus();
+		boolean ret = rContainsOuterProduct(hop, probe);
+		hop.resetVisitStatus();
+		return ret;
+	}
+	
+	private static boolean rContainsOuterProduct(Hop current) {
 		if( current.isVisited() )
 			return false;
 		boolean ret = false;
@@ -449,6 +457,27 @@ public class TemplateUtils
 		return ret;
 	}
 	
+	private static boolean rContainsOuterProduct(Hop current, Hop probe) {
+		if( current.isVisited() )
+			return false;
+		boolean ret = false;
+		ret |= HopRewriteUtils.isOuterProductLikeMM(current)
+			&& checkContainment(current.getInput(), probe, true);
+		for( int i=0; i<current.getInput().size() && !ret; i++ )
+			ret |= rContainsOuterProduct(current.getInput().get(i), probe);
+		current.setVisited();
+		return ret;
+	}
+	
+	private static boolean checkContainment(ArrayList<Hop> inputs, Hop probe, boolean inclTranspose) {
+		if( !inclTranspose )
+			return inputs.contains(probe);
+		for( Hop hop : inputs )
+			if( HopRewriteUtils.isTransposeOfItself(hop, probe) )
+				return true;
+		return false;
+	}
+	
 	public static void rFlipVectorLookups(CNode current) {
 		//flip vector lookups if necessary
 		if( isUnary(current, UnaryType.LOOKUP_C) )

http://git-wip-us.apache.org/repos/asf/systemml/blob/8b6ebbcc/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
index 2f4c994..453bbd9 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
@@ -282,6 +282,13 @@ public class HopRewriteUtils
 				removeAllChildReferences(input);
 	}
 	
+	public static Hop getOtherInput(Hop hop, Hop input) {
+		for( Hop c : hop.getInput() )
+			if( c != input )
+				return c;
+		return null;
+	}
+	
 	public static Hop createDataGenOp( Hop input, double value ) 
 		throws HopsException
 	{		
@@ -1141,8 +1148,7 @@ public class HopRewriteUtils
 	
 	/**
 	 * Compares the size of outputs from hop1 and hop2, in terms of number
-	 * of matrix cells. Note that this methods throws a RuntimeException
-	 * if either hop has unknown dimensions. 
+	 * of matrix cells. 
 	 * 
 	 * @param hop1 high-level operator 1
 	 * @param hop2 high-level operator 2

http://git-wip-us.apache.org/repos/asf/systemml/blob/8b6ebbcc/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmAutoEncoder.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmAutoEncoder.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmAutoEncoder.java
new file mode 100644
index 0000000..70f5324
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmAutoEncoder.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.codegen;
+
+import java.io.File;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+
+public class AlgorithmAutoEncoder extends AutomatedTestBase 
+{
+	private final static String TEST_NAME1 = "Algorithm_AutoEncoder";
+	private final static String TEST_DIR = "functions/codegen/";
+	private final static String TEST_CLASS_DIR = TEST_DIR + AlgorithmAutoEncoder.class.getSimpleName() + "/";
+	private final static String TEST_CONF = "SystemML-config-codegen.xml";
+	private final static File   TEST_CONF_FILE = new File(SCRIPT_DIR + TEST_DIR, TEST_CONF);
+	
+	private final static int rows = 2468;
+	private final static int cols = 784;
+	
+	private final static double sparsity1 = 0.7; //dense
+	private final static double sparsity2 = 0.1; //sparse
+	
+	private final static int H1 = 500;
+	private final static int H2 = 2;
+	private final static double epochs = 2; 
+	
+	@Override
+	public void setUp() {
+		TestUtils.clearAssertionInformation();
+		addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "w" })); 
+	}
+
+	@Test
+	public void testAutoEncoder256DenseCP() {
+		runGLMTest(256, false, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testAutoEncoder256DenseRewritesCP() {
+		runGLMTest(256, false, true, ExecType.CP);
+	}
+	
+	@Test
+	public void testAutoEncoder256SparseCP() {
+		runGLMTest(256, true, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testAutoEncoder256SparseRewritesCP() {
+		runGLMTest(256, true, true, ExecType.CP);
+	}
+	
+	@Test
+	public void testAutoEncoder512DenseCP() {
+		runGLMTest(512, false, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testAutoEncoder512DenseRewritesCP() {
+		runGLMTest(512, false, true, ExecType.CP);
+	}
+	
+	@Test
+	public void testAutoEncoder512SparseCP() {
+		runGLMTest(512, true, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testAutoEncoder512SparseRewritesCP() {
+		runGLMTest(512, true, true, ExecType.CP);
+	}
+	
+	//Note: limited cases for SPARK, as lazy evaluation 
+	//causes very long execution time for this algorithm
+	
+	@Test
+	public void testAutoEncoder256DenseRewritesSpark() {
+		runGLMTest(256, false, true, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testAutoEncoder256SparseRewritesSpark() {
+		runGLMTest(256, true, true, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testAutoEncoder512DenseRewritesSpark() {
+		runGLMTest(512, false, true, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testAutoEncoder512SparseRewritesSpark() {
+		runGLMTest(512, true, true, ExecType.SPARK);
+	}
+	
+	private void runGLMTest(int batchsize, boolean sparse, boolean rewrites, ExecType instType)
+	{
+		boolean oldFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
+		RUNTIME_PLATFORM platformOld = rtplatform;
+		switch( instType ){
+			case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break;
+			case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break;
+			default: rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK; break;
+		}
+	
+		boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+		if( rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK )
+			DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+
+		try
+		{
+			String TEST_NAME = TEST_NAME1;
+			TestConfiguration config = getTestConfiguration(TEST_NAME);
+			loadTestConfiguration(config);
+			
+			fullDMLScriptName = "scripts/staging/autoencoder-2layer.dml";
+			programArgs = new String[]{ "-explain", "-stats", "-nvargs", "X="+input("X"),
+				"H1="+H1, "H2="+H2, "EPOCH="+epochs, "BATCH="+batchsize, 
+				"W1_out="+output("W1"), "b1_out="+output("b1"),
+				"W2_out="+output("W2"), "b2_out="+output("b2"),
+				"W3_out="+output("W3"), "b3_out="+output("b3"),
+				"W4_out="+output("W4"), "b4_out="+output("b4")};
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
+			
+			//generate actual datasets
+			double[][] X = getRandomMatrix(rows, cols, 0, 1, sparse?sparsity2:sparsity1, 714);
+			writeInputMatrixWithMTD("X", X, true);
+			
+			//run script
+			runTest(true, false, null, -1); 
+			//TODO R script
+			
+			Assert.assertTrue(heavyHittersContainsSubString("spoof") 
+				|| heavyHittersContainsSubString("sp_spoof"));
+		}
+		finally {
+			rtplatform = platformOld;
+			DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldFlag;
+			OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true;
+			OptimizerUtils.ALLOW_OPERATOR_FUSION = true;
+		}
+	}
+
+	/**
+	 * Override default configuration with custom test configuration to ensure
+	 * scratch space and local temporary directory locations are also updated.
+	 */
+	@Override
+	protected File getConfigTemplateFile() {
+		// Instrumentation in this test's output log to show custom configuration file used for template.
+		System.out.println("This test case overrides default configuration with " + TEST_CONF_FILE.getPath());
+		return TEST_CONF_FILE;
+	}
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/8b6ebbcc/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java
----------------------------------------------------------------------
diff --git a/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java b/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java
index 3b24fcc..fda71a5 100644
--- a/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java
+++ b/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java
@@ -27,6 +27,7 @@ import org.junit.runners.Suite;
 @RunWith(Suite.class)
 @Suite.SuiteClasses({
 	AlgorithmARIMA.class,
+	AlgorithmAutoEncoder.class,
 	AlgorithmGLM.class,
 	AlgorithmKMeans.class,
 	AlgorithmL2SVM.class,