You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/07/12 18:32:20 UTC

[2/2] incubator-systemml git commit: [SYSTEMML-804] Size propagation frame transform functions, recompile

[SYSTEMML-804] Size propagation frame transform functions, recompile

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/a39aecff
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/a39aecff
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/a39aecff

Branch: refs/heads/master
Commit: a39aecffa0868853b2c60ce412470b7074e0dd53
Parents: c7beb50
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Mon Jul 11 22:38:59 2016 -0700
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Tue Jul 12 11:31:57 2016 -0700

----------------------------------------------------------------------
 .../sysml/hops/ParameterizedBuiltinOp.java      | 31 ++++++++--
 .../apache/sysml/hops/recompile/Recompiler.java |  8 ++-
 .../controlprogram/caching/FrameObject.java     |  1 +
 .../context/SparkExecutionContext.java          |  4 +-
 .../TransformFrameEncodeApplyTest.java          | 61 +++++++++++++++++++-
 .../TransformFrameEncodeDecodeTest.java         | 27 +++++++++
 6 files changed, 123 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a39aecff/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java b/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
index f1ca98c..b3aec91 100644
--- a/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
+++ b/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
@@ -1062,8 +1062,7 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 			Hop dir = getInput().get(_paramIndexMap.get("dir"));
 			double maxVal = HopRewriteUtils.getDoubleValueSafe((LiteralOp)max);
 			String dirVal = ((LiteralOp)dir).getStringValue();
-			if( mc.dimsKnown() )
-			{
+			if( mc.dimsKnown() ) {
 				long lnnz = mc.nnzKnown() ? mc.getNonZeros() : mc.getRows();
 				if( "cols".equals(dirVal) ) { //expand horizontally
 					ret = new long[]{mc.getRows(), UtilFunctions.toLong(maxVal), lnnz};
@@ -1073,6 +1072,20 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 				}	
 			}
 		}
+		else if( _op == ParamBuiltinOp.TRANSFORMDECODE ) {
+			if( mc.dimsKnown() ) {
+				//rows: remain unchanged
+				//cols: dummy coding might decrease but never increase cols
+				return new long[]{mc.getRows(), mc.getCols(), mc.getRows()*mc.getCols()};
+			}
+		}
+		else if( _op == ParamBuiltinOp.TRANSFORMAPPLY ) {
+			if( mc.dimsKnown() ) {
+				//rows: omitting might decrease but never increase rows
+				//cols: dummy coding and binning might increase cols but nnz stays constant
+				return new long[]{mc.getRows(), mc.getCols(), mc.getRows()*mc.getCols()};
+			}
+		}
 		
 		return ret;
 	}
@@ -1205,11 +1218,21 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
 				
 				break;	
 			}
-			case TRANSFORMAPPLY: {
+			case TRANSFORMDECODE: {
 				Hop target = getInput().get(_paramIndexMap.get("target"));
-				setDim1( target.getDim1() ); //rows remain unchanged
+				//rows remain unchanged for recoding and dummy coding
+				setDim1( target.getDim1() );
+				//cols remain unchanged only if no dummy coding
+				//TODO parse json spec
+				break;
 			}
+			
+			case TRANSFORMAPPLY: {
+				//rows remain unchanged only if no omitting
+				//cols remain unchanged if no dummy coding
+				//TODO parse json spec
 				break;
+			}
 			default:
 				//do nothing
 				break;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a39aecff/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java b/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java
index 5e65bf1..f7204e8 100644
--- a/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java
+++ b/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java
@@ -1638,13 +1638,17 @@ public class Recompiler
 			if( vars.keySet().contains( varName ) )
 			{
 				Data dat = vars.get(varName);
-				if( dat instanceof MatrixObject )
-				{
+				if( dat instanceof MatrixObject ) {
 					MatrixObject mo = (MatrixObject) dat;
 					d.setDim1(mo.getNumRows());
 					d.setDim2(mo.getNumColumns());
 					d.setNnz(mo.getNnz());
 				}
+				else if( dat instanceof FrameObject ) {
+					FrameObject fo = (FrameObject) dat;
+					d.setDim1(fo.getNumRows());
+					d.setDim2(fo.getNumColumns());
+				}
 			}
 		}
 		//special case for persistent reads with unknown size (read-after-write)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a39aecff/src/main/java/org/apache/sysml/runtime/controlprogram/caching/FrameObject.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/FrameObject.java b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/FrameObject.java
index db98a3e..e3d2332 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/FrameObject.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/FrameObject.java
@@ -121,6 +121,7 @@ public class FrameObject extends CacheableData<FrameBlock>
 		//update matrix characteristics
 		MatrixCharacteristics mc = ((MatrixDimensionsMetaData) _metaData).getMatrixCharacteristics();
 		mc.setDimension( _data.getNumRows(),_data.getNumColumns() );
+		mc.setNonZeros(_data.getNumRows()*_data.getNumColumns());
 		
 		//update schema information
 		_schema = _data.getSchema();

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a39aecff/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java b/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
index 58027ce..99614f2 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
@@ -979,8 +979,10 @@ public class SparkExecutionContext extends ExecutionContext
 		
 			//copy into output frame
 			out.copy( ix, ix+block.getNumRows()-1, 0, block.getNumColumns()-1, block );
-			if( ix == 0 )
+			if( ix == 0 ) {
+				out.setColumnNames(block.getColumnNames());
 				out.setColumnMetadata(block.getColumnMetadata());
+			}
 		}
 		
 		if (DMLScript.STATISTICS) {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a39aecff/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformFrameEncodeApplyTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformFrameEncodeApplyTest.java b/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformFrameEncodeApplyTest.java
index 27d58f9..2d17c17 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformFrameEncodeApplyTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformFrameEncodeApplyTest.java
@@ -19,6 +19,7 @@
 
 package org.apache.sysml.test.integration.functions.transform;
 
+import org.junit.Assert;
 import org.junit.Test;
 import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
@@ -29,6 +30,7 @@ import org.apache.sysml.runtime.util.DataConverter;
 import org.apache.sysml.test.integration.AutomatedTestBase;
 import org.apache.sysml.test.integration.TestConfiguration;
 import org.apache.sysml.test.utils.TestUtils;
+import org.apache.sysml.utils.Statistics;
 
 public class TransformFrameEncodeApplyTest extends AutomatedTestBase 
 {
@@ -77,6 +79,11 @@ public class TransformFrameEncodeApplyTest extends AutomatedTestBase
 	}
 	
 	@Test
+	public void testHomesRecodeIDsHybridCSV() {
+		runTransformTest(RUNTIME_PLATFORM.HYBRID_SPARK, "csv", TransformType.RECODE, false);
+	}
+	
+	@Test
 	public void testHomesDummycodeIDsSingleNodeCSV() {
 		runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", TransformType.DUMMY, false);
 	}
@@ -87,6 +94,11 @@ public class TransformFrameEncodeApplyTest extends AutomatedTestBase
 	}
 	
 	@Test
+	public void testHomesDummycodeIDsHybridCSV() {
+		runTransformTest(RUNTIME_PLATFORM.HYBRID_SPARK, "csv", TransformType.DUMMY, false);
+	}
+	
+	@Test
 	public void testHomesBinningIDsSingleNodeCSV() {
 		runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", TransformType.BIN, false);
 	}
@@ -97,6 +109,11 @@ public class TransformFrameEncodeApplyTest extends AutomatedTestBase
 	}
 	
 	@Test
+	public void testHomesBinningIDsHybridCSV() {
+		runTransformTest(RUNTIME_PLATFORM.HYBRID_SPARK, "csv", TransformType.BIN, false);
+	}
+	
+	@Test
 	public void testHomesOmitIDsSingleNodeCSV() {
 		runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", TransformType.OMIT, false);
 	}
@@ -107,6 +124,11 @@ public class TransformFrameEncodeApplyTest extends AutomatedTestBase
 	}
 	
 	@Test
+	public void testHomesOmitIDsHybridCSV() {
+		runTransformTest(RUNTIME_PLATFORM.HYBRID_SPARK, "csv", TransformType.OMIT, false);
+	}
+	
+	@Test
 	public void testHomesImputeIDsSingleNodeCSV() {
 		runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", TransformType.IMPUTE, false);
 	}
@@ -115,6 +137,11 @@ public class TransformFrameEncodeApplyTest extends AutomatedTestBase
 	public void testHomesImputeIDsSparkCSV() {
 		runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", TransformType.IMPUTE, false);
 	}
+	
+	@Test
+	public void testHomesImputeIDsHybridCSV() {
+		runTransformTest(RUNTIME_PLATFORM.HYBRID_SPARK, "csv", TransformType.IMPUTE, false);
+	}
 
 	@Test
 	public void testHomesRecodeColnamesSingleNodeCSV() {
@@ -127,6 +154,11 @@ public class TransformFrameEncodeApplyTest extends AutomatedTestBase
 	}
 	
 	@Test
+	public void testHomesRecodeColnamesHybridCSV() {
+		runTransformTest(RUNTIME_PLATFORM.HYBRID_SPARK, "csv", TransformType.RECODE, true);
+	}
+	
+	@Test
 	public void testHomesDummycodeColnamesSingleNodeCSV() {
 		runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", TransformType.DUMMY, true);
 	}
@@ -137,6 +169,11 @@ public class TransformFrameEncodeApplyTest extends AutomatedTestBase
 	}
 	
 	@Test
+	public void testHomesDummycodeColnamesHybridCSV() {
+		runTransformTest(RUNTIME_PLATFORM.HYBRID_SPARK, "csv", TransformType.DUMMY, true);
+	}
+	
+	@Test
 	public void testHomesBinningColnamesSingleNodeCSV() {
 		runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", TransformType.BIN, true);
 	}
@@ -147,6 +184,11 @@ public class TransformFrameEncodeApplyTest extends AutomatedTestBase
 	}
 	
 	@Test
+	public void testHomesBinningColnamesHybridCSV() {
+		runTransformTest(RUNTIME_PLATFORM.HYBRID_SPARK, "csv", TransformType.BIN, true);
+	}
+	
+	@Test
 	public void testHomesOmitColnamesSingleNodeCSV() {
 		runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", TransformType.OMIT, true);
 	}
@@ -157,6 +199,11 @@ public class TransformFrameEncodeApplyTest extends AutomatedTestBase
 	}
 	
 	@Test
+	public void testHomesOmitvColnamesHybridCSV() {
+		runTransformTest(RUNTIME_PLATFORM.HYBRID_SPARK, "csv", TransformType.OMIT, true);
+	}
+	
+	@Test
 	public void testHomesImputeColnamesSingleNodeCSV() {
 		runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", TransformType.IMPUTE, true);
 	}
@@ -166,6 +213,11 @@ public class TransformFrameEncodeApplyTest extends AutomatedTestBase
 		runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", TransformType.IMPUTE, true);
 	}
 	
+	@Test
+	public void testHomesImputeColnamesHybridCSV() {
+		runTransformTest(RUNTIME_PLATFORM.HYBRID_SPARK, "csv", TransformType.IMPUTE, true);
+	}
+	
 	/**
 	 * 
 	 * @param rt
@@ -202,7 +254,7 @@ public class TransformFrameEncodeApplyTest extends AutomatedTestBase
 			
 			String HOME = SCRIPT_DIR + TEST_DIR;
 			fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
-			programArgs = new String[]{"-explain","-nvargs", 
+			programArgs = new String[]{"-explain", "recompile_hops", "-nvargs", 
 				"DATA=" + HOME + "input/" + DATASET,
 				"TFSPEC=" + HOME + "input/" + SPEC,
 				"TFDATA1=" + output("tfout1"),
@@ -219,7 +271,12 @@ public class TransformFrameEncodeApplyTest extends AutomatedTestBase
 			double[][] R2 = DataConverter.convertToDoubleMatrix(MatrixReaderFactory
 				.createMatrixReader(InputInfo.CSVInputInfo)
 				.readMatrixFromHDFS(output("tfout2"), -1L, -1L, 1000, 1000, -1));
-			TestUtils.compareMatrices(R1, R2, R1.length, R1[0].length, 0);			
+			TestUtils.compareMatrices(R1, R2, R1.length, R1[0].length, 0);		
+			
+			if( rt == RUNTIME_PLATFORM.HYBRID_SPARK ) {
+				Assert.assertEquals("Wrong number of executed Spark instructions: " + 
+					Statistics.getNoOfExecutedSPInst(), new Long(2), new Long(Statistics.getNoOfExecutedSPInst()));
+			}
 		}
 		catch(Exception ex) {
 			throw new RuntimeException(ex);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a39aecff/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformFrameEncodeDecodeTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformFrameEncodeDecodeTest.java b/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformFrameEncodeDecodeTest.java
index 0bdf4da..b676989 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformFrameEncodeDecodeTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformFrameEncodeDecodeTest.java
@@ -19,6 +19,7 @@
 
 package org.apache.sysml.test.integration.functions.transform;
 
+import org.junit.Assert;
 import org.junit.Test;
 import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
@@ -32,6 +33,7 @@ import org.apache.sysml.runtime.util.DataConverter;
 import org.apache.sysml.test.integration.AutomatedTestBase;
 import org.apache.sysml.test.integration.TestConfiguration;
 import org.apache.sysml.test.utils.TestUtils;
+import org.apache.sysml.utils.Statistics;
 
 public class TransformFrameEncodeDecodeTest extends AutomatedTestBase 
 {
@@ -72,6 +74,11 @@ public class TransformFrameEncodeDecodeTest extends AutomatedTestBase
 	}
 	
 	@Test
+	public void testHomesRecodeIDsHybridCSV() {
+		runTransformTest(RUNTIME_PLATFORM.HYBRID_SPARK, "csv", TransformType.RECODE, false);
+	}
+	
+	@Test
 	public void testHomesDummycodeIDsSingleNodeCSV() {
 		runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", TransformType.DUMMY, false);
 	}
@@ -82,6 +89,11 @@ public class TransformFrameEncodeDecodeTest extends AutomatedTestBase
 	}
 	
 	@Test
+	public void testHomesDummycodeIDsHybridCSV() {
+		runTransformTest(RUNTIME_PLATFORM.HYBRID_SPARK, "csv", TransformType.DUMMY, false);
+	}
+	
+	@Test
 	public void testHomesRecodeColnamesSingleNodeCSV() {
 		runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", TransformType.RECODE, true);
 	}
@@ -92,6 +104,11 @@ public class TransformFrameEncodeDecodeTest extends AutomatedTestBase
 	}
 	
 	@Test
+	public void testHomesRecodeColnamesHybridCSV() {
+		runTransformTest(RUNTIME_PLATFORM.HYBRID_SPARK, "csv", TransformType.RECODE, true);
+	}
+	
+	@Test
 	public void testHomesDummycodeColnamesSingleNodeCSV() {
 		runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", TransformType.DUMMY, true);
 	}
@@ -101,6 +118,11 @@ public class TransformFrameEncodeDecodeTest extends AutomatedTestBase
 		runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", TransformType.DUMMY, true);
 	}
 	
+	@Test
+	public void testHomesDummycodeColnamesHybridCSV() {
+		runTransformTest(RUNTIME_PLATFORM.HYBRID_SPARK, "csv", TransformType.DUMMY, true);
+	}
+	
 	/**
 	 * 
 	 * @param rt
@@ -153,6 +175,11 @@ public class TransformFrameEncodeDecodeTest extends AutomatedTestBase
 			String[][] R1 = DataConverter.convertToStringFrame(fb1);
 			String[][] R2 = DataConverter.convertToStringFrame(fb2);
 			TestUtils.compareFrames(R1, R2, R1.length, R1[0].length);			
+			
+			if( rt == RUNTIME_PLATFORM.HYBRID_SPARK ) {
+				Assert.assertEquals("Wrong number of executed Spark instructions: " + 
+					Statistics.getNoOfExecutedSPInst(), new Long(2), new Long(Statistics.getNoOfExecutedSPInst()));
+			}
 		}
 		catch(Exception ex) {
 			throw new RuntimeException(ex);