You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/02/11 18:33:35 UTC

[1/4] incubator-systemml git commit: [SYSTEMML-1244] Fix robustness csv text read (quoted recoded maps)

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 1b4f1ec4d -> ca4e2600e


[SYSTEMML-1244] Fix robustness csv text read (quoted recoded maps)

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/c87da2ce
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/c87da2ce
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/c87da2ce

Branch: refs/heads/master
Commit: c87da2ce8ffe4ab6a03fce4cd548703147f12fca
Parents: 1b4f1ec
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sat Feb 11 00:51:00 2017 +0100
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat Feb 11 19:32:59 2017 +0100

----------------------------------------------------------------------
 .../sysml/runtime/io/IOUtilFunctions.java       |  46 +++++--
 .../transform/FrameCSVReadWriteTest.java        | 119 +++++++++++++++++
 .../TransformCSVFrameEncodeReadTest.java        | 130 +++++++++++++++++++
 .../functions/transform/FrameCSVReadWrite.dml   |  27 ++++
 .../transform/TransformCSVFrameEncodeRead.dml   |  29 +++++
 .../functions/misc/ZPackageSuite.java           |   3 +-
 .../functions/transform/ZPackageSuite.java      |   2 +
 7 files changed, 343 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c87da2ce/src/main/java/org/apache/sysml/runtime/io/IOUtilFunctions.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/io/IOUtilFunctions.java b/src/main/java/org/apache/sysml/runtime/io/IOUtilFunctions.java
index 492665d..3f0ea56 100644
--- a/src/main/java/org/apache/sysml/runtime/io/IOUtilFunctions.java
+++ b/src/main/java/org/apache/sysml/runtime/io/IOUtilFunctions.java
@@ -117,13 +117,12 @@ public class IOUtilFunctions
 	
 	/**
 	 * Splits a string by a specified delimiter into all tokens, including empty
-	 * while respecting the rules for quotes and escapes defined in RFC4180.
-	 * 
-	 * NOTE: use StringEscapeUtils.unescapeCsv(tmp) if needed afterwards.
+	 * while respecting the rules for quotes and escapes defined in RFC4180,
+	 * with robustness for various special cases.
 	 * 
 	 * @param str string to split
 	 * @param delim delimiter
-	 * @return string array
+	 * @return string array of tokens
 	 */
 	public static String[] splitCSV(String str, String delim)
 	{
@@ -135,6 +134,7 @@ public class IOUtilFunctions
 		ArrayList<String> tokens = new ArrayList<String>();
 		int from = 0, to = 0; 
 		int len = str.length();
+		int dlen = delim.length();
 		while( from < len  ) { // for all tokens
 			if( str.charAt(from) == CSV_QUOTE_CHAR 
 				&& str.indexOf(CSV_QUOTE_CHAR, from+1) > 0 ) {
@@ -143,8 +143,11 @@ public class IOUtilFunctions
 				while( to+1 < len && str.charAt(to+1)==CSV_QUOTE_CHAR )
 					to = str.indexOf(CSV_QUOTE_CHAR, to+2); // to + ""
 				to += 1; // last "
+				// handle remaining non-quoted characters "aa"a 
+				if( to<len-1 && !str.regionMatches(to, delim, 0, dlen) )
+					to = str.indexOf(delim, to+1);
 			}
-			else if(str.regionMatches(from, delim, 0, delim.length())) {
+			else if( str.regionMatches(from, delim, 0, dlen) ) {
 				to = from; // empty string
 			}
 			else { // default: unquoted non-empty
@@ -165,6 +168,16 @@ public class IOUtilFunctions
 		return tokens.toArray(new String[0]);
 	}
 
+	/**
+	 * Splits a string by a specified delimiter into all tokens, including empty
+	 * while respecting the rules for quotes and escapes defined in RFC4180,
+	 * with robustness for various special cases.
+	 * 
+	 * @param str string to split
+	 * @param delim delimiter
+	 * @param tokens string array for tokens, length needs to match the number of tokens
+	 * @return string array of tokens
+	 */
 	public static String[] splitCSV(String str, String delim, String[] tokens)
 	{
 		// check for empty input
@@ -174,6 +187,7 @@ public class IOUtilFunctions
 		// scan string and create individual tokens
 		int from = 0, to = 0; 
 		int len = str.length();
+		int dlen = delim.length();
 		int pos = 0;
 		while( from < len  ) { // for all tokens
 			if( str.charAt(from) == CSV_QUOTE_CHAR
@@ -183,8 +197,11 @@ public class IOUtilFunctions
 				while( to+1 < len && str.charAt(to+1)==CSV_QUOTE_CHAR )
 					to = str.indexOf(CSV_QUOTE_CHAR, to+2); // to + ""
 				to += 1; // last "
+				// handle remaining non-quoted characters "aa"a 
+				if( to<len-1 && !str.regionMatches(to, delim, 0, dlen) )
+					to = str.indexOf(delim, to+1);
 			}
-			else if(str.regionMatches(from, delim, 0, delim.length())) {
+			else if( str.regionMatches(from, delim, 0, dlen) ) {
 				to = from; // empty string
 			}
 			else { // default: unquoted non-empty
@@ -207,9 +224,10 @@ public class IOUtilFunctions
 	
 	/**
 	 * Counts the number of tokens defined by the given delimiter, respecting 
-	 * the rules for quotes and escapes defined in RFC4180.
+	 * the rules for quotes and escapes defined in RFC4180,
+	 * with robustness for various special cases.
 	 * 
-	 * @param str string
+	 * @param str string to split
 	 * @param delim delimiter
 	 * @return number of tokens split by the given delimiter
 	 */
@@ -223,6 +241,7 @@ public class IOUtilFunctions
 		int numTokens = 0;
 		int from = 0, to = 0; 
 		int len = str.length();
+		int dlen = delim.length();
 		while( from < len  ) { // for all tokens
 			if( str.charAt(from) == CSV_QUOTE_CHAR
 				&& str.indexOf(CSV_QUOTE_CHAR, from+1) > 0 ) {
@@ -231,8 +250,11 @@ public class IOUtilFunctions
 				while( to+1 < len && str.charAt(to+1)==CSV_QUOTE_CHAR ) 
 					to = str.indexOf(CSV_QUOTE_CHAR, to+2); // to + ""
 				to += 1; // last "
+				// handle remaining non-quoted characters "aa"a 
+				if( to<len-1 && !str.regionMatches(to, delim, 0, dlen) )
+					to = str.indexOf(delim, to+1);
 			}
-			else if(str.regionMatches(from, delim, 0, delim.length())) {
+			else if( str.regionMatches(from, delim, 0, dlen) ) {
 				to = from; // empty string
 			}
 			else { // default: unquoted non-empty
@@ -366,11 +388,11 @@ public class IOUtilFunctions
 					informat.getRecordReader(splits[i], job, Reporter.NULL);
 			try {
 				if( reader.next(key, value) ) {
-					String row = value.toString().trim();
-					if( row.startsWith(TfUtils.TXMTD_MVPREFIX) )
+					if( value.toString().startsWith(TfUtils.TXMTD_MVPREFIX) )
 						reader.next(key, value);
-					if( row.startsWith(TfUtils.TXMTD_NDPREFIX) )
+					if( value.toString().startsWith(TfUtils.TXMTD_NDPREFIX) )
 						reader.next(key, value);
+					String row = value.toString().trim();
 					if( !row.isEmpty() )
 						ncol = IOUtilFunctions.countTokensCSV(row, delim);
 				}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c87da2ce/src/test/java/org/apache/sysml/test/integration/functions/transform/FrameCSVReadWriteTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/transform/FrameCSVReadWriteTest.java b/src/test/java/org/apache/sysml/test/integration/functions/transform/FrameCSVReadWriteTest.java
new file mode 100644
index 0000000..35078f3
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/integration/functions/transform/FrameCSVReadWriteTest.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.transform;
+
+import org.junit.Test;
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.runtime.io.FrameReader;
+import org.apache.sysml.runtime.io.FrameReaderFactory;
+import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;
+import org.apache.sysml.runtime.matrix.data.FrameBlock;
+import org.apache.sysml.runtime.matrix.data.InputInfo;
+import org.apache.sysml.runtime.util.DataConverter;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+
+public class FrameCSVReadWriteTest extends AutomatedTestBase 
+{
+	private final static String TEST_NAME1 = "FrameCSVReadWrite";
+	private final static String TEST_DIR = "functions/transform/";
+	private final static String TEST_CLASS_DIR = TEST_DIR + FrameCSVReadWriteTest.class.getSimpleName() + "/";
+	
+	//dataset and transform tasks without missing values
+	private final static String DATASET 	= "csv_mix/quotes1.csv";
+	
+	@Override
+	public void setUp()  {
+		TestUtils.clearAssertionInformation();
+		addTestConfiguration(TEST_NAME1, 
+			new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "R" }) );
+	}
+	
+	@Test
+	public void testCSVReadWriteSinglenode() {
+		runCSVQuotesReadWriteTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv");
+	}
+	
+	@Test
+	public void testCSVReadWriteHybrid() {
+		runCSVQuotesReadWriteTest(RUNTIME_PLATFORM.HYBRID_SPARK, "csv");
+	}
+	
+	@Test
+	public void testCSVReadWriteSpark() {
+		runCSVQuotesReadWriteTest(RUNTIME_PLATFORM.SPARK, "csv");
+	}
+	
+	
+	/**
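+	 * Runs the FrameCSVReadWrite script and compares the written frame with the input.
+	 * 
+	 * @param rt runtime platform to execute on
+	 * @param ofmt output format, only "csv" is supported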
+	 */
+	private void runCSVQuotesReadWriteTest( RUNTIME_PLATFORM rt, String ofmt )
+	{
+		//set runtime platform
+		RUNTIME_PLATFORM rtold = rtplatform;
+		boolean csvReblockOld = OptimizerUtils.ALLOW_FRAME_CSV_REBLOCK;
+		rtplatform = rt;
+
+		boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+		if( rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK)
+			DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+
+		if( !ofmt.equals("csv") )
+			throw new RuntimeException("Unsupported test output format");
+		
+		try
+		{
+			getAndLoadTestConfiguration(TEST_NAME1);
+			
+			String HOME = SCRIPT_DIR + TEST_DIR;
+			fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
+			programArgs = new String[]{"-explain","-args", 
+				HOME + "input/" + DATASET, output("R") };
+	
+			OptimizerUtils.ALLOW_FRAME_CSV_REBLOCK = true;
+			runTest(true, false, null, -1); 
+			
+			//read input/output and compare
+			FrameReader reader1 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo, 
+					new CSVFileFormatProperties(false, ",", false));
+			FrameBlock fb1 = reader1.readFrameFromHDFS(HOME + "input/" + DATASET, -1L, -1L);
+			FrameReader reader2 = FrameReaderFactory.createFrameReader(InputInfo.CSVInputInfo);
+			FrameBlock fb2 = reader2.readFrameFromHDFS(output("R"), -1L, -1L);
+			String[][] R1 = DataConverter.convertToStringFrame(fb1);
+			String[][] R2 = DataConverter.convertToStringFrame(fb2);
+			TestUtils.compareFrames(R1, R2, R1.length, R1[0].length);			
+		}
+		catch(Exception ex) {
+			throw new RuntimeException(ex);
+		}
+		finally {
+			rtplatform = rtold;
+			DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+			OptimizerUtils.ALLOW_FRAME_CSV_REBLOCK = csvReblockOld;
+		}
+	}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c87da2ce/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformCSVFrameEncodeReadTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformCSVFrameEncodeReadTest.java b/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformCSVFrameEncodeReadTest.java
new file mode 100644
index 0000000..b28c2df
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/integration/functions/transform/TransformCSVFrameEncodeReadTest.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.transform;
+
+import org.junit.Test;
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.runtime.io.FrameReader;
+import org.apache.sysml.runtime.io.FrameReaderTextCSV;
+import org.apache.sysml.runtime.io.FrameReaderTextCSVParallel;
+import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;
+import org.apache.sysml.runtime.matrix.data.FrameBlock;
+import org.apache.sysml.runtime.util.DataConverter;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+
+public class TransformCSVFrameEncodeReadTest extends AutomatedTestBase 
+{
+	private final static String TEST_NAME1 = "TransformCSVFrameEncodeRead";
+	private final static String TEST_DIR = "functions/transform/";
+	private final static String TEST_CLASS_DIR = TEST_DIR + TransformCSVFrameEncodeReadTest.class.getSimpleName() + "/";
+	
+	//dataset and transform tasks without missing values
+	private final static String DATASET 	= "csv_mix/quotes1.csv";
+	
+	@Override
+	public void setUp()  {
+		TestUtils.clearAssertionInformation();
+		addTestConfiguration(TEST_NAME1, 
+			new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "R" }) );
+	}
+	
+	@Test
+	public void testFrameReadMetaSingleNodeCSV() {
+		runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", false);
+	}
+	
+	@Test
+	public void testFrameReadMetaSparkCSV() {
+		runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", false);
+	}
+	
+	@Test
+	public void testFrameReadMetaHybridCSV() {
+		runTransformTest(RUNTIME_PLATFORM.HYBRID_SPARK, "csv", false);
+	}
+	
+	@Test
+	public void testFrameParReadMetaSingleNodeCSV() {
+		runTransformTest(RUNTIME_PLATFORM.SINGLE_NODE, "csv", true);
+	}
+	
+	@Test
+	public void testFrameParReadMetaSparkCSV() {
+		runTransformTest(RUNTIME_PLATFORM.SPARK, "csv", true);
+	}
+	
+	@Test
+	public void testFrameParReadMetaHybridCSV() {
+		runTransformTest(RUNTIME_PLATFORM.HYBRID_SPARK, "csv", true);
+	}
+	
+	/**
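+	 * Runs the TransformCSVFrameEncodeRead script and reads the written output back in.
+	 * @param rt runtime platform to execute on
+	 * @param ofmt output format, only "csv" is supported
+	 * @param parRead if true, read the output with the parallel CSV frame reader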
+	 */
+	private void runTransformTest( RUNTIME_PLATFORM rt, String ofmt, boolean parRead )
+	{
+		//set runtime platform
+		RUNTIME_PLATFORM rtold = rtplatform;
+		boolean csvReblockOld = OptimizerUtils.ALLOW_FRAME_CSV_REBLOCK;
+		rtplatform = rt;
+
+		boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+		if( rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK)
+			DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+
+		if( !ofmt.equals("csv") )
+			throw new RuntimeException("Unsupported test output format");
+		
+		try
+		{
+			getAndLoadTestConfiguration(TEST_NAME1);
+			
+			String HOME = SCRIPT_DIR + TEST_DIR;
+			fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
+			programArgs = new String[]{"-explain", "-stats","-args", 
+				HOME + "input/" + DATASET, output("R") };
+	
+			OptimizerUtils.ALLOW_FRAME_CSV_REBLOCK = true;
+			runTest(true, false, null, -1); 
+			
+			//read input/output and compare
+			FrameReader reader2 = parRead ? 
+				new FrameReaderTextCSVParallel( new CSVFileFormatProperties() ) : 
+				new FrameReaderTextCSV( new CSVFileFormatProperties()  );
+			FrameBlock fb2 = reader2.readFrameFromHDFS(output("R"), -1L, -1L);
+			System.out.println(DataConverter.toString(fb2));
+		}
+		catch(Exception ex) {
+			throw new RuntimeException(ex);
+		}
+		finally {
+			rtplatform = rtold;
+			DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+			OptimizerUtils.ALLOW_FRAME_CSV_REBLOCK = csvReblockOld;
+		}
+	}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c87da2ce/src/test/scripts/functions/transform/FrameCSVReadWrite.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/transform/FrameCSVReadWrite.dml b/src/test/scripts/functions/transform/FrameCSVReadWrite.dml
new file mode 100644
index 0000000..88f0cf5
--- /dev/null
+++ b/src/test/scripts/functions/transform/FrameCSVReadWrite.dml
@@ -0,0 +1,27 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = read($1, data_type="frame", format="csv");
+if(1==1){}
+
+print(toString(X));
+write(X, $2, format="csv");
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c87da2ce/src/test/scripts/functions/transform/TransformCSVFrameEncodeRead.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/transform/TransformCSVFrameEncodeRead.dml b/src/test/scripts/functions/transform/TransformCSVFrameEncodeRead.dml
new file mode 100644
index 0000000..9da935f
--- /dev/null
+++ b/src/test/scripts/functions/transform/TransformCSVFrameEncodeRead.dml
@@ -0,0 +1,29 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+F1 = read($1, data_type="frame", format="csv");
+jspec = "{\"ids\": true, \"recode\": [1,2,3]}";
+
+[X, M] = transformencode(target=F1, spec=jspec);
+
+print(toString(M))
+write(M, $2, format="csv");
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c87da2ce/src/test_suites/java/org/apache/sysml/test/integration/functions/misc/ZPackageSuite.java
----------------------------------------------------------------------
diff --git a/src/test_suites/java/org/apache/sysml/test/integration/functions/misc/ZPackageSuite.java b/src/test_suites/java/org/apache/sysml/test/integration/functions/misc/ZPackageSuite.java
index 6c40dd7..32b5f7b 100644
--- a/src/test_suites/java/org/apache/sysml/test/integration/functions/misc/ZPackageSuite.java
+++ b/src/test_suites/java/org/apache/sysml/test/integration/functions/misc/ZPackageSuite.java
@@ -47,9 +47,10 @@ import org.junit.runners.Suite;
 	PrintMatrixTest.class,
 	ReadAfterWriteTest.class,
 	RewriteFusedRandTest.class,
+	RewriteLoopVectorization.class,
+	RewritePushdownSumBinaryMult.class,
 	RewritePushdownSumOnBinaryTest.class,
 	RewritePushdownUaggTest.class,
-	RewritePushdownSumBinaryMult.class,
 	RewriteSimplifyRowColSumMVMultTest.class,
 	RewriteSlicedMatrixMultTest.class,
 	RewriteFuseBinaryOpChainTest.class,

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c87da2ce/src/test_suites/java/org/apache/sysml/test/integration/functions/transform/ZPackageSuite.java
----------------------------------------------------------------------
diff --git a/src/test_suites/java/org/apache/sysml/test/integration/functions/transform/ZPackageSuite.java b/src/test_suites/java/org/apache/sysml/test/integration/functions/transform/ZPackageSuite.java
index 1996b01..e36d4a0 100644
--- a/src/test_suites/java/org/apache/sysml/test/integration/functions/transform/ZPackageSuite.java
+++ b/src/test_suites/java/org/apache/sysml/test/integration/functions/transform/ZPackageSuite.java
@@ -26,10 +26,12 @@ import org.junit.runners.Suite;
  *  won't run two of them at once. */
 @RunWith(Suite.class)
 @Suite.SuiteClasses({
+	FrameCSVReadWriteTest.class,
 	RunTest.class,
 	ScalingTest.class,
 	TransformAndApplyTest.class,
 	TransformCSVFrameEncodeDecodeTest.class,
+	TransformCSVFrameEncodeReadTest.class,
 	TransformEncodeDecodeTest.class,
 	TransformFrameApplyTest.class,
 	TransformFrameEncodeApplyTest.class,


[2/4] incubator-systemml git commit: [SYSTEMML-1243] Fix size update wdivmm/wsigmoid/wumm on rewrite

Posted by mb...@apache.org.
[SYSTEMML-1243] Fix size update wdivmm/wsigmoid/wumm on rewrite

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/8c49730d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/8c49730d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/8c49730d

Branch: refs/heads/master
Commit: 8c49730d3a2eaebcaea2ff162eeb5e5dcc9839c9
Parents: c87da2c
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sat Feb 11 03:23:04 2017 +0100
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat Feb 11 19:33:01 2017 +0100

----------------------------------------------------------------------
 .../RewriteAlgebraicSimplificationDynamic.java  | 37 ++++++++++++++------
 1 file changed, 26 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8c49730d/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java
index 497675b..ed89a05 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java
@@ -1570,7 +1570,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 						  OpOp4.WSIGMOID, W, Y, tX, false, false);
 				HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
-
+				hnew.refreshSizeInformation();
+				
 				appliedPattern = true;
 				LOG.debug("Applied simplifyWeightedSigmoid1 (line "+hi.getBeginLine()+")");	
 			}
@@ -1599,7 +1600,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 						  OpOp4.WSIGMOID, W, Y, tX, false, true);
 				HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
-
+				hnew.refreshSizeInformation();
+				
 				appliedPattern = true;
 				LOG.debug("Applied simplifyWeightedSigmoid2 (line "+hi.getBeginLine()+")");	
 			}
@@ -1625,7 +1627,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 						  OpOp4.WSIGMOID, W, Y, tX, true, false);
 				HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
-
+				hnew.refreshSizeInformation();
+				
 				appliedPattern = true;
 				LOG.debug("Applied simplifyWeightedSigmoid3 (line "+hi.getBeginLine()+")");	
 			}			
@@ -1658,7 +1661,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 							  OpOp4.WSIGMOID, W, Y, tX, true, true);
 					HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
-	
+					hnew.refreshSizeInformation();
+					
 					appliedPattern = true;
 					LOG.debug("Applied simplifyWeightedSigmoid4 (line "+hi.getBeginLine()+")");	
 				}
@@ -1715,6 +1719,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 							  OpOp4.WDIVMM, W, U, V, new LiteralOp(-1), 1, mult, false);
 					HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
+					hnew.refreshSizeInformation();
 					
 					//add output transpose for efficient target indexing (redundant t() removed by other rewrites)
 					hnew = HopRewriteUtils.createTranspose(hnew);
@@ -1749,6 +1754,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 							  OpOp4.WDIVMM, W, U, V, X, 3, false, false); // 3=>DIV_LEFT_EPS
 					HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
+					hnew.refreshSizeInformation();
 					
 					//add output transpose for efficient target indexing (redundant t() removed by other rewrites)
 					hnew = HopRewriteUtils.createTranspose(hnew);
@@ -1781,7 +1787,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 							  OpOp4.WDIVMM, W, U, V, new LiteralOp(-1), 2, mult, false);
 					HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
-
+					hnew.refreshSizeInformation();
+					
 					appliedPattern = true;
 					LOG.debug("Applied simplifyWeightedDivMM2 (line "+hi.getBeginLine()+")");	
 				}
@@ -1812,7 +1819,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 							  OpOp4.WDIVMM, W, U, V, X, 4, false, false); // 4=>DIV_RIGHT_EPS
 					HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
-
+					hnew.refreshSizeInformation();
+					
 					appliedPattern = true;
 					LOG.debug("Applied simplifyWeightedDivMM2e (line "+hi.getBeginLine()+")");	
 				}
@@ -1842,6 +1850,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 							  OpOp4.WDIVMM, X, U, V, new LiteralOp(-1), 1, true, true);
 					HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
+					hnew.refreshSizeInformation();
 					
 					//add output transpose for efficient target indexing (redundant t() removed by other rewrites)
 					hnew = HopRewriteUtils.createTranspose(hnew);
@@ -1875,7 +1884,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 							  OpOp4.WDIVMM, X, U, V, new LiteralOp(-1), 2, true, true);
 					HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
-
+					hnew.refreshSizeInformation();
+					
 					appliedPattern = true;
 					LOG.debug("Applied simplifyWeightedDivMM4 (line "+hi.getBeginLine()+")");	
 				}
@@ -1905,6 +1915,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 							  OpOp4.WDIVMM, W, U, V, X, 1, true, true);
 					HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
+					hnew.refreshSizeInformation();
 					
 					//add output transpose for efficient target indexing (redundant t() removed by other rewrites)
 					hnew = HopRewriteUtils.createTranspose(hnew);
@@ -1938,7 +1949,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 					hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 							  OpOp4.WDIVMM, W, U, V, X, 2, true, true);
 					HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
-
+					hnew.refreshSizeInformation();
+					
 					appliedPattern = true;
 					LOG.debug("Applied simplifyWeightedDivMM6 (line "+hi.getBeginLine()+")");	
 				}
@@ -1968,7 +1980,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 					  OpOp4.WDIVMM, W, U, V, new LiteralOp(-1), 0, true, false);
 			HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
-
+			hnew.refreshSizeInformation();
+			
 			appliedPattern = true;
 			LOG.debug("Applied simplifyWeightedDivMM7 (line "+hi.getBeginLine()+")");	
 		}
@@ -2093,7 +2106,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 					  OpOp4.WUMM, W, U, V, mult, op, null);
 			HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
-
+			hnew.refreshSizeInformation();
+			
 			appliedPattern = true;
 			LOG.debug("Applied simplifyWeightedUnaryMM1 (line "+hi.getBeginLine()+")");	
 		}
@@ -2145,7 +2159,8 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 				hnew = new QuaternaryOp(hi.getName(), DataType.MATRIX, ValueType.DOUBLE, 
 						  OpOp4.WUMM, W, U, V, mult, null, op);
 				HopRewriteUtils.setOutputBlocksizes(hnew, W.getRowsInBlock(), W.getColsInBlock());
-	
+				hnew.refreshSizeInformation();
+				
 				appliedPattern = true;
 				LOG.debug("Applied simplifyWeightedUnaryMM2 (line "+hi.getBeginLine()+")");	
 			}


[4/4] incubator-systemml git commit: [SYSTEMML-1249] Deprecate parfor perftesttool and clean up unused code

Posted by mb...@apache.org.
[SYSTEMML-1249] Deprecate parfor perftesttool and clean up unused code

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/ca4e2600
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/ca4e2600
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/ca4e2600

Branch: refs/heads/master
Commit: ca4e2600e454fd2a093a155f3e622978b81d80ce
Parents: 0ae0123
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sat Feb 11 07:19:31 2017 +0100
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat Feb 11 19:33:04 2017 +0100

----------------------------------------------------------------------
 src/main/java/org/apache/sysml/hops/BinaryOp.java | 10 ++++++----
 src/main/java/org/apache/sysml/lops/AppendCP.java |  2 +-
 .../java/org/apache/sysml/parser/Expression.java  |  7 -------
 .../controlprogram/ParForProgramBlock.java        |  5 ++---
 .../controlprogram/parfor/ProgramConverter.java   |  1 -
 .../parfor/RemoteDPParWorkerReducer.java          |  2 --
 .../controlprogram/parfor/opt/CostEstimator.java  | 18 ++++++++++++------
 .../parfor/opt/CostEstimatorHops.java             | 13 +++++--------
 .../parfor/opt/CostEstimatorRuntime.java          |  3 +--
 .../controlprogram/parfor/opt/OptNode.java        |  1 -
 .../parfor/opt/OptNodeStatistics.java             |  6 ++----
 .../parfor/opt/OptTreeConverter.java              |  3 ++-
 .../parfor/opt/OptimizerConstrained.java          |  2 +-
 .../parfor/opt/OptimizerHeuristic.java            |  2 +-
 .../parfor/opt/OptimizerRuleBased.java            |  7 +++----
 .../controlprogram/parfor/opt/PerfTestDef.java    |  7 +++----
 .../parfor/opt/PerfTestExtFunctCP.java            |  1 +
 .../parfor/opt/PerfTestMemoryObserver.java        |  1 +
 .../controlprogram/parfor/opt/PerfTestTool.java   | 15 +++------------
 .../parfor/opt/PerfTestToolRegression.dml         |  1 +
 .../runtime/controlprogram/parfor/stat/Stat.java  |  2 --
 .../runtime/instructions/CPInstructionParser.java |  3 ++-
 .../runtime/instructions/mr/MRInstruction.java    |  2 +-
 .../matrix/mapred/MRConfigurationNames.java       |  1 -
 .../runtime/matrix/mapred/MRJobConfiguration.java |  3 ---
 .../runtime/matrix/operators/CMOperator.java      |  3 +--
 .../java/org/apache/sysml/utils/Statistics.java   |  2 --
 .../sysml/yarn/DMLAppMasterStatusReporter.java    |  4 ++--
 .../sysml/yarn/ropt/YarnClusterAnalyzer.java      |  1 -
 29 files changed, 51 insertions(+), 77 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/hops/BinaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/BinaryOp.java b/src/main/java/org/apache/sysml/hops/BinaryOp.java
index 6269919..f1e6aaf 100644
--- a/src/main/java/org/apache/sysml/hops/BinaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/BinaryOp.java
@@ -74,22 +74,24 @@ public class BinaryOp extends Hop
 	private boolean outer = false;
 	
 	public static AppendMethod FORCED_APPEND_METHOD = null;
+	
+	
 	public enum AppendMethod { 
-		CP_APPEND, //in-memory general case append // TODO investigate unused enum constant
+		CP_APPEND, //in-memory general case append (implicitly selected for CP)
 		MR_MAPPEND, //map-only append (rhs must be vector and fit in mapper mem)
 		MR_RAPPEND, //reduce-only append (output must have at most one column block)
 		MR_GAPPEND, //map-reduce general case append (map-extend, aggregate)
 		SP_GAlignedAppend // special case for general case in Spark where left.getCols() % left.getColsPerBlock() == 0
 	};
 	
-	private enum MMBinaryMethod{
-		CP_BINARY,
+	private enum MMBinaryMethod {
+		CP_BINARY, //(implicitly selected for CP) 
 		MR_BINARY_R, //both mm, mv 
 		MR_BINARY_M, //only mv (mr/spark)
 		MR_BINARY_OUTER_M,
 		MR_BINARY_OUTER_R, //only vv 
 		MR_BINARY_UAGG_CHAIN, //(mr/spark)
-	}
+	};
 	
 	private BinaryOp() {
 		//default constructor for clone

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/lops/AppendCP.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/lops/AppendCP.java b/src/main/java/org/apache/sysml/lops/AppendCP.java
index d527f45..6b2d361 100644
--- a/src/main/java/org/apache/sysml/lops/AppendCP.java
+++ b/src/main/java/org/apache/sysml/lops/AppendCP.java
@@ -27,7 +27,7 @@ import org.apache.sysml.parser.Expression.*;
 
 public class AppendCP extends Lop
 {
-	public static final String OPCODE = "append"; // TODO investigate unused constant
+	public static final String OPCODE = "append";
 
 	private boolean _cbind = true;
 	

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/parser/Expression.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/Expression.java b/src/main/java/org/apache/sysml/parser/Expression.java
index 3919a6a..9c3b248 100644
--- a/src/main/java/org/apache/sysml/parser/Expression.java
+++ b/src/main/java/org/apache/sysml/parser/Expression.java
@@ -166,13 +166,6 @@ public abstract class Expression
 	};
 
 	/**
-	 * External built-in function operators.
-	 */
-	public enum ExtBuiltinFunctionOp { // TODO investigate unused enum
-		EIGEN, CHOLESKY
-	};
-
-	/**
 	 * Data types (matrix, scalar, frame, object, unknown).
 	 */
 	public enum DataType {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java b/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java
index 5e1aacf..f6c90f3 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java
@@ -81,12 +81,12 @@ import org.apache.sysml.runtime.controlprogram.parfor.TaskPartitionerNaive;
 import org.apache.sysml.runtime.controlprogram.parfor.TaskPartitionerStatic;
 import org.apache.sysml.runtime.controlprogram.parfor.mqo.RuntimePiggybacking;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator;
+import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.TestMeasure;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimatorHops;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.OptTree;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.OptTreeConverter;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.OptimizationWrapper;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.OptimizerRuleBased;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.TestMeasure;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.ProgramRecompiler;
 import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
 import org.apache.sysml.runtime.controlprogram.parfor.stat.Stat;
@@ -147,8 +147,7 @@ public class ParForProgramBlock extends ForProgramBlock
 		COLUMN_WISE,
 		COLUMN_BLOCK_WISE,
 		COLUMN_BLOCK_WISE_N,
-		BLOCK_WISE_M_N,
-		UNSPECIFIED; // TODO investigate unused enum constant
+		BLOCK_WISE_M_N;
 
 		/**
 		 * Note: Robust version of valueOf in order to return NONE without exception

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/ProgramConverter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/ProgramConverter.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/ProgramConverter.java
index 3ac5722..f94be23 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/ProgramConverter.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/ProgramConverter.java
@@ -147,7 +147,6 @@ public class ProgramConverter
 	public static final String NOT_SUPPORTED_MR_PARFOR           = "Not supported: Nested ParFOR REMOTE_MR due to possible deadlocks." +
 			                                                       "(LOCAL can be used for innner ParFOR)";
 	public static final String NOT_SUPPORTED_PB                  = "Not supported: type of program block";
-	public static final String NOT_SUPPORTED_EXECUTION_CONTEXT   = "Parsing of external system execution context not supported yet."; // TODO investigate unused constant
 	
 	////////////////////////////////
 	// CREATION of DEEP COPIES

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteDPParWorkerReducer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteDPParWorkerReducer.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteDPParWorkerReducer.java
index 73cab89..3f26945 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteDPParWorkerReducer.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteDPParWorkerReducer.java
@@ -20,7 +20,6 @@
 package org.apache.sysml.runtime.controlprogram.parfor;
 
 import java.io.IOException;
-import java.util.HashMap;
 import java.util.Iterator;
 
 import org.apache.hadoop.io.LongWritable;
@@ -70,7 +69,6 @@ public class RemoteDPParWorkerReducer extends ParWorker
 		
 	//MR ParWorker attributes  
 	protected String  _stringID       = null;
-	protected HashMap<String, String> _rvarFnames = null; // TODO investigate unused field
 
 	//cached collector/reporter
 	protected OutputCollector<Writable, Writable> _out = null;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimator.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimator.java
index f76d1ef..6e428d1 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimator.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimator.java
@@ -27,7 +27,6 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.sysml.lops.LopProperties.ExecType;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ParamType;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.TestMeasure;
 
 /**
  * Base class for all potential cost estimators
@@ -36,18 +35,25 @@ import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.TestMeasu
  * 
  */
 public abstract class CostEstimator 
-{
-	
+{	
 	protected static final Log LOG = LogFactory.getLog(CostEstimator.class.getName());
-    
-	
+    	
 	//default parameters
 	public static final double DEFAULT_EST_PARALLELISM = 1.0; //default degree of parallelism: serial
 	public static final long   FACTOR_NUM_ITERATIONS   = 10; //default problem size
 	public static final double DEFAULT_TIME_ESTIMATE   = 5;  //default execution time: 5ms
 	public static final double DEFAULT_MEM_ESTIMATE_CP = 1024; //default memory consumption: 1KB 
-	public static final double DEFAULT_MEM_ESTIMATE_MR = 10*1024*1024; //default memory consumption: 20MB // TODO investigate unused constant
+	public static final double DEFAULT_MEM_ESTIMATE_MR = 20*1024*1024; //default memory consumption: 20MB 
+
+	public enum TestMeasure {
+		EXEC_TIME,
+		MEMORY_USAGE	
+	}
 	
+	public enum DataFormat {
+		DENSE,
+		SPARSE
+	}
 	
 	/**
 	 * Main leaf node estimation method - to be overwritten by specific cost estimators

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimatorHops.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimatorHops.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimatorHops.java
index 9ee7c2b..02ba9ed 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimatorHops.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimatorHops.java
@@ -26,20 +26,17 @@ import org.apache.sysml.lops.LopProperties.ExecType;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.NodeType;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.Optimizer.CostModelType;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.TestMeasure;
 import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
 
 public class CostEstimatorHops extends CostEstimator
 {
-	
-	public static long DEFAULT_MEM_MR = -1;
-	public static long DEFAULT_MEM_SP = 20*1024*1024;
+	public static double DEFAULT_MEM_MR = -1;
+	public static double DEFAULT_MEM_SP = 20*1024*1024;
 	
 	private OptTreePlanMappingAbstract _map = null;
 	
-	static
-	{
-		DEFAULT_MEM_MR = 20*1024*1024; //20MB
+	static {
+		DEFAULT_MEM_MR = DEFAULT_MEM_ESTIMATE_MR; //20MB
 		if( InfrastructureAnalyzer.isLocalMode() )
 			DEFAULT_MEM_MR = DEFAULT_MEM_MR + InfrastructureAnalyzer.getRemoteMaxMemorySortBuffer();
 	}
@@ -65,7 +62,7 @@ public class CostEstimatorHops extends CostEstimator
 		double value = h.getMemEstimate();
 		
 		//handle specific cases 
-		long DEFAULT_MEM_REMOTE = OptimizerUtils.isSparkExecutionMode() ? 
+		double DEFAULT_MEM_REMOTE = OptimizerUtils.isSparkExecutionMode() ? 
 								DEFAULT_MEM_SP : DEFAULT_MEM_MR;
 		
 		if( value >= DEFAULT_MEM_REMOTE )   	  

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimatorRuntime.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimatorRuntime.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimatorRuntime.java
index 1a5d8f4..cbb8260 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimatorRuntime.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/CostEstimatorRuntime.java
@@ -23,8 +23,6 @@ package org.apache.sysml.runtime.controlprogram.parfor.opt;
 import org.apache.sysml.lops.Lop;
 import org.apache.sysml.lops.LopProperties.ExecType;
 import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.DataFormat;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.TestMeasure;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.TestVariable;
 
 /**
@@ -39,6 +37,7 @@ import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.TestVaria
  * TODO: inst names as constants in perftesttool
  * TODO: complexity corrections for sparse matrices
  */
+@SuppressWarnings("deprecation")
 public class CostEstimatorRuntime extends CostEstimator
 {	
 	

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptNode.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptNode.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptNode.java
index 271d018..7968c6a 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptNode.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptNode.java
@@ -66,7 +66,6 @@ public class OptNode
 	}
 	
 	public enum ParamType{
-		OPTYPE, // TODO investigate unused enum constant
 		OPSTRING,
 		TASK_PARTITIONER,
 		TASK_SIZE,

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptNodeStatistics.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptNodeStatistics.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptNodeStatistics.java
index 639bb13..1f4a35a 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptNodeStatistics.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptNodeStatistics.java
@@ -19,7 +19,7 @@
 
 package org.apache.sysml.runtime.controlprogram.parfor.opt;
 
-import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.DataFormat;
+import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.DataFormat;
 
 /**
  * 
@@ -28,9 +28,7 @@ import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.DataForma
  * 
  */
 public class OptNodeStatistics 
-{
-
-	
+{	
 	public static final long       DEFAULT_DIMENSION  = 100;
 	public static final double     DEFAULT_SPARSITY   = 1.0;		
 	public static final DataFormat DEFAULT_DATAFORMAT = DataFormat.DENSE;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptTreeConverter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptTreeConverter.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptTreeConverter.java
index 71d0931..44d4de2 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptTreeConverter.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptTreeConverter.java
@@ -57,11 +57,11 @@ import org.apache.sysml.runtime.controlprogram.ProgramBlock;
 import org.apache.sysml.runtime.controlprogram.WhileProgramBlock;
 import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
+import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.DataFormat;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.NodeType;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ParamType;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.Optimizer.PlanInputType;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.DataFormat;
 import org.apache.sysml.runtime.instructions.Instruction;
 import org.apache.sysml.runtime.instructions.MRJobInstruction;
 import org.apache.sysml.runtime.instructions.cp.ComputationCPInstruction;
@@ -82,6 +82,7 @@ import org.apache.sysml.runtime.matrix.data.MatrixBlock;
  * NOTE: currently only one abstract and one runtime plan at a time.
  * This implies that only one parfor optimization can happen at a time.
  */
+@SuppressWarnings("deprecation")
 public class OptTreeConverter 
 {		
 	

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java
index 1c2e23c..9754e6f 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerConstrained.java
@@ -37,9 +37,9 @@ import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PResultMerge;
 import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PTaskPartitioner;
 import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
+import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.TestMeasure;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ParamType;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.TestMeasure;
 
 /**
  * Rule-Based ParFor Optimizer (time: O(n)):

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerHeuristic.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerHeuristic.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerHeuristic.java
index 53d7c9d..a53f7f1 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerHeuristic.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerHeuristic.java
@@ -21,7 +21,7 @@ package org.apache.sysml.runtime.controlprogram.parfor.opt;
 
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.POptMode;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.TestMeasure;
+import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.TestMeasure;
 
 
 /**

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
index 532c41c..98fa664 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
@@ -89,10 +89,10 @@ import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
 import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
 import org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter;
 import org.apache.sysml.runtime.controlprogram.parfor.ResultMergeLocalFile;
+import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.TestMeasure;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.NodeType;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ParamType;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.TestMeasure;
 import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
 import org.apache.sysml.runtime.instructions.Instruction;
 import org.apache.sysml.runtime.instructions.cp.Data;
@@ -148,11 +148,10 @@ import org.apache.sysml.yarn.ropt.YarnClusterAnalyzer;
  */
 public class OptimizerRuleBased extends Optimizer
 {
-	
 	public static final double PROB_SIZE_THRESHOLD_REMOTE = 100; //wrt # top-level iterations (min)
 	public static final double PROB_SIZE_THRESHOLD_PARTITIONING = 2; //wrt # top-level iterations (min)
 	public static final double PROB_SIZE_THRESHOLD_MB = 256*1024*1024; //wrt overall memory consumption (min)
-	public static final int MAX_REPLICATION_FACTOR_PARTITIONING = 5; // TODO investigate unused constant
+	public static final int MAX_REPLICATION_FACTOR_PARTITIONING = 5;
 	public static final int MAX_REPLICATION_FACTOR_EXPORT = 7;    
 	public static final boolean ALLOW_REMOTE_NESTED_PARALLELISM = false;
 	public static final boolean APPLY_REWRITE_NESTED_PARALLELISM = false;
@@ -1079,7 +1078,7 @@ public class OptimizerRuleBased extends Optimizer
 			replication = (int)Math.min( _N, _rnk );
 			
 			//account for internal max constraint (note hadoop will warn if max > 10)
-			replication = (int)Math.min( replication, MAX_REPLICATION_FACTOR_EXPORT );
+			replication = (int)Math.min( replication, MAX_REPLICATION_FACTOR_PARTITIONING );
 			
 			//account for remaining hdfs capacity
 			try {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestDef.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestDef.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestDef.java
index 91b4843..08c02ef 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestDef.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestDef.java
@@ -19,9 +19,9 @@
 
 package org.apache.sysml.runtime.controlprogram.parfor.opt;
 
-import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.DataFormat;
+import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.DataFormat;
+import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.TestMeasure;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.InternalTestVariable;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.TestMeasure;
 import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.TestVariable;
 
 /**
@@ -31,10 +31,9 @@ import org.apache.sysml.runtime.controlprogram.parfor.opt.PerfTestTool.TestVaria
  * and an instruction.
  *
  */
+@Deprecated
 public class PerfTestDef 
 {
-
-	
 	//logical properties
 	private TestMeasure    _measure;
 	private TestVariable   _lvariable;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestExtFunctCP.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestExtFunctCP.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestExtFunctCP.java
index eda6be6..5a1fac8 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestExtFunctCP.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestExtFunctCP.java
@@ -33,6 +33,7 @@ import org.apache.sysml.udf.Matrix.ValueType;
  * measure the general behavior of package support.
  *
  */
+@Deprecated
 public class PerfTestExtFunctCP extends PackageFunction 
 {	
 	

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestMemoryObserver.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestMemoryObserver.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestMemoryObserver.java
index 0f8a8bf..0698cbe 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestMemoryObserver.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestMemoryObserver.java
@@ -30,6 +30,7 @@ import java.lang.ref.WeakReference;
  * Protocol: (1) measure start, (2) start thread, (3) *do some work*, (4) join thread, (5) get max memory.
  *  
  */
+@Deprecated
 public class PerfTestMemoryObserver implements Runnable
 {
 	

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java
index 9635707..c130031 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java
@@ -64,6 +64,8 @@ import org.apache.sysml.runtime.controlprogram.caching.LazyWriteBuffer;
 import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContextFactory;
+import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.DataFormat;
+import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.TestMeasure;
 import org.apache.sysml.runtime.controlprogram.parfor.stat.Timing;
 import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler;
 import org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence;
@@ -95,6 +97,7 @@ import au.com.bytecode.opencsv.CSVWriter;
  * 
  * 
  */
+@Deprecated
 public class PerfTestTool 
 {
 	
@@ -159,12 +162,6 @@ public class PerfTestTool
 	private static boolean    _flagReadData = false; 
 	private static HashMap<Integer,HashMap<Integer,CostFunction>> _profile = null;
 	
-	public enum TestMeasure //logical test measure
-	{
-		EXEC_TIME,
-		MEMORY_USAGE	
-	}
-	
 	public enum TestVariable //logical test variable
 	{
 		DATA_SIZE,
@@ -194,12 +191,6 @@ public class PerfTestTool
 		BINARY_UNARY
 	}
 	
-	public enum DataFormat //logical data format
-	{
-		DENSE,
-		SPARSE
-	}
-	
 	public enum TestConstants //logical test constants
 	{
 		DFS_READ_THROUGHPUT,

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml
index e6556fa..c216d52 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml
@@ -21,6 +21,7 @@
 
 
 #PerfTestTool: DML template for estimation cost functions.
+#Deprecated in SystemML 0.13
 
 dynRead = externalFunction(Matrix[Double] d, String fname, Integer m, Integer n) 
 return (Matrix[Double] D) 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/stat/Stat.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/stat/Stat.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/stat/Stat.java
index a0192d5..8270f29 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/stat/Stat.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/stat/Stat.java
@@ -26,8 +26,6 @@ package org.apache.sysml.runtime.controlprogram.parfor.stat;
  */
 public enum Stat
 {
-	//parfor parser statistics
-	PARSE_T, // TODO investigate unused enum constant
 	//parfor optimizer statistics
 	OPT_T,
 	OPT_OPTIMIZER,

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java b/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java
index f631527..9cf56d9 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java
@@ -22,6 +22,7 @@ package org.apache.sysml.runtime.instructions;
 
 import java.util.HashMap;
 
+import org.apache.sysml.lops.AppendCP;
 import org.apache.sysml.lops.DataGen;
 import org.apache.sysml.lops.LopProperties.ExecType;
 import org.apache.sysml.lops.UnaryCP;
@@ -240,7 +241,7 @@ public class CPInstructionParser extends InstructionParser
 		// User-defined function Opcodes
 		String2CPInstructionType.put( "extfunct"   	, CPINSTRUCTION_TYPE.External);
 
-		String2CPInstructionType.put( "append", CPINSTRUCTION_TYPE.Append);
+		String2CPInstructionType.put( AppendCP.OPCODE, CPINSTRUCTION_TYPE.Append);
 		
 		// data generation opcodes
 		String2CPInstructionType.put( DataGen.RAND_OPCODE   , CPINSTRUCTION_TYPE.Rand);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/instructions/mr/MRInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/mr/MRInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/mr/MRInstruction.java
index fbdd3b7..68132fc 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/mr/MRInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/mr/MRInstruction.java
@@ -34,7 +34,7 @@ public abstract class MRInstruction extends Instruction
 	public enum MRINSTRUCTION_TYPE { INVALID, Append, Aggregate, ArithmeticBinary, ArithmeticBinary2, AggregateBinary, AggregateUnary, 
 		Rand, Seq, CSVReblock, CSVWrite, Transform,
 		Reblock, Reorg, Replicate, Unary, CombineBinary, CombineUnary, CombineTernary, PickByCount, Partition,
-		Ternary, Quaternary, CM_N_COV, Combine /* TODO investigate unused enum constant */, MapGroupedAggregate, GroupedAggregate, RangeReIndex, ZeroOut, MMTSJ, PMMJ, MatrixReshape, ParameterizedBuiltin, Sort, MapMultChain,
+		Ternary, Quaternary, CM_N_COV, MapGroupedAggregate, GroupedAggregate, RangeReIndex, ZeroOut, MMTSJ, PMMJ, MatrixReshape, ParameterizedBuiltin, Sort, MapMultChain,
 		CumsumAggregate, CumsumSplit, CumsumOffset, BinUaggChain, UaggOuterChain, RemoveEmpty}; 
 	
 	

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRConfigurationNames.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRConfigurationNames.java b/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRConfigurationNames.java
index 167bf74..5e6f589 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRConfigurationNames.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRConfigurationNames.java
@@ -37,7 +37,6 @@ public abstract class MRConfigurationNames {
 	public static final String DFS_REPLICATION = "dfs.replication"; // hdfs-default.xml
 	public static final String IO_FILE_BUFFER_SIZE = "io.file.buffer.size"; // core-default.xml
 	public static final String IO_SERIALIZATIONS = "io.serializations"; // core-default.xml
-	public static final String MR_APPLICATION_CLASSPATH = "mapreduce.application.classpath"; // mapred-default.xml // TODO investigate unused constant
 	public static final String MR_CHILD_JAVA_OPTS = "mapred.child.java.opts"; // mapred-default.xml
 	public static final String MR_FRAMEWORK_NAME = "mapreduce.framework.name"; // mapred-default.xml
 	public static final String MR_JOBTRACKER_STAGING_ROOT_DIR = "mapreduce.jobtracker.staging.root.dir"; // mapred-default.xml

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java b/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java
index 7867132..a7d881d 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/mapred/MRJobConfiguration.java
@@ -230,9 +230,6 @@ public class MRJobConfiguration
 	 */
 	public static final String NUM_NONZERO_CELLS="nonzeros";
 
-	public static final String PARFOR_NUMTASKS="numtasks"; // TODO investigate unused constant
-	public static final String PARFOR_NUMITERATOINS="numiterations"; // TODO investigate unused constant
-	
 	public static final String TF_NUM_COLS 		= "transform.num.columns";
 	public static final String TF_HAS_HEADER 	= "transform.has.header";
 	public static final String TF_DELIM 		= "transform.field.delimiter";

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/runtime/matrix/operators/CMOperator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/operators/CMOperator.java b/src/main/java/org/apache/sysml/runtime/matrix/operators/CMOperator.java
index da67712..e89ced6 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/operators/CMOperator.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/operators/CMOperator.java
@@ -32,8 +32,7 @@ public class CMOperator extends Operator
 	public enum AggregateOperationTypes {
 		SUM,
 		COUNT,
-		MEAN,
-		CM, // TODO investigate unused enum constant
+		MEAN, //a.k.a. CM
 		CM2,
 		CM3,
 		CM4,

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/utils/Statistics.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/utils/Statistics.java b/src/main/java/org/apache/sysml/utils/Statistics.java
index 5ded743..87be90b 100644
--- a/src/main/java/org/apache/sysml/utils/Statistics.java
+++ b/src/main/java/org/apache/sysml/utils/Statistics.java
@@ -48,11 +48,9 @@ public class Statistics
 {	
 	private static long compileStartTime = 0;
 	private static long compileEndTime = 0;
-	public static long compileTime = 0; // TODO investigate unused field
 	
 	private static long execStartTime = 0;
 	private static long execEndTime = 0;
-	public static long execTime = 0; // TODO investigate unused field
 
 	// number of compiled/executed MR jobs
 	private static int iNoOfExecutedMRJobs = 0;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/yarn/DMLAppMasterStatusReporter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/yarn/DMLAppMasterStatusReporter.java b/src/main/java/org/apache/sysml/yarn/DMLAppMasterStatusReporter.java
index d680471..4b5e106 100644
--- a/src/main/java/org/apache/sysml/yarn/DMLAppMasterStatusReporter.java
+++ b/src/main/java/org/apache/sysml/yarn/DMLAppMasterStatusReporter.java
@@ -26,7 +26,7 @@ import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
 
 public class DMLAppMasterStatusReporter extends Thread
 {
-	public static long DEFAULT_REPORT_INTERVAL = 5000; // TODO investigate unused field
+	public static final long DEFAULT_REPORT_INTERVAL = 5000;
 	private static final Log LOG = LogFactory.getLog(DMLAppMasterStatusReporter.class);
 
 	private AMRMClient<ContainerRequest> _rmClient;
@@ -37,7 +37,7 @@ public class DMLAppMasterStatusReporter extends Thread
 	public DMLAppMasterStatusReporter(AMRMClient<ContainerRequest> rmClient, long interval) 
 	{
 		_rmClient = rmClient;
-		_interval = interval;
+		_interval = interval>0 ? interval : DEFAULT_REPORT_INTERVAL;
 		_stop = false;
 	}
 	

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4e2600/src/main/java/org/apache/sysml/yarn/ropt/YarnClusterAnalyzer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/yarn/ropt/YarnClusterAnalyzer.java b/src/main/java/org/apache/sysml/yarn/ropt/YarnClusterAnalyzer.java
index 6fd826b..e05f40b 100644
--- a/src/main/java/org/apache/sysml/yarn/ropt/YarnClusterAnalyzer.java
+++ b/src/main/java/org/apache/sysml/yarn/ropt/YarnClusterAnalyzer.java
@@ -61,7 +61,6 @@ public class YarnClusterAnalyzer
 	public static long _remoteJVMMaxMemMap    = -1;
 	public static long _remoteJVMMaxMemReduce = -1;
 	public static long _remoteMRSortMem = -1;
-	public static boolean _localJT      = false; // TODO investigate unused field
 	public static long _blocksize       = -1;
 	
 	// Map from StatementBlock.ID to remoteJVMMaxMem (in bytes)



[3/4] incubator-systemml git commit: [SYSTEMML-1248] Fix loop rewrite update-in-place (exclude local vars)

Posted by mb...@apache.org.
[SYSTEMML-1248] Fix loop rewrite update-in-place (exclude local vars)

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/0ae01230
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/0ae01230
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/0ae01230

Branch: refs/heads/master
Commit: 0ae01230d617cf5ae322d00f44b5fb520b3ee850
Parents: 8c49730
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sat Feb 11 04:45:05 2017 +0100
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat Feb 11 19:33:02 2017 +0100

----------------------------------------------------------------------
 .../hops/rewrite/RewriteMarkLoopVariablesUpdateInPlace.java     | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0ae01230/src/main/java/org/apache/sysml/hops/rewrite/RewriteMarkLoopVariablesUpdateInPlace.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteMarkLoopVariablesUpdateInPlace.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteMarkLoopVariablesUpdateInPlace.java
index af06bfc..6e3621f 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteMarkLoopVariablesUpdateInPlace.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteMarkLoopVariablesUpdateInPlace.java
@@ -63,9 +63,12 @@ public class RewriteMarkLoopVariablesUpdateInPlace extends StatementBlockRewrite
 		{
 			ArrayList<String> candidates = new ArrayList<String>(); 
 			VariableSet updated = sb.variablesUpdated();
+			VariableSet liveout = sb.liveOut();
 			
 			for( String varname : updated.getVariableNames() ) {
-				if( updated.getVariable(varname).getDataType()==DataType.MATRIX) {
+				if( updated.getVariable(varname).getDataType()==DataType.MATRIX
+					&& liveout.containsVariable(varname) ) //exclude local vars 
+				{
 					if( sb instanceof WhileStatementBlock ) {
 						WhileStatement wstmt = (WhileStatement) sb.getStatement(0);
 						if( rIsApplicableForUpdateInPlace(wstmt.getBody(), varname) )