You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/02/24 20:27:36 UTC

[4/6] incubator-systemml git commit: [SYSTEMML-1302] Remove parfor perftesttool, cleanup heuristic optimizer

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java
deleted file mode 100644
index c130031..0000000
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java
+++ /dev/null
@@ -1,1411 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.controlprogram.parfor.opt;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.Map.Entry;
-import java.util.Random;
-import java.util.StringTokenizer;
-
-import javax.xml.stream.XMLInputFactory;
-import javax.xml.stream.XMLOutputFactory;
-import javax.xml.stream.XMLStreamConstants;
-import javax.xml.stream.XMLStreamException;
-import javax.xml.stream.XMLStreamReader;
-import javax.xml.stream.XMLStreamWriter;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.sysml.api.DMLException;
-import org.apache.sysml.api.DMLScript;
-import org.apache.sysml.conf.ConfigurationManager;
-import org.apache.sysml.lops.Lop;
-import org.apache.sysml.lops.MMTSJ.MMTSJType;
-import org.apache.sysml.parser.DMLProgram;
-import org.apache.sysml.parser.DataIdentifier;
-import org.apache.sysml.parser.Expression.DataType;
-import org.apache.sysml.parser.Expression.ValueType;
-import org.apache.sysml.parser.ExternalFunctionStatement;
-import org.apache.sysml.parser.ParseException;
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlockCP;
-import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
-import org.apache.sysml.runtime.controlprogram.Program;
-import org.apache.sysml.runtime.controlprogram.ProgramBlock;
-import org.apache.sysml.runtime.controlprogram.caching.CacheException;
-import org.apache.sysml.runtime.controlprogram.caching.LazyWriteBuffer;
-import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
-import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
-import org.apache.sysml.runtime.controlprogram.context.ExecutionContextFactory;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.DataFormat;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.TestMeasure;
-import org.apache.sysml.runtime.controlprogram.parfor.stat.Timing;
-import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler;
-import org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence;
-import org.apache.sysml.runtime.instructions.CPInstructionParser;
-import org.apache.sysml.runtime.instructions.Instruction;
-import org.apache.sysml.runtime.instructions.MRJobInstruction;
-import org.apache.sysml.runtime.instructions.cp.Data;
-import org.apache.sysml.runtime.instructions.cp.DataGenCPInstruction;
-import org.apache.sysml.runtime.instructions.cp.FunctionCallCPInstruction;
-import org.apache.sysml.runtime.io.IOUtilFunctions;
-import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
-import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
-import org.apache.sysml.runtime.matrix.data.InputInfo;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-import org.apache.sysml.runtime.matrix.data.OutputInfo;
-import org.apache.sysml.runtime.util.MapReduceTool;
-
-import au.com.bytecode.opencsv.CSVReader;
-import au.com.bytecode.opencsv.CSVWriter;
-
-/**
- * DML Instructions Performance Test Tool: 
- * 
- * Creates an offline performance profile (required once per installation) of DML instructions.
- * The profile is a combination of all individual statistical models trained per combination of 
- * instruction and test configuration. In order to train those models, we execute and measure
- * real executions of DML instructions on random input data. Finally, at runtime, the profile
- * is used by the cost estimator to create statistical estimates for cost-based optimization.
- * 
- * 
- */
-@Deprecated
-public class PerfTestTool 
-{
-	
-	//public parameters (used for estimation)
-	//value range and default of the DATA_SIZE test variable; executeTestCase1D uses the value
-	//as number of rows for vector tests and as number of cells (rows=cols=sqrt) otherwise
-	public static final long    MIN_DATASIZE           = 1000;
-	public static final long    MAX_DATASIZE           = 1000000; 
-	public static final long    DEFAULT_DATASIZE       = 500000;//(MAX_DATASIZE-MIN_DATASIZE)/2;
-	//factor applied to DEFAULT_DATASIZE when the SORT_IO_MEM variable is tested
-	public static final long    DATASIZE_MR_SCALE      = 20;
-	//value range and default of the SPARSITY test variable
-	public static final double  MIN_SPARSITY           = 0.1;
-	public static final double  MAX_SPARSITY           = 1.0;
-	public static final double  DEFAULT_SPARSITY       = 0.5;//(MAX_SPARSITY-MIN_SPARSITY)/2;
-	
-	//internal parameters
-	//if true, the static initializer reads the profile at class-loading time
-	private static final boolean READ_STATS_ON_STARTUP  = false;
-	//number of executions per test case; the measurements are averaged
-	private static final int     TEST_REPETITIONS       = 10; 
-	//number of sample points per test definition (also the row count passed to the regression)
-	private static final int     NUM_SAMPLES_PER_TEST   = 11; 
-	//runTest passes MODEL_MAX_ORDER (+1 if MODEL_INTERCEPT) as column count to the regression
-	private static final int     MODEL_MAX_ORDER        = 2;
-	private static final boolean MODEL_INTERCEPT        = true;
-	
-	//working directory for generated matrices, intermediate results, and the profile
-	private static final String  PERF_TOOL_DIR          = "./conf/PerfTestTool/";
-//	private static final String  PERF_RESULTS_FNAME     = PERF_TOOL_DIR + "%id%.dat";
-	private static final String  PERF_PROFILE_FNAME     = PERF_TOOL_DIR + "performance_profile.xml";
-	//regression script and temporary script file handed to computeRegressionModels
-	private static final String  DML_SCRIPT_FNAME       = "./src/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml";
-	private static final String  DML_TMP_FNAME          = PERF_TOOL_DIR + "temp.dml";
-	
-	//XML profile tags and attributes
-	//NOTE(review): presumably consumed by readProfile/writeProfile, which are outside this excerpt
-	private static final String  XML_PROFILE            = "profile";
-	private static final String  XML_DATE               = "date";
-	private static final String  XML_INSTRUCTION        = "instruction";
-	private static final String  XML_ID                 = "id";
-	private static final String  XML_NAME               = "name";
-	private static final String  XML_COSTFUNCTION       = "cost_function";
-	private static final String  XML_MEASURE            = "measure";
-	private static final String  XML_VARIABLE           = "lvariable";
-	private static final String  XML_INTERNAL_VARIABLES = "pvariables";
-	private static final String  XML_DATAFORMAT         = "dataformat";
-	private static final String  XML_ELEMENT_DELIMITER  = "\u002c"; //","; 
-		
-	//ID sequences for instructions and test definitions
-	//(both are reset before each registration pass to obtain consistent IDs)
-	private static IDSequence _seqInst     = null;
-	private static IDSequence _seqTestDef  = null;
-	
-	//registered instructions and test definitions
-	//testdef ID -> test definition
-	private static HashMap<Integer, PerfTestDef>   _regTestDef        = null; 
-	//instruction ID -> instruction object / registered name, and name -> instruction ID
-	private static HashMap<Integer, Instruction>   _regInst           = null;
-	private static HashMap<Integer, String>        _regInst_IDNames   = null;
-	private static HashMap<String, Integer>        _regInst_NamesID   = null;
-	//instruction ID -> IDs of the test definitions to run for that instruction
-	private static HashMap<Integer, Integer[]>     _regInst_IDTestDef = null; 
-	//instruction ID -> true if 1D tests use column vectors (dim2=1) instead of square matrices
-	private static HashMap<Integer, Boolean>       _regInst_IDVectors = null;
-	//instruction ID -> schema deciding which of the variables A, B, C are created for a test
-	private static HashMap<Integer, IOSchema>      _regInst_IDIOSchema = null;
-	
-	protected static final Log LOG = LogFactory.getLog(PerfTestTool.class.getName());
-	
-	
-	//testdef IDs created by registerTestConfigurations, returned by getDefaultTestDefs
-	private static Integer[] _defaultConf  = null;
-//	private static Integer[] _MRConf  = null;
-	
-	//raw measurement data (instID, physical defID, results)
-	private static HashMap<Integer,HashMap<Integer,LinkedList<Double>>> _results = null;
-		
-	//profile data 
-	//flag checked by lazyInit to decide whether the profile still has to be read
-	private static boolean    _flagReadData = false; 
-	//instruction ID -> (testdef ID -> cost function), queried by getCostFunction
-	private static HashMap<Integer,HashMap<Integer,CostFunction>> _profile = null;
-	
-	/**
-	 * Logical test variables: the variable a test definition is registered for and
-	 * a cost function is requested for (see getCostFunction).
-	 */
-	public enum TestVariable //logical test variable
-	{
-		DATA_SIZE,
-		SPARSITY,
-		PARALLELISM,
-		
-		//some mr specific conf properties
-		SORT_IO_MEM
-	}
-	
-	/**
-	 * Physical test variables: the quantities that are actually varied when a test case
-	 * is executed. DATA_SIZE, SPARSITY and SORT_IO_MEM are handled by executeTestCase1D,
-	 * DIM1_SIZE..DIM3_SIZE by executeTestCaseMD.
-	 */
-	public enum InternalTestVariable //physical test variable
-	{
-		DATA_SIZE,
-		DIM1_SIZE,
-		DIM2_SIZE,
-		DIM3_SIZE,
-		SPARSITY,
-		SORT_IO_MEM
-	}
-	
-	/**
-	 * Input/output schema of a registered instruction. The test-case executors use it to
-	 * decide which of the variables A, B (generated inputs) and C (empty result) are created:
-	 * A for every schema except NONE_NONE and NONE_UNARY; B for BINARY_NONE, BINARY_UNARY
-	 * and UNARY_UNARY; C for NONE_UNARY, UNARY_UNARY and BINARY_UNARY.
-	 * NOTE(review): the names read as INPUTS_OUTPUTS; that UNARY_UNARY also creates B looks
-	 * intentional (a disabled registration mentions "needs B instead of C") - confirm.
-	 */
-	public enum IOSchema
-	{
-		NONE_NONE,
-		NONE_UNARY,
-		UNARY_UNARY,
-		BINARY_NONE,
-		BINARY_UNARY
-	}
-	
-	/**
-	 * Logical test constants (read and write throughput).
-	 * NOTE(review): DFS/LFS presumably stand for distributed and local file system - confirm;
-	 * the enum is not referenced by the code visible in this part of the file.
-	 */
-	public enum TestConstants //logical test constants
-	{
-		DFS_READ_THROUGHPUT,
-		DFS_WRITE_THROUGHPUT,
-		LFS_READ_THROUGHPUT,
-		LFS_WRITE_THROUGHPUT
-	}
-	
-	//class initialization: create empty registries and optionally pre-load the profile
-	static
-	{
-		//ID sequences
-		_seqInst      = new IDSequence();
-		_seqTestDef   = new IDSequence();
-		
-		//registries of test definitions and instructions
-		_regTestDef         = new HashMap<Integer, PerfTestDef>();
-		_regInst            = new HashMap<Integer, Instruction>();
-		_regInst_IDNames    = new HashMap<Integer, String>();
-		_regInst_NamesID    = new HashMap<String, Integer>();
-		_regInst_IDTestDef  = new HashMap<Integer, Integer[]>();
-		_regInst_IDVectors  = new HashMap<Integer, Boolean>();
-		_regInst_IDIOSchema = new HashMap<Integer, IOSchema>();
-		
-		//measurements and profile start out empty and unread
-		_results      = new HashMap<Integer, HashMap<Integer,LinkedList<Double>>>();
-		_profile      = new HashMap<Integer, HashMap<Integer,CostFunction>>();
-		_flagReadData = false;
-		
-		//eager profile load; any failure aborts class initialization
-		if( READ_STATS_ON_STARTUP )
-		{
-			try
-			{
-				readProfile( PERF_PROFILE_FNAME );
-			}
-			catch(Exception ex)
-			{
-				throw new RuntimeException(ex);
-			}
-		}
-	}
-
-	/**
-	 * Makes sure test definitions, instructions and the profile are available before
-	 * the first lookup. As long as the read flag is not set, all test configurations and
-	 * instructions are (re-)registered and the profile file is read.
-	 * NOTE(review): the flag is not set in this method; presumably readProfile does - confirm.
-	 *
-	 * @throws DMLRuntimeException if registration or reading fails (original exception is
-	 *         wrapped), or if no profile map exists afterwards
-	 */
-	public static void lazyInit() 
-		throws DMLRuntimeException
-	{
-		final boolean needsLoad = !_flagReadData;
-		if( needsLoad )
-		{
-			try
-			{
-				//testdefs first, because instructions refer to their IDs
-				registerTestConfigurations();
-				registerInstructions();
-				readProfile( PERF_PROFILE_FNAME );
-			}
-			catch(Exception ex)
-			{
-				throw new DMLRuntimeException(ex);
-			}	
-		}
-		
-		if( null == _profile )
-		{
-			throw new DMLRuntimeException("Performance test results have not been loaded completely.");
-		}
-	}
-
-	/**
-	 * Tells whether an instruction has been registered under the given name.
-	 *
-	 * @param opStr instruction name as used at registration
-	 * @return true if the name is a key of the name-to-ID registry
-	 * @throws DMLRuntimeException if the lazy initialization fails
-	 */
-	public static boolean isRegisteredInstruction(String opStr)
-		throws DMLRuntimeException 
-	{
-		//registries are only populated after initialization
-		lazyInit();
-		
-		final boolean registered = _regInst_NamesID.containsKey(opStr);
-		return registered;
-	}
-
-	/**
-	 * Returns the profiled cost function of an instruction for the given measure,
-	 * logical variable and data format.
-	 *
-	 * @param instName   registered instruction name
-	 * @param measure    test measure of the requested cost function
-	 * @param variable   logical test variable of the requested cost function
-	 * @param dataformat data format of the requested cost function
-	 * @return the cost function, or null if the instruction is not registered, the
-	 *         profile has no entry for it, or no matching test definition exists
-	 * @throws DMLRuntimeException if the lazy initialization fails
-	 */
-	public static CostFunction getCostFunction( String instName, TestMeasure measure, TestVariable variable, DataFormat dataformat )
-		throws DMLRuntimeException
-	{		
-		//init if required
-		lazyInit();
-		
-		CostFunction tmp = null;
-		int instID = getInstructionID( instName );
-		if( instID != -1 ) //registered instruction
-		{
-			//a registered instruction is not necessarily part of the loaded profile,
-			//hence guard the per-instruction map against null
-			HashMap<Integer,CostFunction> instProfile = _profile.get(instID);
-			if( instProfile != null )
-			{
-				int tdefID = getMappedTestDefID(instID, measure, variable, dataformat);		
-				tmp = instProfile.get(tdefID); //null for tdefID==-1
-			}
-		}
-		return tmp;
-	}
-
-	/**
-	 * Runs the complete profiling pipeline: registers test definitions and instructions,
-	 * executes all tests, writes the raw results, computes and reads back the regression
-	 * models and finally writes the profile file.
-	 *
-	 * @return true on success; false if any step failed (the failure is logged, not rethrown)
-	 */
-	@SuppressWarnings("all")
-	public static boolean runTest()
-	{
-		try
-		{
-			Timing time = new Timing();
-			time.start();
-			
-			//caching must be initialized before any matrix is created
-			LazyWriteBuffer.init();
-			
-			//registration, then execution for all confs and all instructions
-			registerTestConfigurations();
-			registerInstructions();
-			executeTest();
-			
-			//regression models: one row per sample, one column per model term
-			final int numRows = NUM_SAMPLES_PER_TEST;
-			final int numCols = MODEL_INTERCEPT ? MODEL_MAX_ORDER + 1 : MODEL_MAX_ORDER + 0;
-			HashMap<Integer,Long> written = writeResults( PERF_TOOL_DIR );
-			computeRegressionModels( DML_SCRIPT_FNAME, DML_TMP_FNAME, PERF_TOOL_DIR, written.size(), numRows, numCols);
-			readRegressionModels( PERF_TOOL_DIR, written);
-			
-			//execConstantRuntimeTest();
-			//execConstantMemoryTest();
-		
-			//final profile as XML file
-			writeProfile(PERF_TOOL_DIR, PERF_PROFILE_FNAME);
-			System.out.format("SystemML PERFORMANCE TEST TOOL: finished profiling (in %.2f min), profile written to "+PERF_PROFILE_FNAME+"%n", time.stop()/60000);
-			
-			return true;
-		}
-		catch(Exception ex)
-		{
-			LOG.error("Failed to run performance test.", ex);
-			return false;
-		}
-	}
-
-	/**
-	 * Registers the default test definitions and remembers their IDs in _defaultConf.
-	 * For every combination of measure and data format two definitions are created:
-	 * one varying DATA_SIZE in [MIN_DATASIZE, MAX_DATASIZE] and one varying SPARSITY in
-	 * [MIN_SPARSITY, MAX_SPARSITY], each with NUM_SAMPLES_PER_TEST samples.
-	 * Currently only EXEC_TIME and DENSE are enabled.
-	 */
-	private static void registerTestConfigurations()
-	{
-		//reset ID Sequence for consistent IDs
-		_seqTestDef.reset();
-		
-		//register default testdefs //TODO
-		TestMeasure[] M = new TestMeasure[]{ TestMeasure.EXEC_TIME/*, TestMeasure.MEMORY_USAGE*/ };
-		DataFormat[] D =  new DataFormat[]{DataFormat.DENSE/*,DataFormat.SPARSE*/};
-		//two testdefs (data size, sparsity) per measure/format combination
-		Integer[] defaultConf = new Integer[M.length*D.length*2];		
-		int i=0;
-		for( TestMeasure m : M ) //for all measures
-			for( DataFormat d : D ) //for all data formats
-			{
-				defaultConf[i++] = registerTestDef( new PerfTestDef(m, TestVariable.DATA_SIZE, d, InternalTestVariable.DATA_SIZE,
-                        MIN_DATASIZE, MAX_DATASIZE, NUM_SAMPLES_PER_TEST ) );
-				defaultConf[i++] = registerTestDef( new PerfTestDef(m, TestVariable.SPARSITY, d, InternalTestVariable.SPARSITY,
-						MIN_SPARSITY, MAX_SPARSITY, NUM_SAMPLES_PER_TEST ) );
-			}
-		
-
-		//register advanced (multi-dim) test defs
-		//FIXME enable
-		/*for( TestMeasure m : M ) //for all measures
-			for( DataFormat d : D ) //for all data formats
-			{
-				registerTestDef( new PerfTestDef( m, TestVariable.DATA_SIZE, d,
-                        new InternalTestVariable[]{InternalTestVariable.DIM1_SIZE,InternalTestVariable.DIM2_SIZE,InternalTestVariable.DIM3_SIZE}, 
-                        MIN_DIMSIZE, MAX_DIMSIZE, NUM_SAMPLES_PER_TEST ) );
-			}*/
-
-			
-		//register MR specific instructions FIXME: just for test
-		/*Integer[] mrConf = new Integer[D.length];
-		i = 0;
-		for( DataFormat d : D )
-		{
-			mrConf[i++] = registerTestDef( new PerfTestDef(TestMeasure.EXEC_TIME, TestVariable.SORT_IO_MEM, d,
-					                         InternalTestVariable.SORT_IO_MEM,
-				                             MIN_SORT_IO_MEM, MAX_SORT_IO_MEM, NUM_SAMPLES_PER_TEST ) );
-		}*/
-		
-		//set default testdefs
-		_defaultConf = defaultConf;
-		//_MRConf = mrConf;
-	}
-
-	/**
-	 * Registers the instructions to be profiled, each with its test definitions, vector
-	 * flag and IO schema. Only the CP "tsmm" instruction is currently active; all other
-	 * registrations below are disabled.
-	 * NOTE(review): the disabled code refers to "Lops" and to IOSchema.UNARY_NONE, whereas
-	 * the active code uses "Lop" and the IOSchema enum has no UNARY_NONE constant, so it
-	 * would have to be adapted before it can be re-enabled.
-	 *
-	 * @throws DMLRuntimeException declared for the instruction parsing
-	 */
-	private static void registerInstructions() 
-		throws DMLRuntimeException
-	{
-		//reset ID sequences for consistent IDs
-		_seqInst.reset();
-		
-		///////
-		// CP instructions
-		
-		//matrix multiply mmtsj
-		registerInstruction( "CP"+Lop.OPERAND_DELIMITOR+"tsmm", CPInstructionParser.parseSingleInstruction("CP"+Lop.OPERAND_DELIMITOR+"tsmm"+Lop.OPERAND_DELIMITOR+"A"+Lop.DATATYPE_PREFIX+"MATRIX"+Lop.VALUETYPE_PREFIX+"DOUBLE"+Lop.OPERAND_DELIMITOR+"C"+Lop.DATATYPE_PREFIX+"MATRIX"+Lop.VALUETYPE_PREFIX+"DOUBLE"+Lop.OPERAND_DELIMITOR+MMTSJType.LEFT),
-						     getDefaultTestDefs(), false, IOSchema.UNARY_UNARY ); 
-		
-		/*
-		//matrix multiply 
-		registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"ba+*", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"ba+*"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"B"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"C"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
-						     getDefaultTestDefs(), false, IOSchema.BINARY_UNARY ); 
-		////registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"ba+*", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"ba+*"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"B"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"C"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
-		////		             changeToMuliDimTestDefs(TestVariable.DATA_SIZE, getDefaultTestDefs()) ); 
-		//rand
-		registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"Rand", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"Rand"+Lops.OPERAND_DELIMITOR+"rows=1"+Lops.OPERAND_DELIMITOR+"cols=1"+Lops.OPERAND_DELIMITOR+"rowsInBlock=1000"+Lops.OPERAND_DELIMITOR+"colsInBlock=1000"+Lops.OPERAND_DELIMITOR+"min=1.0"+Lops.OPERAND_DELIMITOR+"max=100.0"+Lops.OPERAND_DELIMITOR+"sparsity=1.0"+Lops.OPERAND_DELIMITOR+"seed=7"+Lops.OPERAND_DELIMITOR+"pdf=uniform"+Lops.OPERAND_DELIMITOR+"dir=."+Lops.OPERAND_DELIMITOR+"C"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
-				 			 getDefaultTestDefs(), false, IOSchema.NONE_UNARY );
-		//matrix transpose
-		registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"r'", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"r'"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"C"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
-	 			 			 getDefaultTestDefs(), false, IOSchema.UNARY_UNARY );
-		//sum
-		registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"uak+", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"uak+"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"B"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"), //needs B instead of C
-	 			             getDefaultTestDefs(), false, IOSchema.UNARY_UNARY );
-		//external function
-		registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"extfunct", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"extfunct"+Lops.OPERAND_DELIMITOR+DMLProgram.DEFAULT_NAMESPACE+""+Lops.OPERAND_DELIMITOR+"execPerfTestExtFunct"+Lops.OPERAND_DELIMITOR+"1"+Lops.OPERAND_DELIMITOR+"1"+Lops.OPERAND_DELIMITOR+"A"+Lops.OPERAND_DELIMITOR+"C"),
-	                         getDefaultTestDefs(), false, IOSchema.UNARY_UNARY );		
-		//central moment
-		registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"cm", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"cm"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"2"+Lops.DATATYPE_PREFIX+"SCALAR"+Lops.VALUETYPE_PREFIX+"INT"+Lops.OPERAND_DELIMITOR+"c"+Lops.DATATYPE_PREFIX+"SCALAR"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
-	            			 getDefaultTestDefs(), true, IOSchema.UNARY_NONE ); 
-		//co-variance
-		registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"cov", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"cov"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"B"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"c"+Lops.DATATYPE_PREFIX+"SCALAR"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
-     						 getDefaultTestDefs(), true, IOSchema.BINARY_NONE );
-		*/
-		
-		/*
-		///////
-		// MR instructions
-		registerInstruction( "jobtypeMMRJ", createMRJobInstruction(JobType.MMRJ,
-							                    MRInstructionParser.parseSingleInstruction("MR"+Lops.OPERAND_DELIMITOR+
-							                    		                                   "rmm"+Lops.OPERAND_DELIMITOR+
-							                    		                                   "0"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+
-							                    		                                   "1"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+
-							                    		                                   "2"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE ")),
-							 _MRConf, false, IOSchema.BINARY_UNARY ); 		
-
-		*/
-		/*ADD ADDITIONAL INSTRUCTIONS HERE*/
-		
-		
-		
-		//extend list to all (expensive) instructions; maybe also: createvar, assignvar, cpvar, rm, mv, setfilename, rmfilevar
-		
-	}
-	
-/*
-	private static Instruction createMRJobInstruction(JobType type, MRInstruction inst) 
-	{
-		MRJobInstruction mrinst = new MRJobInstruction(type);
-		
-		if( type == JobType.MMRJ )
-		{
-			ArrayList<String> inLab = new ArrayList<String>();
-			ArrayList<String> outLab = new ArrayList<String>();
-			inLab.add("A");
-			inLab.add("B");
-			outLab.add("C");
-			
-			mrinst.setMMRJInstructions(new String[]{"A","B"}, 
-									   "", 
-									   inst.toString(), 
-									   "", 
-									   "", 
-									   new String[]{"C"},
-									   new byte[]{2},
-									   10, 1 );
-			
-		}
-		
-		
-		return mrinst;
-	}
-*/
-
-	private static int registerTestDef( PerfTestDef def )
-	{
-		int ID = (int)_seqTestDef.getNextID();
-		
-		_regTestDef.put( ID, def );
-		
-		return ID;
-	}
-
-	/**
-	 * Registers an instruction under the next ID of the instruction sequence.
-	 *
-	 * @param iname      name the instruction is looked up by
-	 * @param inst       instruction object to execute in tests
-	 * @param testDefIDs IDs of the test definitions to run for this instruction
-	 * @param vectors    vector flag stored for the instruction
-	 * @param schema     IO schema stored for the instruction
-	 */
-	private static void registerInstruction( String iname, Instruction inst, Integer[] testDefIDs, boolean vectors, IOSchema schema )
-	{
-		//draw a fresh ID and delegate to the explicit-ID variant
-		registerInstruction( (int)_seqInst.getNextID(), iname, inst, testDefIDs, vectors, schema );
-	}
-
-	private static void registerInstruction( int ID, String iname, Instruction inst, Integer[] testDefIDs, boolean vector, IOSchema schema )
-	{
-		_regInst.put( ID, inst );
-		_regInst_IDNames.put( ID, iname );
-		_regInst_NamesID.put( iname, ID );
-		_regInst_IDTestDef.put( ID, testDefIDs );
-		_regInst_IDVectors.put( ID, vector );
-		_regInst_IDIOSchema.put( ID, schema );
-	}
-
-	/**
-	 * Searches the test definitions registered for an instruction for the first one
-	 * with the given measure, logical variable and data format.
-	 *
-	 * @param instID     ID of a registered instruction
-	 * @param measure    requested test measure
-	 * @param variable   requested logical test variable
-	 * @param dataformat requested data format
-	 * @return ID of the first matching test definition, or -1 if none matches
-	 */
-	private static int getMappedTestDefID( int instID, TestMeasure measure, TestVariable variable, DataFormat dataformat )
-	{
-		Integer[] candidates = _regInst_IDTestDef.get(instID);
-		
-		for( int i=0; i<candidates.length; i++ )
-		{
-			PerfTestDef cand = _regTestDef.get(candidates[i]);
-			boolean differs = cand.getMeasure()!=measure 
-				|| cand.getVariable()!=variable 
-				|| cand.getDataformat()!=dataformat;
-			if( !differs )
-				return candidates[i]; //first match wins
-		}
-		
-		return -1;
-	}
-
-	/**
-	 * Convenience variant of the test-definition lookup for a single physical variable.
-	 *
-	 * @return ID of the matching test definition, or -1 if none matches
-	 */
-	@SuppressWarnings("unused")
-	private static int getTestDefID( TestMeasure measure, TestVariable lvariable, DataFormat dataformat, InternalTestVariable pvariable )
-	{
-		//wrap the single variable and reuse the array-based lookup
-		InternalTestVariable[] single = new InternalTestVariable[]{pvariable};
-		return getTestDefID(measure, lvariable, dataformat, single);
-	}
-
-	/**
-	 * Looks up the registered test definition with the given measure, logical variable,
-	 * data format and exactly the given physical variables (same number, same order).
-	 *
-	 * @param measure    requested test measure
-	 * @param lvariable  requested logical test variable
-	 * @param dataformat requested data format
-	 * @param pvariables requested physical test variables; null never matches
-	 * @return ID of a matching test definition, or -1 if none matches
-	 */
-	private static int getTestDefID( TestMeasure measure, TestVariable lvariable, DataFormat dataformat, InternalTestVariable[] pvariables )
-	{
-		//nothing can match without physical variables (avoids dereferencing null below)
-		if( pvariables == null )
-			return -1;
-		
-		for( Entry<Integer,PerfTestDef> e : _regTestDef.entrySet() )
-		{
-			PerfTestDef def = e.getValue();
-			if( def.getMeasure()!=measure || def.getVariable()!=lvariable || def.getDataformat()!=dataformat )
-				continue;
-			
-			//the arrays must have equal length: a shorter request would run out of bounds,
-			//a longer one would be accepted on a matching prefix only
-			InternalTestVariable[] defVars = def.getInternalVariables();
-			if( defVars == null || defVars.length != pvariables.length )
-				continue;
-			
-			boolean match = true;
-			for( int i=0; i<defVars.length && match; i++ )
-				match = ( defVars[i] == pvariables[i] );
-			
-			if( match )
-				return e.getKey();
-		}
-
-		return -1;
-	}
-
-	/**
-	 * Translates a registered instruction name into its ID.
-	 *
-	 * @param instName instruction name as used at registration
-	 * @return the instruction ID, or -1 if no ID is stored for the name
-	 */
-	private static int getInstructionID( String instName )
-	{
-		Integer id = _regInst_NamesID.get( instName );
-		if( id == null )
-			return -1;
-		return id;
-	}
-
-	/**
-	 * Returns the IDs of all registered test definitions.
-	 *
-	 * @return a new array holding every key of the test-definition registry
-	 */
-	@SuppressWarnings("unused")
-	private static Integer[] getAllTestDefs()
-	{
-		Integer[] ids = new Integer[ _regTestDef.size() ];
-		return _regTestDef.keySet().toArray( ids );
-	}
-
-	private static Integer[] getDefaultTestDefs()
-	{
-		return _defaultConf;
-	}
-
-	/**
-	 * Creates a copy of the given test-definition IDs in which every definition of the
-	 * logical variable v is replaced by its registered multi-dimensional counterpart.
-	 * An ID is kept unchanged if its definition belongs to another variable, if v has no
-	 * multi-dimensional version (only DATA_SIZE has one), or if no such definition is
-	 * registered; the result therefore never contains the "not found" marker -1.
-	 *
-	 * @param v   logical test variable whose definitions are to be exchanged
-	 * @param IDs test-definition IDs to translate (not modified)
-	 * @return new array of the same length with translated IDs
-	 */
-	@SuppressWarnings("unused")
-	private static Integer[] changeToMuliDimTestDefs( TestVariable v, Integer[] IDs )
-	{
-		Integer[] tmp = new Integer[IDs.length];
-		
-		for( int i=0; i<tmp.length; i++ )
-		{
-			//default: keep the given testdef
-			tmp[i] = IDs[i];
-			
-			PerfTestDef def = _regTestDef.get(IDs[i]);
-			if( def.getVariable() != v ) //filter logical variables
-				continue;
-			
-			//find multidim version
-			InternalTestVariable[] in = null;
-			switch( v )
-			{
-				case DATA_SIZE: 
-					in = new InternalTestVariable[]{InternalTestVariable.DIM1_SIZE,InternalTestVariable.DIM2_SIZE,InternalTestVariable.DIM3_SIZE}; 
-					break;
-				default:
-					//no multidim version for this variable
-			}
-			
-			//never pass null physical variables to the lookup
-			if( in == null )
-				continue;
-			
-			//exchange testdef ID only if a multidim testdef is actually registered
-			int newid = getTestDefID(def.getMeasure(), def.getVariable(), def.getDataformat(), in );
-			if( newid != -1 )
-				tmp[i] = newid;
-		}
-		
-		return tmp;
-	}
-
-	/**
-	 * Runs every registered test definition for every registered instruction and stores
-	 * the measurements in _results (instruction ID -> testdef ID -> measured values).
-	 * Test definitions with one physical variable are executed once per sample value;
-	 * definitions with several physical variables are executed for every combination of
-	 * sample values.
-	 *
-	 * @throws DMLRuntimeException declared by the test-case executors
-	 * @throws IOException declared by the test-case executors
-	 */
-	private static void executeTest( ) 
-		throws DMLRuntimeException, IOException
-	{
-		System.out.println("SystemML PERFORMANCE TEST TOOL:");
-		
-		//foreach registered instruction	
-		for( Entry<Integer,Instruction> inst : _regInst.entrySet() )
-		{
-			int instID = inst.getKey();
-			System.out.println( "Running INSTRUCTION "+_regInst_IDNames.get(instID) );
-		
-			//metadata stored at registration
-			Integer[] testDefIDs = _regInst_IDTestDef.get(instID);
-			boolean vectors = _regInst_IDVectors.get(instID);
-			IOSchema schema = _regInst_IDIOSchema.get(instID);
-			
-			//create tmp program block and set instruction
-			Program prog = new Program();
-			ProgramBlock pb = new ProgramBlock( prog );
-			ArrayList<Instruction> ainst = new ArrayList<Instruction>();
-			ainst.add( inst.getValue() );
-			pb.setInstructions(ainst);
-			
-			//one execution context per instruction, shared by all its test cases
-			ExecutionContext ec = ExecutionContextFactory.createContext();
-			
-			//foreach registered test configuration
-			for( Integer defID : testDefIDs )
-			{
-				PerfTestDef def = _regTestDef.get(defID);
-				TestMeasure m = def.getMeasure();
-				TestVariable lv = def.getVariable();
-				DataFormat df = def.getDataformat();
-				InternalTestVariable[] pv = def.getInternalVariables();
-				double min = def.getMin();
-				double max = def.getMax();
-				double samples = def.getNumSamples();
-				
-				System.out.println( "Running TESTDEF(measure="+m+", variable="+String.valueOf(lv)+" "+pv.length+", format="+String.valueOf(df)+")" );
-				
-				//vary input variable
-				//dmeasure collects one (averaged) measurement per executed test case
-				LinkedList<Double> dmeasure = new LinkedList<Double>();
-				LinkedList<Double> dvariable = generateSequence(min, max, samples);					
-				int plen = pv.length;
-				
-				if( plen == 1 ) //1D function 
-				{
-					for( Double var : dvariable )
-					{
-						dmeasure.add(executeTestCase1D(m, pv[0], df, var, pb, vectors, schema, ec));
-					}
-				}
-				else //multi-dim function
-				{
-					//init index stack
-					//index[i] is the position in dvariable currently used for physical variable i
-					int[] index = new int[plen];
-					for( int i=0; i<plen; i++ )
-						index[i] = 0;
-					
-					//execute test 
-					//the loop ends once the leftmost index has run through all samples
-					int dlen = dvariable.size();
-					double[] buff = new double[plen];
-					while( index[0]<dlen )
-					{
-						//set buffer values
-						for( int i=0; i<plen; i++ )
-							buff[i] = dvariable.get(index[i]);
-						
-						//core execution
-						dmeasure.add(executeTestCaseMD(m, pv, df, buff, pb, schema, ec)); //not applicable for vector flag
-						
-						//increment indexes
-						//the rightmost index advances on every step; an index that reached dlen
-						//is reset to 0 and carried into its left neighbour within the same pass
-						for( int i=plen-1; i>=0; i-- )
-						{
-							if(i==plen-1)
-								index[i]++;
-							else if( index[i+1] >= dlen )
-							{
-								index[i]++;
-								index[i+1]=0;
-							}
-						}
-					}
-				}
-				
-								
-				//append values to results
-				if( !_results.containsKey(instID) )
-					_results.put(instID, new HashMap<Integer, LinkedList<Double>>());
-				_results.get(instID).put(defID, dmeasure);
-	
-			}
-		}
-	}
-
-	/**
-	 * Executes a single test case for a test definition with one physical variable.
-	 * The varied value determines either the data size or the sparsity (the other one takes
-	 * its default); from these the matrix dimensions are derived, the variables A, B and C
-	 * are created according to the IO schema, and the program block is executed
-	 * TEST_REPETITIONS times.
-	 *
-	 * @param m        measure to take
-	 * @param v        physical variable that is varied
-	 * @param df       data format of the generated matrices
-	 * @param varValue value of the varied variable for this test case
-	 * @param pb       program block holding exactly one instruction
-	 * @param vectors  if true, inputs are column vectors (datasize x 1), otherwise square
-	 * @param schema   IO schema deciding which variables are created
-	 * @param ec       execution context whose variables are replaced for this test case
-	 * @return the measurement averaged over all repetitions; for MEMORY_USAGE the memory
-	 *         growth observed while creating the variables is added if it is positive
-	 * @throws DMLRuntimeException declared by data generation and execution
-	 * @throws IOException declared by data generation
-	 */
-	private static double executeTestCase1D( TestMeasure m, InternalTestVariable v, DataFormat df, double varValue, ProgramBlock pb, boolean vectors, IOSchema schema, ExecutionContext ec ) 
-		throws DMLRuntimeException, IOException
-	{
-		double datasize = -1;
-		double dim1 = -1, dim2 = -1;
-		double sparsity = -1;
-		//double sortio = -1;
-		
-		System.out.println( "VAR VALUE "+varValue );
-	
-		//set test variables
-		//for any other physical variable datasize and sparsity keep their initial value -1
-		switch ( v )
-		{
-			case DATA_SIZE:
-				datasize = varValue;
-				sparsity = DEFAULT_SPARSITY;
-				break;
-			case SPARSITY:
-				datasize = DEFAULT_DATASIZE;
-				sparsity = varValue;
-				break;
-			case SORT_IO_MEM: //FIXME
-				datasize = DEFAULT_DATASIZE * DATASIZE_MR_SCALE;
-				sparsity = DEFAULT_SPARSITY;
-				//sortio = varValue;
-				break;	
-			default:
-				//do nothing
-		}
-		
-		//set specific dimensions
-		//vectors: datasize rows, one column; otherwise square with datasize cells
-		if( vectors )
-		{
-			dim1 = datasize;
-			dim2 = 1;
-		}
-		else
-		{
-			dim1 = Math.sqrt( datasize );
-			dim2 = dim1;
-		}
-		
-		//instruction-specific configurations
-		Instruction inst = pb.getInstruction(0); //always exactly one instruction
-		if( inst instanceof DataGenCPInstruction )
-		{
-			//data generation takes its output size and sparsity from the test variables
-			DataGenCPInstruction rand = (DataGenCPInstruction) inst;
-			rand.setRows((long)dim1);
-			rand.setCols((long)dim2);
-			rand.setSparsity(sparsity);
-		}
-		else if ( inst instanceof FunctionCallCPInstruction ) //ExternalFunctionInvocationInstruction
-		{
-			//register an external function "execPerfTestExtFunct" (matrix A in, matrix C out),
-			//implemented by PerfTestExtFunctCP, in the program of the block
-			Program prog = pb.getProgram();
-			ArrayList<DataIdentifier> in = new ArrayList<DataIdentifier>();
-			DataIdentifier dat1 = new DataIdentifier("A");
-			dat1.setDataType(DataType.MATRIX);
-			dat1.setValueType(ValueType.DOUBLE);
-			in.add(dat1);
-			ArrayList<DataIdentifier> out = new ArrayList<DataIdentifier>();
-			DataIdentifier dat2 = new DataIdentifier("C");
-			dat2.setDataType(DataType.MATRIX);
-			dat2.setValueType(ValueType.DOUBLE);
-			out.add(dat2);
-			HashMap<String, String> params = new HashMap<String, String>();
-			params.put(ExternalFunctionStatement.CLASS_NAME, PerfTestExtFunctCP.class.getName());			
-			ExternalFunctionProgramBlockCP fpb = new ExternalFunctionProgramBlockCP(prog, in, out, params, PERF_TOOL_DIR);	
-			prog.addFunctionProgramBlock(DMLProgram.DEFAULT_NAMESPACE, "execPerfTestExtFunct", fpb);
-		}
-		else if ( inst instanceof MRJobInstruction )
-		{
-			//FIXME hardcoded for test
-			//MMRJMR.SORT_IO_MEM = sortio;
-		}
-		
-		//generate input and output matrices
-		//used memory is sampled before and after to correct MEMORY_USAGE results below
-		LocalVariableMap vars = ec.getVariables();
-		vars.removeAll();
-		double mem1 = PerfTestMemoryObserver.getUsedMemory();
-		if( schema!=IOSchema.NONE_NONE && schema!=IOSchema.NONE_UNARY )
-			vars.put("A", generateInputDataset(PERF_TOOL_DIR+"/A", dim1, dim2, sparsity, df));
-		if( schema==IOSchema.BINARY_NONE || schema==IOSchema.BINARY_UNARY || schema==IOSchema.UNARY_UNARY )
-			vars.put("B", generateInputDataset(PERF_TOOL_DIR+"/B", dim1, dim2, sparsity, df));
-		if( schema==IOSchema.NONE_UNARY || schema==IOSchema.UNARY_UNARY || schema==IOSchema.BINARY_UNARY)
-			vars.put("C", generateEmptyResult(PERF_TOOL_DIR+"/C", dim1, dim2, df));
-		double mem2 = PerfTestMemoryObserver.getUsedMemory();
-		
-		//foreach repetition
-		//measurements are summed up and averaged afterwards
-		double value = 0;
-		for( int i=0; i<TEST_REPETITIONS; i++ )
-		{
-			System.out.println("run "+i);
-			value += executeGenericProgramBlock( m, pb, ec );
-		}
-		value/=TEST_REPETITIONS;
-		
-		//result correction and print result
-		switch( m )
-		{
-			case EXEC_TIME: System.out.println("--- RESULT: "+value+" ms"); break;
-			case MEMORY_USAGE: 
-				//System.out.println("--- RESULT: "+value+" byte"); 
-				if( (mem2-mem1) > 0 )
-					value = value + mem2-mem1; //correction: input sizes added
-				System.out.println("--- RESULT: "+value+" byte"); break;
-			default: System.out.println("--- RESULT: "+value); break;
-		}
-		
-		return value;
-	}
-
-	/**
-	 * Executes a single test case for a test definition with several physical variables.
-	 * The values of DIM1_SIZE, DIM2_SIZE and DIM3_SIZE determine the dimensions of
-	 * A (dim1 x dim2), B (dim2 x dim3) and C (dim1 x dim3); the sparsity is always
-	 * DEFAULT_SPARSITY. The program block is executed TEST_REPETITIONS times.
-	 *
-	 * @param m        measure to take
-	 * @param v        physical variables, positionally aligned with varValue
-	 * @param df       data format of the generated matrices
-	 * @param varValue values of the physical variables for this test case
-	 * @param pb       program block to execute
-	 * @param schema   IO schema deciding which variables are created
-	 * @param ec       execution context whose variables are replaced for this test case
-	 * @return the measurement averaged over all repetitions; for MEMORY_USAGE the memory
-	 *         growth observed while creating the variables is added if it is positive
-	 * @throws DMLRuntimeException declared by data generation and execution
-	 * @throws IOException declared by data generation
-	 */
-	private static double executeTestCaseMD( TestMeasure m, InternalTestVariable[] v, DataFormat df, double[] varValue, ProgramBlock pb, IOSchema schema, ExecutionContext ec ) 
-		throws DMLRuntimeException, IOException
-	{
-		//double datasize = DEFAULT_DATASIZE;
-		double sparsity = DEFAULT_SPARSITY;
-		//dimensions not covered by v keep the value -1
-		double dim1 = -1;
-		double dim2 = -1;
-		double dim3 = -1;
-
-		
-		for( int i=0; i<v.length; i++ )
-		{
-			System.out.println( "VAR VALUE "+varValue[i] );
-				
-			switch( v[i] )
-			{
-				case DIM1_SIZE: dim1=varValue[i]; break;
-				case DIM2_SIZE: dim2=varValue[i]; break;
-				case DIM3_SIZE: dim3=varValue[i]; break;
-				default: //do nothing
-			}
-		}
-		
-		//generate input and output matrices
-		//used memory is sampled before and after to correct MEMORY_USAGE results below
-		LocalVariableMap vars = ec.getVariables();
-		vars.removeAll();
-		double mem1 = PerfTestMemoryObserver.getUsedMemory();
-		if( schema!=IOSchema.NONE_NONE && schema!=IOSchema.NONE_UNARY )
-			 vars.put("A", generateInputDataset(PERF_TOOL_DIR+"/A", dim1, dim2, sparsity, df));
-		if( schema==IOSchema.BINARY_NONE || schema==IOSchema.BINARY_UNARY || schema==IOSchema.UNARY_UNARY )
-			 vars.put("B", generateInputDataset(PERF_TOOL_DIR+"/B", dim2, dim3, sparsity, df));
-		if( schema==IOSchema.NONE_UNARY || schema==IOSchema.UNARY_UNARY || schema==IOSchema.BINARY_UNARY)
-			vars.put("C", generateEmptyResult(PERF_TOOL_DIR+"/C", dim1, dim3, df));
-		double mem2 = PerfTestMemoryObserver.getUsedMemory();
-		
-		//foreach repetition
-		//measurements are summed up and averaged afterwards
-		double value = 0;
-		for( int i=0; i<TEST_REPETITIONS; i++ )
-		{
-			System.out.println("run "+i);
-			value += executeGenericProgramBlock( m, pb, ec );
-		}
-		value/=TEST_REPETITIONS;
-		
-		//result correction and print result
-		switch( m )
-		{
-			case EXEC_TIME: System.out.println("--- RESULT: "+value+" ms"); break;
-			case MEMORY_USAGE: 
-				//System.out.println("--- RESULT: "+value+" byte"); 
-				if( (mem2-mem1) > 0 )
-					value = value + mem2-mem1; //correction: input sizes added
-				System.out.println("--- RESULT: "+value+" byte"); break;
-			default: System.out.println("--- RESULT: "+value); break;
-		}
-		
-		return value;
-	}
-
-	public static double executeGenericProgramBlock( TestMeasure measure, ProgramBlock pb, ExecutionContext ec ) 
-		throws DMLRuntimeException
-	{
-		double value = 0;
-		try
-		{
-			switch( measure )
-			{
-			 	case EXEC_TIME: 
-			 		Timing time = new Timing(); 
-			 		time.start();
-			 		pb.execute( ec );
-			 		value = time.stop();
-			 		break;
-			 	case MEMORY_USAGE:
-			 		PerfTestMemoryObserver mo = new PerfTestMemoryObserver();
-			 		mo.measureStartMem();
-			 		Thread t = new Thread(mo);
-			 		t.start();
-			 		pb.execute( ec );
-			 		mo.setStopped();
-			 		value = mo.getMaxMemConsumption();
-			 		t.join();
-			 		break;
-			}
-		}
-		catch(Exception ex)
-		{
-			throw new DMLRuntimeException(ex);
-		}
-		
-		//clear matrices from cache
-		for( String str : ec.getVariables().keySet() )
-		{
-			Data dat = ec.getVariable(str); 
-			if( dat instanceof MatrixObject )
-				((MatrixObject)dat).clearData();		
-		}
-		
-		return value;
-	}
-
-	public static LinkedList<Double> generateSequence( double min, double max, double num )
-	{
-		LinkedList<Double> data = new LinkedList<Double>();
-		double increment = (max-min)/(num-1);
-		
-		for( int i=0; i<num; i++ )
-			data.add( Double.valueOf(min+i*increment) );
-		
-		return data;
-	}
-
-	public static MatrixObject generateInputDataset(String fname, double dim1, double dim2, double sparsity, DataFormat df) 
-		throws IOException, CacheException
-	{		
-		int d1 = (int) dim1;
-		int d2 = (int) dim2;
-		
-		System.out.println(d1+" "+d2);
-		
-		//create random test data
-		double[][] d = generateTestMatrix(d1, d2, 1, 100, sparsity, 7);
-		
-		//create matrix block
-		MatrixBlock mb = null;
-		switch( df ) 
-		{
-			case DENSE:
-				mb = new MatrixBlock(d1,d2,false);
-				break;
-			case SPARSE:
-				mb = new MatrixBlock(d1,d2,true, (int)(sparsity*dim1*dim2));
-				break;
-		}
-		
-		//insert data
-		for(int i=0; i < d1; i++)
-			for(int j=0; j < d2; j++)
-				if( d[i][j]!=0 )
-					mb.setValue(i, j, d[i][j]);		
-		
-		MapReduceTool.deleteFileIfExistOnHDFS(fname);
-		
-		MatrixCharacteristics mc = new MatrixCharacteristics(d1, d2, ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
-		MatrixFormatMetaData md = new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
-		MatrixObject mo = new MatrixObject(ValueType.DOUBLE,fname,md);
-		mo.acquireModify(mb);
-		mo.release();
-		mo.exportData(); //write to HDFS
-		
-		return mo;
-	}
-
-	public static MatrixObject generateEmptyResult(String fname, double dim1, double dim2, DataFormat df ) 
-		throws IOException, CacheException
-	{
-		int d1 = (int)dim1;
-		int d2 = (int)dim2;
-		
-		/*
-		MatrixBlock mb = null;
-		switch( df ) 
-		{
-			case DENSE:
-				mb = new MatrixBlock(dim,dim,false);
-				break;
-			case SPARSE:
-				mb = new MatrixBlock(dim,dim,true);
-				break;
-		}*/
-		
-		MatrixCharacteristics mc = new MatrixCharacteristics(d1, d2, ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
-		MatrixFormatMetaData md = new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
-		MatrixObject mo = new MatrixObject(ValueType.DOUBLE,fname,md);
-		
-		return mo;
-	}
-	
-
-	/**
-	 * NOTE: This is a copy of TestUtils.generateTestMatrix; it was replicated in order to avoid a
-	 * dependency of SystemML.jar on our test package.
-	 * 
-	 * @param rows number of rows
-	 * @param cols number of columns
-	 * @param min minimum value
-	 * @param max maximum value
-	 * @param sparsity sparsity as a fraction in [0,1] (probability that a cell is non-zero)
-	 * @param seed random seed value (-1 to seed from System.nanoTime())
-	 * @return matrix as 2D double array
-	 */
-	public static double[][] generateTestMatrix(int rows, int cols, double min, double max, double sparsity, long seed) {
-		double[][] matrix = new double[rows][cols];
-		Random random;
-		if (seed == -1)
-			random = new Random(System.nanoTime());
-		else
-			random = new Random(seed);
-
-		for (int i = 0; i < rows; i++) {
-			for (int j = 0; j < cols; j++) {
-				if (random.nextDouble() > sparsity)
-					continue;
-				matrix[i][j] = (random.nextDouble() * (max - min) + min);
-			}
-		}
-
-		return matrix;
-	}
-
-	@SuppressWarnings("all")
-	private static HashMap<Integer,Long> writeResults( String dirname ) 
-		throws IOException, DMLRuntimeException 
-	{
-		HashMap<Integer,Long> map = new HashMap<Integer, Long>();
-		int count = 1;
-		int offset = (MODEL_INTERCEPT ? 1 : 0);
-		int cols = MODEL_MAX_ORDER + offset;
-		
-		for( Entry<Integer,HashMap<Integer,LinkedList<Double>>> inst : _results.entrySet() )
-		{
-			int instID = inst.getKey();
-			HashMap<Integer,LinkedList<Double>> instCF = inst.getValue();
-			
-			for( Entry<Integer,LinkedList<Double>> cfun : instCF.entrySet() )
-			{
-				int tDefID = cfun.getKey();
-				long ID = IDHandler.concatIntIDsToLong(instID, tDefID);
-				LinkedList<Double> dmeasure = cfun.getValue();
-				
-				PerfTestDef def = _regTestDef.get(tDefID);
-				LinkedList<Double> dvariable = generateSequence(def.getMin(), def.getMax(), NUM_SAMPLES_PER_TEST);
-				int dlen = dvariable.size();
-				int plen = def.getInternalVariables().length;
-				
-				//write variable data set
-				CSVWriter writer1 = new CSVWriter( new FileWriter( dirname+count+"_in1.csv" ),',', CSVWriter.NO_QUOTE_CHARACTER);						
-				if( plen == 1 ) //one dimensional function
-				{
-					//write 1, x, x^2, x^3, ...
-					String[] sbuff = new String[cols];
-					for( Double val : dvariable )
-		    		{
-		    			for( int j=0; j<cols; j++ )
-	    					sbuff[j] = String.valueOf( Math.pow(val, j+1-offset) );
-					    writer1.writeNext(sbuff);
-		    		}
-				}
-				else // multi-dimensional function
-				{
-					//write 1, x,y,z,x^2,y^2,z^2, xy, xz, yz, xyz
-					
-					String[] sbuff = new String[(int)Math.pow(2,plen)-1+plen+offset-1]; 
-					//String[] sbuff = new String[plen+offset];
-					if(offset==1)
-						sbuff[0]="1";
-					
-					//init index stack
-					int[] index = new int[plen];
-					for( int i=0; i<plen; i++ )
-						index[i] = 0;
-					
-					//execute test 
-					double[] buff = new double[plen];
-					while( index[0]<dlen )
-					{
-						//set buffer values
-						for( int i=0; i<plen; i++ )
-							buff[i] = dvariable.get(index[i]);
-						
-						//core writing
-						for( int i=1; i<=plen; i++ )
-						{
-							if( i==1 )
-							{
-								for( int j=0; j<plen; j++ )
-									sbuff[offset+j] = String.valueOf( buff[j] );
-								for( int j=0; j<plen; j++ )
-									sbuff[offset+plen+j] = String.valueOf( Math.pow(buff[j],2) );
-							}
-							else if( i==2 )
-							{
-								int ix=0;
-								for( int j=0; j<plen-1; j++ )
-									for( int k=j+1; k<plen; k++, ix++ )
-										sbuff[offset+2*plen+ix] = String.valueOf( buff[j]*buff[k] );
-							}
-							else if( i==plen )
-							{
-								//double tmp=1;
-								//for( int j=0; j<plen; j++ )
-								//	tmp *= buff[j];
-								//sbuff[offset+2*plen+plen*(plen-1)/2] = String.valueOf(tmp);
-							}
-							else
-								throw new DMLRuntimeException("More than 3 dims currently not supported.");
-								
-						}
-							
-						//for( int i=0; i<plen; i++ )	
-	    				//	sbuff[offset+i] = String.valueOf( buff[i] );
-						
-					    writer1.writeNext(sbuff);
-
-						//increment indexes
-						for( int i=plen-1; i>=0; i-- )
-						{
-							if(i==plen-1)
-								index[i]++;
-							else if( index[i+1] >= dlen )
-							{
-								index[i]++;
-								index[i+1]=0;
-							}
-						}
-					}
-				}				
-			    writer1.close();
-				
-			    
-				//write measure data set
-				CSVWriter writer2 = new CSVWriter( new FileWriter( dirname+count+"_in2.csv" ),',', CSVWriter.NO_QUOTE_CHARACTER);		
-				String[] buff2 = new String[1];
-				for( Double val : dmeasure )
-				{
-					buff2[0] = String.valueOf( val );
-					writer2.writeNext(buff2);
-				}
-				writer2.close();
-			
-				map.put(count, ID);
-				count++;
-			}
-		}
-		
-		return map;
-	}
-
-	private static void computeRegressionModels( String dmlname, String dmltmpname, String dir, int models, int rows, int cols ) 
-		throws IOException, ParseException, DMLException
-	{
-		//clean scratch space 
-		//AutomatedTestBase.cleanupScratchSpace();
-		
-		//read DML template
-		StringBuilder buffer = new StringBuilder();
-		BufferedReader br = new BufferedReader( new FileReader(new File( dmlname )) );
-	
-		try
-		{
-			String line = null;
-			while( (line=br.readLine()) != null )
-			{
-				buffer.append(line);
-				buffer.append("\n");
-			}
-		}
-		finally
-		{
-			if( br != null )
-				br.close();
-		}
-		
-		//replace parameters
-		String template = buffer.toString();
-		template = template.replaceAll("%numModels%", String.valueOf(models));
-		template = template.replaceAll("%numRows%", String.valueOf(rows));
-		template = template.replaceAll("%numCols%", String.valueOf(cols));
-		template = template.replaceAll("%indir%", String.valueOf(dir));
-		
-		// write temp DML file
-		File fout = new File(dmltmpname);
-		FileOutputStream fos = new FileOutputStream(fout);
-		try {
-			fos.write(template.getBytes());
-		}
-		finally
-		{
-			if( fos != null )
-				fos.close();
-		}
-		
-		// execute DML script
-		DMLScript.main(new String[] { "-f", dmltmpname });
-	}
-
-	private static void readRegressionModels( String dname, HashMap<Integer,Long> IDMapping ) 
-		throws IOException
-	{
-		for( Entry<Integer,Long> e : IDMapping.entrySet() )
-		{
-			int count = e.getKey();
-			long ID = e.getValue();
-			int instID = IDHandler.extractIntIDFromLong(ID, 1);
-			int tDefID = IDHandler.extractIntIDFromLong(ID, 2);
-			
-			//read file and parse
-			LinkedList<Double> params = new LinkedList<Double>();
-			CSVReader reader1 = new CSVReader( new FileReader(dname+count+"_out.csv"), ',' );
-			String[] nextline = null;
-			while( (nextline = reader1.readNext()) != null )
-			{
-				params.add(Double.parseDouble(nextline[0]));
-			}
-			reader1.close();
-			
-			double[] dparams = new double[params.size()];
-			int i=0;
-			for( Double d : params )
-			{
-				dparams[i] = d;
-				i++;
-			}
-			
-			//create new cost function
-			boolean multidim = _regTestDef.get(tDefID).getInternalVariables().length > 1;
-			CostFunction cf = new CostFunction(dparams, multidim); 
-			
-			//append to profile
-			if( !_profile.containsKey(instID) )
-				_profile.put(instID, new HashMap<Integer, CostFunction>());
-			_profile.get(instID).put(tDefID, cf);
-		}
-	}
-
-	private static String serializeTestVariables( InternalTestVariable[] vars )
-	{
-		StringBuilder sb = new StringBuilder();
-		for( int i=0; i<vars.length; i++ )
-		{
-			if( i>0 )
-				sb.append( XML_ELEMENT_DELIMITER );
-			sb.append( String.valueOf(vars[i]) );
-		}
-		return sb.toString();
-	}
-
-	private static InternalTestVariable[] parseTestVariables(String vars)
-	{
-		StringTokenizer st = new StringTokenizer(vars, XML_ELEMENT_DELIMITER);
-		InternalTestVariable[] v = new InternalTestVariable[st.countTokens()];
-		for( int i=0; i<v.length; i++ )
-			v[i] = InternalTestVariable.valueOf(st.nextToken());
-		return v;
-	}
-
-	private static String serializeParams( double[] vals )
-	{
-		StringBuilder sb = new StringBuilder();
-		for( int i=0; i<vals.length; i++ )
-		{
-			if( i>0 )
-				sb.append( XML_ELEMENT_DELIMITER );
-			sb.append( String.valueOf(vals[i]) );
-		}
-		return sb.toString();
-	}
-
-	private static double[] parseParams( String valStr )
-	{
-		StringTokenizer st = new StringTokenizer(valStr, XML_ELEMENT_DELIMITER);
-		double[] params = new double[st.countTokens()];
-		for( int i=0; i<params.length; i++ )
-			params[i] = Double.parseDouble(st.nextToken());
-		return params;
-	}
-
-	private static void readProfile( String fname ) 
-		throws XMLStreamException, IOException
-	{
-		//init profile map
-		_profile = new HashMap<Integer, HashMap<Integer,CostFunction>>();
-		
-		//read existing profile
-		FileInputStream fis = new FileInputStream( fname );
-
-		try
-		{
-			//xml parsing
-			XMLInputFactory xif = XMLInputFactory.newInstance();
-			XMLStreamReader xsr = xif.createXMLStreamReader( fis );
-			
-			int e = xsr.nextTag(); // profile start
-			
-			while( true ) //read all instructions
-			{
-				e = xsr.nextTag(); // instruction start
-				if( e == XMLStreamConstants.END_ELEMENT )
-					break; //reached profile end tag
-				
-				//parse instruction
-				int ID = Integer.parseInt( xsr.getAttributeValue(null, XML_ID) );
-				//String name = xsr.getAttributeValue(null, XML_NAME).trim().replaceAll(" ", Lops.OPERAND_DELIMITOR);
-				HashMap<Integer, CostFunction> tmp = new HashMap<Integer, CostFunction>();
-				_profile.put( ID, tmp );
-				
-				while( true )
-				{
-					e = xsr.nextTag(); // cost function start
-					if( e == XMLStreamConstants.END_ELEMENT )
-						break; //reached instruction end tag
-					
-					//parse cost function
-					TestMeasure m = TestMeasure.valueOf( xsr.getAttributeValue(null, XML_MEASURE) );
-					TestVariable lv = TestVariable.valueOf( xsr.getAttributeValue(null, XML_VARIABLE) );
-					InternalTestVariable[] pv = parseTestVariables( xsr.getAttributeValue(null, XML_INTERNAL_VARIABLES) );
-					DataFormat df = DataFormat.valueOf( xsr.getAttributeValue(null, XML_DATAFORMAT) );
-					int tDefID = getTestDefID(m, lv, df, pv);
-					
-					xsr.next(); //read characters
-					double[] params = parseParams(xsr.getText());
-					boolean multidim = _regTestDef.get(tDefID).getInternalVariables().length > 1;
-					CostFunction cf = new CostFunction( params, multidim );
-					tmp.put(tDefID, cf);
-				
-					xsr.nextTag(); // cost function end
-					//System.out.println("added cost function");
-				}
-			}
-			xsr.close();
-		}
-		finally
-		{
-			IOUtilFunctions.closeSilently(fis);
-		}
-		
-		//mark profile as successfully read
-		_flagReadData = true;
-	}
-	
-	/**
-	 * StAX for efficient streaming XML writing.
-	 * 
-	 * @param dname directory name
-	 * @param fname file name
-	 * @throws IOException if IOException occurs
-	 * @throws XMLStreamException if XMLStreamException occurs
-	 */
-	private static void writeProfile( String dname, String fname ) 
-		throws IOException, XMLStreamException 
-	{
-		//create initial directory and file 
-		File dir =  new File( dname );
-		if( !dir.exists() )
-			dir.mkdir();
-		File f = new File( fname );
-		f.createNewFile();
-		
-		FileOutputStream fos = new FileOutputStream( f );
-		
-		try
-		{
-			//create document
-			XMLOutputFactory xof = XMLOutputFactory.newInstance();
-			XMLStreamWriter xsw = xof.createXMLStreamWriter( fos );
-			//TODO use an alternative way for indentation
-			//xsw = new IndentingXMLStreamWriter( xsw ); //remove this line if no indenting required
-			
-			//write document content
-			xsw.writeStartDocument();
-			xsw.writeStartElement( XML_PROFILE );
-			xsw.writeAttribute(XML_DATE, String.valueOf(new Date()) );
-			
-			//foreach instruction (bundle of cost functions)
-			for( Entry<Integer,HashMap<Integer,CostFunction>> inst : _profile.entrySet() )
-			{
-				int instID = inst.getKey();
-				String instName = _regInst_IDNames.get( instID );
-						
-				xsw.writeStartElement( XML_INSTRUCTION ); 
-				xsw.writeAttribute(XML_ID, String.valueOf( instID ));
-				xsw.writeAttribute(XML_NAME, instName.replaceAll(Lop.OPERAND_DELIMITOR, " "));
-				
-				//foreach testdef cost function
-				for( Entry<Integer,CostFunction> cfun : inst.getValue().entrySet() )
-				{
-					int tdefID = cfun.getKey();
-					PerfTestDef def = _regTestDef.get(tdefID);
-					CostFunction cf = cfun.getValue();
-					
-					xsw.writeStartElement( XML_COSTFUNCTION );
-					xsw.writeAttribute( XML_ID, String.valueOf( tdefID ));
-					xsw.writeAttribute( XML_MEASURE, def.getMeasure().toString() );
-					xsw.writeAttribute( XML_VARIABLE, def.getVariable().toString() );
-					xsw.writeAttribute( XML_INTERNAL_VARIABLES, serializeTestVariables(def.getInternalVariables()) );
-					xsw.writeAttribute( XML_DATAFORMAT, def.getDataformat().toString() );
-					xsw.writeCharacters(serializeParams( cf.getParams() ));
-					xsw.writeEndElement();// XML_COSTFUNCTION
-				}
-				
-				xsw.writeEndElement(); //XML_INSTRUCTION
-			}
-			
-			xsw.writeEndElement();//XML_PROFILE
-			xsw.writeEndDocument();
-			xsw.close();
-		}
-		finally
-		{
-			IOUtilFunctions.closeSilently(fos);
-		}
-	}
-
-	
-	
-	/**
-	 * Main for invoking the actual performance test in order to produce profile.xml
-	 * 
-	 * @param args string arguments to main() method
-	 */
-	public static void main(String[] args)
-	{
-		//execute the local / remote performance test
-		PerfTestTool.runTest(); 
-	}
-
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml
deleted file mode 100644
index c216d52..0000000
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml
+++ /dev/null
@@ -1,59 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-#PerfTestTool: DML template for estimating cost functions.
-#Deprecated in SystemML 0.13
-
-dynRead = externalFunction(Matrix[Double] d, String fname, Integer m, Integer n) 
-return (Matrix[Double] D) 
-implemented in (classname="org.apache.sysml.runtime.controlprogram.parfor.test.dml.DynamicReadMatrix2DCP",exectype="mem") 
-
-dynWrite = externalFunction(Matrix[Double] R, String fname) 
-return (Matrix[Double] D) 
-implemented in (classname="org.apache.sysml.runtime.controlprogram.parfor.test.dml.DynamicWriteMatrix2DCP",exectype="mem") 
-
-solve = externalFunction(Matrix[Double] A, Matrix[Double] y) 
-return (Matrix[Double] b) 
-implemented in (classname="org.apache.sysml.packagesupport.LinearSolverWrapperCP",exectype="mem") 
-
-k = %numModels%;
-m = -1; 
-n = -1;
-
-dummy = matrix(1,rows=1,cols=1); 
-
-for( i in 1:k, par=8, mode=LOCAL )
-{
-   sin1 = "./conf/PerfTestTool/"+i+"_in1.csv";   
-   sin2 = "./conf/PerfTestTool/"+i+"_in2.csv";   
-   
-   D = dynRead( dummy, sin1, m, n );
-   y = dynRead( dummy, sin2, m, 1 );
-   
-   A = t(D) %*% D; # X'X
-   b = t(D) %*% y; # X'y
-   beta = solve(A,b); 
-
-   sout = "./conf/PerfTestTool/"+i+"_out.csv";   
-   
-   X=dynWrite( beta, sout );
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/test/java/org/apache/sysml/test/integration/functions/parfor/ParForRulebasedOptimizerTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/parfor/ParForRulebasedOptimizerTest.java b/src/test/java/org/apache/sysml/test/integration/functions/parfor/ParForRulebasedOptimizerTest.java
index beb08bd..343d846 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/parfor/ParForRulebasedOptimizerTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/parfor/ParForRulebasedOptimizerTest.java
@@ -30,15 +30,13 @@ import org.apache.sysml.test.utils.TestUtils;
 
 public class ParForRulebasedOptimizerTest extends AutomatedTestBase 
 {
-	
-	private final static String TEST_NAME1 = "parfor_optimizer1";
-	private final static String TEST_NAME2 = "parfor_optimizer2";
-	private final static String TEST_NAME3 = "parfor_optimizer3";
+	private final static String TEST_NAME1 = "parfor_optimizer1"; //+b for dml 
+	private final static String TEST_NAME2 = "parfor_optimizer2"; //+b for dml
+	private final static String TEST_NAME3 = "parfor_optimizer3"; //+b for dml
 	private final static String TEST_DIR = "functions/parfor/";
 	private final static String TEST_CLASS_DIR = TEST_DIR + ParForRulebasedOptimizerTest.class.getSimpleName() + "/";
 	private final static double eps = 1e-10;
-	
-	
+		
 	private final static int rows1 = 1000; //small CP
 	private final static int rows2 = 10000; //large MR
 	
@@ -67,82 +65,127 @@ public class ParForRulebasedOptimizerTest extends AutomatedTestBase
 
 	
 	@Test
-	public void testParForOptimizerCorrelationSmallSmall() 
-	{
-		runParForOptimizerTest(1, false, false);
+	public void testParForRulebasedOptimizerCorrelationSmallSmall() {
+		runParForOptimizerTest(1, false, false, false);
 	}
 	
+	@Test
+	public void testParForRulebasedOptimizerCorrelationSmallLarge() {
+		runParForOptimizerTest(1, false, true, false);
+	}
 	
 	@Test
-	public void testParForOptimizerCorrelationSmallLarge() 
-	{
-		runParForOptimizerTest(1, false, true);
+	public void testParForRulebasedOptimizerCorrelationLargeSmall() {
+		runParForOptimizerTest(1, true, false, false);
 	}
 	
+	@Test
+	public void testParForRulebasedOptimizerCorrelationLargeLarge() {
+		runParForOptimizerTest(1, true, true, false);
+	}
 	
 	@Test
-	public void testParForOptimizerCorrelationLargeSmall() 
-	{
-		runParForOptimizerTest(1, true, false);
+	public void testParForRulebasedOptimizerBivariateStatsSmallSmall() {
+		runParForOptimizerTest(2, false, false, false);
 	}
 	
 	@Test
-	public void testParForOptimizerCorrelationLargeLarge() 
-	{
-		runParForOptimizerTest(1, true, true);
+	public void testParForRulebasedOptimizerBivariateStatsSmallLarge() {
+		runParForOptimizerTest(2, false, true, false);
 	}
 	
+	@Test
+	public void testParForRulebasedOptimizerBivariateStatsLargeSmall() {
+		runParForOptimizerTest(2, true, false, false);
+	}
 	
 	@Test
-	public void testParForOptimizerBivariateStatsSmallSmall() 
-	{
-		runParForOptimizerTest(2, false, false);
+	public void testParForRulebasedOptimizerBivariateStatsLargeLarge() {
+		runParForOptimizerTest(2, true, true, false);
 	}
 	
 	@Test
-	public void testParForOptimizerBivariateStatsSmallLarge() 
-	{
-		runParForOptimizerTest(2, false, true);
+	public void testParForRulebasedOptimizerFunctionInvocationSmallSmall() {
+		runParForOptimizerTest(3, false, false, false);
 	}
 	
 	@Test
-	public void testParForOptimizerBivariateStatsLargeSmall() 
-	{
-		runParForOptimizerTest(2, true, false);
+	public void testParForRulebasedOptimizerFunctionInvocationSmallLarge() {
+		runParForOptimizerTest(3, false, true, false);
 	}
 	
 	@Test
-	public void testParForOptimizerBivariateStatsLargeLarge() 
-	{
-		runParForOptimizerTest(2, true, true);
+	public void testParForRulebasedOptimizerFunctionInvocationLargeSmall() {
+		runParForOptimizerTest(3, true, false, false);
 	}
 	
 	@Test
-	public void testParForOptimizerFunctionInvocationSmallSmall() 
-	{
-		runParForOptimizerTest(3, false, false);
+	public void testParForRulebasedOptimizerFunctionInvocationLargeLarge() {
+		runParForOptimizerTest(3, true, true, false);
 	}
 	
 	@Test
-	public void testParForOptimizerFunctionInvocationSmallLarge() 
-	{
-		runParForOptimizerTest(3, false, true);
+	public void testParForHeuristicOptimizerCorrelationSmallSmall() {
+		runParForOptimizerTest(1, false, false, true);
 	}
 	
 	@Test
-	public void testParForOptimizerFunctionInvocationLargeSmall() 
-	{
-		runParForOptimizerTest(3, true, false);
+	public void testParForHeuristicOptimizerCorrelationSmallLarge() {
+		runParForOptimizerTest(1, false, true, true);
 	}
 	
 	@Test
-	public void testParForOptimizerFunctionInvocationLargeLarge() 
-	{
-		runParForOptimizerTest(3, true, true);
+	public void testParForHeuristicOptimizerCorrelationLargeSmall() {
+		runParForOptimizerTest(1, true, false, true);
+	}
+	
+	@Test
+	public void testParForHeuristicOptimizerCorrelationLargeLarge() {
+		runParForOptimizerTest(1, true, true, true);
 	}
 	
+	@Test
+	public void testParForHeuristicOptimizerBivariateStatsSmallSmall() {
+		runParForOptimizerTest(2, false, false, true);
+	}
 	
-	private void runParForOptimizerTest( int scriptNum, boolean largeRows, boolean largeCols )
+	@Test
+	public void testParForHeuristicOptimizerBivariateStatsSmallLarge() {
+		runParForOptimizerTest(2, false, true, true);
+	}
+	
+	@Test
+	public void testParForHeuristicOptimizerBivariateStatsLargeSmall() {
+		runParForOptimizerTest(2, true, false, true);
+	}
+	
+	@Test
+	public void testParForHeuristicOptimizerBivariateStatsLargeLarge() {
+		runParForOptimizerTest(2, true, true, true);
+	}
+	
+	@Test
+	public void testParForHeuristicOptimizerFunctionInvocationSmallSmall() {
+		runParForOptimizerTest(3, false, false, true);
+	}
+	
+	@Test
+	public void testParForHeuristicOptimizerFunctionInvocationSmallLarge() {
+		runParForOptimizerTest(3, false, true, true);
+	}
+	
+	@Test
+	public void testParForHeuristicOptimizerFunctionInvocationLargeSmall() {
+		runParForOptimizerTest(3, true, false, true);
+	}
+	
+	@Test
+	public void testParForHeuristicOptimizerFunctionInvocationLargeLarge() {
+		runParForOptimizerTest(3, true, true, true);
+	}
+	
+	
+	private void runParForOptimizerTest( int scriptNum, boolean largeRows, boolean largeCols, boolean timebasedOpt )
 	{
 		//find right rows and cols configuration
 		int rows=-1, cols=-1;  
@@ -171,31 +214,34 @@ public class ParForRulebasedOptimizerTest extends AutomatedTestBase
 		switch( scriptNum )
 		{
 			case 1: 
-				runUnaryTest(scriptNum, rows, cols);
+				runUnaryTest(scriptNum, timebasedOpt, rows, cols);
 				break;
 			case 2:
-				runNaryTest(scriptNum, rows, cols);
+				runNaryTest(scriptNum, timebasedOpt, rows, cols);
 				break;
 			case 3: 
-				runUnaryTest(scriptNum, rows, cols);
+				runUnaryTest(scriptNum, timebasedOpt, rows, cols);
 				break;	
 		}
 	}
 	
-	private void runUnaryTest(int scriptNum, int rows, int cols )
+	private void runUnaryTest(int scriptNum, boolean timebasedOpt, int rows, int cols )
 	{
 		TestConfiguration config = null;
 		String HOME = SCRIPT_DIR + TEST_DIR;
 		if( scriptNum==1 )
 		{
 			config=getTestConfiguration(TEST_NAME1);
-			fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
+			String testname = TEST_NAME1 + (timebasedOpt ? "b" : "");
+			fullDMLScriptName = HOME + testname + ".dml";
 		}
 		else if( scriptNum==3 )
 		{
 			config=getTestConfiguration(TEST_NAME3);
-			fullDMLScriptName = HOME + TEST_NAME3 + ".dml";
+			String testname = TEST_NAME3 + (timebasedOpt ? "b" : "");
+			fullDMLScriptName = HOME + testname + ".dml";
 		}
+		
 		config.addVariable("rows", rows);
 		config.addVariable("cols", cols);
 		loadTestConfiguration(config);
@@ -235,7 +281,7 @@ public class ParForRulebasedOptimizerTest extends AutomatedTestBase
 		TestUtils.compareMatrices(dmlfile, rfile, eps, "DML", "R");		
 	}
 	
-	private void runNaryTest(int scriptNum, int rows, int cols)
+	private void runNaryTest(int scriptNum, boolean timebasedOpt, int rows, int cols)
 	{
 		TestConfiguration config = getTestConfiguration(TEST_NAME2);
 		config.addVariable("rows", rows);
@@ -244,7 +290,8 @@ public class ParForRulebasedOptimizerTest extends AutomatedTestBase
 		
 		/* This is for running the junit test the new way, i.e., construct the arguments directly */
 		String HOME = SCRIPT_DIR + TEST_DIR;
-		fullDMLScriptName = HOME + TEST_NAME2 + ".dml";
+		String testname = TEST_NAME2 + (timebasedOpt ? "b" : "");
+		fullDMLScriptName = HOME + testname + ".dml";
 		programArgs = new String[]{"-args", 
 			input("D"),
 			input("S1"), input("S2"),

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/test/scripts/functions/parfor/parfor_optimizer1b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_optimizer1b.dml b/src/test/scripts/functions/parfor/parfor_optimizer1b.dml
new file mode 100644
index 0000000..cd0a3f7
--- /dev/null
+++ b/src/test/scripts/functions/parfor/parfor_optimizer1b.dml
@@ -0,0 +1,53 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+R = matrix(0, rows=n,cols=n); 
+dummy = matrix(1, rows=1, cols=1);
+
+parfor( i in 1:(n-1), opt=HEURISTIC )
+{
+   X = V[,i];                 
+   m2X = moment(X,2);
+   sigmaX = sqrt(m2X * (W/(W-1.0)) );
+      
+   parfor( j in (i+1):n )  
+   {  
+      Y = V[,j];
+
+      #corr computation    
+      m2Y = moment(Y,2);
+      sigmaY = sqrt(m2Y * (W/(W-1.0)) );      
+      covXY = cov(X,Y);      
+      rXY = covXY / (sigmaX*sigmaY); 
+      
+      #print("R[("+i+","+j+")]="+rXY); 
+      R[i,j] = dummy * rXY; 
+      
+   }
+}   
+
+write(R, $4);       
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/test/scripts/functions/parfor/parfor_optimizer2b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_optimizer2b.dml b/src/test/scripts/functions/parfor/parfor_optimizer2b.dml
new file mode 100644
index 0000000..6b41058
--- /dev/null
+++ b/src/test/scripts/functions/parfor/parfor_optimizer2b.dml
@@ -0,0 +1,277 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+
+/*
+ *
+ * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs 
+ *   Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} 
+ *          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n)
+ *
+ * Ten inputs ($6 is the output, see below):
+ *    $1) D  - input data, read with $7 rows and $8 columns
+ *    $2) S1 - First attribute set {A_11, A_12, ... A_1m}
+ *    $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
+ *    $4) K1 - kind for attributes in S1 
+ *    $5) K2 - kind for attributes in S2
+ *             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal (S1, S2, K1, K2 are each read as 1 x $9)
+ *    $10) numPairs - total number of pairs (m*n)
+ *    $11) maxC - maximum number of categories in any categorical attribute
+ * 
+ * One output:    
+ *    $6) output directory in which following four statistics files are created
+ *        + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
+ *                        (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
+ *        + category.counts - 
+ *        + category.means - 
+ *        + category.variances - 
+ *          -> Values in these three matrices are applicable only for scale-categorical attribute pairs. 
+ *          k^th column in these matrices denotes the attribute pair (A_1i,A_2j) where k = (i-1)*n + j.
+ */
+
+D = read($1, rows=$7, cols=$8);  # input data set ($7 rows, $8 columns)
+S1 = read($2, rows=1, cols=$9); # attribute set 1; its entries are used as column indices into D
+S2 = read($3, rows=1, cols=$9); # attribute set 2; NOTE(review): read with the same column count $9 as S1 - confirm both sets always have equal size
+K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
+K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
+numPairs = $10; # number of attribute pairs (|S1|*|S2|)
+maxC = $11;     # max number of categories in any categorical attribute
+
+s1size = ncol(S1);
+s2size = ncol(S2);
+
+#numpairs = s1size * s2size;
+#print(s1size + ", " + s2size + ", " + numpairs);
+
+# rows of basestats: 1=R, 2=chisq, 3=df, 4=pval, 5=cramersv, 6=spearman, 7=eta, 8=anovaf
+numstats = 8;
+basestats = matrix(0, rows=numstats, cols=numPairs);
+cat_counts = matrix(0, rows=maxC, cols=numPairs);
+cat_means = matrix(0, rows=maxC, cols=numPairs);
+cat_vars = matrix(0, rows=maxC, cols=numPairs);
+
+dummy = matrix(1, rows=1, cols=1);   # 1x1 matrix of ones; scalar results are multiplied with it before being stored
+
+# one inner iteration per attribute pair (i,j); every write below targets column pairID of a result matrix
+parfor( i in 1:s1size, check=0, opt=HEURISTIC) {
+    a1 = as.scalar(S1[,i]);       # column of D that holds the i-th attribute of S1
+    k1 = as.scalar(K1[1,i]);      # kind of that attribute
+    A1 = D[,a1];
+
+    parfor( j in 1:s2size, check=0) {
+        pairID = (i-1)*s2size+j;      # index of pair (i,j) when all pairs are enumerated with j running fastest
+        a2 = as.scalar(S2[,j]);
+        k2 = as.scalar(K2[1,j]);
+        A2 = D[,a2];
+    
+        if (k1 == k2) {
+            if (k1 == 1) {
+                # scale-scale
+                print("[" + i + "," + j + "] scale-scale");
+                r = bivar_ss(A1,A2);   
+                basestats[1,pairID] = dummy*r;
+            } else {
+                # nominal-nominal or ordinal-ordinal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
+                basestats[2,pairID] = dummy*chisq;
+                basestats[3,pairID] = dummy*df;
+                basestats[4,pairID] = dummy*pval;
+                basestats[5,pairID] = dummy*cramersv;
+
+                if ( k1 == 3 ) {
+                    # ordinal-ordinal: additionally store the statistic returned by bivar_oo in row 6
+                    print("[" + i + "," + j + "] ordinal-ordinal");
+                    sp = bivar_oo(A1, A2);
+                    basestats[6,pairID] = dummy*sp;
+                }
+            }
+        } 
+        else {
+            if (k1 == 1 | k2 == 1) {
+                # Scale-nominal/ordinal   
+                print("[" + i + "," + j + "] scale-categorical");
+                
+               if ( k1 == 1 ) {
+                    [eta,f, counts, means, vars] = bivar_sc(A1,A2);   # bivar_sc takes the scale attribute first, then the categorical one
+                }
+                else {
+                    [eta,f, counts, means, vars] = bivar_sc(A2,A1);   # A2 is the scale attribute here, so the arguments are swapped
+                }
+                basestats[7,pairID] = dummy*eta;
+                basestats[8,pairID] = dummy*f;
+                cat_counts[,pairID] = counts;   # NOTE(review): assigns a whole maxC-row column; presumably needs counts/means/vars to have exactly maxC rows - confirm
+                cat_means[,pairID] = means;
+                cat_vars[,pairID] = vars; 
+            }
+            else {
+                # nominal-ordinal or ordinal-nominal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
+                basestats[2,pairID] = dummy*chisq;
+                basestats[3,pairID] = dummy*df;
+                basestats[4,pairID] = dummy*pval;
+                basestats[5,pairID] = dummy*cramersv;
+            }
+        }
+    }
+}
+# all four result files are written below the directory given by $6
+write(basestats, $6 + "/bivar.stats");
+write(cat_counts, $6 + "/category.counts");
+write(cat_means, $6 + "/category.means");
+write(cat_vars, $6 + "/category.variances");
+
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
+    # Chi-squared statistics and Cramer's V for two categorical columns.
+    #   A, B     - the two attribute columns, cross-tabulated with table()
+    #   chisq    - chi-squared statistic of the contingency table
+    #   df       - (#rows-1)*(#cols-1) of the contingency table
+    #   pval     - pchisq of chisq at df with lower.tail=FALSE
+    #   cramersv - sqrt(chisq / (N*(min(#rows,#cols)-1))), N = sum of the table
+
+    # observed counts, table shape and total
+    observed = table(A,B);
+    numRows = nrow(observed);
+    numCols = ncol(observed);
+    total = sum(observed);
+
+    # expected counts: product of the row and column sums divided by the total
+    expected = (rowSums(observed) %*% colSums(observed)) / total;
+
+    # chi-squared statistic
+    chisq = sum((observed - expected)^2 / expected);
+
+    # degrees of freedom and p-value
+    dof = (numRows-1)*(numCols-1);
+    pval = pchisq(target=chisq, df=dof, lower.tail=FALSE);
+    df = dof;
+
+    # Cramer's V
+    smallerDim = min(numRows,numCols);
+    cramersv = sqrt(chisq/(total*(smallerDim-1)));
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
+    # Pearson's R of the two columns X and Y:
+    #   R = cov(X,Y) / (sigmaX * sigmaY)
+    # where each sigma is the square root of moment(.,2) rescaled by n/(n-1).
+
+    # rescaling factor n/(n-1), n = number of rows of X
+    n = nrow(X);
+    rescale = n/(n-1.0);
+
+    # standard deviations of both columns
+    sdX = sqrt(moment(X,2) * rescale);
+    sdY = sqrt(moment(Y,2) * rescale);
+
+    R = cov(X,Y) / (sdX*sdY);
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Scale-vs-categorical statistics: Eta and the ANOVA F statistic.
+# Y is the SCALE variable, A is the CATEGORICAL variable used for grouping.
+bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
+
+    # number of rows, overall mean and rescaled moment(Y,2) of the target variable
+    n = nrow(A);
+    grandMean = mean(Y);
+    totalVar = moment(Y,2) * n/(n-1.0);
+
+    # category-wise frequencies, means and variances of Y (also returned to the caller)
+    CFreqs = aggregate(target=Y, groups=A, fn="count");
+    CMeans = aggregate(target=Y, groups=A, fn="mean");
+    CVars =  aggregate(target=Y, groups=A, fn="variance");
+    numCats = nrow(CFreqs);
+
+    # within-category and between-category sums of squares
+    withinSS = sum((CFreqs-1)*CVars);
+    betweenSS = sum( (CFreqs*(CMeans-grandMean)^2) );
+
+    # Eta from the within/total ratio, F from the two mean squares
+    Eta = sqrt(1 - ( withinSS / ((n-1)*totalVar) ));
+    AnovaF = (betweenSS/(numCats-1)) / (withinSS/(n-numCats));
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+
+# -----------------------------------------------------------------------------------------------------------
+# Function to compute ranks
+# Takes a column vector X and returns a vector of the same size in which cell i
+# holds the score computed for category i:
+#    Ranks[i] = sum(X[1:(i-1)]) + (X[i]+1)/2
+computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
+    # cumsum(X) - X is the sum of all cells that precede each cell. A single
+    # cumsum replaces the former for loop, which re-summed X[1:(i-1)] for every i
+    # (quadratic in the length of X) and stored each cell through a separate
+    # left-indexed update.
+    precedingSum = cumsum(X) - X;
+
+    # add the per-cell offset (X[i]+1)/2; with X[i] read as the number of cases in
+    # category i this is the midpoint of the X[i] consecutive ranks they occupy
+    Ranks = precedingSum + (X+1)/2;
+}
+
+#-------------------------------------------------------------------------
+
+bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
+    # Correlation sp of two ordinal columns A and B, computed from their contingency
+    # table with rank scores as values and category counts as weights.
+
+    # compute contingency table
+    F = table(A,B);
+    catA = nrow(F);  # number of categories in A
+    W = sum(F);      # total weight, or total #cases
+
+    # compute category-wise counts for both the attributes (both as column vectors)
+    R = rowSums(F);
+    columnS = t(colSums(F));
+
+    # compute scores, both are column vectors
+    [C] = computeRanks(R);
+    [D] = computeRanks(columnS);
+
+    # scores (C,D) are individual values, and counts (R,columnS) act as weights
+    meanX = mean(C,R);
+    meanY = mean(D,columnS);
+    varX = moment(C,R,2)*(W/(W-1.0));
+    varY = moment(D,columnS,2)*(W/(W-1.0));
+
+    # deviations of the B scores from their mean, as a row vector; this does not
+    # depend on i, so it is computed once instead of in every loop iteration
+    devY = t(D[,1]) - meanY;
+
+    covXY = 0.0;
+    for(i in 1:catA) {
+        covXY = covXY + sum((F[i,]/(W-1)) * (as.scalar(C[i,1])-meanX) * devY);
+    }
+    sp = covXY/(sqrt(varX)*sqrt(varY));
+}
+
+# -----------------------------------------------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/test/scripts/functions/parfor/parfor_optimizer3b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_optimizer3b.dml b/src/test/scripts/functions/parfor/parfor_optimizer3b.dml
new file mode 100644
index 0000000..6eae759
--- /dev/null
+++ b/src/test/scripts/functions/parfor/parfor_optimizer3b.dml
@@ -0,0 +1,52 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+V = read($1,rows=$2,cols=$3);   # input matrix, read with $2 rows and $3 columns
+n = $3;
+nd = $4;                        # number of columns of the result row vector R
+# R is 1 x nd and zero-initialised; iteration i of the loop below writes cell (1,i)
+R = matrix(0, rows=1,cols=nd); 
+dummy = matrix(1, rows=1, cols=1);   # 1x1 matrix of ones; the scalar result is multiplied with it before being stored in R
+# loop over i in 1:(n/2), requesting the HEURISTIC parfor optimizer; each iteration pairs column i with column n-i+1
+parfor( i in 1:(n/2), opt=HEURISTIC )
+{
+   X = V[ ,i];                 
+   Y = V[ ,n-i+1];                 
+   sx = execSum(X);
+   sy = execSum(Y);
+   R[1,i] = dummy*( sx+sy );    # sum of both columns, stored at position i
+}   
+# the result is written to the location given by $5
+write(R, $5);       
+
+
+execSum = function(Matrix[Double] X) return (Double sx) 
+{  # returns sum(X), the sum over all cells of X
+   if( ncol(X) > 0 )   # NOTE(review): both branches assign the same value; presumably the if/else only exists to put control flow into the called function - confirm before simplifying
+   {
+      sx = sum(X);    
+   }
+   else
+   {
+      sx = sum(X);
+   }
+}
\ No newline at end of file