You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/02/24 20:27:36 UTC
[4/6] incubator-systemml git commit: [SYSTEMML-1302] Remove parfor
perftesttool, cleanup heuristic optimizer
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java
deleted file mode 100644
index c130031..0000000
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java
+++ /dev/null
@@ -1,1411 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.controlprogram.parfor.opt;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.Map.Entry;
-import java.util.Random;
-import java.util.StringTokenizer;
-
-import javax.xml.stream.XMLInputFactory;
-import javax.xml.stream.XMLOutputFactory;
-import javax.xml.stream.XMLStreamConstants;
-import javax.xml.stream.XMLStreamException;
-import javax.xml.stream.XMLStreamReader;
-import javax.xml.stream.XMLStreamWriter;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.sysml.api.DMLException;
-import org.apache.sysml.api.DMLScript;
-import org.apache.sysml.conf.ConfigurationManager;
-import org.apache.sysml.lops.Lop;
-import org.apache.sysml.lops.MMTSJ.MMTSJType;
-import org.apache.sysml.parser.DMLProgram;
-import org.apache.sysml.parser.DataIdentifier;
-import org.apache.sysml.parser.Expression.DataType;
-import org.apache.sysml.parser.Expression.ValueType;
-import org.apache.sysml.parser.ExternalFunctionStatement;
-import org.apache.sysml.parser.ParseException;
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlockCP;
-import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
-import org.apache.sysml.runtime.controlprogram.Program;
-import org.apache.sysml.runtime.controlprogram.ProgramBlock;
-import org.apache.sysml.runtime.controlprogram.caching.CacheException;
-import org.apache.sysml.runtime.controlprogram.caching.LazyWriteBuffer;
-import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
-import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
-import org.apache.sysml.runtime.controlprogram.context.ExecutionContextFactory;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.DataFormat;
-import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.TestMeasure;
-import org.apache.sysml.runtime.controlprogram.parfor.stat.Timing;
-import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler;
-import org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence;
-import org.apache.sysml.runtime.instructions.CPInstructionParser;
-import org.apache.sysml.runtime.instructions.Instruction;
-import org.apache.sysml.runtime.instructions.MRJobInstruction;
-import org.apache.sysml.runtime.instructions.cp.Data;
-import org.apache.sysml.runtime.instructions.cp.DataGenCPInstruction;
-import org.apache.sysml.runtime.instructions.cp.FunctionCallCPInstruction;
-import org.apache.sysml.runtime.io.IOUtilFunctions;
-import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
-import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
-import org.apache.sysml.runtime.matrix.data.InputInfo;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-import org.apache.sysml.runtime.matrix.data.OutputInfo;
-import org.apache.sysml.runtime.util.MapReduceTool;
-
-import au.com.bytecode.opencsv.CSVReader;
-import au.com.bytecode.opencsv.CSVWriter;
-
-/**
- * DML Instructions Performance Test Tool:
- *
- * Creates an offline performance profile (required once per installation) of DML instructions.
- * The profile is a combination of all individual statistical models trained per combination of
- * instruction and test configuration. In order to train those models, we execute and measure
- * real executions of DML instructions on random input data. Finally, during runtime, the profile
- * is used by the cost estimator in order to create statistical estimates for cost-based optimization.
- *
- *
- */
-@Deprecated
-public class PerfTestTool
-{
-
- //public parameters (used for estimation)
- //data size is the total number of cells of a generated input matrix: executeTestCase1D uses
- //datasize x 1 for vector instructions and sqrt(datasize) x sqrt(datasize) otherwise
- public static final long MIN_DATASIZE = 1000;
- public static final long MAX_DATASIZE = 1000000;
- //NOTE(review): the expression in the trailing comment evaluates to 499500, the literal is a rounded value
- public static final long DEFAULT_DATASIZE = 500000;//(MAX_DATASIZE-MIN_DATASIZE)/2;
- //multiplier applied to DEFAULT_DATASIZE when the SORT_IO_MEM variable is tested (see executeTestCase1D)
- public static final long DATASIZE_MR_SCALE = 20;
- public static final double MIN_SPARSITY = 0.1;
- public static final double MAX_SPARSITY = 1.0;
- //NOTE(review): the expression in the trailing comment evaluates to 0.45, the literal is a rounded value
- public static final double DEFAULT_SPARSITY = 0.5;//(MAX_SPARSITY-MIN_SPARSITY)/2;
-
- //internal parameters
- //if true, the static initializer reads the profile file at class-load time
- private static final boolean READ_STATS_ON_STARTUP = false;
- //number of executions averaged per test case
- private static final int TEST_REPETITIONS = 10;
- //number of sample points generated per test definition
- private static final int NUM_SAMPLES_PER_TEST = 11;
- //runTest passes MODEL_MAX_ORDER (+1 if MODEL_INTERCEPT) as column count to the regression step
- private static final int MODEL_MAX_ORDER = 2;
- private static final boolean MODEL_INTERCEPT = true;
-
- //working directory and file names of the tool
- private static final String PERF_TOOL_DIR = "./conf/PerfTestTool/";
-// private static final String PERF_RESULTS_FNAME = PERF_TOOL_DIR + "%id%.dat";
- private static final String PERF_PROFILE_FNAME = PERF_TOOL_DIR + "performance_profile.xml";
- private static final String DML_SCRIPT_FNAME = "./src/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml";
- private static final String DML_TMP_FNAME = PERF_TOOL_DIR + "temp.dml";
-
- //XML profile tags and attributes
- private static final String XML_PROFILE = "profile";
- private static final String XML_DATE = "date";
- private static final String XML_INSTRUCTION = "instruction";
- private static final String XML_ID = "id";
- private static final String XML_NAME = "name";
- private static final String XML_COSTFUNCTION = "cost_function";
- private static final String XML_MEASURE = "measure";
- private static final String XML_VARIABLE = "lvariable";
- private static final String XML_INTERNAL_VARIABLES = "pvariables";
- private static final String XML_DATAFORMAT = "dataformat";
- //the unicode escape \u002c is the comma character
- private static final String XML_ELEMENT_DELIMITER = "\u002c"; //",";
-
- //ID sequences for instructions and test definitions
- //(all static state below is declared null here and created in the static initializer)
- private static IDSequence _seqInst = null;
- private static IDSequence _seqTestDef = null;
-
- //registered instructions and test definitions
- //test definition ID -> test definition
- private static HashMap<Integer, PerfTestDef> _regTestDef = null;
- //the following maps are keyed by instruction ID (except _regInst_NamesID: name -> ID)
- private static HashMap<Integer, Instruction> _regInst = null;
- private static HashMap<Integer, String> _regInst_IDNames = null;
- private static HashMap<String, Integer> _regInst_NamesID = null;
- private static HashMap<Integer, Integer[]> _regInst_IDTestDef = null;
- private static HashMap<Integer, Boolean> _regInst_IDVectors = null;
- private static HashMap<Integer, IOSchema> _regInst_IDIOSchema = null;
-
- protected static final Log LOG = LogFactory.getLog(PerfTestTool.class.getName());
-
-
- //IDs of the default test definitions, set by registerTestConfigurations
- private static Integer[] _defaultConf = null;
-// private static Integer[] _MRConf = null;
-
- //raw measurement data (instID, physical defID, results)
- private static HashMap<Integer,HashMap<Integer,LinkedList<Double>>> _results = null;
-
- //profile data
- //checked by lazyInit to decide whether the profile still has to be read
- private static boolean _flagReadData = false;
- //cost functions per (instID, test definition ID), see getCostFunction
- private static HashMap<Integer,HashMap<Integer,CostFunction>> _profile = null;
-
- /**
-  * Logical test variables, i.e., the dimension along which a test definition
-  * varies its input and for which a cost function is requested.
-  */
- public enum TestVariable //logical test variable
- {
- DATA_SIZE,
- SPARSITY,
- PARALLELISM,
-
- //some mr specific conf properties
- SORT_IO_MEM
- }
-
- /**
-  * Physical test variables, i.e., the concrete parameters that are set when a
-  * test case is executed. A test definition holds an array of these; an array
-  * of length one is executed as 1D test, longer arrays as multi-dim test
-  * (see executeTest).
-  */
- public enum InternalTestVariable //physical test variable
- {
- DATA_SIZE,
- DIM1_SIZE,
- DIM2_SIZE,
- DIM3_SIZE,
- SPARSITY,
- SORT_IO_MEM
- }
-
- /**
-  * Input/output schema of a registered instruction; the test case executors
-  * use it to decide which of the variables "A", "B" and "C" are created
-  * before the instruction is run. The names presumably read as
-  * (inputs)_(outputs) -- confirm against the executors.
-  *
-  * NOTE(review): a commented-out registration in registerInstructions
-  * references UNARY_NONE, which is not defined here.
-  */
- public enum IOSchema
- {
- NONE_NONE,
- NONE_UNARY,
- UNARY_UNARY,
- BINARY_NONE,
- BINARY_UNARY
- }
-
- /**
-  * Logical test constants for read/write throughput. By their names these
-  * distinguish DFS and LFS throughput (presumably distributed vs. local file
-  * system -- confirm); they are not referenced in the part of the file
-  * reviewed here.
-  */
- public enum TestConstants //logical test constants
- {
- DFS_READ_THROUGHPUT,
- DFS_WRITE_THROUGHPUT,
- LFS_READ_THROUGHPUT,
- LFS_WRITE_THROUGHPUT
- }
-
- //class initialization: creates all (initially empty) registries and, if
- //READ_STATS_ON_STARTUP is enabled, eagerly loads the persisted profile
- static
- {
-     //profile and measurement state
-     _flagReadData = false;
-     _profile = new HashMap<Integer, HashMap<Integer,CostFunction>>();
-     _results = new HashMap<Integer, HashMap<Integer,LinkedList<Double>>>();
-
-     //instruction and test definition registries
-     _regTestDef = new HashMap<Integer, PerfTestDef>();
-     _regInst = new HashMap<Integer, Instruction>();
-     _regInst_IDNames = new HashMap<Integer, String>();
-     _regInst_NamesID = new HashMap<String, Integer>();
-     _regInst_IDTestDef = new HashMap<Integer, Integer[]>();
-     _regInst_IDVectors = new HashMap<Integer, Boolean>();
-     _regInst_IDIOSchema = new HashMap<Integer, IOSchema>();
-
-     //ID generators
-     _seqInst = new IDSequence();
-     _seqTestDef = new IDSequence();
-
-     //eager profile load (any failure aborts class initialization)
-     if( READ_STATS_ON_STARTUP )
-     {
-         try
-         {
-             readProfile( PERF_PROFILE_FNAME );
-         }
-         catch(Exception ex)
-         {
-             throw new RuntimeException(ex);
-         }
-     }
- }
-
- /**
-  * Initializes the tool on first access: as long as the read flag is not
-  * set, all test definitions and instructions are registered and the
-  * profile file is read.
-  *
-  * @throws DMLRuntimeException if registration or reading the profile
-  *         fails, or if no profile is available afterwards
-  */
- public static void lazyInit()
-     throws DMLRuntimeException
- {
-     try
-     {
-         if( !_flagReadData )
-         {
-             //registration has to precede reading the profile
-             registerTestConfigurations();
-             registerInstructions();
-             readProfile( PERF_PROFILE_FNAME );
-         }
-     }
-     catch(Exception ex)
-     {
-         throw new DMLRuntimeException(ex);
-     }
-
-     if( null == _profile )
-         throw new DMLRuntimeException("Performance test results have not been loaded completely.");
- }
-
- public static boolean isRegisteredInstruction(String opStr)
- throws DMLRuntimeException
- {
- //init if required
- lazyInit();
-
- //determine if inst registered
- return _regInst_NamesID.containsKey(opStr);
- }
-
- /**
-  * Looks up the cost function of an instruction for a given combination of
-  * measure, logical variable and data format.
-  *
-  * @param instName   name of the registered instruction
-  * @param measure    requested test measure
-  * @param variable   requested logical test variable
-  * @param dataformat requested data format
-  * @return the cost function, or null if the instruction is not registered,
-  *         if the profile has no entry for it, or if no test definition
-  *         matches the requested combination
-  * @throws DMLRuntimeException if the lazy initialization fails
-  */
- public static CostFunction getCostFunction( String instName, TestMeasure measure, TestVariable variable, DataFormat dataformat )
-     throws DMLRuntimeException
- {
-     //init if required
-     lazyInit();
-
-     int instID = getInstructionID( instName );
-     if( instID == -1 ) //unknown instruction
-         return null;
-
-     //a registered instruction is not necessarily part of the loaded profile,
-     //so guard against a missing entry instead of failing with an NPE
-     HashMap<Integer,CostFunction> instProfile = _profile.get(instID);
-     if( instProfile == null )
-         return null;
-
-     int tdefID = getMappedTestDefID(instID, measure, variable, dataformat);
-     return instProfile.get(tdefID);
- }
-
- /**
-  * Runs the complete profiling workflow: registers test definitions and
-  * instructions, executes all tests, computes and reads the regression
-  * models and finally writes the profile to the XML file.
-  *
-  * @return true if the workflow completed, false if any step failed (the
-  *         failure is logged, not propagated)
-  */
- @SuppressWarnings("all")
- public static boolean runTest()
- {
-     try
-     {
-         Timing timer = new Timing();
-         timer.start();
-
-         //caching has to be initialized before any matrix is created
-         LazyWriteBuffer.init();
-
-         //registration of test definitions and instructions
-         registerTestConfigurations();
-         registerInstructions();
-
-         //measurements for all instructions and configurations
-         executeTest();
-
-         //regression models over the written results
-         int numRows = NUM_SAMPLES_PER_TEST;
-         int numCols = MODEL_INTERCEPT ? MODEL_MAX_ORDER + 1 : MODEL_MAX_ORDER;
-         HashMap<Integer,Long> written = writeResults( PERF_TOOL_DIR );
-         computeRegressionModels( DML_SCRIPT_FNAME, DML_TMP_FNAME, PERF_TOOL_DIR, written.size(), numRows, numCols);
-         readRegressionModels( PERF_TOOL_DIR, written);
-
-         //execConstantRuntimeTest();
-         //execConstantMemoryTest();
-
-         //final profile as XML file
-         writeProfile(PERF_TOOL_DIR, PERF_PROFILE_FNAME);
-         System.out.format("SystemML PERFORMANCE TEST TOOL: finished profiling (in %.2f min), profile written to "+PERF_PROFILE_FNAME+"%n", timer.stop()/60000);
-
-         return true;
-     }
-     catch(Exception ex)
-     {
-         LOG.error("Failed to run performance test.", ex);
-         return false;
-     }
- }
-
- /**
-  * Registers the default test definitions and remembers their IDs in
-  * _defaultConf. For every combination of measure and data format two
-  * definitions are registered, one varying the data size and one varying
-  * the sparsity. The test definition ID sequence is reset first, so repeated
-  * calls hand out the same IDs in the same order.
-  */
- private static void registerTestConfigurations()
- {
- //reset ID Sequence for consistent IDs
- _seqTestDef.reset();
-
- //register default testdefs //TODO
- //only EXEC_TIME and DENSE are currently enabled
- TestMeasure[] M = new TestMeasure[]{ TestMeasure.EXEC_TIME/*, TestMeasure.MEMORY_USAGE*/ };
- DataFormat[] D = new DataFormat[]{DataFormat.DENSE/*,DataFormat.SPARSE*/};
- //two test definitions per (measure, data format) combination
- Integer[] defaultConf = new Integer[M.length*D.length*2];
- int i=0;
- for( TestMeasure m : M ) //for all measures
- for( DataFormat d : D ) //for all data formats
- {
- defaultConf[i++] = registerTestDef( new PerfTestDef(m, TestVariable.DATA_SIZE, d, InternalTestVariable.DATA_SIZE,
- MIN_DATASIZE, MAX_DATASIZE, NUM_SAMPLES_PER_TEST ) );
- defaultConf[i++] = registerTestDef( new PerfTestDef(m, TestVariable.SPARSITY, d, InternalTestVariable.SPARSITY,
- MIN_SPARSITY, MAX_SPARSITY, NUM_SAMPLES_PER_TEST ) );
- }
-
-
- //register advanced (multi-dim) test defs
- //FIXME enable
- /*for( TestMeasure m : M ) //for all measures
- for( DataFormat d : D ) //for all data formats
- {
- registerTestDef( new PerfTestDef( m, TestVariable.DATA_SIZE, d,
- new InternalTestVariable[]{InternalTestVariable.DIM1_SIZE,InternalTestVariable.DIM2_SIZE,InternalTestVariable.DIM3_SIZE},
- MIN_DIMSIZE, MAX_DIMSIZE, NUM_SAMPLES_PER_TEST ) );
- }*/
-
-
- //register MR specific instructions FIXME: just for test
- /*Integer[] mrConf = new Integer[D.length];
- i = 0;
- for( DataFormat d : D )
- {
- mrConf[i++] = registerTestDef( new PerfTestDef(TestMeasure.EXEC_TIME, TestVariable.SORT_IO_MEM, d,
- InternalTestVariable.SORT_IO_MEM,
- MIN_SORT_IO_MEM, MAX_SORT_IO_MEM, NUM_SAMPLES_PER_TEST ) );
- }*/
-
- //set default testdefs
- _defaultConf = defaultConf;
- //_MRConf = mrConf;
- }
-
- /**
-  * Registers all instructions that are profiled. Currently only the CP
-  * instruction "tsmm" is active; all other registrations are kept as
-  * commented-out templates. The instruction ID sequence is reset first, so
-  * repeated calls hand out the same IDs. The registrations use the default
-  * test definitions, hence registerTestConfigurations has to be called
-  * before this method.
-  *
-  * @throws DMLRuntimeException declared for parsing the instruction strings
-  */
- private static void registerInstructions()
- throws DMLRuntimeException
- {
- //reset ID sequences for consistent IDs
- _seqInst.reset();
-
- ///////
- // CP instructions
-
- //matrix multiply mmtsj
- //(input A, output C, not a vector instruction)
- registerInstruction( "CP"+Lop.OPERAND_DELIMITOR+"tsmm", CPInstructionParser.parseSingleInstruction("CP"+Lop.OPERAND_DELIMITOR+"tsmm"+Lop.OPERAND_DELIMITOR+"A"+Lop.DATATYPE_PREFIX+"MATRIX"+Lop.VALUETYPE_PREFIX+"DOUBLE"+Lop.OPERAND_DELIMITOR+"C"+Lop.DATATYPE_PREFIX+"MATRIX"+Lop.VALUETYPE_PREFIX+"DOUBLE"+Lop.OPERAND_DELIMITOR+MMTSJType.LEFT),
- getDefaultTestDefs(), false, IOSchema.UNARY_UNARY );
-
- //disabled registrations; NOTE(review): they reference "Lops" and IOSchema.UNARY_NONE,
- //which differ from the names used by the active code ("Lop", no UNARY_NONE constant)
- /*
- //matrix multiply
- registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"ba+*", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"ba+*"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"B"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"C"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
- getDefaultTestDefs(), false, IOSchema.BINARY_UNARY );
- ////registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"ba+*", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"ba+*"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"B"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"C"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
- //// changeToMuliDimTestDefs(TestVariable.DATA_SIZE, getDefaultTestDefs()) );
- //rand
- registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"Rand", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"Rand"+Lops.OPERAND_DELIMITOR+"rows=1"+Lops.OPERAND_DELIMITOR+"cols=1"+Lops.OPERAND_DELIMITOR+"rowsInBlock=1000"+Lops.OPERAND_DELIMITOR+"colsInBlock=1000"+Lops.OPERAND_DELIMITOR+"min=1.0"+Lops.OPERAND_DELIMITOR+"max=100.0"+Lops.OPERAND_DELIMITOR+"sparsity=1.0"+Lops.OPERAND_DELIMITOR+"seed=7"+Lops.OPERAND_DELIMITOR+"pdf=uniform"+Lops.OPERAND_DELIMITOR+"dir=."+Lops.OPERAND_DELIMITOR+"C"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
- getDefaultTestDefs(), false, IOSchema.NONE_UNARY );
- //matrix transpose
- registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"r'", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"r'"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"C"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
- getDefaultTestDefs(), false, IOSchema.UNARY_UNARY );
- //sum
- registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"uak+", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"uak+"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"B"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"), //needs B instead of C
- getDefaultTestDefs(), false, IOSchema.UNARY_UNARY );
- //external function
- registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"extfunct", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"extfunct"+Lops.OPERAND_DELIMITOR+DMLProgram.DEFAULT_NAMESPACE+""+Lops.OPERAND_DELIMITOR+"execPerfTestExtFunct"+Lops.OPERAND_DELIMITOR+"1"+Lops.OPERAND_DELIMITOR+"1"+Lops.OPERAND_DELIMITOR+"A"+Lops.OPERAND_DELIMITOR+"C"),
- getDefaultTestDefs(), false, IOSchema.UNARY_UNARY );
- //central moment
- registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"cm", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"cm"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"2"+Lops.DATATYPE_PREFIX+"SCALAR"+Lops.VALUETYPE_PREFIX+"INT"+Lops.OPERAND_DELIMITOR+"c"+Lops.DATATYPE_PREFIX+"SCALAR"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
- getDefaultTestDefs(), true, IOSchema.UNARY_NONE );
- //co-variance
- registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"cov", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"cov"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"B"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"c"+Lops.DATATYPE_PREFIX+"SCALAR"+Lops.VALUETYPE_PREFIX+"DOUBLE"),
- getDefaultTestDefs(), true, IOSchema.BINARY_NONE );
- */
-
- /*
- ///////
- // MR instructions
- registerInstruction( "jobtypeMMRJ", createMRJobInstruction(JobType.MMRJ,
- MRInstructionParser.parseSingleInstruction("MR"+Lops.OPERAND_DELIMITOR+
- "rmm"+Lops.OPERAND_DELIMITOR+
- "0"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+
- "1"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+
- "2"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE ")),
- _MRConf, false, IOSchema.BINARY_UNARY );
-
- */
- /*ADD ADDITIONAL INSTRUCTIONS HERE*/
-
-
-
- //extend list to all (expensive) instructions; maybe also: createvar, assignvar, cpvar, rm, mv, setfilename, rmfilevar
-
- }
-
-/*
- private static Instruction createMRJobInstruction(JobType type, MRInstruction inst)
- {
- MRJobInstruction mrinst = new MRJobInstruction(type);
-
- if( type == JobType.MMRJ )
- {
- ArrayList<String> inLab = new ArrayList<String>();
- ArrayList<String> outLab = new ArrayList<String>();
- inLab.add("A");
- inLab.add("B");
- outLab.add("C");
-
- mrinst.setMMRJInstructions(new String[]{"A","B"},
- "",
- inst.toString(),
- "",
- "",
- new String[]{"C"},
- new byte[]{2},
- 10, 1 );
-
- }
-
-
- return mrinst;
- }
-*/
-
- /**
-  * Adds a test definition to the registry under the next free ID.
-  *
-  * @param def test definition to register
-  * @return the ID assigned to the definition
-  */
- private static int registerTestDef( PerfTestDef def )
- {
-     long next = _seqTestDef.getNextID();
-     int id = (int) next;
-     _regTestDef.put( Integer.valueOf(id), def );
-     return id;
- }
-
- /**
-  * Registers an instruction under the next free instruction ID.
-  *
-  * @param iname      instruction name used for lookups
-  * @param inst       instruction instance to execute
-  * @param testDefIDs IDs of the test definitions to run for the instruction
-  * @param vectors    true if the instruction is tested on vectors
-  * @param schema     input/output schema of the instruction
-  */
- private static void registerInstruction( String iname, Instruction inst, Integer[] testDefIDs, boolean vectors, IOSchema schema )
- {
-     long nextID = _seqInst.getNextID();
-     registerInstruction((int)nextID, iname, inst, testDefIDs, vectors, schema);
- }
-
- /**
-  * Stores all registration data of an instruction under the given ID.
-  *
-  * @param ID         instruction ID used as key of all registry maps
-  * @param iname      instruction name (also mapped back to the ID)
-  * @param inst       instruction instance to execute
-  * @param testDefIDs IDs of the test definitions to run for the instruction
-  * @param vector     true if the instruction is tested on vectors
-  * @param schema     input/output schema of the instruction
-  */
- private static void registerInstruction( int ID, String iname, Instruction inst, Integer[] testDefIDs, boolean vector, IOSchema schema )
- {
-     //box the key once and reuse it for all maps
-     Integer key = Integer.valueOf(ID);
-
-     _regInst.put( key, inst );
-     _regInst_IDNames.put( key, iname );
-     _regInst_NamesID.put( iname, key );
-     _regInst_IDTestDef.put( key, testDefIDs );
-     _regInst_IDVectors.put( key, Boolean.valueOf(vector) );
-     _regInst_IDIOSchema.put( key, schema );
- }
-
- /**
-  * Finds, among the test definitions registered for an instruction, the
-  * first one with the given measure, logical variable and data format.
-  *
-  * @param instID     ID of a registered instruction
-  * @param measure    requested test measure
-  * @param variable   requested logical test variable
-  * @param dataformat requested data format
-  * @return ID of the first matching test definition, or -1 if none matches
-  */
- private static int getMappedTestDefID( int instID, TestMeasure measure, TestVariable variable, DataFormat dataformat )
- {
-     Integer[] candidates = _regInst_IDTestDef.get(instID);
-
-     for( int i=0; i<candidates.length; i++ )
-     {
-         Integer defID = candidates[i];
-         PerfTestDef def = _regTestDef.get(defID);
-         boolean matches = def.getMeasure()==measure
-                        && def.getVariable()==variable
-                        && def.getDataformat()==dataformat;
-         if( matches )
-             return defID;
-     }
-
-     return -1;
- }
-
- /**
-  * Convenience variant of the test definition lookup for a single physical
-  * variable; wraps the variable into an array and delegates.
-  *
-  * @return ID of the matching test definition, or -1 if none matches
-  */
- @SuppressWarnings("unused")
- private static int getTestDefID( TestMeasure measure, TestVariable lvariable, DataFormat dataformat, InternalTestVariable pvariable )
- {
-     InternalTestVariable[] single = new InternalTestVariable[]{pvariable};
-     return getTestDefID(measure, lvariable, dataformat, single);
- }
-
- /**
-  * Searches all registered test definitions for one with the given measure,
-  * logical variable, data format and exactly the given physical variables
-  * (same length, same order).
-  *
-  * @param measure    requested test measure
-  * @param lvariable  requested logical test variable
-  * @param dataformat requested data format
-  * @param pvariables requested physical test variables; null never matches
-  * @return ID of a matching test definition, or -1 if none matches
-  */
- private static int getTestDefID( TestMeasure measure, TestVariable lvariable, DataFormat dataformat, InternalTestVariable[] pvariables )
- {
-     //no physical variables requested: nothing can match
-     if( pvariables == null )
-         return -1;
-
-     for( Entry<Integer,PerfTestDef> e : _regTestDef.entrySet() )
-     {
-         PerfTestDef def = e.getValue();
-         if( def.getMeasure()!=measure || def.getVariable()!=lvariable || def.getDataformat()!=dataformat )
-             continue;
-
-         //compare the arrays as a whole: differing lengths are a mismatch,
-         //neither an index error nor a match on a common prefix
-         InternalTestVariable[] registered = def.getInternalVariables();
-         if( registered == null || registered.length != pvariables.length )
-             continue;
-
-         boolean same = true;
-         for( int i=0; i<registered.length && same; i++ )
-             same = ( registered[i] == pvariables[i] );
-
-         if( same )
-             return e.getKey();
-     }
-
-     return -1;
- }
-
- /**
-  * Resolves an instruction name to its registered ID.
-  *
-  * @param instName instruction name as used during registration
-  * @return the instruction ID, or -1 if the name is not registered
-  */
- private static int getInstructionID( String instName )
- {
-     Integer id = _regInst_NamesID.get( instName );
-     if( id == null )
-         return -1;
-     return id;
- }
-
- /**
-  * Returns the IDs of all registered test definitions.
-  *
-  * @return newly created array with all test definition IDs
-  */
- @SuppressWarnings("unused")
- private static Integer[] getAllTestDefs()
- {
-     Integer[] template = new Integer[0];
-     return _regTestDef.keySet().toArray(template);
- }
-
- /**
-  * Returns the IDs of the default test definitions as set by
-  * registerTestConfigurations (null before that method has been called).
-  * The internal array itself is returned, not a copy.
-  */
- private static Integer[] getDefaultTestDefs()
- {
- return _defaultConf;
- }
-
- /**
-  * Creates a copy of the given test definition IDs in which every
-  * definition of logical variable v is exchanged for its multi-dimensional
-  * counterpart. A multi-dimensional version only exists for DATA_SIZE
-  * (DIM1_SIZE, DIM2_SIZE, DIM3_SIZE); for all other variables, and for
-  * definitions of a different variable, the original ID is kept.
-  *
-  * @param v   logical variable whose definitions are exchanged
-  * @param IDs test definition IDs to map
-  * @return new array of the same length; an entry is -1 if the
-  *         multi-dimensional counterpart has not been registered
-  */
- @SuppressWarnings("unused")
- private static Integer[] changeToMuliDimTestDefs( TestVariable v, Integer[] IDs )
- {
-     Integer[] tmp = new Integer[IDs.length];
-
-     for( int i=0; i<tmp.length; i++ )
-     {
-         PerfTestDef def = _regTestDef.get(IDs[i]);
-
-         //find multidim version (only for the filtered logical variable)
-         InternalTestVariable[] in = null;
-         if( def.getVariable() == v )
-         {
-             switch( v )
-             {
-                 case DATA_SIZE:
-                     in = new InternalTestVariable[]{InternalTestVariable.DIM1_SIZE,InternalTestVariable.DIM2_SIZE,InternalTestVariable.DIM3_SIZE};
-                     break;
-                 default:
-                     //no multidim version available
-             }
-         }
-
-         //exchange the testdef ID only if a multidim version is defined;
-         //looking up a null variable array would fail otherwise
-         if( in != null )
-             tmp[i] = getTestDefID(def.getMeasure(), def.getVariable(), def.getDataformat(), in );
-         else
-             tmp[i] = IDs[i];
-     }
-
-     return tmp;
- }
-
- /**
-  * Executes all registered test definitions for all registered instructions
-  * and stores the measured values in _results (instruction ID -> test
-  * definition ID -> list of measurements). Each instruction is wrapped into
-  * its own temporary program block; progress is printed to stdout.
-  *
-  * @throws DMLRuntimeException if the execution of a test case fails
-  * @throws IOException declared by the test case executors
-  */
- private static void executeTest( )
- throws DMLRuntimeException, IOException
- {
- System.out.println("SystemML PERFORMANCE TEST TOOL:");
-
- //foreach registered instruction
- for( Entry<Integer,Instruction> inst : _regInst.entrySet() )
- {
- int instID = inst.getKey();
- System.out.println( "Running INSTRUCTION "+_regInst_IDNames.get(instID) );
-
- Integer[] testDefIDs = _regInst_IDTestDef.get(instID);
- boolean vectors = _regInst_IDVectors.get(instID);
- IOSchema schema = _regInst_IDIOSchema.get(instID);
-
- //create tmp program block and set instruction
- Program prog = new Program();
- ProgramBlock pb = new ProgramBlock( prog );
- ArrayList<Instruction> ainst = new ArrayList<Instruction>();
- ainst.add( inst.getValue() );
- pb.setInstructions(ainst);
-
- //one execution context is shared by all test definitions of the instruction
- ExecutionContext ec = ExecutionContextFactory.createContext();
-
- //foreach registered test configuration
- for( Integer defID : testDefIDs )
- {
- PerfTestDef def = _regTestDef.get(defID);
- TestMeasure m = def.getMeasure();
- TestVariable lv = def.getVariable();
- DataFormat df = def.getDataformat();
- InternalTestVariable[] pv = def.getInternalVariables();
- double min = def.getMin();
- double max = def.getMax();
- double samples = def.getNumSamples();
-
- System.out.println( "Running TESTDEF(measure="+m+", variable="+String.valueOf(lv)+" "+pv.length+", format="+String.valueOf(df)+")" );
-
- //vary input variable
- LinkedList<Double> dmeasure = new LinkedList<Double>();
- LinkedList<Double> dvariable = generateSequence(min, max, samples);
- int plen = pv.length;
-
- if( plen == 1 ) //1D function
- {
- for( Double var : dvariable )
- {
- dmeasure.add(executeTestCase1D(m, pv[0], df, var, pb, vectors, schema, ec));
- }
- }
- else //multi-dim function
- {
- //init index stack
- //(one position per physical variable, enumerating all combinations of sample values)
- int[] index = new int[plen];
- for( int i=0; i<plen; i++ )
- index[i] = 0;
-
- //execute test
- int dlen = dvariable.size();
- double[] buff = new double[plen];
- //the first position only overflows after all combinations have been visited
- while( index[0]<dlen )
- {
- //set buffer values
- for( int i=0; i<plen; i++ )
- buff[i] = dvariable.get(index[i]);
-
- //core execution
- dmeasure.add(executeTestCaseMD(m, pv, df, buff, pb, schema, ec)); //not applicable for vector flag
-
- //increment indexes
- //(the last position is advanced; an overflowing position is reset and carried to its left neighbor)
- for( int i=plen-1; i>=0; i-- )
- {
- if(i==plen-1)
- index[i]++;
- else if( index[i+1] >= dlen )
- {
- index[i]++;
- index[i+1]=0;
- }
- }
- }
- }
-
-
- //append values to results
- //(an existing list for the same instruction and test definition is replaced)
- if( !_results.containsKey(instID) )
- _results.put(instID, new HashMap<Integer, LinkedList<Double>>());
- _results.get(instID).put(defID, dmeasure);
-
- }
- }
- }
-
- /**
-  * Executes a single 1D test case: derives data size and sparsity from the
-  * varied value, configures the instruction where required, creates the
-  * variables demanded by the IO schema and runs the program block
-  * TEST_REPETITIONS times.
-  *
-  * @param m        measure to take
-  * @param v        physical variable that is varied
-  * @param df       data format of the generated inputs
-  * @param varValue value of the varied variable for this test case
-  * @param pb       program block holding exactly one instruction
-  * @param vectors  if true, inputs are datasize x 1, otherwise square
-  * @param schema   determines which of the variables A, B, C are created
-  * @param ec       execution context; its variables are removed first
-  * @return mean of the measured values; for MEMORY_USAGE the memory
-  *         difference observed while creating the variables is added if
-  *         it is positive
-  * @throws DMLRuntimeException if the execution fails
-  * @throws IOException declared by the data generation
-  */
- private static double executeTestCase1D( TestMeasure m, InternalTestVariable v, DataFormat df, double varValue, ProgramBlock pb, boolean vectors, IOSchema schema, ExecutionContext ec )
- throws DMLRuntimeException, IOException
- {
- double datasize = -1;
- double dim1 = -1, dim2 = -1;
- double sparsity = -1;
- //double sortio = -1;
-
- System.out.println( "VAR VALUE "+varValue );
-
- //set test variables
- //(the varied variable takes varValue, the other one its default)
- switch ( v )
- {
- case DATA_SIZE:
- datasize = varValue;
- sparsity = DEFAULT_SPARSITY;
- break;
- case SPARSITY:
- datasize = DEFAULT_DATASIZE;
- sparsity = varValue;
- break;
- case SORT_IO_MEM: //FIXME
- datasize = DEFAULT_DATASIZE * DATASIZE_MR_SCALE;
- sparsity = DEFAULT_SPARSITY;
- //sortio = varValue;
- break;
- default:
- //do nothing
- //(datasize and sparsity keep their initial value of -1)
- }
-
- //set specific dimensions
- if( vectors )
- {
- dim1 = datasize;
- dim2 = 1;
- }
- else
- {
- dim1 = Math.sqrt( datasize );
- dim2 = dim1;
- }
-
- //instruction-specific configurations
- Instruction inst = pb.getInstruction(0); //always exactly one instruction
- if( inst instanceof DataGenCPInstruction )
- {
- DataGenCPInstruction rand = (DataGenCPInstruction) inst;
- rand.setRows((long)dim1);
- rand.setCols((long)dim2);
- rand.setSparsity(sparsity);
- }
- else if ( inst instanceof FunctionCallCPInstruction ) //ExternalFunctionInvocationInstruction
- {
- //register the external function "execPerfTestExtFunct" with matrix input A and matrix output C
- Program prog = pb.getProgram();
- ArrayList<DataIdentifier> in = new ArrayList<DataIdentifier>();
- DataIdentifier dat1 = new DataIdentifier("A");
- dat1.setDataType(DataType.MATRIX);
- dat1.setValueType(ValueType.DOUBLE);
- in.add(dat1);
- ArrayList<DataIdentifier> out = new ArrayList<DataIdentifier>();
- DataIdentifier dat2 = new DataIdentifier("C");
- dat2.setDataType(DataType.MATRIX);
- dat2.setValueType(ValueType.DOUBLE);
- out.add(dat2);
- HashMap<String, String> params = new HashMap<String, String>();
- params.put(ExternalFunctionStatement.CLASS_NAME, PerfTestExtFunctCP.class.getName());
- ExternalFunctionProgramBlockCP fpb = new ExternalFunctionProgramBlockCP(prog, in, out, params, PERF_TOOL_DIR);
- prog.addFunctionProgramBlock(DMLProgram.DEFAULT_NAMESPACE, "execPerfTestExtFunct", fpb);
- }
- else if ( inst instanceof MRJobInstruction )
- {
- //FIXME hardcoded for test
- //MMRJMR.SORT_IO_MEM = sortio;
- }
-
- //generate input and output matrices
- //(used memory is sampled before and after to correct MEMORY_USAGE results below)
- LocalVariableMap vars = ec.getVariables();
- vars.removeAll();
- double mem1 = PerfTestMemoryObserver.getUsedMemory();
- if( schema!=IOSchema.NONE_NONE && schema!=IOSchema.NONE_UNARY )
- vars.put("A", generateInputDataset(PERF_TOOL_DIR+"/A", dim1, dim2, sparsity, df));
- //NOTE(review): B is also created for UNARY_UNARY; the disabled "uak+" registration
- //mentions an output named B -- confirm that this is intended
- if( schema==IOSchema.BINARY_NONE || schema==IOSchema.BINARY_UNARY || schema==IOSchema.UNARY_UNARY )
- vars.put("B", generateInputDataset(PERF_TOOL_DIR+"/B", dim1, dim2, sparsity, df));
- if( schema==IOSchema.NONE_UNARY || schema==IOSchema.UNARY_UNARY || schema==IOSchema.BINARY_UNARY)
- vars.put("C", generateEmptyResult(PERF_TOOL_DIR+"/C", dim1, dim2, df));
- double mem2 = PerfTestMemoryObserver.getUsedMemory();
-
- //foreach repetition
- //(values are summed up and averaged afterwards)
- double value = 0;
- for( int i=0; i<TEST_REPETITIONS; i++ )
- {
- System.out.println("run "+i);
- value += executeGenericProgramBlock( m, pb, ec );
- }
- value/=TEST_REPETITIONS;
-
- //result correction and print result
- switch( m )
- {
- case EXEC_TIME: System.out.println("--- RESULT: "+value+" ms"); break;
- case MEMORY_USAGE:
- //System.out.println("--- RESULT: "+value+" byte");
- if( (mem2-mem1) > 0 )
- value = value + mem2-mem1; //correction: input sizes added
- System.out.println("--- RESULT: "+value+" byte"); break;
- default: System.out.println("--- RESULT: "+value); break;
- }
-
- return value;
- }
-
- /**
-  * Executes a single multi-dimensional test case: the given values are
-  * assigned to the dimensions dim1, dim2 and dim3 according to the physical
-  * variables, the variables demanded by the IO schema are created (A as
-  * dim1 x dim2, B as dim2 x dim3, C as dim1 x dim3, all with the default
-  * sparsity) and the program block is run TEST_REPETITIONS times.
-  *
-  * @param m        measure to take
-  * @param v        physical variables, parallel to varValue
-  * @param df       data format of the generated inputs
-  * @param varValue values of the physical variables for this test case
-  * @param pb       program block to execute
-  * @param schema   determines which of the variables A, B, C are created
-  * @param ec       execution context; its variables are removed first
-  * @return mean of the measured values; for MEMORY_USAGE the memory
-  *         difference observed while creating the variables is added if
-  *         it is positive
-  * @throws DMLRuntimeException if the execution fails
-  * @throws IOException declared by the data generation
-  */
- private static double executeTestCaseMD( TestMeasure m, InternalTestVariable[] v, DataFormat df, double[] varValue, ProgramBlock pb, IOSchema schema, ExecutionContext ec )
- throws DMLRuntimeException, IOException
- {
- //double datasize = DEFAULT_DATASIZE;
- double sparsity = DEFAULT_SPARSITY;
- double dim1 = -1;
- double dim2 = -1;
- double dim3 = -1;
-
-
- //map the values to the dimensions (a dimension without variable stays -1)
- for( int i=0; i<v.length; i++ )
- {
- System.out.println( "VAR VALUE "+varValue[i] );
-
- switch( v[i] )
- {
- case DIM1_SIZE: dim1=varValue[i]; break;
- case DIM2_SIZE: dim2=varValue[i]; break;
- case DIM3_SIZE: dim3=varValue[i]; break;
- default: //do nothing
- }
- }
-
- //generate input and output matrices
- //(used memory is sampled before and after to correct MEMORY_USAGE results below)
- LocalVariableMap vars = ec.getVariables();
- vars.removeAll();
- double mem1 = PerfTestMemoryObserver.getUsedMemory();
- if( schema!=IOSchema.NONE_NONE && schema!=IOSchema.NONE_UNARY )
- vars.put("A", generateInputDataset(PERF_TOOL_DIR+"/A", dim1, dim2, sparsity, df));
- if( schema==IOSchema.BINARY_NONE || schema==IOSchema.BINARY_UNARY || schema==IOSchema.UNARY_UNARY )
- vars.put("B", generateInputDataset(PERF_TOOL_DIR+"/B", dim2, dim3, sparsity, df));
- if( schema==IOSchema.NONE_UNARY || schema==IOSchema.UNARY_UNARY || schema==IOSchema.BINARY_UNARY)
- vars.put("C", generateEmptyResult(PERF_TOOL_DIR+"/C", dim1, dim3, df));
- double mem2 = PerfTestMemoryObserver.getUsedMemory();
-
- //foreach repetition
- //(values are summed up and averaged afterwards)
- double value = 0;
- for( int i=0; i<TEST_REPETITIONS; i++ )
- {
- System.out.println("run "+i);
- value += executeGenericProgramBlock( m, pb, ec );
- }
- value/=TEST_REPETITIONS;
-
- //result correction and print result
- switch( m )
- {
- case EXEC_TIME: System.out.println("--- RESULT: "+value+" ms"); break;
- case MEMORY_USAGE:
- //System.out.println("--- RESULT: "+value+" byte");
- if( (mem2-mem1) > 0 )
- value = value + mem2-mem1; //correction: input sizes added
- System.out.println("--- RESULT: "+value+" byte"); break;
- default: System.out.println("--- RESULT: "+value); break;
- }
-
- return value;
- }
-
- /**
-  * Executes the given program block once and returns the measured value.
-  * For EXEC_TIME the value returned by the timer is reported; for
-  * MEMORY_USAGE a memory observer runs in a separate thread during the
-  * execution and its maximum memory consumption is reported. For any other
-  * measure the block is not executed and 0 is returned. Afterwards the data
-  * of all matrix objects in the execution context is cleared.
-  *
-  * @param measure measure to take
-  * @param pb      program block to execute
-  * @param ec      execution context to run in
-  * @return the measured value
-  * @throws DMLRuntimeException if the execution fails or the calling thread
-  *         is interrupted (the interrupt flag is restored in that case)
-  */
- public static double executeGenericProgramBlock( TestMeasure measure, ProgramBlock pb, ExecutionContext ec )
-     throws DMLRuntimeException
- {
-     double value = 0;
-     try
-     {
-         switch( measure )
-         {
-             case EXEC_TIME:
-                 Timing time = new Timing();
-                 time.start();
-                 pb.execute( ec );
-                 value = time.stop();
-                 break;
-             case MEMORY_USAGE:
-                 PerfTestMemoryObserver mo = new PerfTestMemoryObserver();
-                 mo.measureStartMem();
-                 Thread t = new Thread(mo);
-                 t.start();
-                 try
-                 {
-                     pb.execute( ec );
-                 }
-                 finally
-                 {
-                     //always signal the observer, also on failure; otherwise its
-                     //thread would presumably keep running after the exception
-                     mo.setStopped();
-                 }
-                 value = mo.getMaxMemConsumption();
-                 t.join();
-                 break;
-         }
-     }
-     catch(InterruptedException ex)
-     {
-         //preserve the interrupt status for the caller
-         Thread.currentThread().interrupt();
-         throw new DMLRuntimeException(ex);
-     }
-     catch(Exception ex)
-     {
-         throw new DMLRuntimeException(ex);
-     }
-
-     //clear matrixes from cache
-     for( String str : ec.getVariables().keySet() )
-     {
-         Data dat = ec.getVariable(str);
-         if( dat instanceof MatrixObject )
-             ((MatrixObject)dat).clearData();
-     }
-
-     return value;
- }
-
- /**
-  * Generates an equidistant sequence of values starting at min. For
-  * num &gt; 1 the values are spaced by (max-min)/(num-1), so the last value
-  * is max (up to rounding) for integral num. For a single sample the
-  * sequence consists of min only; the spacing would otherwise be computed
-  * by a division by zero and the only value would become NaN.
-  *
-  * @param min first value of the sequence
-  * @param max upper end of the value range
-  * @param num number of values to generate; no values for num &lt;= 0
-  * @return list of generated values in ascending index order
-  */
- public static LinkedList<Double> generateSequence( double min, double max, double num )
- {
-     LinkedList<Double> data = new LinkedList<Double>();
-
-     //the spacing is undefined for a single sample, only min is emitted then
-     double increment = ( num > 1 ) ? (max-min)/(num-1) : 0;
-
-     for( int i=0; i<num; i++ )
-         data.add( Double.valueOf(min+i*increment) );
-
-     return data;
- }
-
- /**
-  * Creates a random matrix of the given dimensions (values produced by
-  * generateTestMatrix with range 1..100 and fixed seed 7), wraps it into a
-  * MatrixObject backed by {@code fname} and exports it.
-  *
-  * @param fname file name of the matrix; an existing file is deleted first
-  * @param dim1 number of rows (truncated to int)
-  * @param dim2 number of columns (truncated to int)
-  * @param sparsity sparsity passed to generateTestMatrix and used to size sparse blocks
-  * @param df data format selecting a dense or sparse matrix block
-  * @return the exported matrix object
-  * @throws IOException if IOException occurs
-  * @throws CacheException if CacheException occurs
-  */
- public static MatrixObject generateInputDataset(String fname, double dim1, double dim2, double sparsity, DataFormat df)
-     throws IOException, CacheException
- {
-     final int nrow = (int) dim1;
-     final int ncol = (int) dim2;
-     System.out.println(nrow+" "+ncol);
-
-     //random cell values
-     final double[][] values = generateTestMatrix(nrow, ncol, 1, 100, sparsity, 7);
-
-     //allocate the target block in the requested representation
-     MatrixBlock block = null;
-     switch( df )
-     {
-         case DENSE:
-             block = new MatrixBlock(nrow, ncol, false);
-             break;
-         case SPARSE:
-             block = new MatrixBlock(nrow, ncol, true, (int)(sparsity*dim1*dim2));
-             break;
-     }
-
-     //copy all non-zero cells
-     for( int r=0; r<nrow; r++ )
-     {
-         for( int c=0; c<ncol; c++ )
-         {
-             double cell = values[r][c];
-             if( cell != 0 )
-                 block.setValue(r, c, cell);
-         }
-     }
-
-     MapReduceTool.deleteFileIfExistOnHDFS(fname);
-
-     MatrixFormatMetaData meta = new MatrixFormatMetaData(
-         new MatrixCharacteristics(nrow, ncol, ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize()),
-         OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
-     MatrixObject result = new MatrixObject(ValueType.DOUBLE, fname, meta);
-     result.acquireModify(block);
-     result.release();
-     result.exportData(); //write to HDFS
-
-     return result;
- }
-
- /**
-  * Creates a MatrixObject for {@code fname} that carries only meta data
-  * (dimensions, block sizes, binary block format); no matrix block is attached.
-  *
-  * @param fname file name of the result matrix
-  * @param dim1 number of rows (truncated to int)
-  * @param dim2 number of columns (truncated to int)
-  * @param df data format (not read by this method)
-  * @return the new matrix object
-  * @throws IOException if IOException occurs
-  * @throws CacheException if CacheException occurs
-  */
- public static MatrixObject generateEmptyResult(String fname, double dim1, double dim2, DataFormat df )
-     throws IOException, CacheException
- {
-     final int nrow = (int)dim1;
-     final int ncol = (int)dim2;
-
-     MatrixFormatMetaData meta = new MatrixFormatMetaData(
-         new MatrixCharacteristics(nrow, ncol, ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize()),
-         OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
-
-     return new MatrixObject(ValueType.DOUBLE, fname, meta);
- }
-
-
- /**
-  * Builds a rows x cols array of pseudo-random values.
-  * <p>
-  * For every cell one uniform draw ({@code Random.nextDouble()}) is compared
-  * with {@code sparsity}; the cell stays 0 if the draw is greater, otherwise
-  * a second draw is scaled into the range starting at {@code min} with width
-  * {@code max - min}.
-  * <p>
-  * NOTE: This is a copy of TestUtils.generateTestMatrix, it was replicated in
-  * order to prevent dependency of SystemML.jar to our test package.
-  *
-  * @param rows number of rows
-  * @param cols number of columns
-  * @param min minimum value
-  * @param max maximum value
-  * @param sparsity threshold compared against the uniform draw of each cell
-  * @param seed random seed value (-1 if use System time)
-  * @return matrix as 2D double array
-  */
- public static double[][] generateTestMatrix(int rows, int cols, double min, double max, double sparsity, long seed) {
-     final double[][] result = new double[rows][cols];
-     final Random rnd = new Random( (seed == -1) ? System.nanoTime() : seed );
-     final double width = max - min;
-
-     for (double[] row : result) {
-         for (int c = 0; c < cols; c++) {
-             //first draw decides whether the cell is populated, second draw is its value
-             if ( !(rnd.nextDouble() > sparsity) )
-                 row[c] = (rnd.nextDouble() * width + min);
-         }
-     }
-
-     return result;
- }
-
- /**
-  * Writes the regression inputs for every (instruction, test definition)
-  * pair in {@code _results} as two CSV files per pair:
-  * {@code dirname+count+"_in1.csv"} holds the variable data (polynomial terms
-  * of the sampled variable values) and {@code dirname+count+"_in2.csv"} the
-  * measured values, where {@code count} is a running number starting at 1.
-  *
-  * @param dirname prefix prepended to all generated file names
-  * @return mapping from the running file number to the combined
-  *         instruction/test definition ID
-  * @throws IOException if writing a CSV file fails
-  * @throws DMLRuntimeException if a test definition has more than 3 internal variables
-  */
- @SuppressWarnings("all")
- private static HashMap<Integer,Long> writeResults( String dirname )
-     throws IOException, DMLRuntimeException
- {
-     HashMap<Integer,Long> map = new HashMap<Integer, Long>();
-     int count = 1;
-     int offset = (MODEL_INTERCEPT ? 1 : 0);
-     int cols = MODEL_MAX_ORDER + offset;
-
-     for( Entry<Integer,HashMap<Integer,LinkedList<Double>>> inst : _results.entrySet() )
-     {
-         int instID = inst.getKey();
-         HashMap<Integer,LinkedList<Double>> instCF = inst.getValue();
-
-         for( Entry<Integer,LinkedList<Double>> cfun : instCF.entrySet() )
-         {
-             int tDefID = cfun.getKey();
-             long ID = IDHandler.concatIntIDsToLong(instID, tDefID);
-             LinkedList<Double> dmeasure = cfun.getValue();
-
-             PerfTestDef def = _regTestDef.get(tDefID);
-             LinkedList<Double> dvariable = generateSequence(def.getMin(), def.getMax(), NUM_SAMPLES_PER_TEST);
-             int dlen = dvariable.size();
-             int plen = def.getInternalVariables().length;
-
-             //copy samples into an array: the multi-dimensional case below
-             //accesses them by index, which is linear-time on a LinkedList
-             double[] samples = new double[dlen];
-             int pos = 0;
-             for( Double val : dvariable )
-                 samples[pos++] = val;
-
-             //write variable data set
-             CSVWriter writer1 = new CSVWriter( new FileWriter( dirname+count+"_in1.csv" ),',', CSVWriter.NO_QUOTE_CHARACTER);
-             try
-             {
-                 if( plen == 1 ) //one dimensional function
-                 {
-                     //write 1, x, x^2, x^3, ...
-                     String[] sbuff = new String[cols];
-                     for( double val : samples )
-                     {
-                         for( int j=0; j<cols; j++ )
-                             sbuff[j] = String.valueOf( Math.pow(val, j+1-offset) );
-                         writer1.writeNext(sbuff);
-                     }
-                 }
-                 else // multi-dimensional function
-                 {
-                     //write 1, x,y,z, x^2,y^2,z^2, xy,xz,yz
-                     //(the product of all variables is not written)
-
-                     //one slot for the optional intercept, plen linear terms,
-                     //plen squared terms and plen*(plen-1)/2 pairwise products;
-                     //the previous size formula was one slot short for plen==2
-                     String[] sbuff = new String[offset + 2*plen + plen*(plen-1)/2];
-                     if(offset==1)
-                         sbuff[0]="1";
-
-                     //init index stack (one sample index per variable, all 0)
-                     int[] index = new int[plen];
-
-                     //enumerate all combinations of sample values
-                     double[] buff = new double[plen];
-                     while( index[0]<dlen )
-                     {
-                         //set buffer values
-                         for( int i=0; i<plen; i++ )
-                             buff[i] = samples[index[i]];
-
-                         //core writing
-                         for( int i=1; i<=plen; i++ )
-                         {
-                             if( i==1 )
-                             {
-                                 for( int j=0; j<plen; j++ )
-                                     sbuff[offset+j] = String.valueOf( buff[j] );
-                                 for( int j=0; j<plen; j++ )
-                                     sbuff[offset+plen+j] = String.valueOf( Math.pow(buff[j],2) );
-                             }
-                             else if( i==2 )
-                             {
-                                 int ix=0;
-                                 for( int j=0; j<plen-1; j++ )
-                                     for( int k=j+1; k<plen; k++, ix++ )
-                                         sbuff[offset+2*plen+ix] = String.valueOf( buff[j]*buff[k] );
-                             }
-                             else if( i==plen )
-                             {
-                                 //highest-order product term intentionally not written
-                             }
-                             else
-                                 throw new DMLRuntimeException("More than 3 dims currently not supported.");
-                         }
-
-                         writer1.writeNext(sbuff);
-
-                         //increment indexes (last index runs fastest, carry to the left)
-                         for( int i=plen-1; i>=0; i-- )
-                         {
-                             if(i==plen-1)
-                                 index[i]++;
-                             else if( index[i+1] >= dlen )
-                             {
-                                 index[i]++;
-                                 index[i+1]=0;
-                             }
-                         }
-                     }
-                 }
-             }
-             finally
-             {
-                 //release the file handle also if writing fails
-                 writer1.close();
-             }
-
-             //write measure data set
-             CSVWriter writer2 = new CSVWriter( new FileWriter( dirname+count+"_in2.csv" ),',', CSVWriter.NO_QUOTE_CHARACTER);
-             try
-             {
-                 String[] buff2 = new String[1];
-                 for( Double val : dmeasure )
-                 {
-                     buff2[0] = String.valueOf( val );
-                     writer2.writeNext(buff2);
-                 }
-             }
-             finally
-             {
-                 writer2.close();
-             }
-
-             map.put(count, ID);
-             count++;
-         }
-     }
-
-     return map;
- }
-
- /**
-  * Instantiates the DML template {@code dmlname} by substituting the
-  * placeholders %numModels%, %numRows%, %numCols% and %indir%, writes the
-  * result to {@code dmltmpname} and runs it via {@code DMLScript.main}.
-  *
-  * @param dmlname file name of the DML template
-  * @param dmltmpname file name of the instantiated (temporary) DML script
-  * @param dir value substituted for %indir%
-  * @param models value substituted for %numModels%
-  * @param rows value substituted for %numRows%
-  * @param cols value substituted for %numCols%
-  * @throws IOException if reading the template or writing the script fails
-  * @throws ParseException if ParseException occurs
-  * @throws DMLException if DMLException occurs
-  */
- private static void computeRegressionModels( String dmlname, String dmltmpname, String dir, int models, int rows, int cols )
-     throws IOException, ParseException, DMLException
- {
-     //read DML template
-     StringBuilder buffer = new StringBuilder();
-     BufferedReader br = new BufferedReader( new FileReader(new File( dmlname )) );
-     try
-     {
-         String line = null;
-         while( (line=br.readLine()) != null )
-         {
-             buffer.append(line);
-             buffer.append("\n");
-         }
-     }
-     finally
-     {
-         br.close();
-     }
-
-     //replace parameters; literal replace() instead of replaceAll() because
-     //the latter interprets '$' and '\' in the replacement (e.g., in a
-     //directory name) as regex group references / escapes
-     String template = buffer.toString();
-     template = template.replace("%numModels%", String.valueOf(models));
-     template = template.replace("%numRows%", String.valueOf(rows));
-     template = template.replace("%numCols%", String.valueOf(cols));
-     template = template.replace("%indir%", String.valueOf(dir));
-
-     // write temp DML file
-     FileOutputStream fos = new FileOutputStream(new File(dmltmpname));
-     try
-     {
-         fos.write(template.getBytes());
-     }
-     finally
-     {
-         fos.close();
-     }
-
-     // execute DML script
-     DMLScript.main(new String[] { "-f", dmltmpname });
- }
-
- /**
-  * Reads the regression results {@code dname+count+"_out.csv"} for every
-  * entry of the given mapping and registers a CostFunction built from the
-  * first column of each file in {@code _profile}.
-  *
-  * @param dname prefix prepended to the result file names
-  * @param IDMapping mapping from the running file number to the combined
-  *        instruction/test definition ID
-  * @throws IOException if reading a result file fails
-  */
- private static void readRegressionModels( String dname, HashMap<Integer,Long> IDMapping )
-     throws IOException
- {
-     for( Entry<Integer,Long> e : IDMapping.entrySet() )
-     {
-         int count = e.getKey();
-         long ID = e.getValue();
-         int instID = IDHandler.extractIntIDFromLong(ID, 1);
-         int tDefID = IDHandler.extractIntIDFromLong(ID, 2);
-
-         //read file and parse (one parameter per line, first column)
-         LinkedList<Double> params = new LinkedList<Double>();
-         CSVReader reader1 = new CSVReader( new FileReader(dname+count+"_out.csv"), ',' );
-         try
-         {
-             String[] nextline = null;
-             while( (nextline = reader1.readNext()) != null )
-                 params.add(Double.parseDouble(nextline[0]));
-         }
-         finally
-         {
-             //release the file handle also if a line cannot be parsed
-             reader1.close();
-         }
-
-         double[] dparams = new double[params.size()];
-         int i=0;
-         for( Double d : params )
-         {
-             dparams[i] = d;
-             i++;
-         }
-
-         //create new cost function
-         boolean multidim = _regTestDef.get(tDefID).getInternalVariables().length > 1;
-         CostFunction cf = new CostFunction(dparams, multidim);
-
-         //append to profile
-         if( !_profile.containsKey(instID) )
-             _profile.put(instID, new HashMap<Integer, CostFunction>());
-         _profile.get(instID).put(tDefID, cf);
-     }
- }
-
- /**
-  * Joins the string representations of the given test variables, separated
-  * by XML_ELEMENT_DELIMITER.
-  *
-  * @param vars internal test variables
-  * @return delimiter-separated string (empty for an empty array)
-  */
- private static String serializeTestVariables( InternalTestVariable[] vars )
- {
-     StringBuilder out = new StringBuilder();
-     String sep = "";
-     for( InternalTestVariable var : vars )
-     {
-         out.append( sep );
-         out.append( String.valueOf(var) );
-         sep = XML_ELEMENT_DELIMITER; //delimiter in front of all but the first element
-     }
-     return out.toString();
- }
-
- /**
-  * Splits the given string at XML_ELEMENT_DELIMITER and converts every token
-  * via {@code InternalTestVariable.valueOf}.
-  *
-  * @param vars delimiter-separated variable names
-  * @return parsed variables in token order
-  */
- private static InternalTestVariable[] parseTestVariables(String vars)
- {
-     StringTokenizer tokens = new StringTokenizer(vars, XML_ELEMENT_DELIMITER);
-     InternalTestVariable[] result = new InternalTestVariable[tokens.countTokens()];
-     int pos = 0;
-     while( tokens.hasMoreTokens() )
-         result[pos++] = InternalTestVariable.valueOf(tokens.nextToken());
-     return result;
- }
-
- /**
-  * Joins the given parameter values, separated by XML_ELEMENT_DELIMITER.
-  *
-  * @param vals parameter values
-  * @return delimiter-separated string (empty for an empty array)
-  */
- private static String serializeParams( double[] vals )
- {
-     StringBuilder out = new StringBuilder();
-     String sep = "";
-     for( double val : vals )
-     {
-         out.append( sep );
-         out.append( String.valueOf(val) );
-         sep = XML_ELEMENT_DELIMITER; //delimiter in front of all but the first element
-     }
-     return out.toString();
- }
-
- /**
-  * Splits the given string at XML_ELEMENT_DELIMITER and parses every token
-  * as double.
-  *
-  * @param valStr delimiter-separated parameter values
-  * @return parsed values in token order
-  */
- private static double[] parseParams( String valStr )
- {
-     StringTokenizer tokens = new StringTokenizer(valStr, XML_ELEMENT_DELIMITER);
-     double[] result = new double[tokens.countTokens()];
-     int pos = 0;
-     while( tokens.hasMoreTokens() )
-         result[pos++] = Double.parseDouble(tokens.nextToken());
-     return result;
- }
-
- /**
-  * Reads a profile from the given XML file into {@code _profile} using the
-  * StAX pull parser.
-  * <p>
-  * {@code _profile} is replaced by a fresh map before parsing. The parser
-  * expects a root element containing instruction elements (attribute XML_ID),
-  * each containing cost function elements whose attributes identify the test
-  * definition and whose text content holds the delimiter-separated
-  * parameters. {@code _flagReadData} is set only if parsing completes
-  * without exception.
-  *
-  * @param fname file name of the profile
-  * @throws XMLStreamException if XMLStreamException occurs
-  * @throws IOException if IOException occurs
-  */
- private static void readProfile( String fname )
- throws XMLStreamException, IOException
- {
- //init profile map
- _profile = new HashMap<Integer, HashMap<Integer,CostFunction>>();
-
- //read existing profile
- FileInputStream fis = new FileInputStream( fname );
-
- try
- {
- //xml parsing
- XMLInputFactory xif = XMLInputFactory.newInstance();
- XMLStreamReader xsr = xif.createXMLStreamReader( fis );
-
- int e = xsr.nextTag(); // profile start
-
- while( true ) //read all instructions
- {
- e = xsr.nextTag(); // instruction start
- if( e == XMLStreamConstants.END_ELEMENT )
- break; //reached profile end tag
-
- //parse instruction
- int ID = Integer.parseInt( xsr.getAttributeValue(null, XML_ID) );
- //String name = xsr.getAttributeValue(null, XML_NAME).trim().replaceAll(" ", Lops.OPERAND_DELIMITOR);
- //register the (initially empty) cost function map of this instruction
- HashMap<Integer, CostFunction> tmp = new HashMap<Integer, CostFunction>();
- _profile.put( ID, tmp );
-
- while( true )
- {
- e = xsr.nextTag(); // cost function start
- if( e == XMLStreamConstants.END_ELEMENT )
- break; //reached instruction end tag
-
- //parse cost function
- TestMeasure m = TestMeasure.valueOf( xsr.getAttributeValue(null, XML_MEASURE) );
- TestVariable lv = TestVariable.valueOf( xsr.getAttributeValue(null, XML_VARIABLE) );
- InternalTestVariable[] pv = parseTestVariables( xsr.getAttributeValue(null, XML_INTERNAL_VARIABLES) );
- DataFormat df = DataFormat.valueOf( xsr.getAttributeValue(null, XML_DATAFORMAT) );
- //the test definition ID is looked up from the attributes, not read from the file
- int tDefID = getTestDefID(m, lv, df, pv);
-
- xsr.next(); //read characters
- double[] params = parseParams(xsr.getText());
- boolean multidim = _regTestDef.get(tDefID).getInternalVariables().length > 1;
- CostFunction cf = new CostFunction( params, multidim );
- tmp.put(tDefID, cf);
-
- xsr.nextTag(); // cost function end
- //System.out.println("added cost function");
- }
- }
- //NOTE(review): the stream reader is closed on the success path only;
- //on failure just the underlying file stream is closed below
- xsr.close();
- }
- finally
- {
- IOUtilFunctions.closeSilently(fis);
- }
-
- //mark profile as successfully read
- _flagReadData = true;
- }
-
- /**
-  * Writes the content of {@code _profile} as XML document to the given file,
-  * using StAX for efficient streaming XML writing.
-  * <p>
-  * The document consists of a root element with the current date as
-  * attribute, one element per instruction (ID and name) and, nested, one
-  * element per cost function carrying the test definition attributes and the
-  * serialized parameters as text.
-  *
-  * @param dname directory name (created if it does not exist)
-  * @param fname file name
-  * @throws IOException if IOException occurs
-  * @throws XMLStreamException if XMLStreamException occurs
-  */
- private static void writeProfile( String dname, String fname )
- throws IOException, XMLStreamException
- {
- //create initial directory and file
- //NOTE(review): the results of mkdir() and createNewFile() are not checked
- File dir = new File( dname );
- if( !dir.exists() )
- dir.mkdir();
- File f = new File( fname );
- f.createNewFile();
-
- FileOutputStream fos = new FileOutputStream( f );
-
- try
- {
- //create document
- XMLOutputFactory xof = XMLOutputFactory.newInstance();
- XMLStreamWriter xsw = xof.createXMLStreamWriter( fos );
- //TODO use an alternative way for indentation
- //xsw = new IndentingXMLStreamWriter( xsw ); //remove this line if no indenting required
-
- //write document content
- xsw.writeStartDocument();
- xsw.writeStartElement( XML_PROFILE );
- xsw.writeAttribute(XML_DATE, String.valueOf(new Date()) );
-
- //foreach instruction (bundle of cost functions)
- for( Entry<Integer,HashMap<Integer,CostFunction>> inst : _profile.entrySet() )
- {
- int instID = inst.getKey();
- //NOTE(review): presumably every profiled instruction is registered in
- //_regInst_IDNames; a missing entry would fail on instName below
- String instName = _regInst_IDNames.get( instID );
-
- xsw.writeStartElement( XML_INSTRUCTION );
- xsw.writeAttribute(XML_ID, String.valueOf( instID ));
- xsw.writeAttribute(XML_NAME, instName.replaceAll(Lop.OPERAND_DELIMITOR, " "));
-
- //foreach testdef cost function
- for( Entry<Integer,CostFunction> cfun : inst.getValue().entrySet() )
- {
- int tdefID = cfun.getKey();
- PerfTestDef def = _regTestDef.get(tdefID);
- CostFunction cf = cfun.getValue();
-
- xsw.writeStartElement( XML_COSTFUNCTION );
- xsw.writeAttribute( XML_ID, String.valueOf( tdefID ));
- xsw.writeAttribute( XML_MEASURE, def.getMeasure().toString() );
- xsw.writeAttribute( XML_VARIABLE, def.getVariable().toString() );
- xsw.writeAttribute( XML_INTERNAL_VARIABLES, serializeTestVariables(def.getInternalVariables()) );
- xsw.writeAttribute( XML_DATAFORMAT, def.getDataformat().toString() );
- xsw.writeCharacters(serializeParams( cf.getParams() ));
- xsw.writeEndElement();// XML_COSTFUNCTION
- }
-
- xsw.writeEndElement(); //XML_INSTRUCTION
- }
-
- xsw.writeEndElement();//XML_PROFILE
- xsw.writeEndDocument();
- xsw.close();
- }
- finally
- {
- IOUtilFunctions.closeSilently(fos);
- }
- }
-
-
-
- /**
-  * Main for invoking the actual performance test in order to produce profile.xml.
-  * Delegates to {@code PerfTestTool.runTest()}; the arguments are not read.
-  *
-  * @param args string arguments to main() method
-  */
- public static void main(String[] args)
- {
- //execute the local / remote performance test
- PerfTestTool.runTest();
- }
-
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml
deleted file mode 100644
index c216d52..0000000
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml
+++ /dev/null
@@ -1,59 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-#PerfTestTool: DML template for estimating cost functions.
-#Deprecated in SystemML 0.13
-#
-#For each of the k models, the script reads a design matrix D and a measure
-#vector y from CSV files, solves the normal equations (D'D) beta = D'y, and
-#writes beta to a CSV file. %numModels% is the only placeholder that appears
-#in this template; the input/output directory is hard-coded below.
-
-#external function: reads a matrix from file fname
-#(assumption: m, n are the expected dimensions and -1 means unknown; TODO confirm)
-dynRead = externalFunction(Matrix[Double] d, String fname, Integer m, Integer n)
-return (Matrix[Double] D)
-implemented in (classname="org.apache.sysml.runtime.controlprogram.parfor.test.dml.DynamicReadMatrix2DCP",exectype="mem")
-
-#external function: writes matrix R to file fname
-dynWrite = externalFunction(Matrix[Double] R, String fname)
-return (Matrix[Double] D)
-implemented in (classname="org.apache.sysml.runtime.controlprogram.parfor.test.dml.DynamicWriteMatrix2DCP",exectype="mem")
-
-#external function: linear system solver
-solve = externalFunction(Matrix[Double] A, Matrix[Double] y)
-return (Matrix[Double] b)
-implemented in (classname="org.apache.sysml.packagesupport.LinearSolverWrapperCP",exectype="mem")
-
-k = %numModels%;
-m = -1;
-n = -1;
-
-#placeholder matrix passed as first argument of dynRead
-dummy = matrix(1,rows=1,cols=1);
-
-for( i in 1:k, par=8, mode=LOCAL )
-{
- #input files of model i (variable data and measured values)
- sin1 = "./conf/PerfTestTool/"+i+"_in1.csv";
- sin2 = "./conf/PerfTestTool/"+i+"_in2.csv";
-
- D = dynRead( dummy, sin1, m, n );
- y = dynRead( dummy, sin2, m, 1 );
-
- A = t(D) %*% D; # X'X
- b = t(D) %*% y; # X'y
- beta = solve(A,b);
-
- #output file of model i (regression coefficients)
- sout = "./conf/PerfTestTool/"+i+"_out.csv";
-
- X=dynWrite( beta, sout );
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/test/java/org/apache/sysml/test/integration/functions/parfor/ParForRulebasedOptimizerTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/parfor/ParForRulebasedOptimizerTest.java b/src/test/java/org/apache/sysml/test/integration/functions/parfor/ParForRulebasedOptimizerTest.java
index beb08bd..343d846 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/parfor/ParForRulebasedOptimizerTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/parfor/ParForRulebasedOptimizerTest.java
@@ -30,15 +30,13 @@ import org.apache.sysml.test.utils.TestUtils;
public class ParForRulebasedOptimizerTest extends AutomatedTestBase
{
-
- private final static String TEST_NAME1 = "parfor_optimizer1";
- private final static String TEST_NAME2 = "parfor_optimizer2";
- private final static String TEST_NAME3 = "parfor_optimizer3";
+ private final static String TEST_NAME1 = "parfor_optimizer1"; //+b for dml
+ private final static String TEST_NAME2 = "parfor_optimizer2"; //+b for dml
+ private final static String TEST_NAME3 = "parfor_optimizer3"; //+b for dml
private final static String TEST_DIR = "functions/parfor/";
private final static String TEST_CLASS_DIR = TEST_DIR + ParForRulebasedOptimizerTest.class.getSimpleName() + "/";
private final static double eps = 1e-10;
-
-
+
private final static int rows1 = 1000; //small CP
private final static int rows2 = 10000; //large MR
@@ -67,82 +65,127 @@ public class ParForRulebasedOptimizerTest extends AutomatedTestBase
@Test
- public void testParForOptimizerCorrelationSmallSmall()
- {
- runParForOptimizerTest(1, false, false);
+ public void testParForRulebasedOptimizerCorrelationSmallSmall() {
+ runParForOptimizerTest(1, false, false, false);
}
+ @Test
+ public void testParForRulebasedOptimizerCorrelationSmallLarge() {
+ runParForOptimizerTest(1, false, true, false);
+ }
@Test
- public void testParForOptimizerCorrelationSmallLarge()
- {
- runParForOptimizerTest(1, false, true);
+ public void testParForRulebasedOptimizerCorrelationLargeSmall() {
+ runParForOptimizerTest(1, true, false, false);
}
+ @Test
+ public void testParForRulebasedOptimizerCorrelationLargeLarge() {
+ runParForOptimizerTest(1, true, true, false);
+ }
@Test
- public void testParForOptimizerCorrelationLargeSmall()
- {
- runParForOptimizerTest(1, true, false);
+ public void testParForRulebasedOptimizerBivariateStatsSmallSmall() {
+ runParForOptimizerTest(2, false, false, false);
}
@Test
- public void testParForOptimizerCorrelationLargeLarge()
- {
- runParForOptimizerTest(1, true, true);
+ public void testParForRulebasedOptimizerBivariateStatsSmallLarge() {
+ runParForOptimizerTest(2, false, true, false);
}
+ @Test
+ public void testParForRulebasedOptimizerBivariateStatsLargeSmall() {
+ runParForOptimizerTest(2, true, false, false);
+ }
@Test
- public void testParForOptimizerBivariateStatsSmallSmall()
- {
- runParForOptimizerTest(2, false, false);
+ public void testParForRulebasedOptimizerBivariateStatsLargeLarge() {
+ runParForOptimizerTest(2, true, true, false);
}
@Test
- public void testParForOptimizerBivariateStatsSmallLarge()
- {
- runParForOptimizerTest(2, false, true);
+ public void testParForRulebasedOptimizerFunctionInvocationSmallSmall() {
+ runParForOptimizerTest(3, false, false, false);
}
@Test
- public void testParForOptimizerBivariateStatsLargeSmall()
- {
- runParForOptimizerTest(2, true, false);
+ public void testParForRulebasedOptimizerFunctionInvocationSmallLarge() {
+ runParForOptimizerTest(3, false, true, false);
}
@Test
- public void testParForOptimizerBivariateStatsLargeLarge()
- {
- runParForOptimizerTest(2, true, true);
+ public void testParForRulebasedOptimizerFunctionInvocationLargeSmall() {
+ runParForOptimizerTest(3, true, false, false);
}
@Test
- public void testParForOptimizerFunctionInvocationSmallSmall()
- {
- runParForOptimizerTest(3, false, false);
+ public void testParForRulebasedOptimizerFunctionInvocationLargeLarge() {
+ runParForOptimizerTest(3, true, true, false);
}
@Test
- public void testParForOptimizerFunctionInvocationSmallLarge()
- {
- runParForOptimizerTest(3, false, true);
+ public void testParForHeuristicOptimizerCorrelationSmallSmall() {
+ runParForOptimizerTest(1, false, false, true);
}
@Test
- public void testParForOptimizerFunctionInvocationLargeSmall()
- {
- runParForOptimizerTest(3, true, false);
+ public void testParForHeuristicOptimizerCorrelationSmallLarge() {
+ runParForOptimizerTest(1, false, true, true);
}
@Test
- public void testParForOptimizerFunctionInvocationLargeLarge()
- {
- runParForOptimizerTest(3, true, true);
+ public void testParForHeuristicOptimizerCorrelationLargeSmall() {
+ runParForOptimizerTest(1, true, false, true);
+ }
+
+ @Test
+ public void testParForHeuristicOptimizerCorrelationLargeLarge() {
+ runParForOptimizerTest(1, true, true, true);
}
+ @Test
+ public void testParForHeuristicOptimizerBivariateStatsSmallSmall() {
+ runParForOptimizerTest(2, false, false, true);
+ }
- private void runParForOptimizerTest( int scriptNum, boolean largeRows, boolean largeCols )
+ @Test
+ public void testParForHeuristicOptimizerBivariateStatsSmallLarge() {
+ runParForOptimizerTest(2, false, true, true);
+ }
+
+ @Test
+ public void testParForHeuristicOptimizerBivariateStatsLargeSmall() {
+ runParForOptimizerTest(2, true, false, true);
+ }
+
+ @Test
+ public void testParForHeuristicOptimizerBivariateStatsLargeLarge() {
+ runParForOptimizerTest(2, true, true, true);
+ }
+
+ @Test
+ public void testParForHeuristicOptimizerFunctionInvocationSmallSmall() {
+ runParForOptimizerTest(3, false, false, true);
+ }
+
+ @Test
+ public void testParForHeuristicOptimizerFunctionInvocationSmallLarge() {
+ runParForOptimizerTest(3, false, true, true);
+ }
+
+ @Test
+ public void testParForHeuristicOptimizerFunctionInvocationLargeSmall() {
+ runParForOptimizerTest(3, true, false, true);
+ }
+
+ @Test
+ public void testParForHeuristicOptimizerFunctionInvocationLargeLarge() {
+ runParForOptimizerTest(3, true, true, true);
+ }
+
+
+ private void runParForOptimizerTest( int scriptNum, boolean largeRows, boolean largeCols, boolean timebasedOpt )
{
//find right rows and cols configuration
int rows=-1, cols=-1;
@@ -171,31 +214,34 @@ public class ParForRulebasedOptimizerTest extends AutomatedTestBase
switch( scriptNum )
{
case 1:
- runUnaryTest(scriptNum, rows, cols);
+ runUnaryTest(scriptNum, timebasedOpt, rows, cols);
break;
case 2:
- runNaryTest(scriptNum, rows, cols);
+ runNaryTest(scriptNum, timebasedOpt, rows, cols);
break;
case 3:
- runUnaryTest(scriptNum, rows, cols);
+ runUnaryTest(scriptNum, timebasedOpt, rows, cols);
break;
}
}
- private void runUnaryTest(int scriptNum, int rows, int cols )
+ private void runUnaryTest(int scriptNum, boolean timebasedOpt, int rows, int cols )
{
TestConfiguration config = null;
String HOME = SCRIPT_DIR + TEST_DIR;
if( scriptNum==1 )
{
config=getTestConfiguration(TEST_NAME1);
- fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
+ String testname = TEST_NAME1 + (timebasedOpt ? "b" : "");
+ fullDMLScriptName = HOME + testname + ".dml";
}
else if( scriptNum==3 )
{
config=getTestConfiguration(TEST_NAME3);
- fullDMLScriptName = HOME + TEST_NAME3 + ".dml";
+ String testname = TEST_NAME3 + (timebasedOpt ? "b" : "");
+ fullDMLScriptName = HOME + testname + ".dml";
}
+
config.addVariable("rows", rows);
config.addVariable("cols", cols);
loadTestConfiguration(config);
@@ -235,7 +281,7 @@ public class ParForRulebasedOptimizerTest extends AutomatedTestBase
TestUtils.compareMatrices(dmlfile, rfile, eps, "DML", "R");
}
- private void runNaryTest(int scriptNum, int rows, int cols)
+ private void runNaryTest(int scriptNum, boolean timebasedOpt, int rows, int cols)
{
TestConfiguration config = getTestConfiguration(TEST_NAME2);
config.addVariable("rows", rows);
@@ -244,7 +290,8 @@ public class ParForRulebasedOptimizerTest extends AutomatedTestBase
/* This is for running the junit test the new way, i.e., construct the arguments directly */
String HOME = SCRIPT_DIR + TEST_DIR;
- fullDMLScriptName = HOME + TEST_NAME2 + ".dml";
+ String testname = TEST_NAME2 + (timebasedOpt ? "b" : "");
+ fullDMLScriptName = HOME + testname + ".dml";
programArgs = new String[]{"-args",
input("D"),
input("S1"), input("S2"),
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/test/scripts/functions/parfor/parfor_optimizer1b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_optimizer1b.dml b/src/test/scripts/functions/parfor/parfor_optimizer1b.dml
new file mode 100644
index 0000000..cd0a3f7
--- /dev/null
+++ b/src/test/scripts/functions/parfor/parfor_optimizer1b.dml
@@ -0,0 +1,53 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+# Pairwise column correlation with nested parfor loops; the outer loop
+# requests the HEURISTIC parfor optimizer.
+# Arguments: $1 input matrix, $2 number of rows, $3 number of columns,
+#            $4 output matrix
+V = read($1,rows=$2,cols=$3);
+m = $2;
+n = $3;
+W = m;
+
+# result matrix; only cells R[i,j] with i<j are written by the loops below
+R = matrix(0, rows=n,cols=n);
+dummy = matrix(1, rows=1, cols=1);
+
+parfor( i in 1:(n-1), opt=HEURISTIC )
+{
+ X = V[,i];
+ m2X = moment(X,2);
+ # scale the second moment by W/(W-1) before taking the square root
+ sigmaX = sqrt(m2X * (W/(W-1.0)) );
+
+ parfor( j in (i+1):n )
+ {
+ Y = V[,j];
+
+ #corr computation
+ m2Y = moment(Y,2);
+ sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+ covXY = cov(X,Y);
+ rXY = covXY / (sigmaX*sigmaY);
+
+ #print("R[("+i+","+j+")]="+rXY);
+ # multiply with the 1x1 matrix to assign the scalar as a matrix
+ R[i,j] = dummy * rXY;
+
+ }
+}
+
+write(R, $4);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/test/scripts/functions/parfor/parfor_optimizer2b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_optimizer2b.dml b/src/test/scripts/functions/parfor/parfor_optimizer2b.dml
new file mode 100644
index 0000000..6b41058
--- /dev/null
+++ b/src/test/scripts/functions/parfor/parfor_optimizer2b.dml
@@ -0,0 +1,277 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+
+/*
+ *
+ * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs.
+ * Given S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n},
+ * compute bivariate stats for the m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n).
+ *
+ * Inputs:
+ * $1) D - input data ($7 rows, $8 columns)
+ * $2) S1 - First attribute set {A_11, A_12, ... A_1m} (1 row, $9 columns)
+ * $3) S2 - Second attribute set {A_21, A_22, ... A_2n} (1 row, $9 columns)
+ * $4) K1 - kind for attributes in S1 (1 row, $9 columns)
+ * $5) K2 - kind for attributes in S2 (1 row, $9 columns)
+ * kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
+ * $10) numPairs - total number of pairs (m*n)
+ * $11) maxC - maximum number of categories in any categorical attribute
+ *
+ * One output:
+ * $6) output directory in which the following four statistics files are created
+ * + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs
+ * (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
+ * + category.counts -
+ * + category.means -
+ * + category.variances -
+ * -> Values in these three matrices are applicable only for scale-categorical attribute pairs.
+ * The k^th column in these matrices denotes the attribute pair (A_1i,A_2j) where k = (i-1)*n + j.
+ */
+
+D = read($1, rows=$7, cols=$8); # input data set
+S1 = read($2, rows=1, cols=$9); # attribute set 1
+S2 = read($3, rows=1, cols=$9); # attribute set 2
+K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
+K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
+numPairs = $10; # number of attribute pairs (|S1|*|S2|)
+maxC = $11; # max number of categories in any categorical attribute
+
+s1size = ncol(S1);
+s2size = ncol(S2);
+
+#numpairs = s1size * s2size;
+#print(s1size + ", " + s2size + ", " + numpairs);
+
+# basestats rows: 1=R, 2=chisq, 3=df, 4=pval, 5=cramersv, 6=spearman, 7=eta, 8=anovaF
+numstats = 8;
+basestats = matrix(0, rows=numstats, cols=numPairs);
+cat_counts = matrix(0, rows=maxC, cols=numPairs);
+cat_means = matrix(0, rows=maxC, cols=numPairs);
+cat_vars = matrix(0, rows=maxC, cols=numPairs);
+
+dummy = matrix(1, rows=1, cols=1);
+# NOTE(review): check=0 presumably switches off the parfor dependency check - confirm against the DML reference
+# for every attribute pair (i,j): choose the statistics by the kinds k1/k2 and store them in column pairID
+parfor( i in 1:s1size, check=0, opt=HEURISTIC) {
+ a1 = as.scalar(S1[,i]);
+ k1 = as.scalar(K1[1,i]);
+ A1 = D[,a1];
+
+ parfor( j in 1:s2size, check=0) {
+ pairID = (i-1)*s2size+j; # column index of pair (i,j) in the result matrices
+ a2 = as.scalar(S2[,j]);
+ k2 = as.scalar(K2[1,j]);
+ A2 = D[,a2];
+
+ if (k1 == k2) {
+ if (k1 == 1) {
+ # scale-scale
+ print("[" + i + "," + j + "] scale-scale");
+ r = bivar_ss(A1,A2);
+ basestats[1,pairID] = dummy*r;
+ } else {
+ # nominal-nominal or ordinal-ordinal
+ print("[" + i + "," + j + "] categorical-categorical");
+ [chisq, df, pval, cramersv] = bivar_cc(A1,A2);
+ basestats[2,pairID] = dummy*chisq;
+ basestats[3,pairID] = dummy*df;
+ basestats[4,pairID] = dummy*pval;
+ basestats[5,pairID] = dummy*cramersv;
+
+ if ( k1 == 3 ) {
+ # ordinal-ordinal
+ print("[" + i + "," + j + "] ordinal-ordinal");
+ sp = bivar_oo(A1, A2);
+ basestats[6,pairID] = dummy*sp;
+ }
+ }
+ }
+ else {
+ if (k1 == 1 | k2 == 1) {
+ # Scale-nominal/ordinal
+ print("[" + i + "," + j + "] scale-categorical");
+ # bivar_sc expects the scale attribute first and the categorical attribute second
+ if ( k1 == 1 ) {
+ [eta,f, counts, means, vars] = bivar_sc(A1,A2);
+ }
+ else {
+ [eta,f, counts, means, vars] = bivar_sc(A2,A1);
+ }
+ basestats[7,pairID] = dummy*eta;
+ basestats[8,pairID] = dummy*f;
+ cat_counts[,pairID] = counts;
+ cat_means[,pairID] = means;
+ cat_vars[,pairID] = vars;
+ }
+ else {
+ # nominal-ordinal or ordinal-nominal
+ print("[" + i + "," + j + "] categorical-categorical");
+ [chisq, df, pval, cramersv] = bivar_cc(A1,A2);
+ basestats[2,pairID] = dummy*chisq;
+ basestats[3,pairID] = dummy*df;
+ basestats[4,pairID] = dummy*pval;
+ basestats[5,pairID] = dummy*cramersv;
+ }
+ }
+ }
+}
+
+write(basestats, $6 + "/bivar.stats");
+write(cat_counts, $6 + "/category.counts");
+write(cat_means, $6 + "/category.means");
+write(cat_vars, $6 + "/category.variances");
+
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
+ # Returns chi-squared, degrees of freedom, p-value and Cramer's V computed from the contingency table of A and B
+ # Contingency Table
+ F = table(A,B);
+
+ # Chi-Squared: E is (row sums %*% column sums) divided by the table total W
+ W = sum(F);
+ r = rowSums(F);
+ c = colSums(F);
+ E = (r %*% c)/W;
+ T = (F-E)^2/E;
+ chi_squared = sum(T);
+
+ # compute p-value (pchisq is called with lower.tail=FALSE)
+ degFreedom = (nrow(F)-1)*(ncol(F)-1);
+ pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
+
+ # Cramer's V: q is the smaller of the table's row and column counts
+ R = nrow(F);
+ C = ncol(F);
+ q = min(R,C);
+ cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+ # Assign return values
+ chisq = chi_squared;
+ df = degFreedom;
+ pval = pValue;
+ cramersv = cramers_v;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
+ # Returns R = cov(X,Y) / (sigmaX*sigmaY) for the two input vectors X and Y
+ # Unweighted co-variance
+ covXY = cov(X,Y);
+
+ # compute standard deviations for both X and Y from the 2nd central moment, rescaled by W/(W-1) with W = nrow(X)
+ W = nrow(X);
+ m2X = moment(X,2);
+ m2Y = moment(Y,2);
+ sigmaX = sqrt(m2X * (W/(W-1.0)) );
+ sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+
+ # Pearson's R
+ R = covXY / (sigmaX*sigmaY);
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Y points to the SCALE variable
+# A points to the CATEGORICAL variable
+bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
+ # Returns Eta and the ANOVA F value of Y grouped by A, plus the per-category counts, means and variances of Y
+ # mean and variance in target variable
+ W = nrow(A);
+ my = mean(Y);
+ varY = moment(Y,2) * W/(W-1.0)
+
+ # category-wise (frequencies, means, variances) of Y, grouped by the values in A
+ CFreqs = aggregate(target=Y, groups=A, fn="count");
+ CMeans = aggregate(target=Y, groups=A, fn="mean");
+ CVars = aggregate(target=Y, groups=A, fn="variance");
+
+ # number of categories
+ R = nrow(CFreqs);
+
+ Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+ # F value: numerator is divided by (R-1), denominator by (W-R)
+ anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+ anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+ AnovaF = anova_num/anova_den;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+
+# -----------------------------------------------------------------------------------------------------------
+# Function to compute ranks
+# takes a column vector X as input and produces a vector of the same size in which cell i holds sum(X[1:(i-1)]) + (X[i]+1)/2
+computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
+ dummy = matrix(1, rows=1, cols=1);
+ Rks = X;
+ size = nrow(X);
+ for(i in 1:size) {
+ prefixSum = 0.0;
+ if( i>1 ){
+ prefixSum = sum(X[1:(i-1),1]);
+ }
+ Rks[i,1] = dummy * (prefixSum + ((as.scalar(X[i,1])+1)/2));
+ }
+ Ranks = Rks;
+}
+
+#-------------------------------------------------------------------------
+
+bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
+ # Returns sp = covXY / (sqrt(varX)*sqrt(varY)), computed on the rank scores of the categories of A and B
+ # compute contingency table
+ F = table(A,B);
+
+ catA = nrow(F); # number of categories in A
+ catB = ncol(F); # number of categories in B
+
+ # compute category-wise counts for both the attributes
+ R = rowSums(F);
+ S = colSums(F);
+
+ # compute scores, both are column vectors
+ [C] = computeRanks(R);
+ meanX = mean(C,R);
+ # colSums yields a row vector, so it is transposed before it is passed to computeRanks
+ columnS = t(S);
+ [D] = computeRanks(columnS);
+
+ # scores (C,D) are individual values, and counts (R,S) act as weights
+ meanY = mean(D,columnS);
+
+ W = sum(F); # total weight, or total #cases
+ varX = moment(C,R,2)*(W/(W-1.0));
+ varY = moment(D,columnS,2)*(W/(W-1.0));
+ # accumulate the covariance of the scores row by row of F, each cell weighted by F[i,]/(W-1)
+ covXY = 0.0;
+ for(i in 1:catA) {
+ covXY = covXY + sum((F[i,]/(W-1)) * (as.scalar(C[i,1])-meanX) * (t(D[,1])-meanY));
+ }
+
+ sp = covXY/(sqrt(varX)*sqrt(varY));
+}
+
+# -----------------------------------------------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/test/scripts/functions/parfor/parfor_optimizer3b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_optimizer3b.dml b/src/test/scripts/functions/parfor/parfor_optimizer3b.dml
new file mode 100644
index 0000000..6eae759
--- /dev/null
+++ b/src/test/scripts/functions/parfor/parfor_optimizer3b.dml
@@ -0,0 +1,52 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+V = read($1,rows=$2,cols=$3); # input matrix with $2 rows and $3 columns
+n = $3;
+nd = $4;
+
+R = matrix(0, rows=1,cols=nd);
+dummy = matrix(1, rows=1, cols=1);
+# for i in 1:(n/2): R[1,i] = sum of column i of V plus sum of column n-i+1 of V
+parfor( i in 1:(n/2), opt=HEURISTIC )
+{
+ X = V[ ,i];
+ Y = V[ ,n-i+1];
+ sx = execSum(X);
+ sy = execSum(Y);
+ R[1,i] = dummy*( sx+sy );
+}
+
+write(R, $5);
+
+
+execSum = function(Matrix[Double] X) return (Double sx) # returns sum(X)
+{
+ if( ncol(X) > 0 ) # NOTE(review): both branches assign the same value; presumably kept to have a conditional inside the function - confirm
+ {
+ sx = sum(X);
+ }
+ else
+ {
+ sx = sum(X);
+ }
+}
\ No newline at end of file